burtenshaw
committed on
Commit
·
01ba912
1
Parent(s):
fe1770b
respond to feedback on prompts
Browse files- data/generate_dpo.py +6 -6
data/generate_dpo.py
CHANGED
@@ -10,25 +10,25 @@ from typing_extensions import override
|
|
10 |
|
11 |
CHOSEN_TEMPLATE = """
|
12 |
You are provided with a conversation between a human and an AI assistant.
|
13 |
-
The final message
|
14 |
{% for message in conversation %}
|
15 |
{{ message["role"] }}: {{ message["content"] }}
|
16 |
{% endfor %}
|
17 |
-
|
18 |
""".rstrip()
|
19 |
|
20 |
-
CHOSEN_SYSTEM_PROMPT = "You are a helpful AI assistant. Your task is to
|
21 |
|
22 |
REJECT_TEMPLATE = """
|
23 |
You are provided with a conversation between a human and an AI assistant.
|
24 |
-
The final message
|
25 |
{% for message in conversation %}
|
26 |
{{ message["role"] }}: {{ message["content"] }}
|
27 |
{% endfor %}
|
28 |
-
|
29 |
""".rstrip()
|
30 |
|
31 |
-
REJECT_SYSTEM_PROMPT = "You are a helpful AI assistant. Your task is to
|
32 |
|
33 |
|
34 |
class FilterConversationRatings(Step):
|
|
|
10 |
|
11 |
CHOSEN_TEMPLATE = """
|
12 |
You are provided with a conversation between a human and an AI assistant.
|
13 |
+
The final message is of poor quality. Your task is to regenerate one of high quality.
|
14 |
{% for message in conversation %}
|
15 |
{{ message["role"] }}: {{ message["content"] }}
|
16 |
{% endfor %}
|
17 |
+
High quality response:
|
18 |
""".rstrip()
|
19 |
|
20 |
+
CHOSEN_SYSTEM_PROMPT = "You are a helpful AI assistant. Your task is to generate a high quality response when other assistants created a poor quality response."
|
21 |
|
22 |
REJECT_TEMPLATE = """
|
23 |
You are provided with a conversation between a human and an AI assistant.
|
24 |
+
The final message is of high quality. Your task is to regenerate one of poor quality.
|
25 |
{% for message in conversation %}
|
26 |
{{ message["role"] }}: {{ message["content"] }}
|
27 |
{% endfor %}
|
28 |
+
Poor quality response:
|
29 |
""".rstrip()
|
30 |
|
31 |
+
REJECT_SYSTEM_PROMPT = "You are a helpful AI assistant. Your task is to generate a poor quality response when other assistants created a high quality response."
|
32 |
|
33 |
|
34 |
class FilterConversationRatings(Step):
|