File size: 3,646 Bytes
3ce130a
 
76d3fa1
 
 
 
 
 
 
 
 
 
 
3ce130a
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
76d3fa1
3ce130a
76d3fa1
 
 
 
 
 
3ce130a
 
 
 
 
 
 
 
76d3fa1
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
from typing import Tuple, List

class Agent:
    """Plain record describing one participant of a role-play interaction.

    Attributes are stored exactly as passed in; no validation or copying
    is performed.
    """

    def __init__(self, name, background, goal, secrets, personality):
        # Identity and biography of the character.
        self.name, self.background = name, background
        # What the character wants and what they are hiding.
        self.goal, self.secrets = goal, secrets
        # Free-form personality / values description.
        self.personality = personality

def get_starter_prompt(machine_agent, human_agent, scenario):
    """Build the opening instruction prompt for the machine-played agent.

    Args:
        machine_agent: Object for the character the model plays. Its
            ``name``, ``background``, ``personality``, ``secrets`` and
            ``goal`` attributes are interpolated into the prompt.
        human_agent: Object for the other participant. Only its ``name``,
            ``background`` and ``personality`` are read; its goal is always
            rendered as ``Unknown``.
        scenario: Scenario description placed in the context section.

    Returns:
        The formatted prompt string, ending with ``Conversation Starts:``.
    """
    bot = machine_agent.name
    human = human_agent.name
    return (
        "Prompt after formatting:\n"
        f"Imagine you are {bot}, your task is to act/speak as {bot} would, keeping in mind {bot}'s social goal.\n"
        f"You can find {bot}'s background and goal in the 'Here is the context of the interaction' field.\n"
        f"Note that {bot}'s secret and goal is only visible to you.\n"
        f"You should try your best to achieve {bot}'s goal in a way that align with their character traits.\n"
        "Additionally, maintaining the conversation's naturalness and realism is essential (e.g., do not repeat what other people has already said before).\n\n"
        "Here is the context of this interaction:\n"
        f" Scenario: {scenario}\n"
        f"Participants: {human} and {bot}\n"
        f"{human}'s background: {human_agent.background} Personality and values description: {human_agent.personality} \n"
        f"{bot}'s background: {machine_agent.background} Personality and values description: {machine_agent.personality} {bot}'s secrets: {machine_agent.secrets}\n"
        f"{human}'s goal: Unknown\n"
        # The goal line must show the agent's goal; it previously repeated
        # the agent's name, so the model never saw its own goal.
        f"{bot}'s goal: {machine_agent.goal}\n"
        "Conversation Starts:"
    )

# `history` is a list of (user_message, bot_message) pairs:
# [(user_message, bot_message), (user_message, bot_message)]

# The dialogue history is the flat string rendered from `history`, one
# numbered turn per message, each turn preceded by a blank line:
# \n\nTurn #0: user_name: user_message\n\nTurn #1: bot_name: bot_message

def dialogue_history_length_check(string, max_token, tokenizer):
    """Return how many tokens ``string`` exceeds ``max_token`` by.

    ``tokenizer`` is called on ``string`` and the length of the
    ``"input_ids"`` entry of its result is used as the token count.
    The result is 0 when the text fits within the budget.
    """
    token_count = len(tokenizer(string)["input_ids"])
    overflow = token_count - max_token
    return overflow if overflow > 0 else 0


def truncate_dialogue_history_to_length(dia_his, surpass_num, tokenizer):
    """Drop leading lines of a dialogue history until enough tokens are removed.

    Args:
        dia_his: Dialogue history as a single newline-separated string.
        surpass_num: Number of tokens that must be removed.
        tokenizer: Callable returning a mapping whose ``"input_ids"`` entry
            has one element per token; it is applied to each line.

    Returns:
        The remaining lines joined with ``"\\n"``. If removing every line
        still does not reach ``surpass_num``, an empty string is returned.
    """
    lines = dia_his.split("\n")
    removed_tokens = 0
    keep_from = 0
    # Bound the scan by the number of lines: per-line token counts may sum
    # to less than surpass_num (the budget was measured on the whole string),
    # which previously ran past the end of the list and raised IndexError.
    while removed_tokens < surpass_num and keep_from < len(lines):
        removed_tokens += len(tokenizer(lines[keep_from])["input_ids"])
        keep_from += 1
    return "\n".join(lines[keep_from:])


def dialogue_history_creation(history, user_name, bot_name):
    """Render ``history`` as numbered turns and report the next turn index.

    Each ``(user_message, bot_message)`` pair at position ``k`` becomes two
    turns: the user's at index ``2k`` and the bot's at index ``2k + 1``.
    Every turn is prefixed with a blank line.

    Returns:
        A ``(dialogue_history, last_turn_idx)`` tuple where
        ``last_turn_idx`` is twice the number of pairs in ``history``.
    """
    # TODO(haofeiyu): this numbering assumes the human always talks first.
    rendered_pairs = [
        f"\n\nTurn #{pair_idx * 2}: {user_name}: {user_message}"
        f"\n\nTurn #{pair_idx * 2 + 1}: {bot_name}: {bot_message}"
        for pair_idx, (user_message, bot_message) in enumerate(history)
    ]
    return "".join(rendered_pairs), len(history) * 2


def dialogue_history_truncation(dialogue_history, max_token_num, tokenizer):
    """Shorten ``dialogue_history`` so it fits within ``max_token_num`` tokens.

    The overflow is measured with ``dialogue_history_length_check``; when
    there is none the input is returned unchanged, otherwise the result of
    ``truncate_dialogue_history_to_length`` is returned.
    """
    overflow = dialogue_history_length_check(dialogue_history, max_token_num, tokenizer)
    if overflow <= 0:
        # Already within budget: nothing to trim.
        return dialogue_history
    return truncate_dialogue_history_to_length(dialogue_history, overflow, tokenizer)


def format_sotopia_prompt(
    message: str,
    history: List[Tuple[str, str]],
    instructions: str,
    user_name: str,
    bot_name: str,
    include_all_chat_history: bool = True,
    index: int = 1,
) -> str:
    """Assemble the full prompt for the bot's next turn.

    The prompt is the stripped ``instructions``, followed by the rendered
    ``history``, the user's new ``message`` as the next numbered turn, and
    a closing line telling the bot which turn it is at.

    Args:
        message: The user's latest message, not yet part of ``history``.
        history: Earlier ``(user_message, bot_message)`` pairs.
        instructions: Starter prompt text; surrounding whitespace is removed.
        user_name: Display name used for user turns.
        bot_name: Display name used for bot turns.
        include_all_chat_history: Accepted for compatibility; not used here.
        index: Accepted for compatibility; not used here.

    Returns:
        The formatted prompt string.
    """
    prompt = instructions.strip()
    # The second value is the index of the first turn NOT yet in the history
    # (history occupies turns 0 .. 2n-1).
    dialogue_history, next_turn_idx = dialogue_history_creation(
        history,
        user_name,
        bot_name,
    )
    # Keep numbering contiguous with the history: the new user message takes
    # the next (even) index and the bot replies at the following (odd) one.
    # The previous +1/+2 offsets skipped a turn number and put the bot on an
    # even index, contradicting the history's user-even/bot-odd layout.
    user_turn_idx = next_turn_idx
    bot_turn_idx = next_turn_idx + 1
    return (
        f"{prompt}\n{dialogue_history}"
        f"\n\nTurn #{user_turn_idx}: {user_name}: {message}\n.\n"
        f"You are at Turn #{bot_turn_idx}."
    )