Wonderplex committed · Commit c423c55 · 1 Parent(s): 0c22348
adopated sotopia logics (#44)
Files changed:
- .gitignore +3 -1
- app.py +22 -76
- sotopia_pi_generate.py +248 -0
- utils.py +20 -6
.gitignore
CHANGED
@@ -1,2 +1,4 @@
 __pycache__/
-.cache/
+.cache/
+openai_api.key
+core
app.py
CHANGED
@@ -1,25 +1,19 @@
 import os
 from collections import defaultdict
-from dataclasses import dataclass
-from uuid import uuid4
 import json
 
 import gradio as gr
-import torch
-import transformers
-from peft import PeftConfig, PeftModel, get_peft_model
-from transformers import (
-    AutoModelForCausalLM,
-    AutoTokenizer,
-    BitsAndBytesConfig,
-)
 
-from utils import Environment, Agent,
+from utils import Environment, Agent, get_context_prompt, dialogue_history_prompt
 from functools import cache
+from sotopia_pi_generate import prepare_model, generate_action
+
+with open("openai_api.key", "r") as f:
+    os.environ["OPENAI_API_KEY"] = f.read().strip()
 
 DEPLOYED = os.getenv("DEPLOYED", "true").lower() == "true"
 DEFAULT_MODEL_SELECTION = "cmu-lti/sotopia-pi-mistral-7b-BC_SR" # "mistralai/Mistral-7B-Instruct-v0.1"
-TEMPERATURE = 0.
+TEMPERATURE = 0.7
 TOP_P = 1
 MAX_TOKENS = 1024
 
@@ -27,6 +21,7 @@ ENVIRONMENT_PROFILES = "profiles/environment_profiles.jsonl"
 AGENT_PROFILES = "profiles/agent_profiles.jsonl"
 RELATIONSHIP_PROFILES = "profiles/relationship_profiles.jsonl"
 
+ACTION_TYPES = ['none', 'action', 'non-verbal communication', 'speak', 'leave']
 
 @cache
 def get_sotopia_profiles(env_file=ENVIRONMENT_PROFILES, agent_file=AGENT_PROFILES, relationship_file=RELATIONSHIP_PROFILES):
@@ -68,35 +63,6 @@ def get_sotopia_profiles(env_file=ENVIRONMENT_PROFILES, agent_file=AGENT_PROFILE
 
     return environments, environment_dict, agent_dict, relationship_dict
 
-@cache
-def prepare_model(model_name):
-    compute_type = torch.float16
-
-    if 'cmu-lti/sotopia-pi-mistral-7b-BC_SR'in model_name:
-        model = AutoModelForCausalLM.from_pretrained(
-            "mistralai/Mistral-7B-Instruct-v0.1",
-            cache_dir="./.cache",
-            device_map='cuda',
-            quantization_config=BitsAndBytesConfig(
-                load_in_4bit=True,
-                bnb_4bit_use_double_quant=True,
-                bnb_4bit_quant_type="nf4",
-                bnb_4bit_compute_dtype=compute_type,
-            )
-        )
-        tokenizer = AutoTokenizer.from_pretrained("mistralai/Mistral-7B-Instruct-v0.1")
-        model = PeftModel.from_pretrained(model, model_name).to("cuda")
-    elif 'mistralai/Mistral-7B-Instruct-v0.1' in model_name:
-        model = AutoModelForCausalLM.from_pretrained(
-            "mistralai/Mistral-7B-Instruct-v0.1",
-            cache_dir="./.cache",
-            device_map='cuda',
-        )
-        tokenizer = AutoTokenizer.from_pretrained("mistralai/Mistral-7B-Instruct-v0.1")
-    else:
-        raise RuntimeError(f"Model {model_name} not supported")
-    return model, tokenizer
-
 
 def introduction():
     with gr.Column(scale=2):
@@ -162,7 +128,7 @@ def sotopia_info_accordion(accordion_visible=True):
     with gr.Accordion("Sotopia Information", open=accordion_visible):
         with gr.Column():
             model_name_dropdown = gr.Dropdown(
-                choices=["cmu-lti/sotopia-pi-mistral-7b-BC_SR", "mistralai/Mistral-7B-Instruct-v0.1", "
+                choices=["cmu-lti/sotopia-pi-mistral-7b-BC_SR", "mistralai/Mistral-7B-Instruct-v0.1", "gpt-3.5-turbo"],
                 value="cmu-lti/sotopia-pi-mistral-7b-BC_SR",
                 interactive=True,
                 label="Model Selection"
@@ -213,50 +179,30 @@ def instructions_accordion(instructions, according_visible=False):
 
 def chat_tab():
     # history are input output pairs
+    _, environment_dict, agent_dict, _ = get_sotopia_profiles()
     def run_chat(
         message,
         history,
-
+        environment_selection,
         user_agent_dropdown,
         bot_agent_dropdown,
         model_selection:str
     ):
-
-
-
-
-        )
-
-
-
-
-
-
-            temperature=TEMPERATURE,
-            top_p=TOP_P,
-            max_length=MAX_TOKENS,
-            pad_token_id=tokenizer.eos_token_id,
-            num_return_sequences=1,
-        )
-        output_tokens = output_tokens[:, input_length:]
-        text_output = tokenizer.decode(
-            output_tokens[0], skip_special_tokens=True
-        )
-        output = ""
-        for _ in range(5):
-            try:
-                output = format_bot_message(text_output)
-                break
-            except Exception as e:
-                print(e)
-                print("Retrying...")
-        return output
+        environment = environment_dict[environment_selection]
+        user_agent = agent_dict[user_agent_dropdown]
+        bot_agent = agent_dict[bot_agent_dropdown]
+
+        import pdb; pdb.set_trace()
+        context = get_context_prompt(bot_agent, user_agent, environment)
+        dialogue_history, next_turn_idx = dialogue_history_prompt(message, history, user_agent, bot_agent)
+        prompt_history = f"{context}\n\n{dialogue_history}"
+        agent_action = generate_action(model_selection, prompt_history, next_turn_idx, ACTION_TYPES, bot_agent.name, TEMPERATURE)
+        import pdb; pdb.set_trace()
+        return agent_action.to_natural_language()
 
-    _, environment_dict, agent_dict, _ = get_sotopia_profiles()
     with gr.Column():
         with gr.Row():
             model_name_dropdown, scenario_dropdown, user_agent_dropdown, bot_agent_dropdown = sotopia_info_accordion()
-            starter_prompt = gr.Textbox(value=get_starter_prompt(agent_dict[user_agent_dropdown.value], agent_dict[bot_agent_dropdown.value], environment_dict[scenario_dropdown.value]), label="Modify the prompt as needed:", visible=False)
 
     with gr.Column():
         with gr.Blocks():
@@ -279,7 +225,7 @@ def chat_tab():
                 rtl=False,
             ),
            additional_inputs=[
-
+                scenario_dropdown,
                user_agent_dropdown,
                bot_agent_dropdown,
                model_name_dropdown,
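Note on the run_chat wiring above: gr.ChatInterface calls its fn with (message, history) followed by the values of additional_inputs, in the order they are listed, which is why scenario_dropdown is added both to additional_inputs and as the new environment_selection parameter. A minimal, self-contained sketch of that contract follows; the component names and return string are illustrative placeholders, not code from the commit.

    import gradio as gr

    def run_chat(message, history, environment_selection, user_agent, bot_agent, model_selection):
        # Gradio appends the additional_inputs values, in order, after (message, history).
        return f"[{model_selection} | {environment_selection}] {bot_agent} -> {user_agent}: {message}"

    scenario = gr.Dropdown(choices=["Scenario 1"], value="Scenario 1", label="Scenario")
    user_agent = gr.Dropdown(choices=["Alice"], value="Alice", label="User agent")
    bot_agent = gr.Dropdown(choices=["Bob"], value="Bob", label="Bot agent")
    model = gr.Dropdown(
        choices=["cmu-lti/sotopia-pi-mistral-7b-BC_SR"],
        value="cmu-lti/sotopia-pi-mistral-7b-BC_SR",
        label="Model Selection",
    )

    demo = gr.ChatInterface(fn=run_chat, additional_inputs=[scenario, user_agent, bot_agent, model])
    demo.launch()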
sotopia_pi_generate.py
ADDED
@@ -0,0 +1,248 @@
+import re
+
+import torch
+from peft import PeftModel
+from transformers import (
+    AutoModelForCausalLM,
+    AutoTokenizer,
+    BitsAndBytesConfig,
+)
+
+from langchain_community.llms.huggingface_pipeline import HuggingFacePipeline
+from langchain_community.chat_models import ChatLiteLLM
+from transformers import AutoModelForCausalLM, AutoTokenizer, pipeline
+
+from langchain.chains import LLMChain
+from langchain.output_parsers import PydanticOutputParser
+from langchain.prompts import (
+    ChatPromptTemplate,
+    HumanMessagePromptTemplate,
+    PromptTemplate,
+)
+from langchain.schema import BaseOutputParser, OutputParserException
+from typing import TypeVar
+
+from sotopia.messages import ActionType, AgentAction
+from sotopia.utils import format_docstring
+from functools import cache
+import logging
+
+OutputType = TypeVar("OutputType", bound=object)
+
+log = logging.getLogger("generate")
+# logging_handler = LoggingCallbackHandler("langchain")
+
+def generate_action(
+    model_name: str,
+    history: str,
+    turn_number: int,
+    action_types: list[ActionType],
+    agent: str,
+    temperature: float = 0.7,
+) -> tuple[AgentAction, str]:
+    """
+    Using langchain to generate an example episode
+    """
+    try:
+        # Normal case, model as agent
+        template = """
+            Imagine you are {agent}, your task is to act/speak as {agent} would, keeping in mind {agent}'s social goal.
+            You can find {agent}'s goal (or background) in the 'Here is the context of the interaction' field.
+            Note that {agent}'s goal is only visible to you.
+            You should try your best to achieve {agent}'s goal in a way that align with their character traits.
+            Additionally, maintaining the conversation's naturalness and realism is essential (e.g., do not repeat what other people has already said before).
+            {history}.
+            You are at Turn #{turn_number}. Your available action types are
+            {action_list}.
+            Note: You can "leave" this conversation if 1. you have achieved your social goals, 2. this conversation makes you uncomfortable, 3. you find it uninteresting/you lose your patience, 4. or for other reasons you want to leave.
+
+            Please only generate a JSON string including the action type and the argument.
+            Your action should follow the given format:
+            {format_instructions}
+        """
+        return generate(
+            model_name=model_name,
+            template=template,
+            input_values=dict(
+                agent=agent,
+                turn_number=str(turn_number),
+                history=history,
+                action_list=" ".join(action_types),
+            ),
+            output_parser=PydanticOutputParser(pydantic_object=AgentAction),
+            temperature=temperature,
+        )
+    except Exception:
+        return AgentAction(action_type="none", argument=""), ""
+
+@cache
+def prepare_model(model_name):
+    compute_type = torch.float16
+
+    if 'cmu-lti/sotopia-pi-mistral-7b-BC_SR'in model_name:
+        tokenizer = AutoTokenizer.from_pretrained("mistralai/Mistral-7B-Instruct-v0.1", token="REDACTED")
+        model = AutoModelForCausalLM.from_pretrained(
+            "mistralai/Mistral-7B-Instruct-v0.1",
+            cache_dir="./.cache",
+            device_map='cuda',
+            quantization_config=BitsAndBytesConfig(
+                load_in_4bit=True,
+                bnb_4bit_use_double_quant=True,
+                bnb_4bit_quant_type="nf4",
+                bnb_4bit_compute_dtype=compute_type,
+            ),
+            token="REDACTED"
+        )
+        model = PeftModel.from_pretrained(model, model_name).to("cuda")
+    else:
+        raise RuntimeError(f"Model {model_name} not supported")
+    return model, tokenizer
+
+def obtain_chain_hf(
+    model_name: str,
+    template: str,
+    input_variables: list[str],
+    temperature: float = 0.7,
+    max_retries: int = 6,
+    max_tokens: int = 2700
+) -> LLMChain:
+    human_message_prompt = HumanMessagePromptTemplate(
+        prompt=PromptTemplate(template=template, input_variables=input_variables)
+    )
+    chat_prompt_template = ChatPromptTemplate.from_messages([human_message_prompt])
+    model, tokenizer = prepare_model(model_name)
+    pipe = pipeline("text-generation", model=model, tokenizer=tokenizer, max_new_tokens=max_tokens, temperature=temperature)
+    hf = HuggingFacePipeline(pipeline=pipe)
+    import pdb; pdb.set_trace()
+    chain = LLMChain(llm=hf, prompt=chat_prompt_template)
+    return chain
+
+def generate(
+    model_name: str,
+    template: str,
+    input_values: dict[str, str],
+    output_parser: BaseOutputParser[OutputType],
+    temperature: float = 0.7,
+) -> tuple[OutputType, str]:
+    import pdb; pdb.set_trace()
+    input_variables = re.findall(r"{(.*?)}", template)
+    assert (
+        set(input_variables) == set(list(input_values.keys()) + ["format_instructions"])
+        or set(input_variables) == set(list(input_values.keys()))
+    ), f"The variables in the template must match input_values except for format_instructions. Got {sorted(input_values.keys())}, expect {sorted(input_variables)}"
+    # process template
+    template = format_docstring(template)
+    chain = obtain_chain(model_name, template, input_variables, temperature)
+    if "format_instructions" not in input_values:
+        input_values["format_instructions"] = output_parser.get_format_instructions()
+    result = chain.predict([], **input_values)
+    import pdb; pdb.set_trace()
+    try:
+        parsed_result = output_parser.parse(result)
+    except KeyboardInterrupt:
+        raise KeyboardInterrupt
+    except Exception as e:
+        log.debug(
+            f"[red] Failed to parse result: {result}\nEncounter Exception {e}\nstart to reparse",
+            extra={"markup": True},
+        )
+        reformat_parsed_result = format_bad_output(
+            result, format_instructions=output_parser.get_format_instructions()
+        )
+        parsed_result = output_parser.parse(reformat_parsed_result)
+    log.info(f"Generated result: {parsed_result}")
+    return parsed_result
+
+def format_bad_output(
+    ill_formed_output: str,
+    format_instructions: str,
+    model_name: str = "gpt-3.5-turbo",
+) -> str:
+    template = """
+    Given the string that can not be parsed by json parser, reformat it to a string that can be parsed by json parser.
+    Original string: {ill_formed_output}
+
+    Format instructions: {format_instructions}
+
+    Please only generate the JSON:
+    """
+    chain = obtain_chain(
+        model_name=model_name,
+        template=template,
+        input_variables=re.findall(r"{(.*?)}", template),
+    )
+    input_values = {
+        "ill_formed_output": ill_formed_output,
+        "format_instructions": format_instructions,
+    }
+    reformat = chain.predict([], **input_values)
+    log.info(f"Reformated output: {reformat}")
+    return reformat
+
+def obtain_chain(
+    model_name: str,
+    template: str,
+    input_variables: list[str],
+    temperature: float = 0.7,
+    max_retries: int = 6,
+) -> LLMChain:
+    """
+    Using langchain to sample profiles for participants
+    """
+    if model_name in ["cmu-lti/sotopia-pi-mistral-7b-BC_SR"]:
+        return obtain_chain_hf(
+            model_name=model_name,
+            template=template,
+            input_variables=input_variables,
+            temperature=temperature,
+            max_retries=max_retries,
+        )
+
+    model_name = _return_fixed_model_version(model_name)
+    chat = ChatLiteLLM(
+        model=model_name,
+        temperature=temperature,
+        max_tokens=2700, # tweak as needed
+        max_retries=max_retries,
+    )
+    human_message_prompt = HumanMessagePromptTemplate(
+        prompt=PromptTemplate(template=template, input_variables=input_variables)
+    )
+    chat_prompt_template = ChatPromptTemplate.from_messages([human_message_prompt])
+    chain = LLMChain(llm=chat, prompt=chat_prompt_template)
+    return chain
+
+def format_bad_output(
+    ill_formed_output: str,
+    format_instructions: str,
+    model_name: str = "gpt-3.5-turbo",
+) -> str:
+    template = """
+    Given the string that can not be parsed by json parser, reformat it to a string that can be parsed by json parser.
+    Original string: {ill_formed_output}
+
+    Format instructions: {format_instructions}
+
+    Please only generate the JSON:
+    """
+    chain = obtain_chain(
+        model_name=model_name,
+        template=template,
+        input_variables=re.findall(r"{(.*?)}", template),
+    )
+    input_values = {
+        "ill_formed_output": ill_formed_output,
+        "format_instructions": format_instructions,
+    }
+    reformat = chain.predict([], **input_values)
+    log.info(f"Reformated output: {reformat}")
+    return reformat
+
+def _return_fixed_model_version(model_name: str) -> str:
+    return {
+        "gpt-3.5-turbo": "gpt-3.5-turbo-0613",
+        "gpt-3.5-turbo-finetuned": "ft:gpt-3.5-turbo-0613:academicscmu::8nY2zgdt",
+        "gpt-3.5-turbo-ft-MF": "ft:gpt-3.5-turbo-0613:academicscmu::8nuER4bO",
+        "gpt-4": "gpt-4-0613",
+        "gpt-4-turbo": "gpt-4-1106-preview",
+    }[model_name]
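Note on the generation path in this new file: generate_action fills the prompt template, obtains a chain (a local 4-bit Mistral + PEFT pipeline for the sotopia-pi checkpoint, ChatLiteLLM otherwise), and relies on PydanticOutputParser to turn the model's raw JSON reply into a sotopia AgentAction, whose to_natural_language() is what app.py displays. If parsing fails, format_bad_output asks gpt-3.5-turbo to rewrite the string and the parser is tried again. A minimal sketch of just the parsing step, assuming sotopia and langchain are installed; the raw string is an illustrative example, not output captured from the commit.

    from langchain.output_parsers import PydanticOutputParser
    from sotopia.messages import AgentAction

    parser = PydanticOutputParser(pydantic_object=AgentAction)

    # This text is what gets substituted into the {format_instructions} slot of the template.
    print(parser.get_format_instructions())

    # A well-formed reply is a JSON object with the two AgentAction fields.
    raw = '{"action_type": "speak", "argument": "Hi, do you have a minute to talk about the project?"}'
    action = parser.parse(raw)
    print(action.action_type, action.argument)
    print(action.to_natural_language())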
utils.py
CHANGED
@@ -44,7 +44,10 @@ def get_format_guide():
     """
 
 def get_starter_prompt(machine_agent, human_agent, environment):
-    return f"
+    return f"Imagine you are {machine_agent.name}, your task is to act/speak as {machine_agent.name} would, keeping in mind {machine_agent.name}'s social goal.\nYou can find {machine_agent.name}'s background and goal in the 'Here is the context of the interaction' field.\nNote that {machine_agent.name}'s secret and goal is only visible to you.\nYou should try your best to achieve {machine_agent.name}'s goal in a way that align with their character traits.\nAdditionally, maintaining the conversation's naturalness and realism is essential (e.g., do not repeat what other people has already said before).\n\nHere is the context of this interaction:\n Scenario: {environment.scenario}\nParticipants: {human_agent.name} and {machine_agent.name}\n{human_agent.name}'s background: {human_agent.background} Personality and values description: {human_agent.personality} \n{machine_agent.name}'s background: {machine_agent.background} Personality and values description: {machine_agent.personality} {machine_agent.name}'s secrets: {machine_agent.secret}\n{human_agent.name}'s goal: Unknown\n{machine_agent.name}'s goal: {environment.agent_goals[1]}\nConversation Starts:"
+
+def get_context_prompt(machine_agent, human_agent, environment):
+    return f"Here is the context of this interaction:\n Scenario: {environment.scenario}\nParticipants: {human_agent.name} and {machine_agent.name}\n{human_agent.name}'s background: {human_agent.background} Personality and values description: {human_agent.personality} \n{machine_agent.name}'s background: {machine_agent.background} Personality and values description: {machine_agent.personality} {machine_agent.name}'s secrets: {machine_agent.secret}\n{human_agent.name}'s goal: Unknown\n{machine_agent.name}'s goal: {environment.agent_goals[1]}\nConversation Starts:"
 
 
 # we define history as
@@ -102,6 +105,20 @@ def dialogue_history_creation(history, user_name, bot_name):
     last_turn_idx = len(history) * 2
     return dialogue_history, last_turn_idx
 
+def dialogue_history_prompt(message, history, user_agent, bot_agent):
+    dialogue_history = ""
+    for idx, turn in enumerate(history):
+        user_message, bot_message = turn
+        # TODOTODO (haofeiyu): we first assume that human talks first
+        user_turn_idx = idx * 2
+        bot_turn_idx = idx * 2 + 1
+        if not bot_message.startswith("["): # if action type == speak, need to add 'said: ' to be consistent with the dialog prompt
+            bot_message = "said :" + bot_message
+        dialogue_history = f"{dialogue_history}\n\nTurn #{user_turn_idx}: {user_agent.name}: {user_message}\n\nTurn #{bot_turn_idx}: {bot_agent.name}: {bot_message}"
+    last_turn_idx = len(history) * 2
+    dialogue_history = f"{dialogue_history}\n\nTurn #{last_turn_idx+1}: {user_agent.name}: {message}\n."
+    return dialogue_history, last_turn_idx+2
+
 
 def dialogue_history_truncation(dialogue_history, max_token_num, tokenizer):
     surpass_num = dialogue_history_length_check(
@@ -114,15 +131,12 @@ def dialogue_history_truncation(dialogue_history, max_token_num, tokenizer):
     return dialogue_history
 
 
-def
+def format_hostory_prompt(
     message: str,
     history: List[Tuple[str, str]],
     instructions: str,
     user_name: str,
     bot_name: str,
-    include_all_chat_history: bool = True,
-    index: int = 1,
-    use_format_guide: bool = True,
 ) -> str:
     prompt = instructions.strip()
     dialogue_history, last_turn_idx = dialogue_history_creation(
@@ -130,4 +144,4 @@ def format_sotopia_prompt(
     )
     prompt = f"{prompt}\n{dialogue_history}"
     prompt = f"{prompt}\n\nTurn #{last_turn_idx+1}: {user_name}: {message}\n.\nYou are at Turn #{last_turn_idx+2}."
-    return prompt
+    return prompt
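Note on dialogue_history_prompt above: completed (user, bot) pairs are numbered Turn #0, #1, #2, #3, and so on, the incoming user message is appended as Turn #{last_turn_idx+1}, and the returned index (last_turn_idx + 2) is the turn the bot is asked to produce next. A small illustration under those assumptions; the SimpleNamespace agents are hypothetical stand-ins, since only .name is read here.

    from types import SimpleNamespace
    from utils import dialogue_history_prompt

    # Hypothetical agent stand-ins; the real app passes Agent profile objects.
    alice = SimpleNamespace(name="Alice")
    bob = SimpleNamespace(name="Bob")

    history = [
        ("Hi Bob!", "Hi Alice, good to see you."),
        ("How is the project going?", "[action] Bob pulls up the latest draft."),
    ]

    dialogue_history, next_turn_idx = dialogue_history_prompt("Want to grab lunch?", history, alice, bob)
    print(next_turn_idx)    # 6: the two finished pairs occupy Turns #0-#3 and the new message sits at Turn #5
    print(dialogue_history) # bot messages without a leading "[" get "said :" prepended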