jjz5463 commited on
Commit
2649124
1 Parent(s): 4c558f8

initial commit

Browse files
Files changed (5) hide show
  1. README.md +4 -7
  2. app.py +109 -0
  3. chatbot_simulator.py +199 -0
  4. requirements.txt +3 -0
  5. task_specific_data_population.py +150 -0
README.md CHANGED
@@ -1,13 +1,10 @@
1
  ---
2
  title: App Simulator
3
- emoji: 🏃
4
- colorFrom: indigo
5
- colorTo: purple
6
  sdk: gradio
7
- sdk_version: 5.4.0
8
  app_file: app.py
9
  pinned: false
10
- short_description: simulate any real word app as a text-based chatbot
11
  ---
12
-
13
- Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
 
1
  ---
2
  title: App Simulator
3
+ emoji: 🐢
4
+ colorFrom: red
5
+ colorTo: indigo
6
  sdk: gradio
7
+ sdk_version: 4.44.1
8
  app_file: app.py
9
  pinned: false
 
10
  ---
 
 
app.py ADDED
@@ -0,0 +1,109 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import gradio as gr
2
+ from chatbot_simulator import ChatbotSimulation
3
+ from task_specific_data_population import DataPopulation
4
+ import os
5
# OpenAI credential, read once at import time; None when the variable is unset.
openai_api_key = os.getenv("OPENAI_API_KEY")


# Shared state for the simulator session served by this process.
simulation = None  # active ChatbotSimulation, assigned by initialize_simulator
conversation = []  # {"role", "content"} dicts that log_conversation writes out
display_conversation = []  # (user text, assistant text) pairs shown in the chat
11
+
12
def initialize_simulator(task, app_name, sitemap):
    """Build the simulator for a task and return the chat history to display.

    Data population and the first model call are retried up to 50 times,
    because either step raises when the model returns malformed output.

    Args:
        task: Description of the task the user should accomplish.
        app_name: Name of the simulated app; also used in the log file name.
        sitemap: Dataset identifier passed to DataPopulation.process_data.

    Returns:
        A list of (user, assistant) pairs for the chatbot component: the
        opening message on success, or a failure notice once every attempt
        has failed.
    """
    global simulation

    max_retries = 50
    last_error = None

    for attempt in range(1, max_retries + 1):
        try:
            # Process data (simulating data loading)
            data_population = DataPopulation(api_key=openai_api_key)
            sitemap_data, page_details, user_state = data_population.process_data(task, sitemap)

            candidate = ChatbotSimulation(
                site_map=sitemap_data,
                page_details=page_details,
                user_state=user_state,
                task=task,
                app_name=app_name,
                log_location=f'conversation_log_{app_name}_human.txt',
                openai_api_key=openai_api_key,
                agent='human'
            )
            text = candidate.start_conversation()
        except Exception as e:
            # Handle the exception and move on to the next attempt
            last_error = e
            print(f"Attempt {attempt}/{max_retries}: An error occurred: {e}. Retrying...")
            continue

        # Publish the simulation only once it has produced its first message,
        # and start from a clean history so an earlier session does not leak
        # into this one's log or display.
        simulation = candidate
        conversation.clear()
        display_conversation.clear()
        conversation.append({"role": "assistant", "content": text})
        log_conversation(simulation.log_location)
        display_conversation.append(('Start Simulator', text))
        return display_conversation

    # Every attempt failed: report it in the chat instead of returning None.
    failure = f"Failed to initialize the simulator after {max_retries} attempts: {last_error}"
    return [('Start Simulator', failure)]
46
+
47
+
48
# Per log file: (first message object of the logged history, messages written).
_log_progress = {}


def log_conversation(log_location):
    """
    Append the not-yet-logged part of the conversation to the log file.

    Only messages added since the previous successful call for the same
    file are written, so repeated calls do not duplicate earlier turns.
    Write failures are reported on stdout and the messages are retried on
    the next call.
    """
    first_message = conversation[0] if conversation else None
    marker, written = _log_progress.get(log_location, (None, 0))
    if marker is not first_message or written > len(conversation):
        # The history was reset (or replaced) since the last write.
        written = 0

    pending = conversation[written:]
    if not pending:
        return

    try:
        with open(log_location, 'a', encoding='utf-8') as f:  # Use 'a' for append mode
            f.writelines(
                f"{message['role']}: {message['content']}\n\n" for message in pending
            )
    except OSError as e:
        print(f"Error logging conversation: {e}")
    else:
        _log_progress[log_location] = (first_message, written + len(pending))
58
+
59
+
60
def chatbot_interaction(user_input):
    """Run one conversation round and return the chat history to display.

    Args:
        user_input: Text the user typed into the message box.

    Returns:
        A list of (user, assistant) pairs for the chatbot component. Notices
        and errors are returned in the same shape, because the component
        cannot render a bare string.
    """
    if simulation is None:
        return [(user_input, "Simulation is not initialized. Please start the simulator.")]

    try:
        # Perform one round of conversation
        response = simulation.one_conversation_round(user_input)
    except Exception as e:
        return [(user_input, f"An error occurred: {e}")]

    conversation.append({"role": "user", "content": user_input})
    conversation.append({"role": "assistant", "content": response})
    log_conversation(simulation.log_location)

    # Show only the latest exchange; every reply already restates the page.
    display_conversation[:] = [(user_input, response)]
    return display_conversation
77
+
78
+
79
# Gradio Interface
with gr.Blocks() as demo:
    gr.Markdown("## Simulator Setup")

    task_input = gr.Textbox(label="Task", placeholder="Describe your task...")
    app_name_input = gr.Textbox(label="App Name", placeholder="Enter the app name...")
    sitemap_input = gr.Textbox(label="Sitemap", placeholder="Enter the Hugging Face link to sitemap...")

    initialize_button = gr.Button("Initialize Simulator")

    chatbot = gr.Chatbot(label="Simulator Chat", height=800)
    user_message = gr.Textbox(label="Enter your message", placeholder="Type your message here...")
    submit_button = gr.Button("Send")

    # Initialize simulator and display the welcome message in chatbot
    initialize_button.click(
        initialize_simulator,
        inputs=[task_input, app_name_input, sitemap_input],
        outputs=chatbot  # Show setup message in the chatbot
    )

    # Handle conversation
    submit_button.click(
        chatbot_interaction,
        inputs=user_message,
        outputs=chatbot
    )

# Launch the app only when run as a script, so importing this module
# (for example from a test) does not start a server.
if __name__ == "__main__":
    demo.launch()
chatbot_simulator.py ADDED
@@ -0,0 +1,199 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from openai import OpenAI
2
+ import json_repair
3
+
4
+
5
class ChatbotSimulation:
    """Simulate an app as a page-by-page text chatbot driven by an OpenAI model.

    After every user input the model is asked to update the user state
    (which always carries 'current_page' and 'task_completed'), and is then
    asked to list the options of the current page.
    """

    def __init__(self, site_map, page_details, user_state, task,
                 app_name, log_location, openai_api_key, agent='human',
                 max_steps=50, max_tokens=8192, buffer_tokens=500,
                 model='gpt-4'):
        """Store the simulation inputs and create the OpenAI client.

        Args:
            site_map: Sitemap dict; page UIDs are read from
                site_map['pages'][page_name]['uid'].
            page_details: Mapping from page UID to that page's details.
            user_state: Initial user state; it is copied, then
                'current_page' is set to 'Home' and 'task_completed' to 0.
            task: Task description shown to the user and the model.
            app_name: Name of the simulated app.
            log_location: Path of the conversation log (stored, not opened here).
            openai_api_key: Key handed to the OpenAI client.
            agent: 'human' or 'llm' (case-insensitive).
            max_steps: Stored step limit.
            max_tokens: Token budget used when trimming the history.
            buffer_tokens: Head-room subtracted from max_tokens when trimming.
            model: Chat model name used for every request.

        Raises:
            ValueError: If agent is neither 'human' nor 'llm'.
        """
        self.agent = agent.lower()
        if self.agent not in ['human', 'llm']:
            raise ValueError("Invalid agent type. Expected 'Human' or 'llm'.")

        self.sitemap = site_map
        self.page_details = page_details
        # Copy so the caller's dict is not mutated by the two keys set below.
        self.user_state = dict(user_state)
        self.user_state['current_page'] = 'Home'  # Initialize current page
        self.user_state['task_completed'] = 0
        self.task = task
        self.app_name = app_name
        self.log_location = log_location
        self.max_steps = max_steps
        self.max_tokens = max_tokens
        self.buffer_tokens = buffer_tokens
        self.model = model
        self.conversation = []  # Stores recent conversation snippets
        self.prompt_count = 0  # Number of user inputs processed so far
        self.client = OpenAI(api_key=openai_api_key)
        self.actions = []  # Raw user inputs, in order

    def _get_page_uid(self, page_name):
        """Retrieve the UID of the given page from the sitemap (None if absent)."""
        return self.sitemap['pages'].get(page_name, {}).get('uid')

    def _get_page_details(self, page_name):
        """Retrieve the page details using its UID ({} when the page is unknown)."""
        uid = self._get_page_uid(page_name)
        return self.page_details.get(uid, {})

    def _generate_system_prompt(self):
        """Create a dynamic system prompt based on the current state."""
        current_page = self.user_state['current_page']
        page_info = self._get_page_details(current_page)

        return f"""
        You are interacting with the {self.app_name} app. Your task is: {self.task}.
        You are currently on the {current_page} page. Current user state: {self.user_state}.

        Page Information:
        {page_info}

        - **Features**: Represent available options the user can select on this page.
        - **User Data**: Represents user-specific data accessible on this page.

        Provide instructions or request input from the user. If the user provides an invalid action, respond with:
        "Invalid action. Please select a valid option."

        ### Instruction Format:
        You are at the {current_page} page. You have the following options:
        1. Feature 1
        2. Feature 2
        3. Feature 3
        4. Feature 4

        Please enter your choice as the corresponding number.

        Rules:
        - Be sure to display all options that is available in features.
        - Be robotic and emotionless. Avoid offering any advice to the user.
        - If a feature requires `input_text`, request input as: "Enter query as: [number]: query"
        """

    def _get_openai_response(self, prompt):
        """Trim the stored history, then fetch a completion for the messages in prompt."""
        self._trim_conversation()
        response = self.client.chat.completions.create(
            model=self.model,
            messages=prompt,
            max_tokens=1500,  # Adjusted max_tokens if needed
            temperature=1.0,
        )
        return response.choices[0].message.content

    def _calculate_token_count(self, conversation):
        """Estimate the token count as the number of whitespace-separated words."""
        return sum(len(entry['content'].split()) for entry in conversation)

    def _trim_conversation(self):
        """Drop the oldest messages until the history fits the token budget.

        The newest message is always kept, so one oversized message can no
        longer empty the history and produce a request with no messages.
        """
        limit = self.max_tokens - self.buffer_tokens
        while (len(self.conversation) > 1
               and self._calculate_token_count(self.conversation) > limit):
            self.conversation.pop(0)

    def _coerce_user_state(self, parsed):
        """Turn the model's parsed reply into a valid user-state dict.

        A list is unwrapped to its first dict element; anything that is still
        not a dict falls back to the current state. Missing 'current_page' or
        'task_completed' keys are filled in from the current state.
        """
        if isinstance(parsed, list):
            parsed = next((item for item in parsed if isinstance(item, dict)), None)
        if not isinstance(parsed, dict):
            return self.user_state

        state = dict(parsed)
        state.setdefault('current_page', self.user_state.get('current_page', 'Home'))
        state.setdefault('task_completed', self.user_state.get('task_completed', 0))
        return state

    def one_conversation_round(self, user_input):
        """Conduct one round of conversation between the user and the assistant.

        Args:
            user_input: The user's (or agent's) message for this round.

        Returns:
            The instructions for the resulting page, or a completion message
            once the model marks the task as finished.
        """
        # User provides input
        self.conversation.append({"role": "user", "content": user_input})
        self.actions.append(user_input)
        self.prompt_count += 1

        # Update user state using GPT's response
        update_prompt = f"""
        Update the user state based on the input: '{user_input}'.

        Current user state (JSON format): {self.user_state}

        Sitemap: {self.sitemap}

        Instructions:
        1. If the 'current_page' has changed, update it to a page from the sitemap.
        2. If the task is finished, update 'task_completed' to 1. Otherwise, leave it unchanged.
        3. If no updates are needed, return the user state exactly as provided, without modification.
        4. Preserve the **exact JSON structure** and **format** of the provided user state.
        5. The output **must be a single JSON dictionary** representing the updated user state—do not wrap it in a list.
        6. Do not change any other fields unless explicitly required by the instructions.

        Important:
        - Ensure 'current_page' and 'task_completed' are keys in the returned dictionary.
        - Return **only the JSON object** without additional output or wrapping.
        - **AVOID OUTPUT A LIST**, must be JSON!

        The format for each page should be:

        {{
        "page_name": page_name
        "features": {{}},
        "user_data": {{
        "Data Entry 1": "Details of Data Entry 1",
        "Data Entry 2": "Details of Data Entry 2"
        }}
        }}
        """

        # The bookkeeping prompt embeds the whole sitemap, so it is sent with
        # this one request only and never stored in the running history.
        self._trim_conversation()
        state_messages = self.conversation + [{"role": "system", "content": update_prompt}]
        raw_state = self._get_openai_response(state_messages)

        # Parse and update the user state
        self.user_state = self._coerce_user_state(json_repair.loads(raw_state))

        if self.user_state['task_completed']:
            completion = f"Task completed! You took {self.prompt_count} steps."
            self.conversation.append({"role": "assistant", "content": completion})
            return completion

        system_prompt = self._generate_system_prompt()

        # GPT generates the page instructions
        self.conversation.append({"role": "system", "content": system_prompt})
        gpt_instruction = self._get_openai_response(self.conversation)
        self.conversation.append({"role": "assistant", "content": gpt_instruction})
        return gpt_instruction

    def _generate_agent_input(self):
        """
        Simulate the agent generating input based on the conversation state.

        Returns the model's reply, which the prompt asks to contain a "Plan"
        followed by an "Action".
        """
        last_message = self.conversation[-1]['content'] if self.conversation else ''

        agent_prompt = f"""
        Imagine you are an agent navigate through the {self.app_name} environment.
        Your overarching task is: {self.task}. You may have done some part of the task, or none at all.
        You will have access to all of your previous actions in the environment, as well as the last message from the assistant giving the current state of the environment.
        The last message from the assistant was: {last_message}
        Respond first with a brief "Plan" which suggests what steps you are going to take to accomplish the task, and what your immediate.
        Then generate an "Action" which is the immediate next step you can take.
        """

        messages = [{"role": "system", "content": agent_prompt}]
        messages.extend({"role": "user", "content": action} for action in self.actions)
        messages.append({"role": "assistant", "content": last_message})

        agent_response = self.client.chat.completions.create(
            model=self.model,
            messages=messages,
            temperature=1.0,
        )
        reply = agent_response.choices[0].message.content
        print(f"LLM-Agent: {reply}")
        return reply

    def start_conversation(self):
        """Open the session: return a greeting plus the first page's instructions."""
        greeting = f'\n Welcome to {self.app_name} simulator! Your task is: {self.task}. \n'
        system_prompt = self._generate_system_prompt()
        # GPT generates the page instructions
        self.conversation.append({"role": "system", "content": system_prompt})
        gpt_instruction = self._get_openai_response(self.conversation)
        self.conversation.append({"role": "assistant", "content": gpt_instruction})
        return greeting + gpt_instruction
requirements.txt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ datadreamer.dev==0.38.0
2
+ huggingface-hub==0.24.7
3
+ json_repair
task_specific_data_population.py ADDED
@@ -0,0 +1,150 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # task = task_generation(sitemap)
2
+ from openai import OpenAI
3
+ from datasets import load_dataset
4
+ import json_repair
5
+
6
+
7
class DataPopulation:
    """Populate task-specific page data and user state with an OpenAI chat model.

    All requests share one running conversation (self.conversation), so
    later prompts see the earlier questions that were recorded in it.
    """

    def __init__(self, api_key):
        """Create the OpenAI client and seed the system prompts."""
        # Set the API key during initialization
        self.client = OpenAI(api_key=api_key)
        self.conversation = [
            {
                "role": "system",
                "content": (
                    "You are an intelligent assistant specialized in web page management tasks. "
                    "Your responsibilities include identifying relevant pages, updating page details, user data, and the sitemap as required."
                )
            }
        ]
        self.feature_update_conversation = [
            {
                "role": "system",
                "content": (
                    "You are an intelligent assistant specialized in web page management tasks. "
                    "Your responsibilities is to identify which type of actions (select vs text_input) does each feature represents."
                )
            }
        ]

    def fetch_huggingface_dataset(self, dataset_name):
        """Fetch the dataset from Hugging Face."""
        return load_dataset(dataset_name)

    def gpt4_chat(self, conversation):
        """Send a chat request to GPT-4 and return the stripped reply text."""
        response = self.client.chat.completions.create(
            model="gpt-4",
            messages=conversation,
            max_tokens=1000,  # Adjusted max_tokens if needed
            temperature=1.0,
        )
        return response.choices[0].message.content.strip()

    def ask_for_relevant_pages(self, task, sitemap):
        """Ask which sitemap pages are relevant to the task; return the raw reply."""
        self.conversation.append({
            "role": "user",
            "content": (
                f"Given the task: '{task}' and the sitemap:\n{sitemap}\n\n"
                "Identify the page(s) relevant to this task. "
                "Return the page names exactly as they appear in the sitemap, in JSON format. "
                "For each relevant page, provide a brief explanation of its relevance. "
                # Plain (non-f) string: braces are written once so the example is valid.
                "Example response:\n{\n 'Ride History': 'Displays previous ride data needed for the task.'\n}"
            )
        })
        return self.gpt4_chat(self.conversation)

    def _update_user_data(self, task, relevant_page_details):
        """Ask the model to fill 'user_data' for the relevant pages; return the raw reply."""
        self.conversation.append({
            "role": "user",
            "content": (
                f"Given the task: '{task}' and the following page details:\n{relevant_page_details}\n\n"
                "Instructions:\n"
                "1. Ensure each page contains the following keys: 'page_name', 'features', and 'user_data' (even if empty).\n"
                "2. Update 'user_data' with essential information relevant to the task. Do not modify the 'features' section.\n"
                "3. Maintain the exact input structure, making changes only to 'user_data' where required.\n"
                "4. Confirm that both 'features' and 'user_data' are dictionaries.\n"
                "5. Verify that every page includes exactly the three required keys: 'page_name', 'features', and 'user_data'.\n"
                "6. Make sure 'user_data' must exist as a key! "
            )
        })
        return self.gpt4_chat(self.conversation)

    def ask_to_update_sitemap(self, sitemap, new_page):
        """Ask the model to add new_page to the sitemap; return the parsed reply."""
        self.conversation.append({
            "role": "user",
            "content": (
                f"Given the current sitemap:\n{sitemap}\nand a new page: '{new_page}' (currently only has a 'uid'), update the sitemap by:\n"
                # f-strings so the page name is interpolated, not sent as "{new_page}".
                f"- Adding necessary details to '{new_page}', including 'links_to'.\n"
                f"- Updating 'links_to' in other relevant pages to include '{new_page}'.\n"
                "Ensure the output retains the exact structure of the input."
            )
        })
        response_sitemap = self.gpt4_chat(self.conversation)
        return json_repair.loads(response_sitemap)

    def ask_to_update_user_state(self, task, user_state):
        """Ask the model to adapt the user state to the task; return the parsed reply."""
        self.conversation.append({
            "role": "user",
            "content": (
                f"Given the task: '{task}' and the current user state:\n{user_state}\n\n"
                "Update the user state to reflect any changes necessary for completing the task. "
                "Only modify values that are required; maintain the exact structure of the input."
            )
        })
        response_content = self.gpt4_chat(self.conversation)
        return json_repair.loads(response_content)

    @staticmethod
    def extract_uid_from_sitemap(sitemap, relevant_pages):
        """Extract UIDs for the relevant pages from the sitemap.

        Pages missing from sitemap['pages'] (or lacking a 'uid') are reported
        on stdout and skipped.
        """
        uid = []
        for page in relevant_pages:
            try:
                uid.append(sitemap['pages'][page]['uid'])
            except KeyError:
                print(f"Page name '{page}' not found in the sitemap.")
        return uid

    def process_data(self, task, hugging_face_url):
        """Load the dataset and tailor its page data and user state to the task.

        Args:
            task: Task description.
            hugging_face_url: Dataset identifier handed to load_dataset.

        Returns:
            A (sitemap, page_details, updated_user_state) tuple.

        Raises:
            ValueError: If a model reply cannot be parsed into the expected
                container, or a dataset value is not a Python literal.
        """
        # Function-scope import keeps this block self-contained.
        import ast

        dataset = self.fetch_huggingface_dataset(hugging_face_url)

        # Extract the sitemap, page details, and user state from the dataset.
        # SECURITY: the values are downloaded text, so they are parsed as
        # Python literals with ast.literal_eval rather than executed by eval().
        rows = dataset['train']
        sitemap = ast.literal_eval(rows[0]['value'])
        page_details = ast.literal_eval(rows[1]['value'])
        user_state = ast.literal_eval(rows[2]['value'])

        # Step 1: Identify relevant pages
        relevant_pages = self.ask_for_relevant_pages(task, sitemap)
        self.conversation.append({"role": "assistant", "content": relevant_pages})
        relevant_pages = json_repair.loads(relevant_pages)
        if isinstance(relevant_pages, dict):
            target_page_names = list(relevant_pages)
        elif isinstance(relevant_pages, list):
            # Tolerate a bare list of page names.
            target_page_names = [name for name in relevant_pages if isinstance(name, str)]
        else:
            raise ValueError("Model did not return a JSON object of relevant pages.")

        # Step 2: Extract UIDs for the relevant pages
        page_uid = self.extract_uid_from_sitemap(sitemap, target_page_names)

        # Step 3: Retrieve page details using the UIDs
        relevant_page_details = {
            uid: page_details[uid] for uid in page_uid if uid in page_details
        }

        # Step 4: Populate user data for the task (only for relevant pages)
        updated_user_data = self._update_user_data(task, relevant_page_details)
        self.conversation.append({"role": "assistant", "content": updated_user_data})
        updated_user_data = json_repair.loads(updated_user_data)
        if not isinstance(updated_user_data, dict):
            raise ValueError("Model did not return a JSON object of page details.")
        for uid, page_data in updated_user_data.items():
            # Skip entries the model invented or left without 'user_data'.
            if (uid in page_details and isinstance(page_data, dict)
                    and 'user_data' in page_data):
                page_details[uid]['user_data'] = page_data['user_data']
            else:
                print(f"Skipping user data for unknown or incomplete page '{uid}'.")

        # Step 5: Update user state
        updated_user_state = self.ask_to_update_user_state(task, user_state)

        # Return the updated structures
        return sitemap, page_details, updated_user_state