Spaces:
Sleeping
Sleeping
initial commit
Browse files- README.md +4 -7
- app.py +109 -0
- chatbot_simulator.py +199 -0
- requirements.txt +3 -0
- task_specific_data_population.py +150 -0
README.md
CHANGED
@@ -1,13 +1,10 @@
|
|
1 |
---
|
2 |
title: App Simulator
|
3 |
-
emoji:
|
4 |
-
colorFrom:
|
5 |
-
colorTo:
|
6 |
sdk: gradio
|
7 |
-
sdk_version:
|
8 |
app_file: app.py
|
9 |
pinned: false
|
10 |
-
short_description: simulate any real word app as a text-based chatbot
|
11 |
---
|
12 |
-
|
13 |
-
Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
|
|
|
1 |
---
|
2 |
title: App Simulator
|
3 |
+
emoji: 🐢
|
4 |
+
colorFrom: red
|
5 |
+
colorTo: indigo
|
6 |
sdk: gradio
|
7 |
+
sdk_version: 4.44.1
|
8 |
app_file: app.py
|
9 |
pinned: false
|
|
|
10 |
---
|
|
|
|
app.py
ADDED
@@ -0,0 +1,109 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import gradio as gr
|
2 |
+
from chatbot_simulator import ChatbotSimulation
|
3 |
+
from task_specific_data_population import DataPopulation
|
4 |
+
import os
|
5 |
+
openai_api_key = os.getenv("OPENAI_API_KEY")
|
6 |
+
|
7 |
+
|
8 |
+
# Active ChatbotSimulation instance; stays None until initialize_simulator assigns one.
simulation = None
# Running message log as {"role": ..., "content": ...} dicts; written out by log_conversation.
conversation = []
# (user_text, bot_text) pairs returned to the gr.Chatbot component.
display_conversation = []
|
11 |
+
|
12 |
+
def initialize_simulator(task, app_name, sitemap, max_retries=50):
    """Build the simulator for a task and return the chat history to display.

    Data population and simulator start-up both depend on model output, so the
    whole sequence is retried on any exception.

    Args:
        task: Natural-language description of the task to simulate.
        app_name: Name of the simulated app; also used in the log file name.
        sitemap: Value forwarded to ``DataPopulation.process_data`` as the
            dataset location.
        max_retries: Maximum number of attempts before giving up.

    Returns:
        A list of ``(user_text, bot_text)`` pairs for the chatbot component.
        On success it holds the single start-up exchange; if every attempt
        fails it holds one pair describing the failure.
    """
    global simulation

    last_error = None
    for attempt in range(1, max_retries + 1):
        try:
            # Process data (simulating data loading)
            data_population = DataPopulation(api_key=openai_api_key)
            sitemap_data, page_details, user_state = data_population.process_data(task, sitemap)

            candidate = ChatbotSimulation(
                site_map=sitemap_data,
                page_details=page_details,
                user_state=user_state,
                task=task,
                app_name=app_name,
                log_location=f'conversation_log_{app_name}_human.txt',
                openai_api_key=openai_api_key,
                agent='human'
            )
            text = candidate.start_conversation()
        except Exception as e:
            last_error = e
            print(f"Attempt {attempt}/{max_retries}: An error occurred: {e}. Retrying...")
            continue

        # Publish the simulator only once it has started successfully, so a
        # failed attempt never leaves a half-initialised instance behind.
        simulation = candidate
        conversation.append({"role": "assistant", "content": text})
        log_conversation(simulation.log_location)

        # Show only the most recent exchange. The previous append + pop(0)
        # removed the entry that had just been added, leaving the chat empty.
        display_conversation.append(('Start Simulator', text))
        del display_conversation[:-1]
        return display_conversation

    # Every attempt failed: report it in the chat instead of returning None.
    return [(
        'Start Simulator',
        f"Failed to initialize the simulator after {max_retries} attempts. Last error: {last_error}",
    )]
|
46 |
+
|
47 |
+
|
48 |
+
def log_conversation(log_location):
    """Append the not-yet-logged part of the conversation to a log file.

    The module-level ``conversation`` list only grows, so the function
    remembers, per log file, how many messages it has already written and
    appends just the new ones. Re-writing the whole list on every call would
    duplicate all earlier messages in the file.

    Logging is best-effort: any failure is printed and otherwise ignored.

    Args:
        log_location: Path of the log file to append to.
    """
    # Per-file write offsets live on the function object so this block needs
    # no additional module-level state.
    written_counts = log_conversation.__dict__.setdefault("_written_counts", {})
    start = written_counts.get(log_location, 0)
    if start > len(conversation):
        # The conversation list was replaced or shortened; log it from the top.
        start = 0

    pending = conversation[start:]
    if not pending:
        return

    try:
        # Explicit UTF-8 so non-ASCII model output does not depend on the
        # platform's default encoding.
        with open(log_location, 'a', encoding='utf-8') as f:  # Use 'a' for append mode
            f.writelines(
                f"{message['role']}: {message['content']}\n\n" for message in pending
            )
    except Exception as e:
        print(f"Error logging conversation: {e}")
    else:
        written_counts[log_location] = start + len(pending)
|
58 |
+
|
59 |
+
|
60 |
+
def chatbot_interaction(user_input):
    """Run one user turn through the simulator and return the chat history.

    The return value always has the shape the chatbot component expects: a
    list of ``(user_text, bot_text)`` pairs. Problems are reported as a bot
    message in that list rather than as a bare string.

    Args:
        user_input: Text the user typed.

    Returns:
        The list of ``(user_text, bot_text)`` pairs to display.
    """
    if simulation is None:
        return display_conversation + [
            (user_input, "Simulation is not initialized. Please start the simulator.")
        ]

    try:
        # Perform one round of conversation
        response = simulation.one_conversation_round(user_input)
    except Exception as e:
        # Report the failure without recording it as part of the conversation.
        return display_conversation + [(user_input, f"An error occurred: {e}")]

    conversation.append({"role": "user", "content": user_input})
    conversation.append({"role": "assistant", "content": response})
    log_conversation(simulation.log_location)

    # Show only the most recent exchange. The previous append + pop(0) could
    # remove the entry that had just been added, leaving the chat empty.
    display_conversation.append((user_input, response))
    del display_conversation[:-1]
    return display_conversation
|
77 |
+
|
78 |
+
|
79 |
+
# Gradio Interface
|
80 |
+
# Page layout: setup inputs on top, then the chat pane with its message box.
with gr.Blocks() as demo:
    gr.Markdown("## Simulator Setup")

    # Setup fields; their values are passed to initialize_simulator in this order.
    task_input = gr.Textbox(label="Task", placeholder="Describe your task...")
    app_name_input = gr.Textbox(label="App Name", placeholder="Enter the app name...")
    sitemap_input = gr.Textbox(label="Sitemap", placeholder="Enter the Hugging Face link to sitemap...")

    initialize_button = gr.Button("Initialize Simulator")
    #setup_output = gr.Textbox(label="Setup Status", interactive=False)

    # Chat pane plus the message box and button that feed chatbot_interaction.
    chatbot = gr.Chatbot(label="Simulator Chat", height=800)
    user_message = gr.Textbox(label="Enter your message", placeholder="Type your message here...")
    submit_button = gr.Button("Send")

    # Initialize simulator and display the welcome message in chatbot
    initialize_button.click(
        initialize_simulator,
        inputs=[task_input, app_name_input, sitemap_input],
        outputs=chatbot  # Show setup message in the chatbot
    )

    # Handle conversation
    submit_button.click(
        chatbot_interaction,
        inputs=user_message,
        outputs=chatbot
    )

# Launch the app
demo.launch()
|
chatbot_simulator.py
ADDED
@@ -0,0 +1,199 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
from openai import OpenAI
|
2 |
+
import json_repair
|
3 |
+
|
4 |
+
|
5 |
+
class ChatbotSimulation:
    """Simulate an app as a numbered-menu text chat driven by GPT-4.

    The simulator keeps a ``user_state`` dict containing at least
    ``current_page`` and ``task_completed``. After each user input it asks the
    model to update that state, then asks it to render the options of the
    current page.
    """

    def __init__(self, site_map, page_details, user_state, task,
                 app_name, log_location, openai_api_key, agent='human',
                 max_steps=50, max_tokens=8192, buffer_tokens=500):
        """Store the app data and create the OpenAI client.

        Args:
            site_map: Sitemap dict; ``site_map['pages'][name]['uid']`` is read
                to resolve page names to UIDs.
            page_details: Mapping from page UID to that page's details.
            user_state: Mutable state dict. It is modified in place:
                ``current_page`` is set to ``'Home'`` and ``task_completed``
                to ``0``.
            task: Description of the task the user should complete.
            app_name: Name of the simulated app, used in prompts.
            log_location: Path stored for the caller's conversation log.
            openai_api_key: API key passed to the OpenAI client.
            agent: ``'human'`` or ``'llm'`` (case-insensitive).
            max_steps: Stored on the instance; not consulted by this class.
            max_tokens: Approximate budget used when trimming the conversation.
            buffer_tokens: Head-room subtracted from ``max_tokens`` when trimming.

        Raises:
            ValueError: If ``agent`` is neither ``'human'`` nor ``'llm'``.
        """
        self.sitemap = site_map
        self.page_details = page_details
        self.user_state = user_state
        self.user_state['current_page'] = 'Home'  # Initialize current page
        self.user_state['task_completed'] = 0
        self.task = task
        self.app_name = app_name
        self.log_location = log_location
        self.agent = agent.lower()
        if self.agent not in ['human', 'llm']:
            raise ValueError("Invalid agent type. Expected 'Human' or 'llm'.")
        self.max_steps = max_steps
        self.max_tokens = max_tokens
        self.buffer_tokens = buffer_tokens
        self.conversation = []  # Stores recent conversation snippets
        self.prompt_count = 0  # Number of user turns processed so far
        self.client = OpenAI(api_key=openai_api_key)
        self.actions = []  # Raw user inputs, in order

    def _get_page_uid(self, page_name):
        """Return the UID of ``page_name`` from the sitemap, or None if absent."""
        return self.sitemap['pages'].get(page_name, {}).get('uid')

    def _get_page_details(self, page_name):
        """Return the details for ``page_name``, or ``{}`` when it is unknown."""
        uid = self._get_page_uid(page_name)
        return self.page_details.get(uid, {})

    def _generate_system_prompt(self):
        """Build the system prompt describing the current page and user state."""
        current_page = self.user_state['current_page']
        page_info = self._get_page_details(current_page)

        return f"""
        You are interacting with the {self.app_name} app. Your task is: {self.task}.
        You are currently on the {current_page} page. Current user state: {self.user_state}.

        Page Information:
        {page_info}

        - **Features**: Represent available options the user can select on this page.
        - **User Data**: Represents user-specific data accessible on this page.

        Provide instructions or request input from the user. If the user provides an invalid action, respond with:
        "Invalid action. Please select a valid option."

        ### Instruction Format:
        You are at the {current_page} page. You have the following options:
        1. Feature 1
        2. Feature 2
        3. Feature 3
        4. Feature 4

        Please enter your choice as the corresponding number.

        Rules:
        - Be sure to display all options that is available in features.
        - Be robotic and emotionless. Avoid offering any advice to the user.
        - If a feature requires `input_text`, request input as: "Enter query as: [number]: query"
        """

    def _get_openai_response(self, prompt):
        """Trim the stored conversation, then return the model's reply to ``prompt``.

        Args:
            prompt: List of chat messages to send.

        Returns:
            The content of the first completion choice.
        """
        self._trim_conversation()
        response = self.client.chat.completions.create(
            model="gpt-4",
            messages=prompt,
            max_tokens=1500,  # Adjusted max_tokens if needed
            temperature=1.0,
        )
        return response.choices[0].message.content

    def _calculate_token_count(self, conversation):
        """Estimate tokens as the number of whitespace-separated words."""
        return sum(len(entry['content'].split()) for entry in conversation)

    def _trim_conversation(self):
        """Drop the oldest messages until the conversation fits the budget.

        At least one message is always kept: emptying the list would send the
        model a request with no messages at all.
        """
        limit = self.max_tokens - self.buffer_tokens
        while len(self.conversation) > 1 and self._calculate_token_count(self.conversation) > limit:
            self.conversation.pop(0)

    def one_conversation_round(self, user_input):
        """Process one user input and return the simulator's reply.

        The model is first asked to update the user state, then (unless the
        task is reported complete) to render the options of the current page.

        Args:
            user_input: Text entered by the user.

        Returns:
            The rendered page instructions, or a completion message that
            includes the number of steps taken.

        Raises:
            ValueError: If the model's state update cannot be parsed into a
                dictionary.
        """
        # User provides input
        self.conversation.append({"role": "user", "content": user_input})
        self.actions.append(user_input)
        # Count this turn so the completion message reports real step counts.
        self.prompt_count += 1

        # Update user state using GPT's response
        update_prompt = f"""
        Update the user state based on the input: '{user_input}'.

        Current user state (JSON format): {self.user_state}

        Sitemap: {self.sitemap}

        Instructions:
        1. If the 'current_page' has changed, update it to a page from the sitemap.
        2. If the task is finished, update 'task_completed' to 1. Otherwise, leave it unchanged.
        3. If no updates are needed, return the user state exactly as provided, without modification.
        4. Preserve the **exact JSON structure** and **format** of the provided user state.
        5. The output **must be a single JSON dictionary** representing the updated user state—do not wrap it in a list.
        6. Do not change any other fields unless explicitly required by the instructions.

        Important:
        - Ensure 'current_page' and 'task_completed' are keys in the returned dictionary.
        - Return **only the JSON object** without additional output or wrapping.
        - **AVOID OUTPUT A LIST**, must be JSON!

        The format for each page should be:

        {{
        "page_name": page_name
        "features": {{}},
        "user_data": {{
        "Data Entry 1": "Details of Data Entry 1",
        "Data Entry 2": "Details of Data Entry 2"
        }}
        }}
        """

        # NOTE(review): the instruction is sent with the "assistant" role, as in
        # the original; confirm that role is intended.
        self.conversation.append({"role": "assistant", "content": update_prompt})
        updated_state = self._get_openai_response(self.conversation)

        # Parse and update the user state
        updated_state = json_repair.loads(updated_state)

        if isinstance(updated_state, list):
            reformat_prompt = f'''
            Given the {updated_state}, reformat it into a proper JSON
            with only 3 keys: page_name, features, user_data
            Follow the format:
            {{
            "page_name": page_name
            "features": {{}},
            "user_data": {{
            "Data Entry 1": "Details of Data Entry 1",
            "Data Entry 2": "Details of Data Entry 2"
            }}
            }}
            '''
            self.conversation.append({"role": "assistant", "content": reformat_prompt})
            reformat_state = self._get_openai_response(self.conversation)
            updated_state = json_repair.loads(reformat_state)

        if not isinstance(updated_state, dict):
            raise ValueError(f"Model returned a non-dictionary user state: {updated_state!r}")

        # The model may omit the required keys; fall back to the previous
        # values rather than failing later with an opaque KeyError.
        updated_state.setdefault('current_page', self.user_state['current_page'])
        updated_state.setdefault('task_completed', self.user_state.get('task_completed', 0))

        if updated_state['task_completed']:
            return f"Task completed! You took {self.prompt_count} steps."

        self.user_state = updated_state

        system_prompt = self._generate_system_prompt()

        # GPT generates the page instructions
        self.conversation.append({"role": "system", "content": system_prompt})
        gpt_instruction = self._get_openai_response(self.conversation)
        self.conversation.append({"role": "assistant", "content": gpt_instruction})
        return gpt_instruction

    def _generate_agent_input(self):
        """Ask the model to act as the user and return its plan and next action.

        The request contains the agent prompt, every previous user action, and
        the latest message in the conversation. The reply is also printed.
        """
        # The environment name comes from the configured app rather than a
        # hard-coded one, so the agent prompt matches the simulated app.
        agent_prompt = f"""
        Imagine you are an agent navigate through the {self.app_name} environment.
        Your overarching task is: {self.task}. You may have done some part of the task, or none at all.
        You will have access to all of your previous actions in the environment, as well as the last message from the assistant giving the current state of the environment.
        The last message from the assistant was: {self.conversation[-1]['content']}
        Respond first with a brief "Plan" which suggests what steps you are going to take to accomplish the task, and what your immediate.
        Then generate an "Action" which is the immediate next step you can take.
        """

        messages = [{"role": "system", "content": agent_prompt}]
        for action in self.actions:
            messages.append({"role": "user", "content": action})
        messages.append({"role": "assistant", "content": self.conversation[-1]['content']})

        agent_response = self.client.chat.completions.create(
            model="gpt-4",
            messages=messages,
            temperature=1.0,
        )
        reply = agent_response.choices[0].message.content
        print(f"LLM-Agent: {reply}")
        return reply

    def start_conversation(self):
        """Render the first page and return it prefixed with a welcome line."""
        greeting = f'\n Welcome to {self.app_name} simulator! Your task is: {self.task}. \n'
        system_prompt = self._generate_system_prompt()
        # GPT generates the page instructions
        self.conversation.append({"role": "system", "content": system_prompt})
        gpt_instruction = self._get_openai_response(self.conversation)
        self.conversation.append({"role": "assistant", "content": gpt_instruction})
        return greeting + gpt_instruction
|
requirements.txt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
datadreamer.dev==0.38.0
|
2 |
+
huggingface-hub==0.24.7
|
3 |
+
json_repair
|
task_specific_data_population.py
ADDED
@@ -0,0 +1,150 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
# task = task_generation(sitemap)
|
2 |
+
from openai import OpenAI
|
3 |
+
from datasets import load_dataset
|
4 |
+
import json_repair
|
5 |
+
|
6 |
+
|
7 |
+
class DataPopulation:
    """Prepare task-specific sitemap, page details and user state via GPT-4.

    The instance keeps a running chat history (``conversation``) that every
    request method appends to, so later requests see the earlier ones.
    """

    def __init__(self, api_key):
        """Create the OpenAI client and seed the chat histories.

        Args:
            api_key: API key passed to the OpenAI client.
        """
        # Set the API key during initialization
        self.client = OpenAI(api_key=api_key)
        self.conversation = [
            {
                "role": "system",
                "content": (
                    "You are an intelligent assistant specialized in web page management tasks. "
                    "Your responsibilities include identifying relevant pages, updating page details, user data, and the sitemap as required."
                )
            }
        ]
        self.feature_update_conversation = [
            {
                "role": "system",
                "content": (
                    "You are an intelligent assistant specialized in web page management tasks. "
                    "Your responsibilities is to identify which type of actions (select vs text_input) does each feature represents."
                )
            }
        ]

    @staticmethod
    def _parse_literal(text):
        """Parse a string holding a Python literal without executing code.

        Raises:
            ValueError or SyntaxError: If ``text`` is not a plain literal.
        """
        import ast  # local import keeps this helper self-contained

        return ast.literal_eval(text)

    def fetch_huggingface_dataset(self, dataset_name):
        """Fetch the dataset from Hugging Face."""
        return load_dataset(dataset_name)

    def gpt4_chat(self, conversation):
        """Send ``conversation`` to GPT-4 and return the stripped reply text."""
        response = self.client.chat.completions.create(
            model="gpt-4",
            messages=conversation,
            max_tokens=1000,  # Adjusted max_tokens if needed
            temperature=1.0,
        )
        return response.choices[0].message.content.strip()

    def ask_for_relevant_pages(self, task, sitemap):
        """Ask the model which sitemap pages are relevant to ``task``.

        Returns:
            The raw model reply (expected to be a JSON object mapping page
            names to explanations); parsing is left to the caller.
        """
        self.conversation.append({
            "role": "user",
            "content": (
                f"Given the task: '{task}' and the sitemap:\n{sitemap}\n\n"
                "Identify the page(s) relevant to this task. "
                "Return the page names exactly as they appear in the sitemap, in JSON format. "
                "For each relevant page, provide a brief explanation of its relevance. "
                # Plain (non-f) string: single braces, otherwise the model is
                # shown a literal "{{ ... }}" as the example format.
                "Example response:\n{\n 'Ride History': 'Displays previous ride data needed for the task.'\n}"
            )
        })
        response_content = self.gpt4_chat(self.conversation)
        return response_content

    def _update_user_data(self, task, relevant_page_details):
        """Ask the model to fill ``user_data`` for the relevant pages.

        Returns:
            The raw model reply; parsing is left to the caller.
        """
        self.conversation.append({
            "role": "user",
            "content": (
                f"Given the task: '{task}' and the following page details:\n{relevant_page_details}\n\n"
                "Instructions:\n"
                "1. Ensure each page contains the following keys: 'page_name', 'features', and 'user_data' (even if empty).\n"
                "2. Update 'user_data' with essential information relevant to the task. Do not modify the 'features' section.\n"
                "3. Maintain the exact input structure, making changes only to 'user_data' where required.\n"
                "4. Confirm that both 'features' and 'user_data' are dictionaries.\n"
                # Trailing newline added so instruction 6 starts on its own line.
                "5. Verify that every page includes exactly the three required keys: 'page_name', 'features', and 'user_data'.\n"
                "6. Make sure 'user_data' must exist as a key! "
            )
        })
        response_content = self.gpt4_chat(self.conversation)
        return response_content

    def ask_to_update_sitemap(self, sitemap, new_page):
        """Ask the model to add ``new_page`` to the sitemap and fix up links.

        Returns:
            The model reply parsed with ``json_repair``.
        """
        self.conversation.append({
            "role": "user",
            "content": (
                f"Given the current sitemap:\n{sitemap}\nand a new page: '{new_page}' (currently only has a 'uid'), update the sitemap by:\n"
                # These segments must be f-strings too; otherwise the model
                # receives the literal text "{new_page}".
                f"- Adding necessary details to '{new_page}', including 'links_to'.\n"
                f"- Updating 'links_to' in other relevant pages to include '{new_page}'.\n"
                "Ensure the output retains the exact structure of the input."
            )
        })
        response_sitemap = self.gpt4_chat(self.conversation)
        return json_repair.loads(response_sitemap)

    def ask_to_update_user_state(self, task, user_state):
        """Ask the model to adapt ``user_state`` to ``task``.

        Returns:
            The model reply parsed with ``json_repair``.
        """
        self.conversation.append({
            "role": "user",
            "content": (
                f"Given the task: '{task}' and the current user state:\n{user_state}\n\n"
                "Update the user state to reflect any changes necessary for completing the task. "
                "Only modify values that are required; maintain the exact structure of the input."
            )
        })
        response_content = self.gpt4_chat(self.conversation)
        return json_repair.loads(response_content)

    @staticmethod
    def extract_uid_from_sitemap(sitemap, relevant_pages):
        """Return the UIDs of ``relevant_pages`` found in the sitemap.

        Pages missing from ``sitemap['pages']`` (or lacking a ``uid``) are
        reported on stdout and skipped.
        """
        uid = []
        for page in relevant_pages:
            try:
                uid.append(sitemap['pages'][page]['uid'])
            except KeyError:
                print(f"Page name '{page}' not found in the sitemap.")
        return uid

    def process_data(self, task, hugging_face_url):
        """Load the app dataset and tailor it to ``task``.

        Args:
            task: Description of the task to prepare data for.
            hugging_face_url: Dataset identifier passed to ``load_dataset``.

        Returns:
            A ``(sitemap, page_details, updated_user_state)`` tuple.
            ``page_details`` has its ``user_data`` entries replaced for the
            pages the model updated.
        """
        dataset = self.fetch_huggingface_dataset(hugging_face_url)

        # Extract the sitemap, page details, and user state from the dataset.
        # SECURITY: these strings come from a dataset named by the caller, so
        # they are parsed as literals only. The previous eval() would execute
        # arbitrary code embedded in the dataset.
        sitemap = self._parse_literal(dataset['train'][0]['value'])
        page_details = self._parse_literal(dataset['train'][1]['value'])
        user_state = self._parse_literal(dataset['train'][2]['value'])

        # Step 1: Identify relevant pages
        relevant_pages = self.ask_for_relevant_pages(task, sitemap)
        self.conversation.append({"role": "assistant", "content": relevant_pages})
        relevant_pages = json_repair.loads(relevant_pages)
        target_page_names = relevant_pages.keys()

        # Step 2: Extract UIDs for the relevant pages
        page_uid = self.extract_uid_from_sitemap(sitemap, target_page_names)

        # Step 3: Retrieve page details using the UIDs
        relevant_page_details = {
            uid: page_details[uid] for uid in page_uid if uid in page_details
        }

        # Step 4: Populate user data for the task (only for relevant pages)
        updated_user_data = self._update_user_data(task, relevant_page_details)
        self.conversation.append({"role": "assistant", "content": updated_user_data})
        updated_user_data = json_repair.loads(updated_user_data)
        # A malformed reply raises here on purpose; the caller retries the
        # whole process on any exception.
        for uid, page_data in updated_user_data.items():
            page_details[uid]['user_data'] = page_data['user_data']

        # Step 5: Update user state
        updated_user_state = self.ask_to_update_user_state(task, user_state)

        # Return the updated structures
        return sitemap, page_details, updated_user_state
|