yadongxie committed
Commit 518275a
1 Parent(s): 120d632

fix: make conversation stream

Files changed (1): app.py (+20 -19)
app.py CHANGED
@@ -8,6 +8,7 @@ import time
 from dataclasses import dataclass, field
 from threading import Lock
 import base64
+import uuid
 
 
 @dataclass
@@ -64,6 +65,14 @@ def process_audio(audio: tuple, state: AppState):
     return None, state
 
 
+def update_or_append_conversation(conversation, id, role, new_content):
+    for entry in conversation:
+        if entry["id"] == id and entry["role"] == role:
+            entry["content"] = new_content
+            return
+    conversation.append({"id": id, "role": role, "content": new_content})
+
+
 def generate_response_and_audio(audio_bytes: bytes, state: AppState):
     if state.client is None:
         raise gr.Error("Please enter a valid API key first.")
@@ -92,6 +101,7 @@ def generate_response_and_audio(audio_bytes: bytes, state: AppState):
         full_response = ""
         asr_result = ""
         audios = []
+        id = uuid.uuid4()
 
         for chunk in stream:
             if not chunk.choices:
@@ -101,16 +111,16 @@ def generate_response_and_audio(audio_bytes: bytes, state: AppState):
             asr_results = getattr(chunk.choices[0], "asr_results", [])
             if asr_results:
                 asr_result += "".join(asr_results)
-                yield full_response, asr_result, None, state
+                yield id, full_response, asr_result, None, state
             if content:
                 full_response += content
-                yield full_response, asr_result, None, state
+                yield id, full_response, asr_result, None, state
             if audio:
                 audios.extend(audio)
 
         final_audio = b"".join([base64.b64decode(a) for a in audios])
 
-        yield full_response, asr_result, final_audio, state
+        yield id, full_response, asr_result, final_audio, state
 
     except Exception as e:
         raise gr.Error(f"Error during audio streaming: {e}")
@@ -131,28 +141,19 @@ def response(state: AppState):
 
     generator = generate_response_and_audio(audio_buffer.getvalue(), state)
 
-    # Process the generator to get the final results
-    final_text = ""
-    final_asr = ""
-    final_audio = None
-    for text, asr, audio, updated_state in generator:
-        final_text = text if text else final_text
-        final_asr = asr if asr else final_asr
-        final_audio = audio if audio else final_audio
+    for id, text, asr, audio, updated_state in generator:
         state = updated_state
-
-    # Update the chatbot with the final conversation
-    state.conversation.append({"role": "user", "content": final_asr})
-    state.conversation.append({"role": "assistant", "content": final_text})
+        if asr:
+            update_or_append_conversation(state.conversation, id, "user", asr)
+        if text:
+            update_or_append_conversation(state.conversation, id, "assistant", text)
+        chatbot_output = state.conversation
+        yield chatbot_output, audio, state
 
     # Reset the audio stream for the next interaction
     state.stream = None
     state.pause_detected = False
 
-    chatbot_output = state.conversation[-2:] # Get the last two messages
-
-    return chatbot_output, final_audio, state
-
 
 def start_recording_user(state: AppState):
     if not state.stopped:
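
For readers skimming the diff: the change makes the conversation stream by generating one uuid per turn in generate_response_and_audio and letting update_or_append_conversation overwrite the entry for that id on every partial result, instead of appending the final text once at the end. Below is a minimal standalone sketch of that update-in-place behavior; the simulated partial results, the turn_id name, and the print calls are illustrative only and are not part of app.py.

import uuid


def update_or_append_conversation(conversation, id, role, new_content):
    # Same helper as in the diff: replace the existing (id, role) entry
    # if one exists, otherwise append a new message dict.
    for entry in conversation:
        if entry["id"] == id and entry["role"] == role:
            entry["content"] = new_content
            return
    conversation.append({"id": id, "role": role, "content": new_content})


conversation = []
turn_id = uuid.uuid4()  # one id per streamed turn, as app.py does

# Simulate partial assistant text arriving chunk by chunk.
for partial in ["Hel", "Hello", "Hello there!"]:
    update_or_append_conversation(conversation, turn_id, "assistant", partial)

print(len(conversation))           # 1 -- still a single assistant entry
print(conversation[0]["content"])  # "Hello there!" -- updated in place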