yadongxie committed on
Commit
120d632
1 Parent(s): 1926927

fix: add asr result back

Browse files
Files changed (1) hide show
  1. app.py +14 -5
app.py CHANGED
@@ -2,7 +2,9 @@ import gradio as gr
2
  import numpy as np
3
  import io
4
  from pydub import AudioSegment
 
5
  import openai
 
6
  from dataclasses import dataclass, field
7
  from threading import Lock
8
  import base64
@@ -88,6 +90,7 @@ def generate_response_and_audio(audio_bytes: bytes, state: AppState):
88
  )
89
 
90
  full_response = ""
 
91
  audios = []
92
 
93
  for chunk in stream:
@@ -95,15 +98,19 @@ def generate_response_and_audio(audio_bytes: bytes, state: AppState):
95
  continue
96
  content = chunk.choices[0].delta.content
97
  audio = getattr(chunk.choices[0], "audio", [])
 
 
 
 
98
  if content:
99
  full_response += content
100
- yield full_response, None, state
101
  if audio:
102
  audios.extend(audio)
103
 
104
  final_audio = b"".join([base64.b64decode(a) for a in audios])
105
 
106
- yield full_response, final_audio, state
107
 
108
  except Exception as e:
109
  raise gr.Error(f"Error during audio streaming: {e}")
@@ -126,14 +133,16 @@ def response(state: AppState):
126
 
127
  # Process the generator to get the final results
128
  final_text = ""
 
129
  final_audio = None
130
- for text, audio, updated_state in generator:
131
  final_text = text if text else final_text
 
132
  final_audio = audio if audio else final_audio
133
  state = updated_state
134
 
135
  # Update the chatbot with the final conversation
136
- state.conversation.append({"role": "user", "content": "Audio input"})
137
  state.conversation.append({"role": "assistant", "content": final_text})
138
 
139
  # Reset the audio stream for the next interaction
@@ -218,4 +227,4 @@ with gr.Blocks() as demo:
218
  cancels=[respond, restart],
219
  )
220
 
221
- demo.launch(share=True)
 
2
  import numpy as np
3
  import io
4
  from pydub import AudioSegment
5
+ import tempfile
6
  import openai
7
+ import time
8
  from dataclasses import dataclass, field
9
  from threading import Lock
10
  import base64
 
90
  )
91
 
92
  full_response = ""
93
+ asr_result = ""
94
  audios = []
95
 
96
  for chunk in stream:
 
98
  continue
99
  content = chunk.choices[0].delta.content
100
  audio = getattr(chunk.choices[0], "audio", [])
101
+ asr_results = getattr(chunk.choices[0], "asr_results", [])
102
+ if asr_results:
103
+ asr_result += "".join(asr_results)
104
+ yield full_response, asr_result, None, state
105
  if content:
106
  full_response += content
107
+ yield full_response, asr_result, None, state
108
  if audio:
109
  audios.extend(audio)
110
 
111
  final_audio = b"".join([base64.b64decode(a) for a in audios])
112
 
113
+ yield full_response, asr_result, final_audio, state
114
 
115
  except Exception as e:
116
  raise gr.Error(f"Error during audio streaming: {e}")
 
133
 
134
  # Process the generator to get the final results
135
  final_text = ""
136
+ final_asr = ""
137
  final_audio = None
138
+ for text, asr, audio, updated_state in generator:
139
  final_text = text if text else final_text
140
+ final_asr = asr if asr else final_asr
141
  final_audio = audio if audio else final_audio
142
  state = updated_state
143
 
144
  # Update the chatbot with the final conversation
145
+ state.conversation.append({"role": "user", "content": final_asr})
146
  state.conversation.append({"role": "assistant", "content": final_text})
147
 
148
  # Reset the audio stream for the next interaction
 
227
  cancels=[respond, restart],
228
  )
229
 
230
+ demo.launch()