fix: add asr result back
app.py CHANGED
@@ -2,7 +2,9 @@ import gradio as gr
 import numpy as np
 import io
 from pydub import AudioSegment
+import tempfile
 import openai
+import time
 from dataclasses import dataclass, field
 from threading import Lock
 import base64
@@ -88,6 +90,7 @@ def generate_response_and_audio(audio_bytes: bytes, state: AppState):
         )
 
         full_response = ""
+        asr_result = ""
         audios = []
 
         for chunk in stream:
@@ -95,15 +98,19 @@ def generate_response_and_audio(audio_bytes: bytes, state: AppState):
                 continue
             content = chunk.choices[0].delta.content
             audio = getattr(chunk.choices[0], "audio", [])
+            asr_results = getattr(chunk.choices[0], "asr_results", [])
+            if asr_results:
+                asr_result += "".join(asr_results)
+                yield full_response, asr_result, None, state
             if content:
                 full_response += content
-                yield full_response, None, state
+                yield full_response, asr_result, None, state
             if audio:
                 audios.extend(audio)
 
         final_audio = b"".join([base64.b64decode(a) for a in audios])
 
-        yield full_response, final_audio, state
+        yield full_response, asr_result, final_audio, state
 
     except Exception as e:
         raise gr.Error(f"Error during audio streaming: {e}")
@@ -126,14 +133,16 @@ def response(state: AppState):
 
     # Process the generator to get the final results
     final_text = ""
+    final_asr = ""
     final_audio = None
-    for text, audio, updated_state in generator:
+    for text, asr, audio, updated_state in generator:
         final_text = text if text else final_text
+        final_asr = asr if asr else final_asr
         final_audio = audio if audio else final_audio
         state = updated_state
 
     # Update the chatbot with the final conversation
-    state.conversation.append({"role": "user", "content":
+    state.conversation.append({"role": "user", "content": final_asr})
    state.conversation.append({"role": "assistant", "content": final_text})
 
     # Reset the audio stream for the next interaction
@@ -218,4 +227,4 @@ with gr.Blocks() as demo:
         cancels=[respond, restart],
     )
 
-demo.launch(
+demo.launch()
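
The shape of the change: every yield from generate_response_and_audio now carries four values, (full_response, asr_result, final_audio, state), and response() unpacks the same four, keeping the last non-empty value of each field before appending the ASR text as the user turn and the model text as the assistant turn. Below is a minimal, self-contained sketch of that pattern; the fake chunk stream and the __main__ driver are stand-in assumptions, not the Space's real OpenAI streaming call.

    from dataclasses import dataclass, field

    @dataclass
    class AppState:
        # Only the conversation list from the app's state is modeled here.
        conversation: list = field(default_factory=list)

    def generate_response_and_audio(state: AppState):
        """Toy stand-in for the streaming generator: yields
        (full_response, asr_result, final_audio, state) four-tuples,
        matching the signature introduced by this commit."""
        full_response = ""
        asr_result = ""
        audios = []
        # Fake "chunks" in place of the OpenAI stream; each may carry
        # assistant text, ASR text, or raw audio bytes.
        fake_stream = [
            {"asr": "hello there"},
            {"content": "Hi! "},
            {"content": "How can I help?"},
            {"audio": b"\x00\x01"},
        ]
        for chunk in fake_stream:
            if "asr" in chunk:
                asr_result += chunk["asr"]
                yield full_response, asr_result, None, state
            if "content" in chunk:
                full_response += chunk["content"]
                yield full_response, asr_result, None, state
            if "audio" in chunk:
                audios.append(chunk["audio"])
        final_audio = b"".join(audios)
        yield full_response, asr_result, final_audio, state

    def response(state: AppState):
        """Consumer mirroring the updated response(): keep the last non-empty
        value of each field, then log the user (ASR) and assistant turns."""
        final_text, final_asr, final_audio = "", "", None
        for text, asr, audio, updated_state in generate_response_and_audio(state):
            final_text = text if text else final_text
            final_asr = asr if asr else final_asr
            final_audio = audio if audio else final_audio
            state = updated_state
        state.conversation.append({"role": "user", "content": final_asr})
        state.conversation.append({"role": "assistant", "content": final_text})
        return final_text, final_asr, final_audio, state

    if __name__ == "__main__":
        text, asr, audio, state = response(AppState())
        print(asr)    # "hello there"
        print(text)   # "Hi! How can I help?"
        print(state.conversation)

Yielding the ASR text on every chunk is what lets the UI show the user's transcript as soon as it arrives, rather than only after the final audio is assembled, which is what the commit title ("add asr result back") restores.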