Spaces:
Running
Running
Update app.py
Browse files
app.py
CHANGED
@@ -6,6 +6,7 @@ import tempfile
|
|
6 |
import os
|
7 |
import base64
|
8 |
import openai
|
|
|
9 |
from dataclasses import dataclass, field
|
10 |
from threading import Lock
|
11 |
|
@@ -21,10 +22,47 @@ def create_client(api_key):
|
|
21 |
api_key=api_key
|
22 |
)
|
23 |
|
24 |
-
def
|
25 |
-
|
26 |
-
|
27 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
28 |
|
29 |
def generate_response_and_audio(message, state):
|
30 |
if state.client is None:
|
@@ -74,26 +112,6 @@ def chat(message, state):
|
|
74 |
|
75 |
return generate_response_and_audio(message, state)
|
76 |
|
77 |
-
def process_audio(audio, state):
    """Transcribe a recorded audio clip and return the text with the state.

    Args:
        audio: ``None`` when nothing was recorded, otherwise an indexable
            pair whose first item is used as the frame rate and whose
            second item is the sample array (it must provide ``tobytes()``,
            ``dtype`` and ``shape``).
        state: Session state object; returned unchanged.

    Returns:
        A ``(transcript, state)`` tuple. The transcript is ``""`` when
        ``audio`` is ``None``.
    """
    if audio is None:
        return "", state

    sample_rate = audio[0]
    samples = audio[1]

    # Convert the raw sample array to a WAV-exportable segment.
    audio_segment = AudioSegment(
        samples.tobytes(),
        frame_rate=sample_rate,
        sample_width=samples.dtype.itemsize,
        # A 1-D array is treated as mono; otherwise the second axis is
        # taken as the channel count.
        channels=1 if len(samples.shape) == 1 else samples.shape[1],
    )

    # Only reserve a unique path here and close the handle right away, so the
    # export below is not writing to a file that is still held open.
    with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as temp_audio:
        temp_path = temp_audio.name

    try:
        audio_segment.export(temp_path, format="wav")
        # NOTE(review): transcribe_audio is not defined in the visible part of
        # the file; it is called with the WAV path as its only argument.
        transcript = transcribe_audio(temp_path)
    finally:
        # Always remove the temp file, even if export/transcription fails.
        os.unlink(temp_path)

    return transcript, state
|
96 |
-
|
97 |
def set_api_key(api_key, state):
|
98 |
if not api_key:
|
99 |
raise gr.Error("Please enter a valid API key.")
|
@@ -111,7 +129,7 @@ with gr.Blocks() as demo:
|
|
111 |
|
112 |
with gr.Row():
|
113 |
with gr.Column(scale=1):
|
114 |
-
|
115 |
with gr.Column(scale=2):
|
116 |
chatbot = gr.Chatbot()
|
117 |
text_input = gr.Textbox(show_label=False, placeholder="Type your message here...")
|
@@ -119,7 +137,11 @@ with gr.Blocks() as demo:
|
|
119 |
audio_output = gr.Audio(label="Generated Audio")
|
120 |
|
121 |
set_key_button.click(set_api_key, inputs=[api_key_input, state], outputs=[api_key_status, state])
|
122 |
-
|
|
|
|
|
|
|
|
|
123 |
text_input.submit(chat, inputs=[text_input, state], outputs=[chatbot, audio_output, state])
|
124 |
|
125 |
demo.launch()
|
|
|
6 |
import os
|
7 |
import base64
|
8 |
import openai
|
9 |
+
import time
|
10 |
from dataclasses import dataclass, field
|
11 |
from threading import Lock
|
12 |
|
|
|
22 |
api_key=api_key
|
23 |
)
|
24 |
|
25 |
+
def process_audio_file(audio_file, state):
    """Send an uploaded audio file to the chat model and collect its reply.

    The file is base64-encoded and sent as a single user message; the
    streamed response is folded into a text transcript and the decoded
    audio bytes.

    Args:
        audio_file: The value delivered by the upload component. ``None``
            means there is nothing to process. Otherwise either an object
            exposing the file path as ``.name`` or the path itself.
        state: Session state object; its ``client`` attribute is the API
            client used for the request. Returned unchanged.

    Returns:
        A ``(transcript, audio_bytes, state)`` tuple. ``audio_bytes`` is
        ``None`` when no file was given or the reply carried no audio.

    Raises:
        gr.Error: If no API client is configured, or if reading the file
            or calling the API fails.
    """
    # An event with no file (e.g. the upload being cleared) has nothing to
    # process; return empty outputs instead of failing on `None.name`.
    if audio_file is None:
        return "", None, state

    if state.client is None:
        raise gr.Error("Please enter a valid API key first.")

    format_ = "opus"
    bitrate = 16

    # The upload component may hand over a plain path string rather than an
    # object with a `.name` attribute; accept both.
    path = getattr(audio_file, "name", audio_file)

    try:
        with open(path, "rb") as f:
            encoded_input = base64.b64encode(f.read()).decode()

        stream = state.client.chat.completions.create(
            extra_body={
                "require_audio": True,
                "tts_preset_id": "jessica",
                "tts_audio_format": format_,
                "tts_audio_bitrate": bitrate,
            },
            model="llama3.1-8b",
            messages=[
                {
                    "role": "user",
                    "content": [{"type": "audio", "data": encoded_input}],
                }
            ],
            temperature=0.5,
            max_tokens=128,
            stream=True,
        )

        text_parts = []
        audio_chunks = []

        for chunk in stream:
            # Skip chunks that carry no choices rather than indexing into
            # an empty list.
            if not chunk.choices:
                continue
            choice = chunk.choices[0]
            if choice.delta.content:
                text_parts.append(choice.delta.content)
            # `audio` is a non-standard field, so it may be absent.
            chunk_audio = getattr(choice, "audio", None)
            if chunk_audio:
                audio_chunks.extend(chunk_audio)

        reply_audio = b"".join(base64.b64decode(a) for a in audio_chunks)

        # Hand back None instead of empty bytes so the audio output is simply
        # cleared when the reply contained no audio.
        return "".join(text_parts), reply_audio or None, state

    except Exception as e:
        # UI boundary: surface any failure to the user, keeping the cause.
        raise gr.Error(f"Error processing audio: {str(e)}") from e
|
66 |
|
67 |
def generate_response_and_audio(message, state):
|
68 |
if state.client is None:
|
|
|
112 |
|
113 |
return generate_response_and_audio(message, state)
|
114 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
115 |
def set_api_key(api_key, state):
|
116 |
if not api_key:
|
117 |
raise gr.Error("Please enter a valid API key.")
|
|
|
129 |
|
130 |
with gr.Row():
|
131 |
with gr.Column(scale=1):
|
132 |
+
audio_file_input = gr.File(label="Upload Audio File")
|
133 |
with gr.Column(scale=2):
|
134 |
chatbot = gr.Chatbot()
|
135 |
text_input = gr.Textbox(show_label=False, placeholder="Type your message here...")
|
|
|
137 |
audio_output = gr.Audio(label="Generated Audio")
|
138 |
|
139 |
set_key_button.click(set_api_key, inputs=[api_key_input, state], outputs=[api_key_status, state])
|
140 |
+
audio_file_input.change(
|
141 |
+
process_audio_file,
|
142 |
+
inputs=[audio_file_input, state],
|
143 |
+
outputs=[text_input, audio_output, state]
|
144 |
+
)
|
145 |
text_input.submit(chat, inputs=[text_input, state], outputs=[chatbot, audio_output, state])
|
146 |
|
147 |
demo.launch()
|