Spaces:

akhaliq
/

llama-3.2-3b-voice

Running

App Files Community

akhaliq HF staff committed on Sep 26

Commit

ef46ff0

•

1 Parent(s): f7de418

Update app.py

Browse files

Files changed (1) hide show

app.py +56 -36

app.py CHANGED Viewed

@@ -9,16 +9,17 @@ import openai
 from dataclasses import dataclass, field
 from threading import Lock
-# Lepton API setup
-client = openai.OpenAI(
-    base_url="https://llama3-1-8b.lepton.run/api/v1/",
-    api_key=os.environ.get('LEPTON_API_TOKEN')
-)
 @dataclass
 class AppState:
     conversation: list = field(default_factory=list)
     lock: Lock = field(default_factory=Lock)
 def transcribe_audio(audio):
     # This is a placeholder function. In a real-world scenario, you'd use a
@@ -26,40 +27,46 @@ def transcribe_audio(audio):
     return "This is a dummy transcript. Please implement actual speech-to-text functionality."
 def generate_response_and_audio(message, state):
     with state.lock:
         state.conversation.append({"role": "user", "content": message})
-        completion = client.chat.completions.create(
-            model="llama3-1-8b",
-            messages=state.conversation,
-            max_tokens=128,
-            stream=True,
-            extra_body={
-                "require_audio": "true",
-                "tts_preset_id": "jessica",
-            }
-        )
-        full_response = ""
-        audio_chunks = []
-        for chunk in completion:
-            if not chunk.choices:
-                continue
-            content = chunk.choices[0].delta.content
-            audio = getattr(chunk.choices[0], 'audio', [])
-            if content:
-                full_response += content
-                yield full_response, None, state
-            if audio:
-                audio_chunks.extend(audio)
-                audio_data = b''.join([base64.b64decode(a) for a in audio_chunks])
-                yield full_response, audio_data, state
-        state.conversation.append({"role": "assistant", "content": full_response})
 def chat(message, state):
     if not message:
@@ -87,9 +94,21 @@ def process_audio(audio, state):
     return transcript, state
 with gr.Blocks() as demo:
     state = gr.State(AppState())
     with gr.Row():
         with gr.Column(scale=1):
             audio_input = gr.Audio(source="microphone", type="numpy")
@@ -99,7 +118,8 @@ with gr.Blocks() as demo:
         with gr.Column(scale=1):
             audio_output = gr.Audio(label="Generated Audio")
-    audio_input.change(process_audio, [audio_input, state], [text_input, state])
-    text_input.submit(chat, [text_input, state], [chatbot, audio_output, state])
 demo.launch()

 from dataclasses import dataclass, field
 from threading import Lock
 @dataclass
 class AppState:
     conversation: list = field(default_factory=list)
     lock: Lock = field(default_factory=Lock)
+    client: openai.OpenAI = None
+def create_client(api_key):
+    return openai.OpenAI(
+        base_url="https://llama3-1-8b.lepton.run/api/v1/",
+        api_key=api_key
+    )
 def transcribe_audio(audio):
     # This is a placeholder function. In a real-world scenario, you'd use a
     return "This is a dummy transcript. Please implement actual speech-to-text functionality."
 def generate_response_and_audio(message, state):
+    if state.client is None:
+        raise gr.Error("Please enter a valid API key first.")
     with state.lock:
         state.conversation.append({"role": "user", "content": message})
+        try:
+            completion = state.client.chat.completions.create(
+                model="llama3-1-8b",
+                messages=state.conversation,
+                max_tokens=128,
+                stream=True,
+                extra_body={
+                    "require_audio": "true",
+                    "tts_preset_id": "jessica",
+                }
+            )
+            full_response = ""
+            audio_chunks = []
+            for chunk in completion:
+                if not chunk.choices:
+                    continue
+                content = chunk.choices[0].delta.content
+                audio = getattr(chunk.choices[0], 'audio', [])
+                if content:
+                    full_response += content
+                    yield full_response, None, state
+                if audio:
+                    audio_chunks.extend(audio)
+                    audio_data = b''.join([base64.b64decode(a) for a in audio_chunks])
+                    yield full_response, audio_data, state
+            state.conversation.append({"role": "assistant", "content": full_response})
+        except Exception as e:
+            raise gr.Error(f"Error generating response: {str(e)}")
 def chat(message, state):
     if not message:
     return transcript, state
+def set_api_key(api_key, state):
+    if not api_key:
+        raise gr.Error("Please enter a valid API key.")
+    state.client = create_client(api_key)
+    return "API key set successfully!", state
 with gr.Blocks() as demo:
     state = gr.State(AppState())
+    with gr.Row():
+        api_key_input = gr.Textbox(type="password", label="Enter your Lepton API Key")
+        set_key_button = gr.Button("Set API Key")
+    api_key_status = gr.Textbox(label="API Key Status", interactive=False)
     with gr.Row():
         with gr.Column(scale=1):
             audio_input = gr.Audio(source="microphone", type="numpy")
         with gr.Column(scale=1):
             audio_output = gr.Audio(label="Generated Audio")
+    set_key_button.click(set_api_key, inputs=[api_key_input, state], outputs=[api_key_status, state])
+    audio_input.change(process_audio, inputs=[audio_input, state], outputs=[text_input, state])
+    text_input.submit(chat, inputs=[text_input, state], outputs=[chatbot, audio_output, state])
 demo.launch()