llama-3.2-3b-voice

Running

App Files Files Community

yadongxie committed on 6 days ago

Commit

1926927

•

1 Parent(s): dec22aa

chore: update the layout

Browse files

Files changed (10) hide show

.gradio/certificate.pem +31 -0
.idea/.gitignore +8 -0
.idea/inspectionProfiles/Project_Default.xml +14 -0
.idea/inspectionProfiles/profiles_settings.xml +6 -0
.idea/llama-3.2-3b-voice.iml +14 -0
.idea/misc.xml +8 -0
.idea/modules.xml +8 -0
.idea/vcs.xml +6 -0
__pycache__/app.cpython-310.pyc +0 -0
app.py +53 -40

.gradio/certificate.pem ADDED Viewed

	@@ -0,0 +1,31 @@

+-----BEGIN CERTIFICATE-----
+MIIFazCCA1OgAwIBAgIRAIIQz7DSQONZRGPgu2OCiwAwDQYJKoZIhvcNAQELBQAw
+TzELMAkGA1UEBhMCVVMxKTAnBgNVBAoTIEludGVybmV0IFNlY3VyaXR5IFJlc2Vh
+cmNoIEdyb3VwMRUwEwYDVQQDEwxJU1JHIFJvb3QgWDEwHhcNMTUwNjA0MTEwNDM4
+WhcNMzUwNjA0MTEwNDM4WjBPMQswCQYDVQQGEwJVUzEpMCcGA1UEChMgSW50ZXJu
+ZXQgU2VjdXJpdHkgUmVzZWFyY2ggR3JvdXAxFTATBgNVBAMTDElTUkcgUm9vdCBY
+MTCCAiIwDQYJKoZIhvcNAQEBBQADggIPADCCAgoCggIBAK3oJHP0FDfzm54rVygc
+h77ct984kIxuPOZXoHj3dcKi/vVqbvYATyjb3miGbESTtrFj/RQSa78f0uoxmyF+
+0TM8ukj13Xnfs7j/EvEhmkvBioZxaUpmZmyPfjxwv60pIgbz5MDmgK7iS4+3mX6U
+A5/TR5d8mUgjU+g4rk8Kb4Mu0UlXjIB0ttov0DiNewNwIRt18jA8+o+u3dpjq+sW
+T8KOEUt+zwvo/7V3LvSye0rgTBIlDHCNAymg4VMk7BPZ7hm/ELNKjD+Jo2FR3qyH
+B5T0Y3HsLuJvW5iB4YlcNHlsdu87kGJ55tukmi8mxdAQ4Q7e2RCOFvu396j3x+UC
+B5iPNgiV5+I3lg02dZ77DnKxHZu8A/lJBdiB3QW0KtZB6awBdpUKD9jf1b0SHzUv
+KBds0pjBqAlkd25HN7rOrFleaJ1/ctaJxQZBKT5ZPt0m9STJEadao0xAH0ahmbWn
+OlFuhjuefXKnEgV4We0+UXgVCwOPjdAvBbI+e0ocS3MFEvzG6uBQE3xDk3SzynTn
+jh8BCNAw1FtxNrQHusEwMFxIt4I7mKZ9YIqioymCzLq9gwQbooMDQaHWBfEbwrbw
+qHyGO0aoSCqI3Haadr8faqU9GY/rOPNk3sgrDQoo//fb4hVC1CLQJ13hef4Y53CI
+rU7m2Ys6xt0nUW7/vGT1M0NPAgMBAAGjQjBAMA4GA1UdDwEB/wQEAwIBBjAPBgNV
+HRMBAf8EBTADAQH/MB0GA1UdDgQWBBR5tFnme7bl5AFzgAiIyBpY9umbbjANBgkq
+hkiG9w0BAQsFAAOCAgEAVR9YqbyyqFDQDLHYGmkgJykIrGF1XIpu+ILlaS/V9lZL
+ubhzEFnTIZd+50xx+7LSYK05qAvqFyFWhfFQDlnrzuBZ6brJFe+GnY+EgPbk6ZGQ
+3BebYhtF8GaV0nxvwuo77x/Py9auJ/GpsMiu/X1+mvoiBOv/2X/qkSsisRcOj/KK
+NFtY2PwByVS5uCbMiogziUwthDyC3+6WVwW6LLv3xLfHTjuCvjHIInNzktHCgKQ5
+ORAzI4JMPJ+GslWYHb4phowim57iaztXOoJwTdwJx4nLCgdNbOhdjsnvzqvHu7Ur
+TkXWStAmzOVyyghqpZXjFaH3pO3JLF+l+/+sKAIuvtd7u+Nxe5AW0wdeRlN8NwdC
+jNPElpzVmbUq4JUagEiuTDkHzsxHpFKVK7q4+63SM1N95R1NbdWhscdCb+ZAJzVc
+oyi3B43njTOQ5yOf+1CceWxG1bQVs5ZufpsMljq4Ui0/1lvh+wjChP4kqKOJ2qxq
+4RgqsahDYVvTH9w7jXbyLeiNdd8XM2w9U/t7y0Ff/9yi0GE44Za4rF2LN9d11TPA
+mRGunUHBcnWEvgJBQl9nJEiU0Zsnvgc/ubhPgXRR4Xq37Z0j4r7g1SgEEzwxA57d
+emyPxgcYxn/eR44/KJ4EBs+lVDR3veyJm+kXQ99b21/+jh5Xos1AnX5iItreGCc=
+-----END CERTIFICATE-----

.idea/.gitignore ADDED Viewed

	@@ -0,0 +1,8 @@

+# Default ignored files
+/shelf/
+/workspace.xml
+# Editor-based HTTP Client requests
+/httpRequests/
+# Datasource local storage ignored files
+/dataSources/
+/dataSources.local.xml

.idea/inspectionProfiles/Project_Default.xml ADDED Viewed

	@@ -0,0 +1,14 @@

+<component name="InspectionProjectProfileManager">
+  <profile version="1.0">
+    <option name="myName" value="Project Default" />
+    <inspection_tool class="PyPackageRequirementsInspection" enabled="true" level="WARNING" enabled_by_default="true">
+      <option name="ignoredPackages">
+        <value>
+          <list size="1">
+            <item index="0" class="java.lang.String" itemvalue="flash_attn" />
+          </list>
+        </value>
+      </option>
+    </inspection_tool>
+  </profile>
+</component>

.idea/inspectionProfiles/profiles_settings.xml ADDED Viewed

	@@ -0,0 +1,6 @@

+<component name="InspectionProjectProfileManager">
+  <settings>
+    <option name="USE_PROJECT_PROFILE" value="false" />
+    <version value="1.0" />
+  </settings>
+</component>

.idea/llama-3.2-3b-voice.iml ADDED Viewed

	@@ -0,0 +1,14 @@

+<?xml version="1.0" encoding="UTF-8"?>
+<module type="PYTHON_MODULE" version="4">
+  <component name="NewModuleRootManager">
+    <content url="file://$MODULE_DIR$">
+      <excludeFolder url="file://$MODULE_DIR$/venv" />
+    </content>
+    <orderEntry type="jdk" jdkName="Python 3.9" jdkType="Python SDK" />
+    <orderEntry type="sourceFolder" forTests="false" />
+  </component>
+  <component name="PyDocumentationSettings">
+    <option name="format" value="PLAIN" />
+    <option name="myDocStringFormat" value="Plain" />
+  </component>
+</module>

.idea/misc.xml ADDED Viewed

	@@ -0,0 +1,8 @@

+<?xml version="1.0" encoding="UTF-8"?>
+<project version="4">
+  <component name="Black">
+    <option name="enabledOnSave" value="true" />
+    <option name="sdkName" value="Python 3.9" />
+  </component>
+  <component name="ProjectRootManager" version="2" project-jdk-name="Python 3.9" project-jdk-type="Python SDK" />
+</project>

.idea/modules.xml ADDED Viewed

	@@ -0,0 +1,8 @@

+<?xml version="1.0" encoding="UTF-8"?>
+<project version="4">
+  <component name="ProjectModuleManager">
+    <modules>
+      <module fileurl="file://$PROJECT_DIR$/.idea/llama-3.2-3b-voice.iml" filepath="$PROJECT_DIR$/.idea/llama-3.2-3b-voice.iml" />
+    </modules>
+  </component>
+</project>

.idea/vcs.xml ADDED Viewed

	@@ -0,0 +1,6 @@

+<?xml version="1.0" encoding="UTF-8"?>
+<project version="4">
+  <component name="VcsDirectoryMappings">
+    <mapping directory="" vcs="Git" />
+  </component>
+</project>

__pycache__/app.cpython-310.pyc ADDED Viewed

Binary file (6.18 kB). View file

app.py CHANGED Viewed

@@ -2,9 +2,7 @@ import gradio as gr
 import numpy as np
 import io
 from pydub import AudioSegment
-import tempfile
 import openai
-import time
 from dataclasses import dataclass, field
 from threading import Lock
 import base64
@@ -20,15 +18,17 @@ class AppState:
     output_format: str = "mp3"
     stopped: bool = False
 # Global lock for thread safety
 state_lock = Lock()
 def create_client(api_key):
     return openai.OpenAI(
-        base_url="https://llama3-1-8b.lepton.run/api/v1/",
-        api_key=api_key
     )
 def determine_pause(audio, sampling_rate, state):
     # Take the last 1 second of audio
     pause_length = int(sampling_rate * 1)  # 1 second
@@ -45,6 +45,7 @@ def determine_pause(audio, sampling_rate, state):
     else:
         return False
 def process_audio(audio: tuple, state: AppState):
     if state.stream is None:
         state.stream = audio[1]
@@ -60,6 +61,7 @@ def process_audio(audio: tuple, state: AppState):
     else:
         return None, state
 def generate_response_and_audio(audio_bytes: bytes, state: AppState):
     if state.client is None:
         raise gr.Error("Please enter a valid API key first.")
@@ -74,10 +76,12 @@ def generate_response_and_audio(audio_bytes: bytes, state: AppState):
                 "require_audio": True,
                 "tts_preset_id": "jessica",
                 "tts_audio_format": format_,
-                "tts_audio_bitrate": bitrate
             },
             model="llama3.1-8b",
-            messages=[{"role": "user", "content": [{"type": "audio", "data": audio_data}]}],
             temperature=0.7,
             max_tokens=256,
             stream=True,
@@ -90,20 +94,21 @@ def generate_response_and_audio(audio_bytes: bytes, state: AppState):
             if not chunk.choices:
                 continue
             content = chunk.choices[0].delta.content
-            audio = getattr(chunk.choices[0], 'audio', [])
             if content:
                 full_response += content
                 yield full_response, None, state
             if audio:
                 audios.extend(audio)
-        final_audio = b''.join([base64.b64decode(a) for a in audios])
         yield full_response, final_audio, state
     except Exception as e:
         raise gr.Error(f"Error during audio streaming: {e}")
 def response(state: AppState):
     if state.stream is None or len(state.stream) == 0:
         return None, None, state
@@ -139,43 +144,54 @@ def response(state: AppState):
     return chatbot_output, final_audio, state
 def start_recording_user(state: AppState):
     if not state.stopped:
         return gr.Audio(recording=True)
     else:
         return gr.Audio(recording=False)
 def set_api_key(api_key, state):
     if not api_key:
         raise gr.Error("Please enter a valid API key.")
-    state.client = create_client(api_key)
-    return "API key set successfully!", state
-def update_format(format, state):
-    state.output_format = format
-    return state
 with gr.Blocks() as demo:
     with gr.Row():
-        api_key_input = gr.Textbox(type="password", label="Enter your Lepton API Key")
-        set_key_button = gr.Button("Set API Key")
-    api_key_status = gr.Textbox(label="API Key Status", interactive=False)
-    with gr.Row():
-        format_dropdown = gr.Dropdown(choices=["mp3", "opus"], value="mp3", label="Output Audio Format")
-    with gr.Row():
-        with gr.Column():
-            input_audio = gr.Audio(label="Input Audio", sources="microphone", type="numpy")
-        with gr.Column():
-            chatbot = gr.Chatbot(label="Conversation", type="messages")
-            output_audio = gr.Audio(label="Output Audio", autoplay=True)
     state = gr.State(AppState())
-    set_key_button.click(set_api_key, inputs=[api_key_input, state], outputs=[api_key_status, state])
-    format_dropdown.change(update_format, inputs=[format_dropdown, state], outputs=[state])
     stream = input_audio.stream(
         process_audio,
@@ -186,23 +202,20 @@ with gr.Blocks() as demo:
     )
     respond = input_audio.stop_recording(
-        response,
-        [state],
-        [chatbot, output_audio, state]
     )
     # Update the chatbot with the final conversation
     respond.then(lambda s: s.conversation, [state], [chatbot])
     # Automatically restart recording after the assistant's response
-    restart = output_audio.stop(
-        start_recording_user,
-        [state],
-        [input_audio]
-    )
     # Add a "Stop Conversation" button
-    cancel = gr.Button("Stop Conversation", variant="stop")
-    cancel.click(lambda: (AppState(stopped=True), gr.Audio(recording=False)), None,
-                [state, input_audio], cancels=[respond, restart])
-demo.launch()

 import numpy as np
 import io
 from pydub import AudioSegment
 import openai
 from dataclasses import dataclass, field
 from threading import Lock
 import base64
     output_format: str = "mp3"
     stopped: bool = False
 # Global lock for thread safety
 state_lock = Lock()
 def create_client(api_key):
     return openai.OpenAI(
+        base_url="https://llama3-1-8b.lepton.run/api/v1/", api_key=api_key
     )
 def determine_pause(audio, sampling_rate, state):
     # Take the last 1 second of audio
     pause_length = int(sampling_rate * 1)  # 1 second
     else:
         return False
 def process_audio(audio: tuple, state: AppState):
     if state.stream is None:
         state.stream = audio[1]
     else:
         return None, state
 def generate_response_and_audio(audio_bytes: bytes, state: AppState):
     if state.client is None:
         raise gr.Error("Please enter a valid API key first.")
                 "require_audio": True,
                 "tts_preset_id": "jessica",
                 "tts_audio_format": format_,
+                "tts_audio_bitrate": bitrate,
             },
             model="llama3.1-8b",
+            messages=[
+                {"role": "user", "content": [{"type": "audio", "data": audio_data}]}
+            ],
             temperature=0.7,
             max_tokens=256,
             stream=True,
             if not chunk.choices:
                 continue
             content = chunk.choices[0].delta.content
+            audio = getattr(chunk.choices[0], "audio", [])
             if content:
                 full_response += content
                 yield full_response, None, state
             if audio:
                 audios.extend(audio)
+        final_audio = b"".join([base64.b64decode(a) for a in audios])
         yield full_response, final_audio, state
     except Exception as e:
         raise gr.Error(f"Error during audio streaming: {e}")
 def response(state: AppState):
     if state.stream is None or len(state.stream) == 0:
         return None, None, state
     return chatbot_output, final_audio, state
 def start_recording_user(state: AppState):
     if not state.stopped:
         return gr.Audio(recording=True)
     else:
         return gr.Audio(recording=False)
 def set_api_key(api_key, state):
     if not api_key:
         raise gr.Error("Please enter a valid API key.")
+    try:
+        state.client = create_client(api_key)
+        return gr.update(value="API key set successfully!", visible=True), state
+    except Exception as e:
+        return gr.update(value="Connection error", visible=True), state
 with gr.Blocks() as demo:
+    gr.Markdown("# Lepton LLM Voice Mode")
+    gr.Markdown(
+        "You can find Lepton serverless endpoint API Key at [here](https://dashboard.lepton.ai/workspace-redirect/settings/api-tokens)"
+    )
     with gr.Row():
+        with gr.Column(scale=3):
+            api_key_input = gr.Textbox(
+                type="password", label="Enter your Lepton API Key"
+            )
+        with gr.Column(scale=1):
+            set_key_button = gr.Button("Set API Key", scale=2, variant="secondary")
+    api_key_status = gr.Textbox(
+        show_label=False, container=False, interactive=False, visible=False
+    )
+    with gr.Blocks():
+        input_audio = gr.Audio(label="Input Audio", sources="microphone", type="numpy")
+        output_audio = gr.Audio(label="Output Audio", autoplay=True)
+        chatbot = gr.Chatbot(label="Conversation", type="messages")
+        cancel = gr.Button("Stop Conversation", variant="stop")
     state = gr.State(AppState())
+    set_key_button.click(
+        set_api_key,
+        inputs=[api_key_input, state],
+        outputs=[api_key_status, state],
+    )
     stream = input_audio.stream(
         process_audio,
     )
     respond = input_audio.stop_recording(
+        response, [state], [chatbot, output_audio, state]
     )
     # Update the chatbot with the final conversation
     respond.then(lambda s: s.conversation, [state], [chatbot])
     # Automatically restart recording after the assistant's response
+    restart = output_audio.stop(start_recording_user, [state], [input_audio])
     # Add a "Stop Conversation" button
+    cancel.click(
+        lambda: (AppState(stopped=True), gr.Audio(recording=False)),
+        None,
+        [state, input_audio],
+        cancels=[respond, restart],
+    )
+demo.launch(share=True)