akhaliq HF staff commited on
Commit
16020a5
1 Parent(s): 6caf91a

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +51 -42
app.py CHANGED
@@ -1,4 +1,3 @@
1
- import os
2
  import base64
3
  import gradio as gr
4
  import openai
@@ -7,12 +6,6 @@ import io
7
  import tempfile
8
  import speech_recognition as sr
9
 
10
- # Initialize the OpenAI client
11
- client = openai.OpenAI(
12
- base_url="https://llama3-2-3b.lepton.run/api/v1/",
13
- api_key=os.environ.get('LEPTON_API_TOKEN')
14
- )
15
-
16
  def transcribe_audio(audio):
17
  # Convert the audio to wav format
18
  audio = AudioSegment.from_file(audio)
@@ -34,57 +27,73 @@ def transcribe_audio(audio):
34
 
35
  return text
36
 
37
- def process_audio(audio):
 
 
 
 
 
 
 
 
 
38
  # Transcribe the input audio
39
  transcription = transcribe_audio(audio)
40
 
41
- # Process the transcription with the API
42
- completion = client.chat.completions.create(
43
- model="gpt-3.5-turbo",
44
- messages=[
45
- {"role": "user", "content": transcription},
46
- ],
47
- max_tokens=128,
48
- stream=True,
49
- extra_body={
50
- "require_audio": "true",
51
- "tts_preset_id": "jessica",
52
- }
53
- )
 
54
 
55
- response_text = ""
56
- audios = []
57
 
58
- for chunk in completion:
59
- if not chunk.choices:
60
- continue
61
- content = chunk.choices[0].delta.content
62
- audio = getattr(chunk.choices[0], 'audio', [])
63
- if content:
64
- response_text += content
65
- if audio:
66
- audios.extend(audio)
67
 
68
- # Combine audio chunks and save as MP3
69
- audio_data = b''.join([base64.b64decode(audio) for audio in audios])
70
-
71
- # Save the audio to a temporary file
72
- with tempfile.NamedTemporaryFile(suffix=".mp3", delete=False) as temp_audio:
73
- temp_audio.write(audio_data)
74
- temp_audio_path = temp_audio.name
75
 
76
- return response_text, temp_audio_path
 
 
 
77
 
78
  # Create the Gradio interface
79
  iface = gr.Interface(
80
  fn=process_audio,
81
- inputs=gr.Audio(type="filepath"),
 
 
 
82
  outputs=[
83
  gr.Textbox(label="Response Text"),
84
  gr.Audio(label="Response Audio")
85
  ],
86
  title="Audio-to-Audio Demo",
87
- description="Upload an audio file to get a response in both text and audio format."
88
  )
89
 
90
  # Launch the interface
 
 
1
  import base64
2
  import gradio as gr
3
  import openai
 
6
  import tempfile
7
  import speech_recognition as sr
8
 
 
 
 
 
 
 
9
  def transcribe_audio(audio):
10
  # Convert the audio to wav format
11
  audio = AudioSegment.from_file(audio)
 
27
 
28
  return text
29
 
30
def process_audio(audio, api_token):
    """Transcribe an uploaded audio file, send the text to the LLM, and
    return the model's reply as text plus a synthesized-speech MP3.

    Args:
        audio: Path to the uploaded audio file (Gradio ``type="filepath"``).
        api_token: User-supplied Lepton API token; required.

    Returns:
        tuple: ``(response_text, mp3_path)`` on success; on a missing token,
        an error, or when the API streamed no audio, the second element is
        ``None`` and the first carries the text or an error message.
    """
    if not api_token:
        return "Please provide an API token.", None

    # Initialize the OpenAI client with the user-provided token
    client = openai.OpenAI(
        base_url="https://llama3-2-3b.lepton.run/api/v1/",
        api_key=api_token
    )

    try:
        # Transcribe inside the try so speech-recognition failures are
        # surfaced to the UI the same way API failures are, not raised.
        transcription = transcribe_audio(audio)

        # Process the transcription with the API
        completion = client.chat.completions.create(
            model="gpt-3.5-turbo",
            messages=[
                {"role": "user", "content": transcription},
            ],
            max_tokens=128,
            stream=True,
            extra_body={
                "require_audio": "true",
                "tts_preset_id": "jessica",
            }
        )

        response_text = ""
        audio_chunks = []  # base64-encoded MP3 fragments streamed by the API

        for chunk in completion:
            if not chunk.choices:
                continue
            content = chunk.choices[0].delta.content
            # 'audio' is a provider extension field; absent on standard
            # chunks, hence getattr with a default. Use a distinct name so
            # the 'audio' parameter is not shadowed.
            chunk_audio = getattr(chunk.choices[0], 'audio', [])
            if content:
                response_text += content
            if chunk_audio:
                audio_chunks.extend(chunk_audio)

        # No audio streamed back: return the text alone rather than
        # handing Gradio a zero-byte MP3 file.
        if not audio_chunks:
            return response_text, None

        # Combine audio chunks into one MP3 payload
        audio_data = b''.join(base64.b64decode(a) for a in audio_chunks)

        # delete=False so the file survives for Gradio to read after return
        with tempfile.NamedTemporaryFile(suffix=".mp3", delete=False) as temp_audio:
            temp_audio.write(audio_data)
            temp_audio_path = temp_audio.name

        return response_text, temp_audio_path

    except Exception as e:
        # Report any failure to the UI instead of crashing the app
        return f"An error occurred: {str(e)}", None
83
 
84
# Build the Gradio interface: audio upload + API token in, text + audio out.
demo_inputs = [
    gr.Audio(type="filepath", label="Input Audio"),
    gr.Textbox(label="API Token", type="password"),
]
demo_outputs = [
    gr.Textbox(label="Response Text"),
    gr.Audio(label="Response Audio"),
]
iface = gr.Interface(
    fn=process_audio,
    inputs=demo_inputs,
    outputs=demo_outputs,
    title="Audio-to-Audio Demo",
    description="Upload an audio file and provide your API token to get a response in both text and audio format."
)
98
 
99
  # Launch the interface