Spaces:
Runtime error
Runtime error
import base64 | |
import gradio as gr | |
import openai | |
from pydub import AudioSegment | |
import io | |
import tempfile | |
import speech_recognition as sr | |
def transcribe_audio(audio): | |
# Convert the audio to wav format | |
audio = AudioSegment.from_file(audio) | |
audio = audio.set_frame_rate(16000).set_channels(1) | |
# Save as wav file | |
with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as temp_audio: | |
audio.export(temp_audio.name, format="wav") | |
temp_audio_path = temp_audio.name | |
# Perform speech recognition | |
recognizer = sr.Recognizer() | |
with sr.AudioFile(temp_audio_path) as source: | |
audio_data = recognizer.record(source) | |
text = recognizer.recognize_google(audio_data) | |
# Clean up the temporary file | |
os.unlink(temp_audio_path) | |
return text | |
def process_audio(audio, api_token): | |
if not api_token: | |
return "Please provide an API token.", None | |
# Initialize the OpenAI client with the user-provided token | |
client = openai.OpenAI( | |
base_url="https://llama3-2-3b.lepton.run/api/v1/", | |
api_key=api_token | |
) | |
# Transcribe the input audio | |
transcription = transcribe_audio(audio) | |
try: | |
# Process the transcription with the API | |
completion = client.chat.completions.create( | |
model="gpt-3.5-turbo", | |
messages=[ | |
{"role": "user", "content": transcription}, | |
], | |
max_tokens=128, | |
stream=True, | |
extra_body={ | |
"require_audio": "true", | |
"tts_preset_id": "jessica", | |
} | |
) | |
response_text = "" | |
audios = [] | |
for chunk in completion: | |
if not chunk.choices: | |
continue | |
content = chunk.choices[0].delta.content | |
audio = getattr(chunk.choices[0], 'audio', []) | |
if content: | |
response_text += content | |
if audio: | |
audios.extend(audio) | |
# Combine audio chunks and save as MP3 | |
audio_data = b''.join([base64.b64decode(audio) for audio in audios]) | |
# Save the audio to a temporary file | |
with tempfile.NamedTemporaryFile(suffix=".mp3", delete=False) as temp_audio: | |
temp_audio.write(audio_data) | |
temp_audio_path = temp_audio.name | |
return response_text, temp_audio_path | |
except Exception as e: | |
return f"An error occurred: {str(e)}", None | |
# Create the Gradio interface | |
iface = gr.Interface( | |
fn=process_audio, | |
inputs=[ | |
gr.Audio(type="filepath", label="Input Audio"), | |
gr.Textbox(label="API Token", type="password") | |
], | |
outputs=[ | |
gr.Textbox(label="Response Text"), | |
gr.Audio(label="Response Audio") | |
], | |
title="Audio-to-Audio Demo", | |
description="Upload an audio file and provide your API token to get a response in both text and audio format." | |
) | |
# Launch the interface | |
iface.launch() |