Spaces:

Baghdad99
/

ha-en

Sleeping

Baghdad99 committed on Dec 7, 2023

Commit

83e3ccb

•

1 Parent(s): 8fe6fd5

Update app.py

Files changed (1) hide show

app.py CHANGED Viewed

@@ -1,5 +1,8 @@
 import gradio as gr
 import requests
 # Define the Hugging Face Inference API URLs and headers
 ASR_API_URL = "https://api-inference.huggingface.co/models/Baghdad99/saad-speech-recognition-hausa-audio-to-text"
@@ -14,8 +17,14 @@ def query(api_url, payload):
 # Define the function to translate speech
 def translate_speech(audio):
     # Use the ASR pipeline to transcribe the audio
-    with open(audio.name, "rb") as f:
         data = f.read()
     response = requests.post(ASR_API_URL, headers=headers, data=data)
     output = response.json()
@@ -34,7 +43,13 @@ def translate_speech(audio):
     response = requests.post(TTS_API_URL, headers=headers, json={"inputs": translated_text})
     audio_bytes = response.content
-    return audio_bytes
 # Define the Gradio interface
 iface = gr.Interface(

 import gradio as gr
 import requests
+import soundfile as sf
+import numpy as np
+import tempfile
 # Define the Hugging Face Inference API URLs and headers
 ASR_API_URL = "https://api-inference.huggingface.co/models/Baghdad99/saad-speech-recognition-hausa-audio-to-text"
 # Define the function to translate speech
 def translate_speech(audio):
+    # audio is a tuple (np.ndarray, int), we need to save it as a file
+    audio_data, sample_rate = audio
+    with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as f:
+        sf.write(f, audio_data, sample_rate)
+        audio_file = f.name
     # Use the ASR pipeline to transcribe the audio
+    with open(audio_file, "rb") as f:
         data = f.read()
     response = requests.post(ASR_API_URL, headers=headers, data=data)
     output = response.json()
     response = requests.post(TTS_API_URL, headers=headers, json={"inputs": translated_text})
     audio_bytes = response.content
+    # Convert the audio bytes to numpy array
+    with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as f:
+        f.write(audio_bytes)
+        audio_file = f.name
+    audio_data, _ = sf.read(audio_file)
+    return audio_data
 # Define the Gradio interface
 iface = gr.Interface(