Spaces:

Baghdad99
/

ha-en

Sleeping

Baghdad99 committed on Dec 7, 2023

Commit

9829b9c

•

1 Parent(s): 7e87039

Update app.py

Files changed (1) hide show

app.py CHANGED Viewed

@@ -4,7 +4,7 @@ import soundfile as sf
 import numpy as np
 import tempfile
 from pydub import AudioSegment
-import io  # Add this line
 # Define the Hugging Face Inference API URLs and headers
 ASR_API_URL = "https://api-inference.huggingface.co/models/Baghdad99/saad-speech-recognition-hausa-audio-to-text"
@@ -29,8 +29,13 @@ def translate_speech(audio):
         sf.write(f, audio_data, sample_rate)
         audio_file = f.name
     # Use the ASR pipeline to transcribe the audio
-    with open(audio_file, "rb") as f:
         data = f.read()
     response = requests.post(ASR_API_URL, headers=headers, data=data)
     output = response.json()
@@ -50,8 +55,7 @@ def translate_speech(audio):
     audio_bytes = response.content
     # Convert the audio bytes to an audio segment
-    audio_segment = AudioSegment.from_mp3(io.BytesIO(audio_bytes))
     # Convert the audio segment to a numpy array
     audio_data = np.array(audio_segment.get_array_of_samples())

 import numpy as np
 import tempfile
 from pydub import AudioSegment
+import io
 # Define the Hugging Face Inference API URLs and headers
 ASR_API_URL = "https://api-inference.huggingface.co/models/Baghdad99/saad-speech-recognition-hausa-audio-to-text"
         sf.write(f, audio_data, sample_rate)
         audio_file = f.name
+    # Convert the WAV file to MP3
+    audio_segment = AudioSegment.from_wav(audio_file)
+    mp3_file = audio_file.replace(".wav", ".mp3")
+    audio_segment.export(mp3_file, format="mp3")
     # Use the ASR pipeline to transcribe the audio
+    with open(mp3_file, "rb") as f:  # Change this line
         data = f.read()
     response = requests.post(ASR_API_URL, headers=headers, data=data)
     output = response.json()
     audio_bytes = response.content
     # Convert the audio bytes to an audio segment
+    audio_segment = AudioSegment.from_mp3(io.BytesIO(audio_bytes))  # Change this line
     # Convert the audio segment to a numpy array
     audio_data = np.array(audio_segment.get_array_of_samples())