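Update app.py: load the Hausa ASR model via gr.load instead of calling the Inference API directly; drop unused soundfile/tempfile imports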
Browse files
app.py
CHANGED
@@ -1,17 +1,17 @@
|
|
1 |
import gradio as gr
|
2 |
import requests
|
3 |
-
import soundfile as sf
|
4 |
import numpy as np
|
5 |
-
import tempfile
|
6 |
from pydub import AudioSegment
|
7 |
import io
|
8 |
|
9 |
# Define the Hugging Face Inference API URLs and headers
|
10 |
-
ASR_API_URL = "https://api-inference.huggingface.co/models/Baghdad99/saad-speech-recognition-hausa-audio-to-text"
|
11 |
TTS_API_URL = "https://api-inference.huggingface.co/models/Baghdad99/english_voice_tts"
|
12 |
TRANSLATION_API_URL = "https://api-inference.huggingface.co/models/Baghdad99/saad-hausa-text-to-english-text"
|
13 |
headers = {"Authorization": "Bearer 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"}
|
14 |
|
|
|
|
|
|
|
15 |
# Define the function to query the Hugging Face Inference API
|
16 |
def query(api_url, payload):
|
17 |
response = requests.post(api_url, headers=headers, json=payload)
|
@@ -21,18 +21,8 @@ def query(api_url, payload):
|
|
21 |
def translate_speech(audio_file):
|
22 |
print(f"Type of audio: {type(audio_file)}, Value of audio: {audio_file}") # Debug line
|
23 |
|
24 |
-
# Use the ASR
|
25 |
-
|
26 |
-
data = f.read()
|
27 |
-
response = requests.post(ASR_API_URL, headers=headers, data=data)
|
28 |
-
output = response.json()
|
29 |
-
|
30 |
-
# Check if the output contains 'text'
|
31 |
-
if 'text' in output:
|
32 |
-
transcription = output["text"]
|
33 |
-
else:
|
34 |
-
print("The output does not contain 'text'")
|
35 |
-
return
|
36 |
|
37 |
# Use the translation pipeline to translate the transcription
|
38 |
translated_text = query(TRANSLATION_API_URL, {"inputs": transcription})
|
|
|
1 |
import gradio as gr
|
2 |
import requests
|
|
|
3 |
import numpy as np
|
|
|
4 |
from pydub import AudioSegment
|
5 |
import io
|
6 |
|
7 |
# Define the Hugging Face Inference API URLs and headers
|
|
|
8 |
TTS_API_URL = "https://api-inference.huggingface.co/models/Baghdad99/english_voice_tts"
|
9 |
TRANSLATION_API_URL = "https://api-inference.huggingface.co/models/Baghdad99/saad-hausa-text-to-english-text"
|
10 |
headers = {"Authorization": "Bearer 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"}
|
11 |
|
12 |
+
# Load the Gradio model for speech recognition
|
13 |
+
asr_model = gr.load("models/Baghdad99/saad-speech-recognition-hausa-audio-to-text")
|
14 |
+
|
15 |
# Define the function to query the Hugging Face Inference API
|
16 |
def query(api_url, payload):
|
17 |
response = requests.post(api_url, headers=headers, json=payload)
|
|
|
21 |
def translate_speech(audio_file):
|
22 |
print(f"Type of audio: {type(audio_file)}, Value of audio: {audio_file}") # Debug line
|
23 |
|
24 |
+
# Use the ASR model to transcribe the audio
|
25 |
+
transcription = asr_model.predict(audio_file.name) # Change this line
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
26 |
|
27 |
# Use the translation pipeline to translate the transcription
|
28 |
translated_text = query(TRANSLATION_API_URL, {"inputs": transcription})
|