# ha-en / app.py
# Baghdad99's Space — "Update app.py" (revision a927d1d)
# (raw / history / blame header from the HF file viewer; 3.52 kB)
import io
import os

import gradio as gr
import numpy as np
import requests
from IPython.display import Audio
from pydub import AudioSegment
# Hugging Face Inference API endpoints for the three pipeline stages
# (Hausa ASR -> Hausa->English translation -> English TTS).
ASR_API_URL = "https://api-inference.huggingface.co/models/Baghdad99/saad-speech-recognition-hausa-audio-to-text"
TTS_API_URL = "https://api-inference.huggingface.co/models/Baghdad99/english_voice_tts"
TRANSLATION_API_URL = "https://api-inference.huggingface.co/models/Baghdad99/saad-hausa-text-to-english-text"
# SECURITY: a live API token was hard-coded here. Prefer the HF_TOKEN
# environment variable; the original literal is kept only as a fallback so
# existing deployments keep working. That token is exposed in version
# history and should be revoked and rotated.
headers = {"Authorization": f"Bearer {os.environ.get('HF_TOKEN', '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')}"}
# Define the function to query the Hugging Face Inference API
def query(api_url, payload=None, data=None):
    """POST to a Hugging Face Inference API endpoint and return parsed JSON.

    Args:
        api_url: Endpoint URL of the hosted model.
        payload: Optional dict sent as the JSON body when ``data`` is None.
        data: Optional raw bytes body (e.g. audio); takes precedence.

    Returns:
        The decoded JSON response, or ``None`` when the request fails,
        the body is not JSON, or the API reports an ``error`` field.
    """
    try:
        if data is not None:
            response = requests.post(api_url, headers=headers, data=data, timeout=60)
        else:
            response = requests.post(api_url, headers=headers, json=payload, timeout=60)
        response_json = response.json()
    except requests.RequestException as exc:
        # Network/transport failure — preserve the None-on-error contract.
        print(f"Request to {api_url} failed: {exc}")
        return None
    except ValueError:
        # Body was not valid JSON (e.g. an HTML error page from the gateway).
        print(f"Non-JSON response from {api_url}")
        return None
    if isinstance(response_json, dict) and 'error' in response_json:
        print(f"Error in query function: {response_json['error']}")
        return None
    return response_json
# Define the function to translate speech
def translate_speech(audio_file):
    """Translate Hausa speech to English speech: ASR -> translation -> TTS.

    Args:
        audio_file: File-like object from the Gradio File input; its raw
            bytes are sent to the ASR endpoint.

    Returns:
        A ``(sample_rate, samples)`` tuple (int, np.ndarray) suitable for a
        Gradio numpy audio output, or ``None`` when any stage fails.
    """
    print(f"Type of audio: {type(audio_file)}, Value of audio: {audio_file}")  # Debug line
    # --- Stage 1: speech recognition (Hausa audio -> Hausa text) ---
    data = audio_file.read()
    output = query(ASR_API_URL, data=data)
    print(f"Output: {output}")  # Debug line
    # query() returns None on transport/API failure — guard before indexing
    # (the original crashed with TypeError on `'error' in None`).
    if output is None:
        print("ASR request failed.")
        return None
    if 'error' in output:
        print(f"Error: {output['error']}")
        estimated_time = output.get('estimated_time')
        if estimated_time:
            print(f"Estimated time for the model to load: {estimated_time} seconds")
        return None
    if 'text' not in output:
        print("Key 'text' does not exist in the output.")
        return None
    transcription = output["text"]
    # --- Stage 2: text translation (Hausa -> English) ---
    translation = query(TRANSLATION_API_URL, {"inputs": transcription})
    if not translation:
        print("Translation request failed.")
        return None
    # The translation endpoint typically returns [{"translation_text": ...}];
    # extract the plain string instead of feeding raw JSON to the TTS model.
    if isinstance(translation, list) and isinstance(translation[0], dict):
        translated_text = translation[0].get('translation_text', '')
    else:
        translated_text = str(translation)
    # --- Stage 3: text-to-speech (English text -> audio bytes) ---
    response = requests.post(TTS_API_URL, headers=headers, json={"inputs": translated_text})
    audio_bytes = response.content
    # Decode the returned audio container into (rate, samples) for Gradio's
    # numpy audio output — the original returned an IPython Audio object,
    # which the Gradio component cannot render.
    audio_segment = AudioSegment.from_file(io.BytesIO(audio_bytes))
    samples = np.array(audio_segment.get_array_of_samples())
    return audio_segment.frame_rate, samples
# print(f"Type of audio: {type(audio_file)}, Value of audio: {audio_file}") # Debug line
# # Use the ASR pipeline to transcribe the audio
# data = audio_file.read()
# output = query(ASR_API_URL, data=data)
# print(f"Output: {output}") # Debug line
# # Check if 'text' key exists in the output
# if 'text' in output:
# transcription = output["text"]
# else:
# print("Key 'text' does not exist in the output.")
# return
# # Use the translation pipeline to translate the transcription
# translated_text = query(TRANSLATION_API_URL, {"inputs": transcription})
# # Use the TTS pipeline to synthesize the translated text
# response = requests.post(TTS_API_URL, headers=headers, json={"inputs": translated_text})
# audio_bytes = response.content
# # Display the audio output
# return Audio(audio_bytes)
# Define the Gradio interface
# Build and launch the Gradio UI.
# NOTE(review): gr.inputs / gr.outputs is the legacy (pre-4.x) Gradio
# namespace — confirm the Space pins a compatible gradio version.
iface = gr.Interface(
    fn=translate_speech,
    # NOTE(review): a generic File input yields a file object; an Audio input
    # would let users record from the microphone — confirm intent.
    inputs=gr.inputs.File(type="file"),  # Change this line
    # Expects translate_speech to yield audio in numpy form — presumably a
    # (sample_rate, samples) tuple; verify against the function's return value.
    outputs=gr.outputs.Audio(type="numpy"),
    title="Hausa to English Translation",
    description="Realtime demo for Hausa to English translation using speech recognition and text-to-speech synthesis."
)
iface.launch()