# app.py — Hugging Face Space by akera (commit 96abd98, verified; 1.78 kB)
# NOTE: the lines above in the original paste ("raw / history / blame" etc.)
# were Hugging Face file-viewer page chrome, not part of the program.
import gradio as gr
from transformers import pipeline
import torch
import librosa
import os
# Authentication token for Hugging Face (may be None if HF_TOKEN is unset;
# the public facebook/mms-1b-all checkpoint can still load without it).
auth_token = os.environ.get("HF_TOKEN")
# Mapping of UI language display names to MMS ISO-639-3 language codes;
# these codes double as the adapter names loaded in transcribe_audio().
target_lang_options = {"English": "eng", "Luganda": "lug", "Acholi": "ach", "Runyankole": "nyn", "Lugbara": "lgg"}
# Ordered list of display names for the Gradio dropdown.
languages = list(target_lang_options.keys())
# Determine device based on CUDA availability.
device = "cuda" if torch.cuda.is_available() else "cpu"
base_model_id = "facebook/mms-1b-all"
# Single module-level ASR pipeline, shared across requests; the language
# adapter is swapped per call (see transcribe_audio), so concurrent requests
# for different languages would race — acceptable for a demo Space.
pipe = pipeline("automatic-speech-recognition", model=base_model_id, device=device, token=auth_token)
def transcribe_audio(input_file, language, chunk_length_s=10, stride_length_s=(4, 2), return_timestamps="word"):
    """Transcribe an uploaded audio file in the selected language.

    Parameters
    ----------
    input_file : str
        Filesystem path to the audio file (as provided by ``gr.Audio``
        with ``type="filepath"``).
    language : str
        Display name of the target language; must be a key of the
        module-level ``target_lang_options`` mapping.
    chunk_length_s : int
        Chunk length in seconds for long-form transcription.
    stride_length_s : tuple[int, int]
        Left/right overlap (seconds) between consecutive chunks.
    return_timestamps : str
        Timestamp granularity forwarded to the pipeline (e.g. "word").

    Returns
    -------
    dict
        The pipeline output, e.g. ``{"text": ..., "chunks": [...]}`` when
        timestamps are requested.

    Raises
    ------
    KeyError
        If ``language`` is not one of the configured display names.
    """
    target_lang_code = target_lang_options[language]
    # Dynamically switch the tokenizer vocabulary and the MMS language
    # adapter; for facebook/mms-1b-all the adapter names are the ISO codes.
    pipe.tokenizer.set_target_lang(target_lang_code)
    pipe.model.load_adapter(target_lang_code)
    # Load at the file's native sampling rate and tell the pipeline what
    # that rate is so its feature extractor can resample to the model's
    # expected rate. (Previously a bare array was passed, which the
    # pipeline assumes to be 16 kHz — wrong for files recorded at any
    # other rate, silently degrading transcription quality.)
    audio_data, sample_rate = librosa.load(input_file, sr=None)
    output = pipe(
        {"raw": audio_data, "sampling_rate": sample_rate},
        chunk_length_s=chunk_length_s,
        stride_length_s=stride_length_s,
        return_timestamps=return_timestamps,
    )
    return output
# Short blurb shown under the interface title in the Gradio UI.
description = "ASR with dynamic language adaptation"
# Wire the transcription function to a simple upload + dropdown UI.
# NOTE(review): gr.Audio(source="upload", ...) is the Gradio 3.x API; in
# Gradio 4.x the parameter was renamed to sources=["upload"] — confirm the
# Space's pinned gradio version before upgrading.
iface = gr.Interface(fn=transcribe_audio,
inputs=[
gr.Audio(source="upload", type="filepath", label="Upload file to transcribe"),
gr.Dropdown(choices=languages, label="Language", value="English")
],
outputs=gr.Textbox(label="Transcription"),
description=description)
# Blocks until the server is stopped; entry point of the Space.
iface.launch()