Spaces:
Sleeping
Sleeping
File size: 1,782 Bytes
d7ae26e 96abd98 d7ae26e e59bf3f d7ae26e 96abd98 7ac8184 d7ae26e 96abd98 d4afb45 d7ae26e 96abd98 d7ae26e 96abd98 59bf002 96abd98 22fe498 96abd98 d4afb45 96abd98 d4afb45 d7ae26e 96abd98 d4afb45 d7ae26e 96abd98 d7ae26e d4afb45 d7ae26e 96abd98 d630be3 96abd98 d7ae26e 96abd98 5ab1608 96abd98 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 |
import gradio as gr
from transformers import pipeline
import torch
import librosa
import os
# Hugging Face access token read from the environment; None when HF_TOKEN is unset.
auth_token = os.getenv("HF_TOKEN")

# Display name shown in the UI -> language code passed to the tokenizer and
# used as the adapter name when a transcription is requested.
target_lang_options = {
    "English": "eng",
    "Luganda": "lug",
    "Acholi": "ach",
    "Runyankole": "nyn",
    "Lugbara": "lgg",
}
languages = [*target_lang_options]

# Run on the GPU when CUDA is available, otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = "cuda"
else:
    device = "cpu"

# One shared speech-recognition pipeline, built once at import time.
base_model_id = "facebook/mms-1b-all"
pipe = pipeline(
    "automatic-speech-recognition",
    model=base_model_id,
    device=device,
    token=auth_token,
)
def transcribe_audio(input_file, language, chunk_length_s=10, stride_length_s=(4, 2), return_timestamps="word"):
    """Transcribe an audio file in the selected language.

    Switches the shared ``pipe`` to the requested language (tokenizer target
    language plus the adapter of the same code), loads the audio file, and
    runs chunked speech recognition on it.

    Args:
        input_file: Path to the audio file to transcribe.
        language: Display name of the language; must be a key of
            ``target_lang_options``.
        chunk_length_s: Forwarded to the pipeline as ``chunk_length_s``.
        stride_length_s: Forwarded to the pipeline as ``stride_length_s``.
        return_timestamps: Forwarded to the pipeline as ``return_timestamps``.

    Returns:
        The pipeline's output, unchanged.

    Raises:
        KeyError: If ``language`` is not a key of ``target_lang_options``.
    """
    target_lang_code = target_lang_options[language]

    # Point both the tokenizer and the model at the requested language.
    # Assumes each language code is also the name of an adapter available
    # for the model.
    pipe.tokenizer.set_target_lang(target_lang_code)
    pipe.model.load_adapter(target_lang_code)

    # A raw array handed to the pipeline is taken to be at the feature
    # extractor's sampling rate already (no check is performed), so resample
    # while loading instead of keeping the file's native rate.
    model_sampling_rate = pipe.feature_extractor.sampling_rate
    audio_data, _ = librosa.load(input_file, sr=model_sampling_rate)

    output = pipe(
        audio_data,
        chunk_length_s=chunk_length_s,
        stride_length_s=stride_length_s,
        return_timestamps=return_timestamps,
    )
    # NOTE(review): the whole pipeline result is returned, not just its text;
    # the UI textbox will presumably show its string form — confirm intended.
    return output
description = "ASR with dynamic language adaptation"

# Input widgets, listed in the positional order transcribe_audio receives them:
# the uploaded file's path first, then the chosen language name.
# NOTE(review): `source=` is the Gradio 3.x keyword (newer releases use
# `sources=[...]`) — confirm against the Gradio version this app is pinned to.
audio_input = gr.Audio(source="upload", type="filepath", label="Upload file to transcribe")
language_input = gr.Dropdown(choices=languages, label="Language", value="English")

# Wire the widgets to the transcription callback and start the app.
iface = gr.Interface(
    fn=transcribe_audio,
    inputs=[audio_input, language_input],
    outputs=gr.Textbox(label="Transcription"),
    description=description,
)
iface.launch()
|