File size: 1,782 Bytes
d7ae26e
96abd98
d7ae26e
 
e59bf3f
d7ae26e
96abd98
7ac8184
d7ae26e
96abd98
d4afb45
 
d7ae26e
96abd98
 
d7ae26e
96abd98
 
59bf002
96abd98
22fe498
96abd98
 
d4afb45
96abd98
d4afb45
d7ae26e
96abd98
 
 
d4afb45
d7ae26e
96abd98
d7ae26e
d4afb45
d7ae26e
96abd98
d630be3
96abd98
d7ae26e
96abd98
5ab1608
96abd98
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
import gradio as gr
from transformers import pipeline
import torch
import librosa
import os

# Hugging Face access token taken from the environment (None when HF_TOKEN is unset)
auth_token = os.getenv("HF_TOKEN")

# Display name shown in the UI -> language code passed to the tokenizer and adapter loader
target_lang_options = {
    "English": "eng",
    "Luganda": "lug",
    "Acholi": "ach",
    "Runyankole": "nyn",
    "Lugbara": "lgg",
}
languages = [*target_lang_options]

# Use the GPU when CUDA is available, otherwise fall back to the CPU
if torch.cuda.is_available():
    device = "cuda"
else:
    device = "cpu"

# Shared speech-recognition pipeline, built once at import time and reused by every request
base_model_id = "facebook/mms-1b-all"
pipe = pipeline(
    "automatic-speech-recognition",
    model=base_model_id,
    device=device,
    token=auth_token,
)

def transcribe_audio(input_file, language, chunk_length_s=10, stride_length_s=(4, 2), return_timestamps="word"):
    """Transcribe an audio file using the adapter for the selected language.

    Args:
        input_file: Path to the audio file to transcribe.
        language: Display name of the language; must be a key of
            ``target_lang_options``.
        chunk_length_s: Chunk length in seconds, forwarded to the pipeline.
        stride_length_s: Stride in seconds on the left and right of each
            chunk, forwarded to the pipeline.
        return_timestamps: Timestamp mode, forwarded to the pipeline.

    Returns:
        The pipeline output, unchanged.

    Raises:
        gr.Error: If no audio file was provided.
        KeyError: If ``language`` is not a key of ``target_lang_options``.
    """
    # Submitting the form without an upload yields no path; fail with a readable message
    # instead of an opaque error from the audio loader.
    if not input_file:
        raise gr.Error("Please upload an audio file to transcribe.")

    target_lang_code = target_lang_options[language]

    # Dynamically set the target language and load the corresponding adapter
    pipe.tokenizer.set_target_lang(target_lang_code)
    # Assuming each language code directly corresponds to an adapter name available for the model
    pipe.model.load_adapter(target_lang_code)

    # The pipeline treats a raw array as already being at the model's sampling rate and does
    # not verify it, so resample on load rather than keeping the file's native rate.
    model_sampling_rate = pipe.feature_extractor.sampling_rate
    audio_data, _ = librosa.load(input_file, sr=model_sampling_rate)

    output = pipe(
        audio_data,
        chunk_length_s=chunk_length_s,
        stride_length_s=stride_length_s,
        return_timestamps=return_timestamps,
    )
    return output

# Short blurb passed to the interface as its description
description = "ASR with dynamic language adaptation"

# Web UI: an audio upload plus a language picker feeding transcribe_audio,
# with the result rendered in a single text box.
iface = gr.Interface(
    fn=transcribe_audio,
    inputs=[
        gr.Audio(source="upload", type="filepath", label="Upload file to transcribe"),
        gr.Dropdown(choices=languages, label="Language", value="English"),
    ],
    outputs=gr.Textbox(label="Transcription"),
    description=description,
)

# Start serving the app
iface.launch()