Spaces:

Sunbird
/

sb-mms-inference

Sleeping

App Files Files Community

akera committed on Feb 20

Commit

96abd98

•

1 Parent(s): d637eff

Update app.py

Browse files

Files changed (1) hide show

app.py +19 -31

app.py CHANGED Viewed

@@ -1,55 +1,43 @@
 import gradio as gr
-from transformers import Wav2Vec2ForCTC, AutoProcessor, Wav2Vec2Processor
 import torch
 import librosa
-import json
 import os
-import huggingface_hub
-from transformers import pipeline
 auth_token = os.environ.get("HF_TOKEN")
 target_lang_options = {"English": "eng", "Luganda": "lug", "Acholi": "ach", "Runyankole": "nyn", "Lugbara": "lgg"}
 languages = list(target_lang_options.keys())
-# Transcribe audio using custom model
-def transcribe_audio(input_file, language,chunk_length_s=10,
-                      stride_length_s=(4, 2), return_timestamps="word"):
-    device = "cuda" if torch.cuda.is_available() else "cpu"
     target_lang_code = target_lang_options[language]
-    # Determine the model_id based on the language
-    if target_lang_code == "eng":
-        model_id = "facebook/mms-1b-all"
-    else:
-        model_id = "Sunbird/sunbird-mms"
-    pipe = pipeline(model=model_id, device=device, token=auth_token)
     pipe.tokenizer.set_target_lang(target_lang_code)
     pipe.model.load_adapter(target_lang_code)
-    # # Read audio file
-    # audio_data = input_file
-    output = pipe(input_file, chunk_length_s=chunk_length_s, stride_length_s=stride_length_s, return_timestamps=return_timestamps)
     return output
-description = '''ASR with salt-mms'''
 iface = gr.Interface(fn=transcribe_audio,
                      inputs=[
-                         gr.Audio(source="upload", type="filepath", label="upload file to transcribe"),
                          gr.Dropdown(choices=languages, label="Language", value="English")
-                         ],
                      outputs=gr.Textbox(label="Transcription"),
-                     description=description
-                     )
-iface.launch()

 import gradio as gr
+from transformers import pipeline
 import torch
 import librosa
 import os
# Hugging Face access token taken from the environment (None when HF_TOKEN is unset).
auth_token = os.getenv("HF_TOKEN")

# UI display name -> language code passed to the tokenizer and adapter loader.
target_lang_options = {
    "English": "eng",
    "Luganda": "lug",
    "Acholi": "ach",
    "Runyankole": "nyn",
    "Lugbara": "lgg",
}
languages = list(target_lang_options)

# Use the GPU when torch can see one, otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = "cuda"
else:
    device = "cpu"

# One pipeline is built at import time and shared by every request.
base_model_id = "facebook/mms-1b-all"
pipe = pipeline(
    "automatic-speech-recognition",
    model=base_model_id,
    device=device,
    token=auth_token,
)
def transcribe_audio(input_file, language, chunk_length_s=10, stride_length_s=(4, 2), return_timestamps="word"):
    """Transcribe an audio file in the selected language using the shared pipeline.

    Args:
        input_file: Path of the audio file to transcribe.
        language: Display name of the language; must be a key of
            ``target_lang_options``.
        chunk_length_s: Forwarded unchanged to the pipeline call.
        stride_length_s: Forwarded unchanged to the pipeline call.
        return_timestamps: Forwarded unchanged to the pipeline call.

    Returns:
        The pipeline's result, returned as-is.

    Raises:
        KeyError: If ``language`` is not a key of ``target_lang_options``.
    """
    target_lang_code = target_lang_options[language]

    # Switch the shared pipeline to the requested language: the tokenizer
    # vocabulary and the model adapter are both selected by the language code.
    pipe.tokenizer.set_target_lang(target_lang_code)
    pipe.model.load_adapter(target_lang_code)

    # A bare NumPy array is taken by the pipeline to already be at the model's
    # sampling rate (no check is performed), so resample while loading instead
    # of keeping the file's native rate.
    model_sampling_rate = pipe.feature_extractor.sampling_rate
    audio_data, _ = librosa.load(input_file, sr=model_sampling_rate)

    output = pipe(
        audio_data,
        chunk_length_s=chunk_length_s,
        stride_length_s=stride_length_s,
        return_timestamps=return_timestamps,
    )
    return output
# Short blurb passed to the interface as its description.
description = "ASR with dynamic language adaptation"

# Wire transcribe_audio to two inputs (an uploaded audio file delivered as a
# file path, and a language selector) and a single text output.
# NOTE(review): `source=` is the pre-4.0 gr.Audio keyword — confirm the Space
# pins a Gradio version that still accepts it.
iface = gr.Interface(
    fn=transcribe_audio,
    inputs=[
        gr.Audio(source="upload", type="filepath", label="Upload file to transcribe"),
        gr.Dropdown(choices=languages, label="Language", value="English"),
    ],
    outputs=gr.Textbox(label="Transcription"),
    description=description,
)

# Start serving the app.
iface.launch()