Spaces:

Sunbird
/

sb-mms-inference

Sleeping

App Files Community

akera committed on Feb 20

Commit

6d352f5

•

1 Parent(s): 96abd98

Update app.py

Browse files

working version

Files changed (1) hide show

app.py +32 -19

app.py CHANGED Viewed

@@ -1,43 +1,56 @@
 import gradio as gr
-from transformers import pipeline
 import torch
 import librosa
 import os
-# Authentication token for Hugging Face
 auth_token = os.environ.get("HF_TOKEN")
-# Mapping of language options to their codes
 target_lang_options = {"English": "eng", "Luganda": "lug", "Acholi": "ach", "Runyankole": "nyn", "Lugbara": "lgg"}
 languages = list(target_lang_options.keys())
-# Determine device based on CUDA availability
-device = "cuda" if torch.cuda.is_available() else "cpu"
-base_model_id = "facebook/mms-1b-all"
-pipe = pipeline("automatic-speech-recognition", model=base_model_id, device=device, token=auth_token)
-def transcribe_audio(input_file, language, chunk_length_s=10, stride_length_s=(4, 2), return_timestamps="word"):
     target_lang_code = target_lang_options[language]
-    # Dynamically set the target language and load the corresponding adapter
     pipe.tokenizer.set_target_lang(target_lang_code)
-    # Assuming each language code directly corresponds to an adapter name available for the model
     pipe.model.load_adapter(target_lang_code)
-    # Load and transcribe the audio file
-    audio_data, _ = librosa.load(input_file, sr=None)
-    output = pipe(audio_data, chunk_length_s=chunk_length_s, stride_length_s=stride_length_s, return_timestamps=return_timestamps)
     return output
-description = "ASR with dynamic language adaptation"
 iface = gr.Interface(fn=transcribe_audio,
                      inputs=[
-                         gr.Audio(source="upload", type="filepath", label="Upload file to transcribe"),
                          gr.Dropdown(choices=languages, label="Language", value="English")
-                     ],
                      outputs=gr.Textbox(label="Transcription"),
-                     description=description)
-iface.launch()

 import gradio as gr
+from transformers import Wav2Vec2ForCTC, AutoProcessor, Wav2Vec2Processor
 import torch
 import librosa
+import json
 import os
+import huggingface_hub
+from transformers import pipeline
 auth_token = os.environ.get("HF_TOKEN")
 target_lang_options = {"English": "eng", "Luganda": "lug", "Acholi": "ach", "Runyankole": "nyn", "Lugbara": "lgg"}
 languages = list(target_lang_options.keys())
+# Transcribe audio using custom model
+def transcribe_audio(input_file, language,chunk_length_s=10,
+                      stride_length_s=(4, 2), return_timestamps="word"):
+    device = "cuda" if torch.cuda.is_available() else "cpu"
     target_lang_code = target_lang_options[language]
+    # Determine the model_id based on the language
+    if target_lang_code == "eng":
+        model_id = "facebook/mms-1b-all"
+    else:
+        model_id = "Sunbird/sunbird-mms"
+    pipe = pipeline(model=model_id, device=device, token=auth_token)
     pipe.tokenizer.set_target_lang(target_lang_code)
     pipe.model.load_adapter(target_lang_code)
+    # Read audio file
+    # audio_data = input_file
+    output = pipe(input_file, chunk_length_s=chunk_length_s, stride_length_s=stride_length_s, return_timestamps=return_timestamps)
     return output
+description = '''ASR with salt-mms'''
 iface = gr.Interface(fn=transcribe_audio,
                      inputs=[
+                         gr.Audio(source="upload", type="filepath", label="upload file to transcribe"),
                          gr.Dropdown(choices=languages, label="Language", value="English")
+                         ],
                      outputs=gr.Textbox(label="Transcription"),
+                     description=description
+                     )
+iface.launch()