Spaces:

Sunbird
/

sb-mms-inference

Sleeping

App Files Community

akera committed on Feb 26

Commit

27b508c

•

1 Parent(s): 6ce3643

Update app.py

Browse files

Files changed (1) hide show

app.py +19 -14

app.py CHANGED Viewed

@@ -24,37 +24,42 @@ def format_and_group_timestamps(chunks, interval=5.0):
         if interval_start not in grouped:
             grouped[interval_start] = []
         grouped[interval_start].append((start, end, word))
     formatted_output = f"Transcript: {transcript.strip()}'\n\n-------\n\nword-stamped transcripts (every 5 seconds):\n\n"
     for interval_start, words in grouped.items():
         formatted_output += f"({interval_start}, {interval_start + interval}) -- {' '.join([w[2] for w in words])}\n"
     return formatted_output
-# Modified transcribe_audio function
 def transcribe_audio(input_file, language, chunk_length_s=10, stride_length_s=(4, 2), return_timestamps="word"):
-    device = "cuda" if torch.cuda.is_available() else "cpu"
     target_lang_options = {"English": "eng", "Luganda": "lug", "Acholi": "ach", "Runyankole": "nyn", "Lugbara": "lgg"}
     target_lang_code = target_lang_options[language]
-    # Determine the model_id based on the language
     if target_lang_code == "eng":
-        model_id = "facebook/mms-1b-all"
     else:
         model_id = "Sunbird/sunbird-mms"
-    pipe = pipeline(model=model_id, device=device, token=auth_token)
-    pipe.tokenizer.set_target_lang(target_lang_code)
-    pipe.model.load_adapter(target_lang_code)
-    output = pipe(input_file, chunk_length_s=chunk_length_s, stride_length_s=stride_length_s, return_timestamps=return_timestamps)
-    formatted_output = format_and_group_timestamps(output['chunks'])
-    return formatted_output
 # Interface setup remains the same
 description = '''ASR with salt-mms'''
 iface = gr.Interface(fn=transcribe_audio,
                      inputs=[
-                         gr.Audio(source="upload", type="filepath", label="upload file to transcribe"),
                          gr.Dropdown(choices=list(target_lang_options.keys()), label="Language", value="English")
                      ],
                      outputs=gr.Textbox(label="Transcription"),

         if interval_start not in grouped:
             grouped[interval_start] = []
         grouped[interval_start].append((start, end, word))
     formatted_output = f"Transcript: {transcript.strip()}'\n\n-------\n\nword-stamped transcripts (every 5 seconds):\n\n"
     for interval_start, words in grouped.items():
         formatted_output += f"({interval_start}, {interval_start + interval}) -- {' '.join([w[2] for w in words])}\n"
     return formatted_output
+# Modified transcribe_audio function to use Whisper for English
 def transcribe_audio(input_file, language, chunk_length_s=10, stride_length_s=(4, 2), return_timestamps="word"):
     target_lang_options = {"English": "eng", "Luganda": "lug", "Acholi": "ach", "Runyankole": "nyn", "Lugbara": "lgg"}
     target_lang_code = target_lang_options[language]
     if target_lang_code == "eng":
+        # Use Whisper for English
+        model = whisper.load_model("small")
+        result = model.transcribe(input_file)
+        # Assuming you want to keep the formatting function for consistency
+        return result["text"]
     else:
+        # Use specified model for other languages
+        device = "cuda" if torch.cuda.is_available() else "cpu"
         model_id = "Sunbird/sunbird-mms"
+        auth_token = os.environ.get("HF_TOKEN")
+        pipe = pipeline(model=model_id, device=device, token=auth_token)
+        pipe.tokenizer.set_target_lang(target_lang_code)
+        pipe.model.load_adapter(target_lang_code)
+        output = pipe(input_file, chunk_length_s=chunk_length_s, stride_length_s=stride_length_s, return_timestamps=return_timestamps)
+        formatted_output = format_and_group_timestamps(output['chunks'])
+        return formatted_output
 # Interface setup remains the same
 description = '''ASR with salt-mms'''
 iface = gr.Interface(fn=transcribe_audio,
                      inputs=[
+                         gr.Audio(sources="upload", type="filepath", label="upload file to transcribe"),
                          gr.Dropdown(choices=list(target_lang_options.keys()), label="Language", value="English")
                      ],
                      outputs=gr.Textbox(label="Transcription"),