akera committed on
Commit
96abd98
1 Parent(s): d637eff

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +19 -31
app.py CHANGED
@@ -1,55 +1,43 @@
1
  import gradio as gr
2
- from transformers import Wav2Vec2ForCTC, AutoProcessor, Wav2Vec2Processor
3
  import torch
4
  import librosa
5
- import json
6
  import os
7
- import huggingface_hub
8
- from transformers import pipeline
9
-
10
 
 
11
  auth_token = os.environ.get("HF_TOKEN")
12
 
13
-
14
  target_lang_options = {"English": "eng", "Luganda": "lug", "Acholi": "ach", "Runyankole": "nyn", "Lugbara": "lgg"}
15
-
16
  languages = list(target_lang_options.keys())
17
 
 
 
18
 
19
- # Transcribe audio using custom model
20
- def transcribe_audio(input_file, language,chunk_length_s=10,
21
- stride_length_s=(4, 2), return_timestamps="word"):
22
-
23
 
24
- device = "cuda" if torch.cuda.is_available() else "cpu"
25
  target_lang_code = target_lang_options[language]
26
-
27
- # Determine the model_id based on the language
28
- if target_lang_code == "eng":
29
- model_id = "facebook/mms-1b-all"
30
- else:
31
- model_id = "Sunbird/sunbird-mms"
32
-
33
- pipe = pipeline(model=model_id, device=device, token=auth_token)
34
  pipe.tokenizer.set_target_lang(target_lang_code)
 
35
  pipe.model.load_adapter(target_lang_code)
36
 
37
- # # Read audio file
38
- # audio_data = input_file
39
- output = pipe(input_file, chunk_length_s=chunk_length_s, stride_length_s=stride_length_s, return_timestamps=return_timestamps)
40
  return output
41
 
42
-
43
- description = '''ASR with salt-mms'''
44
 
45
  iface = gr.Interface(fn=transcribe_audio,
46
  inputs=[
47
- gr.Audio(source="upload", type="filepath", label="upload file to transcribe"),
48
  gr.Dropdown(choices=languages, label="Language", value="English")
49
- ],
50
  outputs=gr.Textbox(label="Transcription"),
51
- description=description
52
- )
53
-
54
 
55
- iface.launch()
 
1
  import gradio as gr
2
+ from transformers import pipeline
3
  import torch
4
  import librosa
 
5
  import os
 
 
 
6
 
7
+ # Authentication token for Hugging Face
8
  auth_token = os.environ.get("HF_TOKEN")
9
 
10
+ # Mapping of language options to their codes
11
  target_lang_options = {"English": "eng", "Luganda": "lug", "Acholi": "ach", "Runyankole": "nyn", "Lugbara": "lgg"}
 
12
  languages = list(target_lang_options.keys())
13
 
14
+ # Determine device based on CUDA availability
15
+ device = "cuda" if torch.cuda.is_available() else "cpu"
16
 
17
+ base_model_id = "facebook/mms-1b-all"
18
+ pipe = pipeline("automatic-speech-recognition", model=base_model_id, device=device, token=auth_token)
 
 
19
 
20
def transcribe_audio(input_file, language, chunk_length_s=10, stride_length_s=(4, 2), return_timestamps="word"):
    """Transcribe an audio file in the selected language using the shared pipeline.

    Args:
        input_file: Path of the audio file to transcribe.
        language: Display name of the language; must be a key of
            ``target_lang_options``.
        chunk_length_s: Chunk length in seconds, forwarded to the pipeline.
        stride_length_s: Stride in seconds, forwarded to the pipeline.
        return_timestamps: Timestamp mode, forwarded to the pipeline.

    Returns:
        The value returned by the pipeline call, unchanged.

    Raises:
        gr.Error: If no audio file was provided.
        KeyError: If ``language`` is not a key of ``target_lang_options``.
    """
    if input_file is None:
        # Fail with a readable message instead of an opaque loader error.
        raise gr.Error("Please upload an audio file to transcribe.")

    target_lang_code = target_lang_options[language]

    # Switch the tokenizer vocabulary and the model adapter to the requested
    # language; the adapter name is assumed to equal the language code.
    pipe.tokenizer.set_target_lang(target_lang_code)
    pipe.model.load_adapter(target_lang_code)

    # A bare array handed to the pipeline is taken to already be at the
    # model's sampling rate (no check is done), so resample while loading
    # rather than keeping the file's native rate.
    model_sampling_rate = pipe.feature_extractor.sampling_rate
    audio_data, _ = librosa.load(input_file, sr=model_sampling_rate)

    output = pipe(
        audio_data,
        chunk_length_s=chunk_length_s,
        stride_length_s=stride_length_s,
        return_timestamps=return_timestamps,
    )
    return output
32
 
33
+ description = "ASR with dynamic language adaptation"
 
34
 
35
  iface = gr.Interface(fn=transcribe_audio,
36
  inputs=[
37
+ gr.Audio(source="upload", type="filepath", label="Upload file to transcribe"),
38
  gr.Dropdown(choices=languages, label="Language", value="English")
39
+ ],
40
  outputs=gr.Textbox(label="Transcription"),
41
+ description=description)
 
 
42
 
43
+ iface.launch()