akera committed on
Commit
6d352f5
1 Parent(s): 96abd98

Update app.py

Browse files

working version

Files changed (1) hide show
  1. app.py +32 -19
app.py CHANGED
@@ -1,43 +1,56 @@
1
  import gradio as gr
2
- from transformers import pipeline
3
  import torch
4
  import librosa
 
5
  import os
 
 
 
6
 
7
- # Authentication token for Hugging Face
8
  auth_token = os.environ.get("HF_TOKEN")
9
 
10
- # Mapping of language options to their codes
11
  target_lang_options = {"English": "eng", "Luganda": "lug", "Acholi": "ach", "Runyankole": "nyn", "Lugbara": "lgg"}
 
12
  languages = list(target_lang_options.keys())
13
 
14
- # Determine device based on CUDA availability
15
- device = "cuda" if torch.cuda.is_available() else "cpu"
16
 
17
- base_model_id = "facebook/mms-1b-all"
18
- pipe = pipeline("automatic-speech-recognition", model=base_model_id, device=device, token=auth_token)
 
 
19
 
20
- def transcribe_audio(input_file, language, chunk_length_s=10, stride_length_s=(4, 2), return_timestamps="word"):
21
  target_lang_code = target_lang_options[language]
22
-
23
- # Dynamically set the target language and load the corresponding adapter
 
 
 
 
 
 
24
  pipe.tokenizer.set_target_lang(target_lang_code)
25
- # Assuming each language code directly corresponds to an adapter name available for the model
26
  pipe.model.load_adapter(target_lang_code)
27
 
28
- # Load and transcribe the audio file
29
- audio_data, _ = librosa.load(input_file, sr=None)
30
- output = pipe(audio_data, chunk_length_s=chunk_length_s, stride_length_s=stride_length_s, return_timestamps=return_timestamps)
31
  return output
32
 
33
- description = "ASR with dynamic language adaptation"
 
34
 
35
  iface = gr.Interface(fn=transcribe_audio,
36
  inputs=[
37
- gr.Audio(source="upload", type="filepath", label="Upload file to transcribe"),
38
  gr.Dropdown(choices=languages, label="Language", value="English")
39
- ],
40
  outputs=gr.Textbox(label="Transcription"),
41
- description=description)
 
 
42
 
43
- iface.launch()
 
1
  import gradio as gr
2
+ from transformers import Wav2Vec2ForCTC, AutoProcessor, Wav2Vec2Processor
3
  import torch
4
  import librosa
5
+ import json
6
  import os
7
+ import huggingface_hub
8
+ from transformers import pipeline
9
+
10
 
 
11
  auth_token = os.environ.get("HF_TOKEN")
12
 
13
+
14
  target_lang_options = {"English": "eng", "Luganda": "lug", "Acholi": "ach", "Runyankole": "nyn", "Lugbara": "lgg"}
15
+
16
  languages = list(target_lang_options.keys())
17
 
 
 
18
 
19
+ # Transcribe audio using custom model
20
+ def transcribe_audio(input_file, language,chunk_length_s=10,
21
+ stride_length_s=(4, 2), return_timestamps="word"):
22
+
23
 
24
+ device = "cuda" if torch.cuda.is_available() else "cpu"
25
  target_lang_code = target_lang_options[language]
26
+
27
+ # Determine the model_id based on the language
28
+ if target_lang_code == "eng":
29
+ model_id = "facebook/mms-1b-all"
30
+ else:
31
+ model_id = "Sunbird/sunbird-mms"
32
+
33
+ pipe = pipeline(model=model_id, device=device, token=auth_token)
34
  pipe.tokenizer.set_target_lang(target_lang_code)
35
+
36
  pipe.model.load_adapter(target_lang_code)
37
 
38
+ # Read audio file
39
+ # audio_data = input_file
40
+ output = pipe(input_file, chunk_length_s=chunk_length_s, stride_length_s=stride_length_s, return_timestamps=return_timestamps)
41
  return output
42
 
43
+
44
+ description = '''ASR with salt-mms'''
45
 
46
  iface = gr.Interface(fn=transcribe_audio,
47
  inputs=[
48
+ gr.Audio(source="upload", type="filepath", label="upload file to transcribe"),
49
  gr.Dropdown(choices=languages, label="Language", value="English")
50
+ ],
51
  outputs=gr.Textbox(label="Transcription"),
52
+ description=description
53
+ )
54
+
55
 
56
+ iface.launch()