Spaces:
unijoh
/
Runtime error

unijoh committed on
Commit
9563833
1 Parent(s): ce8e849

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +112 -108
app.py CHANGED
@@ -1,130 +1,134 @@
1
  import gradio as gr
2
- from asr import transcribe, ASR_EXAMPLES, ASR_NOTE
3
- from tts import synthesize, TTS_EXAMPLES
 
4
  from lid import identify, LID_EXAMPLES
 
5
 
6
- def wrapped_transcribe(select_source, mic_audio, upload_audio):
7
- audio_input = mic_audio if select_source == "Record from Mic" else upload_audio
8
- return transcribe(audio_input, "fao (Faroese)")
9
-
10
- def wrapped_synthesize(text, speed):
11
- return synthesize(text, "fao (Faroese)", speed)
12
 
13
  demo = gr.Blocks()
14
 
15
- with demo:
16
- gr.Markdown(
17
- "<p align='center' style='font-size: 20px;'>MMS: Scaling Speech Technology to 1000+ languages demo. See our <a href='https://ai.facebook.com/blog/multilingual-model-speech-recognition/'>blog post</a> and <a href='https://arxiv.org/abs/2305.13516'>paper</a>.</p>"
18
- )
19
- gr.HTML(
20
- """<center>Click on the appropriate tab to explore Speech-to-text (ASR), Text-to-speech (TTS) and Language identification (LID) demos. </center>"""
21
- )
22
- gr.HTML(
23
- """<center>You can also finetune MMS models on your data using the recipes provided here - <a href='https://huggingface.co/blog/mms_adapters'>ASR</a> <a href='https://github.com/ylacombe/finetune-hf-vits'>TTS</a> </center>"""
24
- )
25
- gr.HTML(
26
- """<center><a href="https://huggingface.co/spaces/facebook/MMS?duplicate=true" style="display: inline-block;margin-top: .5em;margin-right: .25em;" target="_blank"><img style="margin-bottom: 0em;display: inline;margin-top: -.25em;" src="https://bit.ly/3gLdBN6" alt="Duplicate Space"></a> for more control and no queue.</center>"""
27
- )
 
 
 
 
28
 
29
- with gr.TabbedInterface(["Speech-to-text", "Text-to-speech", "Language Identification"]) as tabs:
30
-
31
- with tabs[0]:
32
- mms_select_source_trans = gr.Radio(
33
- ["Record from Mic", "Upload audio"],
34
- label="Audio input",
35
- value="Record from Mic",
36
- )
37
- mms_mic_source_trans = gr.Audio(source="microphone", type="filepath", label="Use mic")
38
- mms_upload_source_trans = gr.Audio(
39
- source="upload", type="filepath", label="Upload file", visible=False
40
- )
41
- gr.Interface(
42
- fn=wrapped_transcribe,
43
- inputs=[
44
- mms_select_source_trans,
45
- mms_mic_source_trans,
46
- mms_upload_source_trans,
47
- ],
48
- outputs="text",
49
- examples=ASR_EXAMPLES,
50
- title="Speech-to-text",
51
- description=(
52
- "Transcribe audio from a microphone or input file in Faroese."
53
- ),
54
- article=ASR_NOTE,
55
- allow_flagging="never",
56
- ).render()
57
 
58
- mms_select_source_trans.change(
59
- lambda x: [
60
- gr.update(visible=True if x == "Record from Mic" else False),
61
- gr.update(visible=True if x == "Upload audio" else False),
62
- ],
63
- inputs=[mms_select_source_trans],
64
- outputs=[mms_mic_source_trans, mms_upload_source_trans],
65
- queue=False,
66
- )
67
 
68
- with tabs[1]:
69
- gr.Interface(
70
- fn=wrapped_synthesize,
71
- inputs=[
72
- gr.Text(label="Input text"),
73
- gr.Slider(minimum=0.1, maximum=4.0, value=1.0, step=0.1, label="Speed"),
74
- ],
75
- outputs=[
76
- gr.Audio(label="Generated Audio", type="numpy"),
77
- gr.Text(label="Filtered text after removing OOVs"),
78
- ],
79
- examples=TTS_EXAMPLES,
80
- title="Text-to-speech",
81
- description=("Generate audio in Faroese from input text."),
82
- allow_flagging="never",
83
- ).render()
84
 
85
- with tabs[2]:
86
- mms_select_source_iden = gr.Radio(
87
- ["Record from Mic", "Upload audio"],
88
- label="Audio input",
89
- value="Record from Mic",
90
- )
91
- mms_mic_source_iden = gr.Audio(source="microphone", type="filepath", label="Use mic")
92
- mms_upload_source_iden = gr.Audio(
93
- source="upload", type="filepath", label="Upload file", visible=False
94
- )
95
- gr.Interface(
96
- fn=identify,
97
- inputs=[
98
- mms_select_source_iden,
99
- mms_mic_source_iden,
100
- mms_upload_source_iden,
101
- ],
102
- outputs=gr.Label(num_top_classes=10),
103
- examples=LID_EXAMPLES,
104
- title="Language Identification",
105
- description=("Identify the language of input audio."),
106
- allow_flagging="never",
107
- ).render()
108
 
109
- mms_select_source_iden.change(
110
- lambda x: [
111
- gr.update(visible=True if x == "Record from Mic" else False),
112
- gr.update(visible=True if x == "Upload audio" else False),
113
- ],
114
- inputs=[mms_select_source_iden],
115
- outputs=[mms_mic_source_iden, mms_upload_source_iden],
116
- queue=False,
117
- )
118
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
119
  gr.HTML(
120
  """
121
  <div class="footer" style="text-align:center">
122
  <p>
123
- Model by <a href="https://ai.facebook.com" style="text-decoration: underline;" target="_blank">Meta AI</a> - Gradio Demo by 🤗 Hugging Face
124
  </p>
125
  </div>
126
  """
127
  )
128
 
129
  demo.queue(concurrency_count=3)
130
- demo.launch()
 
1
  import gradio as gr
2
+ import librosa
3
+ from asr import transcribe, ASR_EXAMPLES, ASR_LANGUAGES, ASR_NOTE
4
+ from tts import synthesize, TTS_EXAMPLES, TTS_LANGUAGES
5
  from lid import identify, LID_EXAMPLES
6
+ import os
7
 
8
+ # Turn off hf_transfer-accelerated downloads from the Hugging Face Hub (HF_HUB_ENABLE_HF_TRANSFER=0)
9
+ os.environ['HF_HUB_ENABLE_HF_TRANSFER'] = '0'
 
 
 
 
10
 
11
  demo = gr.Blocks()
12
 
13
+ mms_select_source_trans = gr.Radio(
14
+ ["Tak upp", "Ljóðfíla"],
15
+ label="Ljóð til talukennara",
16
+ value="Tak upp",
17
+ )
18
+ mms_mic_source_trans = gr.Audio(source="microphone", type="filepath", label="Brúka mikrofonina", visible=True)
19
+ mms_upload_source_trans = gr.Audio(
20
+ source="upload", type="filepath", label="Legg ljóðfílu upp", visible=False
21
+ )
22
+
23
+ # Re-add the ASR language dropdown, hidden and defaulting to Faroese, so transcribe still receives a language input
24
+ asr_language_dropdown = gr.Dropdown(
25
+ [f"{k} ({v})" for k, v in ASR_LANGUAGES.items()],
26
+ label="Mál",
27
+ value="fao (Faroese)",
28
+ visible=False,
29
+ )
30
 
31
+ mms_transcribe = gr.Interface(
32
+ fn=transcribe,
33
+ inputs=[
34
+ mms_select_source_trans,
35
+ mms_mic_source_trans,
36
+ mms_upload_source_trans,
37
+ asr_language_dropdown,
38
+ ],
39
+ outputs="text",
40
+ examples=ASR_EXAMPLES,
41
+ title="Talukennari",
42
+ description=(
43
+ "Tak upp beinleiðis úr kaganum, ella legg eina ljóðfílu upp, og fá talukennaran at avskriva tað, ið verður sagt."
44
+ ),
45
+ article=ASR_NOTE,
46
+ allow_flagging="never",
47
+ )
 
 
 
 
 
 
 
 
 
 
 
48
 
49
+ # Re-add the TTS language dropdown, hidden and defaulting to Faroese, so synthesize still receives a language input
50
+ tts_language_dropdown = gr.Dropdown(
51
+ [f"{k} ({v})" for k, v in TTS_LANGUAGES.items()],
52
+ label="Mál",
53
+ value="fao (Faroese)",
54
+ visible=False,
55
+ )
 
 
56
 
57
+ mms_synthesize = gr.Interface(
58
+ fn=synthesize,
59
+ inputs=[
60
+ gr.Text(label="Tekstur at lesa upp"),
61
+ tts_language_dropdown,
62
+ gr.Slider(minimum=0.1, maximum=4.0, value=1.0, step=0.1, label="Ferð"),
63
+ ],
64
+ outputs=[
65
+ gr.Audio(label="Ljóð frá teldutaluni", type="numpy"),
66
+ gr.Text(label="Teksturin, sum verður lisin upp"),
67
+ ],
68
+ examples=TTS_EXAMPLES,
69
+ title="Teldutala",
70
+ description=(" tekstin lisnan upp við teldutalu."),
71
+ allow_flagging="never",
72
+ )
73
 
74
+ mms_select_source_iden = gr.Radio(
75
+ ["Tak upp frá mikrofonini", "Vel ljóðfílu"],
76
+ label="Audio input",
77
+ value="Tak upp frá mikrofonini",
78
+ )
79
+ mms_mic_source_iden = gr.Audio(source="microphone", type="filepath", label="Use mic", visible=True)
80
+ mms_upload_source_iden = gr.Audio(
81
+ source="upload", type="filepath", label="Upload file", visible=False
82
+ )
83
+ mms_identify = gr.Interface(
84
+ fn=identify,
85
+ inputs=[
86
+ mms_select_source_iden,
87
+ mms_mic_source_iden,
88
+ mms_upload_source_iden,
89
+ ],
90
+ outputs=gr.Label(num_top_classes=10),
91
+ examples=LID_EXAMPLES,
92
+ title="Máleyðmerkjari",
93
+ description=("Tak upp ella legg eina ljóðfílu upp og fá máleyðmerkjaran at gita, hvat mál tú snakkar."),
94
+ allow_flagging="never",
95
+ )
 
96
 
97
+ tabbed_interface = gr.TabbedInterface(
98
+ [mms_transcribe, mms_synthesize, mms_identify],
99
+ ["Talukennari", "Teldutala", "Máleyðmerkjari"],
100
+ )
 
 
 
 
 
101
 
102
+ with gr.Blocks() as demo:
103
+
104
+ tabbed_interface.render()
105
+ mms_select_source_trans.change(
106
+ lambda x: [
107
+ gr.update(visible=True if x == "Tak upp" else False),
108
+ gr.update(visible=True if x == "Ljóðfíla" else False),
109
+ ],
110
+ inputs=[mms_select_source_trans],
111
+ outputs=[mms_mic_source_trans, mms_upload_source_trans],
112
+ queue=False,
113
+ )
114
+ mms_select_source_iden.change(
115
+ lambda x: [
116
+ gr.update(visible=True if x == "Tak upp frá mikrofonini" else False),
117
+ gr.update(visible=True if x == "Vel ljóðfílu" else False),
118
+ ],
119
+ inputs=[mms_select_source_iden],
120
+ outputs=[mms_mic_source_iden, mms_upload_source_iden],
121
+ queue=False,
122
+ )
123
  gr.HTML(
124
  """
125
  <div class="footer" style="text-align:center">
126
  <p>
127
+ <a href="https://ai.facebook.com" style="text-decoration: underline;" target="_blank">Meta AI</a> gjørdi hendan málmyndilin, sum koyrir á 🤗 Hugging Face
128
  </p>
129
  </div>
130
  """
131
  )
132
 
133
  demo.queue(concurrency_count=3)
134
+ demo.launch()