MMS

Runtime error

App Files Files Community

unijoh committed on Jun 6

Commit

9563833

•

1 Parent(s): ce8e849

Update app.py

Browse files

Files changed (1) hide show

app.py +112 -108

app.py CHANGED Viewed

@@ -1,130 +1,134 @@
 import gradio as gr
-from asr import transcribe, ASR_EXAMPLES, ASR_NOTE
-from tts import synthesize, TTS_EXAMPLES
 from lid import identify, LID_EXAMPLES
-def wrapped_transcribe(select_source, mic_audio, upload_audio):
-    audio_input = mic_audio if select_source == "Record from Mic" else upload_audio
-    return transcribe(audio_input, "fao (Faroese)")
-def wrapped_synthesize(text, speed):
-    return synthesize(text, "fao (Faroese)", speed)
 demo = gr.Blocks()
-with demo:
-    gr.Markdown(
-        "<p align='center' style='font-size: 20px;'>MMS: Scaling Speech Technology to 1000+ languages demo. See our <a href='https://ai.facebook.com/blog/multilingual-model-speech-recognition/'>blog post</a> and <a href='https://arxiv.org/abs/2305.13516'>paper</a>.</p>"
-    )
-    gr.HTML(
-        """<center>Click on the appropriate tab to explore Speech-to-text (ASR), Text-to-speech (TTS) and Language identification (LID) demos.   </center>"""
-    )
-    gr.HTML(
-        """<center>You can also finetune MMS models on your data using the recipes provided here - <a href='https://huggingface.co/blog/mms_adapters'>ASR</a> <a href='https://github.com/ylacombe/finetune-hf-vits'>TTS</a>  </center>"""
-    )
-    gr.HTML(
-        """<center><a href="https://huggingface.co/spaces/facebook/MMS?duplicate=true"  style="display: inline-block;margin-top: .5em;margin-right: .25em;" target="_blank"><img style="margin-bottom: 0em;display: inline;margin-top: -.25em;" src="https://bit.ly/3gLdBN6" alt="Duplicate Space"></a> for more control and no queue.</center>"""
-    )
-    with gr.TabbedInterface(["Speech-to-text", "Text-to-speech", "Language Identification"]) as tabs:
-        with tabs[0]:
-            mms_select_source_trans = gr.Radio(
-                ["Record from Mic", "Upload audio"],
-                label="Audio input",
-                value="Record from Mic",
-            )
-            mms_mic_source_trans = gr.Audio(source="microphone", type="filepath", label="Use mic")
-            mms_upload_source_trans = gr.Audio(
-                source="upload", type="filepath", label="Upload file", visible=False
-            )
-            gr.Interface(
-                fn=wrapped_transcribe,
-                inputs=[
-                    mms_select_source_trans,
-                    mms_mic_source_trans,
-                    mms_upload_source_trans,
-                ],
-                outputs="text",
-                examples=ASR_EXAMPLES,
-                title="Speech-to-text",
-                description=(
-                    "Transcribe audio from a microphone or input file in Faroese."
-                ),
-                article=ASR_NOTE,
-                allow_flagging="never",
-            ).render()
-            mms_select_source_trans.change(
-                lambda x: [
-                    gr.update(visible=True if x == "Record from Mic" else False),
-                    gr.update(visible=True if x == "Upload audio" else False),
-                ],
-                inputs=[mms_select_source_trans],
-                outputs=[mms_mic_source_trans, mms_upload_source_trans],
-                queue=False,
-            )
-        with tabs[1]:
-            gr.Interface(
-                fn=wrapped_synthesize,
-                inputs=[
-                    gr.Text(label="Input text"),
-                    gr.Slider(minimum=0.1, maximum=4.0, value=1.0, step=0.1, label="Speed"),
-                ],
-                outputs=[
-                    gr.Audio(label="Generated Audio", type="numpy"),
-                    gr.Text(label="Filtered text after removing OOVs"),
-                ],
-                examples=TTS_EXAMPLES,
-                title="Text-to-speech",
-                description=("Generate audio in Faroese from input text."),
-                allow_flagging="never",
-            ).render()
-        with tabs[2]:
-            mms_select_source_iden = gr.Radio(
-                ["Record from Mic", "Upload audio"],
-                label="Audio input",
-                value="Record from Mic",
-            )
-            mms_mic_source_iden = gr.Audio(source="microphone", type="filepath", label="Use mic")
-            mms_upload_source_iden = gr.Audio(
-                source="upload", type="filepath", label="Upload file", visible=False
-            )
-            gr.Interface(
-                fn=identify,
-                inputs=[
-                    mms_select_source_iden,
-                    mms_mic_source_iden,
-                    mms_upload_source_iden,
-                ],
-                outputs=gr.Label(num_top_classes=10),
-                examples=LID_EXAMPLES,
-                title="Language Identification",
-                description=("Identify the language of input audio."),
-                allow_flagging="never",
-            ).render()
-            mms_select_source_iden.change(
-                lambda x: [
-                    gr.update(visible=True if x == "Record from Mic" else False),
-                    gr.update(visible=True if x == "Upload audio" else False),
-                ],
-                inputs=[mms_select_source_iden],
-                outputs=[mms_mic_source_iden, mms_upload_source_iden],
-                queue=False,
-            )
     gr.HTML(
         """
             <div class="footer" style="text-align:center">
                 <p>
-                    Model by <a href="https://ai.facebook.com" style="text-decoration: underline;" target="_blank">Meta AI</a> - Gradio Demo by 🤗 Hugging Face
                 </p>
             </div>
            """
     )
 demo.queue(concurrency_count=3)
-demo.launch()

 import gradio as gr
+import librosa
+from asr import transcribe, ASR_EXAMPLES, ASR_LANGUAGES, ASR_NOTE
+from tts import synthesize, TTS_EXAMPLES, TTS_LANGUAGES
 from lid import identify, LID_EXAMPLES
+import os
+# Set HF_HUB_ENABLE_HF_TRANSFER to '0' so the Hugging Face Hub client does not use the hf_transfer backend for file transfers
+os.environ['HF_HUB_ENABLE_HF_TRANSFER'] = '0'
 demo = gr.Blocks()
+mms_select_source_trans = gr.Radio(
+    ["Tak upp", "Ljóðfíla"],
+    label="Ljóð til talukennara",
+    value="Tak upp",
+)
+mms_mic_source_trans = gr.Audio(source="microphone", type="filepath", label="Brúka mikrofonina", visible=True)
+mms_upload_source_trans = gr.Audio(
+    source="upload", type="filepath", label="Legg ljóðfílu upp", visible=False
+)
+# Speech-to-text language selector: kept as an input to transcribe, but hidden and defaulting to Faroese
+asr_language_dropdown = gr.Dropdown(
+    [f"{k} ({v})" for k, v in ASR_LANGUAGES.items()],
+    label="Mál",
+    value="fao (Faroese)",
+    visible=False,
+)
+mms_transcribe = gr.Interface(
+    fn=transcribe,
+    inputs=[
+        mms_select_source_trans,
+        mms_mic_source_trans,
+        mms_upload_source_trans,
+        asr_language_dropdown,
+    ],
+    outputs="text",
+    examples=ASR_EXAMPLES,
+    title="Talukennari",
+    description=(
+        "Tak upp beinleiðis úr kaganum, ella legg eina ljóðfílu upp, og fá talukennaran at avskriva tað, ið verður sagt."
+    ),
+    article=ASR_NOTE,
+    allow_flagging="never",
+)
+# Text-to-speech language selector: kept as an input to synthesize, but hidden and defaulting to Faroese
+tts_language_dropdown = gr.Dropdown(
+    [f"{k} ({v})" for k, v in TTS_LANGUAGES.items()],
+    label="Mál",
+    value="fao (Faroese)",
+    visible=False,
+)
+mms_synthesize = gr.Interface(
+    fn=synthesize,
+    inputs=[
+        gr.Text(label="Tekstur at lesa upp"),
+        tts_language_dropdown,
+        gr.Slider(minimum=0.1, maximum=4.0, value=1.0, step=0.1, label="Ferð"),
+    ],
+    outputs=[
+        gr.Audio(label="Ljóð frá teldutaluni", type="numpy"),
+        gr.Text(label="Teksturin, sum verður lisin upp"),
+    ],
+    examples=TTS_EXAMPLES,
+    title="Teldutala",
+    description=("Fá tekstin lisnan upp við teldutalu."),
+    allow_flagging="never",
+)
+mms_select_source_iden = gr.Radio(
+    ["Tak upp frá mikrofonini", "Vel ljóðfílu"],
+    label="Audio input",
+    value="Tak upp frá mikrofonini",
+)
+mms_mic_source_iden = gr.Audio(source="microphone", type="filepath", label="Use mic", visible=True)
+mms_upload_source_iden = gr.Audio(
+    source="upload", type="filepath", label="Upload file", visible=False
+)
+mms_identify = gr.Interface(
+    fn=identify,
+    inputs=[
+        mms_select_source_iden,
+        mms_mic_source_iden,
+        mms_upload_source_iden,
+    ],
+    outputs=gr.Label(num_top_classes=10),
+    examples=LID_EXAMPLES,
+    title="Máleyðmerkjari",
+    description=("Tak upp ella legg eina ljóðfílu upp og fá máleyðmerkjaran at gita, hvat mál tú snakkar."),
+    allow_flagging="never",
+)
+tabbed_interface = gr.TabbedInterface(
+    [mms_transcribe, mms_synthesize, mms_identify],
+    ["Talukennari", "Teldutala", "Máleyðmerkjari"],
+)
+with gr.Blocks() as demo:
+    tabbed_interface.render()
+    mms_select_source_trans.change(
+        lambda x: [
+            gr.update(visible=True if x == "Tak upp" else False),
+            gr.update(visible=True if x == "Ljóðfíla" else False),
+        ],
+        inputs=[mms_select_source_trans],
+        outputs=[mms_mic_source_trans, mms_upload_source_trans],
+        queue=False,
+    )
+    mms_select_source_iden.change(
+        lambda x: [
+            gr.update(visible=True if x == "Tak upp frá mikrofonini" else False),
+            gr.update(visible=True if x == "Vel ljóðfílu" else False),
+        ],
+        inputs=[mms_select_source_iden],
+        outputs=[mms_mic_source_iden, mms_upload_source_iden],
+        queue=False,
+    )
     gr.HTML(
         """
             <div class="footer" style="text-align:center">
                 <p>
+                    <a href="https://ai.facebook.com" style="text-decoration: underline;" target="_blank">Meta AI</a> gjørdi hendan málmyndilin, sum koyrir á 🤗 Hugging Face
                 </p>
             </div>
            """
     )
 demo.queue(concurrency_count=3)
+demo.launch()