Spaces:
unijoh
/
Runtime error

File size: 5,051 Bytes
7bcf8d7
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
97fe1f4
 
 
 
7bcf8d7
 
 
 
 
 
 
 
ed3244e
97fe1f4
7bcf8d7
 
 
 
 
ed3244e
7bcf8d7
 
 
 
 
 
 
97fe1f4
 
 
7bcf8d7
 
 
ed3244e
7bcf8d7
 
ed3244e
97fe1f4
7bcf8d7
 
 
 
ed3244e
7bcf8d7
 
 
ed3244e
7bcf8d7
 
 
 
 
ed3244e
7bcf8d7
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
ed3244e
7bcf8d7
 
 
 
 
 
ed3244e
7bcf8d7
 
 
 
 
 
 
 
 
ef8804e
 
 
7bcf8d7
bec2b9c
7bcf8d7
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
5cc287f
7bcf8d7
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
import gradio as gr
import librosa
from asr import transcribe, ASR_EXAMPLES, ASR_LANGUAGES, ASR_NOTE
from tts import synthesize, TTS_EXAMPLES, TTS_LANGUAGES
from lid import identify, LID_EXAMPLES


demo = gr.Blocks()

mms_select_source_trans = gr.Radio(
    ["Record from Mic", "Upload audio"],
    label="Audio input",
    value="Record from Mic",
)
mms_mic_source_trans = gr.Audio(source="microphone", type="filepath", label="Use mic")
mms_upload_source_trans = gr.Audio(
    source="upload", type="filepath", label="Upload file", visible=False
)

# Filter ASR_LANGUAGES to only include Faroese
ASR_LANGUAGES = {'fao': 'Faroese'}

mms_transcribe = gr.Interface(
    fn=transcribe,
    inputs=[
        mms_select_source_trans,
        mms_mic_source_trans,
        mms_upload_source_trans,
        gr.Dropdown(
            [f"{k} ({v})" for k, v in ASR_LANGUAGES.items()],
            label="Mál",
            value="fao (Faroese)",
        ),
        # gr.Checkbox(label="Use Language Model (if available)", default=True),
    ],
    outputs="text",
    examples=ASR_EXAMPLES,
    title="Talukennari",
    description=(
        "Transcribe audio from a microphone or input file in your desired language."
    ),
    article=ASR_NOTE,
    allow_flagging="never",
)

# Filter TTS_LANGUAGES to only include Faroese
TTS_LANGUAGES = {'fao': 'Faroese'}

mms_synthesize = gr.Interface(
    fn=synthesize,
    inputs=[
        gr.Text(label="Tekstur at lesa upp"),
        gr.Dropdown(
            [f"{k} ({v})" for k, v in TTS_LANGUAGES.items()],
            label="Mál",
            value="fao (Faroese)",
        ),
        gr.Slider(minimum=0.1, maximum=4.0, value=1.0, step=0.1, label="Speed"),
    ],
    outputs=[
        gr.Audio(label="Ljóð frá teldutaluni", type="numpy"),
        gr.Text(label="Filtered text after removing OOVs"),
    ],
    examples=TTS_EXAMPLES,
    title="Teldutala",
    description=("Generate audio in your desired language from input text."),
    allow_flagging="never",
)

mms_select_source_iden = gr.Radio(
    ["Tak upp frá mikrofonini", "Vel ljóðfílu"],
    label="Audio input",
    value="Record from Mic",
)
mms_mic_source_iden = gr.Audio(source="microphone", type="filepath", label="Use mic")
mms_upload_source_iden = gr.Audio(
    source="upload", type="filepath", label="Upload file", visible=False
)
mms_identify = gr.Interface(
    fn=identify,
    inputs=[
        mms_select_source_iden,
        mms_mic_source_iden,
        mms_upload_source_iden,
    ],
    outputs=gr.Label(num_top_classes=10),
    examples=LID_EXAMPLES,
    title="Máleyðmerkjari",
    description=("Identity the language of input audio."),
    allow_flagging="never",
)

tabbed_interface = gr.TabbedInterface(
    [mms_transcribe, mms_synthesize, mms_identify],
    ["Talukennari", "Teldutala", "Máleyðmerkjari"],
)

with gr.Blocks() as demo:
    gr.Markdown(
        "<p align='center' style='font-size: 20px;'>MMS: Scaling Speech Technology to 1000+ languages demo. See our <a href='https://ai.facebook.com/blog/multilingual-model-speech-recognition/'>blog post</a> and <a href='https://arxiv.org/abs/2305.13516'>paper</a>.</p>"
    )
    gr.HTML(
        """<center>Click on the appropriate tab to explore Speech-to-text (ASR), Text-to-speech (TTS) and Language identification (LID) demos.   </center>"""
    )
    gr.HTML(
        """<center>You can also finetune MMS models on your data using the recipes provides here - <a href='https://huggingface.co/blog/mms_adapters'>ASR</a> <a href='https://github.com/ylacombe/finetune-hf-vits'>TTS</a>  </center>"""
    )
    gr.HTML(
        """<center><a href="https://huggingface.co/spaces/facebook/MMS?duplicate=true"  style="display: inline-block;margin-top: .5em;margin-right: .25em;" target="_blank"><img style="margin-bottom: 0em;display: inline;margin-top: -.25em;" src="https://bit.ly/3gLdBN6" alt="Duplicate Space"></a> for more control and no queue.</center>"""
    )

    tabbed_interface.render()
    mms_select_source_trans.change(
        lambda x: [
            gr.update(visible=True if x == "Record from Mic" else False),
            gr.update(visible=True if x == "Upload audio" else False),
        ],
        inputs=[mms_select_source_trans],
        outputs=[mms_mic_source_trans, mms_upload_source_trans],
        queue=False,
    )
    mms_select_source_iden.change(
        lambda x: [
            gr.update(visible=True if x == "Record from Mic" else False),
            gr.update(visible=True if x == "Upload audio" else False),
        ],
        inputs=[mms_select_source_iden],
        outputs=[mms_mic_source_iden, mms_upload_source_iden],
        queue=False,
    )
    gr.HTML(
        """
            <div class="footer" style="text-align:center">
                <p>
                    Model by <a href="https://ai.facebook.com" style="text-decoration: underline;" target="_blank">Meta AI</a> - Gradio Demo by 🤗 Hugging Face
                </p>
            </div>
           """
        )

demo.queue(concurrency_count=3)
demo.launch()