"""Gradio front-end for the MMS zero-shot ASR demo.

Builds the input/output layout, keeps the two decoding-score sliders in sync
with their "use default" checkboxes, and wires the Submit button to
``zeroshot.process``.
"""

import gradio as gr

from zeroshot import (
    process,
    WORD_SCORE_DEFAULT_IF_LM,
    WORD_SCORE_DEFAULT_IF_NOLM,
    LM_SCORE_DEFAULT,
)

# Range/step shared by both decoding-score sliders, so the initial widgets and
# the ones rebuilt in `update_slider` cannot drift apart.
_SCORE_SLIDER_RANGE = dict(minimum=-10.0, maximum=10.0, step=0.1)

# Reference transcript shared by every English example row (the leading space
# is part of the original text and is kept as-is).
_ENGLISH_REFERENCE = (
    " This is going to look at the code that we have in our configuration"
    " that we've already exported and compare it to our database, and we"
    " want to import"
)

with gr.Blocks(css="style.css") as demo:
    gr.Markdown(
        "\n\nMMS Zero-shot ASR Demo. See our arXiv paper for model details.\n\n"
    )
    gr.HTML(
        "\n"
        "The demo works on input audio in any language, as long as you provide"
        " a list of words or sentences for that language and an optional n-gram"
        " language model (even a simple 1-gram model will work!) to help with"
        " accuracy.\n"
        "We recommend having a minimum of 5000 distinct words in the textfile"
        " to achieve a good performance.\n"
    )

    with gr.Row():
        # Left column: inputs, decoding settings and the submit button.
        with gr.Column():
            audio = gr.Audio(label="Audio Input\n(use microphone or upload a file)")
            with gr.Row():
                words_file = gr.File(label="Text Data")
                lm_file = gr.File(label="Language Model\n(optional)")

            with gr.Accordion("Advanced Settings", open=False):
                gr.Markdown(
                    "The following parameters are used for beam-search decoding. Use the default values if you are not sure."
                )
                with gr.Row():
                    with gr.Column():
                        wscore_usedefault = gr.Checkbox(
                            label="Use Default Word Insertion Score", value=True
                        )
                        wscore = gr.Slider(
                            value=WORD_SCORE_DEFAULT_IF_LM,
                            interactive=False,
                            label="Word Insertion Score",
                            **_SCORE_SLIDER_RANGE,
                        )
                    with gr.Column():
                        lmscore_usedefault = gr.Checkbox(
                            label="Use Default Language Model Score", value=True
                        )
                        lmscore = gr.Slider(
                            value=LM_SCORE_DEFAULT,
                            interactive=False,
                            label="Language Model Score",
                            **_SCORE_SLIDER_RANGE,
                        )
                with gr.Column():
                    autolm = gr.Checkbox(
                        label="Automatically create Unigram LM from text data",
                        value=True,
                    )

            btn = gr.Button("Submit", elem_id="submit")

            # No explicit `triggers` are given; per the Gradio docs, `gr.on`
            # then listens for changes to any of the listed inputs.
            @gr.on(
                inputs=[wscore_usedefault, lmscore_usedefault, lm_file, autolm],
                outputs=[wscore, lmscore],
            )
            def update_slider(ws, ls, lm, alm):
                """Rebuild both score sliders from the current settings.

                Args:
                    ws: State of the "Use Default Word Insertion Score" checkbox.
                    ls: State of the "Use Default Language Model Score" checkbox.
                    lm: Uploaded language-model file, or ``None`` if absent.
                    alm: State of the "automatically create unigram LM" checkbox.

                Returns:
                    A ``(word_insertion_slider, lm_score_slider)`` pair, in the
                    same order as the ``outputs`` list above.
                """
                # A language model is in play if one was uploaded or if one
                # will be built automatically from the text data.
                has_lm = lm is not None or alm

                # Each slider gets the default that belongs to it: the word
                # insertion slider takes the WORD_SCORE_* constants and the LM
                # slider takes LM_SCORE_DEFAULT (0 when there is no LM). These
                # two were previously swapped.
                word_score_default = (
                    WORD_SCORE_DEFAULT_IF_LM if has_lm else WORD_SCORE_DEFAULT_IF_NOLM
                )
                lm_score_default = LM_SCORE_DEFAULT if has_lm else 0

                ws_slider = gr.Slider(
                    value=word_score_default,
                    interactive=not ws,  # editable only when "use default" is off
                    label="Word Insertion Score",
                    **_SCORE_SLIDER_RANGE,
                )
                ls_slider = gr.Slider(
                    value=lm_score_default,
                    interactive=not ls,  # editable only when "use default" is off
                    label="Language Model Score",
                    **_SCORE_SLIDER_RANGE,
                )
                return ws_slider, ls_slider

        # Right column: transcript and decoding logs.
        with gr.Column():
            text = gr.Textbox(label="Transcript")
            with gr.Accordion("Logs", open=False):
                logs = gr.Textbox(show_label=False)

    # HACK: hidden textbox so that example rows can supply a reference
    # transcript, which is forwarded to `process` with the other inputs.
    reference = gr.Textbox(label="Reference Transcript", visible=False)

    btn.click(
        process,
        inputs=[
            audio,
            words_file,
            lm_file,
            wscore,
            lmscore,
            wscore_usedefault,
            lmscore_usedefault,
            autolm,
            reference,
        ],
        outputs=[text, logs],
    )

    # Examples
    gr.Examples(
        examples=[
            [
                "upload/english/english.mp3",
                "upload/english/c4_10k_sentences.txt",
                _ENGLISH_REFERENCE,
            ],
            [
                "upload/english/english.mp3",
                "upload/english/c4_5k_sentences.txt",
                _ENGLISH_REFERENCE,
            ],
            [
                "upload/english/english.mp3",
                "upload/english/gutenberg_27045.txt",
                _ENGLISH_REFERENCE,
            ],
        ],
        inputs=[audio, words_file, reference],
        label="English",
    )

    gr.Examples(
        examples=[
            [
                "upload/ligurian/ligurian_1.mp3",
                "upload/ligurian/zenamt_10k_sentences.txt",
                "I mæ colleghi m’an domandou d’aggiuttâli à fâ unna preuva co-o zeneise pe vedde s’o fonçioña.",
            ],
            [
                "upload/ligurian/ligurian_2.mp3",
                "upload/ligurian/zenamt_10k_sentences.txt",
                "Staseia vaggo à çenâ con mæ moggê e doî amixi che de chì à quarche settemaña faian stramuo feua stato.",
            ],
            [
                "upload/ligurian/ligurian_3.mp3",
                "upload/ligurian/zenamt_5k_sentences.txt",
                "Pe inandiâ o pesto ghe veu o baxaicò, i pigneu, l’euio, o formaggio, l’aggio e a sâ.",
            ],
        ],
        inputs=[audio, words_file, reference],
        label="Ligurian",
    )

demo.launch()