from transformers import pipeline asr = pipeline(task="automatic-speech-recognition", model= "distil-whisper/distil-small.en") import gradio as gr demo = gr.Blocks() def transcribe_long_form(filepath): if filepath is None: gr.Warning("No audio found, please retry") return output = asr(filepath, max_new_tokens=256, chunk_length_s=30, batch_size=4,) return output['text'] mic_transcribe = gr.Interface( fn=transcribe_long_form, inputs=gr.Audio(sources="microphone", type="filepath"), outputs=gr.Textbox(label="Transcription", lines=3), allow_flagging="never", description="Speak into the microphone or upload an audio file to transcribe it into text. This model uses a state-of-the-art speech recognition algorithm to recognize spoken words and phrases") file_transcribe = gr.Interface( fn=transcribe_long_form, inputs=gr.Audio(sources="upload", type="filepath"), outputs=gr.Textbox(label="Transcription", lines=5), allow_flagging="never", description="Speak into the microphone or upload an audio file to transcribe it into text. This model uses a state-of-the-art speech recognition algorithm to recognize spoken words and phrases") ) with demo: gr.TabbedInterface( [mic_transcribe, file_transcribe], ["Transcribe Microphone", "Transcribe Audio File"], title="Speak out Loud - Automatic Speech Recognition" ) demo.launch()