# Whisper / app.py
# Last change by explorall: "Update app.py" (db29542)
import tempfile

import gradio as gr
from pytube import YouTube
from transformers import pipeline
# Both pipelines are built once at import time and shared by every request.
#
# Speech-to-text pipeline: it is called with an audio input and its result is
# read through the "text" key.
# NOTE(review): presumably a Whisper model fine-tuned for Swedish (inferred
# from the model id) - confirm.
transcription_pipe = pipeline(model="explorall/whisper-small-sv-dropout-6mb")
# Text translation pipeline: the 'translation_text' field of its first result
# is used.
# NOTE(review): presumably Swedish -> English (inferred from the "sv-en"
# model id) - confirm.
translation_pipe = pipeline(model="Helsinki-NLP/opus-mt-sv-en")
def transcribe_and_translate(audio):
    """Transcribe an audio file and translate the resulting text.

    Args:
        audio: Path to the audio file (the Gradio audio input is configured
            with ``type="filepath"``). ``None`` or an empty string means
            that no audio was provided.

    Returns:
        A ``(transcription, translation)`` tuple of strings. Both are empty
        when no audio was supplied; the translation is empty when the
        transcription contains no text.
    """
    # Gradio hands over None when the button is clicked without a recording
    # or an upload; return empty outputs instead of letting the pipeline raise.
    if not audio:
        return "", ""

    transcription = transcription_pipe(audio)["text"]

    # Nothing was recognized: skip the translation model rather than feeding
    # it an empty string.
    if not transcription.strip():
        return transcription, ""

    translation = translation_pipe(transcription)[0]["translation_text"]
    return transcription, translation
def transcribe_and_translate_yt(link):
    """Download the audio of a YouTube video, then transcribe and translate it.

    Args:
        link: URL of the YouTube video.

    Returns:
        A ``(transcription, translation)`` tuple of strings, as returned by
        ``transcribe_and_translate``. Both are empty when ``link`` is blank.

    Raises:
        gr.Error: If the video offers no audio-only stream.
    """
    # A blank textbox is not a URL; return empty outputs without hitting the
    # network.
    if not link or not link.strip():
        return "", ""

    stream = YouTube(link.strip()).streams.filter(only_audio=True).first()
    # first() yields None when the filter matches nothing.
    if stream is None:
        raise gr.Error("No audio stream is available for this video.")

    # Download into a throwaway directory so files do not pile up in the
    # working directory; the transcription must finish before the directory
    # (and the audio file in it) is removed.
    with tempfile.TemporaryDirectory() as workdir:
        audio = stream.download(output_path=workdir)
        return transcribe_and_translate(audio)
# Two-tab Gradio UI: one tab for microphone/uploaded audio, one for a YouTube
# link. Each tab has its own pair of output boxes and its own button.
with gr.Blocks() as demo:
    with gr.Tab("Real-time Swedish to English Transcription and Translation"):
        audio = gr.Audio(sources=["microphone", "upload"], type="filepath")
        # Labels distinguish the two outputs; the order matches the
        # (transcription, translation) tuple returned by the handler.
        rt_outputs = [
            gr.Textbox(label="Transcription"),
            gr.Textbox(label="Translation"),
        ]
        rt_button = gr.Button('Transcribe and Translate')

    with gr.Tab("Youtube Video Transcription and Translation"):
        link = gr.Textbox(label="Enter YouTube Video Link")
        yt_outputs = [
            gr.Textbox(label="Transcription"),
            gr.Textbox(label="Translation"),
        ]
        yt_button = gr.Button('Transcribe and Translate YouTube Video')

    # Event listeners are registered while the Blocks context is still open.
    rt_button.click(transcribe_and_translate, inputs=audio, outputs=rt_outputs)
    yt_button.click(transcribe_and_translate_yt, inputs=link, outputs=yt_outputs)

demo.launch(debug=True)