Spaces:

Silemo
/

whisper-it

Runtime error

App Files Files Community

whisper-it / app.py

Silemo

Removed few comments and added videos

42a00dc 11 months ago

raw

history blame

2.72 kB

	from transformers import pipeline
	from pytube import YouTube
	import gradio as gr
	import requests

	# Speech-recognition pipeline loaded from the "Silemo/whisper-it" model.
	# It is created once at import time and reused by transcribe() on every call.
	pipe = pipeline(model="Silemo/whisper-it") # replace the model id to use a different checkpoint

	def download_audio(audio_url, filename, timeout=30):
	    """Download the file at ``audio_url`` and write it to ``filename``.

	    Args:
	        audio_url: HTTP(S) URL of the audio file to fetch.
	        filename: Local path the response body is written to (binary mode).
	        timeout: Seconds to wait for the server before giving up, so that a
	            stalled connection cannot hang application start-up forever.

	    Raises:
	        requests.HTTPError: If the server answers with a 4xx/5xx status.
	        requests.RequestException: On connection errors or timeouts.
	    """
	    response = requests.get(audio_url, timeout=timeout)

	    # Fail loudly on an error status instead of silently saving the server's
	    # error page under an audio file name.
	    response.raise_for_status()

	    # The payload is binary audio data, so write it without text decoding.
	    with open(filename, "wb") as audio_file:
	        audio_file.write(response.content)

	def transcribe(audio):
	    """Run the module-level ``pipe`` on ``audio`` and return the recognized text.

	    ``audio`` is handed to the pipeline unchanged; the value stored under the
	    ``"text"`` key of the pipeline result is returned.
	    """
	    result = pipe(audio)
	    return result["text"]

	def transcribe_video(url):
	    """Transcribe the audio track of the YouTube video at ``url``.

	    The audio-only stream is downloaded into a temporary directory, passed to
	    ``transcribe`` and removed again afterwards, so repeated requests do not
	    pile up media files in the working directory.

	    Args:
	        url: Link to the YouTube video.

	    Returns:
	        The transcription text produced by ``transcribe``.

	    Raises:
	        ValueError: If the video exposes no audio-only stream.
	    """
	    import tempfile

	    yt = YouTube(url)
	    stream = yt.streams.get_audio_only()
	    if stream is None:
	        # get_audio_only() yields None when no matching stream exists; report
	        # that explicitly instead of failing later with an AttributeError.
	        raise ValueError(f"No audio-only stream available for {url!r}")

	    with tempfile.TemporaryDirectory() as download_dir:
	        # download() returns the path of the saved file; the transcription
	        # must finish before the temporary directory is cleaned up.
	        audio_path = stream.download(output_path=download_dir)
	        return transcribe(audio_path)

	# Example clips listed in the first interface: remote locations and the local
	# file names they are stored under.
	audio1_url = "https://github.com/Silemo/sml-lab2-2023-manfredi-meneghin/raw/main/task1/audio/offer.mp3"
	audio2_url = "https://github.com/Silemo/sml-lab2-2023-manfredi-meneghin/raw/main/task1/audio/fantozzi.mp3"
	audio1_filename = "offer.mp3"
	audio2_filename = "fantozzi.mp3"

	# Fetch both clips at start-up so the example files exist on disk.
	download_audio(audio1_url, audio1_filename)
	download_audio(audio2_url, audio2_filename)

	# Multiple interfaces using tabs -> https://github.com/gradio-app/gradio/issues/450

	# First tab: transcribe a microphone recording or an uploaded audio file.
	io1 = gr.Interface(
	    title="Whisper Small - Italian - Microphone or Audio file",
	    description="Realtime demo for Italian speech recognition using a fine-tuned Whisper small model. It uses the computer microphone or an audio file as audio input",
	    fn=transcribe,
	    # type="filepath" makes Gradio hand the callback a path to the audio file.
	    inputs=gr.Audio(type="filepath", sources=["microphone", "upload"]),
	    outputs="text",
	    # The example clips downloaded at start-up.
	    examples=[[audio1_filename], [audio2_filename]],
	)

	# Second tab: transcribe the audio track of a YouTube video given its URL.
	io2 = gr.Interface(
	    title="Whisper Small - Italian - YouTube link",
	    description="Realtime demo for Italian speech recognition using a fine-tuned Whisper small model. It uses a YouTube link as audio input",
	    fn=transcribe_video,
	    inputs=gr.Textbox(
	        placeholder="https://youtu.be/9DImRZERJNs?si=1Lme7o_KH2oCxU7y",
	        label="YouTube URL",
	    ),
	    outputs="text",
	    examples=[
	        # "Per me è la cipolla" clip
	        ["https://youtu.be/QbwZlURClSA?si=DKMtIiKE-nO2mfcV"],
	        # Breaking Italy - Lollobrigida stops the train
	        ["https://youtu.be/9MPBN0tnA_E?si=8-hqkJS05LNkWprX&t=2"],
	        # Mussolini speech
	        ["https://youtu.be/UmnxcjRk37Q?si=uxt8oqnMDJ3vFzIB&t=77"],
	    ],
	)

	# Combine both interfaces into a single tabbed app and start it.
	# The tab names are given as a list in the same order as the interfaces: a
	# set has no guaranteed iteration order, so the labels could otherwise end up
	# attached to the wrong tabs.
	gr.TabbedInterface(
	    [io1, io2],
	    ["Microphone or audio file", "YouTube"],
	).launch()