Spaces:

arifagustyawan
/

wav2vec2-large-xlsr-53-id

Sleeping

App Files Files Community

wav2vec2-large-xlsr-53-id / app.py

arifagustyawan

initial commit

4eafd35 11 months ago

raw

history blame

2.03 kB

	import gradio as gr
	from src.inference import Wav2Vec2Inference
	import librosa
	import os, sys
	import soundfile

	# Model identifier handed to Wav2Vec2Inference.
	# NOTE(review): looks like a Hugging Face Hub repo id — confirm against src.inference.
	model_name = "arifagustyawan/wav2vec2-large-xlsr-common_voice_13_0-id"
	# Built once at import time and shared by both transcription handlers below.
	asr = Wav2Vec2Inference(model_name)

	def convert(inputfile, outfile, target_sr=16000):
	    """Re-encode an audio file at ``target_sr`` Hz and write it to ``outfile``.

	    Args:
	        inputfile: Path of the audio file to read.
	        outfile: Path the resampled audio is written to.
	        target_sr: Sampling rate (Hz) of the written file. Defaults to
	            16000, the rate this app has always produced.
	    """
	    # Passing ``sr`` lets librosa resample straight to the target rate while
	    # loading. Loading at librosa's default rate and resampling a second
	    # time afterwards costs an extra, lossy conversion for no benefit.
	    data, _ = librosa.load(inputfile, sr=target_sr)
	    soundfile.write(outfile, data, target_sr)

	def parse_transcription_record(wav_file):
	    """Transcribe a microphone recording.

	    Args:
	        wav_file: Path of the recorded audio file, or ``None`` when the
	            form was submitted without a recording.

	    Returns:
	        The ``(transcription, confidence)`` pair produced by
	        ``asr.file_to_text`` for the 16 kHz copy of the recording.

	    Raises:
	        gr.Error: If no recording was supplied.
	    """
	    if wav_file is None:
	        raise gr.Error("No audio was recorded.")

	    # splitext strips only the final extension. Splitting on the first "."
	    # would truncate any path that has a dot in a directory name or that
	    # starts with "./".
	    stem, _ = os.path.splitext(wav_file)
	    resampled_path = stem + "16k.wav"

	    convert(wav_file, resampled_path)
	    transcription, confidence = asr.file_to_text(resampled_path)
	    return transcription, confidence

	def parse_transcription_file(wav_file):
	    """Transcribe an uploaded audio file.

	    Args:
	        wav_file: The uploaded file, given either as a path string or as an
	            object exposing the path through a ``name`` attribute, or
	            ``None`` when the form was submitted without a file.

	    Returns:
	        The ``(transcription, confidence)`` pair produced by
	        ``asr.file_to_text`` for the 16 kHz copy of the upload.

	    Raises:
	        gr.Error: If no file was supplied.
	    """
	    if wav_file is None:
	        raise gr.Error("No file was uploaded.")

	    # Accept both a plain path and a file-like wrapper carrying ``.name``.
	    source_path = getattr(wav_file, "name", wav_file)

	    # splitext strips only the final extension. Splitting on the first "."
	    # would truncate any path that has a dot in a directory name or that
	    # starts with "./".
	    stem, _ = os.path.splitext(source_path)
	    resampled_path = stem + "16k.wav"

	    convert(source_path, resampled_path)
	    transcription, confidence = asr.file_to_text(resampled_path)
	    return transcription, confidence

	# Sample inputs offered in the upload tab: one row per example, one entry
	# per input component.
	examples = [[os.path.join("assets", "halo.wav")]]
	# Tab 1: transcribe audio captured from the microphone.
	record_audio = gr.Interface(
	    fn=parse_transcription_record,
	    inputs=gr.Audio(
	        sources="microphone",
	        type="filepath",
	        label="Click button to record audio",
	    ),
	    outputs=[
	        gr.Textbox(label="Transcription"),
	        gr.Textbox(label="Confidence"),
	    ],
	    analytics_enabled=False,
	    allow_flagging="never",
	    title="Automatic Speech Recognition",
	    # Spelling corrected in the user-facing text ("bellow" -> "below").
	    description="Click the button below to record audio!",
	)

	# Tab 2: transcribe an audio file supplied by upload or drag-and-drop.
	upload_file = gr.Interface(
	    title="Automatic Speech Recognition",
	    description="Upload or drag and drop the audio file here!",
	    fn=parse_transcription_file,
	    inputs=gr.File(type="filepath", label="Upload file here"),
	    outputs=[
	        gr.Textbox(label="Transcription"),
	        gr.Textbox(label="Confidence"),
	    ],
	    examples=examples,
	    allow_flagging="never",
	    analytics_enabled=False,
	)


	# Present the two interfaces as tabs of a single app.
	demo = gr.TabbedInterface(
	    interface_list=[record_audio, upload_file],
	    tab_names=["Record Audio", "Upload Audio"],
	)

	# Start the web server only when executed as a script, not on import.
	if __name__ == "__main__":
	    demo.launch()