File size: 2,019 Bytes
4eafd35 97c03b1 4eafd35 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 |
import gradio as gr
from src.inference import Wav2Vec2Inference
import librosa
import os, sys
import soundfile
# Identifier of the pretrained wav2vec2 checkpoint handed to the inference
# wrapper (presumably a Hugging Face Hub model id -- confirm in src.inference).
model_name = "arifagustyawan/wav2vec2-large-xlsr-53-id"
# Built once at import time; both transcription handlers in this file call
# ``asr.file_to_text`` on this shared instance.
asr = Wav2Vec2Inference(model_name)
def convert(inputfile, outfile, target_sr=16000):
    """Resample an audio file and write the result to ``outfile``.

    Args:
        inputfile: Path of the audio file to read.
        outfile: Path the resampled audio is written to.
        target_sr: Sample rate of the written file in Hz. Defaults to 16000.
    """
    # Passing ``sr=`` makes librosa resample once, straight from the file's
    # native rate. Without it, load() first converts to its 22050 Hz default
    # and a second resample is then needed to reach the target rate.
    data, _ = librosa.load(inputfile, sr=target_sr)
    soundfile.write(outfile, data, target_sr)
def parse_transcription_record(wav_file):
    """Transcribe a microphone recording.

    Args:
        wav_file: Path of the recorded audio file as a string, or ``None``
            (or an empty string) when nothing was recorded.

    Returns:
        The ``(transcription, confidence)`` pair produced by
        ``asr.file_to_text``, or ``("", "")`` when no recording was given.
    """
    if not wav_file:
        # Nothing was recorded; return empty outputs instead of failing
        # with an AttributeError on ``None``.
        return "", ""
    # splitext removes only the final extension, so dots elsewhere in the
    # path ("./clip.wav", dotted temp directories, "my.take.wav") are kept.
    stem, _ = os.path.splitext(wav_file)
    resampled_path = stem + "16k.wav"
    convert(wav_file, resampled_path)
    transcription, confidence = asr.file_to_text(resampled_path)
    return transcription, confidence
def parse_transcription_file(wav_file):
    """Transcribe an uploaded audio file.

    Args:
        wav_file: The uploaded file. Either a filesystem path (``str`` or
            ``os.PathLike``) or a file-like object exposing the path as
            ``.name``. ``None`` (or an empty string) means nothing was
            uploaded.

    Returns:
        The ``(transcription, confidence)`` pair produced by
        ``asr.file_to_text``, or ``("", "")`` when no file was given.
    """
    if not wav_file:
        # Nothing was uploaded; return empty outputs instead of failing
        # with an AttributeError on ``None``.
        return "", ""
    # The upload component is configured with type="filepath", which yields a
    # plain path string; older file-like wrappers carry the path in ``.name``.
    if isinstance(wav_file, (str, os.PathLike)):
        source_path = os.fspath(wav_file)
    else:
        source_path = wav_file.name
    # splitext removes only the final extension, so dots elsewhere in the
    # path ("./clip.wav", dotted temp directories, "my.take.wav") are kept.
    stem, _ = os.path.splitext(source_path)
    resampled_path = stem + "16k.wav"
    convert(source_path, resampled_path)
    transcription, confidence = asr.file_to_text(resampled_path)
    return transcription, confidence
# Sample inputs offered in the upload tab: one row per example, one entry per
# input component.
examples = [[os.path.join("assets", "halo.wav")]]
# "Record Audio" tab: transcribes a clip captured from the microphone.
record_audio = gr.Interface(
    fn=parse_transcription_record,
    # type="filepath" gives the handler a path string to the recorded clip.
    inputs=gr.Audio(
        sources="microphone",
        type="filepath",
        label="Click button to record audio",
    ),
    outputs=[
        gr.Textbox(label="Transcription"),
        gr.Textbox(label="Confidence"),
    ],
    analytics_enabled=False,
    allow_flagging="never",
    title="Automatic Speech Recognition",
    # Spelling fixed: the original read "bellow".
    description="Click the button below to record audio!",
)
# "Upload Audio" tab: transcribes an audio file supplied by the user, with the
# bundled sample clip offered as a ready-made example.
upload_file = gr.Interface(
    fn=parse_transcription_file,
    inputs=gr.File(type="filepath", label="Upload file here"),
    outputs=[
        gr.Textbox(label="Transcription"),
        gr.Textbox(label="Confidence"),
    ],
    examples=examples,
    analytics_enabled=False,
    allow_flagging="never",
    title="Automatic Speech Recognition",
    description="Upload or drag and drop the audio file here!",
)
# Combine the two interfaces into one app, one tab per input method.
demo = gr.TabbedInterface(
    [record_audio, upload_file],
    ["Record Audio", "Upload Audio"],
)

# Start the web server only when run as a script, not when imported.
if __name__ == "__main__":
    demo.launch()