# whisperaudio / app.py
# Hunzla's picture
# Update app.py
# ad7b749
# raw
# history blame
# 1.6 kB
from difflib import SequenceMatcher

import gradio as gr
import numpy as np
import sounddevice as sd
from transformers import pipeline
# Speech-recognition setup: the checkpoint identifier is kept in its own
# module-level name, and the pipeline object is built once at import time so
# every request reuses the same loaded model.
asr_model = "Abdullah17/whisper-small-urdu"
asr_pipe = pipeline(
    task="automatic-speech-recognition",
    model=asr_model,
)
def transcribe_the_command(audio_samples):
    """Transcribe audio samples and report the closest known command.

    Args:
        audio_samples: Sequence or array of mono audio samples. It is
            converted to a NumPy array and passed to ``asr_pipe`` as raw
            audio.

    Returns:
        A two-line string holding the transcript and the command from
        ``commands`` that is most similar to it.
    """
    result = asr_pipe(np.asarray(audio_samples))
    # For a single input the pipeline returns one dict ({"text": ...}); for a
    # batch it returns a list of such dicts. Indexing the dict with [0]
    # raised KeyError, so normalise both shapes before reading "text".
    if isinstance(result, list):
        result = result[0]
    transcript = result["text"]

    # NOTE(review): `commands` is not defined in the visible part of this
    # file; it must exist at module level as the list of candidate command
    # strings, otherwise this line raises NameError.
    most_similar_command, _ = find_most_similar_command(transcript, commands)
    return f"Transcript: {transcript}\nMost Similar Command: {most_similar_command}"
def capture_audio(rec_duration=6, sample_rate=16000):
    """Record mono audio with ``sounddevice`` and return it as a flat array.

    Args:
        rec_duration: Recording length in seconds.
        sample_rate: Sampling rate in Hz.

    Returns:
        A one-dimensional NumPy array with the recorded samples.
    """
    frame_count = int(rec_duration * sample_rate)
    recording = sd.rec(frame_count, samplerate=sample_rate, channels=1)
    # sd.rec returns immediately; block here until the buffer is filled.
    sd.wait()
    mono_samples = recording.flatten()
    return mono_samples
def find_most_similar_command(statement, command_list):
    """Return the command most similar to ``statement`` and its index.

    Similarity is measured with
    ``SequenceMatcher(None, statement, command).ratio()``.

    Args:
        statement: Text to match against the candidates.
        command_list: Iterable of candidate command strings.

    Returns:
        A ``(best_match, reply)`` tuple, where ``best_match`` is the
        highest-scoring command and ``reply`` is its zero-based position in
        ``command_list``. When several commands tie, the earliest one wins.
        ``(None, None)`` is returned when ``command_list`` is empty or no
        command has a similarity greater than 0.
    """
    best_match = None
    # Initialised up front: previously `reply` was only bound inside the
    # loop, so an empty list or an all-zero score raised UnboundLocalError.
    reply = None
    highest_similarity = 0
    for index, command in enumerate(command_list):
        similarity = SequenceMatcher(None, statement, command).ratio()
        # Strict comparison keeps the first of several equally good matches.
        if similarity > highest_similarity:
            highest_similarity = similarity
            best_match = command
            reply = index
    return best_match, reply
def _record_and_transcribe():
    """Record from the microphone and return the transcription summary."""
    return transcribe_the_command(capture_audio())


# `gr.inputs.Function` is not a Gradio component, so the original
# construction failed with AttributeError. The recording is done by
# `capture_audio` itself, so the interface needs no input widget: pressing
# the button records and then transcribes.
iface = gr.Interface(
    fn=_record_and_transcribe,
    inputs=None,
    outputs="text",
    title="Whisper Small Urdu Command",
    description="Realtime demo for Urdu speech recognition using a fine-tuned Whisper small model and outputting the estimated command on the basis of speech transcript.",
)
iface.launch()