# whisperaudio / app.py
# Last change: "Update app.py" by Hunzla (commit 528afc6), 3.25 kB.
from transformers import pipeline
# Speech-recognition pipeline, built once at import time and reused by
# transcribe_the_command for every request.
asr_pipe = pipeline(
    task="automatic-speech-recognition",
    model="Abdullah17/whisper-small-urdu",
)
from difflib import SequenceMatcher
import json
import socket
def get_local_ip():
    """Return the local IPv4 address used for outbound traffic, or None.

    A UDP socket is connected to 8.8.8.8:80 and the address the socket was
    bound to is read back with ``getsockname()``.

    Returns:
        The local IP address as a string, or None if any socket call fails
        (the error is printed).
    """
    try:
        # The context manager closes the socket on every path, including
        # when connect() or getsockname() raises.
        with socket.socket(socket.AF_INET, socket.SOCK_DGRAM) as sock:
            # Connecting a datagram socket only selects the route/peer, so
            # the bound local address becomes available via getsockname().
            sock.connect(("8.8.8.8", 80))
            return sock.getsockname()[0]
    except OSError as e:
        print(f"Error getting local IP: {e}")
        return None
# Report the detected address (or the failure to detect one) at start-up.
local_ip = get_local_ip()
print(
    f"Local IP Address: {local_ip}"
    if local_ip
    else "Local IP could not be determined."
)
# Load the command data from tasks.json; transcribe_the_command indexes it
# by menu_id to obtain the command groups to match against.
with open("tasks.json", mode="r", encoding="utf-8") as tasks_file:
    urdu_data = json.load(tasks_file)
# List of commands
# commands = [
# "نمائندے ایجنٹ نمائندہ",
# " سم ایکٹیویٹ ",
# " سم بلاک بند ",
# "موبائل پیکیجز انٹرنیٹ پیکیج",
# " چالان جمع چلان",
# " گانا "
# ]
# replies = [
# 1,2,
# ]
# Function to find the most similar command
def find_most_similar_command(statement, command_list):
    """Find the command phrase most similar to *statement*.

    Every phrase in every group is scored against *statement* with
    ``difflib.SequenceMatcher(...).ratio()``; each score is printed.

    Args:
        statement: Text to match, e.g. a speech transcript.
        command_list: Iterable of groups, each group being an iterable of
            candidate phrase strings.

    Returns:
        A ``(best_match, reply)`` tuple: the highest-scoring phrase and the
        zero-based index of the group that contains it. Both are None when
        *command_list* is empty or no phrase scores above 0.
    """
    best_match = None
    # Initialised up front so the return below is safe when nothing matches.
    reply = None
    highest_similarity = 0
    # NOTE(review): the index counts groups (sub-lists), not individual
    # phrases -- confirm this matches the layout of tasks.json.
    for group_index, sub_list in enumerate(command_list):
        for command in sub_list:
            similarity = SequenceMatcher(None, statement, command).ratio()
            print(group_index, "similarity", similarity)
            # Strict '>' keeps the earliest phrase when scores tie.
            if similarity > highest_similarity:
                highest_similarity = similarity
                best_match = command
                reply = group_index
    return best_match, reply
def send_data_to_db(order_id, col_name, col_value=None):
    """POST one column value for an order to the save_order API.

    The status code and body of the response are printed.

    Args:
        order_id: Order identifier appended to the endpoint URL; converted
            to text, so ints are accepted as well as strings.
        col_name: Form field name to send.
        col_value: Value sent under *col_name*. Optional so that existing
            two-argument calls keep working.

    Raises:
        requests.exceptions.RequestException: Propagated from
            ``requests.post`` (connection failure, timeout, ...).
    """
    import requests

    # API endpoint URL
    url = f"https://pizzahut.softinfix.tech/api/save_order/{order_id}"
    # Form data to send
    data = {col_name: col_value}
    # A timeout keeps an unresponsive server from blocking the caller forever.
    response = requests.post(url, data=data, timeout=10)
    print(response.status_code)
    print(response.text)
def transcribe_the_command(audio, menu_id, order_id, db_col="0"):
    """Transcribe a recording and, unless asked otherwise, map it to a command.

    The audio is written to a temporary WAV file, transcribed with
    ``asr_pipe``, and the transcript is either returned directly or matched
    against the command groups stored under ``urdu_data[menu_id]``.

    Args:
        audio: ``(sample_rate, audio_data)`` pair, unpacked in that order and
            written as received (no channel mixing is done here).
        menu_id: ``"transcript_only"`` to return the transcript itself;
            otherwise the key used to look up command groups in ``urdu_data``.
        order_id: Accepted for interface compatibility; not used here.
        db_col: Accepted for interface compatibility; not used here.

    Returns:
        The transcript text when *menu_id* is ``"transcript_only"``, otherwise
        the reply value returned by ``find_most_similar_command``.
    """
    import os
    import tempfile

    import soundfile as sf

    sample_rate, audio_data = audio
    print(menu_id)

    # Each call gets its own directory so overlapping requests cannot
    # overwrite one another's recording; it is removed once transcribed.
    with tempfile.TemporaryDirectory() as work_dir:
        file_name = os.path.join(work_dir, "recorded_audio.wav")
        sf.write(file_name, audio_data, sample_rate)
        transcript = asr_pipe(file_name)["text"]

    if menu_id == "transcript_only":
        reply = transcript
    else:
        commands = urdu_data[menu_id]
        print(commands)
        most_similar_command, reply = find_most_similar_command(transcript, commands)
        print(f"Given Statement: {transcript}")
        print(f"Most Similar Command: {most_similar_command}\n")
    print(reply)
    return reply
# get_text_from_voice("urdu.wav")
import gradio as gr
# Web UI: one microphone recording plus two text fields, passed to
# transcribe_the_command positionally in the order listed below.
# NOTE(review): with three inputs the "col_name(optional)" textbox arrives
# as the function's third parameter (order_id) and db_col keeps its default.
# Confirm whether a separate order_id textbox was intended before it.
iface = gr.Interface(
    fn=transcribe_the_command,
    inputs=[
        gr.inputs.Audio(label="Recorded Audio", source="microphone"),
        gr.inputs.Textbox(label="id"),
        gr.inputs.Textbox(label="col_name(optional)"),
    ],
    outputs="text",
    title="Whisper Small Urdu Command",
    description="Realtime demo for Urdu speech recognition using a fine-tuned Whisper small model and outputting the estimated command on the basis of speech transcript.",
)
iface.launch()