from transformers import pipeline
# Module-level speech-recognition pipeline: created once when the file is
# loaded and called by transcribe_the_command for every recording.
asr_pipe = pipeline(
    task="automatic-speech-recognition",
    model="ihanif/whisper-medium-urdu",
)
from difflib import SequenceMatcher

# Each known voice command together with the reply returned when it is
# recognised. The two lists below are index-aligned: replies[i] belongs to
# commands[i].
_COMMAND_REPLY_PAIRS = [
    ("کمپیوٹر، کھیل کھیلو", "https://medicobilling.info/urdu.wav"),
    ("میوزک چلاؤ", "download.wav"),
    ("روشنی کم کریں", "https://medicobilling.info/urdu.wav"),
]
commands = [command_text for command_text, _ in _COMMAND_REPLY_PAIRS]
replies = [reply_target for _, reply_target in _COMMAND_REPLY_PAIRS]
def find_most_similar_command(statement, command_list, reply_list=None, *,
                              min_similarity=0.0):
    """Return the command closest to *statement* and the reply paired with it.

    Every entry of ``command_list`` is scored against ``statement`` with
    ``difflib.SequenceMatcher(...).ratio()``; the highest-scoring entry wins
    and ties keep the earliest entry.

    Args:
        statement: Text to match, e.g. a transcript.
        command_list: Candidate command strings.
        reply_list: Replies aligned by index with ``command_list``. When
            omitted, the module-level ``replies`` list is used.
        min_similarity: A command is accepted only if its ratio is strictly
            greater than this value. The default of 0.0 rejects only
            commands that share nothing with ``statement``.

    Returns:
        A ``(command, reply)`` tuple. ``("unknown", "unknown.wav")`` is
        returned when no command scores above ``min_similarity`` (which
        includes an empty ``command_list``). If the winning command has no
        entry in ``reply_list``, its reply is ``"unknown.wav"``.
    """
    if reply_list is None:
        reply_list = replies

    best_match = None
    best_index = None
    highest_similarity = min_similarity
    # enumerate keeps the reply index tied to the command position, no matter
    # how many times the running best score improves.
    for index, command in enumerate(command_list):
        similarity = SequenceMatcher(None, statement, command).ratio()
        if similarity > highest_similarity:
            highest_similarity = similarity
            best_match = command
            best_index = index

    # Decide on the fallback only after every candidate has been scored, so a
    # weaker later command cannot discard an earlier good match.
    if best_index is None:
        return "unknown", "unknown.wav"

    if best_index < len(reply_list):
        reply = reply_list[best_index]
    else:
        reply = "unknown.wav"
    return best_match, reply
def transcribe_the_command(audio):
    """Transcribe a recording and return the reply for the closest command.

    The samples are down-mixed to mono, written to ``recorded_audio.wav``,
    transcribed with ``asr_pipe`` and matched against ``commands``.

    Args:
        audio: Value delivered by the audio input. Expected to be a
            ``(sample_rate, samples)`` pair where ``samples`` is an array of
            shape ``(frames,)`` or ``(frames, channels)``. A pair whose first
            element is the sample array is also accepted, in which case a
            48000 Hz rate is assumed. ``None`` means nothing was recorded.
            NOTE(review): the tuple layout follows Gradio's documented numpy
            audio format; confirm against the installed Gradio version.

    Returns:
        The reply string selected by ``find_most_similar_command``, or
        ``"unknown.wav"`` when ``audio`` is ``None``.
    """
    import soundfile as sf

    if audio is None:
        return "unknown.wav"

    file_name = "recorded_audio.wav"

    # Work out which element holds the samples: an array has ndim >= 1,
    # whereas a plain or numpy integer sample rate does not.
    first = audio[0]
    if getattr(first, "ndim", 0) >= 1:
        audio_data, sample_rate = first, 48000
    else:
        sample_rate, audio_data = int(first), audio[1]

    if audio_data.ndim == 1:
        # Already mono; nothing to mix down.
        mono_audio = audio_data
    else:
        # mean() accumulates in floating point, so integer PCM channels are
        # averaged without wrapping around.
        mono_audio = audio_data.mean(axis=1)
        if audio_data.dtype.name in ("int16", "int32"):
            # Keep integer PCM as integers; written as floats, the values
            # would be interpreted as normalised samples and clipped.
            mono_audio = mono_audio.astype(audio_data.dtype)

    sf.write(file_name, mono_audio, sample_rate)
    transcript = asr_pipe(file_name)["text"]
    most_similar_command, reply = find_most_similar_command(transcript, commands)
    print(f"Given Statement: {transcript}")
    print(f"Most Similar Command: {most_similar_command}\n")
    return reply
# get_text_from_voice("urdu.wav")
import gradio as gr


# Web UI: one audio input routed through transcribe_the_command, with the
# returned reply string shown as text. The title and description name the
# Urdu Whisper-medium checkpoint that asr_pipe loads.
# NOTE(review): the `gr.inputs` namespace is deprecated in newer Gradio
# releases; confirm the installed version before switching to `gr.Audio`.
iface = gr.Interface(
    fn=transcribe_the_command,
    inputs=gr.inputs.Audio(label="Recorded Audio"),
    outputs="text",
    title="Whisper Medium Urdu",
    description="Realtime demo for Urdu speech recognition using a fine-tuned Whisper medium model.",
)

iface.launch()