Spaces:

Hunzla
/

whisperaudio

Runtime error

File size: 2,147 Bytes

b0b1ade
7c7805e
 
 
 
 
 
711d7d7
 
 
 
 
7c7805e
 
711d7d7
 
 
 
 
 
7c7805e
 
 
 
 
 
 
 
c482354
7c7805e
 
 
 
81c1590
7c7805e
adcf092
7c7805e
2105e0b
963f3fd
aac73e4
963f3fd
aac73e4
ba51995
aac73e4
 
963f3fd
7c7805e
 
 
39e3e24
 
7c7805e
 
b0b1ade
 
 
 
7c7805e
bbb0ce5
b0b1ade
7c7805e
 
b0b1ade
 
ef798a3

from difflib import SequenceMatcher

from transformers import pipeline

# Speech-recognition pipeline for the "ihanif/whisper-medium-urdu" checkpoint.
# It is created once at import time and shared by every transcription call.
asr_pipe = pipeline(
    "automatic-speech-recognition",
    model="ihanif/whisper-medium-urdu",
)

# Each supported voice command together with the reply shown for it.
_COMMAND_REPLY_PAIRS = [
    ("کمپیوٹر، کھیل کھیلو", "کیا آپ کھیل دیکھنا چاہتے ہیں؟"),
    ("میوزک چلاؤ موسیقی", "کیا آپ موسیقی سننا چاہتے ہیں؟"),
    ("روشنی کم کریں", "کیا آپ روشنی کم کرنا چاہتے ہیں؟"),
    ("آج کی تاریخ کیا ہے؟", "کیا آپ تاریخ جاننا چاہتے ہیں؟"),
    ("مجھے ایک لطیفہ سنائیں۔", "کیا آپ لطیفہ سننا چاہتے ہیں؟"),
]

# Parallel lists derived from the pairs above: the entry at a given position
# in `replies` belongs to the command at the same position in `commands`.
commands = [command for command, _ in _COMMAND_REPLY_PAIRS]
replies = [reply for _, reply in _COMMAND_REPLY_PAIRS]
# Function to find the most similar command
def find_most_similar_command(statement, command_list, reply_list=None):
    """Return the command closest to *statement* and the reply paired with it.

    Similarity is the ``difflib.SequenceMatcher`` ratio between *statement*
    and each candidate command. The first command with the strictly highest
    ratio wins.

    Args:
        statement: Text to match (e.g. a transcript).
        command_list: Candidate command strings.
        reply_list: Replies parallel to *command_list* (same positions).
            Defaults to the module-level ``replies`` list. If it is shorter
            than *command_list*, the commands without a reply are ignored.

    Returns:
        A ``(best_command, reply)`` tuple, or ``(None, None)`` when there are
        no candidates or no candidate has a similarity above zero.
    """
    if reply_list is None:
        reply_list = replies

    best_match = None
    best_reply = None
    highest_similarity = 0
    # Walk commands and replies in lockstep so the reply always belongs to the
    # command being scored, regardless of how many earlier commands matched.
    for command, reply in zip(command_list, reply_list):
        similarity = SequenceMatcher(None, statement, command).ratio()
        print(similarity)
        if similarity > highest_similarity:
            highest_similarity = similarity
            best_match = command
            best_reply = reply
            print(best_reply)

    return best_match, best_reply
def transcribe_the_command(audio):
    """Transcribe recorded audio and return the reply for the closest command.

    The audio is written to ``recorded_audio.wav`` exactly as received, then
    transcribed with ``asr_pipe``; the transcript is matched against
    ``commands`` via ``find_most_similar_command``.

    Args:
        audio: A ``(sample_rate, audio_data)`` pair, or ``None`` when nothing
            was recorded. Presumably this is the value the Gradio audio input
            supplies; confirm against the interface configuration.

    Returns:
        The reply selected by ``find_most_similar_command``, or an empty
        string when *audio* is ``None``.
    """
    # Nothing was recorded: there is nothing to unpack or transcribe.
    if audio is None:
        return ""

    import soundfile as sf

    sample_rate, audio_data = audio
    file_name = "recorded_audio.wav"
    sf.write(file_name, audio_data, sample_rate)
    print(file_name)

    transcript = asr_pipe(file_name)["text"]
    most_similar_command, reply = find_most_similar_command(transcript, commands)
    print(f"Given Statement: {transcript}")
    print(f"Most Similar Command: {most_similar_command}\n")
    print(reply)

    return reply
# get_text_from_voice("urdu.wav")
import gradio as gr


# Web UI: microphone audio goes to transcribe_the_command and the returned
# reply is shown as text.
# NOTE(review): `gr.inputs.Audio` is part of Gradio's legacy `gr.inputs`
# namespace; newer Gradio releases reportedly replace it with `gr.Audio`.
# Confirm against the Gradio version this app is pinned to.
iface = gr.Interface(
    fn=transcribe_the_command,
    inputs=gr.inputs.Audio(label="Recorded Audio", source="microphone"),
    outputs="text",
    # Title and description name the model actually loaded by this app
    # ("ihanif/whisper-medium-urdu"), not a Hindi/small checkpoint.
    title="Whisper Medium Urdu",
    description="Realtime demo for Urdu voice-command recognition using a fine-tuned Whisper medium model.",
)

iface.launch()