"""Urdu voice-command demo.

Transcribes recorded speech with a fine-tuned Whisper model, matches the
transcript against a small list of known commands, and answers with the reply
paired with the closest command.
"""

import os
import tempfile
from difflib import SequenceMatcher

import gradio as gr
import soundfile as sf
from transformers import pipeline

asr_pipe = pipeline("automatic-speech-recognition", model="ihanif/whisper-medium-urdu")

# List of commands
commands = [
    "کمپیوٹر، کھیل کھیلو",
    "میوزک چلاؤ موسیقی",
    "روشنی کم کریں",
    "آج کی تاریخ کیا ہے؟",
    "مجھے ایک لطیفہ سنائیں۔",
]

# replies[i] is the answer returned when commands[i] is the best match.
replies = [
    "کیا آپ کھیل دیکھنا چاہتے ہیں؟",
    "کیا آپ موسیقی سننا چاہتے ہیں؟",
    "کیا آپ روشنی کم کرنا چاہتے ہیں؟",
    "کیا آپ تاریخ جاننا چاہتے ہیں؟",
    "کیا آپ لطیفہ سننا چاہتے ہیں؟",
]

# Returned when no command is similar enough to the transcript.
UNKNOWN_COMMAND = "unknown"
UNKNOWN_REPLY = "آرے بھائی، کہنا کیا چاہتے ہو؟"


# Function to find the most similar command
def find_most_similar_command(statement, command_list, min_similarity=0.0):
    """Return the command closest to *statement* and its paired reply.

    Similarity is ``difflib.SequenceMatcher(...).ratio()``, a float in
    ``[0, 1]``. The reply is looked up in the module-level ``replies`` list at
    the winning command's position in *command_list*, so the two lists must be
    aligned by index.

    Args:
        statement: Text to match (here, the speech transcript).
        command_list: Sequence of candidate command strings.
        min_similarity: A command only counts as a match when its ratio is
            strictly greater than this value. The default of 0.0 accepts any
            command that shares at least some text with *statement*; raise it
            to make the fallback reply trigger more readily.

    Returns:
        ``(best_command, reply)``. When *command_list* is empty or no command
        exceeds *min_similarity*, ``(UNKNOWN_COMMAND, UNKNOWN_REPLY)``.
    """
    best_index = None
    best_match = None
    highest_similarity = min_similarity

    for index, command in enumerate(command_list):
        similarity = SequenceMatcher(None, statement, command).ratio()
        # Strict comparison: on ties the earliest command wins.
        if similarity > highest_similarity:
            highest_similarity = similarity
            best_index = index
            best_match = command

    # Decide on the fallback only after every candidate has been scored, so a
    # weak later candidate can never discard an earlier good match.
    if best_index is None:
        return UNKNOWN_COMMAND, UNKNOWN_REPLY
    return best_match, replies[best_index]


def transcribe_the_command(audio):
    """Transcribe a recording and return the reply for the closest command.

    Args:
        audio: ``(sample_rate, audio_data)`` as unpacked below, or ``None``
            when nothing was recorded.

    Returns:
        The reply string chosen by ``find_most_similar_command``; the fallback
        reply when *audio* is ``None``.
    """
    if audio is None:
        return UNKNOWN_REPLY

    sample_rate, audio_data = audio

    # Write into a per-call temporary directory so simultaneous requests do
    # not overwrite each other's recording and no file is left behind.
    with tempfile.TemporaryDirectory() as tmp_dir:
        file_name = os.path.join(tmp_dir, "recorded_audio.wav")
        sf.write(file_name, audio_data, sample_rate)
        print(file_name)
        transcript = asr_pipe(file_name)["text"]

    most_similar_command, reply = find_most_similar_command(transcript, commands)
    print(f"Given Statement: {transcript}")
    print(f"Most Similar Command: {most_similar_command}\n")
    return reply


# NOTE(review): the `gr.inputs` namespace is not available in newer Gradio
# major versions; switch to `gr.Audio(...)` when upgrading — confirm against
# the installed Gradio version.
iface = gr.Interface(
    fn=transcribe_the_command,
    inputs=gr.inputs.Audio(label="Recorded Audio"),
    outputs="text",
    title="Whisper Medium Urdu",
    description="Realtime demo for Urdu speech recognition using a fine-tuned Whisper medium model.",
)

iface.launch()