"""Gradio demo: transcribe Urdu speech with Whisper and match it to a command."""

from difflib import SequenceMatcher
from numbers import Integral

import gradio as gr
import soundfile as sf
from transformers import pipeline

# Load ASR model
asr_model = "Abdullah17/whisper-small-urdu"
asr_pipe = pipeline("automatic-speech-recognition", model=asr_model)

# Candidate command phrases; each transcript is compared against every entry
# and the index of the closest one is reported as the "reply".
commands = [
    "نمائندے ایجنٹ نمائندہ",
    "سم ایکٹیویٹ",
    "سم بلاک بند",
    "موبائل پیکیجز انٹرنیٹ پیکیج",
    "چالان جمع",
    "گانا سنانا",
]

# Scratch file each recording is written to before it is handed to the
# pipeline. It is overwritten on every recording.
RECORDING_PATH = "recorded_audio.wav"


def find_most_similar_command(statement, command_list):
    """Return the command most similar to *statement* and its index.

    Similarity is ``SequenceMatcher(None, statement, command).ratio()``.
    Only a strictly higher ratio replaces the current best, so ties keep the
    earliest command.

    Args:
        statement: Text to match (the transcript).
        command_list: Iterable of candidate command strings.

    Returns:
        A ``(best_match, reply)`` tuple: the winning command and its position
        in *command_list*. Both are ``None`` when no command scores above
        zero (including when *command_list* is empty).
    """
    best_match = None
    # Initialised up front so a transcript that matches nothing returns
    # (None, None) instead of raising UnboundLocalError.
    reply = None
    highest_similarity = 0
    for index, command in enumerate(command_list):
        similarity = SequenceMatcher(None, statement, command).ratio()
        print(similarity)
        if similarity > highest_similarity:
            highest_similarity = similarity
            best_match = command
            reply = index
    return best_match, reply


def transcribe_the_command(audio_list):
    """Transcribe recorded audio and match each transcript to a command.

    Args:
        audio_list: Either an iterable of ``(audio_data, sample_rate)`` pairs,
            or a single ``(sample_rate, audio_data)`` tuple as delivered by a
            numpy-typed Gradio audio input, or ``None`` when nothing was
            recorded.

    Returns:
        A list of ``(transcript, most_similar_command, reply)`` tuples, one
        per recording; empty when *audio_list* is ``None`` or empty.
    """
    if audio_list is None:
        return []

    # A lone (sample_rate, samples) tuple is one recording, not a batch.
    # Wrap it so the loop below sees the (audio_data, sample_rate) order.
    if (
        isinstance(audio_list, tuple)
        and len(audio_list) == 2
        and isinstance(audio_list[0], Integral)
    ):
        sample_rate, audio_data = audio_list
        audio_list = [(audio_data, sample_rate)]

    transcriptions = []
    for audio_data, sample_rate in audio_list:
        # The samples are written unchanged; no channel mixing is done here.
        sf.write(RECORDING_PATH, audio_data, sample_rate)

        result = asr_pipe(RECORDING_PATH)
        # A single input yields one dict; tolerate a list of dicts as well.
        if isinstance(result, list):
            result = result[0]
        transcript = result["text"]

        most_similar_command, reply = find_most_similar_command(
            transcript, commands
        )
        transcriptions.append((transcript, most_similar_command, reply))
    return transcriptions


# NOTE(review): `gr.inputs.Audio` is the legacy input namespace; confirm the
# installed Gradio version still provides it and accepts `duration`.
iface = gr.Interface(
    fn=transcribe_the_command,
    inputs=gr.inputs.Audio(label="Recorded Audio",source="microphone", duration=6),
    outputs="text",
    title="Whisper Small Urdu Command",
    description=(
        "Realtime demo for Urdu speech recognition using a fine-tuned "
        "Whisper small model and outputting the estimated command on the "
        "basis of speech transcript."
    ),
)
iface.launch()