import os
import tempfile
from difflib import SequenceMatcher

import soundfile as sf
from transformers import pipeline

# Load the ASR model once, at import time, so that every transcription call
# reuses the same pipeline object instead of reloading the weights.
asr_model = "Abdullah17/whisper-small-urdu"
asr_pipe = pipeline("automatic-speech-recognition", model=asr_model)

# Urdu command phrases that a transcript is compared against. The position of
# a phrase in this list is what find_most_similar_command() reports as `reply`.
# Approximate English glosses, by index (translations are a reading aid only):
#   0: representative / agent
#   1: SIM activate
#   2: SIM block / close
#   3: mobile packages / internet package
#   4: challan (bill) submit
#   5: play a song
commands = [
    "نمائندے ایجنٹ نمائندہ",
    "سم ایکٹیویٹ",
    "سم بلاک بند",
    "موبائل پیکیجز انٹرنیٹ پیکیج",
    "چالان جمع",
    "گانا سنانا"
]

# Function to transcribe the command from audio
def transcribe_the_command(audio_list):
    """Transcribe recordings and match each transcript to a known command.

    Args:
        audio_list: An iterable of ``(audio_data, sample_rate)`` pairs. A
            single ``(sample_rate, audio_data)`` tuple -- the layout Gradio's
            numpy audio input passes to a callback -- is also accepted and
            treated as one recording. ``None`` (nothing was recorded) yields
            an empty result.

    Returns:
        A list with one ``(transcript, most_similar_command, reply)`` tuple
        per recording. The last two items are whatever
        ``find_most_similar_command`` returns for that transcript.
    """
    if audio_list is None:
        return []

    # Gradio delivers one recording as (sample_rate, data), whereas the loop
    # below expects a batch of (data, sample_rate) pairs. Normalise that
    # single-recording shape into a one-element batch.
    if (
        isinstance(audio_list, tuple)
        and len(audio_list) == 2
        and isinstance(audio_list[0], int)
    ):
        sample_rate, audio_data = audio_list
        audio_list = [(audio_data, sample_rate)]

    transcriptions = []

    # Process each audio in the batch
    for audio_data, sample_rate in audio_list:
        # Write to a private temporary directory rather than a fixed file in
        # the working directory, so concurrent requests cannot overwrite each
        # other's audio and no stray file is left behind.
        with tempfile.TemporaryDirectory() as tmp_dir:
            file_name = os.path.join(tmp_dir, "recorded_audio.wav")
            sf.write(file_name, audio_data, sample_rate)
            result = asr_pipe(file_name)

        # For a single input the pipeline returns one dict; tolerate a list
        # as well in case a batched result is ever returned.
        if isinstance(result, list):
            result = result[0]
        transcript = result["text"]

        most_similar_command, reply = find_most_similar_command(transcript, commands)
        transcriptions.append((transcript, most_similar_command, reply))

    return transcriptions
# from transformers import pipeline
# asr_pipe = pipeline("automatic-speech-recognition", model="Abdullah17/whisper-small-urdu")
# from difflib import SequenceMatcher

# # List of commands
# commands = [
#     "نمائندے ایجنٹ نمائندہ",
#     "  سم  ایکٹیویٹ ",
#     " سم  بلاک بند ",
#     "موبائل پیکیجز انٹرنیٹ پیکیج",
#     " چالان جمع ",
#     " گانا سنانا"
# ]
# # replies = [
# # 1,2,
# # ]
# Function to find the most similar command
def find_most_similar_command(statement, command_list):
    """Return the command closest to *statement* and its position in the list.

    Similarity is ``difflib.SequenceMatcher(None, statement, command).ratio()``.

    Args:
        statement: Text to match, e.g. a speech transcript.
        command_list: Iterable of candidate command strings.

    Returns:
        A ``(best_match, reply)`` tuple: the highest-scoring command and its
        zero-based index in ``command_list``. On a tie the earliest command
        wins. Both values are ``None`` when no command scores above zero,
        which includes an empty ``command_list``.
    """
    best_match = None
    # Initialised up front so the return below is always defined, even when
    # no command beats the starting similarity of 0.
    reply = None
    highest_similarity = 0
    for index, command in enumerate(command_list):
        similarity = SequenceMatcher(None, statement, command).ratio()
        # Diagnostic output: one similarity score per candidate command.
        print(similarity)
        # Strict comparison keeps the first of several equally good matches.
        if similarity > highest_similarity:
            highest_similarity = similarity
            best_match = command
            reply = index

    return best_match, reply
# x
# get_text_from_voice("urdu.wav")
import gradio as gr


# Microphone recorder used as the demo's only input.
# NOTE(review): `gr.inputs.Audio` is Gradio's legacy input namespace -- confirm
# the installed Gradio version still provides it and accepts `duration`.
microphone_input = gr.inputs.Audio(
    label="Recorded Audio",
    source="microphone",
    duration=6,
)

demo_description = "Realtime demo for Urdu speech recognition using a fine-tuned Whisper small model and outputting the estimated command on the basis of speech transcript."

# Wire the recorder to the transcription callback and show its result as text.
iface = gr.Interface(
    fn=transcribe_the_command,
    inputs=microphone_input,
    outputs="text",
    title="Whisper Small Urdu Command",
    description=demo_description,
)

# Start the web app.
iface.launch()