File size: 2,453 Bytes
b0b1ade
e50170f
7c7805e
e50170f
 
 
 
 
7c7805e
62ff93b
e50170f
 
571322c
e50170f
 
7c7805e
e50170f
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
ec27395
e50170f
 
 
 
7c7805e
 
 
 
 
 
c482354
7c7805e
 
 
ec27395
d14d221
adcf092
7c7805e
e50170f
7c7805e
b0b1ade
 
 
 
7c7805e
83d70c9
b0b1ade
7d2b240
 
b0b1ade
 
ef798a3
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
from transformers import pipeline
import soundfile as sf

# Speech-recognition pipeline, built once at import time and shared by every
# call to transcribe_the_command. `asr_model` is the model identifier handed
# to transformers' `pipeline` (presumably a Hugging Face Hub repo id).
asr_model = "Abdullah17/whisper-small-urdu"
asr_pipe = pipeline("automatic-speech-recognition", model=asr_model)

# Known voice commands. Each entry is one command written as a
# space-separated group of Urdu keywords; transcripts are fuzzy-matched
# against these strings, and the position of the best match in this list is
# what find_most_similar_command returns as `reply`, so the order matters.
commands = [
    "نمائندے ایجنٹ نمائندہ",
    "سم ایکٹیویٹ",
    "سم بلاک بند",
    "موبائل پیکیجز انٹرنیٹ پیکیج",
    "چالان جمع",
    "گانا سنانا"
]

# Function to transcribe the command from audio
def transcribe_the_command(audio_list):
    """Transcribe recordings and match each transcript to a known command.

    Args:
        audio_list: An iterable of ``(audio_data, sample_rate)`` pairs.
            ``None`` is treated as "no recordings". A single
            ``(sample_rate, audio_data)`` tuple whose first item is an
            integer is also accepted and treated as one recording.

    Returns:
        A list with one ``(transcript, most_similar_command, reply)`` tuple
        per recording, in input order. Empty when there is nothing to
        transcribe.
    """
    import numbers
    import os
    import tempfile

    if audio_list is None:
        return []

    # NOTE(review): Gradio's Audio component (numpy type) is documented to
    # hand the callback one ``(sample_rate, data)`` tuple rather than a batch
    # of ``(data, sample_rate)`` pairs - confirm against the installed Gradio
    # version. A leading integer can only be a sample rate, never a pair, so
    # the two shapes cannot be confused.
    if (
        isinstance(audio_list, tuple)
        and len(audio_list) == 2
        and isinstance(audio_list[0], numbers.Integral)
    ):
        single_rate, single_audio = audio_list
        audio_list = [(single_audio, single_rate)]

    transcriptions = []

    # Use a private scratch directory instead of a fixed file in the working
    # directory: concurrent calls no longer overwrite each other's audio and
    # the file is removed when the call finishes.
    with tempfile.TemporaryDirectory() as scratch_dir:
        file_name = os.path.join(scratch_dir, "recorded_audio.wav")

        for audio_data, sample_rate in audio_list:
            sf.write(file_name, audio_data, sample_rate)

            # The pipeline result for a single file is a dict with a "text"
            # key; accept a one-element list as well so both call
            # conventions work.
            result = asr_pipe(file_name)
            if isinstance(result, list):
                result = result[0]
            transcript = result["text"]

            most_similar_command, reply = find_most_similar_command(transcript, commands)
            transcriptions.append((transcript, most_similar_command, reply))

    return transcriptions
# from transformers import pipeline
# asr_pipe = pipeline("automatic-speech-recognition", model="Abdullah17/whisper-small-urdu")
# from difflib import SequenceMatcher

# # List of commands
# commands = [
#     "نمائندے ایجنٹ نمائندہ",
#     "  سم  ایکٹیویٹ ",
#     " سم  بلاک بند ",
#     "موبائل پیکیجز انٹرنیٹ پیکیج",
#     " چالان جمع ",
#     " گانا سنانا"
# ]
# # replies = [
# # 1,2,
# # ]
# Function to find the most similar command
def find_most_similar_command(statement, command_list):
    """Return the command most similar to ``statement`` and its index.

    Similarity is ``difflib.SequenceMatcher(None, statement, command).ratio()``.
    A command only wins if its ratio is strictly greater than the best seen
    so far, so the earliest command wins ties and a ratio of 0 never matches.

    Args:
        statement: The text to match (e.g. a speech transcript).
        command_list: Candidate command strings, in a meaningful order.

    Returns:
        A ``(best_match, reply)`` tuple, where ``reply`` is the index of
        ``best_match`` in ``command_list``. Both are ``None`` when
        ``command_list`` is empty or no command has a similarity above 0.
    """
    # Local import: this file has no active module-level import of difflib.
    from difflib import SequenceMatcher

    best_match = None
    reply = None  # stays None when nothing matches, instead of being unbound
    highest_similarity = 0

    for index, command in enumerate(command_list):
        similarity = SequenceMatcher(None, statement, command).ratio()
        if similarity > highest_similarity:
            highest_similarity = similarity
            best_match = command
            reply = index

    return best_match, reply
# x
# get_text_from_voice("urdu.wav")
import gradio as gr


# Microphone capture component for the demo UI.
# NOTE(review): `gr.inputs` appears to be Gradio's legacy component namespace
# and the `duration` keyword may not be accepted by every release - confirm
# both against the pinned Gradio version.
recorded_audio_input = gr.inputs.Audio(
    source="microphone",
    duration=6,
    label="Recorded Audio",
)

# Connect the transcription callback to the microphone input and a plain-text
# output.
iface = gr.Interface(
    title="Whisper Small Urdu Command",
    description="Realtime demo for Urdu speech recognition using a fine-tuned Whisper small model and outputting the estimated command on the basis of speech transcript.",
    fn=transcribe_the_command,
    inputs=recorded_audio_input,
    outputs="text",
)

# Start serving the interface as soon as this module is executed.
iface.launch()