Hunzla committed on
Commit
c5fe8de
1 Parent(s): 64bceb8

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +3 -8
app.py CHANGED
@@ -37,12 +37,7 @@ def transcribe_with_diarization(audio_path):
37
  # Export the interval audio as a temporary WAV file
38
  torchaudio.save("interval_audio.wav", interval_audio,sample_rate)
39
  transcript = asr_pipe("interval_audio.wav")
40
- print(transcript)
41
- start_time = segment.start
42
- end_time = segment.end
43
- label = track[0].label() # Extract the label manually
44
- speaker_audio = audio_path + f"[{start_time:.2f},{end_time:.2f}]"
45
- transcript = asr_pipe(speaker_audio)[0]["text"]
46
  transcripts.append(transcript)
47
 
48
  # Combine the transcriptions from all speakers
@@ -53,10 +48,10 @@ iface = gr.Interface(
53
  fn=transcribe_with_diarization,
54
  inputs=[
55
  gr.File(label="Audio File"),
56
- gr.Audio(source="microphone", type="filepath")
57
  ],
58
  outputs="text",
59
- title="Whisper small Hindi with Speaker Diarization",
60
  description="Real-time demo for Hindi speech recognition using a fine-tuned Whisper large model with speaker diarization.",
61
  )
62
 
 
37
  # Export the interval audio as a temporary WAV file
38
  torchaudio.save("interval_audio.wav", interval_audio,sample_rate)
39
  transcript = asr_pipe("interval_audio.wav")
40
+ print(transcript)
 
 
 
 
 
41
  transcripts.append(transcript)
42
 
43
  # Combine the transcriptions from all speakers
 
48
  fn=transcribe_with_diarization,
49
  inputs=[
50
  gr.File(label="Audio File"),
51
+ gr.Audio(source="microphone", type="filepath", filetype="mp3")
52
  ],
53
  outputs="text",
54
+ title="Whisper Large Hindi with Speaker Diarization",
55
  description="Real-time demo for Hindi speech recognition using a fine-tuned Whisper large model with speaker diarization.",
56
  )
57