Staqt committed on
Commit
1996e15
1 Parent(s): f4be82e

Update app.py

Files changed (1)
  1. app.py +2 -10
app.py CHANGED
@@ -1,4 +1,4 @@
-# import whisper
+import whisper
 from faster_whisper import WhisperModel
 import datetime
 import subprocess
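For context, the faster_whisper import kept alongside the re-enabled whisper import is typically used as in the sketch below; the model size, device settings, and audio path are assumptions for illustration, not values from this commit:

```python
# Hypothetical usage sketch; "large-v2", the device settings, and
# "audio.wav" are assumptions, not values from this commit.
from faster_whisper import WhisperModel

model = WhisperModel("large-v2", device="cuda", compute_type="float16")

# transcribe() returns a lazy generator of segments plus audio info
segments, info = model.transcribe("audio.wav", beam_size=5)
for segment in segments:
    print(f"[{segment.start:.2f}s -> {segment.end:.2f}s] {segment.text}")
```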
@@ -218,13 +218,7 @@ def get_youtube(video_url):
 
 def speech_to_text(video_file_path, selected_source_lang, whisper_model, num_speakers):
     """
-    # Transcribe youtube link using OpenAI Whisper
-    1. Using Open AI's Whisper model to seperate audio into segments and generate transcripts.
-    2. Generating speaker embeddings for each segments.
-    3. Applying agglomerative clustering on the embeddings to identify the speaker for each segment.
 
-    Speech Recognition is based on models from OpenAI Whisper https://github.com/openai/whisper
-    Speaker diarization model and pipeline from by https://github.com/pyannote/pyannote-audio
     """
 
     # model = whisper.load_model(whisper_model)
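The removed docstring summarized the pipeline: Whisper splits the audio into segments and transcribes them, a speaker embedding is computed for each segment, and agglomerative clustering over those embeddings assigns a speaker to each segment. A minimal sketch of the clustering step follows; the helper name is illustrative and scikit-learn stands in for the clustering, while the embeddings themselves would come from a pyannote speaker-embedding model per the linked repo:

```python
# Illustrative sketch of the clustering step from the removed docstring;
# the helper name and the embeddings' origin are assumptions.
import numpy as np
from sklearn.cluster import AgglomerativeClustering

def assign_speakers(segment_embeddings: np.ndarray, num_speakers: int) -> list[str]:
    """Cluster one speaker embedding per transcript segment into num_speakers groups."""
    labels = AgglomerativeClustering(n_clusters=num_speakers).fit_predict(segment_embeddings)
    # labels[i] is the cluster index of segment i
    return [f"SPEAKER {label + 1}" for label in labels]
```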
@@ -405,9 +399,7 @@ with demo:
             video_in.render()
         with gr.Column():
             gr.Markdown('''
-                ##### Here you can start the transcription process.
-                ##### Please select the source language for transcription.
-                ##### You can select a range of assumed numbers of speakers.
+
             ''')
             selected_source_lang.render()
             selected_whisper_model.render()
 
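The last hunk edits a gr.Blocks layout that uses Gradio's render() pattern: components are created up front and only placed into the layout later. A minimal sketch of that pattern, with illustrative component choices:

```python
# Illustrative sketch of the render() pattern; component names match the
# diff, but the choices/values here are assumptions.
import gradio as gr

selected_source_lang = gr.Dropdown(
    choices=["en", "es", "fr"], value="en", label="Spoken language")
selected_whisper_model = gr.Dropdown(
    choices=["base", "medium", "large-v2"], value="base", label="Whisper model")

with gr.Blocks() as demo:
    with gr.Column():
        # components defined above are placed into the layout here
        selected_source_lang.render()
        selected_whisper_model.render()

demo.launch()
```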