yama committed
Commit 631631a
1 Parent(s): 3f207e8

Update app.py

Files changed (1)
  1. app.py +60 -59
app.py CHANGED
@@ -374,69 +374,70 @@ demo = gr.Blocks(title=title)
 demo.encrypt = False
 
 with demo:
-    gr.Markdown('''
-    <div>
-    <h1 style='text-align: center'>Whisper speaker diarization</h1>
-    This space uses Whisper models from <a href='https://github.com/openai/whisper' target='_blank'><b>OpenAI</b></a> with <a href='https://github.com/guillaumekln/faster-whisper' target='_blank'><b>CTranslate2</b></a>, a fast inference engine for Transformer models, to recognize the speech (4 times faster than the original OpenAI model with the same accuracy),
-    and an ECAPA-TDNN model from <a href='https://github.com/speechbrain/speechbrain' target='_blank'><b>SpeechBrain</b></a> to encode and classify speakers.
-    </div>
-    ''')
-
-    with gr.Row():
-        gr.Markdown('''
-        ### Transcribe a YouTube link using OpenAI Whisper
-        ##### 1. Using OpenAI's Whisper model to separate the audio into segments and generate transcripts.
-        ##### 2. Generating speaker embeddings for each segment.
-        ##### 3. Applying agglomerative clustering on the embeddings to identify the speaker of each segment.
-        ''')
-
-    with gr.Row():
-        gr.Markdown('''
-        ### You can test with the following examples:
-        ''')
-    examples = gr.Examples(examples=
-        ["https://www.youtube.com/watch?v=j7BfEzAFuYc&t=32s",
-         "https://www.youtube.com/watch?v=-UX0X45sYe4",
-         "https://www.youtube.com/watch?v=7minSgqi-Gw"],
-        label="Examples", inputs=[youtube_url_in])
-
-    with gr.Row():
-        with gr.Column():
-            youtube_url_in.render()
-            download_youtube_btn = gr.Button("Download Youtube video")
-            download_youtube_btn.click(get_youtube, [youtube_url_in], [video_in])
-            print(video_in)
-
-    with gr.Row():
-        with gr.Column():
-            video_in.render()
-        with gr.Column():
-            gr.Markdown('''
-            ##### Here you can start the transcription process.
-            ##### Please select the source language for transcription.
-            ##### You can select a range of assumed numbers of speakers.
-            ''')
-            selected_source_lang.render()
-            selected_whisper_model.render()
-            number_speakers.render()
-            transcribe_btn = gr.Button("Transcribe audio and diarization")
-            transcribe_btn.click(speech_to_text,
-                                 [video_in, selected_source_lang, selected_whisper_model, number_speakers],
-                                 [transcription_df, system_info, download_transcript])
-
-    with gr.Row():
-        gr.Markdown('''
-        ##### Here you will get the transcription output
-        ##### ''')
-
-    with gr.Row():
-        with gr.Column():
-            download_transcript.render()
-            transcription_df.render()
-            system_info.render()
-            gr.Markdown(
-                '''<center><img src='https://visitor-badge.glitch.me/badge?page_id=WhisperDiarizationSpeakers' alt='visitor badge'><a href="https://opensource.org/licenses/Apache-2.0"><img src='https://img.shields.io/badge/License-Apache_2.0-blue.svg' alt='License: Apache 2.0'></a></center>''')
+    with gr.Tab("Whisper speaker diarization"):
+        gr.Markdown('''
+        <div>
+        <h1 style='text-align: center'>Whisper speaker diarization</h1>
+        This space uses Whisper models from <a href='https://github.com/openai/whisper' target='_blank'><b>OpenAI</b></a> with <a href='https://github.com/guillaumekln/faster-whisper' target='_blank'><b>CTranslate2</b></a>, a fast inference engine for Transformer models, to recognize the speech (4 times faster than the original OpenAI model with the same accuracy),
+        and an ECAPA-TDNN model from <a href='https://github.com/speechbrain/speechbrain' target='_blank'><b>SpeechBrain</b></a> to encode and classify speakers.
+        </div>
+        ''')
+
+        with gr.Row():
+            gr.Markdown('''
+            ### Transcribe a YouTube link using OpenAI Whisper
+            ##### 1. Using OpenAI's Whisper model to separate the audio into segments and generate transcripts.
+            ##### 2. Generating speaker embeddings for each segment.
+            ##### 3. Applying agglomerative clustering on the embeddings to identify the speaker of each segment.
+            ''')
+
+        with gr.Row():
+            gr.Markdown('''
+            ### You can test with the following examples:
+            ''')
+        examples = gr.Examples(examples=
+            ["https://www.youtube.com/watch?v=j7BfEzAFuYc&t=32s",
+             "https://www.youtube.com/watch?v=-UX0X45sYe4",
+             "https://www.youtube.com/watch?v=7minSgqi-Gw"],
+            label="Examples", inputs=[youtube_url_in])
+
+        with gr.Row():
+            with gr.Column():
+                youtube_url_in.render()
+                download_youtube_btn = gr.Button("Download Youtube video")
+                download_youtube_btn.click(get_youtube, [youtube_url_in], [video_in])
+                print(video_in)
+
+        with gr.Row():
+            with gr.Column():
+                video_in.render()
+            with gr.Column():
+                gr.Markdown('''
+                ##### Here you can start the transcription process.
+                ##### Please select the source language for transcription.
+                ##### You can select a range of assumed numbers of speakers.
+                ''')
+                selected_source_lang.render()
+                selected_whisper_model.render()
+                number_speakers.render()
+                transcribe_btn = gr.Button("Transcribe audio and diarization")
+                transcribe_btn.click(speech_to_text,
+                                     [video_in, selected_source_lang, selected_whisper_model, number_speakers],
+                                     [transcription_df, system_info, download_transcript])
+
+        with gr.Row():
+            gr.Markdown('''
+            ##### Here you will get the transcription output
+            ##### ''')
+
+        with gr.Row():
+            with gr.Column():
+                download_transcript.render()
+                transcription_df.render()
+                system_info.render()
+                gr.Markdown(
+                    '''<center><img src='https://visitor-badge.glitch.me/badge?page_id=WhisperDiarizationSpeakers' alt='visitor badge'><a href="https://opensource.org/licenses/Apache-2.0"><img src='https://img.shields.io/badge/License-Apache_2.0-blue.svg' alt='License: Apache 2.0'></a></center>''')
 
 demo.launch(debug=True)
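
The only substantive change in this hunk is the new gr.Tab("Whisper speaker diarization") wrapper; every other removed/added pair differs only by one level of indentation. A minimal sketch of the resulting structure, assuming Gradio's gr.Blocks/gr.Tab context-manager API (the components below are placeholders, not the app's real ones):

```python
import gradio as gr

# Sketch of the post-commit layout: rows that previously sat directly
# under `with demo:` are now nested one level deeper, inside a Tab.
with gr.Blocks(title="Whisper speaker diarization") as demo:
    with gr.Tab("Whisper speaker diarization"):
        with gr.Row():
            url_in = gr.Textbox(label="Youtube url")  # placeholder for youtube_url_in
            download_btn = gr.Button("Download Youtube video")
    # Sibling tabs can now be added without touching the existing layout:
    # with gr.Tab("Another tool"):
    #     ...

demo.launch(debug=True)
```

On its own the single tab changes nothing visible, but it leaves room for additional tabs in later commits.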
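The app's description inside the diff summarizes the pipeline: Whisper splits the audio into transcribed segments, ECAPA-TDNN embeds each segment, and agglomerative clustering groups the embeddings by speaker. A minimal sketch of the clustering step, assuming scikit-learn; the random matrix stands in for real ECAPA-TDNN vectors, and the fixed n_clusters stands in for the app's number_speakers input:

```python
import numpy as np
from sklearn.cluster import AgglomerativeClustering

# Stand-in for per-segment speaker embeddings, shape (n_segments, embedding_dim).
embeddings = np.random.rand(12, 192)

# One cluster per assumed speaker; each row's label becomes the
# "SPEAKER n" tag attached to the corresponding transcript segment.
labels = AgglomerativeClustering(n_clusters=2).fit_predict(embeddings)
for i, label in enumerate(labels):
    print(f"segment {i}: SPEAKER {label + 1}")
```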
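The diff also wires download_youtube_btn to a get_youtube function whose body lies outside this hunk. Purely as an illustration of what such a helper commonly looks like (an assumption, not the app's actual code), using pytube:

```python
from pytube import YouTube

def get_youtube(video_url):
    # Hypothetical stand-in for the app's helper: download the highest-
    # resolution progressive stream and return its local file path,
    # which would then populate the video_in component.
    return YouTube(video_url).streams.get_highest_resolution().download()
```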