Spaces:
Build error
Build error
Update app.py
Browse files
app.py
CHANGED
@@ -1,4 +1,4 @@
|
|
1 |
-
|
2 |
from faster_whisper import WhisperModel
|
3 |
import datetime
|
4 |
import subprocess
|
@@ -218,13 +218,7 @@ def get_youtube(video_url):
|
|
218 |
|
219 |
def speech_to_text(video_file_path, selected_source_lang, whisper_model, num_speakers):
|
220 |
"""
|
221 |
-
# Transcribe youtube link using OpenAI Whisper
|
222 |
-
1. Using OpenAI's Whisper model to separate audio into segments and generate transcripts.
|
223 |
-
2. Generating speaker embeddings for each segment.
|
224 |
-
3. Applying agglomerative clustering on the embeddings to identify the speaker for each segment.
|
225 |
|
226 |
-
Speech Recognition is based on models from OpenAI Whisper https://github.com/openai/whisper
|
227 |
-
Speaker diarization model and pipeline from https://github.com/pyannote/pyannote-audio
|
228 |
"""
|
229 |
|
230 |
# model = whisper.load_model(whisper_model)
|
@@ -405,9 +399,7 @@ with demo:
|
|
405 |
video_in.render()
|
406 |
with gr.Column():
|
407 |
gr.Markdown('''
|
408 |
-
|
409 |
-
##### Please select the source language for transcription.
|
410 |
-
##### You can select a range of assumed numbers of speakers.
|
411 |
''')
|
412 |
selected_source_lang.render()
|
413 |
selected_whisper_model.render()
|
|
|
1 |
+
import whisper
|
2 |
from faster_whisper import WhisperModel
|
3 |
import datetime
|
4 |
import subprocess
|
|
|
218 |
|
219 |
def speech_to_text(video_file_path, selected_source_lang, whisper_model, num_speakers):
|
220 |
"""
|
|
|
|
|
|
|
|
|
221 |
|
|
|
|
|
222 |
"""
|
223 |
|
224 |
# model = whisper.load_model(whisper_model)
|
|
|
399 |
video_in.render()
|
400 |
with gr.Column():
|
401 |
gr.Markdown('''
|
402 |
+
|
|
|
|
|
403 |
''')
|
404 |
selected_source_lang.render()
|
405 |
selected_whisper_model.render()
|