Spaces:
Runtime error
Runtime error
Arnaudding001
committed on
Commit
•
ed0d9df
1
Parent(s):
bb23e2c
Update app.py
Browse files
app.py
CHANGED
@@ -17,7 +17,7 @@ from utils import slugify, write_srt, write_vtt
|
|
17 |
from vad import NonSpeechStrategy, PeriodicTranscriptionConfig, TranscriptionConfig, VadPeriodicTranscription, VadSileroTranscription
|
18 |
|
19 |
# Limitations (set to -1 to disable)
|
20 |
-
DEFAULT_INPUT_AUDIO_MAX_DURATION =
|
21 |
|
22 |
# Whether or not to automatically delete all uploaded files, to save disk space
|
23 |
DELETE_UPLOADED_FILES = True
|
@@ -216,6 +216,12 @@ class WhisperTranscriber:
|
|
216 |
file.write(text)
|
217 |
|
218 |
return file.name
|
|
|
|
|
|
|
|
|
|
|
|
|
219 |
|
220 |
|
221 |
def create_ui(inputAudioMaxDuration, share=False, server_name: str = None):
|
@@ -224,7 +230,7 @@ def create_ui(inputAudioMaxDuration, share=False, server_name: str = None):
|
|
224 |
ui_description = "Whisper是一个语音转文字模型,经过多个语音数据集的训练而成。也可以进行多语言的识别任务和翻译(多种语言翻译成英文)"
|
225 |
|
226 |
|
227 |
-
ui_description += "\n\n\n\n对于时长大于
|
228 |
|
229 |
if inputAudioMaxDuration > 0:
|
230 |
ui_description += "\n\n" + "音频最大时长: " + str(inputAudioMaxDuration) + " 秒"
|
|
|
17 |
from vad import NonSpeechStrategy, PeriodicTranscriptionConfig, TranscriptionConfig, VadPeriodicTranscription, VadSileroTranscription
|
18 |
|
19 |
# Limitations (set to -1 to disable)
|
20 |
+
DEFAULT_INPUT_AUDIO_MAX_DURATION = 1200 # seconds #initial value 600
|
21 |
|
22 |
# Whether or not to automatically delete all uploaded files, to save disk space
|
23 |
DELETE_UPLOADED_FILES = True
|
|
|
216 |
file.write(text)
|
217 |
|
218 |
return file.name
|
219 |
+
|
220 |
+
# translate_checkbox = gr.inputs.Checkbox(label = "Translate to English", default=False)
|
221 |
+
# transcription_tb = gr.Textbox(label="Transcription", lines=10, max_lines=20)
|
222 |
+
# translation_tb = gr.Textbox(label="Translation", lines=10, max_lines=20)
|
223 |
+
# detected_lang = gr.outputs.HTML(label="Detected Language")
|
224 |
+
|
225 |
|
226 |
|
227 |
def create_ui(inputAudioMaxDuration, share=False, server_name: str = None):
|
|
|
230 |
ui_description = "Whisper是一个语音转文字模型,经过多个语音数据集的训练而成。也可以进行多语言的识别任务和翻译(多种语言翻译成英文)"
|
231 |
|
232 |
|
233 |
+
ui_description += "\n\n\n\n对于时长大于20分钟的非英语音频文件,建议选择VAD选项中的Silero VAD (语音活动检测器)。"
|
234 |
|
235 |
if inputAudioMaxDuration > 0:
|
236 |
ui_description += "\n\n" + "音频最大时长: " + str(inputAudioMaxDuration) + " 秒"
|