whisper-thai-demo

Sleeping

App Files Files Community

tensorops committed on Jul 10

Commit

207d7df

•

1 Parent(s): 779835f

Update app.py

Browse files

Files changed (1) hide show

app.py +35 -43

app.py CHANGED Viewed

@@ -5,7 +5,7 @@ from transformers import pipeline
 from huggingface_hub import model_info
-MODEL_NAME = "biodatlab/whisper-th-medium-combined" #this always needs to stay in line 8 :D sorry for the hackiness
 lang = "th"
 device = 0 if torch.cuda.is_available() else "cpu"
@@ -26,7 +26,6 @@ def transcribe(microphone, file_upload):
             "WARNING: You've uploaded an audio file and used the microphone. "
             "The recorded file from the microphone will be used and the uploaded audio will be discarded.\n"
         )
     elif (microphone is None) and (file_upload is None):
         return "ERROR: You have to either use the microphone or upload an audio file"
@@ -36,7 +35,6 @@ def transcribe(microphone, file_upload):
     return warn_output + text
 def _return_yt_html_embed(yt_url):
     video_id = yt_url.split("?v=")[-1]
     HTML_str = (
@@ -45,7 +43,6 @@ def _return_yt_html_embed(yt_url):
     )
     return HTML_str
 def yt_transcribe(yt_url):
     yt = pt.YouTube(yt_url)
     html_embed_str = _return_yt_html_embed(yt_url)
@@ -56,42 +53,37 @@ def yt_transcribe(yt_url):
     return html_embed_str, text
-demo = gr.Blocks()
-mf_transcribe = gr.Interface(
-    fn=transcribe,
-    inputs=[
-        gr.inputs.Audio(source="microphone", type="filepath", optional=True),
-        gr.inputs.Audio(source="upload", type="filepath", optional=True),
-    ],
-    outputs="text",
-    layout="horizontal",
-    theme="huggingface",
-    title="Whisper Demo Thai 🇹🇭: Transcribe Audio",
-    description=(
-        "Transcribe long-form microphone or audio inputs with the click of a button! Demo uses the the fine-tuned"
-        f" checkpoint [{MODEL_NAME}](https://huggingface.co/{MODEL_NAME}) and 🤗 Transformers to transcribe audio files"
-        " of arbitrary length."
-    ),
-    allow_flagging="never",
-)
-yt_transcribe = gr.Interface(
-    fn=yt_transcribe,
-    inputs=[gr.inputs.Textbox(lines=1, placeholder="Paste the URL to a YouTube video here", label="YouTube URL")],
-    outputs=["html", "text"],
-    layout="horizontal",
-    theme="huggingface",
-    title="Whisper Demo Thai 🇹🇭: Transcribe YouTube",
-    description=(
-        "Transcribe long-form YouTube videos with the click of a button! Demo uses the the fine-tuned checkpoint:"
-        f" [{MODEL_NAME}](https://huggingface.co/{MODEL_NAME}) and 🤗 Transformers to transcribe audio files of"
-        " arbitrary length."
-    ),
-    allow_flagging="never",
-)
-with demo:
-    gr.TabbedInterface([mf_transcribe, yt_transcribe], ["Transcribe Audio", "Transcribe YouTube"])
-demo.launch(enable_queue=True)

 from huggingface_hub import model_info
+MODEL_NAME = "biodatlab/whisper-th-medium-combined"  # this always needs to stay in line 8 :D sorry for the hackiness
 lang = "th"
 device = 0 if torch.cuda.is_available() else "cpu"
             "WARNING: You've uploaded an audio file and used the microphone. "
             "The recorded file from the microphone will be used and the uploaded audio will be discarded.\n"
         )
     elif (microphone is None) and (file_upload is None):
         return "ERROR: You have to either use the microphone or upload an audio file"
     return warn_output + text
 def _return_yt_html_embed(yt_url):
     video_id = yt_url.split("?v=")[-1]
     HTML_str = (
     )
     return HTML_str
 def yt_transcribe(yt_url):
     yt = pt.YouTube(yt_url)
     html_embed_str = _return_yt_html_embed(yt_url)
     return html_embed_str, text
+with gr.Blocks(theme=gr.themes.HuggingFace()) as demo:
+    gr.Markdown(f"# Whisper Demo Thai 🇹🇭")
+    with gr.Tab("Transcribe Audio"):
+        gr.Markdown(
+            f"Transcribe long-form microphone or audio inputs with the click of a button! Demo uses the fine-tuned"
+            f" checkpoint [{MODEL_NAME}](https://huggingface.co/{MODEL_NAME}) and 🤗 Transformers to transcribe audio files"
+            f" of arbitrary length."
+        )
+        with gr.Row():
+            with gr.Column():
+                audio_mic = gr.Audio(source="microphone", type="filepath", label="Microphone Input")
+                audio_file = gr.Audio(source="upload", type="filepath", label="Audio File Upload")
+            with gr.Column():
+                text_output = gr.Textbox(label="Transcription Output")
+        transcribe_btn = gr.Button("Transcribe")
+        transcribe_btn.click(fn=transcribe, inputs=[audio_mic, audio_file], outputs=text_output)
+    with gr.Tab("Transcribe YouTube"):
+        gr.Markdown(
+            f"Transcribe long-form YouTube videos with the click of a button! Demo uses the fine-tuned checkpoint:"
+            f" [{MODEL_NAME}](https://huggingface.co/{MODEL_NAME}) and 🤗 Transformers to transcribe audio files of"
+            f" arbitrary length."
+        )
+        with gr.Row():
+            with gr.Column():
+                yt_url_input = gr.Textbox(lines=1, placeholder="Paste the URL to a YouTube video here", label="YouTube URL")
+            with gr.Column():
+                yt_html_output = gr.HTML(label="Video")
+                yt_text_output = gr.Textbox(label="Transcription Output")
+        yt_transcribe_btn = gr.Button("Transcribe YouTube Video")
+        yt_transcribe_btn.click(fn=yt_transcribe, inputs=yt_url_input, outputs=[yt_html_output, yt_text_output])
+demo.launch(enable_queue=True)