whisper-thai-demo

Sleeping

App Files Files Community

titipata committed on Jul 10

Commit

ad30dac

•

1 Parent(s): bf80da4

Try using `yt_dlp` library, update transcribe function

Browse files

Files changed (1) hide show

app.py +33 -12

app.py CHANGED Viewed

@@ -1,3 +1,4 @@
 import torch
 import gradio as gr
 import pytube as pt
@@ -21,20 +22,23 @@ pipe.model.config.forced_decoder_ids = pipe.tokenizer.get_decoder_prompt_ids(lan
 def transcribe(microphone, file_upload):
     warn_output = ""
-    if (microphone is not None) and (file_upload is not None):
         warn_output = (
             "WARNING: You've uploaded an audio file and used the microphone. "
             "The recorded file from the microphone will be used and the uploaded audio will be discarded.\n"
         )
-    elif (microphone is None) and (file_upload is None):
         return "ERROR: You have to either use the microphone or upload an audio file"
-    file = microphone if microphone is not None else file_upload
     text = pipe(file, generate_kwargs={"language":"<|th|>", "task":"transcribe"}, batch_size=16)["text"]
     return warn_output + text
 def _return_yt_html_embed(yt_url):
     video_id = yt_url.split("?v=")[-1]
     HTML_str = (
@@ -44,14 +48,31 @@ def _return_yt_html_embed(yt_url):
     return HTML_str
 def yt_transcribe(yt_url):
-    yt = pt.YouTube(yt_url)
-    html_embed_str = _return_yt_html_embed(yt_url)
-    stream = yt.streams.filter(only_audio=True)[0]
-    stream.download(filename="audio.mp3")
-    text = pipe("audio.mp3", generate_kwargs={"language":"<|th|>", "task":"transcribe"}, batch_size=16)["text"]
-    return html_embed_str, text
 with gr.Blocks() as demo:
     gr.Markdown(f"# Whisper Demo Thai 🇹🇭")

+import yt_dlp
 import torch
 import gradio as gr
 import pytube as pt
 def transcribe(microphone, file_upload):
     """Transcribe audio from the microphone or an uploaded file into Thai text.

     Args:
         microphone: Audio input recorded from the microphone, or a falsy
             value when nothing was recorded (presumably a file path
             supplied by the UI -- confirm against the caller).
         file_upload: Uploaded audio input, or a falsy value when nothing
             was uploaded (same assumption as ``microphone``).

     Returns:
         The transcription text, prefixed with a warning when both inputs
         were supplied, or an error message when neither was supplied.
     """
     # Guard clause: nothing to transcribe.
     if not microphone and not file_upload:
         return "ERROR: You have to either use the microphone or upload an audio file"

     notice = ""
     if microphone and file_upload:
         notice = (
             "WARNING: You've uploaded an audio file and used the microphone. "
             "The recorded file from the microphone will be used and the uploaded audio will be discarded.\n"
         )

     # The microphone recording takes precedence whenever it is present.
     audio_source = microphone if microphone else file_upload
     result = pipe(audio_source, generate_kwargs={"language":"<|th|>", "task":"transcribe"}, batch_size=16)
     return notice + result["text"]
 def _return_yt_html_embed(yt_url):
     video_id = yt_url.split("?v=")[-1]
     HTML_str = (
     return HTML_str
 def yt_transcribe(yt_url):
     """Download a YouTube video's audio track and transcribe it into Thai text.

     The audio is fetched with ``yt_dlp``, converted to MP3 by its
     ``FFmpegExtractAudio`` post-processor, and handed to the module-level
     ``pipe`` for transcription.

     Args:
         yt_url: URL of the video to transcribe.

     Returns:
         A ``(html, text)`` pair. On success ``html`` is the markup built by
         ``_return_yt_html_embed`` and ``text`` is the transcription. On any
         failure ``html`` is an escaped ``"Error: ..."`` message and ``text``
         is a generic notice; no exception propagates to the caller.
     """
     # Imported locally so the function does not rely on module-level imports
     # being present (the original called os.remove without a visible
     # `import os`).
     import html
     import os
     import tempfile

     try:
         # A private scratch directory per call: concurrent requests cannot
         # overwrite each other's audio, and the download is removed even
         # when the download or the transcription raises.
         with tempfile.TemporaryDirectory() as workdir:
             ydl_opts = {
                 'format': 'bestaudio/best',
                 'postprocessors': [{
                     'key': 'FFmpegExtractAudio',
                     'preferredcodec': 'mp3',
                     'preferredquality': '192',
                 }],
                 # A literal '%' in the directory name must be doubled so it
                 # is not parsed as an output-template field.
                 'outtmpl': os.path.join(workdir.replace('%', '%%'), 'audio.%(ext)s'),
             }
             with yt_dlp.YoutubeDL(ydl_opts) as ydl:
                 info = ydl.extract_info(yt_url, download=True)
             video_id = info['id']
             html_embed_str = _return_yt_html_embed(video_id)
             audio_path = os.path.join(workdir, "audio.mp3")
             text = pipe(audio_path, generate_kwargs={"language":"<|th|>", "task":"transcribe"}, batch_size=16)["text"]
         return html_embed_str, text
     except Exception as e:
         # UI boundary: report the failure instead of raising. The message is
         # escaped because this slot carries HTML on the success path and the
         # exception text may echo the user-supplied URL.
         return f"Error: {html.escape(str(e))}", "An error occurred while processing the YouTube video."
 with gr.Blocks() as demo:
     gr.Markdown(f"# Whisper Demo Thai 🇹🇭")