titipata committed on
Commit
ad30dac
1 Parent(s): bf80da4

Try using `yt_dlp` library, update transcribe function

Browse files
Files changed (1) hide show
  1. app.py +33 -12
app.py CHANGED
@@ -1,3 +1,4 @@
 
1
  import torch
2
  import gradio as gr
3
  import pytube as pt
@@ -21,20 +22,23 @@ pipe.model.config.forced_decoder_ids = pipe.tokenizer.get_decoder_prompt_ids(lan
21
 
22
  def transcribe(microphone, file_upload):
23
  warn_output = ""
24
- if (microphone is not None) and (file_upload is not None):
25
  warn_output = (
26
  "WARNING: You've uploaded an audio file and used the microphone. "
27
  "The recorded file from the microphone will be used and the uploaded audio will be discarded.\n"
28
  )
29
- elif (microphone is None) and (file_upload is None):
 
 
 
 
 
30
  return "ERROR: You have to either use the microphone or upload an audio file"
31
 
32
- file = microphone if microphone is not None else file_upload
33
-
34
  text = pipe(file, generate_kwargs={"language":"<|th|>", "task":"transcribe"}, batch_size=16)["text"]
35
-
36
  return warn_output + text
37
 
 
38
  def _return_yt_html_embed(yt_url):
39
  video_id = yt_url.split("?v=")[-1]
40
  HTML_str = (
@@ -44,14 +48,31 @@ def _return_yt_html_embed(yt_url):
44
  return HTML_str
45
 
46
  def yt_transcribe(yt_url):
47
- yt = pt.YouTube(yt_url)
48
- html_embed_str = _return_yt_html_embed(yt_url)
49
- stream = yt.streams.filter(only_audio=True)[0]
50
- stream.download(filename="audio.mp3")
51
-
52
- text = pipe("audio.mp3", generate_kwargs={"language":"<|th|>", "task":"transcribe"}, batch_size=16)["text"]
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
53
 
54
- return html_embed_str, text
55
 
56
  with gr.Blocks() as demo:
57
  gr.Markdown(f"# Whisper Demo Thai 🇹🇭")
 
1
+ import yt_dlp
2
  import torch
3
  import gradio as gr
4
  import pytube as pt
 
22
 
23
  def transcribe(microphone, file_upload):
24
  warn_output = ""
25
+ if microphone and file_upload:
26
  warn_output = (
27
  "WARNING: You've uploaded an audio file and used the microphone. "
28
  "The recorded file from the microphone will be used and the uploaded audio will be discarded.\n"
29
  )
30
+ file = microphone
31
+ elif microphone:
32
+ file = microphone
33
+ elif file_upload:
34
+ file = file_upload
35
+ else:
36
  return "ERROR: You have to either use the microphone or upload an audio file"
37
 
 
 
38
  text = pipe(file, generate_kwargs={"language":"<|th|>", "task":"transcribe"}, batch_size=16)["text"]
 
39
  return warn_output + text
40
 
41
+
42
  def _return_yt_html_embed(yt_url):
43
  video_id = yt_url.split("?v=")[-1]
44
  HTML_str = (
 
48
  return HTML_str
49
 
50
  def yt_transcribe(yt_url):
51
+ try:
52
+ ydl_opts = {
53
+ 'format': 'bestaudio/best',
54
+ 'postprocessors': [{
55
+ 'key': 'FFmpegExtractAudio',
56
+ 'preferredcodec': 'mp3',
57
+ 'preferredquality': '192',
58
+ }],
59
+ 'outtmpl': 'audio.%(ext)s',
60
+ }
61
+ with yt_dlp.YoutubeDL(ydl_opts) as ydl:
62
+ info = ydl.extract_info(yt_url, download=True)
63
+ video_id = info['id']
64
+
65
+ html_embed_str = _return_yt_html_embed(video_id)
66
+
67
+ text = pipe("audio.mp3", generate_kwargs={"language":"<|th|>", "task":"transcribe"}, batch_size=16)["text"]
68
+
69
+ # Clean up the downloaded file
70
+ os.remove("audio.mp3")
71
+
72
+ return html_embed_str, text
73
+ except Exception as e:
74
+ return f"Error: {str(e)}", "An error occurred while processing the YouTube video."
75
 
 
76
 
77
  with gr.Blocks() as demo:
78
  gr.Markdown(f"# Whisper Demo Thai 🇹🇭")