Spaces:
Running
Running
pierreguillou
committed on
Commit
•
4ef19a6
1
Parent(s):
4a2b045
Delete docs/video_to_audio.ipynb
Browse files- docs/video_to_audio.ipynb +0 -175
docs/video_to_audio.ipynb
DELETED
@@ -1,175 +0,0 @@
|
|
1 |
-
{
|
2 |
-
"nbformat": 4,
|
3 |
-
"nbformat_minor": 0,
|
4 |
-
"metadata": {
|
5 |
-
"colab": {
|
6 |
-
"provenance": []
|
7 |
-
},
|
8 |
-
"kernelspec": {
|
9 |
-
"name": "python3",
|
10 |
-
"display_name": "Python 3"
|
11 |
-
},
|
12 |
-
"language_info": {
|
13 |
-
"name": "python"
|
14 |
-
}
|
15 |
-
},
|
16 |
-
"cells": [
|
17 |
-
{
|
18 |
-
"cell_type": "markdown",
|
19 |
-
"source": [
|
20 |
-
"# (YouTube) video to audio"
|
21 |
-
],
|
22 |
-
"metadata": {
|
23 |
-
"id": "kNt1V_xZCYzb"
|
24 |
-
}
|
25 |
-
},
|
26 |
-
{
|
27 |
-
"cell_type": "markdown",
|
28 |
-
"source": [
|
29 |
-
"- Author: [Pierre Guillou](https://www.linkedin.com/in/pierreguillou/)\n",
|
30 |
-
"- Date: 08/10/2023"
|
31 |
-
],
|
32 |
-
"metadata": {
|
33 |
-
"id": "Fa6V8oEynFe-"
|
34 |
-
}
|
35 |
-
},
|
36 |
-
{
|
37 |
-
"cell_type": "code",
|
38 |
-
"source": [
|
39 |
-
"%%capture\n",
|
40 |
-
"#!apt-get install -y ffmpeg\n",
|
41 |
-
"!python3 -m pip install -U yt-dlp\n",
|
42 |
-
"!pip install unidecode\n",
|
43 |
-
"!pip install gradio"
|
44 |
-
],
|
45 |
-
"metadata": {
|
46 |
-
"id": "S4yB5r9RCdkH"
|
47 |
-
},
|
48 |
-
"execution_count": 2,
|
49 |
-
"outputs": []
|
50 |
-
},
|
51 |
-
{
|
52 |
-
"cell_type": "code",
|
53 |
-
"source": [
|
54 |
-
"import gradio as gr\n",
|
55 |
-
"import re, unidecode\n",
|
56 |
-
"from unidecode import unidecode\n",
|
57 |
-
"import yt_dlp\n",
|
58 |
-
"import os\n",
|
59 |
-
"import pydub\n",
|
60 |
-
"import numpy as np\n",
|
61 |
-
"\n",
# no space, punctuation, accent in lower string
def cleanString(string):
    """Return a lower-case, underscore-separated version of ``string``.

    The text is first passed through ``unidecode`` (transliteration of
    accented / non-ASCII characters), then every run of non-word characters
    (anything matched by ``\\W+``: spaces, punctuation, ...) is collapsed into
    a single ``_``, and the result is lower-cased.

    Args:
        string: the text to normalise (e.g. a video title or a file name).

    Returns:
        The normalised string. Note that a file extension dot is also
        replaced, so ``"My Song.mp3"`` becomes ``"my_song_mp3"``.
    """
    ascii_text = unidecode(string)
    # Raw string: '\W' in a plain literal is an invalid escape sequence and
    # triggers a warning on recent Python versions.
    return re.sub(r'\W+', '_', ascii_text).lower()
|
67 |
-
"\n",
# from audio file path to sample rate and numpy array
def read_audio(f, normalized=False):
    """Decode an MP3 file into a sample rate and a numpy array of samples.

    Args:
        f: whatever ``pydub.AudioSegment.from_mp3`` accepts (here: the path
            of the MP3 file).
        normalized: when True, samples are returned as ``float32`` divided by
            the full-scale value of the segment's sample width (``2**15`` for
            16-bit audio); when False the raw integer samples are returned.

    Returns:
        ``(frame_rate, samples)``. ``samples`` is 1-D for mono audio and has
        shape ``(n_frames, n_channels)`` for multi-channel audio.
    """
    segment = pydub.AudioSegment.from_mp3(f)
    samples = np.array(segment.get_array_of_samples())
    # Samples of multi-channel audio are interleaved; give each channel its
    # own column (previously only the stereo case was handled).
    if segment.channels > 1:
        samples = samples.reshape((-1, segment.channels))
    if normalized:
        # sample_width is in bytes; 2 bytes -> 2**15, matching the old constant.
        full_scale = 2 ** (8 * segment.sample_width - 1)
        return segment.frame_rate, np.float32(samples) / full_scale
    return segment.frame_rate, samples
|
79 |
-
"\n",
# from YouTube url to audio file path and sample rate + numpy array
def download_audio(url):
    """Download the audio of a video as MP3, rename it, and load it.

    The audio is fetched with yt-dlp into ``./audio/`` and converted to MP3 by
    the ``FFmpegExtractAudio`` post-processor. The resulting file is then
    renamed to ``cleanString(title) + ".mp3"`` so the final path contains no
    spaces, punctuation or accents.

    Args:
        url: URL of the video, passed unchanged to ``yt_dlp``.

    Returns:
        ``(file_path, audio)`` where ``file_path`` is the path of the renamed
        MP3 and ``audio`` is the ``(sample_rate, samples)`` tuple returned by
        ``read_audio(file_path)``.

    Raises:
        FileNotFoundError: if no MP3 matching the video title is found in the
            output folder after the download.
    """
    path_to_folder_audio_mp3 = "./audio/"
    # The directory scan below needs the folder to exist.
    os.makedirs(path_to_folder_audio_mp3, exist_ok=True)

    ydl_opts = {
        'format': 'm4a/bestaudio/best',
        'outtmpl': f'{path_to_folder_audio_mp3}%(title)s',
        'postprocessors': [{
            'key': 'FFmpegExtractAudio',
            'preferredcodec': 'mp3',
        }]
    }
    with yt_dlp.YoutubeDL(ydl_opts) as ydl:
        info_dict = ydl.extract_info(url, download=True)
        video_title = info_dict['title']

    # Rename the audio file
    new_local_link = cleanString(video_title) + ".mp3"
    file_path = os.path.join(path_to_folder_audio_mp3, new_local_link)
    # The name written on disk may differ from the raw title, so files are
    # matched on their normalised form. Computed once, outside the loop.
    wanted_key = cleanString(video_title + ".mp3")
    for filename in os.listdir(path_to_folder_audio_mp3):
        # Skip the target itself: renaming a file onto itself is pointless.
        if filename != new_local_link and cleanString(filename) == wanted_key:
            # os.replace overwrites an existing target on every platform.
            os.replace(os.path.join(path_to_folder_audio_mp3, filename), file_path)

    # Fail with a clear message instead of an opaque decoding error later on.
    if not os.path.isfile(file_path):
        raise FileNotFoundError(
            f"No MP3 file matching the title {video_title!r} was found in "
            f"{path_to_folder_audio_mp3}"
        )

    return file_path, read_audio(file_path)
|
107 |
-
"\n",
# Gradio interface: one text box in, a downloadable file and an audio player out.
url_box = gr.Textbox(label="YouTube Video URL")
result_widgets = [
    gr.File(label="Output Audio File"),
    gr.Audio(label="Play Audio", show_download_button=False, format="mp3"),
]

iface = gr.Interface(
    fn=download_audio,
    inputs=url_box,
    outputs=result_widgets,
    allow_flagging="never",
    title="YouTube Video to Audio (mp3)",
    description="Just paste any YouTube video url and get its corresponding audio file in mp3.",
)
iface.launch()
|
120 |
-
],
|
121 |
-
"metadata": {
|
122 |
-
"colab": {
|
123 |
-
"base_uri": "https://localhost:8080/",
|
124 |
-
"height": 646
|
125 |
-
},
|
126 |
-
"id": "fPD5JZXKDJNn",
|
127 |
-
"outputId": "97012563-c97e-4cc1-95b7-d611492a5b8d"
|
128 |
-
},
|
129 |
-
"execution_count": 73,
|
130 |
-
"outputs": [
|
131 |
-
{
|
132 |
-
"output_type": "stream",
|
133 |
-
"name": "stdout",
|
134 |
-
"text": [
|
135 |
-
"Setting queue=True in a Colab notebook requires sharing enabled. Setting `share=True` (you can turn this off by setting `share=False` in `launch()` explicitly).\n",
|
136 |
-
"\n",
|
137 |
-
"Colab notebook detected. To show errors in colab notebook, set debug=True in launch()\n",
|
138 |
-
"Running on public URL: https://378e399891d3567f06.gradio.live\n",
|
139 |
-
"\n",
|
140 |
-
"This share link expires in 72 hours. For free permanent hosting and GPU upgrades, run `gradio deploy` from Terminal to deploy to Spaces (https://huggingface.co/spaces)\n"
|
141 |
-
]
|
142 |
-
},
|
143 |
-
{
|
144 |
-
"output_type": "display_data",
|
145 |
-
"data": {
|
146 |
-
"text/plain": [
|
147 |
-
"<IPython.core.display.HTML object>"
|
148 |
-
],
|
149 |
-
"text/html": [
|
150 |
-
"<div><iframe src=\"https://378e399891d3567f06.gradio.live\" width=\"100%\" height=\"500\" allow=\"autoplay; camera; microphone; clipboard-read; clipboard-write;\" frameborder=\"0\" allowfullscreen></iframe></div>"
|
151 |
-
]
|
152 |
-
},
|
153 |
-
"metadata": {}
|
154 |
-
},
|
155 |
-
{
|
156 |
-
"output_type": "execute_result",
|
157 |
-
"data": {
|
158 |
-
"text/plain": []
|
159 |
-
},
|
160 |
-
"metadata": {},
|
161 |
-
"execution_count": 73
|
162 |
-
}
|
163 |
-
]
|
164 |
-
},
|
165 |
-
{
|
166 |
-
"cell_type": "code",
|
167 |
-
"source": [],
|
168 |
-
"metadata": {
|
169 |
-
"id": "NLnlu0lik2Sv"
|
170 |
-
},
|
171 |
-
"execution_count": null,
|
172 |
-
"outputs": []
|
173 |
-
}
|
174 |
-
]
|
175 |
-
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|