File size: 4,599 Bytes
10a0c3e
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
{
  "nbformat": 4,
  "nbformat_minor": 0,
  "metadata": {
    "colab": {
      "provenance": []
    },
    "kernelspec": {
      "name": "python3",
      "display_name": "Python 3"
    },
    "language_info": {
      "name": "python"
    }
  },
  "cells": [
    {
      "cell_type": "markdown",
      "source": [
        "# (YouTube) video to audio"
      ],
      "metadata": {
        "id": "kNt1V_xZCYzb"
      }
    },
    {
      "cell_type": "markdown",
      "source": [
        "- Author: [Pierre Guillou](https://www.linkedin.com/in/pierreguillou/)\n",
        "- Date: 08/10/2023"
      ],
      "metadata": {
        "id": "Fa6V8oEynFe-"
      }
    },
    {
      "cell_type": "code",
      "source": [
        "%%capture\n",
        "#!apt-get install -y ffmpeg\n",
        "!python3 -m pip install -U yt-dlp\n",
        "!pip install unidecode\n",
        "!pip install gradio\n",
        "!pip install pydub"
      ],
      "metadata": {
        "id": "S4yB5r9RCdkH"
      },
      "execution_count": 1,
      "outputs": []
    },
    {
      "cell_type": "code",
      "source": [
        "import gradio as gr\n",
        "import re, unidecode\n",
        "from unidecode import unidecode\n",
        "import yt_dlp\n",
        "import os\n",
        "import pydub\n",
        "import numpy as np\n",
        "\n",
        "# no space, punctuation, accent in lower string\n",
        "def cleanString(string):\n",
        "    cleanString = unidecode(string)\n",
        "    cleanString = re.sub('\\W+','_', cleanString)\n",
        "    return cleanString.lower()\n",
        "\n",
        "# from audio file path to sample rate and numpy array\n",
        "def read_audio(f, normalized=False):\n",
        "    \"\"\"MP3 to numpy array\"\"\"\n",
        "    a = pydub.AudioSegment.from_mp3(f)\n",
        "    y = np.array(a.get_array_of_samples())\n",
        "    if a.channels == 2:\n",
        "        y = y.reshape((-1, 2))\n",
        "    if normalized:\n",
        "        return a.frame_rate, np.float32(y) / 2**15\n",
        "    else:\n",
        "        return a.frame_rate, y\n",
        "\n",
        "# from YouTube url to audio file path and sample rate + numpy array\n",
        "def download_audio(url):\n",
        "\n",
        "    path_to_folder_audio_mp3 = \"./audio/\"\n",
        "    ydl_opts = {\n",
        "        'format': 'm4a/bestaudio/best',\n",
        "        'outtmpl': f'{path_to_folder_audio_mp3}%(title)s',\n",
        "        'postprocessors': [{\n",
        "            'key': 'FFmpegExtractAudio',\n",
        "            'preferredcodec': 'mp3',\n",
        "        }]\n",
        "    }\n",
        "\n",
        "    with yt_dlp.YoutubeDL(ydl_opts) as ydl:\n",
        "        info_dict = ydl.extract_info(url, download=True)\n",
        "        video_title = info_dict['title']\n",
        "\n",
        "        # Rename the audio file\n",
        "        local_link = video_title + \".mp3\"\n",
        "        new_local_link = cleanString(video_title) + \".mp3\"\n",
        "        for filename in os.listdir(path_to_folder_audio_mp3):\n",
        "            if cleanString(local_link) == cleanString(filename):\n",
        "                os.rename(os.path.join(path_to_folder_audio_mp3, filename),os.path.join(path_to_folder_audio_mp3, new_local_link))\n",
        "\n",
        "        # get audio file path\n",
        "        file_path = path_to_folder_audio_mp3 + new_local_link\n",
        "\n",
        "    return file_path, read_audio(file_path)\n",
        "\n",
        "# Gradio interface\n",
        "iface = gr.Interface(fn=download_audio,\n",
        "                     inputs=gr.Textbox(label=\"YouTube Video URL\"),\n",
        "                     outputs=[\n",
        "                         gr.File(label=\"Output Audio File\"),\n",
        "                         gr.Audio(label=\"Play Audio\", show_download_button=False, format=\"mp3\"),\n",
        "                     ],\n",
        "                     allow_flagging=\"never\",\n",
        "                     title=\"YouTube Video to Audio (mp3)\",\n",
        "                     description=\"Just paste any YouTube video url and get its corresponding audio file in mp3.\",\n",
        "                     )\n",
        "iface.launch()"
      ],
      "metadata": {
        "id": "9YvB5hBloP1f"
      },
      "execution_count": null,
      "outputs": []
    },
    {
      "cell_type": "markdown",
      "source": [
        "# END"
      ],
      "metadata": {
        "id": "u9QYxqjtnzCD"
      }
    }
  ]
}