import gradio as gr import re, unidecode from unidecode import unidecode import yt_dlp import os import pydub import numpy as np # no space, punctuation, accent in lower string def cleanString(string): cleanString = unidecode(string) # cleanString = re.sub('\W+','_', cleanString) cleanString = re.sub(r'[^\w\s]','',cleanString) cleanString = cleanString.replace(" ", "_") return cleanString.lower() # from YouTube url to audio file path and sample rate + numpy array def download_audio(url): path_to_folder_audio_mp3 = "./" ydl_opts = { 'format': 'm4a/bestaudio/best', 'outtmpl': f'{path_to_folder_audio_mp3}%(title)s', 'postprocessors': [{ 'key': 'FFmpegExtractAudio', 'preferredcodec': 'mp3', }] } with yt_dlp.YoutubeDL(ydl_opts) as ydl: info_dict = ydl.extract_info(url, download=True) video_title = info_dict['title'] # Rename the audio file local_link = video_title + ".mp3" new_local_link = cleanString(video_title) + ".mp3" for filename in os.listdir(path_to_folder_audio_mp3): if cleanString(local_link) == cleanString(filename): os.rename(os.path.join(path_to_folder_audio_mp3, filename),os.path.join(path_to_folder_audio_mp3, new_local_link)) # get audio file path file_path = path_to_folder_audio_mp3 + new_local_link return file_path, file_path with gr.Blocks() as demo: gr.Markdown("