from pathlib import Path import gradio as gr import librosa from transformers import (AutoFeatureExtractor, AutoModelForAudioClassification, pipeline) feature_extractor = AutoFeatureExtractor.from_pretrained( "sanchit-gandhi/whisper-medium-fleurs-lang-id", do_normalize=True ) SR = 16_000 label2id = { 'blues': '0', 'classical': '1', 'country': '2', 'disco': '3', 'hiphop': '4', 'jazz': '5', 'metal': '6', 'pop': '7', 'reggae': '8', 'rock': '9' } num_labels = len(label2id) path = "best_Whisper-Small_model_92" model = AutoModelForAudioClassification.from_pretrained("allispaul/whisper-small-gtzan") classifier = pipeline("audio-classification", model=model, feature_extractor=feature_extractor) def predict(audio): if isinstance(audio, str) or isinstance(audio, Path): audio, sr = librosa.load(audio, sr=16_000) else: sr, audio = audio preds = classifier({"array": audio, "sampling_rate": sr}) probs = {k: 0.0 for k in label2id} for pred in preds: probs[pred["label"]] = pred["score"] return probs title = "šŸ¤– šŸŽµ Audiobot šŸŽ¹ āš”" description = "

ā™Æ The superpowered music genre classifier ā™­

" article = """

šŸ¤– This model is a version of Whisper Medium, fine-tuned on the GTZAN dataset.

šŸŽ¹ It recognizes 10 genres: blues, classical, country, hip-hop, jazz, metal, pop, reggae, and rock.

šŸŽµ Upload a song or click one of the examples to try it out!

āš” Part of a project for the Erdős Institute Deep Learning bootcamp, by Dylan Bates, Muhammed Cifci, Aycan Katitas, Johann Thiel, Soheyl Anbouhi, and Paul VanKoughnett.

""" iface = gr.Interface( fn=predict, inputs=gr.Audio(type="filepath"), outputs=gr.Label(num_top_classes=3), title=title, description=description, article=article, examples=["examples/country.00008.wav", "examples/hiphop.00057.wav", "examples/gamblersblues.opus"] ) iface.launch()