from timeit import default_timer as timer

import gradio as gr
import librosa
import numpy as np
import torch
from transformers import pipeline


# Class index -> language name lookup table. The names are listed in index
# order, so ``enumerate`` assigns them the keys 0..44.
# NOTE(review): presumably mirrors the label ids of the fine-tuned checkpoint;
# nothing in the visible part of this file reads it — confirm before relying
# on it.
language_classes = dict(
    enumerate(
        [
            "Arabic",
            "Basque",
            "Breton",
            "Catalan",
            "Chinese_China",
            "Chinese_Hongkong",
            "Chinese_Taiwan",
            "Chuvash",
            "Czech",
            "Dhivehi",
            "Dutch",
            "English",
            "Esperanto",
            "Estonian",
            "French",
            "Frisian",
            "Georgian",
            "German",
            "Greek",
            "Hakha_Chin",
            "Indonesian",
            "Interlingua",
            "Italian",
            "Japanese",
            "Kabyle",
            "Kinyarwanda",
            "Kyrgyz",
            "Latvian",
            "Maltese",
            "Mongolian",
            "Persian",
            "Polish",
            "Portuguese",
            "Romanian",
            "Romansh_Sursilvan",
            "Russian",
            "Sakha",
            "Slovenian",
            "Spanish",
            "Swedish",
            "Tamil",
            "Tatar",
            "Turkish",
            "Ukranian",
            "Welsh",
        ]
    )
)


# Hub account name; it matches the namespace prefix of ``model_id`` below.
username = "AescF"
# Repository id of the checkpoint handed to the audio-classification pipeline.
model_id = "AescF/hubert-base-ls960-finetuned-common_language"

# Use the first CUDA device when torch reports one, otherwise fall back to CPU.
if torch.cuda.is_available():
    device = "cuda:0"
else:
    device = "cpu"

# Built once at import time; ``classify_audio`` calls this shared instance.
pipe = pipeline(task="audio-classification", model=model_id, device=device)

# def predict_trunc(filepath):
#     preprocessed = pipe.preprocess(filepath)
#     truncated = pipe.feature_extractor.pad(preprocessed,truncation=True, max_length = 16_000*30)
#     model_outputs = pipe.forward(truncated)
#     outputs = pipe.postprocess(model_outputs)

#     return outputs


def classify_audio(filepath):
    """Classify an audio file and report how long the prediction took.

    The module-level ``pipe`` returns a list of score/label dicts such as
    ``[{"score": 0.83, "label": "English"}, {"score": 0.12, "label": "Dutch"}]``.
    That list is flattened into ``{"English": 0.83, "Dutch": 0.12}`` so it can
    be fed to the ``gr.Label`` output of the interface.

    Args:
        filepath: Path of the audio file; passed unchanged to ``pipe``.

    Returns:
        A ``(scores, pred_time)`` tuple, one element per output component of
        the Gradio interface: ``scores`` maps each predicted label to its
        score, and ``pred_time`` is the wall-clock duration of the pipeline
        call in seconds, rounded to 5 decimal places.
    """
    start_time = timer()
    preds = pipe(filepath)
    # Stop the clock right after inference so only the model call is timed.
    pred_time = round(timer() - start_time, 5)

    # Store the bare float score per label (not a tuple) for the Label widget.
    outputs = {p["label"]: p["score"] for p in preds}
    return outputs, pred_time


# User-facing copy. The checkpoint loaded above is fine-tuned on the
# ``common_language`` dataset (see ``model_id``), so the page describes a
# spoken-language classifier rather than a music genre classifier.
title = "🌍 Spoken Language Classifier"
description = """
Demo for a spoken language classifier trained on [CommonLanguage](https://huggingface.co/datasets/common_language)
For more info checkout [GITHUB](https://github.com/AEscF)
"""

# Example clips offered in the UI, resolved relative to the working directory.
# NOTE(review): these file names are music genres, which does not match a
# language-identification model — replace with speech samples if available.
filenames = ['blues.00098.wav', "disco.00020.wav", "classical.00075.wav", "keyboard-153960.mp3"]
# gr.Interface expects one list of input values per example row.
filenames = [[f"./{f}"] for f in filenames]

# ``classify_audio`` returns (scores, seconds); the two output components
# below receive those values in the same order.
demo = gr.Interface(
    fn=classify_audio,
    inputs=gr.Audio(type="filepath"),
    outputs=[gr.Label(label="Predictions"), gr.Number(label="Prediction time (s)")],
    title=title,
    description=description,
    examples=filenames,
)
demo.launch()