File size: 2,507 Bytes
7f53721
 
 
fc74109
 
7f53721
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
fc74109
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
7f53721
fc74109
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
from pathlib import Path
from timeit import default_timer as timer

import gradio as gr
import librosa
import numpy as np
import torch
from transformers import pipeline


# Class index -> language name for the 45 classes, listed in index order.
# The spellings are runtime label strings and are kept exactly as given
# (including "Ukranian"); presumably they mirror the training dataset's
# label names -- confirm before "correcting" any of them.
language_classes = dict(
    enumerate(
        (
            "Arabic",
            "Basque",
            "Breton",
            "Catalan",
            "Chinese_China",
            "Chinese_Hongkong",
            "Chinese_Taiwan",
            "Chuvash",
            "Czech",
            "Dhivehi",
            "Dutch",
            "English",
            "Esperanto",
            "Estonian",
            "French",
            "Frisian",
            "Georgian",
            "German",
            "Greek",
            "Hakha_Chin",
            "Indonesian",
            "Interlingua",
            "Italian",
            "Japanese",
            "Kabyle",
            "Kinyarwanda",
            "Kyrgyz",
            "Latvian",
            "Maltese",
            "Mongolian",
            "Persian",
            "Polish",
            "Portuguese",
            "Romanian",
            "Romansh_Sursilvan",
            "Russian",
            "Sakha",
            "Slovenian",
            "Spanish",
            "Swedish",
            "Tamil",
            "Tatar",
            "Turkish",
            "Ukranian",
            "Welsh",
        )
    )
)




# Hugging Face Hub account name (not referenced again in this file).
username = "AescF"

# Hub repository id of the fine-tuned audio-classification checkpoint.
model_id = "AescF/hubert-base-ls960-finetuned-common_language"

# Use the first CUDA device when torch reports one, otherwise fall back to CPU.
if torch.cuda.is_available():
    device = "cuda:0"
else:
    device = "cpu"

# Built once at import time and reused for every prediction request.
pipe = pipeline(
    "audio-classification",
    model=model_id,
    device=device,
)

# def predict_trunc(filepath):
#     preprocessed = pipe.preprocess(filepath)
#     truncated = pipe.feature_extractor.pad(preprocessed,truncation=True, max_length = 16_000*30)
#     model_outputs = pipe.forward(truncated)
#     outputs = pipe.postprocess(model_outputs)

#     return outputs


def classify_audio(filepath):
    """Classify an audio file and report how long the inference took.

    The pipeline yields a list of predictions such as
    ``[{'score': 0.83, 'label': 'English'}, {'score': 0.12, 'label': 'German'}]``
    (label names here are illustrative), which is flattened to
    ``{"English": 0.83, "German": 0.12}``.

    Args:
        filepath: Path of the audio file to classify, as supplied by the
            ``gr.Audio(type="filepath")`` input component.

    Returns:
        A ``(scores, pred_time)`` tuple. ``scores`` maps each predicted
        label to its score; ``pred_time`` is the elapsed inference time in
        seconds, rounded to 5 decimals. The two values correspond, in
        order, to the Label and Number output components of the interface.

    Raises:
        gr.Error: If no audio was provided.
    """
    if not filepath:
        # Surface a readable message in the UI instead of a pipeline traceback.
        raise gr.Error("Please provide an audio clip to classify.")

    start_time = timer()
    preds = pipe(filepath)
    # Stop the clock right after the model call so only inference is timed.
    pred_time = round(timer() - start_time, 5)

    scores = {p["label"]: p["score"] for p in preds}
    return scores, pred_time


# User-facing texts: the loaded checkpoint is a language-identification model
# (see model_id), so the page must not advertise a music genre classifier.
title = "🗣️ Spoken Language Identifier"
description = """
Demo for a spoken-language identifier: HuBERT fine-tuned on [CommonLanguage](https://huggingface.co/datasets/common_language)
For more info checkout [GITHUB](https://github.com/AEscF)
"""

# Offer every audio clip found in the working directory as a clickable
# example. Each example is a one-element list: one value per input component.
_AUDIO_SUFFIXES = {".wav", ".mp3", ".flac", ".ogg"}
filenames = [
    [str(clip)]
    for clip in sorted(Path(".").iterdir())
    if clip.is_file() and clip.suffix.lower() in _AUDIO_SUFFIXES
]

demo = gr.Interface(
    fn=classify_audio,
    inputs=gr.Audio(type="filepath"),
    outputs=[gr.Label(label="Predictions"), gr.Number(label="Prediction time (s)")],
    title=title,
    description=description,
    # Pass None rather than an empty list when no example clips are available.
    examples=filenames or None,
)
demo.launch()