AescF committed on
Commit
7f53721
1 Parent(s): b6beba3

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +87 -0
app.py ADDED
@@ -0,0 +1,87 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
import os

import gradio as gr
import librosa
import numpy as np
import torch
from transformers import Wav2Vec2ForClassification, Wav2Vec2Processor
6
+
7
+
8
# Hub checkpoint used for both the input processor and the classifier.
model_id = "AescF/hubert-base-ls960-finetuned-common_language"

# Both objects are loaded once at import time and reused by every request.
# NOTE(review): confirm that `Wav2Vec2ForClassification` is actually exported
# by the installed `transformers` version and matches this checkpoint's
# architecture; the name is taken as-is from the file's import line.
processor = Wav2Vec2Processor.from_pretrained(model_id)
model = Wav2Vec2ForClassification.from_pretrained(model_id)
11
# Maps a class index (0-44) to the language name shown to the user.
# The tuple order defines the index, so entries must not be reordered.
# NOTE(review): the "Ukranian" spelling is kept exactly as in the original
# table; confirm against the model's label set before changing it.
language_classes = dict(
    enumerate(
        (
            "Arabic",
            "Basque",
            "Breton",
            "Catalan",
            "Chinese_China",
            "Chinese_Hongkong",
            "Chinese_Taiwan",
            "Chuvash",
            "Czech",
            "Dhivehi",
            "Dutch",
            "English",
            "Esperanto",
            "Estonian",
            "French",
            "Frisian",
            "Georgian",
            "German",
            "Greek",
            "Hakha_Chin",
            "Indonesian",
            "Interlingua",
            "Italian",
            "Japanese",
            "Kabyle",
            "Kinyarwanda",
            "Kyrgyz",
            "Latvian",
            "Maltese",
            "Mongolian",
            "Persian",
            "Polish",
            "Portuguese",
            "Romanian",
            "Romansh_Sursilvan",
            "Russian",
            "Sakha",
            "Slovenian",
            "Spanish",
            "Swedish",
            "Tamil",
            "Tatar",
            "Turkish",
            "Ukranian",
            "Welsh",
        )
    )
)
58
+
59
+
60
def predict_language(audio):
    """Classify the spoken language of an audio file.

    Args:
        audio: Path to an audio file that ``librosa.load`` can read, or
            ``None`` when no file has been provided.

    Returns:
        A single-entry dict mapping the predicted language name (looked up
        in ``language_classes``) to its softmax probability as a float, or
        ``None`` when ``audio`` is ``None``.
    """
    # A live interface may invoke the callback with no file selected;
    # there is nothing to classify in that case.
    if audio is None:
        return None

    # Decode the file and resample it to 16 kHz.
    audio_input, sr = librosa.load(audio, sr=16000)

    # Pass the sampling rate explicitly so the processor can check it
    # against the rate it was configured for instead of assuming it.
    input_values = processor(
        audio_input, sampling_rate=sr, return_tensors="pt", padding=True
    ).input_values

    # Inference only: no gradients are needed.
    with torch.no_grad():
        logits = model(input_values).logits

    # Softmax over the class dimension; [0] selects the only batch item.
    probabilities = torch.softmax(logits, dim=1)[0]
    predicted_language_idx = torch.argmax(probabilities).item()

    return {
        language_classes[predicted_language_idx]: float(
            probabilities[predicted_language_idx]
        )
    }
78
+
79
# Web UI: one audio upload in, one label (language -> confidence) out.
# Components come from the top-level ``gr`` namespace; the legacy
# ``gr.inputs`` / ``gr.outputs`` namespaces are deprecated.
iface = gr.Interface(
    predict_language,
    inputs=gr.Audio(type="filepath", label="Upload Language Audio file"),
    outputs=gr.Label(),
    title="Language Classifier",
    live=True,
)

# Parent of the current working directory. Not referenced anywhere in the
# visible part of this file; kept so the module-level name stays available.
script_dir = os.path.abspath(os.path.join(os.path.abspath(''), os.pardir))

# Start the web server as soon as the script runs.
iface.launch()