fydhfzh committed on
Commit
8cbd12d
1 Parent(s): d702eb1

initial commit

Browse files
Files changed (2) hide show
  1. app.py +127 -0
  2. requirements.txt +202 -0
app.py ADDED
@@ -0,0 +1,127 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from transformers import pipeline
2
+ import gradio as gr
3
+ import torch
4
+ import numpy as np
5
+ import librosa
6
+ import matplotlib.pyplot as plt
7
+ import noisereduce
8
+
9
# Hub identifier of the fine-tuned checkpoint used for classification.
model_id = "fydhfzh/hubert-classifier-aug-fold-3"

# Audio-classification pipeline built once at import time and shared by
# every request handled by the app.
pipe = pipeline(task="audio-classification", model=model_id)
11
+
12
def get_binary_values():
    """Return the 84 seven-character binary class labels, in table order.

    Each label is the 5-bit binary form of a letter index (1..28) followed
    by the 2-bit binary form of a harakat index (1..3). Labels are ordered
    letter-major, i.e. all three harakat of letter 1 come first, then
    letter 2, and so on.

    Returns:
        list[str]: 84 strings such as ``'0000101'`` (letter 1, harakat 1).
    """
    # Distinct loop names: the original reused ``i`` for both loops, which
    # only worked because the letter code was computed before the inner loop.
    return [
        format(letter_index, '05b') + format(harakat_index, '02b')
        for letter_index in range(1, 29)
        for harakat_index in range(1, 4)
    ]
22
+
23
# Binary class labels, ordered letter-major (three harakat per letter).
binary_values = get_binary_values()

# One row per letter, three vocalised forms per row. The order must stay
# aligned with ``binary_values`` because the two are zipped together below.
arabic_letters = [
    "أَ", "إِ", "أُ",
    "بَ", "بِ", "بُ",
    "تَ", "تِ", "تُ",
    "ثَ", "ثِ", "ثُ",
    "جَ", "جِ", "جُ",
    "حَ", "حِ", "حُ",
    "خَ", "خِ", "خُ",
    "دَ", "دِ", "دُ",
    "ذَ", "ذِ", "ذُ",
    "رَ", "رِ", "رُ",
    "زَ", "زِ", "زُ",
    "سَ", "سِ", "سُ",
    "شَ", "شِ", "شُ",
    "صَ", "صِ", "صُ",
    "ضَ", "ضِ", "ضُ",
    "طَ", "طِ", "طُ",
    "ظَ", "ظِ", "ظُ",
    "عَ", "عِ", "عُ",
    "غَ", "غِ", "غُ",
    "فَ", "فِ", "فُ",
    "قَ", "قِ", "قُ",
    "كَ", "كِ", "كُ",
    "لَ", "لِ", "لُ",
    "مَ", "مِ", "مُ",
    "نَ", "نِ", "نُ",
    "هَ", "هِ", "هُ",
    "وَ", "وِ", "وُ",
    "يَ", "يِ", "يُ",
]

# Lookup from a binary class label to the vocalised letter it stands for.
# NOTE: zip() stops at the shorter input, so a length mismatch between the
# two lists would silently drop entries rather than raise.
arabic_representation = dict(zip(binary_values, arabic_letters))
58
+
59
def _fit_to_length(segment, target_len):
    """Return *segment* centre-padded with zeros, or truncated, to exactly *target_len* samples."""
    deficit = target_len - len(segment)
    if deficit <= 0:
        return segment[:target_len]

    left = deficit // 2
    # The right side absorbs the leftover sample when the deficit is odd, so
    # the result is always exactly target_len long (padding both sides with
    # int(deficit / 2) came up one sample short in that case).
    right = deficit - left
    return np.pad(segment, pad_width=(left, right), mode='constant', constant_values=(0, 0))


def split_input(raw_input, segment_len=16000, overlap=2000):
    """Split a waveform into fixed-length, non-silent segments.

    A silence threshold is derived from the signal's own frame energies and
    passed to ``librosa.effects.split``. Every detected interval is widened
    by ``overlap`` samples on both sides (clamped to the signal bounds) and
    then zero-padded or truncated to ``segment_len`` samples.

    Args:
        raw_input: 1-D array of audio samples.
        segment_len: Exact length, in samples, of every returned segment.
        overlap: Number of samples added before and after each interval.

    Returns:
        list: One array of ``segment_len`` samples per detected interval,
        in the order the intervals occur in ``raw_input``.
    """
    # Squaring the per-frame RMS gives the mean squared energy of each frame.
    frame_power = librosa.feature.rms(y=raw_input, frame_length=2048, hop_length=512) ** 2
    # Express it in dB relative to the quietest frame, then drop the frames
    # that sit exactly at that reference (0 dB).
    power_db = librosa.core.power_to_db(frame_power.squeeze(), ref=np.min, top_db=None)
    power_db = power_db[power_db != 0]

    percentile_param = 10
    extra_db_param = 0

    threshold = np.percentile(power_db, percentile_param) + extra_db_param
    # Diagnostic output kept from the original implementation.
    print(threshold)

    # NOTE(review): ``threshold`` is measured in dB above the quietest frame,
    # while ``top_db`` is interpreted by librosa as dB below the reference
    # level - confirm this is the intended relationship.
    intervals = librosa.effects.split(y=raw_input, top_db=threshold)

    total_samples = len(raw_input)
    splitted_input = []

    for start, end in intervals:
        # Widen the interval on both sides so the onset and the tail of the
        # utterance are not clipped by the silence detector.
        start = max(start - overlap, 0)
        end = min(end + overlap, total_samples)
        splitted_input.append(_fit_to_length(raw_input[start:end], segment_len))

    return splitted_input
90
+
91
def process_audio(filepath):
    """Load an audio file and return its utterance segments.

    The file is loaded at 16 kHz, passed through noise reduction,
    normalised, and finally handed to ``split_input``.

    Args:
        filepath: Path of the audio file to read.

    Returns:
        The list of segments produced by ``split_input``.
    """
    waveform, sample_rate = librosa.load(filepath, sr=16000)
    denoised = noisereduce.reduce_noise(waveform, sample_rate)
    normalised = librosa.util.normalize(denoised)
    return split_input(normalised)
98
+
99
+
100
def classify_utterances(filepath):
    """Classify every utterance in an audio file.

    Args:
        filepath: Path of the audio file to classify.

    Returns:
        str: The predicted vocalised letters, one per detected segment,
        joined by single spaces (an empty string when there are none).

    Raises:
        KeyError: If the classifier returns a label that is missing from
            ``arabic_representation``.
    """
    segments = process_audio(filepath)
    # Keep only the first entry of each pipeline result (presumably the
    # highest-scoring class - confirm against the pipeline's ordering).
    top_results = []
    for segment in segments:
        top_results.append(pipe(segment)[0])

    letters = []
    for result in top_results:
        letters.append(arabic_representation[result['label']])

    return ' '.join(letters)
106
+
107
# Both tabs run the same classifier; they differ only in where the audio
# comes from. The interfaces are built outside the Blocks context and are
# only attached to it through the TabbedInterface below.
mic_classification = gr.Interface(
    fn=classify_utterances,
    inputs=gr.Audio(sources='microphone', type='filepath'),
    outputs=gr.Textbox(),
)

file_classification = gr.Interface(
    fn=classify_utterances,
    inputs=gr.Audio(sources='upload', type='filepath'),
    outputs=gr.Textbox(),
)

with gr.Blocks() as demo:
    gr.TabbedInterface(
        interface_list=[mic_classification, file_classification],
        tab_names=['Classify Microphone', 'Classify Audio File'],
    )

# Start the web server for the app.
demo.launch()
requirements.txt ADDED
@@ -0,0 +1,202 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ absl-py==2.0.0
2
+ accelerate==0.30.1
3
+ aiofiles==23.2.1
4
+ aiohttp==3.8.1
5
+ aiosignal==1.3.1
6
+ altair==5.3.0
7
+ annotated-types==0.7.0
8
+ anyio==4.4.0
9
+ appdirs==1.4.4
10
+ asttokens==2.1.0
11
+ astunparse==1.6.3
12
+ async-timeout==4.0.3
13
+ attrs==23.2.0
14
+ audioread==3.0.1
15
+ backcall==0.2.0
16
+ Brotli==1.0.9
17
+ cachetools==5.3.2
18
+ certifi==2024.6.2
19
+ cffi==1.16.0
20
+ charset-normalizer==2.0.4
21
+ click==8.1.7
22
+ colorama==0.4.6
23
+ comm==0.2.1
24
+ contourpy==1.2.0
25
+ cycler==0.12.1
26
+ datasets==2.19.1
27
+ debugpy==1.6.3
28
+ decorator==5.1.1
29
+ dill==0.3.8
30
+ dm-tree==0.1.8
31
+ dnspython==2.6.1
32
+ docker-pycreds==0.4.0
33
+ email_validator==2.2.0
34
+ entrypoints==0.4
35
+ evaluate==0.4.2
36
+ exceptiongroup==1.2.0
37
+ executing==1.2.0
38
+ fastapi==0.111.0
39
+ fastapi-cli==0.0.4
40
+ ffmpy==0.3.0
41
+ filelock==3.13.1
42
+ flatbuffers==23.5.26
43
+ fonttools==4.46.0
44
+ frozenlist==1.4.0
45
+ fsspec==2024.3.1
46
+ gast==0.5.4
47
+ gevent==23.9.0.post1
48
+ gitdb==4.0.7
49
+ GitPython==3.1.37
50
+ gmpy2==2.1.2
51
+ google-auth==2.23.4
52
+ google-auth-oauthlib==1.1.0
53
+ google-pasta==0.2.0
54
+ gradio==4.37.2
55
+ gradio_client==1.0.2
56
+ greenlet==3.0.3
57
+ grpcio==1.59.3
58
+ h11==0.14.0
59
+ h2==4.1.0
60
+ h5py==3.11.0
61
+ hpack==4.0.0
62
+ httpcore==1.0.5
63
+ httpx==0.27.0
64
+ huggingface_hub==0.23.0
65
+ hyperframe==6.0.1
66
+ idna==3.7
67
+ importlib_metadata==7.1.0
68
+ importlib_resources==6.4.0
69
+ ipykernel==6.17.0
70
+ ipython==8.6.0
71
+ jedi==0.18.1
72
+ Jinja2==3.1.3
73
+ jiwer==3.0.4
74
+ joblib==1.4.2
75
+ jsonschema==4.22.0
76
+ jsonschema-specifications==2023.12.1
77
+ jupyter_client==7.4.4
78
+ jupyter_core==4.11.2
79
+ keras==3.0.1
80
+ kiwisolver==1.4.5
81
+ lazy_loader==0.4
82
+ libclang==16.0.6
83
+ librosa==0.10.2.post1
84
+ llvmlite==0.42.0
85
+ Markdown==3.6
86
+ markdown-it-py==3.0.0
87
+ MarkupSafe==2.1.3
88
+ matplotlib==3.8.2
89
+ matplotlib-inline==0.1.6
90
+ mdurl==0.1.2
91
+ ml-dtypes==0.3.2
92
+ mpmath==1.3.0
93
+ msgpack==1.0.8
94
+ multidict==6.0.4
95
+ multiprocess==0.70.13
96
+ munkres==1.1.4
97
+ namex==0.0.7
98
+ nest-asyncio==1.5.6
99
+ networkx==3.1
100
+ noisereduce==3.0.2
101
+ numba==0.59.1
102
+ numpy==1.23.1
103
+ oauthlib==3.2.2
104
+ opt-einsum==3.3.0
105
+ orjson==3.10.4
106
+ packaging==23.2
107
+ pandas==1.4.4
108
+ parso==0.8.3
109
+ pathtools==0.1.2
110
+ patsy==0.5.6
111
+ pickleshare==0.7.5
112
+ pillow==10.2.0
113
+ pip==24.0
114
+ pkgutil_resolve_name==1.3.10
115
+ platformdirs==3.10.0
116
+ pooch==1.8.1
117
+ prompt-toolkit==3.0.32
118
+ protobuf==4.23.4
119
+ psutil==5.9.3
120
+ pure-eval==0.2.2
121
+ pyarrow==14.0.2
122
+ pyarrow-hotfix==0.6
123
+ pycparser==2.22
124
+ pydantic==2.8.0
125
+ pydantic_core==2.20.0
126
+ pydub==0.25.1
127
+ Pygments==2.13.0
128
+ pyparsing==3.0.9
129
+ PySastrawi==1.2.0
130
+ PySocks==1.7.1
131
+ python-dateutil==2.8.2
132
+ python-multipart==0.0.9
133
+ python-version==0.0.2
134
+ pytz==2024.1
135
+ pywin32==304
136
+ PyYAML==6.0.1
137
+ pyzmq==24.0.1
138
+ rapidfuzz==3.9.1
139
+ referencing==0.35.1
140
+ regex==2023.10.3
141
+ requests==2.31.0
142
+ requests-oauthlib==1.3.1
143
+ rich==13.7.0
144
+ rpds-py==0.18.1
145
+ rsa==4.9
146
+ ruff==0.5.0
147
+ safetensors==0.4.2
148
+ scikit-learn==1.5.0
149
+ scipy==1.13.1
150
+ seaborn==0.13.2
151
+ semantic-version==2.10.0
152
+ sentry-sdk==1.9.0
153
+ setproctitle==1.2.2
154
+ setuptools==69.5.1
155
+ shellingham==1.5.4
156
+ six==1.16.0
157
+ smmap==4.0.0
158
+ sniffio==1.3.1
159
+ soundfile==0.12.1
160
+ soxr==0.3.7
161
+ stack-data==0.6.0
162
+ starlette==0.37.2
163
+ statsmodels==0.14.2
164
+ sympy==1.12
165
+ tensorboard==2.16.2
166
+ tensorboard-data-server==0.7.2
167
+ tensorflow==2.16.1
168
+ tensorflow-estimator==2.15.0
169
+ tensorflow-intel==2.16.1
170
+ tensorflow-io-gcs-filesystem==0.31.0
171
+ termcolor==2.3.0
172
+ tf_keras==2.16.0
173
+ threadpoolctl==3.5.0
174
+ tokenizers==0.15.1
175
+ tomlkit==0.12.0
176
+ toolz==0.12.1
177
+ torch==2.3.0
178
+ tornado==6.2
179
+ tqdm==4.66.4
180
+ traitlets==5.5.0
181
+ transformers==4.38.2
182
+ typer==0.12.3
183
+ typer-slim==0.12.3
184
+ typing_extensions==4.11.0
185
+ ujson==5.10.0
186
+ unicodedata2==15.1.0
187
+ urllib3==2.2.1
188
+ uvicorn==0.30.1
189
+ wandb==0.16.5
190
+ wcwidth==0.2.5
191
+ websockets==11.0.3
192
+ Werkzeug==3.0.3
193
+ wheel==0.43.0
194
+ win-inet-pton==1.1.0
195
+ wordcloud==1.9.2
196
+ wrapt==1.16.0
197
+ xgboost==1.7.5
198
+ xxhash==2.0.2
199
+ yarl==1.7.2
200
+ zipp==3.17.0
201
+ zope.event==5.0
202
+ zope.interface==6.4.post2