Spaces:
Running
Running
Yurii Paniv
committed on
Commit
•
cfb23ad
1
Parent(s):
9430ecf
Fix model
Browse files
- app.py +8 -10
- requirements.txt +1 -1
app.py
CHANGED
@@ -7,11 +7,11 @@ from pydub.audio_segment import AudioSegment
|
|
7 |
import requests
|
8 |
from os.path import exists
|
9 |
from stt import Model
|
10 |
-
|
11 |
|
12 |
MODEL_NAMES = [
|
13 |
-
"
|
14 |
-
"
|
15 |
]
|
16 |
|
17 |
# download model
|
@@ -52,12 +52,11 @@ def download(url, file_name):
|
|
52 |
|
53 |
def stt(audio: Tuple[int, np.array], model_name: str):
|
54 |
sample_rate, audio = audio
|
|
|
55 |
use_scorer = True if model_name == "With scorer" else False
|
56 |
|
57 |
-
if sample_rate != 16000:
|
58 |
-
raise ValueError("Incorrect sample rate.")
|
59 |
-
|
60 |
recognized_result = client(audio, sample_rate, use_scorer)
|
|
|
61 |
|
62 |
return recognized_result
|
63 |
|
@@ -67,14 +66,13 @@ def _convert_audio(audio_data: np.array, sample_rate: int):
|
|
67 |
source_audio.write(audio_data)
|
68 |
source_audio.seek(0)
|
69 |
output_audio = BytesIO()
|
70 |
-
wav_file = AudioSegment.from_raw(
|
71 |
source_audio,
|
72 |
channels=1,
|
73 |
-
sample_width=
|
74 |
frame_rate=sample_rate
|
75 |
)
|
76 |
-
wav_file =
|
77 |
-
wav_file.export(output_audio, "wav", codec="pcm_s16le")
|
78 |
output_audio.seek(0)
|
79 |
return output_audio
|
80 |
|
|
|
7 |
import requests
|
8 |
from os.path import exists
|
9 |
from stt import Model
|
10 |
+
from datetime import datetime
|
11 |
|
12 |
MODEL_NAMES = [
|
13 |
+
"No scorer",
|
14 |
+
"With scorer"
|
15 |
]
|
16 |
|
17 |
# download model
|
|
|
52 |
|
53 |
def stt(audio: Tuple[int, np.array], model_name: str):
|
54 |
sample_rate, audio = audio
|
55 |
+
print(f"Input sample rate: {sample_rate}. Audio file length: {round(audio.shape[0]/sample_rate ,2)}")
|
56 |
use_scorer = True if model_name == "With scorer" else False
|
57 |
|
|
|
|
|
|
|
58 |
recognized_result = client(audio, sample_rate, use_scorer)
|
59 |
+
print(f"Time: {datetime.utcnow()}. Transcript: `{recognized_result}`. Scorer: {use_scorer}.")
|
60 |
|
61 |
return recognized_result
|
62 |
|
|
|
66 |
source_audio.write(audio_data)
|
67 |
source_audio.seek(0)
|
68 |
output_audio = BytesIO()
|
69 |
+
wav_file: AudioSegment = AudioSegment.from_raw(
|
70 |
source_audio,
|
71 |
channels=1,
|
72 |
+
sample_width=4,
|
73 |
frame_rate=sample_rate
|
74 |
)
|
75 |
+
wav_file.export(output_audio, "wav", codec="pcm_s16le", parameters=["-ar", "16k"])
|
|
|
76 |
output_audio.seek(0)
|
77 |
return output_audio
|
78 |
|
requirements.txt
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
gradio==2.4.5
|
2 |
-
STT==1.
|
3 |
pydub==0.25.1
|
|
|
1 |
gradio==2.4.5
|
2 |
+
STT==1.3.0
|
3 |
pydub==0.25.1
|