Update app.py
Browse files
app.py
CHANGED
@@ -9,12 +9,8 @@ import jiwer
|
|
9 |
|
10 |
# ASR part
|
11 |
from transformers import pipeline
|
12 |
-
|
13 |
-
|
14 |
-
"automatic-speech-recognition",
|
15 |
-
model="KevinGeng/whipser_medium_en_PAL300_step25",
|
16 |
-
device=0,
|
17 |
-
)
|
18 |
# WER part
|
19 |
transformation = jiwer.Compose([
|
20 |
jiwer.ToLowerCase(),
|
@@ -92,7 +88,7 @@ Add WER interface.
|
|
92 |
|
93 |
iface = gr.Interface(
|
94 |
fn=calc_mos,
|
95 |
-
inputs=[gr.Audio(type='
|
96 |
gr.Textbox(placeholder="Input reference here (Don't keep this empty)", label="Reference")],
|
97 |
outputs=[gr.Textbox(placeholder="Naturalness evaluation, ranged 1 to 5, the higher the better.", label="Predicted MOS"),
|
98 |
gr.Textbox(placeholder="Hypothesis", label="Hypothesis"),
|
|
|
9 |
|
10 |
# ASR part
|
11 |
from transformers import pipeline
|
12 |
+
p = pipeline("automatic-speech-recognition")
|
13 |
+
|
|
|
|
|
|
|
|
|
14 |
# WER part
|
15 |
transformation = jiwer.Compose([
|
16 |
jiwer.ToLowerCase(),
|
|
|
88 |
|
89 |
iface = gr.Interface(
|
90 |
fn=calc_mos,
|
91 |
+
inputs=[gr.Audio(type='microphone', label="Audio to evaluate"),
|
92 |
gr.Textbox(placeholder="Input reference here (Don't keep this empty)", label="Reference")],
|
93 |
outputs=[gr.Textbox(placeholder="Naturalness evaluation, ranged 1 to 5, the higher the better.", label="Predicted MOS"),
|
94 |
gr.Textbox(placeholder="Hypothesis", label="Hypothesis"),
|