Add new phoneme model
Browse files
app.py
CHANGED
@@ -23,8 +23,8 @@ transformation = jiwer.Compose([
|
|
23 |
|
24 |
# WPM part
|
25 |
from transformers import Wav2Vec2Processor, Wav2Vec2ForCTC
|
26 |
-
|
27 |
-
|
28 |
# phoneme_model = pipeline(model="facebook/wav2vec2-xlsr-53-espeak-cv-ft")
|
29 |
class ChangeSampleRate(nn.Module):
|
30 |
def __init__(self, input_rate: int, output_rate: int):
|
@@ -79,10 +79,10 @@ def calc_mos(audio_path, ref):
|
|
79 |
MOS_fig = Naturalness_Plot(AVA_MOS)
|
80 |
|
81 |
# Phonemes per minute (PPM)
|
82 |
-
|
83 |
-
|
84 |
-
|
85 |
-
|
86 |
|
87 |
# Disable PPM for now
|
88 |
phone_transcription = ['D U M M Y']
|
@@ -95,8 +95,7 @@ def calc_mos(audio_path, ref):
|
|
95 |
ppm = len(lst_phonemes) / (wav_vad.shape[-1] / sr) * 60
|
96 |
|
97 |
# pdb.set_trace()
|
98 |
-
|
99 |
-
return AVA_MOS, MOS_fig, INTELI_score, INT_fig, trans, f0_db_fig
|
100 |
|
101 |
|
102 |
with open("local/description.md") as f:
|
@@ -118,6 +117,8 @@ iface = gr.Interface(
|
|
118 |
gr.Textbox(placeholder="Intelligibility Score", label = "Intelligibility Score, range from 0 to 100, the higher the better", visible=False),
|
119 |
gr.Plot(label="Intelligibility Score, range from 0 to 100, the higher the better", show_label=True, container=True),
|
120 |
gr.Textbox(placeholder="Hypothesis", label="Hypothesis"),
|
|
|
|
|
121 |
gr.Plot(label="Pitch Contour and dB Analysis", show_label=True, container=True)],
|
122 |
title="Speech Analysis by Laronix AI",
|
123 |
description=description,
|
@@ -125,7 +126,5 @@ iface = gr.Interface(
|
|
125 |
examples=examples,
|
126 |
)
|
127 |
# Currently remove PPM and Phonemes
|
128 |
-
# gr.Textbox(placeholder="Predicted Phonemes", label="Predicted Phonemes", visible=False),
|
129 |
-
# gr.Textbox(placeholder="Speaking Rate, Phonemes per minutes", label="Speaking Rate, Phonemes per minutes", visible=False),
|
130 |
# add password to protect the interface
|
131 |
iface.launch(share=False, auth=['Laronix', 'LaronixSLP'], auth_message="Authentication Required, ask kevin@laronix.com for password.\n Thanks for your cooperation!")
|
|
|
23 |
|
24 |
# WPM part
|
25 |
from transformers import Wav2Vec2Processor, Wav2Vec2ForCTC
|
26 |
+
processor = Wav2Vec2Processor.from_pretrained("vitouphy/wav2vec2-xls-r-300m-timit-phoneme")
|
27 |
+
# CTC phoneme recogniser (TIMIT phoneme set) paired with `processor`.
model = Wav2Vec2ForCTC.from_pretrained("vitouphy/wav2vec2-xls-r-300m-timit-phoneme")
# calc_mos() invokes this network as `phoneme_model(out_wavs)`; without this
# alias that call raises NameError because the only other `phoneme_model`
# definition in view is commented out. `model` is kept for any existing users.
phoneme_model = model
|
28 |
# phoneme_model = pipeline(model="facebook/wav2vec2-xlsr-53-espeak-cv-ft")
|
29 |
class ChangeSampleRate(nn.Module):
|
30 |
def __init__(self, input_rate: int, output_rate: int):
|
|
|
79 |
MOS_fig = Naturalness_Plot(AVA_MOS)
|
80 |
|
81 |
# Phonemes per minute (PPM)
|
82 |
+
with torch.no_grad():
|
83 |
+
logits = phoneme_model(out_wavs).logits
|
84 |
+
phone_predicted_ids = torch.argmax(logits, dim=-1)
|
85 |
+
phone_transcription = processor.batch_decode(phone_predicted_ids)
|
86 |
|
87 |
# Disable PPM for now
|
88 |
phone_transcription = ['D U M M Y']
|
|
|
95 |
ppm = len(lst_phonemes) / (wav_vad.shape[-1] / sr) * 60
|
96 |
|
97 |
# pdb.set_trace()
|
98 |
+
return AVA_MOS, MOS_fig, INTELI_score, INT_fig, trans, phone_transcription, ppm, f0_db_fig
|
|
|
99 |
|
100 |
|
101 |
with open("local/description.md") as f:
|
|
|
117 |
gr.Textbox(placeholder="Intelligibility Score", label = "Intelligibility Score, range from 0 to 100, the higher the better", visible=False),
|
118 |
gr.Plot(label="Intelligibility Score, range from 0 to 100, the higher the better", show_label=True, container=True),
|
119 |
gr.Textbox(placeholder="Hypothesis", label="Hypothesis"),
|
120 |
+
gr.Textbox(placeholder="Predicted Phonemes", label="Predicted Phonemes", visible=False),
|
121 |
+
gr.Textbox(placeholder="Speaking Rate, Phonemes per minutes", label="Speaking Rate, Phonemes per minutes", visible=False),
|
122 |
gr.Plot(label="Pitch Contour and dB Analysis", show_label=True, container=True)],
|
123 |
title="Speech Analysis by Laronix AI",
|
124 |
description=description,
|
|
|
126 |
examples=examples,
|
127 |
)
|
128 |
# PPM and Phonemes outputs are currently hidden (visible=False) rather than removed
|
|
|
|
|
129 |
# add password to protect the interface
|
130 |
iface.launch(share=False, auth=['Laronix', 'LaronixSLP'], auth_message="Authentication Required, ask kevin@laronix.com for password.\n Thanks for your cooperation!")
|