gradio-4 support

Files changed:
- app.py (+26 -21)
- local/description.md (+5 -0)
- local/indicator_plot.py (+112 -0)
- requirements.txt (+8 -96)
app.py
CHANGED
@@ -7,6 +7,7 @@ import lightning_module
 import pdb
 import jiwer
 from local.convert_metrics import nat2avaMOS, WER2INTELI
+from local.indicator_plot import Intelligibility_Plot, Naturalness_Plot
 
 # ASR part
 from transformers import pipeline
@@ -55,10 +56,11 @@ def calc_mos(audio_path, ref):
     trans = p(audio_path)["text"]
     # WER
     wer = jiwer.wer(ref, trans, truth_transform=transformation, hypothesis_transform=transformation)
-
     # WER convert to Intellibility score
     INTELI_score = WER2INTELI(wer*100)
 
+    INT_fig = Intelligibility_Plot(INTELI_score)
+
     # MOS
     batch = {
         'wav': out_wavs,
@@ -70,6 +72,9 @@ def calc_mos(audio_path, ref):
     predic_mos = output.mean(dim=1).squeeze().detach().numpy()*2 + 3
     # MOS to AVA MOS
     AVA_MOS = nat2avaMOS(predic_mos)
+
+    MOS_fig = Naturalness_Plot(AVA_MOS)
+
     # Phonemes per minute (PPM)
     with torch.no_grad():
         logits = phoneme_model(out_wavs).logits
@@ -78,34 +83,34 @@ def calc_mos(audio_path, ref):
     lst_phonemes = phone_transcription[0].split(" ")
     wav_vad = torchaudio.functional.vad(wav, sample_rate=sr)
     ppm = len(lst_phonemes) / (wav_vad.shape[-1] / sr) * 60
-
-    return AVA_MOS, INTELI_score, trans, phone_transcription, ppm
+    # pdb.set_trace()
+    return AVA_MOS, MOS_fig, INTELI_score, INT_fig, trans, phone_transcription, ppm
 
 
+with open("local/description.md") as f:
+    description = f.read()
 
-
-
-
-
-
-
-Add ASR based on wav2vec-960, currently only English available.
-This is a lite version of ASR, delievring faster calculation and compromise to recognition performance
-Add WER interface by Laronix Pty LTD
-"""
-
-
+# calc_mos("audio_2023-11-01_15-57-39.wav", "hello world")
+# pdb.set_trace()
+examples = [
+    [None, "Once upon a time, there was a young rat named Arthur who couldn't make up his mind."],
+    [None, "Whenever the other rats asked Arthur if he wanted to go to the park, he would say, 'I don't know.'"],
+]
 iface = gr.Interface(
     fn=calc_mos,
-    inputs=[gr.Audio(
+    inputs=[gr.Audio(type='filepath', label="Audio to evaluate"),
             gr.Textbox(placeholder="Input reference here (Don't keep this empty)", label="Reference")],
-    outputs=[gr.Textbox(placeholder="Naturalness Score", label="Naturalness Score, ranged from 0 to 5, the higher the better."),
-             gr.
+    outputs=[gr.Textbox(placeholder="Naturalness Score, ranged from 0 to 5, the higher the better.", label="Naturalness Score, ranged from 0 to 5, the higher the better.", visible=False),
+             gr.Plot(label="Naturalness Score, ranged from 0 to 5, the higher the better.", show_label=True, container=True),
+             gr.Textbox(placeholder="Intelligibility Score", label="Intelligibility Score, range from 0 to 100, the higher the better", visible=False),
+             gr.Plot(label="Intelligibility Score, range from 0 to 100, the higher the better", show_label=True, container=True),
              gr.Textbox(placeholder="Hypothesis", label="Hypothesis"),
-             gr.Textbox(placeholder="Predicted Phonemes", label="Predicted Phonemes"),
-             gr.Textbox(placeholder="Speaking Rate, Phonemes per minutes", label="
-    title="
+             gr.Textbox(placeholder="Predicted Phonemes", label="Predicted Phonemes", visible=False),
+             gr.Textbox(placeholder="Speaking Rate, Phonemes per minutes", label="Speaking Rate, Phonemes per minutes", visible=False)],
+    title="Speech Analysis by Laronix AI",
     description=description,
     allow_flagging="auto",
+    examples=examples,
 )
+
 iface.launch()
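The reworked interface above follows one pattern throughout: each raw numeric score keeps a `gr.Textbox` output hidden with `visible=False`, a `gr.Plot` gauge is shown in its place, and `calc_mos` returns one value per declared output, in order. Below is a minimal sketch of that wiring, assuming gradio 4.x and plotly are installed; the `analyze` stub is hypothetical, standing in for `calc_mos`.

```python
import gradio as gr
import plotly.graph_objects as go

def analyze(audio_path, ref):
    # Hypothetical stand-in for calc_mos: returns one value per declared
    # output component, in the same order as the outputs list below.
    fig = go.Figure(go.Indicator(mode="number+gauge", value=4.2,
                                 gauge={"shape": "bullet"}))
    return 4.2, fig, 87.0, fig, "hypothesis text", "h ɛ l oʊ", 250.0

iface = gr.Interface(
    fn=analyze,
    inputs=[gr.Audio(type="filepath", label="Audio to evaluate"),
            gr.Textbox(label="Reference")],
    outputs=[gr.Textbox(visible=False),     # raw naturalness score, hidden
             gr.Plot(label="Naturalness"),  # gauge shown instead
             gr.Textbox(visible=False),     # raw intelligibility score, hidden
             gr.Plot(label="Intelligibility"),
             gr.Textbox(label="Hypothesis"),
             gr.Textbox(visible=False),     # phonemes, hidden
             gr.Textbox(visible=False)],    # speaking rate, hidden
)

if __name__ == "__main__":
    iface.launch()
```

Keeping the hidden Textboxes means the raw scores still flow through flagging and the API, while users only see the gauges.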
local/description.md
ADDED
@@ -0,0 +1,5 @@
+## Let’s get started!
++ Use this in a quiet environment.
++ Make sure your microphone is working and attached.
++ If prompted in a pop-up, allow the browser access to your microphone.
++ When you’re ready, hit record and read the reference sentence below:
local/indicator_plot.py
ADDED
@@ -0,0 +1,112 @@
+import plotly.graph_objects as go
+
+def Intelligibility_Plot(Int_Score, fair_thre=30, good_thre=70, Upper=100, Lower=0):
+    '''
+    Int_Score: a float number between 0 and 100
+    Upper: the upper bound of the plot
+    Lower: the lower bound of the plot
+    '''
+    # Assert Int_Score is a float number between 0 and 100
+    assert isinstance(Int_Score, float | int)
+    assert Int_Score >= Lower
+    assert Int_Score <= Upper
+    # Indicator plot in different colors: under fair_thre the bar is red, then yellow, then green
+    # Design 1: show the bar in a color chosen by the thresholds
+
+    color = "#75DA99"
+    if Int_Score <= fair_thre:
+        color = "#F2ADA0"
+    elif Int_Score <= good_thre:
+        color = "#e8ee89"
+    else:
+        color = "#75DA99"
+
+    fig = go.Figure(go.Indicator(
+        mode="number+gauge",
+        gauge={'shape': "bullet",
+               'axis': {'range': [Lower, Upper+10]},
+               'bgcolor': 'white',
+               'bar': {'color': color},
+               },
+        value=Int_Score,
+        domain={'x': [0, 1], 'y': [0, 1]},
+        )
+    )
+    # # Design 2: show all thresholds in the background
+    # fig = go.Figure(go.Indicator(
+    #     mode = "number+gauge",
+    #     gauge = {'shape': "bullet",
+    #              'axis': {'range': [Lower, Upper]},
+    #              'bgcolor': 'white',
+    #              'steps': [
+    #                  {'range': [Lower, fair_thre], 'color': "#F2ADA0"},
+    #                  {'range': [fair_thre, good_thre], 'color': "#e8ee89"},
+    #                  {'range': [good_thre, Upper], 'color': "#75DA99"}],
+    #              'bar': {'color': "grey"},
+    #              },
+    #     value = Int_Score,
+    #     domain = {'x': [0, 1], 'y': [0, 1]},
+    #     )
+    # )
+    fig.update_layout(
+        autosize=False,
+        width=650,
+        height=250,
+        margin=dict(
+            l=10,
+            r=10,
+            b=10,
+            t=10,
+            pad=4
+        ),)
+    return fig
+
+def Naturalness_Plot(Nat_Score, fair_thre=2, good_thre=4, Upper=5, Lower=0):
+    '''
+    Nat_Score: a float number between 0 and 5
+    Upper: the upper bound of the plot
+    Lower: the lower bound of the plot
+    '''
+    # Assert Nat_Score is a float number between 0 and 5
+    assert isinstance(Nat_Score, float)
+    assert Nat_Score >= Lower
+    assert Nat_Score <= Upper
+
+    color = "#75DA99"
+    if Nat_Score <= fair_thre:
+        color = "#F2ADA0"
+    elif Nat_Score <= good_thre:
+        color = "#e8ee89"
+    else:
+        color = "#75DA99"
+    fig = go.Figure(go.Indicator(
+        mode="number+gauge",
+        gauge={'shape': "bullet",
+               'axis': {'range': [Lower, Upper+0.4]},
+               'bar': {'color': color}},
+        value=Nat_Score,
+        domain={'x': [0, 1], 'y': [0, 1]},
+        )
+    )
+    fig.update_layout(
+        autosize=False,
+        width=650,
+        height=250,
+        margin=dict(
+            l=10,
+            r=10,
+            b=10,
+            t=10,
+            pad=4
+        ),)
+    return fig
+
+# test cases for Intelligibility_Plot and Naturalness_Plot
+# x = Intelligibility_Plot(10)
+# x.show()
+# x = Naturalness_Plot(3.5)
+# x.show()
+# x = Intelligibility_Plot(50)
+# x.show()
+# x = Intelligibility_Plot(90)
+# x.show()
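The commented test cases at the bottom of the new module can be run directly. A minimal sketch, assuming plotly is installed and Python 3.10+ (the `float | int` check in `Intelligibility_Plot` uses PEP 604 union syntax); note that `Naturalness_Plot` asserts a plain `float`, so pass 3.5 rather than 3:

```python
# Mirrors the commented test cases at the bottom of local/indicator_plot.py.
from local.indicator_plot import Intelligibility_Plot, Naturalness_Plot

Intelligibility_Plot(10).show()   # <= fair_thre (30): red bar (#F2ADA0)
Intelligibility_Plot(50).show()   # <= good_thre (70): yellow bar (#e8ee89)
Intelligibility_Plot(90).show()   # above good_thre: green bar (#75DA99)
Naturalness_Plot(3.5).show()      # between 2 and 4 on the 0-5 scale: yellow bar
```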
requirements.txt
CHANGED
@@ -1,101 +1,13 @@
-
-
-
-
-antlr4-python3-runtime==4.8
-anyio==3.5.0
-asgiref==3.5.0
-async-timeout==4.0.2
-attrs==21.4.0
-backoff==1.10.0
-bcrypt==3.2.0
-bitarray==2.4.0
-cachetools==5.0.0
-certifi==2021.10.8
-cffi==1.15.0
-charset-normalizer==2.0.12
-click==8.0.4
-colorama==0.4.4
-cryptography==36.0.1
-cycler==0.11.0
-Cython==0.29.28
-fairseq @ git+https://github.com/pytorch/fairseq.git@d03f4e771484a433f025f47744017c2eb6e9c6bc
-fastapi==0.75.0
-ffmpy==0.3.0
-fonttools==4.30.0
-frozenlist==1.3.0
-fsspec==2022.2.0
-future==0.18.2
-google-auth==2.6.0
-google-auth-oauthlib==0.4.6
-gradio==3.46.1
-grpcio==1.44.0
-h11==0.12.0
-hydra-core==1.0.7
-idna==3.3
-importlib-metadata==4.11.3
-Jinja2==3.0.3
-kiwisolver==1.3.2
-linkify-it-py==1.0.3
-Markdown==3.3.6
-markdown-it-py==2.0.1
-MarkupSafe==2.1.0
-matplotlib==3.5.1
-mdit-py-plugins==0.3.0
-mdurl==0.1.0
-monotonic==1.6
-multidict==6.0.2
-numpy==1.22.3
-oauthlib==3.2.0
-omegaconf==2.0.6
-orjson==3.6.7
-packaging==21.3
-pandas==1.4.1
-paramiko==2.10.1
-Pillow==9.0.1
-portalocker==2.4.0
-protobuf==3.19.4
-pyasn1==0.4.8
-pyasn1-modules==0.2.8
-pycparser==2.21
-pycryptodome==3.14.1
-pydantic==1.9.0
-pyDeprecate==0.3.1
-pydub==0.25.1
-PyNaCl==1.5.0
-pyparsing==3.0.7
-python-dateutil==2.8.2
-python-multipart==0.0.5
-pytorch-lightning==1.5.10
-pytz==2021.3
-PyYAML==6.0
-regex==2022.3.2
-requests==2.27.1
-requests-oauthlib==1.3.1
-rsa==4.8
-sacrebleu==2.0.0
-six==1.16.0
-sniffio==1.2.0
-starlette==0.17.1
-tabulate==0.8.9
-tensorboard==2.8.0
-tensorboard-data-server==0.6.1
-tensorboard-plugin-wit==1.8.1
-torch==1.11.0
-torchaudio==0.11.0
-torchmetrics==0.7.2
-tqdm==4.63.0
-typing-extensions==4.1.1
-uc-micro-py==1.0.1
-urllib3==1.26.8
-uvicorn==0.17.6
-Werkzeug==2.0.3
-yarl==1.7.2
-zipp==3.7.0
-
+gradio
+pytorch_lightning
+torch
+jiwer
 transformers
+numpy
+fairseq @ git+https://github.com/pytorch/fairseq.git@d03f4e771484a433f025f47744017c2eb6e9c6bc
 tensorboardX
 jiwer
 phonemizer
 librosa
-speake
+speake
+plotly
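With the pin list trimmed from ~100 entries to 13 unpinned ones, a quick sanity check is to import each distribution's top-level module. A minimal sketch, assuming the conventional module names for these packages; the `speake` entry is skipped because its name appears truncated in the file:

```python
import importlib

# Top-level module names assumed for the distributions in requirements.txt;
# "speake" is omitted since the entry looks truncated.
modules = ["gradio", "pytorch_lightning", "torch", "jiwer", "transformers",
           "numpy", "fairseq", "tensorboardX", "phonemizer", "librosa", "plotly"]
for mod in modules:
    importlib.import_module(mod)
    print(f"{mod}: ok")
```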