add password protection
Browse files
app.py
CHANGED
@@ -48,10 +48,13 @@ def calc_mos(audio_path, ref):
|
|
48 |
wav, sr = torchaudio.load(audio_path, channels_first=True)
|
49 |
if wav.shape[0] > 1:
|
50 |
wav = wav.mean(dim=0, keepdim=True) # Mono channel
|
|
|
|
|
51 |
osr = 16_000
|
52 |
batch = wav.unsqueeze(0).repeat(10, 1, 1)
|
53 |
csr = ChangeSampleRate(sr, osr)
|
54 |
out_wavs = csr(wav)
|
|
|
55 |
# ASR
|
56 |
trans = p(audio_path)["text"]
|
57 |
# WER
|
@@ -82,7 +85,68 @@ def calc_mos(audio_path, ref):
|
|
82 |
phone_transcription = processor.batch_decode(phone_predicted_ids)
|
83 |
lst_phonemes = phone_transcription[0].split(" ")
|
84 |
wav_vad = torchaudio.functional.vad(wav, sample_rate=sr)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
85 |
ppm = len(lst_phonemes) / (wav_vad.shape[-1] / sr) * 60
|
|
|
86 |
# pdb.set_trace()
|
87 |
return AVA_MOS, MOS_fig, INTELI_score, INT_fig, trans, phone_transcription, ppm
|
88 |
|
@@ -90,8 +154,9 @@ def calc_mos(audio_path, ref):
|
|
90 |
with open("local/description.md") as f:
|
91 |
description = f.read()
|
92 |
|
93 |
-
|
94 |
-
|
|
|
95 |
examples = [
|
96 |
["local/Julianna_Set1_Author_01.wav", "Once upon a time, there was a young rat named Arthur who couldn't make up his mind."],
|
97 |
["local/Patient_Arthur_set1_002_noisy.wav", "Whenever the other rats asked him if he would like to go hunting with them, he would answer in a soft voice, 'I don't know.'"],
|
@@ -112,5 +177,5 @@ iface = gr.Interface(
|
|
112 |
allow_flagging="auto",
|
113 |
examples=examples,
|
114 |
)
|
115 |
-
|
116 |
-
iface.launch()
|
|
|
48 |
wav, sr = torchaudio.load(audio_path, channels_first=True)
|
49 |
if wav.shape[0] > 1:
|
50 |
wav = wav.mean(dim=0, keepdim=True) # Mono channel
|
51 |
+
# get decibel
|
52 |
+
|
53 |
osr = 16_000
|
54 |
batch = wav.unsqueeze(0).repeat(10, 1, 1)
|
55 |
csr = ChangeSampleRate(sr, osr)
|
56 |
out_wavs = csr(wav)
|
57 |
+
db = torchaudio.transforms.AmplitudeToDB(stype="amplitude", top_db=80)(wav)
|
58 |
# ASR
|
59 |
trans = p(audio_path)["text"]
|
60 |
# WER
|
|
|
85 |
phone_transcription = processor.batch_decode(phone_predicted_ids)
|
86 |
lst_phonemes = phone_transcription[0].split(" ")
|
87 |
wav_vad = torchaudio.functional.vad(wav, sample_rate=sr)
|
88 |
+
import matplotlib.pyplot as plt
|
89 |
+
|
90 |
+
fig = plt.figure(figsize=(30, 10))
|
91 |
+
# ax = fig.subplots(1, 1)
|
92 |
+
# pdb.set_trace()
|
93 |
+
|
94 |
+
# time_x = torch.arange(wav.shape[-1]) / sr
|
95 |
+
# # ax.plot(time_x, wav_vad.squeeze())
|
96 |
+
# pdb.set_trace()
|
97 |
+
# ax.plot(time_x, wav.squeeze(), alpha=0.5)
|
98 |
+
# get f0
|
99 |
+
f0 = torchaudio.functional.compute_kaldi_pitch(wav, frame_length=25, frame_shift=20, min_f0=20, max_f0=600, sample_rate=sr)[0, :, 1]
|
100 |
+
# # get f0 time x axis
|
101 |
+
# time_x_f0 = torch.arange(f0.shape[-1]) * 20 / 1000
|
102 |
+
# plot f0 with x axis as time
|
103 |
+
|
104 |
+
# spectrogram with x axis as time
|
105 |
+
pdb.set_trace()
|
106 |
+
spectrogram = torchaudio.transforms.MelSpectrogram(sample_rate=sr, n_fft=400, hop_length=160, n_mels=80)(wav)
|
107 |
+
|
108 |
+
spectrogram = torchaudio.transforms.AmplitudeToDB(stype="power", top_db=80)(spectrogram)
|
109 |
+
|
110 |
+
# plot spectrogram with x axis as time, y axis as frequency bins
|
111 |
+
ax2 = fig.add_subplot(212)
|
112 |
+
ax2.set_xlabel("Time (s)")
|
113 |
+
ax2.set_ylabel("Frequency (Hz)")
|
114 |
+
ax2.set_title("Spectrogram")
|
115 |
+
ax2.set_xticks(torch.arange(0, spectrogram.shape[-1], 100))
|
116 |
+
ax2.set_xticklabels(torch.arange(0, spectrogram.shape[-1], 100) * 20 / 1000)
|
117 |
+
ax2.set_yticks(torch.arange(0, spectrogram.shape[1], 10))
|
118 |
+
ax2.set_yticklabels(torch.arange(0, spectrogram.shape[1], 10) * 800 / 80)
|
119 |
+
|
120 |
+
# add colorbar to spectrogram with limitation from -80 to 0
|
121 |
+
cbar = plt.colorbar(ax2.imshow(spectrogram.squeeze().numpy(), aspect='auto', origin='lower'))
|
122 |
+
cbar.set_label("dB")
|
123 |
+
ax2.grid()
|
124 |
+
|
125 |
+
# plot f0 with x axis as time, y axis as frequency bins, y is limited from 0 to 600
|
126 |
+
ax1 = fig.add_subplot(211)
|
127 |
+
ax1.set_xlabel("Time (s)")
|
128 |
+
ax1.set_ylabel("Frequency (Hz)")
|
129 |
+
ax1.set_title("F0")
|
130 |
+
ax1.set_xticks(torch.arange(0, f0.shape[-1], 100))
|
131 |
+
ax1.set_xticklabels(torch.arange(0, f0.shape[-1], 100) * 20 / 1000)
|
132 |
+
ax1.set_yticks(torch.arange(0, 600, 50))
|
133 |
+
ax1.set_yticklabels(torch.arange(0, 600, 50))
|
134 |
+
|
135 |
+
# add colorbar to f0 with limitation from 0 to 600
|
136 |
+
# cbar = plt.colorbar(ax1.imshow(f0.squeeze().numpy(), aspect='auto', origin='lower'))
|
137 |
+
# cbar.set_label("Hz")
|
138 |
+
ax1.grid()
|
139 |
+
|
140 |
+
# remove unvoiced part based on vad
|
141 |
+
|
142 |
+
# plot f0 with x axis as time
|
143 |
+
|
144 |
+
# time_x = torch.arange(f0.shape[-1]) * 20 / 1000
|
145 |
+
# plt.plot(time_x, f0.squeeze())
|
146 |
+
# fig.savefig("vad.png")
|
147 |
+
# pdb.set_trace()
|
148 |
ppm = len(lst_phonemes) / (wav_vad.shape[-1] / sr) * 60
|
149 |
+
|
150 |
# pdb.set_trace()
|
151 |
return AVA_MOS, MOS_fig, INTELI_score, INT_fig, trans, phone_transcription, ppm
|
152 |
|
|
|
154 |
with open("local/description.md") as f:
|
155 |
description = f.read()
|
156 |
|
157 |
+
calc_mos("JOHN1.wav", "he would answer in a soft voice, 'I don't know.'")
|
158 |
+
|
159 |
+
pdb.set_trace()
|
160 |
examples = [
|
161 |
["local/Julianna_Set1_Author_01.wav", "Once upon a time, there was a young rat named Arthur who couldn't make up his mind."],
|
162 |
["local/Patient_Arthur_set1_002_noisy.wav", "Whenever the other rats asked him if he would like to go hunting with them, he would answer in a soft voice, 'I don't know.'"],
|
|
|
177 |
allow_flagging="auto",
|
178 |
examples=examples,
|
179 |
)
|
180 |
+
# add password to protect the interface
|
181 |
+
iface.launch(share=True, auth=['Laronix', 'LaronixSLP'], auth_message="Authentication Required, ask kevin@laronix.com for password,\n Thanks for your cooperation!")
|