Spaces:
Runtime error
Runtime error
Update app.py
Browse files
app.py
CHANGED
@@ -15,16 +15,12 @@ from dotenv import load_dotenv
|
|
15 |
torch.manual_seed(0)
|
16 |
np.random.seed(0)
|
17 |
|
18 |
-
# CleanUnet Dependencies
|
19 |
-
|
20 |
import json
|
21 |
from copy import deepcopy
|
22 |
|
23 |
import numpy as np
|
24 |
import torch
|
25 |
|
26 |
-
# from util import print_size, sampling
|
27 |
-
|
28 |
import torchaudio
|
29 |
import torchaudio.transforms as T
|
30 |
|
@@ -36,33 +32,6 @@ np.random.seed(0)
|
|
36 |
|
37 |
SAMPLE_RATE = 8000
|
38 |
|
39 |
-
'''
|
40 |
-
CONFIG = "configs/DNS-large-full.json"
|
41 |
-
# CHECKPOINT = "./exp/DNS-large-full/checkpoint/pretrained.pkl"
|
42 |
-
|
43 |
-
# Parse configs. Globals nicer in this case
|
44 |
-
with open(CONFIG) as f:
|
45 |
-
data = f.read()
|
46 |
-
config = json.loads(data)
|
47 |
-
gen_config = config["gen_config"]
|
48 |
-
global network_config
|
49 |
-
network_config = config["network_config"] # to define wavenet
|
50 |
-
global train_config
|
51 |
-
train_config = config["train_config"] # train config
|
52 |
-
global trainset_config
|
53 |
-
trainset_config = config["trainset_config"] # to read trainset configurations
|
54 |
-
|
55 |
-
# global use_denoise
|
56 |
-
# use_denoise = False
|
57 |
-
|
58 |
-
# setup local experiment path
|
59 |
-
exp_path = train_config["exp_path"]
|
60 |
-
print('exp_path:', exp_path)
|
61 |
-
|
62 |
-
# load data
|
63 |
-
loader_config = deepcopy(trainset_config)
|
64 |
-
loader_config["crop_length_sec"] = 0
|
65 |
-
'''
|
66 |
#############################################################################################################
|
67 |
|
68 |
load_dotenv()
|
@@ -114,33 +83,24 @@ def get_phonetic_transcription(text: str):
|
|
114 |
return None
|
115 |
|
116 |
|
117 |
-
def tts_inference(text: str, speaker_idx: str = None
|
118 |
# synthesize
|
119 |
if synthesizer is None:
|
120 |
raise NameError("model not found")
|
121 |
t1 = time.time()
|
122 |
wavs = synthesizer.tts(text, speaker_idx)
|
123 |
-
print(type(wavs))
|
124 |
-
|
125 |
-
wavs_den = torch.Tensor(wavs).unsqueeze(0) # one sample
|
126 |
-
# wavs_den = denoise(wavs_den).tolist()
|
127 |
-
else:
|
128 |
-
wavs_den = wavs
|
129 |
|
130 |
# return output
|
131 |
with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as fp:
|
132 |
# wavs must be a list of integers
|
133 |
-
synthesizer.save_wav(
|
134 |
t2 = time.time() - t1
|
135 |
print(round(t2, 2))
|
136 |
output_audio = fp.name
|
137 |
|
138 |
-
|
139 |
-
# wavs must be a list of integers
|
140 |
-
synthesizer.save_wav(wavs_den, fp)
|
141 |
-
output_audio_den = fp.name
|
142 |
-
|
143 |
-
return output_audio, output_audio_den
|
144 |
|
145 |
|
146 |
title = "🗣️ Catalan Multispeaker TTS Tester 🗣️"
|
@@ -222,11 +182,10 @@ with gr.Blocks(**AinaGradioTheme().get_kwargs()) as app:
|
|
222 |
"Submit",
|
223 |
variant="primary",
|
224 |
)
|
225 |
-
use_denoise = gr.Radio(choices=[("Yes", 0), ("No", 1)], value=0)
|
226 |
with gr.Column(variant='panel'):
|
227 |
output_audio = gr.Audio(label="Output", type="filepath", autoplay=True, show_share_button=False)
|
228 |
-
output_audio_den = gr.Audio(label="Output denoised", type="filepath", autoplay=False,
|
229 |
-
show_share_button=False)
|
230 |
|
231 |
output_phonetic = gr.Textbox(label="Phonetic Transcription", readonly=True)
|
232 |
|
@@ -234,7 +193,7 @@ with gr.Blocks(**AinaGradioTheme().get_kwargs()) as app:
|
|
234 |
input_.change(fn=change_interactive, inputs=[input_], outputs=button)
|
235 |
|
236 |
# clear_btn.click(fn=clean, inputs=[], outputs=[input_, output_audio, output_phonetic], queue=False)
|
237 |
-
submit_btn.click(fn=submit_input, inputs=[input_, speaker_id
|
238 |
output_audio_den,
|
239 |
output_phonetic])
|
240 |
|
|
|
15 |
torch.manual_seed(0)
|
16 |
np.random.seed(0)
|
17 |
|
|
|
|
|
18 |
import json
|
19 |
from copy import deepcopy
|
20 |
|
21 |
import numpy as np
|
22 |
import torch
|
23 |
|
|
|
|
|
24 |
import torchaudio
|
25 |
import torchaudio.transforms as T
|
26 |
|
|
|
32 |
|
33 |
SAMPLE_RATE = 8000
|
34 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
35 |
#############################################################################################################
|
36 |
|
37 |
load_dotenv()
|
|
|
83 |
return None
|
84 |
|
85 |
|
86 |
+
def tts_inference(text: str, speaker_idx: str = None):
    """Synthesize ``text`` and return the path of the WAV file that was written.

    The audio is produced by the module-level ``synthesizer``; ``speaker_idx``
    is forwarded to ``synthesizer.tts`` unchanged. The elapsed time in seconds,
    rounded to two decimals, is printed after the audio has been saved.

    Raises:
        NameError: if ``synthesizer`` is ``None``.
    """
    if synthesizer is None:
        raise NameError("model not found")

    started = time.time()
    waveform = synthesizer.tts(text, speaker_idx)

    # delete=False keeps the file on disk after the handle is closed, so the
    # returned path can still be read by the caller.
    with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as wav_file:
        # NOTE(review): the original comment states that the samples passed to
        # save_wav must be a list of integers -- confirm against the synthesizer.
        synthesizer.save_wav(waveform, wav_file)
        elapsed = time.time() - started
        print(round(elapsed, 2))
        wav_path = wav_file.name

    return wav_path
|
|
|
|
|
|
|
|
|
|
|
104 |
|
105 |
|
106 |
title = "🗣️ Catalan Multispeaker TTS Tester 🗣️"
|
|
|
182 |
"Submit",
|
183 |
variant="primary",
|
184 |
)
|
185 |
+
# use_denoise = gr.Radio(choices=[("Yes", 0), ("No", 1)], value=0)
|
186 |
with gr.Column(variant='panel'):
|
187 |
output_audio = gr.Audio(label="Output", type="filepath", autoplay=True, show_share_button=False)
|
188 |
+
# output_audio_den = gr.Audio(label="Output denoised", type="filepath", autoplay=False, show_share_button=False)
|
|
|
189 |
|
190 |
output_phonetic = gr.Textbox(label="Phonetic Transcription", readonly=True)
|
191 |
|
|
|
193 |
input_.change(fn=change_interactive, inputs=[input_], outputs=button)
|
194 |
|
195 |
# clear_btn.click(fn=clean, inputs=[], outputs=[input_, output_audio, output_phonetic], queue=False)
|
196 |
+
# Run submit_input when the submit button is clicked.
# `output_audio_den` is no longer created (its gr.Audio definition is commented
# out), so listing it as an output raised NameError while the UI was being built.
# NOTE(review): submit_input is not visible here -- confirm it returns exactly
# two values (audio file path, phonetic transcription) to match these outputs.
submit_btn.click(fn=submit_input, inputs=[input_, speaker_id], outputs=[output_audio,
output_phonetic])
|
199 |
|