AlexK-PL committed on
Commit
0860c4d
1 Parent(s): d739d73

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +8 -49
app.py CHANGED
@@ -15,16 +15,12 @@ from dotenv import load_dotenv
15
  torch.manual_seed(0)
16
  np.random.seed(0)
17
 
18
- # CleanUnet Dependencies
19
-
20
  import json
21
  from copy import deepcopy
22
 
23
  import numpy as np
24
  import torch
25
 
26
- # from util import print_size, sampling
27
-
28
  import torchaudio
29
  import torchaudio.transforms as T
30
 
@@ -36,33 +32,6 @@ np.random.seed(0)
36
 
37
  SAMPLE_RATE = 8000
38
 
39
- '''
40
- CONFIG = "configs/DNS-large-full.json"
41
- # CHECKPOINT = "./exp/DNS-large-full/checkpoint/pretrained.pkl"
42
-
43
- # Parse configs. Globals nicer in this case
44
- with open(CONFIG) as f:
45
- data = f.read()
46
- config = json.loads(data)
47
- gen_config = config["gen_config"]
48
- global network_config
49
- network_config = config["network_config"] # to define wavenet
50
- global train_config
51
- train_config = config["train_config"] # train config
52
- global trainset_config
53
- trainset_config = config["trainset_config"] # to read trainset configurations
54
-
55
- # global use_denoise
56
- # use_denoise = False
57
-
58
- # setup local experiment path
59
- exp_path = train_config["exp_path"]
60
- print('exp_path:', exp_path)
61
-
62
- # load data
63
- loader_config = deepcopy(trainset_config)
64
- loader_config["crop_length_sec"] = 0
65
- '''
66
  #############################################################################################################
67
 
68
  load_dotenv()
@@ -114,33 +83,24 @@ def get_phonetic_transcription(text: str):
114
  return None
115
 
116
 
117
- def tts_inference(text: str, speaker_idx: str = None, use_denoise: int = 0):
118
  # synthesize
119
  if synthesizer is None:
120
  raise NameError("model not found")
121
  t1 = time.time()
122
  wavs = synthesizer.tts(text, speaker_idx)
123
- print(type(wavs))
124
- if use_denoise == 0:
125
- wavs_den = torch.Tensor(wavs).unsqueeze(0) # one sample
126
- # wavs_den = denoise(wavs_den).tolist()
127
- else:
128
- wavs_den = wavs
129
 
130
  # return output
131
  with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as fp:
132
  # wavs must be a list of integers
133
- synthesizer.save_wav(wavs, fp)
134
  t2 = time.time() - t1
135
  print(round(t2, 2))
136
  output_audio = fp.name
137
 
138
- with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as fp:
139
- # wavs must be a list of integers
140
- synthesizer.save_wav(wavs_den, fp)
141
- output_audio_den = fp.name
142
-
143
- return output_audio, output_audio_den
144
 
145
 
146
  title = "🗣️ Catalan Multispeaker TTS Tester 🗣️"
@@ -222,11 +182,10 @@ with gr.Blocks(**AinaGradioTheme().get_kwargs()) as app:
222
  "Submit",
223
  variant="primary",
224
  )
225
- use_denoise = gr.Radio(choices=[("Yes", 0), ("No", 1)], value=0)
226
  with gr.Column(variant='panel'):
227
  output_audio = gr.Audio(label="Output", type="filepath", autoplay=True, show_share_button=False)
228
- output_audio_den = gr.Audio(label="Output denoised", type="filepath", autoplay=False,
229
- show_share_button=False)
230
 
231
  output_phonetic = gr.Textbox(label="Phonetic Transcription", readonly=True)
232
 
@@ -234,7 +193,7 @@ with gr.Blocks(**AinaGradioTheme().get_kwargs()) as app:
234
  input_.change(fn=change_interactive, inputs=[input_], outputs=button)
235
 
236
  # clear_btn.click(fn=clean, inputs=[], outputs=[input_, output_audio, output_phonetic], queue=False)
237
- submit_btn.click(fn=submit_input, inputs=[input_, speaker_id, use_denoise], outputs=[output_audio,
238
  output_audio_den,
239
  output_phonetic])
240
 
 
15
  torch.manual_seed(0)
16
  np.random.seed(0)
17
 
 
 
18
  import json
19
  from copy import deepcopy
20
 
21
  import numpy as np
22
  import torch
23
 
 
 
24
  import torchaudio
25
  import torchaudio.transforms as T
26
 
 
32
 
33
  SAMPLE_RATE = 8000
34
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
35
  #############################################################################################################
36
 
37
  load_dotenv()
 
83
  return None
84
 
85
 
86
+ def tts_inference(text: str, speaker_idx: str = None):
87
  # synthesize
88
  if synthesizer is None:
89
  raise NameError("model not found")
90
  t1 = time.time()
91
  wavs = synthesizer.tts(text, speaker_idx)
92
+ # print(type(wavs))
93
+ wavs_den = wavs
 
 
 
 
94
 
95
  # return output
96
  with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as fp:
97
  # wavs must be a list of integers
98
+ synthesizer.save_wav(wavs_den, fp)
99
  t2 = time.time() - t1
100
  print(round(t2, 2))
101
  output_audio = fp.name
102
 
103
+ return output_audio
 
 
 
 
 
104
 
105
 
106
  title = "🗣️ Catalan Multispeaker TTS Tester 🗣️"
 
182
  "Submit",
183
  variant="primary",
184
  )
185
+ # use_denoise = gr.Radio(choices=[("Yes", 0), ("No", 1)], value=0)
186
  with gr.Column(variant='panel'):
187
  output_audio = gr.Audio(label="Output", type="filepath", autoplay=True, show_share_button=False)
188
+ # output_audio_den = gr.Audio(label="Output denoised", type="filepath", autoplay=False, show_share_button=False)
 
189
 
190
  output_phonetic = gr.Textbox(label="Phonetic Transcription", readonly=True)
191
 
 
193
  input_.change(fn=change_interactive, inputs=[input_], outputs=button)
194
 
195
  # clear_btn.click(fn=clean, inputs=[], outputs=[input_, output_audio, output_phonetic], queue=False)
196
+ submit_btn.click(fn=submit_input, inputs=[input_, speaker_id], outputs=[output_audio,
197
  output_audio_den,
198
  output_phonetic])
199