rahulshah63 committed on
Commit
ecba073
1 Parent(s): a1ebad1

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +24 -17
app.py CHANGED
@@ -1,8 +1,9 @@
1
  import torch
2
  import os
 
3
  import gradio as gr
4
  import matplotlib.pyplot as plt
5
- from scipy.io.wavfile import write
6
 
7
  device="cpu"
8
 
@@ -23,21 +24,26 @@ tacotron2 = tacotron2.to(device)
23
  tacotron2.eval()
24
 
25
  # Load Nvidia Waveglow from Hub
26
- waveglow = torch.hub.load(
27
- "NVIDIA/DeepLearningExamples:torchhub",
28
- "nvidia_waveglow",
29
- model_math="fp32",
30
- pretrained=False,
31
- )
32
- checkpoint = torch.hub.load_state_dict_from_url(
33
- "https://api.ngc.nvidia.com/v2/models/nvidia/waveglowpyt_fp32/versions/1/files/nvidia_waveglowpyt_fp32_20190306.pth", # noqa: E501
34
- progress=False,
35
- map_location=device,
36
- )
37
- state_dict = {key.replace("module.", ""): value for key, value in checkpoint["state_dict"].items()}
 
 
 
 
 
38
 
39
- waveglow.load_state_dict(state_dict)
40
- waveglow = waveglow.remove_weightnorm(waveglow)
41
  waveglow = waveglow.to(device)
42
  waveglow.eval()
43
 
@@ -61,9 +67,10 @@ def inference(text):
61
  #Save Audio
62
  audio_numpy = audio[0].data.cpu().numpy()
63
  rate = 22050
64
- write("output.wav", rate, audio_numpy)
 
65
 
66
- return "output.wav","test.png"
67
 
68
  title="TACOTRON 2"
69
  description="Nepali Speech TACOTRON 2: The Tacotron 2 model for generating mel spectrograms from text. To use it, simply add you text or click on one of the examples to load them. Read more at the links below."
 
1
  import torch
2
  import os
3
+ import torchaudio
4
  import gradio as gr
5
  import matplotlib.pyplot as plt
6
+
7
 
8
  device="cpu"
9
 
 
24
  tacotron2.eval()
25
 
26
  # Load Nvidia Waveglow from Hub
27
+ # waveglow = torch.hub.load(
28
+ # "NVIDIA/DeepLearningExamples:torchhub",
29
+ # "nvidia_waveglow",
30
+ # model_math="fp32",
31
+ # pretrained=False,
32
+ # )
33
+ # checkpoint = torch.hub.load_state_dict_from_url(
34
+ # "https://api.ngc.nvidia.com/v2/models/nvidia/waveglowpyt_fp32/versions/1/files/nvidia_waveglowpyt_fp32_20190306.pth", # noqa: E501
35
+ # progress=False,
36
+ # map_location=device,
37
+ # )
38
+ # state_dict = {key.replace("module.", ""): value for key, value in checkpoint["state_dict"].items()}
39
+
40
+ # waveglow.load_state_dict(state_dict)
41
+ # waveglow = waveglow.remove_weightnorm(waveglow)
42
+ # waveglow = waveglow.to(device)
43
+ # waveglow.eval()
44
 
45
+ waveglow_pretrained_model = os.path.join(os.getcwd(), 'waveglow_256channels_ljs_v3.pt')
46
+ waveglow = torch.load(waveglow_pretrained_model, map_location=device)['model']
47
  waveglow = waveglow.to(device)
48
  waveglow.eval()
49
 
 
67
  #Save Audio
68
  audio_numpy = audio[0].data.cpu().numpy()
69
  rate = 22050
70
+ write("output1.wav", rate, audio_numpy)
71
+ torchaudio.save("output2.wav", audio[0:1].cpu(), sample_rate=22050)
72
 
73
+ return "output1.wav", "output2.wav", "test.png"
74
 
75
  title="TACOTRON 2"
76
  description="Nepali Speech TACOTRON 2: The Tacotron 2 model for generating mel spectrograms from text. To use it, simply add you text or click on one of the examples to load them. Read more at the links below."