Spaces:
Runtime error
Runtime error
rahulshah63
committed on
Commit
•
ecba073
1
Parent(s):
a1ebad1
Update app.py
Browse files
app.py
CHANGED
@@ -1,8 +1,9 @@
|
|
1 |
import torch
|
2 |
import os
|
|
|
3 |
import gradio as gr
|
4 |
import matplotlib.pyplot as plt
|
5 |
-
|
6 |
|
7 |
device="cpu"
|
8 |
|
@@ -23,21 +24,26 @@ tacotron2 = tacotron2.to(device)
|
|
23 |
tacotron2.eval()
|
24 |
|
25 |
# Load Nvidia Waveglow from Hub
|
26 |
-
waveglow = torch.hub.load(
|
27 |
-
|
28 |
-
|
29 |
-
|
30 |
-
|
31 |
-
)
|
32 |
-
checkpoint = torch.hub.load_state_dict_from_url(
|
33 |
-
|
34 |
-
|
35 |
-
|
36 |
-
)
|
37 |
-
state_dict = {key.replace("module.", ""): value for key, value in checkpoint["state_dict"].items()}
|
|
|
|
|
|
|
|
|
|
|
38 |
|
39 |
-
|
40 |
-
waveglow =
|
41 |
waveglow = waveglow.to(device)
|
42 |
waveglow.eval()
|
43 |
|
@@ -61,9 +67,10 @@ def inference(text):
|
|
61 |
#Save Audio
|
62 |
audio_numpy = audio[0].data.cpu().numpy()
|
63 |
rate = 22050
|
64 |
-
write("
|
|
|
65 |
|
66 |
-
return "
|
67 |
|
68 |
title="TACOTRON 2"
|
69 |
description="Nepali Speech TACOTRON 2: The Tacotron 2 model for generating mel spectrograms from text. To use it, simply add you text or click on one of the examples to load them. Read more at the links below."
|
|
|
1 |
import torch
|
2 |
import os
|
3 |
+
import torchaudio
|
4 |
import gradio as gr
|
5 |
import matplotlib.pyplot as plt
|
6 |
+
|
7 |
|
8 |
device="cpu"
|
9 |
|
|
|
24 |
tacotron2.eval()
|
25 |
|
26 |
# Load Nvidia Waveglow from Hub
|
27 |
+
# waveglow = torch.hub.load(
|
28 |
+
# "NVIDIA/DeepLearningExamples:torchhub",
|
29 |
+
# "nvidia_waveglow",
|
30 |
+
# model_math="fp32",
|
31 |
+
# pretrained=False,
|
32 |
+
# )
|
33 |
+
# checkpoint = torch.hub.load_state_dict_from_url(
|
34 |
+
# "https://api.ngc.nvidia.com/v2/models/nvidia/waveglowpyt_fp32/versions/1/files/nvidia_waveglowpyt_fp32_20190306.pth", # noqa: E501
|
35 |
+
# progress=False,
|
36 |
+
# map_location=device,
|
37 |
+
# )
|
38 |
+
# state_dict = {key.replace("module.", ""): value for key, value in checkpoint["state_dict"].items()}
|
39 |
+
|
40 |
+
# waveglow.load_state_dict(state_dict)
|
41 |
+
# waveglow = waveglow.remove_weightnorm(waveglow)
|
42 |
+
# waveglow = waveglow.to(device)
|
43 |
+
# waveglow.eval()
|
44 |
|
45 |
+
waveglow_pretrained_model = os.path.join(os.getcwd(), 'waveglow_256channels_ljs_v3.pt')
|
46 |
+
waveglow = torch.load(waveglow_pretrained_model, map_location=device)['model']
|
47 |
waveglow = waveglow.to(device)
|
48 |
waveglow.eval()
|
49 |
|
|
|
67 |
#Save Audio
|
68 |
audio_numpy = audio[0].data.cpu().numpy()
|
69 |
rate = 22050
|
70 |
+
write("output1.wav", rate, audio_numpy)
|
71 |
+
torchaudio.save("output2.wav", audio[0:1].cpu(), sample_rate=22050)
|
72 |
|
73 |
+
return "output1.wav", "output2.wav", "test.png"
|
74 |
|
75 |
title="TACOTRON 2"
|
76 |
description="Nepali Speech TACOTRON 2: The Tacotron 2 model for generating mel spectrograms from text. To use it, simply add you text or click on one of the examples to load them. Read more at the links below."
|