Spaces:
Runtime error
Runtime error
rahulshah63
committed on
Commit
•
a1ebad1
1
Parent(s):
5dba62c
Update app.py
Browse files
app.py
CHANGED
@@ -1,8 +1,8 @@
|
|
1 |
import torch
|
2 |
import os
|
3 |
-
import torchaudio
|
4 |
import gradio as gr
|
5 |
import matplotlib.pyplot as plt
|
|
|
6 |
|
7 |
device="cpu"
|
8 |
|
@@ -15,8 +15,8 @@ tacotron2 = torch.hub.load(
|
|
15 |
)
|
16 |
|
17 |
# Load Weights and bias of nepali text
|
18 |
-
|
19 |
-
state_dict = torch.load(
|
20 |
|
21 |
tacotron2.load_state_dict(state_dict)
|
22 |
tacotron2 = tacotron2.to(device)
|
@@ -46,25 +46,27 @@ utils = torch.hub.load('NVIDIA/DeepLearningExamples:torchhub', 'nvidia_tts_utils
|
|
46 |
# sequences, lengths = utils.prepare_input_sequence([text])
|
47 |
|
48 |
def inference(text):
|
|
|
|
|
|
|
|
|
|
|
|
|
49 |
|
50 |
-
|
51 |
-
|
52 |
-
|
53 |
-
|
54 |
-
mel, _, _ = tacotron2.infer(sequences, lengths)
|
55 |
|
56 |
-
|
57 |
-
|
58 |
-
|
59 |
-
|
60 |
-
|
61 |
-
|
62 |
-
|
63 |
-
torchaudio.save("output.wav", audio[0:1].cpu(), sample_rate=22050)
|
64 |
-
return "output.wav","test.png"
|
65 |
|
66 |
title="TACOTRON 2"
|
67 |
description="Nepali Speech TACOTRON 2: The Tacotron 2 model for generating mel spectrograms from text. To use it, simply add you text or click on one of the examples to load them. Read more at the links below."
|
68 |
article = "<p style='text-align: center'><a href='https://arxiv.org/abs/1712.05884' target='_blank'>Natural TTS Synthesis by Conditioning WaveNet on Mel Spectrogram Predictions</a> | <a href='https://github.com/NVIDIA/DeepLearningExamples/tree/master/PyTorch/SpeechSynthesis/Tacotron2' target='_blank'>Github Repo</a></p>"
|
69 |
-
examples=[["
|
70 |
gr.Interface(inference,"text",[gr.outputs.Audio(type="file",label="Audio"),gr.outputs.Image(type="file",label="Spectrogram")],title=title,description=description,article=article,examples=examples).launch(enable_queue=True)
|
|
|
1 |
import torch
|
2 |
import os
|
|
|
3 |
import gradio as gr
|
4 |
import matplotlib.pyplot as plt
|
5 |
+
from scipy.io.wavfile import write
|
6 |
|
7 |
device="cpu"
|
8 |
|
|
|
15 |
)
|
16 |
|
17 |
# Load Weights and bias of nepali text
|
18 |
+
tacotron2_checkpoint_path = os.path.join(os.getcwd(), 'model_E45.ckpt')
|
19 |
+
state_dict = torch.load(tacotron2_checkpoint_path, map_location=device)
|
20 |
|
21 |
tacotron2.load_state_dict(state_dict)
|
22 |
tacotron2 = tacotron2.to(device)
|
|
|
46 |
# sequences, lengths = utils.prepare_input_sequence([text])
|
47 |
|
48 |
def inference(text):
    """Synthesize speech for ``text`` and render its mel spectrogram.

    The text is encoded with ``utils.prepare_input_sequence``, turned into a
    mel spectrogram by ``tacotron2.infer`` and converted to audio by
    ``waveglow.infer``. The spectrogram image is written to ``test.png`` and
    the waveform to ``output.wav`` (22050 Hz) in the current working
    directory.

    NOTE: both output paths are fixed, so every call overwrites the files
    produced by the previous call.

    Args:
        text: Input sentence to synthesize.

    Returns:
        The tuple ``("output.wav", "test.png")`` naming the written files.
    """
    # Inference only: no autograd bookkeeping is needed for the model calls.
    with torch.no_grad():
        sequences, lengths = utils.prepare_input_sequence([text])
        sequences = sequences.to(device)
        lengths = lengths.to(device)
        mel, _, _ = tacotron2.infer(sequences, lengths)
        audio = waveglow.infer(mel)

    # Save mel spectrogram.
    # Draw on a dedicated figure and always close it: drawing on pyplot's
    # implicit current figure would stack a new image on the same axes for
    # every request and keep all of them alive in this long-running process.
    fig, ax = plt.subplots()
    try:
        ax.imshow(mel[0].cpu().detach())
        ax.axis('off')
        fig.savefig("test.png", bbox_inches='tight')
    finally:
        plt.close(fig)

    # Save audio.
    audio_numpy = audio[0].detach().cpu().numpy()
    rate = 22050
    write("output.wav", rate, audio_numpy)

    return "output.wav", "test.png"
|
|
|
|
|
|
|
67 |
|
68 |
title="TACOTRON 2"
|
69 |
description="Nepali Speech TACOTRON 2: The Tacotron 2 model for generating mel spectrograms from text. To use it, simply add you text or click on one of the examples to load them. Read more at the links below."
|
70 |
article = "<p style='text-align: center'><a href='https://arxiv.org/abs/1712.05884' target='_blank'>Natural TTS Synthesis by Conditioning WaveNet on Mel Spectrogram Predictions</a> | <a href='https://github.com/NVIDIA/DeepLearningExamples/tree/master/PyTorch/SpeechSynthesis/Tacotron2' target='_blank'>Github Repo</a></p>"
|
71 |
+
examples=[["म नेपाली टिटिएस हुँ"]]
|
72 |
gr.Interface(inference,"text",[gr.outputs.Audio(type="file",label="Audio"),gr.outputs.Image(type="file",label="Spectrogram")],title=title,description=description,article=article,examples=examples).launch(enable_queue=True)
|