rahulshah63 committed on
Commit
a1ebad1
1 Parent(s): 5dba62c

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +20 -18
app.py CHANGED
@@ -1,8 +1,8 @@
1
  import torch
2
  import os
3
- import torchaudio
4
  import gradio as gr
5
  import matplotlib.pyplot as plt
 
6
 
7
  device="cpu"
8
 
@@ -15,8 +15,8 @@ tacotron2 = torch.hub.load(
15
  )
16
 
17
  # Load Weights and bias of nepali text
18
- checkpoint_path = os.path.join(os.getcwd(), 'model_E45.ckpt')
19
- state_dict = torch.load(checkpoint_path, map_location=device)
20
 
21
  tacotron2.load_state_dict(state_dict)
22
  tacotron2 = tacotron2.to(device)
@@ -46,25 +46,27 @@ utils = torch.hub.load('NVIDIA/DeepLearningExamples:torchhub', 'nvidia_tts_utils
46
  # sequences, lengths = utils.prepare_input_sequence([text])
47
 
48
  def inference(text):
 
 
 
 
 
 
49
 
50
- with torch.inference_mode():
51
- sequences, lengths = utils.prepare_input_sequence([text])
52
- sequences = sequences.to(device)
53
- lengths = lengths.to(device)
54
- mel, _, _ = tacotron2.infer(sequences, lengths)
55
 
56
- plt.imshow(mel[0].cpu().detach())
57
- plt.axis('off')
58
- plt.savefig("test.png", bbox_inches='tight')
59
-
60
- with torch.no_grad():
61
- audio = waveglow.infer(mel)
62
-
63
- torchaudio.save("output.wav", audio[0:1].cpu(), sample_rate=22050)
64
- return "output.wav","test.png"
65
 
66
  title="TACOTRON 2"
67
  description="Nepali Speech TACOTRON 2: The Tacotron 2 model for generating mel spectrograms from text. To use it, simply add you text or click on one of the examples to load them. Read more at the links below."
68
  article = "<p style='text-align: center'><a href='https://arxiv.org/abs/1712.05884' target='_blank'>Natural TTS Synthesis by Conditioning WaveNet on Mel Spectrogram Predictions</a> | <a href='https://github.com/NVIDIA/DeepLearningExamples/tree/master/PyTorch/SpeechSynthesis/Tacotron2' target='_blank'>Github Repo</a></p>"
69
- examples=[["life is like a box of chocolates"]]
70
  gr.Interface(inference,"text",[gr.outputs.Audio(type="file",label="Audio"),gr.outputs.Image(type="file",label="Spectrogram")],title=title,description=description,article=article,examples=examples).launch(enable_queue=True)
 
1
  import torch
2
  import os
 
3
  import gradio as gr
4
  import matplotlib.pyplot as plt
5
+ from scipy.io.wavfile import write
6
 
7
  device="cpu"
8
 
 
15
  )
16
 
17
  # Load Weights and bias of nepali text
18
+ tacotron2_checkpoint_path = os.path.join(os.getcwd(), 'model_E45.ckpt')
19
+ state_dict = torch.load(tacotron2_checkpoint_path, map_location=device)
20
 
21
  tacotron2.load_state_dict(state_dict)
22
  tacotron2 = tacotron2.to(device)
 
46
  # sequences, lengths = utils.prepare_input_sequence([text])
47
 
48
  def inference(text):
49
+ with torch.no_grad():
50
+ sequences, lengths = utils.prepare_input_sequence([text])
51
+ sequences = sequences.to(device)
52
+ lengths = lengths.to(device)
53
+ mel, _, _ = tacotron2.infer(sequences, lengths)
54
+ audio = waveglow.infer(mel)
55
 
56
+ #Save Mel Spectrogram
57
+ plt.imshow(mel[0].cpu().detach())
58
+ plt.axis('off')
59
+ plt.savefig("test.png", bbox_inches='tight')
 
60
 
61
+ #Save Audio
62
+ audio_numpy = audio[0].data.cpu().numpy()
63
+ rate = 22050
64
+ write("output.wav", rate, audio_numpy)
65
+
66
+ return "output.wav","test.png"
 
 
 
67
 
68
  title="TACOTRON 2"
69
  description="Nepali Speech TACOTRON 2: The Tacotron 2 model for generating mel spectrograms from text. To use it, simply add you text or click on one of the examples to load them. Read more at the links below."
70
  article = "<p style='text-align: center'><a href='https://arxiv.org/abs/1712.05884' target='_blank'>Natural TTS Synthesis by Conditioning WaveNet on Mel Spectrogram Predictions</a> | <a href='https://github.com/NVIDIA/DeepLearningExamples/tree/master/PyTorch/SpeechSynthesis/Tacotron2' target='_blank'>Github Repo</a></p>"
71
+ examples=[[" नेपाली टिटिएस हुँ"]]
72
  gr.Interface(inference,"text",[gr.outputs.Audio(type="file",label="Audio"),gr.outputs.Image(type="file",label="Spectrogram")],title=title,description=description,article=article,examples=examples).launch(enable_queue=True)