Nepali-Tacotron2_ShrutiAudio

Runtime error

App Files Files Community

rahulshah63 committed on Jan 2, 2023

Commit

a1ebad1

•

1 Parent(s): 5dba62c

Update app.py

Browse files

Files changed (1) hide show

app.py +20 -18

app.py CHANGED Viewed

@@ -1,8 +1,8 @@
 import torch
 import os
-import torchaudio
 import gradio as gr
 import matplotlib.pyplot as plt
 device="cpu"
@@ -15,8 +15,8 @@ tacotron2 = torch.hub.load(
 )
 # Load Weights and bias of nepali text
-checkpoint_path = os.path.join(os.getcwd(), 'model_E45.ckpt')
-state_dict = torch.load(checkpoint_path, map_location=device)
 tacotron2.load_state_dict(state_dict)
 tacotron2 = tacotron2.to(device)
@@ -46,25 +46,27 @@ utils = torch.hub.load('NVIDIA/DeepLearningExamples:torchhub', 'nvidia_tts_utils
 # sequences, lengths = utils.prepare_input_sequence([text])
 def inference(text):
-  with torch.inference_mode():
-      sequences, lengths = utils.prepare_input_sequence([text])
-      sequences = sequences.to(device)
-      lengths = lengths.to(device)
-      mel, _, _ = tacotron2.infer(sequences, lengths)
-  plt.imshow(mel[0].cpu().detach())
-  plt.axis('off')
-  plt.savefig("test.png", bbox_inches='tight')
-  with torch.no_grad():
-      audio = waveglow.infer(mel)
-  torchaudio.save("output.wav", audio[0:1].cpu(), sample_rate=22050)
-  return "output.wav","test.png"
 title="TACOTRON 2"
 description="Nepali Speech TACOTRON 2: The Tacotron 2 model for generating mel spectrograms from text. To use it, simply add you text or click on one of the examples to load them. Read more at the links below."
 article = "<p style='text-align: center'><a href='https://arxiv.org/abs/1712.05884' target='_blank'>Natural TTS Synthesis by Conditioning WaveNet on Mel Spectrogram Predictions</a> | <a href='https://github.com/NVIDIA/DeepLearningExamples/tree/master/PyTorch/SpeechSynthesis/Tacotron2' target='_blank'>Github Repo</a></p>"
-examples=[["life is like a box of chocolates"]]
 gr.Interface(inference,"text",[gr.outputs.Audio(type="file",label="Audio"),gr.outputs.Image(type="file",label="Spectrogram")],title=title,description=description,article=article,examples=examples).launch(enable_queue=True)

 import torch
 import os
 import gradio as gr
 import matplotlib.pyplot as plt
+from scipy.io.wavfile import write
 device="cpu"
 )
 # Load Weights and bias of nepali text
+tacotron2_checkpoint_path = os.path.join(os.getcwd(), 'model_E45.ckpt')
+state_dict = torch.load(tacotron2_checkpoint_path, map_location=device)
 tacotron2.load_state_dict(state_dict)
 tacotron2 = tacotron2.to(device)
 # sequences, lengths = utils.prepare_input_sequence([text])
 def inference(text):  # Gradio callback: takes the input text, returns (audio file path, spectrogram image path)
+    with torch.no_grad():  # no gradients are tracked for either model call below
+        sequences, lengths = utils.prepare_input_sequence([text])  # text is wrapped in a one-element list for the hub utils
+        sequences = sequences.to(device)
+        lengths = lengths.to(device)
+        mel, _, _ = tacotron2.infer(sequences, lengths)  # only the first of the three returned values is kept
+        audio = waveglow.infer(mel)  # waveglow is defined outside the lines shown in this diff
+    # Save the mel spectrogram as an image file
+    plt.imshow(mel[0].cpu().detach())  # NOTE(review): draws on pyplot's implicit current figure; nothing here creates or closes one
+    plt.axis('off')
+    plt.savefig("test.png", bbox_inches='tight')  # fixed file name, overwritten on every call
+    # Save the generated audio as a WAV file
+    audio_numpy = audio[0].data.cpu().numpy()  # first element of the waveglow output, moved to CPU as a NumPy array
+    rate = 22050  # sample rate handed to scipy's wavfile.write; presumably matches the vocoder's output rate (confirm)
+    write("output.wav", rate, audio_numpy)  # fixed file name, overwritten on every call
+    return "output.wav","test.png"  # paths, in the order of the Audio and Image outputs given to gr.Interface
 title="TACOTRON 2"
 description="Nepali Speech TACOTRON 2: The Tacotron 2 model for generating mel spectrograms from text. To use it, simply add you text or click on one of the examples to load them. Read more at the links below."
 article = "<p style='text-align: center'><a href='https://arxiv.org/abs/1712.05884' target='_blank'>Natural TTS Synthesis by Conditioning WaveNet on Mel Spectrogram Predictions</a> | <a href='https://github.com/NVIDIA/DeepLearningExamples/tree/master/PyTorch/SpeechSynthesis/Tacotron2' target='_blank'>Github Repo</a></p>"
+examples=[["म नेपाली टिटिएस हुँ"]]
 gr.Interface(inference,"text",[gr.outputs.Audio(type="file",label="Audio"),gr.outputs.Image(type="file",label="Spectrogram")],title=title,description=description,article=article,examples=examples).launch(enable_queue=True)