rahulshah63 and akhaliq (HF staff) committed on
Commit
831484b
0 Parent(s):

Duplicate from pytorch/Tacotron2


Co-authored-by: Ahsen Khaliq <akhaliq@users.noreply.huggingface.co>

Files changed (4):
  1. .gitattributes +27 -0
  2. README.md +12 -0
  3. app.py +53 -0
  4. requirements.txt +3 -0
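
This commit duplicates the upstream pytorch/Tacotron2 Space. As a minimal sketch, the same duplication can be done programmatically with the `huggingface_hub` client; the function and parameter names below reflect the `huggingface_hub` API as I recall it and the target repo id is a placeholder, so treat this as an assumption rather than part of the commit:

```python
# Sketch: duplicate the upstream Space programmatically.
# Assumes `huggingface_hub` is installed and you are authenticated
# (e.g. via `huggingface-cli login`). The target id is hypothetical.
from huggingface_hub import duplicate_space

new_space = duplicate_space("pytorch/Tacotron2", to_id="your-username/Tacotron2")
print(new_space)  # URL of the newly created Space
```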
.gitattributes ADDED
@@ -0,0 +1,27 @@
+ *.7z filter=lfs diff=lfs merge=lfs -text
+ *.arrow filter=lfs diff=lfs merge=lfs -text
+ *.bin filter=lfs diff=lfs merge=lfs -text
+ *.bin.* filter=lfs diff=lfs merge=lfs -text
+ *.bz2 filter=lfs diff=lfs merge=lfs -text
+ *.ftz filter=lfs diff=lfs merge=lfs -text
+ *.gz filter=lfs diff=lfs merge=lfs -text
+ *.h5 filter=lfs diff=lfs merge=lfs -text
+ *.joblib filter=lfs diff=lfs merge=lfs -text
+ *.lfs.* filter=lfs diff=lfs merge=lfs -text
+ *.model filter=lfs diff=lfs merge=lfs -text
+ *.msgpack filter=lfs diff=lfs merge=lfs -text
+ *.onnx filter=lfs diff=lfs merge=lfs -text
+ *.ot filter=lfs diff=lfs merge=lfs -text
+ *.parquet filter=lfs diff=lfs merge=lfs -text
+ *.pb filter=lfs diff=lfs merge=lfs -text
+ *.pt filter=lfs diff=lfs merge=lfs -text
+ *.pth filter=lfs diff=lfs merge=lfs -text
+ *.rar filter=lfs diff=lfs merge=lfs -text
+ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
+ *.tar.* filter=lfs diff=lfs merge=lfs -text
+ *.tflite filter=lfs diff=lfs merge=lfs -text
+ *.tgz filter=lfs diff=lfs merge=lfs -text
+ *.xz filter=lfs diff=lfs merge=lfs -text
+ *.zip filter=lfs diff=lfs merge=lfs -text
+ *.zstandard filter=lfs diff=lfs merge=lfs -text
+ *tfevents* filter=lfs diff=lfs merge=lfs -text
README.md ADDED
@@ -0,0 +1,12 @@
+ ---
+ title: Tacotron2
+ emoji: 🏃
+ colorFrom: green
+ colorTo: blue
+ sdk: gradio
+ app_file: app.py
+ pinned: false
+ duplicated_from: pytorch/Tacotron2
+ ---
+
+ Check out the configuration reference at https://huggingface.co/docs/hub/spaces#reference
app.py ADDED
@@ -0,0 +1,53 @@
+ import torch
+ import torchaudio
+ import gradio as gr
+ import matplotlib.pyplot as plt
+
+ device = "cpu"
+ bundle = torchaudio.pipelines.TACOTRON2_WAVERNN_PHONE_LJSPEECH
+ processor = bundle.get_text_processor()
+ tacotron2 = bundle.get_tacotron2().to(device)
+
+ # Workaround to load the GPU-trained WaveGlow checkpoint on CPU
+ # https://stackoverflow.com/a/61840832
+ waveglow = torch.hub.load(
+     "NVIDIA/DeepLearningExamples:torchhub",
+     "nvidia_waveglow",
+     model_math="fp32",
+     pretrained=False,
+ )
+ checkpoint = torch.hub.load_state_dict_from_url(
+     "https://api.ngc.nvidia.com/v2/models/nvidia/waveglowpyt_fp32/versions/1/files/nvidia_waveglowpyt_fp32_20190306.pth",  # noqa: E501
+     progress=False,
+     map_location=device,
+ )
+ state_dict = {key.replace("module.", ""): value for key, value in checkpoint["state_dict"].items()}
+
+ waveglow.load_state_dict(state_dict)
+ waveglow = waveglow.remove_weightnorm(waveglow)
+ waveglow = waveglow.to(device)
+ waveglow.eval()
+
+ def inference(text):
+     # Convert text to phoneme IDs and run Tacotron 2 to get a mel spectrogram.
+     with torch.inference_mode():
+         processed, lengths = processor(text)
+         processed = processed.to(device)
+         lengths = lengths.to(device)
+         spec, _, _ = tacotron2.infer(processed, lengths)
+
+     plt.imshow(spec[0].cpu().detach())
+     plt.axis("off")
+     plt.savefig("test.png", bbox_inches="tight")
+
+     with torch.no_grad():
+         waveforms = waveglow.infer(spec)
+
+     torchaudio.save("output_waveglow.wav", waveforms[0:1].cpu(), sample_rate=22050)
+     return "output_waveglow.wav", "test.png"
+
+ title = "Tacotron 2"
+ description = "Gradio demo for Tacotron 2, a model that generates mel spectrograms from text (paired here with the WaveGlow vocoder to synthesize audio). To use it, simply enter your text or click one of the examples to load it. Read more at the links below."
+ article = "<p style='text-align: center'><a href='https://arxiv.org/abs/1712.05884' target='_blank'>Natural TTS Synthesis by Conditioning WaveNet on Mel Spectrogram Predictions</a> | <a href='https://github.com/NVIDIA/DeepLearningExamples/tree/master/PyTorch/SpeechSynthesis/Tacotron2' target='_blank'>Github Repo</a></p>"
+ examples = [["life is like a box of chocolates"]]
+ gr.Interface(inference, "text", [gr.outputs.Audio(type="file", label="Audio"), gr.outputs.Image(type="file", label="Spectrogram")], title=title, description=description, article=article, examples=examples).launch(enable_queue=True)
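
The app pairs torchaudio's phoneme-based Tacotron 2 pipeline with NVIDIA's WaveGlow vocoder pulled from torch.hub. As a minimal sketch (not part of the committed app), the same bundle also ships its own WaveRNN vocoder, so a text-to-speech path can stay entirely inside torchaudio's documented `Tacotron2TTSBundle` API:

```python
# Sketch: use the bundle's built-in WaveRNN vocoder instead of WaveGlow.
# This mirrors app.py but swaps the vocoder; it is an illustration only.
import torch
import torchaudio

bundle = torchaudio.pipelines.TACOTRON2_WAVERNN_PHONE_LJSPEECH
processor = bundle.get_text_processor()   # graphemes -> phoneme IDs (needs deep_phonemizer)
tacotron2 = bundle.get_tacotron2()        # phoneme IDs -> mel spectrogram
vocoder = bundle.get_vocoder()            # mel spectrogram -> waveform (WaveRNN)

with torch.inference_mode():
    processed, lengths = processor("life is like a box of chocolates")
    spec, spec_lengths, _ = tacotron2.infer(processed, lengths)
    waveforms, wave_lengths = vocoder(spec, spec_lengths)

torchaudio.save("output_wavernn.wav", waveforms[0:1].cpu(), sample_rate=vocoder.sample_rate)
```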
requirements.txt ADDED
@@ -0,0 +1,3 @@
+ torch
+ torchaudio
+ deep_phonemizer
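
`deep_phonemizer` is required because the bundle's phoneme-based text processor downloads a DeepPhonemizer checkpoint to convert graphemes into phoneme IDs. A quick sketch of that step (the printed shapes are illustrative, not guaranteed values):

```python
# Sketch: the phoneme text processor is the piece that needs deep_phonemizer.
import torchaudio

processor = torchaudio.pipelines.TACOTRON2_WAVERNN_PHONE_LJSPEECH.get_text_processor()
tokens, lengths = processor("Hello world!")
print(tokens.shape, lengths)  # e.g. a (1, N) token tensor and its length; N depends on phonemization
```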