|
import torch |
|
import torchaudio |
|
import gradio as gr |
|
|
|
# All models in this script are placed on this device.
device = "cpu"

# Text front-end and Tacotron2 model come from the same torchaudio bundle.
bundle = torchaudio.pipelines.TACOTRON2_WAVERNN_PHONE_LJSPEECH
processor = bundle.get_text_processor()
tacotron2 = bundle.get_tacotron2().to(device)

# WaveGlow is created without weights (pretrained=False); the fp32
# checkpoint is downloaded separately below and loaded by hand.
WAVEGLOW_CHECKPOINT_URL = "https://api.ngc.nvidia.com/v2/models/nvidia/waveglowpyt_fp32/versions/1/files/nvidia_waveglowpyt_fp32_20190306.pth"

waveglow = torch.hub.load(
    "NVIDIA/DeepLearningExamples:torchhub",
    "nvidia_waveglow",
    model_math="fp32",
    pretrained=False,
)
checkpoint = torch.hub.load_state_dict_from_url(
    WAVEGLOW_CHECKPOINT_URL,
    progress=False,
    map_location=device,
)

# Remove every "module." occurrence from the checkpoint's parameter names
# (presumably a DataParallel prefix -- confirm) so they match the model.
state_dict = {}
for key, value in checkpoint["state_dict"].items():
    state_dict[key.replace("module.", "")] = value

waveglow.load_state_dict(state_dict)
waveglow = waveglow.remove_weightnorm(waveglow).to(device)
waveglow.eval()
|
|
|
def inference(text):
    """Synthesize speech for ``text`` and return the path of the WAV file.

    The text is run through the module-level text ``processor`` and
    ``tacotron2`` model to obtain a spectrogram, which the module-level
    ``waveglow`` model turns into waveforms. The first waveform is written
    to ``output_waveglow.wav`` in the current working directory.

    Args:
        text: The sentence to synthesize.

    Returns:
        The string ``"output_waveglow.wav"``. The same file is overwritten
        on every call.

    Raises:
        ValueError: If ``text`` is empty, ``None`` or only whitespace.
    """
    # Fail early with a clear message instead of letting an empty sequence
    # reach the models.
    if not text or not text.strip():
        raise ValueError("Input text must not be empty.")

    output_path = "output_waveglow.wav"

    # Text -> spectrogram.
    with torch.inference_mode():
        processed, lengths = processor(text)
        processed = processed.to(device)
        lengths = lengths.to(device)
        spec, _, _ = tacotron2.infer(processed, lengths)

    # Spectrogram -> waveform.
    with torch.no_grad():
        waveforms = waveglow.infer(spec)

    # [0:1] keeps only the first entry while retaining the leading dimension
    # (presumably so the tensor is (channels, samples) for save -- confirm).
    torchaudio.save(output_path, waveforms[0:1].cpu(), sample_rate=22050)
    return output_path
|
|
|
# Text displayed on the demo page.
title = "WAVEGLOW"
description = (
    "Gradio demo for WAVEGLOW: WaveGlow model for generating speech from mel "
    "spectrograms (generated by Tacotron2). To use it, simply add your text "
    "or click on one of the examples to load them. Read more at the links below."
)

# HTML footer linking to the paper and the reference implementation.
article = "<p style='text-align: center'><a href='https://arxiv.org/abs/1811.00002' target='_blank'>WaveGlow: A Flow-based Generative Network for Speech Synthesis</a> | <a href='https://github.com/NVIDIA/DeepLearningExamples/tree/master/PyTorch/SpeechSynthesis/Tacotron2' target='_blank'>Github Repo</a></p>"

# One example row with a single text input.
examples = [["life is like a box of chocolates"]]

# Build the text -> audio interface around `inference` and start serving it.
# NOTE(review): `gr.outputs.Audio` and `launch(enable_queue=...)` look like
# older Gradio API -- confirm they exist in the Gradio version this app pins.
gr.Interface(
    inference,
    "text",
    gr.outputs.Audio(type="file"),
    title=title,
    description=description,
    article=article,
    examples=examples,
).launch(enable_queue=True)