|
import os
import subprocess
import sys

import gradio as gr
import nltk
|
|
|
# Best-effort download of the unidic dictionary data. The command is run with
# the interpreter that is executing this script (sys.executable) rather than
# whatever "python" resolves to on PATH, so the data is installed into the
# same environment the app runs in.
# NOTE(review): this presumably has to happen before `melo` is imported --
# keep the statement order unless confirmed otherwise.
_unidic_download = subprocess.run(
    [sys.executable, "-m", "unidic", "download"], check=False
)
if _unidic_download.returncode != 0:
    # Stay best-effort (do not abort start-up), but make the failure visible.
    print(
        "warning: 'unidic download' exited with status "
        f"{_unidic_download.returncode}",
        file=sys.stderr,
    )

from melo.api import TTS

# Fetch the NLTK resource used for English POS tagging.
# NOTE(review): presumably required by melo's English text processing -- confirm.
nltk.download("averaged_perceptron_tagger_eng")

# Device string handed to TTS unchanged.
device = "auto"

# Single shared English model, loaded once at import time.
model = TTS(language="EN", device=device)

# Speaker-name -> speaker-id mapping exposed by the loaded model's hparams;
# `inference` indexes it with the speaker chosen in the UI.
speaker_ids = model.hps.data.spk2id
|
|
|
|
|
def inference(
    text: str, speed: float, speaker: str, progress=gr.Progress(track_tqdm=True)
):
    """Synthesize ``text`` to a WAV file and return the path of that file.

    Args:
        text: The text to synthesize.
        speed: Speed value forwarded to ``model.tts_to_file``.
        speaker: Key into the module-level ``speaker_ids`` mapping.
        progress: Gradio progress tracker (created with ``track_tqdm=True``);
            not used directly in the body.

    Returns:
        The path of the written audio file (``"audio.wav"``).

    Raises:
        gr.Error: If the speaker lookup or the synthesis call fails; the
            original exception is chained as the cause.
    """
    out_path = "audio.wav"
    try:
        model.tts_to_file(
            text,
            speaker_ids[speaker],
            out_path,
            speed=speed,
            format="wav",
        )
    except Exception as e:
        # gr.Error is an exception type: it has to be raised for Gradio to
        # show the message to the user. Returning it would hand the exception
        # object to the Audio output component as if it were audio data.
        raise gr.Error(str(e)) from e
    return out_path
|
|
|
|
|
if __name__ == "__main__":
    # Text pre-filled in the textbox and reused as the only example row.
    sample_text = (
        "Hello, my name is Chi-ku-wa-bu. "
        "I am a text-to-speech system designed to assist you. "
        "How can I help you today?"
    )

    # Soft theme with emerald for both primary and secondary hues.
    emerald = gr.themes.colors.emerald
    theme = gr.themes.Soft(primary_hue=emerald, secondary_hue=emerald)

    # Input components, in the same order as inference's (text, speed, speaker).
    text_input = gr.Textbox(label="Text to Synthesize", value=sample_text)
    speed_input = gr.Slider(minimum=0.5, maximum=3.0, value=1.0, label="Speed")
    speaker_input = gr.Dropdown(
        label="Speaker",
        choices=["EN-US", "EN-BR", "EN_INDIA", "EN-AU", "EN-Default"],
        value="EN-US",
    )

    # NOTE(review): the initial value is "audio.wav", the same path that
    # inference writes to; presumably this file must already exist when the
    # app starts -- confirm it is shipped alongside the script.
    audio_output = gr.Audio(value="audio.wav")

    demo = gr.Interface(
        fn=inference,
        inputs=[text_input, speed_input, speaker_input],
        outputs=[audio_output],
        examples=[[sample_text, 1.0, "EN-US"]],
        cache_examples=False,
        title="Text-to-Speech",
        description="Convert English text to speech",
        theme=theme,
    )

    # Enable request queuing, then start the web server.
    demo.queue().launch()
|
|