Update app.py
app.py CHANGED
@@ -38,7 +38,7 @@ for v in voicelist:
 # v = voice.lower()
 # # return (24000, styletts2importable.inference(text, voices[v], alpha=0.3, beta=0.7, diffusion_steps=7, embedding_scale=1))
 # return (24000, styletts2importable.inference(text, voices[v], alpha=0.3, beta=0.7, diffusion_steps=multispeakersteps, embedding_scale=1))
-if not torch.cuda.is_available(): INTROTXT += "\n\n###
+if not torch.cuda.is_available(): INTROTXT += "\n\n### on CPU, it'll run rather slower, but not too much."
 def synthesize(text, voice, lngsteps, password, progress=gr.Progress()):
     if text.strip() == "":
         raise gr.Error("You must enter some text")
@@ -123,12 +123,14 @@ with gr.Blocks() as vctk:
         with gr.Column(scale=1):
             inp = gr.Textbox(label="Text", info="What would you like StyleTTS 2 to read? It works better on full sentences.", interactive=True)
             voice = gr.Dropdown(voicelist, label="Voice", info="Select a default voice.", value='m-us-2', interactive=True)
-            multispeakersteps = gr.Slider(minimum=3, maximum=15, value=3, step=1, label="Diffusion Steps", info="
+            multispeakersteps = gr.Slider(minimum=3, maximum=15, value=3, step=1, label="Diffusion Steps", info="Increasing this gives a more expressive result, but not necessarily better quality.", interactive=True)
+            alpha = gr.Slider(minimum=0, maximum=1, value=0.3, step=0.1, label="Alpha", info="Defaults to 0.3", interactive=True)
+            beta = gr.Slider(minimum=0, maximum=1, value=0.7, step=0.1, label="Beta", info="Defaults to 0.7", interactive=True)
             # use_gruut = gr.Checkbox(label="Use alternate phonemizer (Gruut) - Experimental")
         with gr.Column(scale=1):
             btn = gr.Button("Synthesize", variant="primary")
             audio = gr.Audio(interactive=False, label="Synthesized Audio", waveform_options={'waveform_progress_color': '#3C82F6'})
-            btn.click(synthesize, inputs=[inp, voice, multispeakersteps], outputs=[audio], concurrency_limit=4)
+            btn.click(synthesize, inputs=[inp, voice, multispeakersteps, alpha, beta], outputs=[audio], concurrency_limit=4)
 with gr.Blocks() as clone:
     with gr.Row():
         with gr.Column(scale=1):
@@ -169,9 +171,9 @@ with gr.Blocks(title="StyleTTS 2", css="footer{display:none !important}", theme=
     # gr.TabbedInterface([vctk, clone, lj, longText], ['Multi-Voice', 'Voice Cloning', 'Text-guided Inference', 'Long Text [Beta]'])
     gr.TabbedInterface([vctk, clone, lj], ['Multi-Voice', 'do not use this option','Text-guided Inference', 'Long Text [Beta]'])
     gr.Markdown("""
-
+The base code was borrowed from [mrfakename](https://twitter.com/realmrfakename). Neither of us is affiliated with the StyleTTS 2 authors.
 
-Run
+Run the original (English-only) demo locally using Docker:
 
 ```bash
 docker run -it -p 7860:7860 --platform=linux/amd64 --gpus all registry.hf.space/styletts2-styletts2:latest python app.py
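One thing the shown hunks leave open: the new click handler passes five inputs ([inp, voice, multispeakersteps, alpha, beta]), while the `synthesize` signature visible in the first hunk still reads `(text, voice, lngsteps, password, ...)`. Gradio maps the `inputs` list to the handler's parameters positionally, so unless `synthesize` was updated elsewhere in the file, it would need matching parameters. The sketch below is illustrative only and not taken from this commit; it assumes the new slider values map onto the `alpha`/`beta` keyword arguments already visible in the commented-out inference calls above.

```python
# Illustrative sketch only -- the updated synthesize() body is not part of the shown hunks.
# Assumes app.py's existing module-level objects (styletts2importable, voices) and imports.
import gradio as gr

def synthesize(text, voice, lngsteps, alpha, beta, progress=gr.Progress()):
    if text.strip() == "":
        raise gr.Error("You must enter some text")
    v = voice.lower()
    # Forward the new slider values to the same keyword arguments used in the
    # commented-out inference calls above; 24000 is the sample rate used in this file.
    return (24000, styletts2importable.inference(
        text, voices[v],
        alpha=alpha, beta=beta,
        diffusion_steps=lngsteps, embedding_scale=1))
```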