mrfakename
committed on
Commit
·
a5cfbc2
1
Parent(s):
2f1c8d2
remove long text
Browse files
- app.py +30 -29
- requirements.txt +1 -1
app.py
CHANGED
@@ -3,7 +3,7 @@ import styletts2importable
|
|
3 |
import ljspeechimportable
|
4 |
import torch
|
5 |
import os
|
6 |
-
from tortoise.utils.text import split_and_recombine_text
|
7 |
import numpy as np
|
8 |
import pickle
|
9 |
theme = gr.themes.Base(
|
@@ -29,22 +29,22 @@ def synthesize(text, voice, multispeakersteps):
|
|
29 |
v = voice.lower()
|
30 |
# return (24000, styletts2importable.inference(text, voices[v], alpha=0.3, beta=0.7, diffusion_steps=7, embedding_scale=1))
|
31 |
return (24000, styletts2importable.inference(text, voices[v], alpha=0.3, beta=0.7, diffusion_steps=multispeakersteps, embedding_scale=1))
|
32 |
-
def longsynthesize(text, voice, lngsteps, password, progress=gr.Progress()):
|
33 |
-
|
34 |
-
|
35 |
-
|
36 |
-
|
37 |
-
|
38 |
-
|
39 |
-
|
40 |
-
|
41 |
-
|
42 |
-
|
43 |
-
|
44 |
-
|
45 |
-
|
46 |
-
|
47 |
-
|
48 |
def clsynthesize(text, voice, vcsteps):
|
49 |
if text.strip() == "":
|
50 |
raise gr.Error("You must enter some text")
|
@@ -84,17 +84,17 @@ with gr.Blocks() as clone:
|
|
84 |
clbtn = gr.Button("Synthesize", variant="primary")
|
85 |
claudio = gr.Audio(interactive=False, label="Synthesized Audio")
|
86 |
clbtn.click(clsynthesize, inputs=[clinp, clvoice, vcsteps], outputs=[claudio], concurrency_limit=4)
|
87 |
-
with gr.Blocks() as longText:
|
88 |
-
|
89 |
-
|
90 |
-
|
91 |
-
|
92 |
-
|
93 |
-
|
94 |
-
|
95 |
-
|
96 |
-
|
97 |
-
|
98 |
with gr.Blocks() as lj:
|
99 |
with gr.Row():
|
100 |
with gr.Column(scale=1):
|
@@ -116,7 +116,8 @@ Is there a long queue on this space? Duplicate it and add a more powerful GPU to
|
|
116 |
|
117 |
**NOTE: StyleTTS 2 does better on longer texts.** For example, making it say "hi" will produce a lower-quality result than making it say a longer phrase.""")
|
118 |
gr.DuplicateButton("Duplicate Space")
|
119 |
-
gr.TabbedInterface([vctk, clone, lj, longText], ['Multi-Voice', 'Voice Cloning', 'LJSpeech', 'Long Text [Beta]'])
|
|
|
120 |
gr.Markdown("""
|
121 |
Demo by by [mrfakename](https://twitter.com/realmrfakename). I am not affiliated with the StyleTTS 2 authors.
|
122 |
|
|
|
3 |
import ljspeechimportable
|
4 |
import torch
|
5 |
import os
|
6 |
+
# from tortoise.utils.text import split_and_recombine_text
|
7 |
import numpy as np
|
8 |
import pickle
|
9 |
theme = gr.themes.Base(
|
|
|
29 |
v = voice.lower()
|
30 |
# return (24000, styletts2importable.inference(text, voices[v], alpha=0.3, beta=0.7, diffusion_steps=7, embedding_scale=1))
|
31 |
return (24000, styletts2importable.inference(text, voices[v], alpha=0.3, beta=0.7, diffusion_steps=multispeakersteps, embedding_scale=1))
|
32 |
+
# def longsynthesize(text, voice, lngsteps, password, progress=gr.Progress()):
|
33 |
+
# if password == os.environ['ACCESS_CODE']:
|
34 |
+
# if text.strip() == "":
|
35 |
+
# raise gr.Error("You must enter some text")
|
36 |
+
# if lngsteps > 25:
|
37 |
+
# raise gr.Error("Max 25 steps")
|
38 |
+
# if lngsteps < 5:
|
39 |
+
# raise gr.Error("Min 5 steps")
|
40 |
+
# texts = split_and_recombine_text(text)
|
41 |
+
# v = voice.lower()
|
42 |
+
# audios = []
|
43 |
+
# for t in progress.tqdm(texts):
|
44 |
+
# audios.append(styletts2importable.inference(t, voices[v], alpha=0.3, beta=0.7, diffusion_steps=lngsteps, embedding_scale=1))
|
45 |
+
# return (24000, np.concatenate(audios))
|
46 |
+
# else:
|
47 |
+
# raise gr.Error('Wrong access code')
|
48 |
def clsynthesize(text, voice, vcsteps):
|
49 |
if text.strip() == "":
|
50 |
raise gr.Error("You must enter some text")
|
|
|
84 |
clbtn = gr.Button("Synthesize", variant="primary")
|
85 |
claudio = gr.Audio(interactive=False, label="Synthesized Audio")
|
86 |
clbtn.click(clsynthesize, inputs=[clinp, clvoice, vcsteps], outputs=[claudio], concurrency_limit=4)
|
87 |
+
# with gr.Blocks() as longText:
|
88 |
+
# with gr.Row():
|
89 |
+
# with gr.Column(scale=1):
|
90 |
+
# lnginp = gr.Textbox(label="Text", info="What would you like StyleTTS 2 to read? It works better on full sentences.", interactive=True)
|
91 |
+
# lngvoice = gr.Dropdown(voicelist, label="Voice", info="Select a default voice.", value='m-us-1', interactive=True)
|
92 |
+
# lngsteps = gr.Slider(minimum=5, maximum=25, value=10, step=1, label="Diffusion Steps", info="Higher = better quality, but slower", interactive=True)
|
93 |
+
# lngpwd = gr.Textbox(label="Access code", info="This feature is in beta. You need an access code to use it as it uses more resources and we would like to prevent abuse")
|
94 |
+
# with gr.Column(scale=1):
|
95 |
+
# lngbtn = gr.Button("Synthesize", variant="primary")
|
96 |
+
# lngaudio = gr.Audio(interactive=False, label="Synthesized Audio")
|
97 |
+
# lngbtn.click(longsynthesize, inputs=[lnginp, lngvoice, lngsteps, lngpwd], outputs=[lngaudio], concurrency_limit=4)
|
98 |
with gr.Blocks() as lj:
|
99 |
with gr.Row():
|
100 |
with gr.Column(scale=1):
|
|
|
116 |
|
117 |
**NOTE: StyleTTS 2 does better on longer texts.** For example, making it say "hi" will produce a lower-quality result than making it say a longer phrase.""")
|
118 |
gr.DuplicateButton("Duplicate Space")
|
119 |
+
# gr.TabbedInterface([vctk, clone, lj, longText], ['Multi-Voice', 'Voice Cloning', 'LJSpeech', 'Long Text [Beta]'])
|
120 |
+
gr.TabbedInterface([vctk, clone, lj], ['Multi-Voice', 'Voice Cloning', 'LJSpeech', 'Long Text [Beta]'])
|
121 |
gr.Markdown("""
|
122 |
Demo by by [mrfakename](https://twitter.com/realmrfakename). I am not affiliated with the StyleTTS 2 authors.
|
123 |
|
requirements.txt
CHANGED
@@ -20,4 +20,4 @@ phonemizer
|
|
20 |
cached-path
|
21 |
gradio
|
22 |
gruut
|
23 |
-
tortoise-tts
|
|
|
20 |
cached-path
|
21 |
gradio
|
22 |
gruut
|
23 |
+
# tortoise-tts
|