mrfakename committed on
Commit
a5cfbc2
·
1 Parent(s): 2f1c8d2

remove long text

Browse files
Files changed (2) hide show
  1. app.py +30 -29
  2. requirements.txt +1 -1
app.py CHANGED
@@ -3,7 +3,7 @@ import styletts2importable
3
  import ljspeechimportable
4
  import torch
5
  import os
6
- from tortoise.utils.text import split_and_recombine_text
7
  import numpy as np
8
  import pickle
9
  theme = gr.themes.Base(
@@ -29,22 +29,22 @@ def synthesize(text, voice, multispeakersteps):
29
  v = voice.lower()
30
  # return (24000, styletts2importable.inference(text, voices[v], alpha=0.3, beta=0.7, diffusion_steps=7, embedding_scale=1))
31
  return (24000, styletts2importable.inference(text, voices[v], alpha=0.3, beta=0.7, diffusion_steps=multispeakersteps, embedding_scale=1))
32
- def longsynthesize(text, voice, lngsteps, password, progress=gr.Progress()):
33
- if password == os.environ['ACCESS_CODE']:
34
- if text.strip() == "":
35
- raise gr.Error("You must enter some text")
36
- if lngsteps > 25:
37
- raise gr.Error("Max 25 steps")
38
- if lngsteps < 5:
39
- raise gr.Error("Min 5 steps")
40
- texts = split_and_recombine_text(text)
41
- v = voice.lower()
42
- audios = []
43
- for t in progress.tqdm(texts):
44
- audios.append(styletts2importable.inference(t, voices[v], alpha=0.3, beta=0.7, diffusion_steps=lngsteps, embedding_scale=1))
45
- return (24000, np.concatenate(audios))
46
- else:
47
- raise gr.Error('Wrong access code')
48
  def clsynthesize(text, voice, vcsteps):
49
  if text.strip() == "":
50
  raise gr.Error("You must enter some text")
@@ -84,17 +84,17 @@ with gr.Blocks() as clone:
84
  clbtn = gr.Button("Synthesize", variant="primary")
85
  claudio = gr.Audio(interactive=False, label="Synthesized Audio")
86
  clbtn.click(clsynthesize, inputs=[clinp, clvoice, vcsteps], outputs=[claudio], concurrency_limit=4)
87
- with gr.Blocks() as longText:
88
- with gr.Row():
89
- with gr.Column(scale=1):
90
- lnginp = gr.Textbox(label="Text", info="What would you like StyleTTS 2 to read? It works better on full sentences.", interactive=True)
91
- lngvoice = gr.Dropdown(voicelist, label="Voice", info="Select a default voice.", value='m-us-1', interactive=True)
92
- lngsteps = gr.Slider(minimum=5, maximum=25, value=10, step=1, label="Diffusion Steps", info="Higher = better quality, but slower", interactive=True)
93
- lngpwd = gr.Textbox(label="Access code", info="This feature is in beta. You need an access code to use it as it uses more resources and we would like to prevent abuse")
94
- with gr.Column(scale=1):
95
- lngbtn = gr.Button("Synthesize", variant="primary")
96
- lngaudio = gr.Audio(interactive=False, label="Synthesized Audio")
97
- lngbtn.click(longsynthesize, inputs=[lnginp, lngvoice, lngsteps, lngpwd], outputs=[lngaudio], concurrency_limit=4)
98
  with gr.Blocks() as lj:
99
  with gr.Row():
100
  with gr.Column(scale=1):
@@ -116,7 +116,8 @@ Is there a long queue on this space? Duplicate it and add a more powerful GPU to
116
 
117
  **NOTE: StyleTTS 2 does better on longer texts.** For example, making it say "hi" will produce a lower-quality result than making it say a longer phrase.""")
118
  gr.DuplicateButton("Duplicate Space")
119
- gr.TabbedInterface([vctk, clone, lj, longText], ['Multi-Voice', 'Voice Cloning', 'LJSpeech', 'Long Text [Beta]'])
 
120
  gr.Markdown("""
121
  Demo by by [mrfakename](https://twitter.com/realmrfakename). I am not affiliated with the StyleTTS 2 authors.
122
 
 
3
  import ljspeechimportable
4
  import torch
5
  import os
6
+ # from tortoise.utils.text import split_and_recombine_text
7
  import numpy as np
8
  import pickle
9
  theme = gr.themes.Base(
 
29
  v = voice.lower()
30
  # return (24000, styletts2importable.inference(text, voices[v], alpha=0.3, beta=0.7, diffusion_steps=7, embedding_scale=1))
31
  return (24000, styletts2importable.inference(text, voices[v], alpha=0.3, beta=0.7, diffusion_steps=multispeakersteps, embedding_scale=1))
32
+ # def longsynthesize(text, voice, lngsteps, password, progress=gr.Progress()):
33
+ # if password == os.environ['ACCESS_CODE']:
34
+ # if text.strip() == "":
35
+ # raise gr.Error("You must enter some text")
36
+ # if lngsteps > 25:
37
+ # raise gr.Error("Max 25 steps")
38
+ # if lngsteps < 5:
39
+ # raise gr.Error("Min 5 steps")
40
+ # texts = split_and_recombine_text(text)
41
+ # v = voice.lower()
42
+ # audios = []
43
+ # for t in progress.tqdm(texts):
44
+ # audios.append(styletts2importable.inference(t, voices[v], alpha=0.3, beta=0.7, diffusion_steps=lngsteps, embedding_scale=1))
45
+ # return (24000, np.concatenate(audios))
46
+ # else:
47
+ # raise gr.Error('Wrong access code')
48
  def clsynthesize(text, voice, vcsteps):
49
  if text.strip() == "":
50
  raise gr.Error("You must enter some text")
 
84
  clbtn = gr.Button("Synthesize", variant="primary")
85
  claudio = gr.Audio(interactive=False, label="Synthesized Audio")
86
  clbtn.click(clsynthesize, inputs=[clinp, clvoice, vcsteps], outputs=[claudio], concurrency_limit=4)
87
+ # with gr.Blocks() as longText:
88
+ # with gr.Row():
89
+ # with gr.Column(scale=1):
90
+ # lnginp = gr.Textbox(label="Text", info="What would you like StyleTTS 2 to read? It works better on full sentences.", interactive=True)
91
+ # lngvoice = gr.Dropdown(voicelist, label="Voice", info="Select a default voice.", value='m-us-1', interactive=True)
92
+ # lngsteps = gr.Slider(minimum=5, maximum=25, value=10, step=1, label="Diffusion Steps", info="Higher = better quality, but slower", interactive=True)
93
+ # lngpwd = gr.Textbox(label="Access code", info="This feature is in beta. You need an access code to use it as it uses more resources and we would like to prevent abuse")
94
+ # with gr.Column(scale=1):
95
+ # lngbtn = gr.Button("Synthesize", variant="primary")
96
+ # lngaudio = gr.Audio(interactive=False, label="Synthesized Audio")
97
+ # lngbtn.click(longsynthesize, inputs=[lnginp, lngvoice, lngsteps, lngpwd], outputs=[lngaudio], concurrency_limit=4)
98
  with gr.Blocks() as lj:
99
  with gr.Row():
100
  with gr.Column(scale=1):
 
116
 
117
  **NOTE: StyleTTS 2 does better on longer texts.** For example, making it say "hi" will produce a lower-quality result than making it say a longer phrase.""")
118
  gr.DuplicateButton("Duplicate Space")
119
+ # gr.TabbedInterface([vctk, clone, lj, longText], ['Multi-Voice', 'Voice Cloning', 'LJSpeech', 'Long Text [Beta]'])
120
+ gr.TabbedInterface([vctk, clone, lj], ['Multi-Voice', 'Voice Cloning', 'LJSpeech', 'Long Text [Beta]'])
121
  gr.Markdown("""
122
  Demo by by [mrfakename](https://twitter.com/realmrfakename). I am not affiliated with the StyleTTS 2 authors.
123
 
requirements.txt CHANGED
@@ -20,4 +20,4 @@ phonemizer
20
  cached-path
21
  gradio
22
  gruut
23
- tortoise-tts
 
20
  cached-path
21
  gradio
22
  gruut
23
+ # tortoise-tts