mrfakename committed on
Commit
b21342c
·
1 Parent(s): 4b8ade9

Add long-speaker support

Browse files
Files changed (2) hide show
  1. app.py +26 -10
  2. requirements.txt +1 -1
app.py CHANGED
@@ -3,7 +3,7 @@ import styletts2importable
3
  import ljspeechimportable
4
  import torch
5
  import os
6
- # from tortoise.utils.text import split_and_recombine_text
7
  import numpy as np
8
  import pickle
9
  theme = gr.themes.Base(
@@ -20,15 +20,31 @@ global_phonemizer = phonemizer.backend.EspeakBackend(language='en-us', preserve_
20
  # else:
21
  for v in voicelist:
22
  voices[v] = styletts2importable.compute_style(f'voices/{v}.wav')
23
- def synthesize(text, voice, multispeakersteps):
24
- if text.strip() == "":
25
- raise gr.Error("You must enter some text")
26
- # if len(global_phonemizer.phonemize([text])) > 300:
27
- if len(text) > 300:
28
- raise gr.Error("Text must be under 300 characters")
29
- v = voice.lower()
30
- # return (24000, styletts2importable.inference(text, voices[v], alpha=0.3, beta=0.7, diffusion_steps=7, embedding_scale=1))
31
- return (24000, styletts2importable.inference(text, voices[v], alpha=0.3, beta=0.7, diffusion_steps=multispeakersteps, embedding_scale=1))
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
32
  # def longsynthesize(text, voice, lngsteps, password, progress=gr.Progress()):
33
  # if password == os.environ['ACCESS_CODE']:
34
  # if text.strip() == "":
 
3
  import ljspeechimportable
4
  import torch
5
  import os
6
+ from tortoise.utils.text import split_and_recombine_text
7
  import numpy as np
8
  import pickle
9
  theme = gr.themes.Base(
 
20
  # else:
21
  for v in voicelist:
22
  voices[v] = styletts2importable.compute_style(f'voices/{v}.wav')
23
+ # def synthesize(text, voice, multispeakersteps):
24
+ # if text.strip() == "":
25
+ # raise gr.Error("You must enter some text")
26
+ # # if len(global_phonemizer.phonemize([text])) > 300:
27
+ # if len(text) > 300:
28
+ # raise gr.Error("Text must be under 300 characters")
29
+ # v = voice.lower()
30
+ # # return (24000, styletts2importable.inference(text, voices[v], alpha=0.3, beta=0.7, diffusion_steps=7, embedding_scale=1))
31
+ # return (24000, styletts2importable.inference(text, voices[v], alpha=0.3, beta=0.7, diffusion_steps=multispeakersteps, embedding_scale=1))
32
+ def synthesize(text, voice, lngsteps, password, progress=gr.Progress()):
33
+ if password == os.environ['ACCESS_CODE']:
34
+ if text.strip() == "":
35
+ raise gr.Error("You must enter some text")
36
+ if lngsteps > 25:
37
+ raise gr.Error("Max 25 steps")
38
+ if lngsteps < 5:
39
+ raise gr.Error("Min 5 steps")
40
+ texts = split_and_recombine_text(text)
41
+ v = voice.lower()
42
+ audios = []
43
+ for t in progress.tqdm(texts):
44
+ audios.append(styletts2importable.inference(t, voices[v], alpha=0.3, beta=0.7, diffusion_steps=lngsteps, embedding_scale=1))
45
+ return (24000, np.concatenate(audios))
46
+ else:
47
+ raise gr.Error('Wrong access code')
48
  # def longsynthesize(text, voice, lngsteps, password, progress=gr.Progress()):
49
  # if password == os.environ['ACCESS_CODE']:
50
  # if text.strip() == "":
requirements.txt CHANGED
@@ -20,4 +20,4 @@ phonemizer
20
  cached-path
21
  gradio
22
  gruut
23
- # tortoise-tts
 
20
  cached-path
21
  gradio
22
  gruut
23
+ tortoise-tts