Shanuka01 committed on
Commit
048d6c5
·
1 Parent(s): 138a10c

update the app

Browse files
Files changed (1) hide show
  1. app.py +55 -17
app.py CHANGED
@@ -3,13 +3,45 @@ import torch
3
  import gradio as gr
4
  import torchaudio
5
  import time
6
- import numpy as np
7
  from datetime import datetime
8
- from tortoise.api import TextToSpeech, MODELS_DIR
9
  from tortoise.utils.text import split_and_recombine_text
10
  from tortoise.utils.audio import load_audio, load_voice, load_voices
11
 
12
- VOICE_OPTIONS = []
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
13
 
14
  def inference(
15
  text,
@@ -41,21 +73,27 @@ def inference(
41
 
42
  start_time = time.time()
43
 
44
- gen, dbg_state = tts.tts_with_preset(text=text, k=1, voice_samples=voice_samples, conditioning_latents=conditioning_latents,
45
- preset='fast', use_deterministic_seed=None, return_deterministic_state=True, cvvp_amount=.0)
46
-
47
- sep_segment = gen.squeeze(0).squeeze(0).data.cpu().numpy()
48
- return 24000, np.round(sep_segment * 32767).astype(np.int16)
 
 
 
 
 
 
 
49
 
 
 
 
 
50
  def main():
51
  title = "Tortoise TTS"
52
  description = """
53
  """
54
-
55
- for root, dirs, files in os.walk("tortoise/voices"):
56
- for folder in dirs:
57
- VOICE_OPTIONS.append(folder)
58
-
59
  text = gr.Textbox(
60
  lines=4,
61
  label="Text (Provide either text, or upload a newline separated text file below):",
@@ -78,7 +116,7 @@ def main():
78
  value="No",
79
  )
80
 
81
- output_audio = gr.Audio(label="streaming audio:", scale=10)
82
  # download_audio = gr.Audio(label="dowanload audio:")
83
  interface = gr.Interface(
84
  fn=inference,
@@ -93,11 +131,11 @@ def main():
93
  description=description,
94
  outputs=[output_audio],
95
  )
96
- interface.queue().launch(inbrowser=True)
 
97
 
98
  if __name__ == "__main__":
99
- #tts = TextToSpeech(kv_cache=True, use_deepspeed=False, half=True)
100
- tts = TextToSpeech(models_dir=MODELS_DIR, use_deepspeed=False, kv_cache=True, half=True)
101
 
102
  with open("Tortoise_TTS_Runs_Scripts.log", "a") as f:
103
  f.write(
 
3
  import gradio as gr
4
  import torchaudio
5
  import time
 
6
  from datetime import datetime
7
+ from tortoise.api import TextToSpeech
8
  from tortoise.utils.text import split_and_recombine_text
9
  from tortoise.utils.audio import load_audio, load_voice, load_voices
10
 
11
+ VOICE_OPTIONS = [
12
+ "angie",
13
+ "deniro",
14
+ "freeman",
15
+ "halle",
16
+ "lj",
17
+ "myself",
18
+ "pat2",
19
+ "snakes",
20
+ "tom",
21
+ "daws",
22
+ "dreams",
23
+ "grace",
24
+ "lescault",
25
+ "weaver",
26
+ "applejack",
27
+ "daniel",
28
+ "emma",
29
+ "geralt",
30
+ "jlaw",
31
+ "mol",
32
+ "pat",
33
+ "rainbow",
34
+ "tim_reynolds",
35
+ "atkins",
36
+ "dortice",
37
+ "empire",
38
+ "kennard",
39
+ "mouse",
40
+ "william",
41
+ "jane_eyre",
42
+ "random", # special option for random voice
43
+ ]
44
+
45
 
46
  def inference(
47
  text,
 
73
 
74
  start_time = time.time()
75
 
76
+ # all_parts = []
77
+ for j, text in enumerate(texts):
78
+ for audio_frame in tts.tts_with_preset(
79
+ text,
80
+ voice_samples=voice_samples,
81
+ conditioning_latents=conditioning_latents,
82
+ preset="ultra_fast",
83
+ k=1
84
+ ):
85
+ # print("Time taken: ", time.time() - start_time)
86
+ # all_parts.append(audio_frame)
87
+ yield (24000, audio_frame.cpu().detach().numpy())
88
 
89
+ # wav = torch.cat(all_parts, dim=0).unsqueeze(0)
90
+ # print(wav.shape)
91
+ # torchaudio.save("output.wav", wav.cpu(), 24000)
92
+ # yield (None, gr.make_waveform(audio="output.wav",))
93
  def main():
94
  title = "Tortoise TTS"
95
  description = """
96
  """
 
 
 
 
 
97
  text = gr.Textbox(
98
  lines=4,
99
  label="Text (Provide either text, or upload a newline separated text file below):",
 
116
  value="No",
117
  )
118
 
119
+ output_audio = gr.Audio(label="streaming audio:", streaming=True, autoplay=True)
120
  # download_audio = gr.Audio(label="dowanload audio:")
121
  interface = gr.Interface(
122
  fn=inference,
 
131
  description=description,
132
  outputs=[output_audio],
133
  )
134
+ interface.queue().launch()
135
+
136
 
137
  if __name__ == "__main__":
138
+ tts = TextToSpeech(kv_cache=True, use_deepspeed=True, half=True)
 
139
 
140
  with open("Tortoise_TTS_Runs_Scripts.log", "a") as f:
141
  f.write(