Spaces:
Running
Running
update the app
Browse files
app.py
CHANGED
@@ -3,13 +3,45 @@ import torch
|
|
3 |
import gradio as gr
|
4 |
import torchaudio
|
5 |
import time
|
6 |
-
import numpy as np
|
7 |
from datetime import datetime
|
8 |
-
from tortoise.api import TextToSpeech
|
9 |
from tortoise.utils.text import split_and_recombine_text
|
10 |
from tortoise.utils.audio import load_audio, load_voice, load_voices
|
11 |
|
12 |
-
VOICE_OPTIONS = [
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
13 |
|
14 |
def inference(
|
15 |
text,
|
@@ -41,21 +73,27 @@ def inference(
|
|
41 |
|
42 |
start_time = time.time()
|
43 |
|
44 |
-
|
45 |
-
|
46 |
-
|
47 |
-
|
48 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
49 |
|
|
|
|
|
|
|
|
|
50 |
def main():
|
51 |
title = "Tortoise TTS"
|
52 |
description = """
|
53 |
"""
|
54 |
-
|
55 |
-
for root, dirs, files in os.walk("tortoise/voices"):
|
56 |
-
for folder in dirs:
|
57 |
-
VOICE_OPTIONS.append(folder)
|
58 |
-
|
59 |
text = gr.Textbox(
|
60 |
lines=4,
|
61 |
label="Text (Provide either text, or upload a newline separated text file below):",
|
@@ -78,7 +116,7 @@ def main():
|
|
78 |
value="No",
|
79 |
)
|
80 |
|
81 |
-
output_audio = gr.Audio(label="streaming audio:",
|
82 |
# download_audio = gr.Audio(label="dowanload audio:")
|
83 |
interface = gr.Interface(
|
84 |
fn=inference,
|
@@ -93,11 +131,11 @@ def main():
|
|
93 |
description=description,
|
94 |
outputs=[output_audio],
|
95 |
)
|
96 |
-
interface.queue().launch(
|
|
|
97 |
|
98 |
if __name__ == "__main__":
|
99 |
-
|
100 |
-
tts = TextToSpeech(models_dir=MODELS_DIR, use_deepspeed=False, kv_cache=True, half=True)
|
101 |
|
102 |
with open("Tortoise_TTS_Runs_Scripts.log", "a") as f:
|
103 |
f.write(
|
|
|
3 |
import gradio as gr
|
4 |
import torchaudio
|
5 |
import time
|
|
|
6 |
from datetime import datetime
|
7 |
+
from tortoise.api import TextToSpeech
|
8 |
from tortoise.utils.text import split_and_recombine_text
|
9 |
from tortoise.utils.audio import load_audio, load_voice, load_voices
|
10 |
|
11 |
+
VOICE_OPTIONS = [
|
12 |
+
"angie",
|
13 |
+
"deniro",
|
14 |
+
"freeman",
|
15 |
+
"halle",
|
16 |
+
"lj",
|
17 |
+
"myself",
|
18 |
+
"pat2",
|
19 |
+
"snakes",
|
20 |
+
"tom",
|
21 |
+
"daws",
|
22 |
+
"dreams",
|
23 |
+
"grace",
|
24 |
+
"lescault",
|
25 |
+
"weaver",
|
26 |
+
"applejack",
|
27 |
+
"daniel",
|
28 |
+
"emma",
|
29 |
+
"geralt",
|
30 |
+
"jlaw",
|
31 |
+
"mol",
|
32 |
+
"pat",
|
33 |
+
"rainbow",
|
34 |
+
"tim_reynolds",
|
35 |
+
"atkins",
|
36 |
+
"dortice",
|
37 |
+
"empire",
|
38 |
+
"kennard",
|
39 |
+
"mouse",
|
40 |
+
"william",
|
41 |
+
"jane_eyre",
|
42 |
+
"random", # special option for random voice
|
43 |
+
]
|
44 |
+
|
45 |
|
46 |
def inference(
|
47 |
text,
|
|
|
73 |
|
74 |
start_time = time.time()
|
75 |
|
76 |
+
# all_parts = []
|
77 |
+
for j, text in enumerate(texts):
|
78 |
+
for audio_frame in tts.tts_with_preset(
|
79 |
+
text,
|
80 |
+
voice_samples=voice_samples,
|
81 |
+
conditioning_latents=conditioning_latents,
|
82 |
+
preset="ultra_fast",
|
83 |
+
k=1
|
84 |
+
):
|
85 |
+
# print("Time taken: ", time.time() - start_time)
|
86 |
+
# all_parts.append(audio_frame)
|
87 |
+
yield (24000, audio_frame.cpu().detach().numpy())
|
88 |
|
89 |
+
# wav = torch.cat(all_parts, dim=0).unsqueeze(0)
|
90 |
+
# print(wav.shape)
|
91 |
+
# torchaudio.save("output.wav", wav.cpu(), 24000)
|
92 |
+
# yield (None, gr.make_waveform(audio="output.wav",))
|
93 |
def main():
|
94 |
title = "Tortoise TTS"
|
95 |
description = """
|
96 |
"""
|
|
|
|
|
|
|
|
|
|
|
97 |
text = gr.Textbox(
|
98 |
lines=4,
|
99 |
label="Text (Provide either text, or upload a newline separated text file below):",
|
|
|
116 |
value="No",
|
117 |
)
|
118 |
|
119 |
+
output_audio = gr.Audio(label="streaming audio:", streaming=True, autoplay=True)
|
120 |
# download_audio = gr.Audio(label="dowanload audio:")
|
121 |
interface = gr.Interface(
|
122 |
fn=inference,
|
|
|
131 |
description=description,
|
132 |
outputs=[output_audio],
|
133 |
)
|
134 |
+
interface.queue().launch()
|
135 |
+
|
136 |
|
137 |
if __name__ == "__main__":
|
138 |
+
tts = TextToSpeech(kv_cache=True, use_deepspeed=True, half=True)
|
|
|
139 |
|
140 |
with open("Tortoise_TTS_Runs_Scripts.log", "a") as f:
|
141 |
f.write(
|