Flux9665 committed
Commit 0ebcf15
1 Parent(s): 6faeba1

initial commit
Models/Embedding/__init__ ADDED
File without changes
Models/ToucanTTS_Meta/__init__ ADDED
File without changes
Models/Vocoder/__init__ ADDED
File without changes
Preprocessing/multilinguality/iso_lookup.json ADDED
The diff for this file is too large to render. See raw diff
 
Preprocessing/multilinguality/iso_to_fullname.json ADDED
The diff for this file is too large to render. See raw diff
 
Utility/storage_config.py ADDED
@@ -0,0 +1,2 @@
+ MODELS_DIR = "Models/"
+ PREPROCESSING_DIR = "Corpora/"
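
These two constants centralize where the repository expects trained models and preprocessed corpora to be stored. A minimal sketch of how downstream code might build concrete paths from them (the checkpoint file and corpus names below are illustrative assumptions, not taken from this commit):

import os

from Utility.storage_config import MODELS_DIR, PREPROCESSING_DIR

# Hypothetical paths for illustration; "ToucanTTS_Meta" is a directory added in this
# commit, but the checkpoint and corpus names are assumptions.
checkpoint_path = os.path.join(MODELS_DIR, "ToucanTTS_Meta", "best.pt")   # -> Models/ToucanTTS_Meta/best.pt
corpus_cache_dir = os.path.join(PREPROCESSING_DIR, "some_dataset")        # -> Corpora/some_dataset

os.makedirs(corpus_cache_dir, exist_ok=True)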
app.py ADDED
@@ -0,0 +1,64 @@
+ import gradio as gr
+ import torch.cuda
+
+ from InferenceInterfaces.ControllableInterface import ControllableInterface
+ from Utility.utils import float2pcm
+
+
+ class TTSWebUI:
+
+     def __init__(self, gpu_id="cpu", title="Controllable Text-to-Speech with IMS Toucan", article="", available_artificial_voices=1000):
+         self.controllable_ui = ControllableInterface(gpu_id=gpu_id,
+                                                      available_artificial_voices=available_artificial_voices)
+         self.iface = gr.Interface(fn=self.read,
+                                   inputs=[gr.Textbox(lines=2,
+                                                      placeholder="write what you want the synthesis to read here...",
+                                                      value="The woods are lovely, dark and deep, but I have promises to keep, and miles to go, before I sleep.",
+                                                      label="Text input"),
+                                           gr.Slider(minimum=0, maximum=available_artificial_voices, step=1,
+                                                     value=279,
+                                                     label="Random Seed for the artificial Voice"),
+                                           gr.Slider(minimum=0.0, maximum=0.8, step=0.1, value=0.4, label="Prosody Creativity"),
+                                           gr.Slider(minimum=0.7, maximum=1.3, step=0.1, value=1.0, label="Duration Scale"),
+                                           gr.Slider(minimum=0.5, maximum=1.5, step=0.1, value=1.0, label="Pitch Variance Scale"),
+                                           gr.Slider(minimum=0.5, maximum=1.5, step=0.1, value=1.0, label="Energy Variance Scale"),
+                                           gr.Slider(minimum=-10.0, maximum=10.0, step=0.1, value=0.0, label="Femininity / Masculinity"),
+                                           gr.Slider(minimum=-10.0, maximum=10.0, step=0.1, value=0.0, label="Voice Depth")
+                                           ],
+                                   outputs=[gr.Audio(type="numpy", label="Speech"),
+                                            gr.Image(label="Visualization")],
+                                   title=title,
+                                   theme="default",
+                                   allow_flagging="never",
+                                   article=article)
+         self.iface.launch()
+
+     def read(self,
+              prompt,
+              voice_seed,
+              prosody_creativity,
+              duration_scaling_factor,
+              pitch_variance_scale,
+              energy_variance_scale,
+              emb1,
+              emb2
+              ):
+         sr, wav, fig = self.controllable_ui.read(prompt,
+                                                  voice_seed,
+                                                  prosody_creativity,
+                                                  duration_scaling_factor,
+                                                  1.,
+                                                  pitch_variance_scale,
+                                                  energy_variance_scale,
+                                                  emb1,
+                                                  emb2,
+                                                  0.,
+                                                  0.,
+                                                  0.,
+                                                  0.,
+                                                  -24.)
+         return (sr, float2pcm(wav)), fig
+
+
+ if __name__ == '__main__':
+     TTSWebUI(gpu_id="cuda" if torch.cuda.is_available() else "cpu")
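
Note that gr.Audio(type="numpy") is returned integer PCM samples, which is why the float waveform is passed through float2pcm before being handed to Gradio. The actual float2pcm implementation in Utility.utils is not part of this commit; the following is only a sketch of what such a conversion typically looks like, assuming float input in the range [-1.0, 1.0]:

import numpy as np


def float2pcm(sig, dtype='int16'):
    # Assumed behaviour: scale float samples in [-1.0, 1.0] to the integer range of `dtype`
    # and clip anything that would overflow.
    sig = np.asarray(sig)
    info = np.iinfo(dtype)
    abs_max = 2 ** (info.bits - 1)
    offset = info.min + abs_max
    return (sig * abs_max + offset).clip(info.min, info.max).astype(dtype)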