mrq committed on
Commit
59f7675
1 Parent(s): faa194e
app.py CHANGED
@@ -1,5 +1,5 @@
1
  import sys
2
 
3
- sys.argv = sys.argv + "--dtype=float32 --device=cuda".split(" ")
4
 
5
- from vall_e.webui import ui
 
1
  import sys
2
 
3
+ # sys.argv = sys.argv + "--dtype=float32 --device=cuda".split(" ")
4
 
5
+ from vall_e.webui import ui
fp32.pth DELETED
@@ -1,3 +0,0 @@
1
- version https://git-lfs.github.com/spec/v1
2
- oid sha256:4a7710db086a695e47ca79e3aade7f64a5034dc6eb88a6ea21de63e0c21434c5
3
- size 441076095
 
 
 
 
models/ckpt/ar+nar-retnet-8/fp32.pth DELETED
@@ -1,3 +0,0 @@
1
- version https://git-lfs.github.com/spec/v1
2
- oid sha256:3e2df5862030d8331021810119266e35b22f0d11c3c19feb5692bbcfec489cd7
3
- size 441076095
 
 
 
 
models/config.ar_nar.yaml DELETED
@@ -1,126 +0,0 @@
1
- dataset:
2
- training: []
3
- validation: [ ]
4
- noise: []
5
-
6
- speaker_name_getter: "lambda p: f'{p.parts[-3]}_{p.parts[-2]}'"
7
-
8
- use_hdf5: True
9
- use_metadata: True
10
- hdf5_flag: r
11
- validate: True
12
-
13
- workers: 2
14
- cache: True
15
-
16
- phones_range: [4, 256]
17
- duration_range: [1.0, 16.0]
18
-
19
- random_utterance: 1.0
20
- max_prompts: 3
21
- prompt_duration: 6.0
22
-
23
- sample_type: speaker
24
-
25
- tasks_list: [ "tts" ] # , [ "tts", "tts-c", "ns", "sr", "tse", "cse", "nse", "tts"]
26
-
27
- models:
28
- _prom_levels: 8
29
- _max_levels: 8
30
-
31
- _models:
32
- - name: "ar+nar"
33
- size: "full"
34
- resp_levels: 8
35
- prom_levels: 8
36
- tasks: 8
37
- langs: 2
38
- arch_type: "retnet"
39
- training: True
40
- version: 3
41
-
42
- hyperparameters:
43
- batch_size: 8
44
- gradient_accumulation_steps: 32
45
- gradient_clipping: 100
46
-
47
- optimizer: AdamW # Prodigy
48
- torch_optimizer: True
49
- learning_rate: 1.0e-4
50
-
51
- scheduler_type: ""
52
- #scheduler_type: OneCycle
53
- #scheduler_params:
54
- # cycle_first_step_size: 10_000
55
- # cycle_first_stair_count: 10_000
56
-
57
- # cycle_second_step_size: 15_000
58
- # cycle_second_stair_count: 15_000
59
-
60
- # decay_step_size: 5_000
61
-
62
- # cycle_min_lr: 2.5e-4 # 1.0e-5
63
- # cycle_max_lr: 2.5e-4 # 1.0e-4
64
- # decay_lr_rate: 0.0
65
-
66
- # cycle_min_mom: 0.90
67
- # cycle_max_mom: 0.99
68
- # decay_mom_rate: 0.0
69
-
70
- evaluation:
71
- batch_size: 16
72
- frequency: 250
73
- size: 16
74
-
75
- steps: 450
76
- ar_temperature: 0.95
77
- nar_temperature: 0.25
78
- load_disabled_engines: True
79
-
80
- trainer:
81
- iterations: 1_000_000
82
-
83
- save_tag: step
84
- save_on_oom: True
85
- save_on_quit: True
86
- save_frequency: 100
87
- export_on_save: True
88
-
89
- keep_last_checkpoints: 4
90
-
91
- aggressive_optimizations: False
92
- load_disabled_engines: False
93
-
94
- load_state_dict: True
95
- strict_loading: False
96
- #load_tag: "9500"
97
- #load_states: False
98
- #restart_step_count: True
99
-
100
- gc_mode: None # "global_step"
101
-
102
- weight_dtype: bfloat16
103
- amp: False
104
-
105
- backend: deepspeed
106
- deepspeed:
107
- inferencing: False
108
- zero_optimization_level: 0
109
- use_compression_training: False
110
-
111
- activation_checkpointing: True
112
-
113
- inference:
114
- backend: deepspeed
115
- use_vocos: True
116
- normalize: False
117
-
118
- weight_dtype: bfloat16
119
- amp: False
120
-
121
- bitsandbytes:
122
- enabled: False
123
- injects: True
124
- linear: True
125
- embedding: True
126
-