Simonlob committed on
Commit
cf1295a
1 Parent(s): 7e928b3
This view is limited to 50 files because it contains too many changes.   See raw diff
Files changed (50) hide show
  1. LICENSE +21 -0
  2. MANIFEST.in +14 -0
  3. Makefile +42 -0
  4. README.md:Zone.Identifier +4 -0
  5. app.py +174 -0
  6. checkpoints/info.txt +1 -0
  7. configs/__init__.py +1 -0
  8. configs/callbacks/default.yaml +5 -0
  9. configs/callbacks/model_checkpoint.yaml +17 -0
  10. configs/callbacks/model_summary.yaml +5 -0
  11. configs/callbacks/none.yaml +0 -0
  12. configs/callbacks/rich_progress_bar.yaml +4 -0
  13. configs/data/akylai.yaml +21 -0
  14. configs/data/akylai_multi.yaml +21 -0
  15. configs/data/hi-fi_en-US_female.yaml +14 -0
  16. configs/data/ljspeech.yaml +22 -0
  17. configs/data/vctk.yaml +14 -0
  18. configs/debug/default.yaml +35 -0
  19. configs/debug/fdr.yaml +9 -0
  20. configs/debug/limit.yaml +12 -0
  21. configs/debug/overfit.yaml +13 -0
  22. configs/debug/profiler.yaml +15 -0
  23. configs/eval.yaml +18 -0
  24. configs/experiment/akylai.yaml +14 -0
  25. configs/experiment/akylai_multi.yaml +14 -0
  26. configs/experiment/hifi_dataset_piper_phonemizer.yaml +14 -0
  27. configs/experiment/ljspeech.yaml +14 -0
  28. configs/experiment/ljspeech_min_memory.yaml +18 -0
  29. configs/experiment/multispeaker.yaml +14 -0
  30. configs/extras/default.yaml +8 -0
  31. configs/hparams_search/mnist_optuna.yaml +52 -0
  32. configs/hydra/default.yaml +19 -0
  33. configs/local/.gitkeep +0 -0
  34. configs/logger/aim.yaml +28 -0
  35. configs/logger/comet.yaml +12 -0
  36. configs/logger/csv.yaml +7 -0
  37. configs/logger/many_loggers.yaml +9 -0
  38. configs/logger/mlflow.yaml +12 -0
  39. configs/logger/neptune.yaml +9 -0
  40. configs/logger/tensorboard.yaml +10 -0
  41. configs/logger/wandb.yaml +16 -0
  42. configs/model/cfm/default.yaml +3 -0
  43. configs/model/decoder/default.yaml +7 -0
  44. configs/model/encoder/default.yaml +18 -0
  45. configs/model/matcha.yaml +15 -0
  46. configs/model/optimizer/adam.yaml +4 -0
  47. configs/paths/default.yaml +18 -0
  48. configs/train.yaml +51 -0
  49. configs/trainer/cpu.yaml +5 -0
  50. configs/trainer/ddp.yaml +9 -0
LICENSE ADDED
@@ -0,0 +1,21 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ MIT License
2
+
3
+ Copyright (c) 2023 Shivam Mehta
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in all
13
+ copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
+ SOFTWARE.
MANIFEST.in ADDED
@@ -0,0 +1,14 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ include README.md
2
+ include LICENSE.txt
3
+ include requirements.*.txt
4
+ include *.cff
5
+ include requirements.txt
6
+ include matcha/VERSION
7
+ recursive-include matcha *.json
8
+ recursive-include matcha *.html
9
+ recursive-include matcha *.png
10
+ recursive-include matcha *.md
11
+ recursive-include matcha *.py
12
+ recursive-include matcha *.pyx
13
+ recursive-exclude tests *
14
+ prune tests*
Makefile ADDED
@@ -0,0 +1,42 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+
2
+ help: ## Show help
3
+ @grep -E '^[.a-zA-Z_-]+:.*?## .*$$' $(MAKEFILE_LIST) | awk 'BEGIN {FS = ":.*?## "}; {printf "\033[36m%-30s\033[0m %s\n", $$1, $$2}'
4
+
5
+ clean: ## Clean autogenerated files
6
+ rm -rf dist
7
+ find . -type f -name "*.DS_Store" -ls -delete
8
+ find . | grep -E "(__pycache__|\.pyc|\.pyo)" | xargs rm -rf
9
+ find . | grep -E ".pytest_cache" | xargs rm -rf
10
+ find . | grep -E ".ipynb_checkpoints" | xargs rm -rf
11
+ rm -f .coverage
12
+
13
+ clean-logs: ## Clean logs
14
+ rm -rf logs/**
15
+
16
+ create-package: ## Create wheel and tar gz
17
+ rm -rf dist/
18
+ python setup.py bdist_wheel --plat-name=manylinux1_x86_64
19
+ python setup.py sdist
20
+ python -m twine upload dist/* --verbose --skip-existing
21
+
22
+ format: ## Run pre-commit hooks
23
+ pre-commit run -a
24
+
25
+ sync: ## Merge changes from main branch to your current branch
26
+ git pull
27
+ git pull origin main
28
+
29
+ test: ## Run not slow tests
30
+ pytest -k "not slow"
31
+
32
+ test-full: ## Run all tests
33
+ pytest
34
+
35
+ train-ljspeech: ## Train the model
36
+ python matcha/train.py experiment=ljspeech
37
+
38
+ train-ljspeech-min: ## Train the model with minimum memory
39
+ python matcha/train.py experiment=ljspeech_min_memory
40
+
41
+ start_app: ## Start the app
42
+ python matcha/app.py
README.md:Zone.Identifier ADDED
@@ -0,0 +1,4 @@
 
 
 
 
 
1
+ [ZoneTransfer]
2
+ ZoneId=3
3
+ ReferrerUrl=https://huggingface.co/spaces/the-cramer-project/AkylAI_TTS_small/tree/main
4
+ HostUrl=https://huggingface.co/spaces/the-cramer-project/AkylAI_TTS_small/resolve/main/README.md?download=true
app.py ADDED
@@ -0,0 +1,174 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from pathlib import Path
2
+ import argparse
3
+ import soundfile as sf
4
+ import torch
5
+ import io
6
+ import argparse
7
+ from matcha.hifigan.config import v1
8
+ from matcha.hifigan.denoiser import Denoiser
9
+ from matcha.hifigan.env import AttrDict
10
+ from matcha.hifigan.models import Generator as HiFiGAN
11
+ from matcha.models.matcha_tts import MatchaTTS
12
+ from matcha.text import sequence_to_text, text_to_sequence
13
+ from matcha.utils.utils import intersperse
14
+ import gradio as gr
15
+ import requests
16
+
17
+ def download_file(url, save_path):
18
+ response = requests.get(url)
19
+ with open(save_path, 'wb') as file:
20
+ file.write(response.content)
21
+
22
+ url_checkpoint = 'https://github.com/simonlobgromov/AkylAI_Matcha_Checkpoint/releases/download/Matcha-TTS/checkpoint_epoch.499.ckpt'
23
+ save_checkpoint_path = './checkpoints/checkpoint.ckpt'
24
+ url_generator = 'https://github.com/simonlobgromov/AkylAI_Matcha_HiFiGan/releases/download/Generator/generator_v1'
25
+ save_generator_path = './checkpoints/generator'
26
+
27
+ download_file(url_checkpoint, save_checkpoint_path)
28
+ download_file(url_generator, save_generator_path)
29
+
30
+ def load_matcha( checkpoint_path, device):
31
+ model = MatchaTTS.load_from_checkpoint(checkpoint_path, map_location=device)
32
+ _ = model.eval()
33
+ return model
34
+
35
+ def load_hifigan(checkpoint_path, device):
36
+ h = AttrDict(v1)
37
+ hifigan = HiFiGAN(h).to(device)
38
+ hifigan.load_state_dict(torch.load(checkpoint_path, map_location=device)["generator"])
39
+ _ = hifigan.eval()
40
+ hifigan.remove_weight_norm()
41
+ return hifigan
42
+
43
+ def load_vocoder(checkpoint_path, device):
44
+ vocoder = None
45
+ vocoder = load_hifigan(checkpoint_path, device)
46
+ denoiser = Denoiser(vocoder, mode="zeros")
47
+ return vocoder, denoiser
48
+
49
+ def process_text(i: int, text: str, device: torch.device):
50
+ print(f"[{i}] - Input text: {text}")
51
+ x = torch.tensor(
52
+ intersperse(text_to_sequence(text, ["kyrgyz_cleaners"]), 0),
53
+ dtype=torch.long,
54
+ device=device,
55
+ )[None]
56
+ x_lengths = torch.tensor([x.shape[-1]], dtype=torch.long, device=device)
57
+ x_phones = sequence_to_text(x.squeeze(0).tolist())
58
+ print(f"[{i}] - Phonetised text: {x_phones[1::2]}")
59
+ return {"x_orig": text, "x": x, "x_lengths": x_lengths, "x_phones": x_phones}
60
+
61
+ def to_waveform(mel, vocoder, denoiser=None):
62
+ audio = vocoder(mel).clamp(-1, 1)
63
+ if denoiser is not None:
64
+ audio = denoiser(audio.squeeze(), strength=0.00025).cpu().squeeze()
65
+ return audio.cpu().squeeze()
66
+
67
+ @torch.inference_mode()
68
+ def process_text_gradio(text):
69
+ output = process_text(1, text, device)
70
+ return output["x_phones"][1::2], output["x"], output["x_lengths"]
71
+
72
+ @torch.inference_mode()
73
+ def synthesise_mel(text, text_length, n_timesteps, temperature, length_scale, spk=-1):
74
+ spk = torch.tensor([spk], device=device, dtype=torch.long) if spk >= 0 else None
75
+ output = model.synthesise(
76
+ text,
77
+ text_length,
78
+ n_timesteps=n_timesteps,
79
+ temperature=temperature,
80
+ spks=spk,
81
+ length_scale=length_scale,
82
+ )
83
+ output["waveform"] = to_waveform(output["mel"], vocoder, denoiser)
84
+ return output["waveform"].numpy()
85
+
86
+ def get_inference(text, n_timesteps=20, mel_temp = 0.667, length_scale=0.8, spk=-1):
87
+ phones, text, text_lengths = process_text_gradio(text)
88
+ print(type(synthesise_mel(text, text_lengths, n_timesteps, mel_temp, length_scale, spk)))
89
+ return synthesise_mel(text, text_lengths, n_timesteps, mel_temp, length_scale, spk)
90
+
91
+
92
+ device = torch.device("cpu")
93
+ model_path = './checkpoints/checkpoint.ckpt'
94
+ vocoder_path = './checkpoints/generator'
95
+ model = load_matcha(model_path, device)
96
+ vocoder, denoiser = load_vocoder(vocoder_path, device)
97
+
98
+ def gen_tts(text, speaking_rate):
99
+ return 22050, get_inference(text = text, length_scale = speaking_rate)
100
+
101
+ default_text = "Баарыңарга салам, менин атым Акылай."
102
+
103
+ css = """
104
+ #share-btn-container {
105
+ display: flex;
106
+ padding-left: 0.5rem !important;
107
+ padding-right: 0.5rem !important;
108
+ background-color: #000000;
109
+ justify-content: center;
110
+ align-items: center;
111
+ border-radius: 9999px !important;
112
+ width: 13rem;
113
+ margin-top: 10px;
114
+ margin-left: auto;
115
+ flex: unset !important;
116
+ }
117
+ #share-btn {
118
+ all: initial;
119
+ color: #ffffff;
120
+ font-weight: 600;
121
+ cursor: pointer;
122
+ font-family: 'IBM Plex Sans', sans-serif;
123
+ margin-left: 0.5rem !important;
124
+ padding-top: 0.25rem !important;
125
+ padding-bottom: 0.25rem !important;
126
+ right:0;
127
+ }
128
+ #share-btn * {
129
+ all: unset !important;
130
+ }
131
+ #share-btn-container div:nth-child(-n+2){
132
+ width: auto !important;
133
+ min-height: 0px !important;
134
+ }
135
+ #share-btn-container .wrap {
136
+ display: none !important;
137
+ }
138
+ """
139
+ with gr.Blocks(css=css) as block:
140
+ gr.HTML(
141
+ """
142
+ <div style="text-align: center; max-width: 700px; margin: 0 auto;">
143
+ <div
144
+ style="
145
+ display: inline-flex; align-items: center; gap: 0.8rem; font-size: 1.75rem;
146
+ "
147
+ >
148
+ <h1 style="font-weight: 900; margin-bottom: 7px; line-height: normal;">
149
+ Akyl-AI TTS
150
+ </h1>
151
+ </div>
152
+ </div>
153
+ """
154
+ )
155
+ with gr.Row():
156
+ image_path = "./photo_2024-04-07_15-59-52.png"
157
+ gr.Image(image_path, label=None, width=660, height=315, show_label=False)
158
+ with gr.Row():
159
+ with gr.Column():
160
+ input_text = gr.Textbox(label="Input Text", lines=2, value=default_text, elem_id="input_text")
161
+ speaking_rate = gr.Slider(label='Speaking rate', minimum=0.5, maximum=1, step=0.05, value=0.8, interactive=True, show_label=True, elem_id="speaking_rate")
162
+
163
+
164
+ run_button = gr.Button("Generate Audio", variant="primary")
165
+ with gr.Column():
166
+ audio_out = gr.Audio(label="Parler-TTS generation", type="numpy", elem_id="audio_out")
167
+
168
+ inputs = [input_text, speaking_rate]
169
+ outputs = [audio_out]
170
+ run_button.click(fn=gen_tts, inputs=inputs, outputs=outputs, queue=True)
171
+
172
+
173
+ block.queue()
174
+ block.launch(share=True)
checkpoints/info.txt ADDED
@@ -0,0 +1 @@
 
 
1
+ Забудь дорогу всяк сюда входящий!
configs/__init__.py ADDED
@@ -0,0 +1 @@
 
 
1
+ # this file is needed here to include configs when building project as a package
configs/callbacks/default.yaml ADDED
@@ -0,0 +1,5 @@
 
 
 
 
 
 
1
+ defaults:
2
+ - model_checkpoint.yaml
3
+ - model_summary.yaml
4
+ - rich_progress_bar.yaml
5
+ - _self_
configs/callbacks/model_checkpoint.yaml ADDED
@@ -0,0 +1,17 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # https://lightning.ai/docs/pytorch/stable/api/lightning.pytorch.callbacks.ModelCheckpoint.html
2
+
3
+ model_checkpoint:
4
+ _target_: lightning.pytorch.callbacks.ModelCheckpoint
5
+ dirpath: ${paths.output_dir}/checkpoints # directory to save the model file
6
+ filename: checkpoint_{epoch:03d} # checkpoint filename
7
+ monitor: epoch # name of the logged metric which determines when model is improving
8
+ verbose: False # verbosity mode
9
+ save_last: true # additionally always save an exact copy of the last checkpoint to a file last.ckpt
10
+ save_top_k: 5 # save k best models (determined by above metric)
11
+ mode: "max" # "max" means higher metric value is better, can be also "min"
12
+ auto_insert_metric_name: True # when True, the checkpoints filenames will contain the metric name
13
+ save_weights_only: False # if True, then only the model’s weights will be saved
14
+ every_n_train_steps: null # number of training steps between checkpoints
15
+ train_time_interval: null # checkpoints are monitored at the specified time interval
16
+ every_n_epochs: 10 # number of epochs between checkpoints
17
+ save_on_train_epoch_end: null # whether to run checkpointing at the end of the training epoch or the end of validation
configs/callbacks/model_summary.yaml ADDED
@@ -0,0 +1,5 @@
 
 
 
 
 
 
1
+ # https://lightning.ai/docs/pytorch/stable/api/lightning.pytorch.callbacks.RichModelSummary.html
2
+
3
+ model_summary:
4
+ _target_: lightning.pytorch.callbacks.RichModelSummary
5
+ max_depth: 3 # the maximum depth of layer nesting that the summary will include
configs/callbacks/none.yaml ADDED
File without changes
configs/callbacks/rich_progress_bar.yaml ADDED
@@ -0,0 +1,4 @@
 
 
 
 
 
1
+ # https://lightning.ai/docs/pytorch/latest/api/lightning.pytorch.callbacks.RichProgressBar.html
2
+
3
+ rich_progress_bar:
4
+ _target_: lightning.pytorch.callbacks.RichProgressBar
configs/data/akylai.yaml ADDED
@@ -0,0 +1,21 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ _target_: matcha.data.text_mel_datamodule.TextMelDataModule
2
+ name: akylai
3
+ train_filelist_path: ./Kany_dataset_mk4_v1/Kany_dataset_mk4_v1_filelist_train.txt
4
+ valid_filelist_path: ./Kany_dataset_mk4_v1/Kany_dataset_mk4_v1_filelist_test.txt
5
+ batch_size: 12
6
+ num_workers: 12
7
+ pin_memory: True
8
+ cleaners: [kyrgyz_cleaners]
9
+ add_blank: True
10
+ n_spks: 1
11
+ n_fft: 1024
12
+ n_feats: 80
13
+ sample_rate: 22050
14
+ hop_length: 256
15
+ win_length: 1024
16
+ f_min: 0
17
+ f_max: 8000
18
+ data_statistics: # Computed for ljspeech dataset
19
+ mel_mean: -5.638045310974121
20
+ mel_std: 2.6814498901367188
21
+ seed: ${seed}
configs/data/akylai_multi.yaml ADDED
@@ -0,0 +1,21 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ _target_: matcha.data.text_mel_datamodule.TextMelDataModule
2
+ name: akylai_multi
3
+ train_filelist_path: ./akylai_multi_dataset/akylai_mlspk_filelist_train.txt
4
+ valid_filelist_path: ./akylai_multi_dataset/akylai_mlspk_filelist_test.txt
5
+ batch_size: 32
6
+ num_workers: 20
7
+ pin_memory: True
8
+ cleaners: [kyrgyz_cleaners]
9
+ add_blank: True
10
+ n_spks: 2
11
+ n_fft: 1024
12
+ n_feats: 80
13
+ sample_rate: 22050
14
+ hop_length: 256
15
+ win_length: 1024
16
+ f_min: 0
17
+ f_max: 8000
18
+ data_statistics:
19
+ mel_mean: -5.6814561
20
+ mel_std: 2.7337122
21
+ seed: ${seed}
configs/data/hi-fi_en-US_female.yaml ADDED
@@ -0,0 +1,14 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ defaults:
2
+ - ljspeech
3
+ - _self_
4
+
5
+ # Dataset URL: https://ast-astrec.nict.go.jp/en/release/hi-fi-captain/
6
+ _target_: matcha.data.text_mel_datamodule.TextMelDataModule
7
+ name: hi-fi_en-US_female
8
+ train_filelist_path: data/filelists/hi-fi-captain-en-us-female_train.txt
9
+ valid_filelist_path: data/filelists/hi-fi-captain-en-us-female_val.txt
10
+ batch_size: 32
11
+ cleaners: [english_cleaners_piper]
12
+ data_statistics: # Computed for this dataset
13
+ mel_mean: -6.38385
14
+ mel_std: 2.541796
configs/data/ljspeech.yaml ADDED
@@ -0,0 +1,22 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ _target_: matcha.data.text_mel_datamodule.TextMelDataModule
2
+ name: ljspeech
3
+ train_filelist_path: /content/kany_dataset/kany_filelist_train.txt
4
+ valid_filelist_path: /content/kany_dataset/kany_filelist_test.txt
5
+ batch_size: 16
6
+ num_workers: 20
7
+ pin_memory: True
8
+ cleaners: [kyrgyz_cleaners]
9
+ add_blank: True
10
+ n_spks: 1
11
+ n_fft: 1024
12
+ n_feats: 80
13
+ sample_rate: 22050
14
+ hop_length: 256
15
+ win_length: 1024
16
+ f_min: 0
17
+ f_max: 8000
18
+ data_statistics: # Computed for ljspeech dataset
19
+ mel_mean: -5.68145561
20
+ mel_std: 2.7337122
21
+ seed: ${seed}
22
+
configs/data/vctk.yaml ADDED
@@ -0,0 +1,14 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ defaults:
2
+ - ljspeech
3
+ - _self_
4
+
5
+ _target_: matcha.data.text_mel_datamodule.TextMelDataModule
6
+ name: vctk
7
+ train_filelist_path: data/filelists/vctk_audio_sid_text_train_filelist.txt
8
+ valid_filelist_path: data/filelists/vctk_audio_sid_text_val_filelist.txt
9
+ batch_size: 32
10
+ add_blank: True
11
+ n_spks: 109
12
+ data_statistics: # Computed for vctk dataset
13
+ mel_mean: -6.630575
14
+ mel_std: 2.482914
configs/debug/default.yaml ADDED
@@ -0,0 +1,35 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # @package _global_
2
+
3
+ # default debugging setup, runs 1 full epoch
4
+ # other debugging configs can inherit from this one
5
+
6
+ # overwrite task name so debugging logs are stored in separate folder
7
+ task_name: "debug"
8
+
9
+ # disable callbacks and loggers during debugging
10
+ # callbacks: null
11
+ # logger: null
12
+
13
+ extras:
14
+ ignore_warnings: False
15
+ enforce_tags: False
16
+
17
+ # sets level of all command line loggers to 'DEBUG'
18
+ # https://hydra.cc/docs/tutorials/basic/running_your_app/logging/
19
+ hydra:
20
+ job_logging:
21
+ root:
22
+ level: DEBUG
23
+
24
+ # use this to also set hydra loggers to 'DEBUG'
25
+ # verbose: True
26
+
27
+ trainer:
28
+ max_epochs: 1
29
+ accelerator: cpu # debuggers don't like gpus
30
+ devices: 1 # debuggers don't like multiprocessing
31
+ detect_anomaly: true # raise exception if NaN or +/-inf is detected in any tensor
32
+
33
+ data:
34
+ num_workers: 0 # debuggers don't like multiprocessing
35
+ pin_memory: False # disable gpu memory pin
configs/debug/fdr.yaml ADDED
@@ -0,0 +1,9 @@
 
 
 
 
 
 
 
 
 
 
1
+ # @package _global_
2
+
3
+ # runs 1 train, 1 validation and 1 test step
4
+
5
+ defaults:
6
+ - default
7
+
8
+ trainer:
9
+ fast_dev_run: true
configs/debug/limit.yaml ADDED
@@ -0,0 +1,12 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # @package _global_
2
+
3
+ # uses only 1% of the training data and 5% of validation/test data
4
+
5
+ defaults:
6
+ - default
7
+
8
+ trainer:
9
+ max_epochs: 3
10
+ limit_train_batches: 0.01
11
+ limit_val_batches: 0.05
12
+ limit_test_batches: 0.05
configs/debug/overfit.yaml ADDED
@@ -0,0 +1,13 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # @package _global_
2
+
3
+ # overfits to 3 batches
4
+
5
+ defaults:
6
+ - default
7
+
8
+ trainer:
9
+ max_epochs: 20
10
+ overfit_batches: 3
11
+
12
+ # model ckpt and early stopping need to be disabled during overfitting
13
+ callbacks: null
configs/debug/profiler.yaml ADDED
@@ -0,0 +1,15 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # @package _global_
2
+
3
+ # runs with execution time profiling
4
+
5
+ defaults:
6
+ - default
7
+
8
+ trainer:
9
+ max_epochs: 1
10
+ # profiler: "simple"
11
+ profiler: "advanced"
12
+ # profiler: "pytorch"
13
+ accelerator: gpu
14
+
15
+ limit_train_batches: 0.02
configs/eval.yaml ADDED
@@ -0,0 +1,18 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # @package _global_
2
+
3
+ defaults:
4
+ - _self_
5
+ - data: akylai # choose datamodule with `test_dataloader()` for evaluation
6
+ - model: matcha
7
+ - logger: null
8
+ - trainer: default
9
+ - paths: default
10
+ - extras: default
11
+ - hydra: default
12
+
13
+ task_name: "eval"
14
+
15
+ tags: ["dev"]
16
+
17
+ # passing checkpoint path is necessary for evaluation
18
+ ckpt_path: ???
configs/experiment/akylai.yaml ADDED
@@ -0,0 +1,14 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # @package _global_
2
+
3
+ # to execute this experiment run:
4
+ # python train.py experiment=akylai
5
+
6
+ defaults:
7
+ - override /data: akylai.yaml
8
+
9
+ # all parameters below will be merged with parameters from default configurations set above
10
+ # this allows you to overwrite only specified parameters
11
+
12
+ tags: ["akylai"]
13
+
14
+ run_name: akylai
configs/experiment/akylai_multi.yaml ADDED
@@ -0,0 +1,14 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # @package _global_
2
+
3
+ # to execute this experiment run:
4
+ # python train.py experiment=akylai_multi
5
+
6
+ defaults:
7
+ - override /data: akylai_multi.yaml
8
+
9
+ # all parameters below will be merged with parameters from default configurations set above
10
+ # this allows you to overwrite only specified parameters
11
+
12
+ tags: ["akylai_multi"]
13
+
14
+ run_name: akylai_multi
configs/experiment/hifi_dataset_piper_phonemizer.yaml ADDED
@@ -0,0 +1,14 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # @package _global_
2
+
3
+ # to execute this experiment run:
4
+ # python train.py experiment=hifi_dataset_piper_phonemizer
5
+
6
+ defaults:
7
+ - override /data: hi-fi_en-US_female.yaml
8
+
9
+ # all parameters below will be merged with parameters from default configurations set above
10
+ # this allows you to overwrite only specified parameters
11
+
12
+ tags: ["hi-fi", "single_speaker", "piper_phonemizer", "en_US", "female"]
13
+
14
+ run_name: hi-fi_en-US_female_piper_phonemizer
configs/experiment/ljspeech.yaml ADDED
@@ -0,0 +1,14 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # @package _global_
2
+
3
+ # to execute this experiment run:
4
+ # python train.py experiment=ljspeech
5
+
6
+ defaults:
7
+ - override /data: ljspeech.yaml
8
+
9
+ # all parameters below will be merged with parameters from default configurations set above
10
+ # this allows you to overwrite only specified parameters
11
+
12
+ tags: ["ljspeech"]
13
+
14
+ run_name: ljspeech
configs/experiment/ljspeech_min_memory.yaml ADDED
@@ -0,0 +1,18 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # @package _global_
2
+
3
+ # to execute this experiment run:
4
+ # python train.py experiment=ljspeech_min_memory
5
+
6
+ defaults:
7
+ - override /data: ljspeech.yaml
8
+
9
+ # all parameters below will be merged with parameters from default configurations set above
10
+ # this allows you to overwrite only specified parameters
11
+
12
+ tags: ["ljspeech"]
13
+
14
+ run_name: ljspeech_min
15
+
16
+
17
+ model:
18
+ out_size: 172
configs/experiment/multispeaker.yaml ADDED
@@ -0,0 +1,14 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # @package _global_
2
+
3
+ # to execute this experiment run:
4
+ # python train.py experiment=multispeaker
5
+
6
+ defaults:
7
+ - override /data: vctk.yaml
8
+
9
+ # all parameters below will be merged with parameters from default configurations set above
10
+ # this allows you to overwrite only specified parameters
11
+
12
+ tags: ["multispeaker"]
13
+
14
+ run_name: multispeaker
configs/extras/default.yaml ADDED
@@ -0,0 +1,8 @@
 
 
 
 
 
 
 
 
 
1
+ # disable python warnings if they annoy you
2
+ ignore_warnings: False
3
+
4
+ # ask user for tags if none are provided in the config
5
+ enforce_tags: True
6
+
7
+ # pretty print config tree at the start of the run using Rich library
8
+ print_config: True
configs/hparams_search/mnist_optuna.yaml ADDED
@@ -0,0 +1,52 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # @package _global_
2
+
3
+ # example hyperparameter optimization of some experiment with Optuna:
4
+ # python train.py -m hparams_search=mnist_optuna experiment=example
5
+
6
+ defaults:
7
+ - override /hydra/sweeper: optuna
8
+
9
+ # choose metric which will be optimized by Optuna
10
+ # make sure this is the correct name of some metric logged in lightning module!
11
+ optimized_metric: "val/acc_best"
12
+
13
+ # here we define Optuna hyperparameter search
14
+ # it optimizes for value returned from function with @hydra.main decorator
15
+ # docs: https://hydra.cc/docs/next/plugins/optuna_sweeper
16
+ hydra:
17
+ mode: "MULTIRUN" # set hydra to multirun by default if this config is attached
18
+
19
+ sweeper:
20
+ _target_: hydra_plugins.hydra_optuna_sweeper.optuna_sweeper.OptunaSweeper
21
+
22
+ # storage URL to persist optimization results
23
+ # for example, you can use SQLite if you set 'sqlite:///example.db'
24
+ storage: null
25
+
26
+ # name of the study to persist optimization results
27
+ study_name: null
28
+
29
+ # number of parallel workers
30
+ n_jobs: 1
31
+
32
+ # 'minimize' or 'maximize' the objective
33
+ direction: maximize
34
+
35
+ # total number of runs that will be executed
36
+ n_trials: 20
37
+
38
+ # choose Optuna hyperparameter sampler
39
+ # you can choose bayesian sampler (tpe), random search (without optimization), grid sampler, and others
40
+ # docs: https://optuna.readthedocs.io/en/stable/reference/samplers.html
41
+ sampler:
42
+ _target_: optuna.samplers.TPESampler
43
+ seed: 1234
44
+ n_startup_trials: 10 # number of random sampling runs before optimization starts
45
+
46
+ # define hyperparameter search space
47
+ params:
48
+ model.optimizer.lr: interval(0.0001, 0.1)
49
+ data.batch_size: choice(32, 64, 128, 256)
50
+ model.net.lin1_size: choice(64, 128, 256)
51
+ model.net.lin2_size: choice(64, 128, 256)
52
+ model.net.lin3_size: choice(32, 64, 128, 256)
configs/hydra/default.yaml ADDED
@@ -0,0 +1,19 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # https://hydra.cc/docs/configure_hydra/intro/
2
+
3
+ # enable color logging
4
+ defaults:
5
+ - override hydra_logging: colorlog
6
+ - override job_logging: colorlog
7
+
8
+ # output directory, generated dynamically on each run
9
+ run:
10
+ dir: ${paths.log_dir}/${task_name}/${run_name}/runs/${now:%Y-%m-%d}_${now:%H-%M-%S}
11
+ sweep:
12
+ dir: ${paths.log_dir}/${task_name}/${run_name}/multiruns/${now:%Y-%m-%d}_${now:%H-%M-%S}
13
+ subdir: ${hydra.job.num}
14
+
15
+ job_logging:
16
+ handlers:
17
+ file:
18
+ # Incorporates fix from https://github.com/facebookresearch/hydra/pull/2242
19
+ filename: ${hydra.runtime.output_dir}/${hydra.job.name}.log
configs/local/.gitkeep ADDED
File without changes
configs/logger/aim.yaml ADDED
@@ -0,0 +1,28 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # https://aimstack.io/
2
+
3
+ # example usage in lightning module:
4
+ # https://github.com/aimhubio/aim/blob/main/examples/pytorch_lightning_track.py
5
+
6
+ # open the Aim UI with the following command (run in the folder containing the `.aim` folder):
7
+ # `aim up`
8
+
9
+ aim:
10
+ _target_: aim.pytorch_lightning.AimLogger
11
+ repo: ${paths.root_dir} # .aim folder will be created here
12
+ # repo: "aim://ip_address:port" # can instead provide IP address pointing to Aim remote tracking server which manages the repo, see https://aimstack.readthedocs.io/en/latest/using/remote_tracking.html#
13
+
14
+ # aim allows grouping runs under an experiment name
15
+ experiment: null # any string, set to "default" if not specified
16
+
17
+ train_metric_prefix: "train/"
18
+ val_metric_prefix: "val/"
19
+ test_metric_prefix: "test/"
20
+
21
+ # sets the tracking interval in seconds for system usage metrics (CPU, GPU, memory, etc.)
22
+ system_tracking_interval: 10 # set to null to disable system metrics tracking
23
+
24
+ # enable/disable logging of system params such as installed packages, git info, env vars, etc.
25
+ log_system_params: true
26
+
27
+ # enable/disable tracking console logs (default value is true)
28
+ capture_terminal_logs: false # set to false to avoid infinite console log loop issue https://github.com/aimhubio/aim/issues/2550
configs/logger/comet.yaml ADDED
@@ -0,0 +1,12 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # https://www.comet.ml
2
+
3
+ comet:
4
+ _target_: lightning.pytorch.loggers.comet.CometLogger
5
+ api_key: ${oc.env:COMET_API_TOKEN} # api key is loaded from environment variable
6
+ save_dir: "${paths.output_dir}"
7
+ project_name: "lightning-hydra-template"
8
+ rest_api_key: null
9
+ # experiment_name: ""
10
+ experiment_key: null # set to resume experiment
11
+ offline: False
12
+ prefix: ""
configs/logger/csv.yaml ADDED
@@ -0,0 +1,7 @@
 
 
 
 
 
 
 
 
1
+ # csv logger built in lightning
2
+
3
+ csv:
4
+ _target_: lightning.pytorch.loggers.csv_logs.CSVLogger
5
+ save_dir: "${paths.output_dir}"
6
+ name: "csv/"
7
+ prefix: ""
configs/logger/many_loggers.yaml ADDED
@@ -0,0 +1,9 @@
 
 
 
 
 
 
 
 
 
 
1
+ # train with many loggers at once
2
+
3
+ defaults:
4
+ # - comet
5
+ - csv
6
+ # - mlflow
7
+ # - neptune
8
+ - tensorboard
9
+ - wandb
configs/logger/mlflow.yaml ADDED
@@ -0,0 +1,12 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # https://mlflow.org
2
+
3
+ mlflow:
4
+ _target_: lightning.pytorch.loggers.mlflow.MLFlowLogger
5
+ # experiment_name: ""
6
+ # run_name: ""
7
+ tracking_uri: ${paths.log_dir}/mlflow/mlruns # run `mlflow ui` command inside the `logs/mlflow/` dir to open the UI
8
+ tags: null
9
+ # save_dir: "./mlruns"
10
+ prefix: ""
11
+ artifact_location: null
12
+ # run_id: ""
configs/logger/neptune.yaml ADDED
@@ -0,0 +1,9 @@
 
 
 
 
 
 
 
 
 
 
1
+ # https://neptune.ai
2
+
3
+ neptune:
4
+ _target_: lightning.pytorch.loggers.neptune.NeptuneLogger
5
+ api_key: ${oc.env:NEPTUNE_API_TOKEN} # api key is loaded from environment variable
6
+ project: username/lightning-hydra-template
7
+ # name: ""
8
+ log_model_checkpoints: True
9
+ prefix: ""
configs/logger/tensorboard.yaml ADDED
@@ -0,0 +1,10 @@
 
 
 
 
 
 
 
 
 
 
 
1
+ # https://www.tensorflow.org/tensorboard/
2
+
3
+ tensorboard:
4
+ _target_: lightning.pytorch.loggers.tensorboard.TensorBoardLogger
5
+ save_dir: "${paths.output_dir}/tensorboard/"
6
+ name: null
7
+ log_graph: False
8
+ default_hp_metric: True
9
+ prefix: ""
10
+ # version: ""
configs/logger/wandb.yaml ADDED
@@ -0,0 +1,16 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # https://wandb.ai
2
+
3
+ wandb:
4
+ _target_: lightning.pytorch.loggers.wandb.WandbLogger
5
+ # name: "" # name of the run (normally generated by wandb)
6
+ save_dir: "${paths.output_dir}"
7
+ offline: False
8
+ id: null # pass correct id to resume experiment!
9
+ anonymous: null # enable anonymous logging
10
+ project: "lightning-hydra-template"
11
+ log_model: False # upload lightning ckpts
12
+ prefix: "" # a string to put at the beginning of metric keys
13
+ # entity: "" # set to name of your wandb team
14
+ group: ""
15
+ tags: []
16
+ job_type: ""
configs/model/cfm/default.yaml ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ name: CFM
2
+ solver: euler
3
+ sigma_min: 1e-4
configs/model/decoder/default.yaml ADDED
@@ -0,0 +1,7 @@
 
 
 
 
 
 
 
 
1
+ channels: [256, 256]
2
+ dropout: 0.05
3
+ attention_head_dim: 64
4
+ n_blocks: 1
5
+ num_mid_blocks: 2
6
+ num_heads: 2
7
+ act_fn: snakebeta
configs/model/encoder/default.yaml ADDED
@@ -0,0 +1,18 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ encoder_type: RoPE Encoder
2
+ encoder_params:
3
+ n_feats: ${model.n_feats}
4
+ n_channels: 192
5
+ filter_channels: 768
6
+ filter_channels_dp: 256
7
+ n_heads: 2
8
+ n_layers: 6
9
+ kernel_size: 3
10
+ p_dropout: 0.1
11
+ spk_emb_dim: 64
12
+ n_spks: 1
13
+ prenet: true
14
+
15
+ duration_predictor_params:
16
+ filter_channels_dp: ${model.encoder.encoder_params.filter_channels_dp}
17
+ kernel_size: 3
18
+ p_dropout: ${model.encoder.encoder_params.p_dropout}
configs/model/matcha.yaml ADDED
@@ -0,0 +1,15 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ defaults:
2
+ - _self_
3
+ - encoder: default.yaml
4
+ - decoder: default.yaml
5
+ - cfm: default.yaml
6
+ - optimizer: adam.yaml
7
+
8
+ _target_: matcha.models.matcha_tts.MatchaTTS
9
+ n_vocab: 178
10
+ n_spks: ${data.n_spks}
11
+ spk_emb_dim: 64
12
+ n_feats: 80
13
+ data_statistics: ${data.data_statistics}
14
+ out_size: null # Must be divisible by 4
15
+ prior_loss: true
configs/model/optimizer/adam.yaml ADDED
@@ -0,0 +1,4 @@
 
 
 
 
 
1
+ _target_: torch.optim.Adam
2
+ _partial_: true
3
+ lr: 1e-4
4
+ weight_decay: 0.0
configs/paths/default.yaml ADDED
@@ -0,0 +1,18 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # path to root directory
2
+ # this requires PROJECT_ROOT environment variable to exist
3
+ # you can replace it with "." if you want the root to be the current working directory
4
+ root_dir: ${oc.env:PROJECT_ROOT}
5
+
6
+ # path to data directory
7
+ data_dir: ${paths.root_dir}/data/
8
+
9
+ # path to logging directory
10
+ log_dir: ${paths.root_dir}/logs/
11
+
12
+ # path to output directory, created dynamically by hydra
13
+ # path generation pattern is specified in `configs/hydra/default.yaml`
14
+ # use it to store all files generated during the run, like ckpts and metrics
15
+ output_dir: ${hydra:runtime.output_dir}
16
+
17
+ # path to working directory
18
+ work_dir: ${hydra:runtime.cwd}
configs/train.yaml ADDED
@@ -0,0 +1,51 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # @package _global_
2
+
3
+ # specify here default configuration
4
+ # order of defaults determines the order in which configs override each other
5
+ defaults:
6
+ - _self_
7
+ - data: akylai
8
+ - model: matcha
9
+ - callbacks: default
10
+ - logger: tensorboard # set logger here or use command line (e.g. `python train.py logger=tensorboard`)
11
+ - trainer: default
12
+ - paths: default
13
+ - extras: default
14
+ - hydra: default
15
+
16
+ # experiment configs allow for version control of specific hyperparameters
17
+ # e.g. best hyperparameters for given model and datamodule
18
+ - experiment: null
19
+
20
+ # config for hyperparameter optimization
21
+ - hparams_search: null
22
+
23
+ # optional local config for machine/user specific settings
24
+ # it's optional since it doesn't need to exist and is excluded from version control
25
+ - optional local: default
26
+
27
+ # debugging config (enable through command line, e.g. `python train.py debug=default`)
28
+ - debug: null
29
+
30
+ # task name, determines output directory path
31
+ task_name: "train"
32
+
33
+ run_name: ???
34
+
35
+ # tags to help you identify your experiments
36
+ # you can overwrite this in experiment configs
37
+ # overwrite from command line with `python train.py tags="[first_tag, second_tag]"`
38
+ tags: ["dev"]
39
+
40
+ # set False to skip model training
41
+ train: True
42
+
43
+ # evaluate on test set, using best model weights achieved during training
44
+ # lightning chooses best weights based on the metric specified in checkpoint callback
45
+ test: False
46
+
47
+ # simply provide checkpoint path to resume training
48
+ ckpt_path: "https://github.com/simonlobgromov/AkylAI_Matcha_Checkpoint/releases/download/Matcha-TTS/checkpoint_epoch.499.ckpt"
49
+
50
+ # seed for random number generators in pytorch, numpy and python.random
51
+ seed: 1234
configs/trainer/cpu.yaml ADDED
@@ -0,0 +1,5 @@
 
 
 
 
 
 
1
+ defaults:
2
+ - default
3
+
4
+ accelerator: cpu
5
+ devices: 1
configs/trainer/ddp.yaml ADDED
@@ -0,0 +1,9 @@
 
 
 
 
 
 
 
 
 
 
1
+ defaults:
2
+ - default
3
+
4
+ strategy: ddp
5
+
6
+ accelerator: gpu
7
+ devices: [0,1]
8
+ num_nodes: 1
9
+ sync_batchnorm: True