import from zenodo
Browse files- README.md +50 -0
- dump/xvector/dev/spk_xvector.ark +0 -0
- dump/xvector/dev/spk_xvector.scp +96 -0
- dump/xvector/eval1/spk_xvector.ark +0 -0
- dump/xvector/eval1/spk_xvector.scp +81 -0
- dump/xvector/tr_no_dev/spk_xvector.ark +0 -0
- dump/xvector/tr_no_dev/spk_xvector.scp +0 -0
- exp/tts_train_xvector_conformer_fastspeech2_transformer_teacher_raw_phn_tacotron_g2p_en_no_space/config.yaml +313 -0
- exp/tts_train_xvector_conformer_fastspeech2_transformer_teacher_raw_phn_tacotron_g2p_en_no_space/images/backward_time.png +0 -0
- exp/tts_train_xvector_conformer_fastspeech2_transformer_teacher_raw_phn_tacotron_g2p_en_no_space/images/duration_loss.png +0 -0
- exp/tts_train_xvector_conformer_fastspeech2_transformer_teacher_raw_phn_tacotron_g2p_en_no_space/images/energy_loss.png +0 -0
- exp/tts_train_xvector_conformer_fastspeech2_transformer_teacher_raw_phn_tacotron_g2p_en_no_space/images/forward_time.png +0 -0
- exp/tts_train_xvector_conformer_fastspeech2_transformer_teacher_raw_phn_tacotron_g2p_en_no_space/images/iter_time.png +0 -0
- exp/tts_train_xvector_conformer_fastspeech2_transformer_teacher_raw_phn_tacotron_g2p_en_no_space/images/l1_loss.png +0 -0
- exp/tts_train_xvector_conformer_fastspeech2_transformer_teacher_raw_phn_tacotron_g2p_en_no_space/images/loss.png +0 -0
- exp/tts_train_xvector_conformer_fastspeech2_transformer_teacher_raw_phn_tacotron_g2p_en_no_space/images/lr_0.png +0 -0
- exp/tts_train_xvector_conformer_fastspeech2_transformer_teacher_raw_phn_tacotron_g2p_en_no_space/images/optim_step_time.png +0 -0
- exp/tts_train_xvector_conformer_fastspeech2_transformer_teacher_raw_phn_tacotron_g2p_en_no_space/images/pitch_loss.png +0 -0
- exp/tts_train_xvector_conformer_fastspeech2_transformer_teacher_raw_phn_tacotron_g2p_en_no_space/images/train_time.png +0 -0
- exp/tts_train_xvector_conformer_fastspeech2_transformer_teacher_raw_phn_tacotron_g2p_en_no_space/train.loss.ave_5best.pth +3 -0
- exp/tts_train_xvector_transformer_raw_phn_tacotron_g2p_en_no_space/decode_use_teacher_forcingtrue_train.loss.ave/stats/train/energy_stats.npz +0 -0
- exp/tts_train_xvector_transformer_raw_phn_tacotron_g2p_en_no_space/decode_use_teacher_forcingtrue_train.loss.ave/stats/train/feats_stats.npz +0 -0
- exp/tts_train_xvector_transformer_raw_phn_tacotron_g2p_en_no_space/decode_use_teacher_forcingtrue_train.loss.ave/stats/train/pitch_stats.npz +0 -0
- meta.yaml +8 -0
README.md
ADDED
@@ -0,0 +1,50 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
---
|
2 |
+
tags:
|
3 |
+
- espnet
|
4 |
+
- audio
|
5 |
+
- text-to-speech
|
6 |
+
language: en
|
7 |
+
datasets:
|
8 |
+
- libritts
|
9 |
+
license: cc-by-4.0
|
10 |
+
---
|
11 |
+
## Example ESPnet2 TTS model
|
12 |
+
### `kan-bayashi/libritts_xvector_conformer_fastspeech2`
|
13 |
+
♻️ Imported from https://zenodo.org/record/4418754/
|
14 |
+
|
15 |
+
This model was trained by kan-bayashi using the libritts/tts1 recipe in [espnet](https://github.com/espnet/espnet/).
|
16 |
+
### Demo: How to use in ESPnet2
|
17 |
+
```python
|
18 |
+
# coming soon
|
19 |
+
```
|
20 |
+
### Citing ESPnet
|
21 |
+
```bibtex
|
22 |
+
@inproceedings{watanabe2018espnet,
|
23 |
+
author={Shinji Watanabe and Takaaki Hori and Shigeki Karita and Tomoki Hayashi and Jiro Nishitoba and Yuya Unno and Nelson {Enrique Yalta Soplin} and Jahn Heymann and Matthew Wiesner and Nanxin Chen and Adithya Renduchintala and Tsubasa Ochiai},
|
24 |
+
title={{ESPnet}: End-to-End Speech Processing Toolkit},
|
25 |
+
year={2018},
|
26 |
+
booktitle={Proceedings of Interspeech},
|
27 |
+
pages={2207--2211},
|
28 |
+
doi={10.21437/Interspeech.2018-1456},
|
29 |
+
url={http://dx.doi.org/10.21437/Interspeech.2018-1456}
|
30 |
+
}
|
31 |
+
@inproceedings{hayashi2020espnet,
|
32 |
+
title={{ESPnet-TTS}: Unified, reproducible, and integratable open source end-to-end text-to-speech toolkit},
|
33 |
+
author={Hayashi, Tomoki and Yamamoto, Ryuichi and Inoue, Katsuki and Yoshimura, Takenori and Watanabe, Shinji and Toda, Tomoki and Takeda, Kazuya and Zhang, Yu and Tan, Xu},
|
34 |
+
booktitle={Proceedings of IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP)},
|
35 |
+
pages={7654--7658},
|
36 |
+
year={2020},
|
37 |
+
organization={IEEE}
|
38 |
+
}
|
39 |
+
```
|
40 |
+
or arXiv:
|
41 |
+
```bibtex
|
42 |
+
@misc{watanabe2018espnet,
|
43 |
+
title={ESPnet: End-to-End Speech Processing Toolkit},
|
44 |
+
author={Shinji Watanabe and Takaaki Hori and Shigeki Karita and Tomoki Hayashi and Jiro Nishitoba and Yuya Unno and Nelson Enrique Yalta Soplin and Jahn Heymann and Matthew Wiesner and Nanxin Chen and Adithya Renduchintala and Tsubasa Ochiai},
|
45 |
+
year={2018},
|
46 |
+
eprint={1804.00015},
|
47 |
+
archivePrefix={arXiv},
|
48 |
+
primaryClass={cs.CL}
|
49 |
+
}
|
50 |
+
```
|
dump/xvector/dev/spk_xvector.ark
ADDED
Binary file (199 kB). View file
|
|
dump/xvector/dev/spk_xvector.scp
ADDED
@@ -0,0 +1,96 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
1272_128104 dump/xvector/dev/spk_xvector.ark:12
|
2 |
+
1272_135031 dump/xvector/dev/spk_xvector.ark:2082
|
3 |
+
1272_141231 dump/xvector/dev/spk_xvector.ark:4152
|
4 |
+
1462_170138 dump/xvector/dev/spk_xvector.ark:6222
|
5 |
+
1462_170142 dump/xvector/dev/spk_xvector.ark:8292
|
6 |
+
1462_170145 dump/xvector/dev/spk_xvector.ark:10362
|
7 |
+
1673_143396 dump/xvector/dev/spk_xvector.ark:12432
|
8 |
+
1673_143397 dump/xvector/dev/spk_xvector.ark:14502
|
9 |
+
174_168635 dump/xvector/dev/spk_xvector.ark:16571
|
10 |
+
174_50561 dump/xvector/dev/spk_xvector.ark:18639
|
11 |
+
174_84280 dump/xvector/dev/spk_xvector.ark:20707
|
12 |
+
1919_142785 dump/xvector/dev/spk_xvector.ark:22777
|
13 |
+
1988_147956 dump/xvector/dev/spk_xvector.ark:24847
|
14 |
+
1988_148538 dump/xvector/dev/spk_xvector.ark:26917
|
15 |
+
1988_24833 dump/xvector/dev/spk_xvector.ark:28986
|
16 |
+
1993_147149 dump/xvector/dev/spk_xvector.ark:31056
|
17 |
+
1993_147964 dump/xvector/dev/spk_xvector.ark:33126
|
18 |
+
1993_147965 dump/xvector/dev/spk_xvector.ark:35196
|
19 |
+
1993_147966 dump/xvector/dev/spk_xvector.ark:37266
|
20 |
+
2035_147960 dump/xvector/dev/spk_xvector.ark:39336
|
21 |
+
2035_147961 dump/xvector/dev/spk_xvector.ark:41406
|
22 |
+
2035_152373 dump/xvector/dev/spk_xvector.ark:43476
|
23 |
+
2078_142845 dump/xvector/dev/spk_xvector.ark:45546
|
24 |
+
2086_149214 dump/xvector/dev/spk_xvector.ark:47616
|
25 |
+
2086_149220 dump/xvector/dev/spk_xvector.ark:49686
|
26 |
+
2277_149874 dump/xvector/dev/spk_xvector.ark:51756
|
27 |
+
2277_149896 dump/xvector/dev/spk_xvector.ark:53826
|
28 |
+
2277_149897 dump/xvector/dev/spk_xvector.ark:55896
|
29 |
+
2412_153947 dump/xvector/dev/spk_xvector.ark:57966
|
30 |
+
2412_153948 dump/xvector/dev/spk_xvector.ark:60036
|
31 |
+
2412_153954 dump/xvector/dev/spk_xvector.ark:62106
|
32 |
+
2428_83699 dump/xvector/dev/spk_xvector.ark:64175
|
33 |
+
2428_83705 dump/xvector/dev/spk_xvector.ark:66244
|
34 |
+
251_118436 dump/xvector/dev/spk_xvector.ark:68313
|
35 |
+
251_136532 dump/xvector/dev/spk_xvector.ark:70382
|
36 |
+
251_137823 dump/xvector/dev/spk_xvector.ark:72451
|
37 |
+
2803_154320 dump/xvector/dev/spk_xvector.ark:74521
|
38 |
+
2803_154328 dump/xvector/dev/spk_xvector.ark:76591
|
39 |
+
2803_161169 dump/xvector/dev/spk_xvector.ark:78661
|
40 |
+
2902_9006 dump/xvector/dev/spk_xvector.ark:80729
|
41 |
+
2902_9008 dump/xvector/dev/spk_xvector.ark:82797
|
42 |
+
3000_15664 dump/xvector/dev/spk_xvector.ark:84866
|
43 |
+
3081_166546 dump/xvector/dev/spk_xvector.ark:86936
|
44 |
+
3170_137482 dump/xvector/dev/spk_xvector.ark:89006
|
45 |
+
3536_23268 dump/xvector/dev/spk_xvector.ark:91075
|
46 |
+
3536_8226 dump/xvector/dev/spk_xvector.ark:93143
|
47 |
+
3576_138058 dump/xvector/dev/spk_xvector.ark:95213
|
48 |
+
3752_4943 dump/xvector/dev/spk_xvector.ark:97281
|
49 |
+
3752_4944 dump/xvector/dev/spk_xvector.ark:99349
|
50 |
+
3853_163249 dump/xvector/dev/spk_xvector.ark:101419
|
51 |
+
422_122949 dump/xvector/dev/spk_xvector.ark:103488
|
52 |
+
5338_24615 dump/xvector/dev/spk_xvector.ark:105557
|
53 |
+
5338_24640 dump/xvector/dev/spk_xvector.ark:107626
|
54 |
+
5338_284437 dump/xvector/dev/spk_xvector.ark:109696
|
55 |
+
5536_43358 dump/xvector/dev/spk_xvector.ark:111765
|
56 |
+
5536_43359 dump/xvector/dev/spk_xvector.ark:113834
|
57 |
+
5536_43363 dump/xvector/dev/spk_xvector.ark:115903
|
58 |
+
5694_64025 dump/xvector/dev/spk_xvector.ark:117972
|
59 |
+
5694_64029 dump/xvector/dev/spk_xvector.ark:120041
|
60 |
+
5694_64038 dump/xvector/dev/spk_xvector.ark:122110
|
61 |
+
5895_34615 dump/xvector/dev/spk_xvector.ark:124179
|
62 |
+
5895_34622 dump/xvector/dev/spk_xvector.ark:126248
|
63 |
+
5895_34629 dump/xvector/dev/spk_xvector.ark:128317
|
64 |
+
6241_61943 dump/xvector/dev/spk_xvector.ark:130386
|
65 |
+
6241_61946 dump/xvector/dev/spk_xvector.ark:132455
|
66 |
+
6241_66616 dump/xvector/dev/spk_xvector.ark:134524
|
67 |
+
6295_244435 dump/xvector/dev/spk_xvector.ark:136594
|
68 |
+
6295_64301 dump/xvector/dev/spk_xvector.ark:138663
|
69 |
+
6313_66125 dump/xvector/dev/spk_xvector.ark:140732
|
70 |
+
6313_66129 dump/xvector/dev/spk_xvector.ark:142801
|
71 |
+
6313_76958 dump/xvector/dev/spk_xvector.ark:144870
|
72 |
+
6319_275224 dump/xvector/dev/spk_xvector.ark:146940
|
73 |
+
6319_57405 dump/xvector/dev/spk_xvector.ark:149009
|
74 |
+
6319_64726 dump/xvector/dev/spk_xvector.ark:151078
|
75 |
+
6345_64257 dump/xvector/dev/spk_xvector.ark:153147
|
76 |
+
6345_93302 dump/xvector/dev/spk_xvector.ark:155216
|
77 |
+
6345_93306 dump/xvector/dev/spk_xvector.ark:157285
|
78 |
+
652_129742 dump/xvector/dev/spk_xvector.ark:159354
|
79 |
+
652_130737 dump/xvector/dev/spk_xvector.ark:161423
|
80 |
+
777_126732 dump/xvector/dev/spk_xvector.ark:163492
|
81 |
+
7850_111771 dump/xvector/dev/spk_xvector.ark:165562
|
82 |
+
7850_281318 dump/xvector/dev/spk_xvector.ark:167632
|
83 |
+
7850_286674 dump/xvector/dev/spk_xvector.ark:169702
|
84 |
+
7850_73752 dump/xvector/dev/spk_xvector.ark:171771
|
85 |
+
7976_105575 dump/xvector/dev/spk_xvector.ark:173841
|
86 |
+
7976_110124 dump/xvector/dev/spk_xvector.ark:175911
|
87 |
+
7976_110523 dump/xvector/dev/spk_xvector.ark:177981
|
88 |
+
8297_275154 dump/xvector/dev/spk_xvector.ark:180051
|
89 |
+
8297_275155 dump/xvector/dev/spk_xvector.ark:182121
|
90 |
+
8297_275156 dump/xvector/dev/spk_xvector.ark:184191
|
91 |
+
84_121123 dump/xvector/dev/spk_xvector.ark:186259
|
92 |
+
84_121550 dump/xvector/dev/spk_xvector.ark:188327
|
93 |
+
8842_302196 dump/xvector/dev/spk_xvector.ark:190397
|
94 |
+
8842_302201 dump/xvector/dev/spk_xvector.ark:192467
|
95 |
+
8842_302203 dump/xvector/dev/spk_xvector.ark:194537
|
96 |
+
8842_304647 dump/xvector/dev/spk_xvector.ark:196607
|
dump/xvector/eval1/spk_xvector.ark
ADDED
Binary file (168 kB). View file
|
|
dump/xvector/eval1/spk_xvector.scp
ADDED
@@ -0,0 +1,81 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
1089_134686 dump/xvector/eval1/spk_xvector.ark:12
|
2 |
+
1089_134691 dump/xvector/eval1/spk_xvector.ark:2082
|
3 |
+
1188_133604 dump/xvector/eval1/spk_xvector.ark:4152
|
4 |
+
121_121726 dump/xvector/eval1/spk_xvector.ark:6221
|
5 |
+
121_123859 dump/xvector/eval1/spk_xvector.ark:8290
|
6 |
+
121_127105 dump/xvector/eval1/spk_xvector.ark:10359
|
7 |
+
1221_135766 dump/xvector/eval1/spk_xvector.ark:12429
|
8 |
+
1221_135767 dump/xvector/eval1/spk_xvector.ark:14499
|
9 |
+
1284_1180 dump/xvector/eval1/spk_xvector.ark:16567
|
10 |
+
1284_1181 dump/xvector/eval1/spk_xvector.ark:18635
|
11 |
+
1320_122612 dump/xvector/eval1/spk_xvector.ark:20705
|
12 |
+
1320_122617 dump/xvector/eval1/spk_xvector.ark:22775
|
13 |
+
1580_141083 dump/xvector/eval1/spk_xvector.ark:24845
|
14 |
+
1580_141084 dump/xvector/eval1/spk_xvector.ark:26915
|
15 |
+
1995_1826 dump/xvector/eval1/spk_xvector.ark:28983
|
16 |
+
1995_1836 dump/xvector/eval1/spk_xvector.ark:31051
|
17 |
+
1995_1837 dump/xvector/eval1/spk_xvector.ark:33119
|
18 |
+
2300_131720 dump/xvector/eval1/spk_xvector.ark:35189
|
19 |
+
237_126133 dump/xvector/eval1/spk_xvector.ark:37258
|
20 |
+
237_134493 dump/xvector/eval1/spk_xvector.ark:39327
|
21 |
+
237_134500 dump/xvector/eval1/spk_xvector.ark:41396
|
22 |
+
260_123286 dump/xvector/eval1/spk_xvector.ark:43465
|
23 |
+
260_123288 dump/xvector/eval1/spk_xvector.ark:45534
|
24 |
+
260_123440 dump/xvector/eval1/spk_xvector.ark:47603
|
25 |
+
2830_3979 dump/xvector/eval1/spk_xvector.ark:49671
|
26 |
+
2830_3980 dump/xvector/eval1/spk_xvector.ark:51739
|
27 |
+
2961_961 dump/xvector/eval1/spk_xvector.ark:53806
|
28 |
+
3570_5694 dump/xvector/eval1/spk_xvector.ark:55874
|
29 |
+
3570_5695 dump/xvector/eval1/spk_xvector.ark:57942
|
30 |
+
3570_5696 dump/xvector/eval1/spk_xvector.ark:60010
|
31 |
+
3575_170457 dump/xvector/eval1/spk_xvector.ark:62080
|
32 |
+
3729_6852 dump/xvector/eval1/spk_xvector.ark:64148
|
33 |
+
4077_13751 dump/xvector/eval1/spk_xvector.ark:66217
|
34 |
+
4077_13754 dump/xvector/eval1/spk_xvector.ark:68286
|
35 |
+
4446_2271 dump/xvector/eval1/spk_xvector.ark:70354
|
36 |
+
4446_2273 dump/xvector/eval1/spk_xvector.ark:72422
|
37 |
+
4446_2275 dump/xvector/eval1/spk_xvector.ark:74490
|
38 |
+
4507_16021 dump/xvector/eval1/spk_xvector.ark:76559
|
39 |
+
4970_29093 dump/xvector/eval1/spk_xvector.ark:78628
|
40 |
+
4970_29095 dump/xvector/eval1/spk_xvector.ark:80697
|
41 |
+
4992_23283 dump/xvector/eval1/spk_xvector.ark:82766
|
42 |
+
4992_41797 dump/xvector/eval1/spk_xvector.ark:84835
|
43 |
+
4992_41806 dump/xvector/eval1/spk_xvector.ark:86904
|
44 |
+
5105_28233 dump/xvector/eval1/spk_xvector.ark:88973
|
45 |
+
5105_28240 dump/xvector/eval1/spk_xvector.ark:91042
|
46 |
+
5105_28241 dump/xvector/eval1/spk_xvector.ark:93111
|
47 |
+
5142_33396 dump/xvector/eval1/spk_xvector.ark:95180
|
48 |
+
5142_36377 dump/xvector/eval1/spk_xvector.ark:97249
|
49 |
+
5142_36586 dump/xvector/eval1/spk_xvector.ark:99318
|
50 |
+
5142_36600 dump/xvector/eval1/spk_xvector.ark:101387
|
51 |
+
5639_40744 dump/xvector/eval1/spk_xvector.ark:103456
|
52 |
+
5683_32865 dump/xvector/eval1/spk_xvector.ark:105525
|
53 |
+
5683_32866 dump/xvector/eval1/spk_xvector.ark:107594
|
54 |
+
5683_32879 dump/xvector/eval1/spk_xvector.ark:109663
|
55 |
+
61_70970 dump/xvector/eval1/spk_xvector.ark:111730
|
56 |
+
672_122797 dump/xvector/eval1/spk_xvector.ark:113799
|
57 |
+
6829_68769 dump/xvector/eval1/spk_xvector.ark:115868
|
58 |
+
6829_68771 dump/xvector/eval1/spk_xvector.ark:117937
|
59 |
+
6930_75918 dump/xvector/eval1/spk_xvector.ark:120006
|
60 |
+
6930_76324 dump/xvector/eval1/spk_xvector.ark:122075
|
61 |
+
6930_81414 dump/xvector/eval1/spk_xvector.ark:124144
|
62 |
+
7021_79730 dump/xvector/eval1/spk_xvector.ark:126213
|
63 |
+
7021_79740 dump/xvector/eval1/spk_xvector.ark:128282
|
64 |
+
7021_79759 dump/xvector/eval1/spk_xvector.ark:130351
|
65 |
+
7021_85628 dump/xvector/eval1/spk_xvector.ark:132420
|
66 |
+
7127_75946 dump/xvector/eval1/spk_xvector.ark:134489
|
67 |
+
7127_75947 dump/xvector/eval1/spk_xvector.ark:136558
|
68 |
+
7176_88083 dump/xvector/eval1/spk_xvector.ark:138627
|
69 |
+
7176_92135 dump/xvector/eval1/spk_xvector.ark:140696
|
70 |
+
7729_102255 dump/xvector/eval1/spk_xvector.ark:142766
|
71 |
+
8224_274384 dump/xvector/eval1/spk_xvector.ark:144836
|
72 |
+
8230_279154 dump/xvector/eval1/spk_xvector.ark:146906
|
73 |
+
8455_210777 dump/xvector/eval1/spk_xvector.ark:148976
|
74 |
+
8463_287645 dump/xvector/eval1/spk_xvector.ark:151046
|
75 |
+
8463_294825 dump/xvector/eval1/spk_xvector.ark:153116
|
76 |
+
8463_294828 dump/xvector/eval1/spk_xvector.ark:155186
|
77 |
+
8555_284447 dump/xvector/eval1/spk_xvector.ark:157256
|
78 |
+
8555_284449 dump/xvector/eval1/spk_xvector.ark:159326
|
79 |
+
8555_292519 dump/xvector/eval1/spk_xvector.ark:161396
|
80 |
+
908_157963 dump/xvector/eval1/spk_xvector.ark:163465
|
81 |
+
908_31957 dump/xvector/eval1/spk_xvector.ark:165533
|
dump/xvector/tr_no_dev/spk_xvector.ark
ADDED
Binary file (5.3 MB). View file
|
|
dump/xvector/tr_no_dev/spk_xvector.scp
ADDED
The diff for this file is too large to render.
See raw diff
|
|
exp/tts_train_xvector_conformer_fastspeech2_transformer_teacher_raw_phn_tacotron_g2p_en_no_space/config.yaml
ADDED
@@ -0,0 +1,313 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
config: conf/tuning/train_xvector_conformer_fastspeech2.yaml
|
2 |
+
print_config: false
|
3 |
+
log_level: INFO
|
4 |
+
dry_run: false
|
5 |
+
iterator_type: sequence
|
6 |
+
output_dir: exp/tts_train_xvector_conformer_fastspeech2_transformer_teacher_raw_phn_tacotron_g2p_en_no_space
|
7 |
+
ngpu: 1
|
8 |
+
seed: 0
|
9 |
+
num_workers: 1
|
10 |
+
num_att_plot: 3
|
11 |
+
dist_backend: nccl
|
12 |
+
dist_init_method: env://
|
13 |
+
dist_world_size: 4
|
14 |
+
dist_rank: 0
|
15 |
+
local_rank: 0
|
16 |
+
dist_master_addr: localhost
|
17 |
+
dist_master_port: 39385
|
18 |
+
dist_launcher: null
|
19 |
+
multiprocessing_distributed: true
|
20 |
+
cudnn_enabled: true
|
21 |
+
cudnn_benchmark: false
|
22 |
+
cudnn_deterministic: true
|
23 |
+
collect_stats: false
|
24 |
+
write_collected_feats: false
|
25 |
+
max_epoch: 500
|
26 |
+
patience: null
|
27 |
+
val_scheduler_criterion:
|
28 |
+
- valid
|
29 |
+
- loss
|
30 |
+
early_stopping_criterion:
|
31 |
+
- valid
|
32 |
+
- loss
|
33 |
+
- min
|
34 |
+
best_model_criterion:
|
35 |
+
- - valid
|
36 |
+
- loss
|
37 |
+
- min
|
38 |
+
- - train
|
39 |
+
- loss
|
40 |
+
- min
|
41 |
+
keep_nbest_models: 5
|
42 |
+
grad_clip: 1.0
|
43 |
+
grad_clip_type: 2.0
|
44 |
+
grad_noise: false
|
45 |
+
accum_grad: 1
|
46 |
+
no_forward_run: false
|
47 |
+
resume: true
|
48 |
+
train_dtype: float32
|
49 |
+
use_amp: false
|
50 |
+
log_interval: null
|
51 |
+
unused_parameters: false
|
52 |
+
use_tensorboard: true
|
53 |
+
use_wandb: false
|
54 |
+
wandb_project: null
|
55 |
+
wandb_id: null
|
56 |
+
pretrain_path: null
|
57 |
+
init_param: []
|
58 |
+
freeze_param: []
|
59 |
+
num_iters_per_epoch: 500
|
60 |
+
batch_size: 20
|
61 |
+
valid_batch_size: null
|
62 |
+
batch_bins: 18000000
|
63 |
+
valid_batch_bins: null
|
64 |
+
train_shape_file:
|
65 |
+
- exp/tts_train_xvector_transformer_raw_phn_tacotron_g2p_en_no_space/decode_use_teacher_forcingtrue_train.loss.ave/stats/train/text_shape.phn
|
66 |
+
- exp/tts_train_xvector_transformer_raw_phn_tacotron_g2p_en_no_space/decode_use_teacher_forcingtrue_train.loss.ave/stats/train/speech_shape
|
67 |
+
valid_shape_file:
|
68 |
+
- exp/tts_train_xvector_transformer_raw_phn_tacotron_g2p_en_no_space/decode_use_teacher_forcingtrue_train.loss.ave/stats/valid/text_shape.phn
|
69 |
+
- exp/tts_train_xvector_transformer_raw_phn_tacotron_g2p_en_no_space/decode_use_teacher_forcingtrue_train.loss.ave/stats/valid/speech_shape
|
70 |
+
batch_type: numel
|
71 |
+
valid_batch_type: null
|
72 |
+
fold_length:
|
73 |
+
- 150
|
74 |
+
- 240000
|
75 |
+
sort_in_batch: descending
|
76 |
+
sort_batch: descending
|
77 |
+
multiple_iterator: false
|
78 |
+
chunk_length: 500
|
79 |
+
chunk_shift_ratio: 0.5
|
80 |
+
num_cache_chunks: 1024
|
81 |
+
train_data_path_and_name_and_type:
|
82 |
+
- - dump/raw/tr_no_dev/text
|
83 |
+
- text
|
84 |
+
- text
|
85 |
+
- - exp/tts_train_xvector_transformer_raw_phn_tacotron_g2p_en_no_space/decode_use_teacher_forcingtrue_train.loss.ave/tr_no_dev/durations
|
86 |
+
- durations
|
87 |
+
- text_int
|
88 |
+
- - dump/raw/tr_no_dev/wav.scp
|
89 |
+
- speech
|
90 |
+
- sound
|
91 |
+
- - exp/tts_train_xvector_transformer_raw_phn_tacotron_g2p_en_no_space/decode_use_teacher_forcingtrue_train.loss.ave/stats/train/collect_feats/pitch.scp
|
92 |
+
- pitch
|
93 |
+
- npy
|
94 |
+
- - exp/tts_train_xvector_transformer_raw_phn_tacotron_g2p_en_no_space/decode_use_teacher_forcingtrue_train.loss.ave/stats/train/collect_feats/energy.scp
|
95 |
+
- energy
|
96 |
+
- npy
|
97 |
+
- - dump/xvector/tr_no_dev/xvector.scp
|
98 |
+
- spembs
|
99 |
+
- kaldi_ark
|
100 |
+
valid_data_path_and_name_and_type:
|
101 |
+
- - dump/raw/dev/text
|
102 |
+
- text
|
103 |
+
- text
|
104 |
+
- - exp/tts_train_xvector_transformer_raw_phn_tacotron_g2p_en_no_space/decode_use_teacher_forcingtrue_train.loss.ave/dev/durations
|
105 |
+
- durations
|
106 |
+
- text_int
|
107 |
+
- - dump/raw/dev/wav.scp
|
108 |
+
- speech
|
109 |
+
- sound
|
110 |
+
- - exp/tts_train_xvector_transformer_raw_phn_tacotron_g2p_en_no_space/decode_use_teacher_forcingtrue_train.loss.ave/stats/valid/collect_feats/pitch.scp
|
111 |
+
- pitch
|
112 |
+
- npy
|
113 |
+
- - exp/tts_train_xvector_transformer_raw_phn_tacotron_g2p_en_no_space/decode_use_teacher_forcingtrue_train.loss.ave/stats/valid/collect_feats/energy.scp
|
114 |
+
- energy
|
115 |
+
- npy
|
116 |
+
- - dump/xvector/dev/xvector.scp
|
117 |
+
- spembs
|
118 |
+
- kaldi_ark
|
119 |
+
allow_variable_data_keys: false
|
120 |
+
max_cache_size: 0.0
|
121 |
+
max_cache_fd: 32
|
122 |
+
valid_max_cache_size: null
|
123 |
+
optim: adam
|
124 |
+
optim_conf:
|
125 |
+
lr: 1.0
|
126 |
+
scheduler: noamlr
|
127 |
+
scheduler_conf:
|
128 |
+
model_size: 384
|
129 |
+
warmup_steps: 4000
|
130 |
+
token_list:
|
131 |
+
- <blank>
|
132 |
+
- <unk>
|
133 |
+
- AH0
|
134 |
+
- T
|
135 |
+
- N
|
136 |
+
- D
|
137 |
+
- S
|
138 |
+
- R
|
139 |
+
- L
|
140 |
+
- IH1
|
141 |
+
- DH
|
142 |
+
- M
|
143 |
+
- K
|
144 |
+
- Z
|
145 |
+
- EH1
|
146 |
+
- AE1
|
147 |
+
- IH0
|
148 |
+
- AH1
|
149 |
+
- W
|
150 |
+
- ','
|
151 |
+
- HH
|
152 |
+
- ER0
|
153 |
+
- P
|
154 |
+
- IY1
|
155 |
+
- V
|
156 |
+
- F
|
157 |
+
- B
|
158 |
+
- UW1
|
159 |
+
- AA1
|
160 |
+
- AY1
|
161 |
+
- AO1
|
162 |
+
- .
|
163 |
+
- EY1
|
164 |
+
- IY0
|
165 |
+
- OW1
|
166 |
+
- NG
|
167 |
+
- G
|
168 |
+
- SH
|
169 |
+
- Y
|
170 |
+
- AW1
|
171 |
+
- CH
|
172 |
+
- ER1
|
173 |
+
- UH1
|
174 |
+
- TH
|
175 |
+
- JH
|
176 |
+
- ''''
|
177 |
+
- '?'
|
178 |
+
- OW0
|
179 |
+
- EH2
|
180 |
+
- '!'
|
181 |
+
- IH2
|
182 |
+
- OY1
|
183 |
+
- EY2
|
184 |
+
- AY2
|
185 |
+
- EH0
|
186 |
+
- UW0
|
187 |
+
- AA2
|
188 |
+
- AE2
|
189 |
+
- OW2
|
190 |
+
- AO2
|
191 |
+
- AE0
|
192 |
+
- AH2
|
193 |
+
- ZH
|
194 |
+
- AA0
|
195 |
+
- UW2
|
196 |
+
- IY2
|
197 |
+
- AY0
|
198 |
+
- AO0
|
199 |
+
- AW2
|
200 |
+
- EY0
|
201 |
+
- UH2
|
202 |
+
- ER2
|
203 |
+
- AW0
|
204 |
+
- '...'
|
205 |
+
- UH0
|
206 |
+
- OY2
|
207 |
+
- . . .
|
208 |
+
- OY0
|
209 |
+
- . . . .
|
210 |
+
- ..
|
211 |
+
- . ...
|
212 |
+
- . .
|
213 |
+
- . . . . .
|
214 |
+
- .. ..
|
215 |
+
- '... .'
|
216 |
+
- <sos/eos>
|
217 |
+
odim: null
|
218 |
+
model_conf: {}
|
219 |
+
use_preprocessor: true
|
220 |
+
token_type: phn
|
221 |
+
bpemodel: null
|
222 |
+
non_linguistic_symbols: null
|
223 |
+
cleaner: tacotron
|
224 |
+
g2p: g2p_en_no_space
|
225 |
+
feats_extract: fbank
|
226 |
+
feats_extract_conf:
|
227 |
+
fs: 24000
|
228 |
+
fmin: 80
|
229 |
+
fmax: 7600
|
230 |
+
n_mels: 80
|
231 |
+
hop_length: 300
|
232 |
+
n_fft: 2048
|
233 |
+
win_length: 1200
|
234 |
+
normalize: global_mvn
|
235 |
+
normalize_conf:
|
236 |
+
stats_file: exp/tts_train_xvector_transformer_raw_phn_tacotron_g2p_en_no_space/decode_use_teacher_forcingtrue_train.loss.ave/stats/train/feats_stats.npz
|
237 |
+
tts: fastspeech2
|
238 |
+
tts_conf:
|
239 |
+
adim: 384
|
240 |
+
aheads: 2
|
241 |
+
elayers: 4
|
242 |
+
eunits: 1536
|
243 |
+
dlayers: 4
|
244 |
+
dunits: 1536
|
245 |
+
positionwise_layer_type: conv1d
|
246 |
+
positionwise_conv_kernel_size: 3
|
247 |
+
duration_predictor_layers: 2
|
248 |
+
duration_predictor_chans: 256
|
249 |
+
duration_predictor_kernel_size: 3
|
250 |
+
postnet_layers: 5
|
251 |
+
postnet_filts: 5
|
252 |
+
postnet_chans: 256
|
253 |
+
use_masking: true
|
254 |
+
encoder_normalize_before: true
|
255 |
+
decoder_normalize_before: true
|
256 |
+
reduction_factor: 1
|
257 |
+
encoder_type: conformer
|
258 |
+
decoder_type: conformer
|
259 |
+
conformer_pos_enc_layer_type: rel_pos
|
260 |
+
conformer_self_attn_layer_type: rel_selfattn
|
261 |
+
conformer_activation_type: swish
|
262 |
+
use_macaron_style_in_conformer: true
|
263 |
+
use_cnn_in_conformer: true
|
264 |
+
conformer_enc_kernel_size: 7
|
265 |
+
conformer_dec_kernel_size: 31
|
266 |
+
init_type: xavier_uniform
|
267 |
+
transformer_enc_dropout_rate: 0.2
|
268 |
+
transformer_enc_positional_dropout_rate: 0.2
|
269 |
+
transformer_enc_attn_dropout_rate: 0.2
|
270 |
+
transformer_dec_dropout_rate: 0.2
|
271 |
+
transformer_dec_positional_dropout_rate: 0.2
|
272 |
+
transformer_dec_attn_dropout_rate: 0.2
|
273 |
+
pitch_predictor_layers: 5
|
274 |
+
pitch_predictor_chans: 256
|
275 |
+
pitch_predictor_kernel_size: 5
|
276 |
+
pitch_predictor_dropout: 0.5
|
277 |
+
pitch_embed_kernel_size: 1
|
278 |
+
pitch_embed_dropout: 0.0
|
279 |
+
stop_gradient_from_pitch_predictor: true
|
280 |
+
energy_predictor_layers: 2
|
281 |
+
energy_predictor_chans: 256
|
282 |
+
energy_predictor_kernel_size: 3
|
283 |
+
energy_predictor_dropout: 0.5
|
284 |
+
energy_embed_kernel_size: 1
|
285 |
+
energy_embed_dropout: 0.0
|
286 |
+
stop_gradient_from_energy_predictor: false
|
287 |
+
spk_embed_dim: 512
|
288 |
+
spk_embed_integration_type: add
|
289 |
+
pitch_extract: dio
|
290 |
+
pitch_extract_conf:
|
291 |
+
fs: 24000
|
292 |
+
n_fft: 2048
|
293 |
+
hop_length: 300
|
294 |
+
f0max: 400
|
295 |
+
f0min: 80
|
296 |
+
reduction_factor: 1
|
297 |
+
pitch_normalize: global_mvn
|
298 |
+
pitch_normalize_conf:
|
299 |
+
stats_file: exp/tts_train_xvector_transformer_raw_phn_tacotron_g2p_en_no_space/decode_use_teacher_forcingtrue_train.loss.ave/stats/train/pitch_stats.npz
|
300 |
+
energy_extract: energy
|
301 |
+
energy_extract_conf:
|
302 |
+
fs: 24000
|
303 |
+
n_fft: 2048
|
304 |
+
hop_length: 300
|
305 |
+
win_length: 1200
|
306 |
+
reduction_factor: 1
|
307 |
+
energy_normalize: global_mvn
|
308 |
+
energy_normalize_conf:
|
309 |
+
stats_file: exp/tts_train_xvector_transformer_raw_phn_tacotron_g2p_en_no_space/decode_use_teacher_forcingtrue_train.loss.ave/stats/train/energy_stats.npz
|
310 |
+
required:
|
311 |
+
- output_dir
|
312 |
+
- token_list
|
313 |
+
distributed: true
|
exp/tts_train_xvector_conformer_fastspeech2_transformer_teacher_raw_phn_tacotron_g2p_en_no_space/images/backward_time.png
ADDED
exp/tts_train_xvector_conformer_fastspeech2_transformer_teacher_raw_phn_tacotron_g2p_en_no_space/images/duration_loss.png
ADDED
exp/tts_train_xvector_conformer_fastspeech2_transformer_teacher_raw_phn_tacotron_g2p_en_no_space/images/energy_loss.png
ADDED
exp/tts_train_xvector_conformer_fastspeech2_transformer_teacher_raw_phn_tacotron_g2p_en_no_space/images/forward_time.png
ADDED
exp/tts_train_xvector_conformer_fastspeech2_transformer_teacher_raw_phn_tacotron_g2p_en_no_space/images/iter_time.png
ADDED
exp/tts_train_xvector_conformer_fastspeech2_transformer_teacher_raw_phn_tacotron_g2p_en_no_space/images/l1_loss.png
ADDED
exp/tts_train_xvector_conformer_fastspeech2_transformer_teacher_raw_phn_tacotron_g2p_en_no_space/images/loss.png
ADDED
exp/tts_train_xvector_conformer_fastspeech2_transformer_teacher_raw_phn_tacotron_g2p_en_no_space/images/lr_0.png
ADDED
exp/tts_train_xvector_conformer_fastspeech2_transformer_teacher_raw_phn_tacotron_g2p_en_no_space/images/optim_step_time.png
ADDED
exp/tts_train_xvector_conformer_fastspeech2_transformer_teacher_raw_phn_tacotron_g2p_en_no_space/images/pitch_loss.png
ADDED
exp/tts_train_xvector_conformer_fastspeech2_transformer_teacher_raw_phn_tacotron_g2p_en_no_space/images/train_time.png
ADDED
exp/tts_train_xvector_conformer_fastspeech2_transformer_teacher_raw_phn_tacotron_g2p_en_no_space/train.loss.ave_5best.pth
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:cfebebd64c5b8d83dace37c7a03dd727ed7397233b9bcf711d1069b0b2517f90
|
3 |
+
size 282307968
|
exp/tts_train_xvector_transformer_raw_phn_tacotron_g2p_en_no_space/decode_use_teacher_forcingtrue_train.loss.ave/stats/train/energy_stats.npz
ADDED
Binary file (770 Bytes). View file
|
|
exp/tts_train_xvector_transformer_raw_phn_tacotron_g2p_en_no_space/decode_use_teacher_forcingtrue_train.loss.ave/stats/train/feats_stats.npz
ADDED
Binary file (1.4 kB). View file
|
|
exp/tts_train_xvector_transformer_raw_phn_tacotron_g2p_en_no_space/decode_use_teacher_forcingtrue_train.loss.ave/stats/train/pitch_stats.npz
ADDED
Binary file (770 Bytes). View file
|
|
meta.yaml
ADDED
@@ -0,0 +1,8 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
espnet: 0.8.0
|
2 |
+
files:
|
3 |
+
model_file: exp/tts_train_xvector_conformer_fastspeech2_transformer_teacher_raw_phn_tacotron_g2p_en_no_space/train.loss.ave_5best.pth
|
4 |
+
python: "3.7.3 (default, Mar 27 2019, 22:11:17) \n[GCC 7.3.0]"
|
5 |
+
timestamp: 1609824874.219107
|
6 |
+
torch: 1.5.1
|
7 |
+
yaml_files:
|
8 |
+
train_config: exp/tts_train_xvector_conformer_fastspeech2_transformer_teacher_raw_phn_tacotron_g2p_en_no_space/config.yaml
|