yingzhi committed on
Commit
122f8f6
1 Parent(s): 3094a67

Update hyperparams.yaml

Browse files
Files changed (1) hide show
  1. hyperparams.yaml +148 -1
hyperparams.yaml CHANGED
@@ -1 +1,148 @@
1
- /home/ywang/.cache/huggingface/hub/models--speechbrain--tts-fastspeech2-ljspeech/snapshots/3df449681f33a0dbb17376bee5a7b7a3d4950c87/hyperparams.yaml
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # ################################
2
+ # Model: FastSpeech2 with internal alignment
3
+ # Authors: Yingzhi Wang
4
+ # ################################
5
+
6
+ # Input parameters
7
+ lexicon:
8
+ - "AA"
9
+ - "AE"
10
+ - "AH"
11
+ - "AO"
12
+ - "AW"
13
+ - "AY"
14
+ - "B"
15
+ - "CH"
16
+ - "D"
17
+ - "DH"
18
+ - "EH"
19
+ - "ER"
20
+ - "EY"
21
+ - "F"
22
+ - "G"
23
+ - "HH"
24
+ - "IH"
25
+ - "IY"
26
+ - "JH"
27
+ - "K"
28
+ - "L"
29
+ - "M"
30
+ - "N"
31
+ - "NG"
32
+ - "OW"
33
+ - "OY"
34
+ - "P"
35
+ - "R"
36
+ - "S"
37
+ - "SH"
38
+ - "T"
39
+ - "TH"
40
+ - "UH"
41
+ - "UW"
42
+ - "V"
43
+ - "W"
44
+ - "Y"
45
+ - "Z"
46
+ - "ZH"
47
+ - "-"
48
+ - "!"
49
+ - "'"
50
+ - "("
51
+ - ")"
52
+ - ","
53
+ - "."
54
+ - ":"
55
+ - ";"
56
+ - "?"
57
+ - " "
58
+
59
+ n_symbols: 52 # fixed by the lexicon above: 50 symbols + 1 dummy symbol used for padding + 1 for unknown
60
+ padding_idx: 0
61
+ n_mel_channels: 80
62
+
63
+ hidden_channels: 512
64
+
65
+ # Encoder parameters
66
+ enc_num_layers: 4
67
+ enc_num_head: 2
68
+ enc_d_model: !ref <hidden_channels>
69
+ enc_ffn_dim: 1024
70
+ enc_k_dim: !ref <hidden_channels>
71
+ enc_v_dim: !ref <hidden_channels>
72
+ enc_dropout: 0.2
73
+
74
+ # Aligner parameters
75
+ in_query_channels: 80
76
+ in_key_channels: !ref <hidden_channels>
77
+ attn_channels: 80
78
+ temperature: 0.0005
79
+
80
+ # Decoder parameters
81
+ dec_num_layers: 4
82
+ dec_num_head: 2
83
+ dec_d_model: !ref <hidden_channels>
84
+ dec_ffn_dim: 1024
85
+ dec_k_dim: !ref <hidden_channels>
86
+ dec_v_dim: !ref <hidden_channels>
87
+ dec_dropout: 0.2
88
+
89
+ # Postnet parameters
90
+ postnet_embedding_dim: 512
91
+ postnet_kernel_size: 5
92
+ postnet_n_convolutions: 5
93
+ postnet_dropout: 0.2
94
+
95
+ # Common
96
+ normalize_before: True
97
+ ffn_type: 1dcnn # allowed values: 1dcnn or ffn
98
+ ffn_cnn_kernel_size_list: [9, 1]
99
+
100
+ # Variance predictor
101
+ dur_pred_kernel_size: 3
102
+ pitch_pred_kernel_size: 3
103
+ energy_pred_kernel_size: 3
104
+ variance_predictor_dropout: 0.5
105
+
106
+ # Model
107
+ model: !new:speechbrain.lobes.models.FastSpeech2.FastSpeech2WithAlignment
108
+     enc_num_layers: !ref <enc_num_layers>
109
+     enc_num_head: !ref <enc_num_head>
110
+     enc_d_model: !ref <enc_d_model>
111
+     enc_ffn_dim: !ref <enc_ffn_dim>
112
+     enc_k_dim: !ref <enc_k_dim>
113
+     enc_v_dim: !ref <enc_v_dim>
114
+     enc_dropout: !ref <enc_dropout>
115
+     in_query_channels: !ref <in_query_channels>
116
+     in_key_channels: !ref <in_key_channels>
117
+     attn_channels: !ref <attn_channels>
118
+     temperature: !ref <temperature>
119
+     dec_num_layers: !ref <dec_num_layers>
120
+     dec_num_head: !ref <dec_num_head>
121
+     dec_d_model: !ref <dec_d_model>
122
+     dec_ffn_dim: !ref <dec_ffn_dim>
123
+     dec_k_dim: !ref <dec_k_dim>
124
+     dec_v_dim: !ref <dec_v_dim>
125
+     dec_dropout: !ref <dec_dropout>
126
+     normalize_before: !ref <normalize_before>
127
+     ffn_type: !ref <ffn_type>
128
+     ffn_cnn_kernel_size_list: !ref <ffn_cnn_kernel_size_list>
129
+     n_char: !ref <n_symbols>
130
+     n_mels: !ref <n_mel_channels>
131
+     postnet_embedding_dim: !ref <postnet_embedding_dim>
132
+     postnet_kernel_size: !ref <postnet_kernel_size>
133
+     postnet_n_convolutions: !ref <postnet_n_convolutions>
134
+     postnet_dropout: !ref <postnet_dropout>
135
+     padding_idx: !ref <padding_idx>
136
+     dur_pred_kernel_size: !ref <dur_pred_kernel_size>
137
+     pitch_pred_kernel_size: !ref <pitch_pred_kernel_size>
138
+     energy_pred_kernel_size: !ref <energy_pred_kernel_size>
139
+     variance_predictor_dropout: !ref <variance_predictor_dropout>
140
+
141
+ input_encoder: !new:speechbrain.dataio.encoder.TextEncoder
142
+
143
+ modules:
144
+     model: !ref <model>
145
+
146
+ pretrainer: !new:speechbrain.utils.parameter_transfer.Pretrainer
147
+     loadables:
148
+         model: !ref <model>