# Speech data/model configuration.
# NOTE(review): this file had lost its indentation and carried table-pipe
# residue ("|") on every line. The nesting below was reconstructed from the
# empty-valued parent keys and the sorted key order within each level --
# confirm against the original config before relying on it.
# Values of "N/A" are kept verbatim; presumably placeholders to be filled in.

data_root: N/A
input_channels: 1
input_feat_per_channel: 80

multitask:
  source_unit:
    data: N/A
    decoder_type: transformer
    dict: N/A
    encoder_layer: 6
    loss_weight: 8.0
    target_type: text

output_channels: 1
output_feat_per_channel: 1
output_feat_reduction_rate: 0
output_sample_rate: 16000

# Parameters for the "specaugment" transform named under transforms._train.
# NOTE(review): the key is spelled "time_wrap_W" in the source; kept as-is
# because it is read by name at runtime.
specaugment:
  freq_mask_F: 27
  freq_mask_N: 1
  time_mask_N: 1
  time_mask_T: 100
  time_mask_p: 1.0
  time_wrap_W: 0

# Feature transforms listed per split.
transforms:
  _eval:
  - utterance_cmvn
  _train:
  - utterance_cmvn
  - specaugment

vocoder:
  dur_prediction: true
  model_path: N/A
  speaker: false
  type: code_hifigan

hub:
  input_type: fbank80_w_utt_cmvn
  tts_model_id: facebookresearch/fairseq:ust:unit_hifigan_mhubert_vp_en_es_fr_it3_400k_layer11_km1000_es_css10
  unit_vocoder: true

generation_args:
  beam: 10
  max_len_a: 1
|