backbone:
  class_path: vocos.models.VocosBackbone
  init_args:
    adanorm_num_embeddings: null
    dim: 1024
    input_channels: 128
    intermediate_dim: 2048
    layer_scale_init_value: null
    num_layers: 8
decay_mel_coeff: false
enable_discriminator: true
# NOTE(review): key spelling ("periodicty") kept exactly as found; presumably it
# must match the parameter name expected by the consumer — confirm before renaming.
evaluate_periodicty: true
evaluate_pesq: true
evaluate_utmos: true
feature_extractor:
  class_path: vocos.feature_extractors.MelSpectrogramFeatures
  init_args:
    hop_length: 256
    n_fft: 2048
    n_mels: 128
    padding: center
    sample_rate: 48000
generator_period: 3
grad_acc: 1
head:
  class_path: vocos.heads.ISTFTHead
  init_args:
    dim: 1024
    hop_length: 256
    n_fft: 2048
    padding: center
initial_learning_rate: 0.0003
mel_loss_coeff: 15.0
mrd_loss_coeff: 0.1
num_warmup_steps: 500
pretrain_decoupled_steps: 0
pretrain_disc_steps: 500
pretrain_mel_steps: 0
pretrained_ckpt: null
sample_rate: 48000