model:
  class_path: model.lina.Lina
  init_args:
    n_warmup_steps: 500
    learning_rate: 5e-4
    n_codebook: 1024
    n_special_token_in: 3
    n_special_token_out: 3
    n_txt_vocab: 256
    d_context: 512
    d_model: 512
    quant_layer: [0, 1, 2, 3]
    txt_encoder:
      class_path: model.encoder.TextEncoder
      init_args:
        dim: 512
        heads: 8
        n_layers: 9
        dropout: 0.1
    attentive_rnn:
      class_path: model.mamba.AttentiveMamba
      init_args:
        d_model: 512
        d_context: 512
        heads: 1
        dropout_att: 0.1
        n_layer: 12
        blind: True
        d_blind: 128
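
The `class_path` / `init_args` nesting above follows the jsonargparse convention used by PyTorch Lightning's `LightningCLI`: each `class_path` names a class to import, and its `init_args` become constructor keyword arguments, resolved recursively, so `TextEncoder` and `AttentiveMamba` are built first and passed into `Lina`. The loader below is a minimal sketch of how such a config resolves into objects; it is a hypothetical standalone helper, not part of the repository, and the `config.yaml` path is an assumption.

```python
import importlib
import yaml

def instantiate(node):
    """Recursively turn {class_path, init_args} dicts into objects."""
    if isinstance(node, dict) and "class_path" in node:
        # Import the class named by class_path, e.g. model.lina.Lina.
        module_name, cls_name = node["class_path"].rsplit(".", 1)
        cls = getattr(importlib.import_module(module_name), cls_name)
        # Nested class_path entries (txt_encoder, attentive_rnn)
        # are instantiated before being passed as keyword arguments.
        kwargs = {k: instantiate(v) for k, v in node.get("init_args", {}).items()}
        return cls(**kwargs)
    if isinstance(node, dict):
        return {k: instantiate(v) for k, v in node.items()}
    return node

with open("config.yaml") as f:  # path is an assumption
    cfg = yaml.safe_load(f)

# Builds TextEncoder and AttentiveMamba, then Lina with them as arguments.
model = instantiate(cfg["model"])
```

In practice, if the project exposes a `LightningCLI` entry point, the same file can be passed to it directly (e.g. `python main.py fit --config config.yaml`, script name assumed), and the CLI performs this instantiation itself.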