# File: all-models/gla/gla_130.yaml
# Uploaded by reach-vb (HF staff): "Upload folder using huggingface_hub"
# Commit 103d46f (verified); 655 bytes as uploaded
# Model configuration in class_path / init_args form: each component names the
# class to build (`class_path`) and the keyword arguments for it (`init_args`).
# NOTE(review): the copy this was restored from had lost all indentation; the
# nesting below is reconstructed from the class_path/init_args pairing (both
# sub-components placed under the top-level model's init_args) -- confirm
# against the constructors of the referenced classes.
model:
  class_path: model.lina.Lina
  init_args:
    n_warmup_steps: 500
    # Written with an explicit decimal point so YAML 1.1 loaders (e.g. PyYAML)
    # resolve it as a float; the dot-less form `5e-4` is read as a string there.
    learning_rate: 5.0e-4
    n_codebook: 1024
    n_special_token_in: 3
    n_special_token_out: 3
    n_txt_vocab: 256
    d_context: 768
    d_model: 768
    quant_layer: [0, 1, 2, 3]

    # Nested component; widths here repeat the 768 used by the parent
    # (presumably they must agree -- verify before changing one alone).
    txt_encoder:
      class_path: model.encoder.TextEncoder
      init_args:
        dim: 768
        heads: 8
        n_layers: 9
        dropout: 0.1

    # Nested component; d_model / d_context repeat the parent's values.
    attentive_rnn:
      class_path: model.gla.AttentiveGLA
      init_args:
        d_model: 768
        d_context: 768
        heads: 4
        dropout_att: 0.2
        dropout: 0.0
        n_layer: 6
        blind: true
        d_blind: 128