---
# Experiment identifier — labels this training run and its outputs.
name: "enzu4000baseline_transformer"

# Corpus and vocabulary configuration.
data:
  src: "en"   # source language code
  trg: "zu"   # target language code
  # Pre-processed (BPE-segmented) parallel corpora. Paths contain spaces,
  # so they must remain quoted.
  train: "/content/drive/My Drive/masakhane/en-zu-baseline/train.bpe"
  dev: "/content/drive/My Drive/masakhane/en-zu-baseline/dev.bpe"
  test: "/content/drive/My Drive/masakhane/en-zu-baseline/test.bpe"
  level: "bpe"              # operate on subword (BPE) units
  lowercase: false          # canonical lowercase boolean (was "False")
  max_sent_length: 100      # filter out longer training sentences
  # Source and target share one vocabulary file — this is what makes the
  # tied_embeddings setting in the model section possible.
  src_vocab: "/content/drive/My Drive/masakhane/en-zu-baseline/vocab.txt"
  trg_vocab: "/content/drive/My Drive/masakhane/en-zu-baseline/vocab.txt"

# Decoding settings used at test time.
testing:
  beam_size: 5   # beam-search width
  alpha: 1.0     # length-penalty weight

training: |
|
load_model: "/content/drive/My Drive/masakhane/en-zu-baseline/pretrained/enzu4000baseline/101000.ckpt" |
|
random_seed: 42 |
|
optimizer: "adam" |
|
normalization: "tokens" |
|
adam_betas: [0.9, 0.999] |
|
scheduling: "plateau" |
|
patience: 5 |
|
learning_rate_factor: 0.5 |
|
learning_rate_warmup: 1000 |
|
decrease_factor: 0.7 |
|
loss: "crossentropy" |
|
learning_rate: 0.0003 |
|
learning_rate_min: 0.00000001 |
|
weight_decay: 0.0 |
|
label_smoothing: 0.1 |
|
batch_size: 4096 |
|
batch_type: "token" |
|
eval_batch_size: 3600 |
|
eval_batch_type: "token" |
|
batch_multiplier: 1 |
|
early_stopping_metric: "ppl" |
|
epochs: 10 |
|
validation_freq: 1000 |
|
logging_freq: 100 |
|
eval_metric: "bleu" |
|
model_dir: "models/enzu4000baseline_transformer" |
|
overwrite: True |
|
shuffle: True |
|
use_cuda: True |
|
max_output_length: 100 |
|
print_valid_sents: [0, 1, 2, 3] |
|
keep_last_ckpts: 3 |
|
|
|
# Transformer architecture definition.
model:
  initializer: "xavier"
  bias_initializer: "zeros"
  init_gain: 1.0
  embed_initializer: "xavier"
  embed_init_gain: 1.0
  tied_embeddings: true    # share src/trg embeddings — needs the shared vocab above
  tied_softmax: true       # tie output projection to target embeddings
  encoder:
    type: "transformer"
    num_layers: 6
    # NOTE(review): encoder uses 4 attention heads while the decoder uses 8 —
    # confirm this asymmetry is intentional and not a typo.
    num_heads: 4
    embeddings:
      embedding_dim: 256
      scale: true
      dropout: 0.3
    # hidden_size should match embedding_dim for transformer layers —
    # TODO confirm against the framework's config validation.
    hidden_size: 256
    ff_size: 1024
    dropout: 0.4
  decoder:
    type: "transformer"
    num_layers: 6
    num_heads: 8
    embeddings:
      embedding_dim: 256
      scale: true
      dropout: 0.3
    hidden_size: 256
    ff_size: 1024
    dropout: 0.4