# Provenance (scraped from the HF file viewer; kept as comments so the file parses):
# uploaded by pszemraj via huggingface_hub ("Upload folder using huggingface_hub")
# commit 4dfcb10 (verified), 883 bytes
---
# nanoT5-style T5 pretraining run configuration.
# NOTE(review): nesting restored to the nanoT5 config schema — the pasted copy had
# lost all indentation, which flattened every section to the root and silently
# duplicated keys (`every_steps`, `mode`). Confirm against the upstream default.yaml.

mode: pt  # presumably "pretraining" (vs. fine-tuning) — TODO confirm in the trainer
device: gpu
precision: bf16
eval_only: false
predict_only: false
seed: 34534

model:
  klass: hf_t5  # model-class selector used by the trainer
  name: pszemraj/tFINE-900m-e16-d32  # HF Hub model id (900M params, 16 enc / 32 dec layers per name)
  overwrite:
    dropout_rate: 0.0  # overrides the hub config's dropout for pretraining
  checkpoint_path: ''  # empty string → start fresh, no checkpoint to resume from
  random_init: false
  compile: true  # presumably torch.compile — confirm

tokenizer:
  name: BEE-spoke-data/slimpajama_tok-48128-BPE-forT5  # 48128-vocab BPE tokenizer

data:
  input_length: 1024  # sequence length, matches the '1024' wandb tag below
  mlm_probability: 0.15  # span-corruption noise density
  mean_noise_span_length: 3.0
  num_workers: 16

optim:
  name: adamwscale
  base_lr: 0.01
  batch_size: 128
  total_steps: 20000
  epochs: -1  # -1 presumably means "ignore epochs, run for total_steps" — confirm
  warmup_steps: 5000
  lr_scheduler: cosine
  weight_decay: 0.0001
  grad_clip: 1.0
  grad_acc: 8  # gradient-accumulation steps
  final_cosine: 2.0e-05  # LR floor at the end of the cosine schedule

eval:
  every_steps: 1000000000  # sentinel: effectively disables periodic eval
  steps: 500

checkpoint:
  every_steps: 2500

logging:
  use_wandb: true
  wandb_config:
    project: nanoT5
    entity: pszemraj
    tags:
      - 900m
      - '1024'  # quoted so the tag stays a string, not an int
    mode: online
  every_steps: 25  # log cadence (steps)
  grad_l2: true  # log gradient L2 norm
  weights_l2: true  # log weight L2 norm