tokenizer_name: eluzhnica/mpt-7b-instruct-peft-compatible # Change to 30b when training is working
max_seq_len: 8192
global_seed: 17
model:
  name: hf_causal_lm
  pretrained: true
  pretrained_model_name_or_path: eluzhnica/mpt-7b-instruct-peft-compatible # Change to 30b when training is working
  init_device: meta
  config_overrides:
    max_seq_len: ${max_seq_len}
    attn_config:
      attn_uses_sequence_id: false
tokenizer:
  name: ${tokenizer_name}
  kwargs:
    model_max_length: ${max_seq_len}
train_loader:
  name: finetuning
  dataset:
    hf_name: json
    hf_kwargs:
      data_dir: finetune-data
    split: train
    max_seq_len: ${max_seq_len}
    allow_pad_trimming: false
    decoder_only_format: true
    packing_ratio: 9
    shuffle: true
  drop_last: true
  num_workers: 8
  pin_memory: false
  prefetch_factor: 2
  persistent_workers: true
  timeout: 0
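# Note (assumption, not part of the original config): with hf_name: json the
# finetuning loader reads JSON/JSONL files from data_dir, and unless a custom
# preprocessing function is supplied each record is expected to carry prompt and
# response fields, roughly:
#   {"prompt": "Explain sequence packing.", "response": "Sequence packing concatenates ..."}
# packing_ratio: 9 asks the loader to pack roughly 9 such examples into each
# max_seq_len-token training sequence.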
####### Optimization settings below are copied from the CPU example; change back to match the GPU examples when not running on CPU
scheduler:
  name: cosine_with_warmup
  t_warmup: 100ba
  alpha_f: 0.1
optimizer:
  name: decoupled_adamw
  lr: 6.0e-4
  betas:
  - 0.9
  - 0.95
  eps: 1.0e-08
  weight_decay: 0.0
algorithms:
  gradient_clipping:
    clipping_type: norm
    clipping_threshold: 1.0
############
max_duration: 1ep # Reduced from the default of 8ep to follow the CPU example
eval_interval: 1 # Also taken from the CPU example; the default was 1ep
eval_first: false # Also taken from the CPU example; the default was true
global_train_batch_size: 2 # Increase once the training samples are complete
seed: ${global_seed}
device_eval_batch_size: 4
device_train_microbatch_size: 1
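# Note (assumption, not part of the original config): Composer derives gradient
# accumulation from these values as
#   accumulation steps = global_train_batch_size / (num_devices * device_train_microbatch_size),
# so on a single device this config accumulates over 2 microbatches per optimizer step.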
precision: fp32 # Use fp32 on CPU; change to amp_bf16 on a supported GPU
fsdp_config:
  sharding_strategy: FULL_SHARD
  mixed_precision: PURE
  activation_checkpointing: true
  activation_checkpointing_reentrant: false
  activation_cpu_offload: false
  limit_all_gathers: true
  sync_module_states: true
  verbose: false
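# Note (assumption, not part of the original config): this fsdp_config only takes
# effect on a distributed multi-GPU launch; mixed_precision: PURE follows the
# precision setting above, so it stays fp32 here and only becomes real mixed
# precision once precision is switched to amp_bf16.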
progress_bar: true
log_to_console: true
console_log_interval: 20ba
callbacks:
  speed_monitor:
    window_size: 10
  runtime_estimator: {}
  lr_monitor: {}
# loggers:
#   wandb: {}
# save_folder:
# save_interval: 3ep
# save_num_checkpoints_to_keep: 1
# load_path needs a checkpoint converted for use with the llm-foundry code
# load_path:
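# Example checkpoint settings (assumption, not part of the original config;
# Composer substitutes {run_name} and accepts object-store URIs):
#   save_folder: ./{run_name}/checkpoints
#   save_folder: s3://my-bucket/my-folder/{run_name}/checkpoints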
autoresume: false
load_weights_only: false
python_log_level: debug
icl_max_seq_len: 2048
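# Launch sketch (assumption, not part of the original config; assumes the
# standard llm-foundry repo layout): composer scripts/train/train.py <path-to-this-yaml>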