tokenizer_name: eluzhnica/mpt-7b-instruct-peft-compatible # Change to the 30b model when training is working
max_seq_len: 8192
global_seed: 17

model:
  name: hf_causal_lm
  pretrained: true
  pretrained_model_name_or_path: eluzhnica/mpt-7b-instruct-peft-compatible # Change to the 30b model when training is working
  init_device: meta
  config_overrides:
    max_seq_len: ${max_seq_len}
    attn_config:
      attn_uses_sequence_id: false

tokenizer:
  name: ${tokenizer_name}
  kwargs:
    model_max_length: ${max_seq_len}

train_loader:
  name: finetuning
  dataset:
    hf_name: json
    hf_kwargs:
      data_dir: finetune-data
    split: train
    max_seq_len: ${max_seq_len}
    allow_pad_trimming: false
    decoder_only_format: true
    packing_ratio: 9
    shuffle: true
  drop_last: true
  num_workers: 8
  pin_memory: false
  prefetch_factor: 2
  persistent_workers: true
  timeout: 0

####### This section is copied from the CPU example. Revert it when switching back to the other examples.
scheduler:
  name: cosine_with_warmup
  t_warmup: 100ba
  alpha_f: 0.1

optimizer:
  name: decoupled_adamw
  lr: 6.0e-4
  betas:
  - 0.9
  - 0.95
  eps: 1.0e-08
  weight_decay: 0.0

algorithms:
  gradient_clipping:
    clipping_type: norm
    clipping_threshold: 1.0
############

max_duration: 1ep # Set to 1ep following the CPU example; the default was 8ep. Adjust as needed.
eval_interval: 1 # Also from the CPU example; the default was 1ep
eval_first: false # Also from the CPU example; the default was true
global_train_batch_size: 2 # Increase once the training samples are complete

seed: ${global_seed}
device_eval_batch_size: 4
device_train_microbatch_size: 1
precision: fp32 # Use fp32 on CPU; change to amp_bf16 on a supported GPU

fsdp_config:
  sharding_strategy: FULL_SHARD
  mixed_precision: PURE
  activation_checkpointing: true
  activation_checkpointing_reentrant: false
  activation_cpu_offload: false
  limit_all_gathers: true
  sync_module_states: true
  verbose: false

progress_bar: true
log_to_console: true
console_log_interval: 20ba

callbacks:
  speed_monitor:
    window_size: 10
  runtime_estimator: {}
  lr_monitor: {}

# loggers:
#   wandb: {}

# save_folder:
# save_interval: 3ep
# save_num_checkpoints_to_keep: 1

# need to use a converted checkpoint with the llm-foundry code
# load_path:

autoresume: false
load_weights_only: false
python_log_level: debug

icl_max_seq_len: 2048
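
# Usage sketch (the directory layout and filename below are assumptions, not part of this config):
# if this file is saved under llm-foundry's scripts/train/yamls/finetune/ as
# mpt-7b-instruct-peft.yaml, a run can be launched with Composer via the standard
# train/train.py entry point, e.g.:
#
#   cd scripts
#   composer train/train.py train/yamls/finetune/mpt-7b-instruct-peft.yaml
#
# Individual keys can also be overridden from the command line (OmegaConf-style
# key=value arguments), e.g. append `max_duration=2ep precision=amp_bf16` when
# running on a supported GPU.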