{ "model": { "params": { "output_dir": "outputs", "num_train_epochs": 3, "per_device_train_batch_size": 4, "gradient_accumulation_steps": 8, "learning_rate": 5e-5, "warmup_steps": 10, "fp16": true, "bf16": false, "logging_steps": 1, "optim": "adamw_8bit", "weight_decay": 0.01, "lr_scheduler_type": "linear", "seed": 42, "max_seq_length": 128, "dataset_num_proc": 2, "packing": false }, "choices": ["gpt2", "bert-base-uncased", "llama3-8b"] } }