{ "best_metric": null, "best_model_checkpoint": null, "epoch": 0.0038405407481373376, "eval_steps": 4, "global_step": 25, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.00015362162992549352, "grad_norm": 0.8514974117279053, "learning_rate": 1e-05, "loss": 1.7793, "step": 1 }, { "epoch": 0.00015362162992549352, "eval_loss": 0.7040082216262817, "eval_runtime": 1627.2384, "eval_samples_per_second": 3.369, "eval_steps_per_second": 0.422, "step": 1 }, { "epoch": 0.00030724325985098704, "grad_norm": 0.8747180104255676, "learning_rate": 2e-05, "loss": 1.3549, "step": 2 }, { "epoch": 0.00046086488977648053, "grad_norm": 0.6679717898368835, "learning_rate": 3e-05, "loss": 1.1768, "step": 3 }, { "epoch": 0.0006144865197019741, "grad_norm": 0.49650415778160095, "learning_rate": 4e-05, "loss": 0.7923, "step": 4 }, { "epoch": 0.0006144865197019741, "eval_loss": 0.6929552555084229, "eval_runtime": 1631.2957, "eval_samples_per_second": 3.361, "eval_steps_per_second": 0.421, "step": 4 }, { "epoch": 0.0007681081496274675, "grad_norm": 0.950884997844696, "learning_rate": 5e-05, "loss": 1.6277, "step": 5 }, { "epoch": 0.0009217297795529611, "grad_norm": 0.7995210886001587, "learning_rate": 6e-05, "loss": 1.3187, "step": 6 }, { "epoch": 0.0010753514094784546, "grad_norm": 1.4403150081634521, "learning_rate": 7e-05, "loss": 1.4341, "step": 7 }, { "epoch": 0.0012289730394039482, "grad_norm": 1.0983939170837402, "learning_rate": 8e-05, "loss": 1.0183, "step": 8 }, { "epoch": 0.0012289730394039482, "eval_loss": 0.5220252275466919, "eval_runtime": 1628.9557, "eval_samples_per_second": 3.365, "eval_steps_per_second": 0.421, "step": 8 }, { "epoch": 0.0013825946693294415, "grad_norm": 0.9374691247940063, "learning_rate": 9e-05, "loss": 1.0436, "step": 9 }, { "epoch": 0.001536216299254935, "grad_norm": 0.7291067242622375, "learning_rate": 0.0001, "loss": 0.6992, "step": 10 }, { "epoch": 0.0016898379291804286, "grad_norm": 0.6516621708869934, "learning_rate": 9.890738003669029e-05, "loss": 0.9425, "step": 11 }, { "epoch": 0.0018434595591059221, "grad_norm": 0.5123838782310486, "learning_rate": 9.567727288213005e-05, "loss": 0.4621, "step": 12 }, { "epoch": 0.0018434595591059221, "eval_loss": 0.2799856662750244, "eval_runtime": 1627.648, "eval_samples_per_second": 3.368, "eval_steps_per_second": 0.421, "step": 12 }, { "epoch": 0.0019970811890314155, "grad_norm": 0.6764194965362549, "learning_rate": 9.045084971874738e-05, "loss": 0.4816, "step": 13 }, { "epoch": 0.0021507028189569092, "grad_norm": 1.0711296796798706, "learning_rate": 8.345653031794292e-05, "loss": 0.5074, "step": 14 }, { "epoch": 0.0023043244488824026, "grad_norm": 0.810668408870697, "learning_rate": 7.500000000000001e-05, "loss": 0.6828, "step": 15 }, { "epoch": 0.0024579460788078963, "grad_norm": 0.5851515531539917, "learning_rate": 6.545084971874738e-05, "loss": 0.5093, "step": 16 }, { "epoch": 0.0024579460788078963, "eval_loss": 0.1574617475271225, "eval_runtime": 1631.6557, "eval_samples_per_second": 3.36, "eval_steps_per_second": 0.42, "step": 16 }, { "epoch": 0.0026115677087333897, "grad_norm": 0.41902437806129456, "learning_rate": 5.522642316338268e-05, "loss": 0.2711, "step": 17 }, { "epoch": 0.002765189338658883, "grad_norm": 0.5801205039024353, "learning_rate": 4.477357683661734e-05, "loss": 0.2748, "step": 18 }, { "epoch": 0.0029188109685843767, "grad_norm": 0.4619535207748413, "learning_rate": 3.4549150281252636e-05, "loss": 0.3859, "step": 19 }, { "epoch": 0.00307243259850987, "grad_norm": 0.4101043939590454, "learning_rate": 2.500000000000001e-05, "loss": 0.2883, "step": 20 }, { "epoch": 0.00307243259850987, "eval_loss": 0.10978986322879791, "eval_runtime": 1630.2129, "eval_samples_per_second": 3.363, "eval_steps_per_second": 0.421, "step": 20 }, { "epoch": 0.003226054228435364, "grad_norm": 0.30318722128868103, "learning_rate": 1.6543469682057106e-05, "loss": 0.2262, "step": 21 }, { "epoch": 0.003379675858360857, "grad_norm": 0.3537350594997406, "learning_rate": 9.549150281252633e-06, "loss": 0.1973, "step": 22 }, { "epoch": 0.0035332974882863505, "grad_norm": 0.35009416937828064, "learning_rate": 4.322727117869951e-06, "loss": 0.1602, "step": 23 }, { "epoch": 0.0036869191182118443, "grad_norm": 0.5747281908988953, "learning_rate": 1.0926199633097157e-06, "loss": 0.151, "step": 24 }, { "epoch": 0.0036869191182118443, "eval_loss": 0.10253717005252838, "eval_runtime": 1631.654, "eval_samples_per_second": 3.36, "eval_steps_per_second": 0.42, "step": 24 }, { "epoch": 0.0038405407481373376, "grad_norm": 0.3095178008079529, "learning_rate": 0.0, "loss": 0.1561, "step": 25 } ], "logging_steps": 1, "max_steps": 25, "num_input_tokens_seen": 0, "num_train_epochs": 1, "save_steps": 25, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 6.093916763848704e+16, "train_batch_size": 8, "trial_name": null, "trial_params": null }