{ "best_metric": null, "best_model_checkpoint": null, "epoch": 3.0, "eval_steps": 500, "global_step": 210, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.19, "learning_rate": 1.8571428571428572e-05, "loss": 0.474, "step": 13 }, { "epoch": 0.37, "learning_rate": 2.9206349206349206e-05, "loss": 0.4638, "step": 26 }, { "epoch": 0.56, "learning_rate": 2.7142857142857144e-05, "loss": 0.4295, "step": 39 }, { "epoch": 0.74, "learning_rate": 2.507936507936508e-05, "loss": 0.4306, "step": 52 }, { "epoch": 0.93, "learning_rate": 2.301587301587302e-05, "loss": 0.3871, "step": 65 }, { "epoch": 1.11, "learning_rate": 2.095238095238095e-05, "loss": 0.3386, "step": 78 }, { "epoch": 1.3, "learning_rate": 1.888888888888889e-05, "loss": 0.3411, "step": 91 }, { "epoch": 1.49, "learning_rate": 1.6825396825396824e-05, "loss": 0.3176, "step": 104 }, { "epoch": 1.67, "learning_rate": 1.4761904761904761e-05, "loss": 0.2954, "step": 117 }, { "epoch": 1.86, "learning_rate": 1.2698412698412699e-05, "loss": 0.2926, "step": 130 }, { "epoch": 2.04, "learning_rate": 1.0634920634920634e-05, "loss": 0.2705, "step": 143 }, { "epoch": 2.23, "learning_rate": 8.571428571428571e-06, "loss": 0.2638, "step": 156 }, { "epoch": 2.41, "learning_rate": 6.507936507936508e-06, "loss": 0.2702, "step": 169 }, { "epoch": 2.6, "learning_rate": 4.444444444444444e-06, "loss": 0.2727, "step": 182 }, { "epoch": 2.79, "learning_rate": 2.3809523809523808e-06, "loss": 0.2775, "step": 195 }, { "epoch": 2.97, "learning_rate": 3.1746031746031743e-07, "loss": 0.2542, "step": 208 } ], "logging_steps": 13, "max_steps": 210, "num_input_tokens_seen": 0, "num_train_epochs": 3, "save_steps": 500, "total_flos": 1.694999141941248e+16, "train_batch_size": 2, "trial_name": null, "trial_params": null }