{ "best_metric": null, "best_model_checkpoint": null, "epoch": 97.19626168224299, "eval_steps": 500, "global_step": 2600, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 3.18, "learning_rate": 0.0003269230769230769, "loss": 1.6646, "step": 85 }, { "epoch": 6.36, "learning_rate": 0.0006538461538461538, "loss": 1.517, "step": 170 }, { "epoch": 9.53, "learning_rate": 0.0009807692307692308, "loss": 1.4586, "step": 255 }, { "epoch": 12.71, "learning_rate": 0.0009658119658119658, "loss": 1.3597, "step": 340 }, { "epoch": 15.89, "learning_rate": 0.0009294871794871796, "loss": 1.2625, "step": 425 }, { "epoch": 19.07, "learning_rate": 0.0008931623931623932, "loss": 1.1835, "step": 510 }, { "epoch": 22.24, "learning_rate": 0.0008568376068376068, "loss": 1.1184, "step": 595 }, { "epoch": 25.42, "learning_rate": 0.0008205128205128205, "loss": 1.0683, "step": 680 }, { "epoch": 28.6, "learning_rate": 0.0007841880341880342, "loss": 1.0321, "step": 765 }, { "epoch": 31.78, "learning_rate": 0.0007478632478632479, "loss": 1.0009, "step": 850 }, { "epoch": 34.95, "learning_rate": 0.0007115384615384616, "loss": 0.9663, "step": 935 }, { "epoch": 38.13, "learning_rate": 0.0006752136752136753, "loss": 0.938, "step": 1020 }, { "epoch": 41.31, "learning_rate": 0.0006388888888888888, "loss": 0.9178, "step": 1105 }, { "epoch": 44.49, "learning_rate": 0.0006025641025641026, "loss": 0.8984, "step": 1190 }, { "epoch": 47.66, "learning_rate": 0.0005662393162393163, "loss": 0.8827, "step": 1275 }, { "epoch": 50.84, "learning_rate": 0.0005299145299145299, "loss": 0.8693, "step": 1360 }, { "epoch": 54.02, "learning_rate": 0.0004935897435897436, "loss": 0.8514, "step": 1445 }, { "epoch": 57.2, "learning_rate": 0.0004572649572649573, "loss": 0.8349, "step": 1530 }, { "epoch": 60.37, "learning_rate": 0.00042094017094017095, "loss": 0.824, "step": 1615 }, { "epoch": 63.55, "learning_rate": 0.00038461538461538467, "loss": 0.8138, "step": 1700 }, { "epoch": 66.73, "learning_rate": 0.0003482905982905983, "loss": 0.8044, "step": 1785 }, { "epoch": 69.91, "learning_rate": 0.00031196581196581195, "loss": 0.7962, "step": 1870 }, { "epoch": 73.08, "learning_rate": 0.0002756410256410257, "loss": 0.7854, "step": 1955 }, { "epoch": 76.26, "learning_rate": 0.00023931623931623932, "loss": 0.7756, "step": 2040 }, { "epoch": 79.44, "learning_rate": 0.000202991452991453, "loss": 0.7663, "step": 2125 }, { "epoch": 82.62, "learning_rate": 0.00016666666666666666, "loss": 0.7569, "step": 2210 }, { "epoch": 85.79, "learning_rate": 0.00013034188034188036, "loss": 0.7491, "step": 2295 }, { "epoch": 88.97, "learning_rate": 9.401709401709401e-05, "loss": 0.7427, "step": 2380 }, { "epoch": 92.15, "learning_rate": 5.76923076923077e-05, "loss": 0.7344, "step": 2465 }, { "epoch": 95.33, "learning_rate": 2.1367521367521368e-05, "loss": 0.7307, "step": 2550 } ], "logging_steps": 85, "max_steps": 2600, "num_train_epochs": 100, "save_steps": 500, "total_flos": 2.7054682298346635e+19, "trial_name": null, "trial_params": null }