{ "best_metric": null, "best_model_checkpoint": null, "epoch": 3.0, "eval_steps": 500, "global_step": 570, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.05263157894736842, "grad_norm": 3.230023438847162, "learning_rate": 8.771929824561404e-07, "loss": 0.8785, "step": 10 }, { "epoch": 0.10526315789473684, "grad_norm": 1.9510876137424145, "learning_rate": 1.7543859649122807e-06, "loss": 0.8742, "step": 20 }, { "epoch": 0.15789473684210525, "grad_norm": 1.0320266512111935, "learning_rate": 2.631578947368421e-06, "loss": 0.8391, "step": 30 }, { "epoch": 0.21052631578947367, "grad_norm": 0.7562938730352367, "learning_rate": 3.5087719298245615e-06, "loss": 0.7881, "step": 40 }, { "epoch": 0.2631578947368421, "grad_norm": 0.6746737747334569, "learning_rate": 4.385964912280702e-06, "loss": 0.7857, "step": 50 }, { "epoch": 0.3157894736842105, "grad_norm": 0.6593917110239484, "learning_rate": 4.999578104083307e-06, "loss": 0.7468, "step": 60 }, { "epoch": 0.3684210526315789, "grad_norm": 0.6792501503727066, "learning_rate": 4.992081692902699e-06, "loss": 0.7684, "step": 70 }, { "epoch": 0.42105263157894735, "grad_norm": 0.6939275587796848, "learning_rate": 4.975242169652916e-06, "loss": 0.7363, "step": 80 }, { "epoch": 0.47368421052631576, "grad_norm": 0.6751844581947373, "learning_rate": 4.949122667718935e-06, "loss": 0.7226, "step": 90 }, { "epoch": 0.5263157894736842, "grad_norm": 0.6678046054641891, "learning_rate": 4.913821112234774e-06, "loss": 0.73, "step": 100 }, { "epoch": 0.5789473684210527, "grad_norm": 0.6221954242988779, "learning_rate": 4.869469852950461e-06, "loss": 0.6965, "step": 110 }, { "epoch": 0.631578947368421, "grad_norm": 0.6290204386000421, "learning_rate": 4.8162351680370046e-06, "loss": 0.7053, "step": 120 }, { "epoch": 0.6842105263157895, "grad_norm": 0.6445192335525791, "learning_rate": 4.754316640689665e-06, "loss": 0.6997, "step": 130 }, { "epoch": 0.7368421052631579, "grad_norm": 0.6876556043322378, "learning_rate": 4.6839464108666966e-06, "loss": 0.7308, "step": 140 }, { "epoch": 0.7894736842105263, "grad_norm": 0.6295519095132266, "learning_rate": 4.605388304968915e-06, "loss": 0.7199, "step": 150 }, { "epoch": 0.8421052631578947, "grad_norm": 0.6539860573937848, "learning_rate": 4.5189368467229825e-06, "loss": 0.6926, "step": 160 }, { "epoch": 0.8947368421052632, "grad_norm": 0.6300905141578006, "learning_rate": 4.424916152976768e-06, "loss": 0.6947, "step": 170 }, { "epoch": 0.9473684210526315, "grad_norm": 0.6615562911331709, "learning_rate": 4.323678718546552e-06, "loss": 0.7123, "step": 180 }, { "epoch": 1.0, "grad_norm": 0.6470437913415834, "learning_rate": 4.215604094671835e-06, "loss": 0.7092, "step": 190 }, { "epoch": 1.0526315789473684, "grad_norm": 0.7060511412402218, "learning_rate": 4.101097466032383e-06, "loss": 0.6839, "step": 200 }, { "epoch": 1.1052631578947367, "grad_norm": 0.6412400907212885, "learning_rate": 3.980588131662451e-06, "loss": 0.6677, "step": 210 }, { "epoch": 1.1578947368421053, "grad_norm": 0.6264126402577104, "learning_rate": 3.854527895457394e-06, "loss": 0.6702, "step": 220 }, { "epoch": 1.2105263157894737, "grad_norm": 0.6449251591261542, "learning_rate": 3.7233893723068794e-06, "loss": 0.6708, "step": 230 }, { "epoch": 1.263157894736842, "grad_norm": 0.6652956921191605, "learning_rate": 3.5876642162051833e-06, "loss": 0.6413, "step": 240 }, { "epoch": 1.3157894736842106, "grad_norm": 0.6253857010686688, "learning_rate": 3.4478612769816195e-06, "loss": 
0.6593, "step": 250 }, { "epoch": 1.368421052631579, "grad_norm": 0.6445948481366444, "learning_rate": 3.3045046925617145e-06, "loss": 0.659, "step": 260 }, { "epoch": 1.4210526315789473, "grad_norm": 0.6480051495386988, "learning_rate": 3.1581319239114983e-06, "loss": 0.663, "step": 270 }, { "epoch": 1.4736842105263157, "grad_norm": 0.633862227291522, "learning_rate": 3.009291740032111e-06, "loss": 0.6748, "step": 280 }, { "epoch": 1.526315789473684, "grad_norm": 0.6358525653558635, "learning_rate": 2.858542160559241e-06, "loss": 0.6575, "step": 290 }, { "epoch": 1.5789473684210527, "grad_norm": 0.6614867978116219, "learning_rate": 2.7064483636808314e-06, "loss": 0.6501, "step": 300 }, { "epoch": 1.631578947368421, "grad_norm": 0.6615867287203148, "learning_rate": 2.5535805672165083e-06, "loss": 0.6456, "step": 310 }, { "epoch": 1.6842105263157894, "grad_norm": 0.6416738575236249, "learning_rate": 2.4005118908028397e-06, "loss": 0.6412, "step": 320 }, { "epoch": 1.736842105263158, "grad_norm": 0.6646967245534955, "learning_rate": 2.2478162071993296e-06, "loss": 0.645, "step": 330 }, { "epoch": 1.7894736842105263, "grad_norm": 0.5909525270332697, "learning_rate": 2.0960659907708633e-06, "loss": 0.6244, "step": 340 }, { "epoch": 1.8421052631578947, "grad_norm": 0.6298825515473904, "learning_rate": 1.9458301712129034e-06, "loss": 0.6583, "step": 350 }, { "epoch": 1.8947368421052633, "grad_norm": 0.6257581849719303, "learning_rate": 1.797672000566077e-06, "loss": 0.6594, "step": 360 }, { "epoch": 1.9473684210526314, "grad_norm": 0.6206670443623271, "learning_rate": 1.6521469415169632e-06, "loss": 0.643, "step": 370 }, { "epoch": 2.0, "grad_norm": 0.5987542159919731, "learning_rate": 1.509800584902108e-06, "loss": 0.6501, "step": 380 }, { "epoch": 2.0526315789473686, "grad_norm": 0.6405902014607973, "learning_rate": 1.3711666042227772e-06, "loss": 0.6368, "step": 390 }, { "epoch": 2.1052631578947367, "grad_norm": 0.6326449118176264, "learning_rate": 1.236764754839226e-06, "loss": 0.6259, "step": 400 }, { "epoch": 2.1578947368421053, "grad_norm": 0.6270368504954021, "learning_rate": 1.1070989253457461e-06, "loss": 0.6322, "step": 410 }, { "epoch": 2.2105263157894735, "grad_norm": 0.6625982845048544, "learning_rate": 9.826552484321086e-07, "loss": 0.6147, "step": 420 }, { "epoch": 2.263157894736842, "grad_norm": 0.6144875346367215, "learning_rate": 8.639002783140183e-07, "loss": 0.602, "step": 430 }, { "epoch": 2.3157894736842106, "grad_norm": 0.633998767362059, "learning_rate": 7.512792415656056e-07, "loss": 0.6391, "step": 440 }, { "epoch": 2.3684210526315788, "grad_norm": 0.6240289395475747, "learning_rate": 6.452143679117965e-07, "loss": 0.6258, "step": 450 }, { "epoch": 2.4210526315789473, "grad_norm": 0.6262640150031007, "learning_rate": 5.461033072386171e-07, "loss": 0.6339, "step": 460 }, { "epoch": 2.473684210526316, "grad_norm": 0.6070034987067966, "learning_rate": 4.543176387562523e-07, "loss": 0.611, "step": 470 }, { "epoch": 2.526315789473684, "grad_norm": 0.6298166958922391, "learning_rate": 3.7020147790418266e-07, "loss": 0.5994, "step": 480 }, { "epoch": 2.5789473684210527, "grad_norm": 0.6483280464534616, "learning_rate": 2.9407018622128024e-07, "loss": 0.6179, "step": 490 }, { "epoch": 2.6315789473684212, "grad_norm": 0.6204891629886625, "learning_rate": 2.262091890177151e-07, "loss": 0.6236, "step": 500 }, { "epoch": 2.6842105263157894, "grad_norm": 0.6424740818520618, "learning_rate": 1.6687290528135725e-07, "loss": 0.6487, "step": 510 }, { "epoch": 2.736842105263158, 
"grad_norm": 0.5789123272434008, "learning_rate": 1.1628379383059024e-07, "loss": 0.6127, "step": 520 }, { "epoch": 2.7894736842105265, "grad_norm": 0.648084660680176, "learning_rate": 7.463151928961549e-08, "loss": 0.6282, "step": 530 }, { "epoch": 2.8421052631578947, "grad_norm": 0.6353845279488379, "learning_rate": 4.207224101311247e-08, "loss": 0.6407, "step": 540 }, { "epoch": 2.8947368421052633, "grad_norm": 0.6247887487352389, "learning_rate": 1.8728027626156996e-08, "loss": 0.6035, "step": 550 }, { "epoch": 2.9473684210526314, "grad_norm": 0.6002823720543023, "learning_rate": 4.686399374358441e-09, "loss": 0.6269, "step": 560 }, { "epoch": 3.0, "grad_norm": 0.6433987512407431, "learning_rate": 0.0, "loss": 0.6228, "step": 570 }, { "epoch": 3.0, "step": 570, "total_flos": 60591251128320.0, "train_loss": 0.6761606249892921, "train_runtime": 1362.0846, "train_samples_per_second": 53.47, "train_steps_per_second": 0.418 } ], "logging_steps": 10, "max_steps": 570, "num_input_tokens_seen": 0, "num_train_epochs": 3, "save_steps": 10086, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": false, "should_training_stop": false }, "attributes": {} } }, "total_flos": 60591251128320.0, "train_batch_size": 16, "trial_name": null, "trial_params": null }