FirmanBr's picture
commit version 1
3e6a817
{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 9.933304952462041,
"global_step": 140000,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.04,
"learning_rate": 4.9822619554420325e-05,
"loss": 7.0399,
"step": 500
},
{
"epoch": 0.07,
"learning_rate": 4.964523910884065e-05,
"loss": 6.9038,
"step": 1000
},
{
"epoch": 0.11,
"learning_rate": 4.946785866326096e-05,
"loss": 6.8142,
"step": 1500
},
{
"epoch": 0.14,
"learning_rate": 4.9290478217681286e-05,
"loss": 6.7261,
"step": 2000
},
{
"epoch": 0.18,
"learning_rate": 4.911309777210161e-05,
"loss": 6.6601,
"step": 2500
},
{
"epoch": 0.21,
"learning_rate": 4.893571732652193e-05,
"loss": 6.607,
"step": 3000
},
{
"epoch": 0.25,
"learning_rate": 4.8758336880942247e-05,
"loss": 6.565,
"step": 3500
},
{
"epoch": 0.28,
"learning_rate": 4.858095643536257e-05,
"loss": 6.5083,
"step": 4000
},
{
"epoch": 0.32,
"learning_rate": 4.840357598978289e-05,
"loss": 6.4739,
"step": 4500
},
{
"epoch": 0.35,
"learning_rate": 4.8226195544203214e-05,
"loss": 6.4472,
"step": 5000
},
{
"epoch": 0.39,
"learning_rate": 4.804881509862353e-05,
"loss": 6.4133,
"step": 5500
},
{
"epoch": 0.43,
"learning_rate": 4.787143465304385e-05,
"loss": 6.3857,
"step": 6000
},
{
"epoch": 0.46,
"learning_rate": 4.7694054207464175e-05,
"loss": 6.3575,
"step": 6500
},
{
"epoch": 0.5,
"learning_rate": 4.75166737618845e-05,
"loss": 6.3337,
"step": 7000
},
{
"epoch": 0.53,
"learning_rate": 4.733929331630481e-05,
"loss": 6.3248,
"step": 7500
},
{
"epoch": 0.57,
"learning_rate": 4.7161912870725136e-05,
"loss": 6.2974,
"step": 8000
},
{
"epoch": 0.6,
"learning_rate": 4.698453242514546e-05,
"loss": 6.2786,
"step": 8500
},
{
"epoch": 0.64,
"learning_rate": 4.680715197956578e-05,
"loss": 6.2569,
"step": 9000
},
{
"epoch": 0.67,
"learning_rate": 4.6629771533986097e-05,
"loss": 6.2477,
"step": 9500
},
{
"epoch": 0.71,
"learning_rate": 4.645239108840642e-05,
"loss": 6.2252,
"step": 10000
},
{
"epoch": 0.74,
"learning_rate": 4.627501064282674e-05,
"loss": 6.212,
"step": 10500
},
{
"epoch": 0.78,
"learning_rate": 4.609763019724706e-05,
"loss": 6.1802,
"step": 11000
},
{
"epoch": 0.82,
"learning_rate": 4.592024975166738e-05,
"loss": 6.1081,
"step": 11500
},
{
"epoch": 0.85,
"learning_rate": 4.5742869306087696e-05,
"loss": 6.0248,
"step": 12000
},
{
"epoch": 0.89,
"learning_rate": 4.556548886050802e-05,
"loss": 5.8882,
"step": 12500
},
{
"epoch": 0.92,
"learning_rate": 4.538810841492834e-05,
"loss": 5.7505,
"step": 13000
},
{
"epoch": 0.96,
"learning_rate": 4.5210727969348656e-05,
"loss": 5.5657,
"step": 13500
},
{
"epoch": 0.99,
"learning_rate": 4.503334752376898e-05,
"loss": 5.4052,
"step": 14000
},
{
"epoch": 1.03,
"learning_rate": 4.48559670781893e-05,
"loss": 5.2834,
"step": 14500
},
{
"epoch": 1.06,
"learning_rate": 4.4678586632609624e-05,
"loss": 5.1588,
"step": 15000
},
{
"epoch": 1.1,
"learning_rate": 4.450120618702994e-05,
"loss": 5.0617,
"step": 15500
},
{
"epoch": 1.14,
"learning_rate": 4.432382574145026e-05,
"loss": 4.9254,
"step": 16000
},
{
"epoch": 1.17,
"learning_rate": 4.4146445295870585e-05,
"loss": 4.7733,
"step": 16500
},
{
"epoch": 1.21,
"learning_rate": 4.396906485029091e-05,
"loss": 4.6491,
"step": 17000
},
{
"epoch": 1.24,
"learning_rate": 4.379168440471122e-05,
"loss": 4.5447,
"step": 17500
},
{
"epoch": 1.28,
"learning_rate": 4.3614303959131546e-05,
"loss": 4.4222,
"step": 18000
},
{
"epoch": 1.31,
"learning_rate": 4.343692351355187e-05,
"loss": 4.3276,
"step": 18500
},
{
"epoch": 1.35,
"learning_rate": 4.325954306797219e-05,
"loss": 4.2297,
"step": 19000
},
{
"epoch": 1.38,
"learning_rate": 4.3082162622392507e-05,
"loss": 4.169,
"step": 19500
},
{
"epoch": 1.42,
"learning_rate": 4.290478217681283e-05,
"loss": 4.1032,
"step": 20000
},
{
"epoch": 1.45,
"learning_rate": 4.272740173123315e-05,
"loss": 4.0278,
"step": 20500
},
{
"epoch": 1.49,
"learning_rate": 4.2550021285653474e-05,
"loss": 3.9557,
"step": 21000
},
{
"epoch": 1.53,
"learning_rate": 4.237264084007379e-05,
"loss": 3.8852,
"step": 21500
},
{
"epoch": 1.56,
"learning_rate": 4.219526039449411e-05,
"loss": 3.8525,
"step": 22000
},
{
"epoch": 1.6,
"learning_rate": 4.2017879948914435e-05,
"loss": 3.8033,
"step": 22500
},
{
"epoch": 1.63,
"learning_rate": 4.184049950333476e-05,
"loss": 3.7525,
"step": 23000
},
{
"epoch": 1.67,
"learning_rate": 4.166311905775507e-05,
"loss": 3.7301,
"step": 23500
},
{
"epoch": 1.7,
"learning_rate": 4.1485738612175396e-05,
"loss": 3.6619,
"step": 24000
},
{
"epoch": 1.74,
"learning_rate": 4.130835816659572e-05,
"loss": 3.6457,
"step": 24500
},
{
"epoch": 1.77,
"learning_rate": 4.113097772101604e-05,
"loss": 3.6079,
"step": 25000
},
{
"epoch": 1.81,
"learning_rate": 4.0953597275436357e-05,
"loss": 3.569,
"step": 25500
},
{
"epoch": 1.84,
"learning_rate": 4.077621682985668e-05,
"loss": 3.5467,
"step": 26000
},
{
"epoch": 1.88,
"learning_rate": 4.0598836384277e-05,
"loss": 3.5302,
"step": 26500
},
{
"epoch": 1.92,
"learning_rate": 4.0421455938697324e-05,
"loss": 3.4952,
"step": 27000
},
{
"epoch": 1.95,
"learning_rate": 4.024407549311764e-05,
"loss": 3.4743,
"step": 27500
},
{
"epoch": 1.99,
"learning_rate": 4.006669504753796e-05,
"loss": 3.4558,
"step": 28000
},
{
"epoch": 2.02,
"learning_rate": 3.9889314601958285e-05,
"loss": 3.4161,
"step": 28500
},
{
"epoch": 2.06,
"learning_rate": 3.971193415637861e-05,
"loss": 3.3899,
"step": 29000
},
{
"epoch": 2.09,
"learning_rate": 3.953455371079892e-05,
"loss": 3.3836,
"step": 29500
},
{
"epoch": 2.13,
"learning_rate": 3.9357173265219246e-05,
"loss": 3.3521,
"step": 30000
},
{
"epoch": 2.16,
"learning_rate": 3.917979281963957e-05,
"loss": 3.3418,
"step": 30500
},
{
"epoch": 2.2,
"learning_rate": 3.900241237405989e-05,
"loss": 3.3161,
"step": 31000
},
{
"epoch": 2.23,
"learning_rate": 3.8825031928480207e-05,
"loss": 3.3091,
"step": 31500
},
{
"epoch": 2.27,
"learning_rate": 3.864765148290053e-05,
"loss": 3.2917,
"step": 32000
},
{
"epoch": 2.31,
"learning_rate": 3.847027103732085e-05,
"loss": 3.268,
"step": 32500
},
{
"epoch": 2.34,
"learning_rate": 3.829289059174117e-05,
"loss": 3.2515,
"step": 33000
},
{
"epoch": 2.38,
"learning_rate": 3.811551014616149e-05,
"loss": 3.2415,
"step": 33500
},
{
"epoch": 2.41,
"learning_rate": 3.793812970058181e-05,
"loss": 3.2177,
"step": 34000
},
{
"epoch": 2.45,
"learning_rate": 3.776074925500213e-05,
"loss": 3.2102,
"step": 34500
},
{
"epoch": 2.48,
"learning_rate": 3.758336880942245e-05,
"loss": 3.2186,
"step": 35000
},
{
"epoch": 2.52,
"learning_rate": 3.740598836384277e-05,
"loss": 3.1951,
"step": 35500
},
{
"epoch": 2.55,
"learning_rate": 3.722860791826309e-05,
"loss": 3.1766,
"step": 36000
},
{
"epoch": 2.59,
"learning_rate": 3.705122747268341e-05,
"loss": 3.1603,
"step": 36500
},
{
"epoch": 2.63,
"learning_rate": 3.6873847027103734e-05,
"loss": 3.1269,
"step": 37000
},
{
"epoch": 2.66,
"learning_rate": 3.669646658152405e-05,
"loss": 3.1431,
"step": 37500
},
{
"epoch": 2.7,
"learning_rate": 3.651908613594437e-05,
"loss": 3.112,
"step": 38000
},
{
"epoch": 2.73,
"learning_rate": 3.6341705690364695e-05,
"loss": 3.1211,
"step": 38500
},
{
"epoch": 2.77,
"learning_rate": 3.616432524478502e-05,
"loss": 3.0896,
"step": 39000
},
{
"epoch": 2.8,
"learning_rate": 3.598694479920533e-05,
"loss": 3.0972,
"step": 39500
},
{
"epoch": 2.84,
"learning_rate": 3.5809564353625656e-05,
"loss": 3.0821,
"step": 40000
},
{
"epoch": 2.87,
"learning_rate": 3.563218390804598e-05,
"loss": 3.0626,
"step": 40500
},
{
"epoch": 2.91,
"learning_rate": 3.54548034624663e-05,
"loss": 3.0586,
"step": 41000
},
{
"epoch": 2.94,
"learning_rate": 3.5277423016886617e-05,
"loss": 3.0464,
"step": 41500
},
{
"epoch": 2.98,
"learning_rate": 3.510004257130694e-05,
"loss": 3.0368,
"step": 42000
},
{
"epoch": 3.02,
"learning_rate": 3.492266212572726e-05,
"loss": 3.0282,
"step": 42500
},
{
"epoch": 3.05,
"learning_rate": 3.4745281680147584e-05,
"loss": 2.9903,
"step": 43000
},
{
"epoch": 3.09,
"learning_rate": 3.45679012345679e-05,
"loss": 2.9912,
"step": 43500
},
{
"epoch": 3.12,
"learning_rate": 3.439052078898822e-05,
"loss": 2.9902,
"step": 44000
},
{
"epoch": 3.16,
"learning_rate": 3.4213140343408545e-05,
"loss": 2.995,
"step": 44500
},
{
"epoch": 3.19,
"learning_rate": 3.403575989782887e-05,
"loss": 2.9736,
"step": 45000
},
{
"epoch": 3.23,
"learning_rate": 3.385837945224918e-05,
"loss": 2.9724,
"step": 45500
},
{
"epoch": 3.26,
"learning_rate": 3.3680999006669506e-05,
"loss": 2.9636,
"step": 46000
},
{
"epoch": 3.3,
"learning_rate": 3.350361856108983e-05,
"loss": 2.957,
"step": 46500
},
{
"epoch": 3.33,
"learning_rate": 3.332623811551015e-05,
"loss": 2.9563,
"step": 47000
},
{
"epoch": 3.37,
"learning_rate": 3.3148857669930467e-05,
"loss": 2.9381,
"step": 47500
},
{
"epoch": 3.41,
"learning_rate": 3.297147722435079e-05,
"loss": 2.926,
"step": 48000
},
{
"epoch": 3.44,
"learning_rate": 3.279409677877111e-05,
"loss": 2.933,
"step": 48500
},
{
"epoch": 3.48,
"learning_rate": 3.2616716333191434e-05,
"loss": 2.9144,
"step": 49000
},
{
"epoch": 3.51,
"learning_rate": 3.243933588761175e-05,
"loss": 2.9084,
"step": 49500
},
{
"epoch": 3.55,
"learning_rate": 3.226195544203207e-05,
"loss": 2.9116,
"step": 50000
},
{
"epoch": 3.58,
"learning_rate": 3.2084574996452395e-05,
"loss": 2.8892,
"step": 50500
},
{
"epoch": 3.62,
"learning_rate": 3.190719455087272e-05,
"loss": 2.8828,
"step": 51000
},
{
"epoch": 3.65,
"learning_rate": 3.172981410529303e-05,
"loss": 2.8843,
"step": 51500
},
{
"epoch": 3.69,
"learning_rate": 3.1552433659713356e-05,
"loss": 2.8799,
"step": 52000
},
{
"epoch": 3.72,
"learning_rate": 3.137505321413368e-05,
"loss": 2.8586,
"step": 52500
},
{
"epoch": 3.76,
"learning_rate": 3.1197672768554e-05,
"loss": 2.8601,
"step": 53000
},
{
"epoch": 3.8,
"learning_rate": 3.1020292322974317e-05,
"loss": 2.8523,
"step": 53500
},
{
"epoch": 3.83,
"learning_rate": 3.084291187739464e-05,
"loss": 2.8517,
"step": 54000
},
{
"epoch": 3.87,
"learning_rate": 3.066553143181496e-05,
"loss": 2.8347,
"step": 54500
},
{
"epoch": 3.9,
"learning_rate": 3.048815098623528e-05,
"loss": 2.8427,
"step": 55000
},
{
"epoch": 3.94,
"learning_rate": 3.0310770540655597e-05,
"loss": 2.8471,
"step": 55500
},
{
"epoch": 3.97,
"learning_rate": 3.013339009507592e-05,
"loss": 2.8201,
"step": 56000
},
{
"epoch": 4.01,
"learning_rate": 2.995600964949624e-05,
"loss": 2.8236,
"step": 56500
},
{
"epoch": 4.04,
"learning_rate": 2.9778629203916564e-05,
"loss": 2.8043,
"step": 57000
},
{
"epoch": 4.08,
"learning_rate": 2.960124875833688e-05,
"loss": 2.7957,
"step": 57500
},
{
"epoch": 4.12,
"learning_rate": 2.9423868312757202e-05,
"loss": 2.8042,
"step": 58000
},
{
"epoch": 4.15,
"learning_rate": 2.9246487867177525e-05,
"loss": 2.7836,
"step": 58500
},
{
"epoch": 4.19,
"learning_rate": 2.9069107421597847e-05,
"loss": 2.7875,
"step": 59000
},
{
"epoch": 4.22,
"learning_rate": 2.8891726976018163e-05,
"loss": 2.7737,
"step": 59500
},
{
"epoch": 4.26,
"learning_rate": 2.8714346530438486e-05,
"loss": 2.7783,
"step": 60000
},
{
"epoch": 4.29,
"learning_rate": 2.8536966084858808e-05,
"loss": 2.7689,
"step": 60500
},
{
"epoch": 4.33,
"learning_rate": 2.835958563927913e-05,
"loss": 2.7689,
"step": 61000
},
{
"epoch": 4.36,
"learning_rate": 2.8182205193699447e-05,
"loss": 2.7737,
"step": 61500
},
{
"epoch": 4.4,
"learning_rate": 2.800482474811977e-05,
"loss": 2.7595,
"step": 62000
},
{
"epoch": 4.43,
"learning_rate": 2.7827444302540088e-05,
"loss": 2.7657,
"step": 62500
},
{
"epoch": 4.47,
"learning_rate": 2.765006385696041e-05,
"loss": 2.7664,
"step": 63000
},
{
"epoch": 4.51,
"learning_rate": 2.747268341138073e-05,
"loss": 2.7394,
"step": 63500
},
{
"epoch": 4.54,
"learning_rate": 2.729530296580105e-05,
"loss": 2.7357,
"step": 64000
},
{
"epoch": 4.58,
"learning_rate": 2.711792252022137e-05,
"loss": 2.7346,
"step": 64500
},
{
"epoch": 4.61,
"learning_rate": 2.6940542074641694e-05,
"loss": 2.7351,
"step": 65000
},
{
"epoch": 4.65,
"learning_rate": 2.676316162906201e-05,
"loss": 2.7268,
"step": 65500
},
{
"epoch": 4.68,
"learning_rate": 2.6585781183482332e-05,
"loss": 2.7319,
"step": 66000
},
{
"epoch": 4.72,
"learning_rate": 2.6408400737902655e-05,
"loss": 2.7282,
"step": 66500
},
{
"epoch": 4.75,
"learning_rate": 2.6231020292322977e-05,
"loss": 2.7297,
"step": 67000
},
{
"epoch": 4.79,
"learning_rate": 2.6053639846743293e-05,
"loss": 2.7112,
"step": 67500
},
{
"epoch": 4.82,
"learning_rate": 2.5876259401163616e-05,
"loss": 2.7315,
"step": 68000
},
{
"epoch": 4.86,
"learning_rate": 2.5698878955583938e-05,
"loss": 2.704,
"step": 68500
},
{
"epoch": 4.9,
"learning_rate": 2.552149851000426e-05,
"loss": 2.7036,
"step": 69000
},
{
"epoch": 4.93,
"learning_rate": 2.5344118064424577e-05,
"loss": 2.7103,
"step": 69500
},
{
"epoch": 4.97,
"learning_rate": 2.51667376188449e-05,
"loss": 2.7022,
"step": 70000
},
{
"epoch": 5.0,
"learning_rate": 2.498935717326522e-05,
"loss": 2.6949,
"step": 70500
},
{
"epoch": 5.04,
"learning_rate": 2.481197672768554e-05,
"loss": 2.681,
"step": 71000
},
{
"epoch": 5.07,
"learning_rate": 2.4634596282105863e-05,
"loss": 2.6636,
"step": 71500
},
{
"epoch": 5.11,
"learning_rate": 2.4457215836526182e-05,
"loss": 2.6868,
"step": 72000
},
{
"epoch": 5.14,
"learning_rate": 2.4279835390946505e-05,
"loss": 2.6683,
"step": 72500
},
{
"epoch": 5.18,
"learning_rate": 2.4102454945366824e-05,
"loss": 2.6765,
"step": 73000
},
{
"epoch": 5.21,
"learning_rate": 2.3925074499787147e-05,
"loss": 2.6665,
"step": 73500
},
{
"epoch": 5.25,
"learning_rate": 2.3747694054207466e-05,
"loss": 2.6672,
"step": 74000
},
{
"epoch": 5.29,
"learning_rate": 2.3570313608627785e-05,
"loss": 2.6604,
"step": 74500
},
{
"epoch": 5.32,
"learning_rate": 2.3392933163048107e-05,
"loss": 2.6552,
"step": 75000
},
{
"epoch": 5.36,
"learning_rate": 2.3215552717468427e-05,
"loss": 2.6561,
"step": 75500
},
{
"epoch": 5.39,
"learning_rate": 2.3038172271888746e-05,
"loss": 2.6418,
"step": 76000
},
{
"epoch": 5.43,
"learning_rate": 2.2860791826309068e-05,
"loss": 2.6552,
"step": 76500
},
{
"epoch": 5.46,
"learning_rate": 2.2683411380729387e-05,
"loss": 2.6404,
"step": 77000
},
{
"epoch": 5.5,
"learning_rate": 2.250603093514971e-05,
"loss": 2.6317,
"step": 77500
},
{
"epoch": 5.53,
"learning_rate": 2.232865048957003e-05,
"loss": 2.6343,
"step": 78000
},
{
"epoch": 5.57,
"learning_rate": 2.215127004399035e-05,
"loss": 2.6446,
"step": 78500
},
{
"epoch": 5.61,
"learning_rate": 2.197388959841067e-05,
"loss": 2.6201,
"step": 79000
},
{
"epoch": 5.64,
"learning_rate": 2.1796509152830993e-05,
"loss": 2.6266,
"step": 79500
},
{
"epoch": 5.68,
"learning_rate": 2.1619128707251312e-05,
"loss": 2.6229,
"step": 80000
},
{
"epoch": 5.71,
"learning_rate": 2.1441748261671635e-05,
"loss": 2.6297,
"step": 80500
},
{
"epoch": 5.75,
"learning_rate": 2.1264367816091954e-05,
"loss": 2.6183,
"step": 81000
},
{
"epoch": 5.78,
"learning_rate": 2.1086987370512277e-05,
"loss": 2.6183,
"step": 81500
},
{
"epoch": 5.82,
"learning_rate": 2.0909606924932596e-05,
"loss": 2.6164,
"step": 82000
},
{
"epoch": 5.85,
"learning_rate": 2.073222647935292e-05,
"loss": 2.6069,
"step": 82500
},
{
"epoch": 5.89,
"learning_rate": 2.0554846033773237e-05,
"loss": 2.6219,
"step": 83000
},
{
"epoch": 5.92,
"learning_rate": 2.037746558819356e-05,
"loss": 2.592,
"step": 83500
},
{
"epoch": 5.96,
"learning_rate": 2.020008514261388e-05,
"loss": 2.5975,
"step": 84000
},
{
"epoch": 6.0,
"learning_rate": 2.00227046970342e-05,
"loss": 2.6029,
"step": 84500
},
{
"epoch": 6.03,
"learning_rate": 1.984532425145452e-05,
"loss": 2.5913,
"step": 85000
},
{
"epoch": 6.07,
"learning_rate": 1.9667943805874843e-05,
"loss": 2.5867,
"step": 85500
},
{
"epoch": 6.1,
"learning_rate": 1.9490563360295162e-05,
"loss": 2.5899,
"step": 86000
},
{
"epoch": 6.14,
"learning_rate": 1.931318291471548e-05,
"loss": 2.5923,
"step": 86500
},
{
"epoch": 6.17,
"learning_rate": 1.91358024691358e-05,
"loss": 2.5742,
"step": 87000
},
{
"epoch": 6.21,
"learning_rate": 1.8958422023556123e-05,
"loss": 2.5859,
"step": 87500
},
{
"epoch": 6.24,
"learning_rate": 1.8781041577976442e-05,
"loss": 2.5683,
"step": 88000
},
{
"epoch": 6.28,
"learning_rate": 1.8603661132396765e-05,
"loss": 2.5808,
"step": 88500
},
{
"epoch": 6.31,
"learning_rate": 1.8426280686817084e-05,
"loss": 2.5663,
"step": 89000
},
{
"epoch": 6.35,
"learning_rate": 1.8248900241237407e-05,
"loss": 2.5677,
"step": 89500
},
{
"epoch": 6.39,
"learning_rate": 1.8071519795657726e-05,
"loss": 2.5647,
"step": 90000
},
{
"epoch": 6.42,
"learning_rate": 1.7894139350078048e-05,
"loss": 2.5656,
"step": 90500
},
{
"epoch": 6.46,
"learning_rate": 1.7716758904498367e-05,
"loss": 2.5682,
"step": 91000
},
{
"epoch": 6.49,
"learning_rate": 1.753937845891869e-05,
"loss": 2.5645,
"step": 91500
},
{
"epoch": 6.53,
"learning_rate": 1.736199801333901e-05,
"loss": 2.5586,
"step": 92000
},
{
"epoch": 6.56,
"learning_rate": 1.718461756775933e-05,
"loss": 2.5488,
"step": 92500
},
{
"epoch": 6.6,
"learning_rate": 1.700723712217965e-05,
"loss": 2.5547,
"step": 93000
},
{
"epoch": 6.63,
"learning_rate": 1.6829856676599973e-05,
"loss": 2.5485,
"step": 93500
},
{
"epoch": 6.67,
"learning_rate": 1.6652476231020292e-05,
"loss": 2.5503,
"step": 94000
},
{
"epoch": 6.7,
"learning_rate": 1.6475095785440615e-05,
"loss": 2.547,
"step": 94500
},
{
"epoch": 6.74,
"learning_rate": 1.6297715339860934e-05,
"loss": 2.5468,
"step": 95000
},
{
"epoch": 6.78,
"learning_rate": 1.6120334894281257e-05,
"loss": 2.5416,
"step": 95500
},
{
"epoch": 6.81,
"learning_rate": 1.5942954448701576e-05,
"loss": 2.5562,
"step": 96000
},
{
"epoch": 6.85,
"learning_rate": 1.57655740031219e-05,
"loss": 2.5311,
"step": 96500
},
{
"epoch": 6.88,
"learning_rate": 1.5588193557542217e-05,
"loss": 2.5481,
"step": 97000
},
{
"epoch": 6.92,
"learning_rate": 1.5410813111962537e-05,
"loss": 2.5362,
"step": 97500
},
{
"epoch": 6.95,
"learning_rate": 1.5233432666382857e-05,
"loss": 2.5161,
"step": 98000
},
{
"epoch": 6.99,
"learning_rate": 1.505605222080318e-05,
"loss": 2.5354,
"step": 98500
},
{
"epoch": 7.02,
"learning_rate": 1.4878671775223499e-05,
"loss": 2.5214,
"step": 99000
},
{
"epoch": 7.06,
"learning_rate": 1.4701291329643822e-05,
"loss": 2.5271,
"step": 99500
},
{
"epoch": 7.1,
"learning_rate": 1.452391088406414e-05,
"loss": 2.5024,
"step": 100000
},
{
"epoch": 7.13,
"learning_rate": 1.4346530438484462e-05,
"loss": 2.5096,
"step": 100500
},
{
"epoch": 7.17,
"learning_rate": 1.4169149992904782e-05,
"loss": 2.5094,
"step": 101000
},
{
"epoch": 7.2,
"learning_rate": 1.3991769547325103e-05,
"loss": 2.5151,
"step": 101500
},
{
"epoch": 7.24,
"learning_rate": 1.3814389101745422e-05,
"loss": 2.5141,
"step": 102000
},
{
"epoch": 7.27,
"learning_rate": 1.3637008656165745e-05,
"loss": 2.5102,
"step": 102500
},
{
"epoch": 7.31,
"learning_rate": 1.3459628210586064e-05,
"loss": 2.5091,
"step": 103000
},
{
"epoch": 7.34,
"learning_rate": 1.3282247765006387e-05,
"loss": 2.5025,
"step": 103500
},
{
"epoch": 7.38,
"learning_rate": 1.3104867319426706e-05,
"loss": 2.5025,
"step": 104000
},
{
"epoch": 7.41,
"learning_rate": 1.2927486873847028e-05,
"loss": 2.5051,
"step": 104500
},
{
"epoch": 7.45,
"learning_rate": 1.2750106428267347e-05,
"loss": 2.5084,
"step": 105000
},
{
"epoch": 7.49,
"learning_rate": 1.257272598268767e-05,
"loss": 2.4955,
"step": 105500
},
{
"epoch": 7.52,
"learning_rate": 1.2395345537107989e-05,
"loss": 2.4915,
"step": 106000
},
{
"epoch": 7.56,
"learning_rate": 1.221796509152831e-05,
"loss": 2.5053,
"step": 106500
},
{
"epoch": 7.59,
"learning_rate": 1.204058464594863e-05,
"loss": 2.4989,
"step": 107000
},
{
"epoch": 7.63,
"learning_rate": 1.1863204200368952e-05,
"loss": 2.4957,
"step": 107500
},
{
"epoch": 7.66,
"learning_rate": 1.1685823754789272e-05,
"loss": 2.4909,
"step": 108000
},
{
"epoch": 7.7,
"learning_rate": 1.1508443309209593e-05,
"loss": 2.492,
"step": 108500
},
{
"epoch": 7.73,
"learning_rate": 1.1331062863629914e-05,
"loss": 2.4929,
"step": 109000
},
{
"epoch": 7.77,
"learning_rate": 1.1153682418050235e-05,
"loss": 2.4871,
"step": 109500
},
{
"epoch": 7.8,
"learning_rate": 1.0976301972470556e-05,
"loss": 2.4843,
"step": 110000
},
{
"epoch": 7.84,
"learning_rate": 1.0798921526890877e-05,
"loss": 2.4849,
"step": 110500
},
{
"epoch": 7.88,
"learning_rate": 1.0621541081311197e-05,
"loss": 2.4903,
"step": 111000
},
{
"epoch": 7.91,
"learning_rate": 1.0444160635731518e-05,
"loss": 2.4766,
"step": 111500
},
{
"epoch": 7.95,
"learning_rate": 1.0266780190151837e-05,
"loss": 2.4895,
"step": 112000
},
{
"epoch": 7.98,
"learning_rate": 1.0089399744572158e-05,
"loss": 2.4818,
"step": 112500
},
{
"epoch": 8.02,
"learning_rate": 9.912019298992479e-06,
"loss": 2.4782,
"step": 113000
},
{
"epoch": 8.05,
"learning_rate": 9.7346388534128e-06,
"loss": 2.4671,
"step": 113500
},
{
"epoch": 8.09,
"learning_rate": 9.55725840783312e-06,
"loss": 2.4665,
"step": 114000
},
{
"epoch": 8.12,
"learning_rate": 9.379877962253442e-06,
"loss": 2.4669,
"step": 114500
},
{
"epoch": 8.16,
"learning_rate": 9.202497516673762e-06,
"loss": 2.4552,
"step": 115000
},
{
"epoch": 8.19,
"learning_rate": 9.025117071094083e-06,
"loss": 2.459,
"step": 115500
},
{
"epoch": 8.23,
"learning_rate": 8.847736625514404e-06,
"loss": 2.455,
"step": 116000
},
{
"epoch": 8.27,
"learning_rate": 8.670356179934725e-06,
"loss": 2.472,
"step": 116500
},
{
"epoch": 8.3,
"learning_rate": 8.492975734355046e-06,
"loss": 2.4517,
"step": 117000
},
{
"epoch": 8.34,
"learning_rate": 8.315595288775365e-06,
"loss": 2.458,
"step": 117500
},
{
"epoch": 8.37,
"learning_rate": 8.138214843195686e-06,
"loss": 2.4597,
"step": 118000
},
{
"epoch": 8.41,
"learning_rate": 7.960834397616007e-06,
"loss": 2.4567,
"step": 118500
},
{
"epoch": 8.44,
"learning_rate": 7.783453952036327e-06,
"loss": 2.4561,
"step": 119000
},
{
"epoch": 8.48,
"learning_rate": 7.606073506456648e-06,
"loss": 2.4614,
"step": 119500
},
{
"epoch": 8.51,
"learning_rate": 7.428693060876969e-06,
"loss": 2.4592,
"step": 120000
},
{
"epoch": 8.55,
"learning_rate": 7.25131261529729e-06,
"loss": 2.456,
"step": 120500
},
{
"epoch": 8.59,
"learning_rate": 7.073932169717611e-06,
"loss": 2.4593,
"step": 121000
},
{
"epoch": 8.62,
"learning_rate": 6.896551724137932e-06,
"loss": 2.452,
"step": 121500
},
{
"epoch": 8.66,
"learning_rate": 6.719171278558252e-06,
"loss": 2.4414,
"step": 122000
},
{
"epoch": 8.69,
"learning_rate": 6.5417908329785725e-06,
"loss": 2.4558,
"step": 122500
},
{
"epoch": 8.73,
"learning_rate": 6.364410387398893e-06,
"loss": 2.4417,
"step": 123000
},
{
"epoch": 8.76,
"learning_rate": 6.187029941819214e-06,
"loss": 2.4364,
"step": 123500
},
{
"epoch": 8.8,
"learning_rate": 6.009649496239535e-06,
"loss": 2.4543,
"step": 124000
},
{
"epoch": 8.83,
"learning_rate": 5.832269050659856e-06,
"loss": 2.4419,
"step": 124500
},
{
"epoch": 8.87,
"learning_rate": 5.654888605080176e-06,
"loss": 2.446,
"step": 125000
},
{
"epoch": 8.9,
"learning_rate": 5.477508159500497e-06,
"loss": 2.4395,
"step": 125500
},
{
"epoch": 8.94,
"learning_rate": 5.3001277139208175e-06,
"loss": 2.4466,
"step": 126000
},
{
"epoch": 8.98,
"learning_rate": 5.122747268341138e-06,
"loss": 2.4301,
"step": 126500
},
{
"epoch": 9.01,
"learning_rate": 4.945366822761459e-06,
"loss": 2.4382,
"step": 127000
},
{
"epoch": 9.05,
"learning_rate": 4.767986377181779e-06,
"loss": 2.4406,
"step": 127500
},
{
"epoch": 9.08,
"learning_rate": 4.5906059316021e-06,
"loss": 2.4312,
"step": 128000
},
{
"epoch": 9.12,
"learning_rate": 4.413225486022421e-06,
"loss": 2.4409,
"step": 128500
},
{
"epoch": 9.15,
"learning_rate": 4.235845040442742e-06,
"loss": 2.4256,
"step": 129000
},
{
"epoch": 9.19,
"learning_rate": 4.0584645948630625e-06,
"loss": 2.4301,
"step": 129500
},
{
"epoch": 9.22,
"learning_rate": 3.881084149283383e-06,
"loss": 2.4338,
"step": 130000
},
{
"epoch": 9.26,
"learning_rate": 3.7037037037037037e-06,
"loss": 2.4367,
"step": 130500
},
{
"epoch": 9.29,
"learning_rate": 3.5263232581240246e-06,
"loss": 2.4198,
"step": 131000
},
{
"epoch": 9.33,
"learning_rate": 3.348942812544345e-06,
"loss": 2.4228,
"step": 131500
},
{
"epoch": 9.37,
"learning_rate": 3.171562366964666e-06,
"loss": 2.4257,
"step": 132000
},
{
"epoch": 9.4,
"learning_rate": 2.9941819213849867e-06,
"loss": 2.4286,
"step": 132500
},
{
"epoch": 9.44,
"learning_rate": 2.8168014758053075e-06,
"loss": 2.4153,
"step": 133000
},
{
"epoch": 9.47,
"learning_rate": 2.6394210302256283e-06,
"loss": 2.4235,
"step": 133500
},
{
"epoch": 9.51,
"learning_rate": 2.4620405846459487e-06,
"loss": 2.4243,
"step": 134000
},
{
"epoch": 9.54,
"learning_rate": 2.2846601390662696e-06,
"loss": 2.4235,
"step": 134500
},
{
"epoch": 9.58,
"learning_rate": 2.1072796934865904e-06,
"loss": 2.4297,
"step": 135000
},
{
"epoch": 9.61,
"learning_rate": 1.929899247906911e-06,
"loss": 2.4356,
"step": 135500
},
{
"epoch": 9.65,
"learning_rate": 1.7525188023272317e-06,
"loss": 2.4223,
"step": 136000
},
{
"epoch": 9.68,
"learning_rate": 1.5751383567475523e-06,
"loss": 2.4218,
"step": 136500
},
{
"epoch": 9.72,
"learning_rate": 1.397757911167873e-06,
"loss": 2.4211,
"step": 137000
},
{
"epoch": 9.76,
"learning_rate": 1.2203774655881937e-06,
"loss": 2.4233,
"step": 137500
},
{
"epoch": 9.79,
"learning_rate": 1.0429970200085144e-06,
"loss": 2.4107,
"step": 138000
},
{
"epoch": 9.83,
"learning_rate": 8.65616574428835e-07,
"loss": 2.4176,
"step": 138500
},
{
"epoch": 9.86,
"learning_rate": 6.882361288491557e-07,
"loss": 2.4227,
"step": 139000
},
{
"epoch": 9.9,
"learning_rate": 5.108556832694765e-07,
"loss": 2.4229,
"step": 139500
},
{
"epoch": 9.93,
"learning_rate": 3.334752376897971e-07,
"loss": 2.4089,
"step": 140000
}
],
"max_steps": 140940,
"num_train_epochs": 10,
"total_flos": 690009058341027840,
"trial_name": null,
"trial_params": null
}