{
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 9.933304952462041,
  "global_step": 140000,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    { "epoch": 0.04, "learning_rate": 4.9822619554420325e-05, "loss": 7.0399, "step": 500 },
    { "epoch": 0.07, "learning_rate": 4.964523910884065e-05, "loss": 6.9038, "step": 1000 },
    { "epoch": 0.11, "learning_rate": 4.946785866326096e-05, "loss": 6.8142, "step": 1500 },
    { "epoch": 0.14, "learning_rate": 4.9290478217681286e-05, "loss": 6.7261, "step": 2000 },
    { "epoch": 0.18, "learning_rate": 4.911309777210161e-05, "loss": 6.6601, "step": 2500 },
    { "epoch": 0.21, "learning_rate": 4.893571732652193e-05, "loss": 6.607, "step": 3000 },
    { "epoch": 0.25, "learning_rate": 4.8758336880942247e-05, "loss": 6.565, "step": 3500 },
    { "epoch": 0.28, "learning_rate": 4.858095643536257e-05, "loss": 6.5083, "step": 4000 },
    { "epoch": 0.32, "learning_rate": 4.840357598978289e-05, "loss": 6.4739, "step": 4500 },
    { "epoch": 0.35, "learning_rate": 4.8226195544203214e-05, "loss": 6.4472, "step": 5000 },
    { "epoch": 0.39, "learning_rate": 4.804881509862353e-05, "loss": 6.4133, "step": 5500 },
    { "epoch": 0.43, "learning_rate": 4.787143465304385e-05, "loss": 6.3857, "step": 6000 },
    { "epoch": 0.46, "learning_rate": 4.7694054207464175e-05, "loss": 6.3575, "step": 6500 },
    { "epoch": 0.5, "learning_rate": 4.75166737618845e-05, "loss": 6.3337, "step": 7000 },
    { "epoch": 0.53, "learning_rate": 4.733929331630481e-05, "loss": 6.3248, "step": 7500 },
    { "epoch": 0.57, "learning_rate": 4.7161912870725136e-05, "loss": 6.2974, "step": 8000 },
    { "epoch": 0.6, "learning_rate": 4.698453242514546e-05, "loss": 6.2786, "step": 8500 },
    { "epoch": 0.64, "learning_rate": 4.680715197956578e-05, "loss": 6.2569, "step": 9000 },
    { "epoch": 0.67, "learning_rate": 4.6629771533986097e-05, "loss": 6.2477, "step": 9500 },
    { "epoch": 0.71, "learning_rate": 4.645239108840642e-05, "loss": 6.2252, "step": 10000 },
    { "epoch": 0.74, "learning_rate": 4.627501064282674e-05, "loss": 6.212, "step": 10500 },
    { "epoch": 0.78, "learning_rate": 4.609763019724706e-05, "loss": 6.1802, "step": 11000 },
    { "epoch": 0.82, "learning_rate": 4.592024975166738e-05, "loss": 6.1081, "step": 11500 },
    { "epoch": 0.85, "learning_rate": 4.5742869306087696e-05, "loss": 6.0248, "step": 12000 },
    { "epoch": 0.89, "learning_rate": 4.556548886050802e-05, "loss": 5.8882, "step": 12500 },
    { "epoch": 0.92, "learning_rate": 4.538810841492834e-05, "loss": 5.7505, "step": 13000 },
    { "epoch": 0.96, "learning_rate": 4.5210727969348656e-05, "loss": 5.5657, "step": 13500 },
    { "epoch": 0.99, "learning_rate": 4.503334752376898e-05, "loss": 5.4052, "step": 14000 },
    { "epoch": 1.03, "learning_rate": 4.48559670781893e-05, "loss": 5.2834, "step": 14500 },
    { "epoch": 1.06, "learning_rate": 4.4678586632609624e-05, "loss": 5.1588, "step": 15000 },
    { "epoch": 1.1, "learning_rate": 4.450120618702994e-05, "loss": 5.0617, "step": 15500 },
    { "epoch": 1.14, "learning_rate": 4.432382574145026e-05, "loss": 4.9254, "step": 16000 },
    { "epoch": 1.17, "learning_rate": 4.4146445295870585e-05, "loss": 4.7733, "step": 16500 },
    { "epoch": 1.21, "learning_rate": 4.396906485029091e-05, "loss": 4.6491, "step": 17000 },
    { "epoch": 1.24, "learning_rate": 4.379168440471122e-05, "loss": 4.5447, "step": 17500 },
    { "epoch": 1.28, "learning_rate": 4.3614303959131546e-05, "loss": 4.4222, "step": 18000 },
    { "epoch": 1.31, "learning_rate": 4.343692351355187e-05, "loss": 4.3276, "step": 18500 },
    { "epoch": 1.35, "learning_rate": 4.325954306797219e-05, "loss": 4.2297, "step": 19000 },
    { "epoch": 1.38, "learning_rate": 4.3082162622392507e-05, "loss": 4.169, "step": 19500 },
    { "epoch": 1.42, "learning_rate": 4.290478217681283e-05, "loss": 4.1032, "step": 20000 },
    { "epoch": 1.45, "learning_rate": 4.272740173123315e-05, "loss": 4.0278, "step": 20500 },
    { "epoch": 1.49, "learning_rate": 4.2550021285653474e-05, "loss": 3.9557, "step": 21000 },
    { "epoch": 1.53, "learning_rate": 4.237264084007379e-05, "loss": 3.8852, "step": 21500 },
    { "epoch": 1.56, "learning_rate": 4.219526039449411e-05, "loss": 3.8525, "step": 22000 },
    { "epoch": 1.6, "learning_rate": 4.2017879948914435e-05, "loss": 3.8033, "step": 22500 },
    { "epoch": 1.63, "learning_rate": 4.184049950333476e-05, "loss": 3.7525, "step": 23000 },
    { "epoch": 1.67, "learning_rate": 4.166311905775507e-05, "loss": 3.7301, "step": 23500 },
    { "epoch": 1.7, "learning_rate": 4.1485738612175396e-05, "loss": 3.6619, "step": 24000 },
    { "epoch": 1.74, "learning_rate": 4.130835816659572e-05, "loss": 3.6457, "step": 24500 },
    { "epoch": 1.77, "learning_rate": 4.113097772101604e-05, "loss": 3.6079, "step": 25000 },
    { "epoch": 1.81, "learning_rate": 4.0953597275436357e-05, "loss": 3.569, "step": 25500 },
    { "epoch": 1.84, "learning_rate": 4.077621682985668e-05, "loss": 3.5467, "step": 26000 },
    { "epoch": 1.88, "learning_rate": 4.0598836384277e-05, "loss": 3.5302, "step": 26500 },
    { "epoch": 1.92, "learning_rate": 4.0421455938697324e-05, "loss": 3.4952, "step": 27000 },
    { "epoch": 1.95, "learning_rate": 4.024407549311764e-05, "loss": 3.4743, "step": 27500 },
    { "epoch": 1.99, "learning_rate": 4.006669504753796e-05, "loss": 3.4558, "step": 28000 },
    { "epoch": 2.02, "learning_rate": 3.9889314601958285e-05, "loss": 3.4161, "step": 28500 },
    { "epoch": 2.06, "learning_rate": 3.971193415637861e-05, "loss": 3.3899, "step": 29000 },
    { "epoch": 2.09, "learning_rate": 3.953455371079892e-05, "loss": 3.3836, "step": 29500 },
    { "epoch": 2.13, "learning_rate": 3.9357173265219246e-05, "loss": 3.3521, "step": 30000 },
    { "epoch": 2.16, "learning_rate": 3.917979281963957e-05, "loss": 3.3418, "step": 30500 },
    { "epoch": 2.2, "learning_rate": 3.900241237405989e-05, "loss": 3.3161, "step": 31000 },
    { "epoch": 2.23, "learning_rate": 3.8825031928480207e-05, "loss": 3.3091, "step": 31500 },
    { "epoch": 2.27, "learning_rate": 3.864765148290053e-05, "loss": 3.2917, "step": 32000 },
    { "epoch": 2.31, "learning_rate": 3.847027103732085e-05, "loss": 3.268, "step": 32500 },
    { "epoch": 2.34, "learning_rate": 3.829289059174117e-05, "loss": 3.2515, "step": 33000 },
    { "epoch": 2.38, "learning_rate": 3.811551014616149e-05, "loss": 3.2415, "step": 33500 },
    { "epoch": 2.41, "learning_rate": 3.793812970058181e-05, "loss": 3.2177, "step": 34000 },
    { "epoch": 2.45, "learning_rate": 3.776074925500213e-05, "loss": 3.2102, "step": 34500 },
    { "epoch": 2.48, "learning_rate": 3.758336880942245e-05, "loss": 3.2186, "step": 35000 },
    { "epoch": 2.52, "learning_rate": 3.740598836384277e-05, "loss": 3.1951, "step": 35500 },
    { "epoch": 2.55, "learning_rate": 3.722860791826309e-05, "loss": 3.1766, "step": 36000 },
    { "epoch": 2.59, "learning_rate": 3.705122747268341e-05, "loss": 3.1603, "step": 36500 },
    { "epoch": 2.63, "learning_rate": 3.6873847027103734e-05, "loss": 3.1269, "step": 37000 },
    { "epoch": 2.66, "learning_rate": 3.669646658152405e-05, "loss": 3.1431, "step": 37500 },
    { "epoch": 2.7, "learning_rate": 3.651908613594437e-05, "loss": 3.112, "step": 38000 },
    { "epoch": 2.73, "learning_rate": 3.6341705690364695e-05, "loss": 3.1211, "step": 38500 },
    { "epoch": 2.77, "learning_rate": 3.616432524478502e-05, "loss": 3.0896, "step": 39000 },
    { "epoch": 2.8, "learning_rate": 3.598694479920533e-05, "loss": 3.0972, "step": 39500 },
    { "epoch": 2.84, "learning_rate": 3.5809564353625656e-05, "loss": 3.0821, "step": 40000 },
    { "epoch": 2.87, "learning_rate": 3.563218390804598e-05, "loss": 3.0626, "step": 40500 },
    { "epoch": 2.91, "learning_rate": 3.54548034624663e-05, "loss": 3.0586, "step": 41000 },
    { "epoch": 2.94, "learning_rate": 3.5277423016886617e-05, "loss": 3.0464, "step": 41500 },
    { "epoch": 2.98, "learning_rate": 3.510004257130694e-05, "loss": 3.0368, "step": 42000 },
    { "epoch": 3.02, "learning_rate": 3.492266212572726e-05, "loss": 3.0282, "step": 42500 },
    { "epoch": 3.05, "learning_rate": 3.4745281680147584e-05, "loss": 2.9903, "step": 43000 },
    { "epoch": 3.09, "learning_rate": 3.45679012345679e-05, "loss": 2.9912, "step": 43500 },
    { "epoch": 3.12, "learning_rate": 3.439052078898822e-05, "loss": 2.9902, "step": 44000 },
    { "epoch": 3.16, "learning_rate": 3.4213140343408545e-05, "loss": 2.995, "step": 44500 },
    { "epoch": 3.19, "learning_rate": 3.403575989782887e-05, "loss": 2.9736, "step": 45000 },
    { "epoch": 3.23, "learning_rate": 3.385837945224918e-05, "loss": 2.9724, "step": 45500 },
    { "epoch": 3.26, "learning_rate": 3.3680999006669506e-05, "loss": 2.9636, "step": 46000 },
    { "epoch": 3.3, "learning_rate": 3.350361856108983e-05, "loss": 2.957, "step": 46500 },
    { "epoch": 3.33, "learning_rate": 3.332623811551015e-05, "loss": 2.9563, "step": 47000 },
    { "epoch": 3.37, "learning_rate": 3.3148857669930467e-05, "loss": 2.9381, "step": 47500 },
    { "epoch": 3.41, "learning_rate": 3.297147722435079e-05, "loss": 2.926, "step": 48000 },
    { "epoch": 3.44, "learning_rate": 3.279409677877111e-05, "loss": 2.933, "step": 48500 },
    { "epoch": 3.48, "learning_rate": 3.2616716333191434e-05, "loss": 2.9144, "step": 49000 },
    { "epoch": 3.51, "learning_rate": 3.243933588761175e-05, "loss": 2.9084, "step": 49500 },
    { "epoch": 3.55, "learning_rate": 3.226195544203207e-05, "loss": 2.9116, "step": 50000 },
    { "epoch": 3.58, "learning_rate": 3.2084574996452395e-05, "loss": 2.8892, "step": 50500 },
    { "epoch": 3.62, "learning_rate": 3.190719455087272e-05, "loss": 2.8828, "step": 51000 },
    { "epoch": 3.65, "learning_rate": 3.172981410529303e-05, "loss": 2.8843, "step": 51500 },
    { "epoch": 3.69, "learning_rate": 3.1552433659713356e-05, "loss": 2.8799, "step": 52000 },
    { "epoch": 3.72, "learning_rate": 3.137505321413368e-05, "loss": 2.8586, "step": 52500 },
    { "epoch": 3.76, "learning_rate": 3.1197672768554e-05, "loss": 2.8601, "step": 53000 },
    { "epoch": 3.8, "learning_rate": 3.1020292322974317e-05, "loss": 2.8523, "step": 53500 },
    { "epoch": 3.83, "learning_rate": 3.084291187739464e-05, "loss": 2.8517, "step": 54000 },
    { "epoch": 3.87, "learning_rate": 3.066553143181496e-05, "loss": 2.8347, "step": 54500 },
    { "epoch": 3.9, "learning_rate": 3.048815098623528e-05, "loss": 2.8427, "step": 55000 },
    { "epoch": 3.94, "learning_rate": 3.0310770540655597e-05, "loss": 2.8471, "step": 55500 },
    { "epoch": 3.97, "learning_rate": 3.013339009507592e-05, "loss": 2.8201, "step": 56000 },
    { "epoch": 4.01, "learning_rate": 2.995600964949624e-05, "loss": 2.8236, "step": 56500 },
    { "epoch": 4.04, "learning_rate": 2.9778629203916564e-05, "loss": 2.8043, "step": 57000 },
    { "epoch": 4.08, "learning_rate": 2.960124875833688e-05, "loss": 2.7957, "step": 57500 },
    { "epoch": 4.12, "learning_rate": 2.9423868312757202e-05, "loss": 2.8042, "step": 58000 },
    { "epoch": 4.15, "learning_rate": 2.9246487867177525e-05, "loss": 2.7836, "step": 58500 },
    { "epoch": 4.19, "learning_rate": 2.9069107421597847e-05, "loss": 2.7875, "step": 59000 },
    { "epoch": 4.22, "learning_rate": 2.8891726976018163e-05, "loss": 2.7737, "step": 59500 },
    { "epoch": 4.26, "learning_rate": 2.8714346530438486e-05, "loss": 2.7783, "step": 60000 },
    { "epoch": 4.29, "learning_rate": 2.8536966084858808e-05, "loss": 2.7689, "step": 60500 },
    { "epoch": 4.33, "learning_rate": 2.835958563927913e-05, "loss": 2.7689, "step": 61000 },
    { "epoch": 4.36, "learning_rate": 2.8182205193699447e-05, "loss": 2.7737, "step": 61500 },
    { "epoch": 4.4, "learning_rate": 2.800482474811977e-05, "loss": 2.7595, "step": 62000 },
    { "epoch": 4.43, "learning_rate": 2.7827444302540088e-05, "loss": 2.7657, "step": 62500 },
    { "epoch": 4.47, "learning_rate": 2.765006385696041e-05, "loss": 2.7664, "step": 63000 },
    { "epoch": 4.51, "learning_rate": 2.747268341138073e-05, "loss": 2.7394, "step": 63500 },
    { "epoch": 4.54, "learning_rate": 2.729530296580105e-05, "loss": 2.7357, "step": 64000 },
    { "epoch": 4.58, "learning_rate": 2.711792252022137e-05, "loss": 2.7346, "step": 64500 },
    { "epoch": 4.61, "learning_rate": 2.6940542074641694e-05, "loss": 2.7351, "step": 65000 },
    { "epoch": 4.65, "learning_rate": 2.676316162906201e-05, "loss": 2.7268, "step": 65500 },
    { "epoch": 4.68, "learning_rate": 2.6585781183482332e-05, "loss": 2.7319, "step": 66000 },
    { "epoch": 4.72, "learning_rate": 2.6408400737902655e-05, "loss": 2.7282, "step": 66500 },
    { "epoch": 4.75, "learning_rate": 2.6231020292322977e-05, "loss": 2.7297, "step": 67000 },
    { "epoch": 4.79, "learning_rate": 2.6053639846743293e-05, "loss": 2.7112, "step": 67500 },
    { "epoch": 4.82, "learning_rate": 2.5876259401163616e-05, "loss": 2.7315, "step": 68000 },
    { "epoch": 4.86, "learning_rate": 2.5698878955583938e-05, "loss": 2.704, "step": 68500 },
    { "epoch": 4.9, "learning_rate": 2.552149851000426e-05, "loss": 2.7036, "step": 69000 },
    { "epoch": 4.93, "learning_rate": 2.5344118064424577e-05, "loss": 2.7103, "step": 69500 },
    { "epoch": 4.97, "learning_rate": 2.51667376188449e-05, "loss": 2.7022, "step": 70000 },
    { "epoch": 5.0, "learning_rate": 2.498935717326522e-05, "loss": 2.6949, "step": 70500 },
    { "epoch": 5.04, "learning_rate": 2.481197672768554e-05, "loss": 2.681, "step": 71000 },
    { "epoch": 5.07, "learning_rate": 2.4634596282105863e-05, "loss": 2.6636, "step": 71500 },
    { "epoch": 5.11, "learning_rate": 2.4457215836526182e-05, "loss": 2.6868, "step": 72000 },
    { "epoch": 5.14, "learning_rate": 2.4279835390946505e-05, "loss": 2.6683, "step": 72500 },
    { "epoch": 5.18, "learning_rate": 2.4102454945366824e-05, "loss": 2.6765, "step": 73000 },
    { "epoch": 5.21, "learning_rate": 2.3925074499787147e-05, "loss": 2.6665, "step": 73500 },
    { "epoch": 5.25, "learning_rate": 2.3747694054207466e-05, "loss": 2.6672, "step": 74000 },
    { "epoch": 5.29, "learning_rate": 2.3570313608627785e-05, "loss": 2.6604, "step": 74500 },
    { "epoch": 5.32, "learning_rate": 2.3392933163048107e-05, "loss": 2.6552, "step": 75000 },
    { "epoch": 5.36, "learning_rate": 2.3215552717468427e-05, "loss": 2.6561, "step": 75500 },
    { "epoch": 5.39, "learning_rate": 2.3038172271888746e-05, "loss": 2.6418, "step": 76000 },
    { "epoch": 5.43, "learning_rate": 2.2860791826309068e-05, "loss": 2.6552, "step": 76500 },
    { "epoch": 5.46, "learning_rate": 2.2683411380729387e-05, "loss": 2.6404, "step": 77000 },
    { "epoch": 5.5, "learning_rate": 2.250603093514971e-05, "loss": 2.6317, "step": 77500 },
    { "epoch": 5.53, "learning_rate": 2.232865048957003e-05, "loss": 2.6343, "step": 78000 },
    { "epoch": 5.57, "learning_rate": 2.215127004399035e-05, "loss": 2.6446, "step": 78500 },
    { "epoch": 5.61, "learning_rate": 2.197388959841067e-05, "loss": 2.6201, "step": 79000 },
    { "epoch": 5.64, "learning_rate": 2.1796509152830993e-05, "loss": 2.6266, "step": 79500 },
    { "epoch": 5.68, "learning_rate": 2.1619128707251312e-05, "loss": 2.6229, "step": 80000 },
    { "epoch": 5.71, "learning_rate": 2.1441748261671635e-05, "loss": 2.6297, "step": 80500 },
    { "epoch": 5.75, "learning_rate": 2.1264367816091954e-05, "loss": 2.6183, "step": 81000 },
    { "epoch": 5.78, "learning_rate": 2.1086987370512277e-05, "loss": 2.6183, "step": 81500 },
    { "epoch": 5.82, "learning_rate": 2.0909606924932596e-05, "loss": 2.6164, "step": 82000 },
    { "epoch": 5.85, "learning_rate": 2.073222647935292e-05, "loss": 2.6069, "step": 82500 },
    { "epoch": 5.89, "learning_rate": 2.0554846033773237e-05, "loss": 2.6219, "step": 83000 },
    { "epoch": 5.92, "learning_rate": 2.037746558819356e-05, "loss": 2.592, "step": 83500 },
    { "epoch": 5.96, "learning_rate": 2.020008514261388e-05, "loss": 2.5975, "step": 84000 },
    { "epoch": 6.0, "learning_rate": 2.00227046970342e-05, "loss": 2.6029, "step": 84500 },
    { "epoch": 6.03, "learning_rate": 1.984532425145452e-05, "loss": 2.5913, "step": 85000 },
    { "epoch": 6.07, "learning_rate": 1.9667943805874843e-05, "loss": 2.5867, "step": 85500 },
    { "epoch": 6.1, "learning_rate": 1.9490563360295162e-05, "loss": 2.5899, "step": 86000 },
    { "epoch": 6.14, "learning_rate": 1.931318291471548e-05, "loss": 2.5923, "step": 86500 },
    { "epoch": 6.17, "learning_rate": 1.91358024691358e-05, "loss": 2.5742, "step": 87000 },
    { "epoch": 6.21, "learning_rate": 1.8958422023556123e-05, "loss": 2.5859, "step": 87500 },
    { "epoch": 6.24, "learning_rate": 1.8781041577976442e-05, "loss": 2.5683, "step": 88000 },
    { "epoch": 6.28, "learning_rate": 1.8603661132396765e-05, "loss": 2.5808, "step": 88500 },
    { "epoch": 6.31, "learning_rate": 1.8426280686817084e-05, "loss": 2.5663, "step": 89000 },
    { "epoch": 6.35, "learning_rate": 1.8248900241237407e-05, "loss": 2.5677, "step": 89500 },
    { "epoch": 6.39, "learning_rate": 1.8071519795657726e-05, "loss": 2.5647, "step": 90000 },
    { "epoch": 6.42, "learning_rate": 1.7894139350078048e-05, "loss": 2.5656, "step": 90500 },
    { "epoch": 6.46, "learning_rate": 1.7716758904498367e-05, "loss": 2.5682, "step": 91000 },
    { "epoch": 6.49, "learning_rate": 1.753937845891869e-05, "loss": 2.5645, "step": 91500 },
    { "epoch": 6.53, "learning_rate": 1.736199801333901e-05, "loss": 2.5586, "step": 92000 },
    { "epoch": 6.56, "learning_rate": 1.718461756775933e-05, "loss": 2.5488, "step": 92500 },
    { "epoch": 6.6, "learning_rate": 1.700723712217965e-05, "loss": 2.5547, "step": 93000 },
    { "epoch": 6.63, "learning_rate": 1.6829856676599973e-05, "loss": 2.5485, "step": 93500 },
    { "epoch": 6.67, "learning_rate": 1.6652476231020292e-05, "loss": 2.5503, "step": 94000 },
    { "epoch": 6.7, "learning_rate": 1.6475095785440615e-05, "loss": 2.547, "step": 94500 },
    { "epoch": 6.74, "learning_rate": 1.6297715339860934e-05, "loss": 2.5468, "step": 95000 },
    { "epoch": 6.78, "learning_rate": 1.6120334894281257e-05, "loss": 2.5416, "step": 95500 },
    { "epoch": 6.81, "learning_rate": 1.5942954448701576e-05, "loss": 2.5562, "step": 96000 },
    { "epoch": 6.85, "learning_rate": 1.57655740031219e-05, "loss": 2.5311, "step": 96500 },
    { "epoch": 6.88, "learning_rate": 1.5588193557542217e-05, "loss": 2.5481, "step": 97000 },
    { "epoch": 6.92, "learning_rate": 1.5410813111962537e-05, "loss": 2.5362, "step": 97500 },
    { "epoch": 6.95, "learning_rate": 1.5233432666382857e-05, "loss": 2.5161, "step": 98000 },
    { "epoch": 6.99, "learning_rate": 1.505605222080318e-05, "loss": 2.5354, "step": 98500 },
    { "epoch": 7.02, "learning_rate": 1.4878671775223499e-05, "loss": 2.5214, "step": 99000 },
    { "epoch": 7.06, "learning_rate": 1.4701291329643822e-05, "loss": 2.5271, "step": 99500 },
    { "epoch": 7.1, "learning_rate": 1.452391088406414e-05, "loss": 2.5024, "step": 100000 },
    { "epoch": 7.13, "learning_rate": 1.4346530438484462e-05, "loss": 2.5096, "step": 100500 },
    { "epoch": 7.17, "learning_rate": 1.4169149992904782e-05, "loss": 2.5094, "step": 101000 },
    { "epoch": 7.2, "learning_rate": 1.3991769547325103e-05, "loss": 2.5151, "step": 101500 },
    { "epoch": 7.24, "learning_rate": 1.3814389101745422e-05, "loss": 2.5141, "step": 102000 },
    { "epoch": 7.27, "learning_rate": 1.3637008656165745e-05, "loss": 2.5102, "step": 102500 },
    { "epoch": 7.31, "learning_rate": 1.3459628210586064e-05, "loss": 2.5091, "step": 103000 },
    { "epoch": 7.34, "learning_rate": 1.3282247765006387e-05, "loss": 2.5025, "step": 103500 },
    { "epoch": 7.38, "learning_rate": 1.3104867319426706e-05, "loss": 2.5025, "step": 104000 },
    { "epoch": 7.41, "learning_rate": 1.2927486873847028e-05, "loss": 2.5051, "step": 104500 },
    { "epoch": 7.45, "learning_rate": 1.2750106428267347e-05, "loss": 2.5084, "step": 105000 },
    { "epoch": 7.49, "learning_rate": 1.257272598268767e-05, "loss": 2.4955, "step": 105500 },
    { "epoch": 7.52, "learning_rate": 1.2395345537107989e-05, "loss": 2.4915, "step": 106000 },
    { "epoch": 7.56, "learning_rate": 1.221796509152831e-05, "loss": 2.5053, "step": 106500 },
    { "epoch": 7.59, "learning_rate": 1.204058464594863e-05, "loss": 2.4989, "step": 107000 },
    { "epoch": 7.63, "learning_rate": 1.1863204200368952e-05, "loss": 2.4957, "step": 107500 },
    { "epoch": 7.66, "learning_rate": 1.1685823754789272e-05, "loss": 2.4909, "step": 108000 },
    { "epoch": 7.7, "learning_rate": 1.1508443309209593e-05, "loss": 2.492, "step": 108500 },
    { "epoch": 7.73, "learning_rate": 1.1331062863629914e-05, "loss": 2.4929, "step": 109000 },
    { "epoch": 7.77, "learning_rate": 1.1153682418050235e-05, "loss": 2.4871, "step": 109500 },
    { "epoch": 7.8, "learning_rate": 1.0976301972470556e-05, "loss": 2.4843, "step": 110000 },
    { "epoch": 7.84, "learning_rate": 1.0798921526890877e-05, "loss": 2.4849, "step": 110500 },
    { "epoch": 7.88, "learning_rate": 1.0621541081311197e-05, "loss": 2.4903, "step": 111000 },
    { "epoch": 7.91, "learning_rate": 1.0444160635731518e-05, "loss": 2.4766, "step": 111500 },
    { "epoch": 7.95, "learning_rate": 1.0266780190151837e-05, "loss": 2.4895, "step": 112000 },
    { "epoch": 7.98, "learning_rate": 1.0089399744572158e-05, "loss": 2.4818, "step": 112500 },
    { "epoch": 8.02, "learning_rate": 9.912019298992479e-06, "loss": 2.4782, "step": 113000 },
    { "epoch": 8.05, "learning_rate": 9.7346388534128e-06, "loss": 2.4671, "step": 113500 },
    { "epoch": 8.09, "learning_rate": 9.55725840783312e-06, "loss": 2.4665, "step": 114000 },
    { "epoch": 8.12, "learning_rate": 9.379877962253442e-06, "loss": 2.4669, "step": 114500 },
    { "epoch": 8.16, "learning_rate": 9.202497516673762e-06, "loss": 2.4552, "step": 115000 },
    { "epoch": 8.19, "learning_rate": 9.025117071094083e-06, "loss": 2.459, "step": 115500 },
    { "epoch": 8.23, "learning_rate": 8.847736625514404e-06, "loss": 2.455, "step": 116000 },
    { "epoch": 8.27, "learning_rate": 8.670356179934725e-06, "loss": 2.472, "step": 116500 },
    { "epoch": 8.3, "learning_rate": 8.492975734355046e-06, "loss": 2.4517, "step": 117000 },
    { "epoch": 8.34, "learning_rate": 8.315595288775365e-06, "loss": 2.458, "step": 117500 },
    { "epoch": 8.37, "learning_rate": 8.138214843195686e-06, "loss": 2.4597, "step": 118000 },
    { "epoch": 8.41, "learning_rate": 7.960834397616007e-06, "loss": 2.4567, "step": 118500 },
    { "epoch": 8.44, "learning_rate": 7.783453952036327e-06, "loss": 2.4561, "step": 119000 },
    { "epoch": 8.48, "learning_rate": 7.606073506456648e-06, "loss": 2.4614, "step": 119500 },
    { "epoch": 8.51, "learning_rate": 7.428693060876969e-06, "loss": 2.4592, "step": 120000 },
    { "epoch": 8.55, "learning_rate": 7.25131261529729e-06, "loss": 2.456, "step": 120500 },
    { "epoch": 8.59, "learning_rate": 7.073932169717611e-06, "loss": 2.4593, "step": 121000 },
    { "epoch": 8.62, "learning_rate": 6.896551724137932e-06, "loss": 2.452, "step": 121500 },
    { "epoch": 8.66, "learning_rate": 6.719171278558252e-06, "loss": 2.4414, "step": 122000 },
    { "epoch": 8.69, "learning_rate": 6.5417908329785725e-06, "loss": 2.4558, "step": 122500 },
    { "epoch": 8.73, "learning_rate": 6.364410387398893e-06, "loss": 2.4417, "step": 123000 },
    { "epoch": 8.76, "learning_rate": 6.187029941819214e-06, "loss": 2.4364, "step": 123500 },
    { "epoch": 8.8, "learning_rate": 6.009649496239535e-06, "loss": 2.4543, "step": 124000 },
    { "epoch": 8.83, "learning_rate": 5.832269050659856e-06, "loss": 2.4419, "step": 124500 },
    { "epoch": 8.87, "learning_rate": 5.654888605080176e-06, "loss": 2.446, "step": 125000 },
    { "epoch": 8.9, "learning_rate": 5.477508159500497e-06, "loss": 2.4395, "step": 125500 },
    { "epoch": 8.94, "learning_rate": 5.3001277139208175e-06, "loss": 2.4466, "step": 126000 },
    { "epoch": 8.98, "learning_rate": 5.122747268341138e-06, "loss": 2.4301, "step": 126500 },
    { "epoch": 9.01, "learning_rate": 4.945366822761459e-06, "loss": 2.4382, "step": 127000 },
    { "epoch": 9.05, "learning_rate": 4.767986377181779e-06, "loss": 2.4406, "step": 127500 },
    { "epoch": 9.08, "learning_rate": 4.5906059316021e-06, "loss": 2.4312, "step": 128000 },
    { "epoch": 9.12, "learning_rate": 4.413225486022421e-06, "loss": 2.4409, "step": 128500 },
    { "epoch": 9.15, "learning_rate": 4.235845040442742e-06, "loss": 2.4256, "step": 129000 },
    { "epoch": 9.19, "learning_rate": 4.0584645948630625e-06, "loss": 2.4301, "step": 129500 },
    { "epoch": 9.22, "learning_rate": 3.881084149283383e-06, "loss": 2.4338, "step": 130000 },
    { "epoch": 9.26, "learning_rate": 3.7037037037037037e-06, "loss": 2.4367, "step": 130500 },
    { "epoch": 9.29, "learning_rate": 3.5263232581240246e-06, "loss": 2.4198, "step": 131000 },
    { "epoch": 9.33, "learning_rate": 3.348942812544345e-06, "loss": 2.4228, "step": 131500 },
    { "epoch": 9.37, "learning_rate": 3.171562366964666e-06, "loss": 2.4257, "step": 132000 },
    { "epoch": 9.4, "learning_rate": 2.9941819213849867e-06, "loss": 2.4286, "step": 132500 },
    { "epoch": 9.44, "learning_rate": 2.8168014758053075e-06, "loss": 2.4153, "step": 133000 },
    { "epoch": 9.47, "learning_rate": 2.6394210302256283e-06, "loss": 2.4235, "step": 133500 },
    { "epoch": 9.51, "learning_rate": 2.4620405846459487e-06, "loss": 2.4243, "step": 134000 },
    { "epoch": 9.54, "learning_rate": 2.2846601390662696e-06, "loss": 2.4235, "step": 134500 },
    { "epoch": 9.58, "learning_rate": 2.1072796934865904e-06, "loss": 2.4297, "step": 135000 },
    { "epoch": 9.61, "learning_rate": 1.929899247906911e-06, "loss": 2.4356, "step": 135500 },
    { "epoch": 9.65, "learning_rate": 1.7525188023272317e-06, "loss": 2.4223, "step": 136000 },
    { "epoch": 9.68, "learning_rate": 1.5751383567475523e-06, "loss": 2.4218, "step": 136500 },
    { "epoch": 9.72, "learning_rate": 1.397757911167873e-06, "loss": 2.4211, "step": 137000 },
    { "epoch": 9.76, "learning_rate": 1.2203774655881937e-06, "loss": 2.4233, "step": 137500 },
    { "epoch": 9.79, "learning_rate": 1.0429970200085144e-06, "loss": 2.4107, "step": 138000 },
    { "epoch": 9.83, "learning_rate": 8.65616574428835e-07, "loss": 2.4176, "step": 138500 },
    { "epoch": 9.86, "learning_rate": 6.882361288491557e-07, "loss": 2.4227, "step": 139000 },
    { "epoch": 9.9, "learning_rate": 5.108556832694765e-07, "loss": 2.4229, "step": 139500 },
    { "epoch": 9.93, "learning_rate": 3.334752376897971e-07, "loss": 2.4089, "step": 140000 }
  ],
  "max_steps": 140940,
  "num_train_epochs": 10,
  "total_flos": 690009058341027840,
  "trial_name": null,
  "trial_params": null
}