{
  "best_metric": 0.29094862937927246,
  "best_model_checkpoint": "miner_id_24/checkpoint-150",
  "epoch": 0.7761966364812419,
  "eval_steps": 25,
  "global_step": 150,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.00517464424320828,
      "grad_norm": 137.61221313476562,
      "learning_rate": 5.555555555555556e-06,
      "loss": 45.3923,
      "step": 1
    },
    {
      "epoch": 0.00517464424320828,
      "eval_loss": 0.879862904548645,
      "eval_runtime": 2.4641,
      "eval_samples_per_second": 20.291,
      "eval_steps_per_second": 5.276,
      "step": 1
    },
    {
      "epoch": 0.01034928848641656,
      "grad_norm": 145.64358520507812,
      "learning_rate": 1.1111111111111112e-05,
      "loss": 47.4644,
      "step": 2
    },
    {
      "epoch": 0.015523932729624839,
      "grad_norm": 143.3395233154297,
      "learning_rate": 1.6666666666666667e-05,
      "loss": 46.5953,
      "step": 3
    },
    {
      "epoch": 0.02069857697283312,
      "grad_norm": 131.87200927734375,
      "learning_rate": 2.2222222222222223e-05,
      "loss": 46.5328,
      "step": 4
    },
    {
      "epoch": 0.0258732212160414,
      "grad_norm": 114.46612548828125,
      "learning_rate": 2.777777777777778e-05,
      "loss": 44.6468,
      "step": 5
    },
    {
      "epoch": 0.031047865459249677,
      "grad_norm": 86.16083526611328,
      "learning_rate": 3.3333333333333335e-05,
      "loss": 40.958,
      "step": 6
    },
    {
      "epoch": 0.03622250970245795,
      "grad_norm": 135.6624755859375,
      "learning_rate": 3.888888888888889e-05,
      "loss": 39.1718,
      "step": 7
    },
    {
      "epoch": 0.04139715394566624,
      "grad_norm": 88.87834167480469,
      "learning_rate": 4.4444444444444447e-05,
      "loss": 35.5996,
      "step": 8
    },
    {
      "epoch": 0.04657179818887452,
      "grad_norm": 134.31365966796875,
      "learning_rate": 5e-05,
      "loss": 44.2182,
      "step": 9
    },
    {
      "epoch": 0.0517464424320828,
      "grad_norm": 112.15583038330078,
      "learning_rate": 5.555555555555556e-05,
      "loss": 40.9168,
      "step": 10
    },
    {
      "epoch": 0.056921086675291076,
      "grad_norm": 116.23684692382812,
      "learning_rate": 6.111111111111112e-05,
      "loss": 40.278,
      "step": 11
    },
    {
      "epoch": 0.062095730918499355,
      "grad_norm": 91.58599853515625,
      "learning_rate": 6.666666666666667e-05,
      "loss": 39.5289,
      "step": 12
    },
    {
      "epoch": 0.06727037516170763,
      "grad_norm": 84.65013122558594,
      "learning_rate": 7.222222222222222e-05,
      "loss": 34.301,
      "step": 13
    },
    {
      "epoch": 0.0724450194049159,
      "grad_norm": 63.37664031982422,
      "learning_rate": 7.777777777777778e-05,
      "loss": 30.2999,
      "step": 14
    },
    {
      "epoch": 0.07761966364812418,
      "grad_norm": 39.683807373046875,
      "learning_rate": 8.333333333333334e-05,
      "loss": 29.2968,
      "step": 15
    },
    {
      "epoch": 0.08279430789133248,
      "grad_norm": 31.897132873535156,
      "learning_rate": 8.888888888888889e-05,
      "loss": 28.7925,
      "step": 16
    },
    {
      "epoch": 0.08796895213454076,
      "grad_norm": 34.73341751098633,
      "learning_rate": 9.444444444444444e-05,
      "loss": 28.2201,
      "step": 17
    },
    {
      "epoch": 0.09314359637774904,
      "grad_norm": 33.116825103759766,
      "learning_rate": 0.0001,
      "loss": 27.4134,
      "step": 18
    },
    {
      "epoch": 0.09831824062095731,
      "grad_norm": 29.32647132873535,
      "learning_rate": 9.999884773765534e-05,
      "loss": 27.2748,
      "step": 19
    },
    {
      "epoch": 0.1034928848641656,
      "grad_norm": 34.262603759765625,
      "learning_rate": 9.999539100963065e-05,
      "loss": 26.7995,
      "step": 20
    },
    {
      "epoch": 0.10866752910737387,
      "grad_norm": 34.93130111694336,
      "learning_rate": 9.998962999295068e-05,
      "loss": 30.0288,
      "step": 21
    },
    {
      "epoch": 0.11384217335058215,
      "grad_norm": 45.51692581176758,
      "learning_rate": 9.998156498264669e-05,
      "loss": 32.0179,
      "step": 22
    },
    {
      "epoch": 0.11901681759379043,
      "grad_norm": 53.52436828613281,
      "learning_rate": 9.997119639174122e-05,
      "loss": 31.3302,
      "step": 23
    },
    {
      "epoch": 0.12419146183699871,
      "grad_norm": 53.14868927001953,
      "learning_rate": 9.995852475122702e-05,
      "loss": 31.3536,
      "step": 24
    },
    {
      "epoch": 0.129366106080207,
      "grad_norm": 55.426658630371094,
      "learning_rate": 9.994355071003984e-05,
      "loss": 30.1863,
      "step": 25
    },
    {
      "epoch": 0.129366106080207,
      "eval_loss": 0.4283739924430847,
      "eval_runtime": 2.4658,
      "eval_samples_per_second": 20.277,
      "eval_steps_per_second": 5.272,
      "step": 25
    },
    {
      "epoch": 0.13454075032341525,
      "grad_norm": 59.29961395263672,
      "learning_rate": 9.992627503502517e-05,
      "loss": 25.8217,
      "step": 26
    },
    {
      "epoch": 0.13971539456662355,
      "grad_norm": 32.18626403808594,
      "learning_rate": 9.990669861089904e-05,
      "loss": 24.9423,
      "step": 27
    },
    {
      "epoch": 0.1448900388098318,
      "grad_norm": 42.07598876953125,
      "learning_rate": 9.988482244020256e-05,
      "loss": 24.4099,
      "step": 28
    },
    {
      "epoch": 0.1500646830530401,
      "grad_norm": 32.522953033447266,
      "learning_rate": 9.986064764325079e-05,
      "loss": 23.9485,
      "step": 29
    },
    {
      "epoch": 0.15523932729624837,
      "grad_norm": 32.758941650390625,
      "learning_rate": 9.983417545807521e-05,
      "loss": 23.9998,
      "step": 30
    },
    {
      "epoch": 0.16041397153945666,
      "grad_norm": 30.58071517944336,
      "learning_rate": 9.980540724036031e-05,
      "loss": 23.4842,
      "step": 31
    },
    {
      "epoch": 0.16558861578266496,
      "grad_norm": 29.740413665771484,
      "learning_rate": 9.977434446337431e-05,
      "loss": 23.3358,
      "step": 32
    },
    {
      "epoch": 0.17076326002587322,
      "grad_norm": 31.934289932250977,
      "learning_rate": 9.974098871789359e-05,
      "loss": 24.1982,
      "step": 33
    },
    {
      "epoch": 0.1759379042690815,
      "grad_norm": 52.297630310058594,
      "learning_rate": 9.970534171212117e-05,
      "loss": 29.5956,
      "step": 34
    },
    {
      "epoch": 0.18111254851228978,
      "grad_norm": 44.05955123901367,
      "learning_rate": 9.966740527159945e-05,
      "loss": 28.9523,
      "step": 35
    },
    {
      "epoch": 0.18628719275549807,
      "grad_norm": 48.28145217895508,
      "learning_rate": 9.962718133911648e-05,
      "loss": 27.3425,
      "step": 36
    },
    {
      "epoch": 0.19146183699870634,
      "grad_norm": 50.967891693115234,
      "learning_rate": 9.958467197460662e-05,
      "loss": 28.0913,
      "step": 37
    },
    {
      "epoch": 0.19663648124191463,
      "grad_norm": 48.36716842651367,
      "learning_rate": 9.953987935504497e-05,
      "loss": 24.9412,
      "step": 38
    },
    {
      "epoch": 0.2018111254851229,
      "grad_norm": 34.47467803955078,
      "learning_rate": 9.949280577433593e-05,
      "loss": 22.663,
      "step": 39
    },
    {
      "epoch": 0.2069857697283312,
      "grad_norm": 24.475858688354492,
      "learning_rate": 9.944345364319571e-05,
      "loss": 22.2558,
      "step": 40
    },
    {
      "epoch": 0.21216041397153945,
      "grad_norm": 24.5921630859375,
      "learning_rate": 9.939182548902883e-05,
      "loss": 23.2109,
      "step": 41
    },
    {
      "epoch": 0.21733505821474774,
      "grad_norm": 22.380992889404297,
      "learning_rate": 9.933792395579877e-05,
      "loss": 22.2931,
      "step": 42
    },
    {
      "epoch": 0.222509702457956,
      "grad_norm": 27.86241340637207,
      "learning_rate": 9.928175180389254e-05,
      "loss": 21.9427,
      "step": 43
    },
    {
      "epoch": 0.2276843467011643,
      "grad_norm": 25.076570510864258,
      "learning_rate": 9.922331190997922e-05,
      "loss": 21.5344,
      "step": 44
    },
    {
      "epoch": 0.23285899094437257,
      "grad_norm": 26.751014709472656,
      "learning_rate": 9.916260726686278e-05,
      "loss": 21.4259,
      "step": 45
    },
    {
      "epoch": 0.23803363518758086,
      "grad_norm": 34.77674865722656,
      "learning_rate": 9.909964098332879e-05,
      "loss": 24.7821,
      "step": 46
    },
    {
      "epoch": 0.24320827943078913,
      "grad_norm": 45.179378509521484,
      "learning_rate": 9.903441628398511e-05,
      "loss": 25.4269,
      "step": 47
    },
    {
      "epoch": 0.24838292367399742,
      "grad_norm": 49.48162078857422,
      "learning_rate": 9.896693650909686e-05,
      "loss": 26.9395,
      "step": 48
    },
    {
      "epoch": 0.2535575679172057,
      "grad_norm": 51.22860336303711,
      "learning_rate": 9.889720511441532e-05,
      "loss": 24.3481,
      "step": 49
    },
    {
      "epoch": 0.258732212160414,
      "grad_norm": 45.01136779785156,
      "learning_rate": 9.882522567100093e-05,
      "loss": 25.6337,
      "step": 50
    },
    {
      "epoch": 0.258732212160414,
      "eval_loss": 0.3623690903186798,
      "eval_runtime": 2.4658,
      "eval_samples_per_second": 20.278,
      "eval_steps_per_second": 5.272,
      "step": 50
    },
    {
      "epoch": 0.26390685640362227,
      "grad_norm": 34.13395690917969,
      "learning_rate": 9.875100186504046e-05,
      "loss": 22.0404,
      "step": 51
    },
    {
      "epoch": 0.2690815006468305,
      "grad_norm": 25.99323272705078,
      "learning_rate": 9.867453749765821e-05,
      "loss": 21.5824,
      "step": 52
    },
    {
      "epoch": 0.2742561448900388,
      "grad_norm": 25.306379318237305,
      "learning_rate": 9.859583648472133e-05,
      "loss": 21.0068,
      "step": 53
    },
    {
      "epoch": 0.2794307891332471,
      "grad_norm": 24.23813247680664,
      "learning_rate": 9.851490285663937e-05,
      "loss": 20.8086,
      "step": 54
    },
    {
      "epoch": 0.2846054333764554,
      "grad_norm": 26.623321533203125,
      "learning_rate": 9.84317407581577e-05,
      "loss": 20.7912,
      "step": 55
    },
    {
      "epoch": 0.2897800776196636,
      "grad_norm": 24.135229110717773,
      "learning_rate": 9.834635444814545e-05,
      "loss": 21.1925,
      "step": 56
    },
    {
      "epoch": 0.2949547218628719,
      "grad_norm": 23.138614654541016,
      "learning_rate": 9.825874829937722e-05,
      "loss": 20.337,
      "step": 57
    },
    {
      "epoch": 0.3001293661060802,
      "grad_norm": 25.857755661010742,
      "learning_rate": 9.816892679830937e-05,
      "loss": 20.4409,
      "step": 58
    },
    {
      "epoch": 0.3053040103492885,
      "grad_norm": 51.40500259399414,
      "learning_rate": 9.807689454485e-05,
      "loss": 27.1275,
      "step": 59
    },
    {
      "epoch": 0.31047865459249674,
      "grad_norm": 46.01802444458008,
      "learning_rate": 9.798265625212358e-05,
      "loss": 24.8525,
      "step": 60
    },
    {
      "epoch": 0.31565329883570503,
      "grad_norm": 44.5273323059082,
      "learning_rate": 9.788621674622949e-05,
      "loss": 25.6639,
      "step": 61
    },
    {
      "epoch": 0.3208279430789133,
      "grad_norm": 55.26118469238281,
      "learning_rate": 9.778758096599488e-05,
      "loss": 24.8307,
      "step": 62
    },
    {
      "epoch": 0.3260025873221216,
      "grad_norm": 39.436317443847656,
      "learning_rate": 9.76867539627218e-05,
      "loss": 23.664,
      "step": 63
    },
    {
      "epoch": 0.3311772315653299,
      "grad_norm": 27.185758590698242,
      "learning_rate": 9.758374089992841e-05,
      "loss": 20.8368,
      "step": 64
    },
    {
      "epoch": 0.33635187580853815,
      "grad_norm": 24.435218811035156,
      "learning_rate": 9.747854705308464e-05,
      "loss": 20.0758,
      "step": 65
    },
    {
      "epoch": 0.34152652005174644,
      "grad_norm": 24.6842041015625,
      "learning_rate": 9.737117780934197e-05,
      "loss": 20.2967,
      "step": 66
    },
    {
      "epoch": 0.34670116429495473,
      "grad_norm": 24.836183547973633,
      "learning_rate": 9.726163866725763e-05,
      "loss": 19.7435,
      "step": 67
    },
    {
      "epoch": 0.351875808538163,
      "grad_norm": 23.984460830688477,
      "learning_rate": 9.714993523651283e-05,
      "loss": 19.9251,
      "step": 68
    },
    {
      "epoch": 0.35705045278137126,
      "grad_norm": 24.694488525390625,
      "learning_rate": 9.703607323762569e-05,
      "loss": 19.8217,
      "step": 69
    },
    {
      "epoch": 0.36222509702457956,
      "grad_norm": 24.874622344970703,
      "learning_rate": 9.692005850165816e-05,
      "loss": 20.1879,
      "step": 70
    },
    {
      "epoch": 0.36739974126778785,
      "grad_norm": 27.704814910888672,
      "learning_rate": 9.680189696991742e-05,
      "loss": 21.443,
      "step": 71
    },
    {
      "epoch": 0.37257438551099614,
      "grad_norm": 47.33317565917969,
      "learning_rate": 9.668159469365163e-05,
      "loss": 24.7285,
      "step": 72
    },
    {
      "epoch": 0.3777490297542044,
      "grad_norm": 39.55653762817383,
      "learning_rate": 9.655915783374005e-05,
      "loss": 23.3597,
      "step": 73
    },
    {
      "epoch": 0.3829236739974127,
      "grad_norm": 41.66143798828125,
      "learning_rate": 9.643459266037744e-05,
      "loss": 22.6492,
      "step": 74
    },
    {
      "epoch": 0.38809831824062097,
      "grad_norm": 61.841583251953125,
      "learning_rate": 9.630790555275313e-05,
      "loss": 23.7358,
      "step": 75
    },
    {
      "epoch": 0.38809831824062097,
      "eval_loss": 0.3401211202144623,
      "eval_runtime": 2.4648,
      "eval_samples_per_second": 20.286,
      "eval_steps_per_second": 5.274,
      "step": 75
    },
    {
      "epoch": 0.39327296248382926,
      "grad_norm": 47.54359817504883,
      "learning_rate": 9.617910299872416e-05,
      "loss": 20.79,
      "step": 76
    },
    {
      "epoch": 0.3984476067270375,
      "grad_norm": 28.126712799072266,
      "learning_rate": 9.604819159448309e-05,
      "loss": 20.1379,
      "step": 77
    },
    {
      "epoch": 0.4036222509702458,
      "grad_norm": 29.2869930267334,
      "learning_rate": 9.591517804422023e-05,
      "loss": 19.5183,
      "step": 78
    },
    {
      "epoch": 0.4087968952134541,
      "grad_norm": 30.21017074584961,
      "learning_rate": 9.578006915978022e-05,
      "loss": 19.2306,
      "step": 79
    },
    {
      "epoch": 0.4139715394566624,
      "grad_norm": 23.017995834350586,
      "learning_rate": 9.564287186031333e-05,
      "loss": 19.4107,
      "step": 80
    },
    {
      "epoch": 0.4191461836998706,
      "grad_norm": 24.11803436279297,
      "learning_rate": 9.550359317192096e-05,
      "loss": 19.2999,
      "step": 81
    },
    {
      "epoch": 0.4243208279430789,
      "grad_norm": 25.78137969970703,
      "learning_rate": 9.536224022729591e-05,
      "loss": 19.0366,
      "step": 82
    },
    {
      "epoch": 0.4294954721862872,
      "grad_norm": 23.27781867980957,
      "learning_rate": 9.521882026535708e-05,
      "loss": 18.6447,
      "step": 83
    },
    {
      "epoch": 0.4346701164294955,
      "grad_norm": 43.739017486572266,
      "learning_rate": 9.50733406308788e-05,
      "loss": 23.4598,
      "step": 84
    },
    {
      "epoch": 0.4398447606727037,
      "grad_norm": 39.98222351074219,
      "learning_rate": 9.492580877411456e-05,
      "loss": 22.9503,
      "step": 85
    },
    {
      "epoch": 0.445019404915912,
      "grad_norm": 39.081974029541016,
      "learning_rate": 9.477623225041565e-05,
      "loss": 23.2451,
      "step": 86
    },
    {
      "epoch": 0.4501940491591203,
      "grad_norm": 45.064517974853516,
      "learning_rate": 9.462461871984411e-05,
      "loss": 22.8303,
      "step": 87
    },
    {
      "epoch": 0.4553686934023286,
      "grad_norm": 35.90961456298828,
      "learning_rate": 9.447097594678046e-05,
      "loss": 20.3165,
      "step": 88
    },
    {
      "epoch": 0.46054333764553684,
      "grad_norm": 29.820219039916992,
      "learning_rate": 9.431531179952613e-05,
      "loss": 19.7648,
      "step": 89
    },
    {
      "epoch": 0.46571798188874514,
      "grad_norm": 24.659387588500977,
      "learning_rate": 9.415763424990047e-05,
      "loss": 19.644,
      "step": 90
    },
    {
      "epoch": 0.47089262613195343,
      "grad_norm": 23.654937744140625,
      "learning_rate": 9.39979513728325e-05,
      "loss": 18.7199,
      "step": 91
    },
    {
      "epoch": 0.4760672703751617,
      "grad_norm": 25.759111404418945,
      "learning_rate": 9.383627134594741e-05,
      "loss": 19.023,
      "step": 92
    },
    {
      "epoch": 0.48124191461837,
      "grad_norm": 26.290327072143555,
      "learning_rate": 9.367260244914768e-05,
      "loss": 19.474,
      "step": 93
    },
    {
      "epoch": 0.48641655886157825,
      "grad_norm": 22.56080436706543,
      "learning_rate": 9.350695306418922e-05,
      "loss": 18.6015,
      "step": 94
    },
    {
      "epoch": 0.49159120310478654,
      "grad_norm": 24.438447952270508,
      "learning_rate": 9.333933167425194e-05,
      "loss": 18.6668,
      "step": 95
    },
    {
      "epoch": 0.49676584734799484,
      "grad_norm": 26.50576400756836,
      "learning_rate": 9.316974686350542e-05,
      "loss": 20.2677,
      "step": 96
    },
    {
      "epoch": 0.5019404915912031,
      "grad_norm": 34.5866584777832,
      "learning_rate": 9.299820731666933e-05,
      "loss": 22.8894,
      "step": 97
    },
    {
      "epoch": 0.5071151358344114,
      "grad_norm": 37.455406188964844,
      "learning_rate": 9.282472181856854e-05,
      "loss": 22.4006,
      "step": 98
    },
    {
      "epoch": 0.5122897800776197,
      "grad_norm": 42.7640380859375,
      "learning_rate": 9.264929925368338e-05,
      "loss": 22.2345,
      "step": 99
    },
    {
      "epoch": 0.517464424320828,
      "grad_norm": 38.7821044921875,
      "learning_rate": 9.247194860569454e-05,
      "loss": 20.2589,
      "step": 100
    },
    {
      "epoch": 0.517464424320828,
      "eval_loss": 0.30999085307121277,
      "eval_runtime": 2.4658,
      "eval_samples_per_second": 20.278,
      "eval_steps_per_second": 5.272,
      "step": 100
    },
    {
      "epoch": 0.5226390685640362,
      "grad_norm": 36.86570739746094,
      "learning_rate": 9.229267895702307e-05,
      "loss": 19.0464,
      "step": 101
    },
    {
      "epoch": 0.5278137128072445,
      "grad_norm": 25.674314498901367,
      "learning_rate": 9.211149948836523e-05,
      "loss": 18.7845,
      "step": 102
    },
    {
      "epoch": 0.5329883570504528,
      "grad_norm": 26.45655632019043,
      "learning_rate": 9.192841947822232e-05,
      "loss": 18.6392,
      "step": 103
    },
    {
      "epoch": 0.538163001293661,
      "grad_norm": 21.524782180786133,
      "learning_rate": 9.17434483024255e-05,
      "loss": 17.6237,
      "step": 104
    },
    {
      "epoch": 0.5433376455368694,
      "grad_norm": 22.33011817932129,
      "learning_rate": 9.155659543365574e-05,
      "loss": 18.1102,
      "step": 105
    },
    {
      "epoch": 0.5485122897800776,
      "grad_norm": 25.29583168029785,
      "learning_rate": 9.136787044095856e-05,
      "loss": 17.8555,
      "step": 106
    },
    {
      "epoch": 0.553686934023286,
      "grad_norm": 23.13666343688965,
      "learning_rate": 9.117728298925407e-05,
      "loss": 17.7725,
      "step": 107
    },
    {
      "epoch": 0.5588615782664942,
      "grad_norm": 29.024078369140625,
      "learning_rate": 9.0984842838842e-05,
      "loss": 19.2546,
      "step": 108
    },
    {
      "epoch": 0.5640362225097024,
      "grad_norm": 50.17615509033203,
      "learning_rate": 9.079055984490186e-05,
      "loss": 23.3918,
      "step": 109
    },
    {
      "epoch": 0.5692108667529108,
      "grad_norm": 35.480079650878906,
      "learning_rate": 9.059444395698823e-05,
      "loss": 22.019,
      "step": 110
    },
    {
      "epoch": 0.574385510996119,
      "grad_norm": 38.16156005859375,
      "learning_rate": 9.039650521852124e-05,
      "loss": 20.7054,
      "step": 111
    },
    {
      "epoch": 0.5795601552393272,
      "grad_norm": 46.26918411254883,
      "learning_rate": 9.019675376627223e-05,
      "loss": 22.4658,
      "step": 112
    },
    {
      "epoch": 0.5847347994825356,
      "grad_norm": 39.86201095581055,
      "learning_rate": 8.99951998298446e-05,
      "loss": 19.7684,
      "step": 113
    },
    {
      "epoch": 0.5899094437257438,
      "grad_norm": 31.37214469909668,
      "learning_rate": 8.979185373114996e-05,
      "loss": 18.5229,
      "step": 114
    },
    {
      "epoch": 0.5950840879689522,
      "grad_norm": 23.724746704101562,
      "learning_rate": 8.958672588387953e-05,
      "loss": 18.7853,
      "step": 115
    },
    {
      "epoch": 0.6002587322121604,
      "grad_norm": 20.492137908935547,
      "learning_rate": 8.937982679297084e-05,
      "loss": 17.696,
      "step": 116
    },
    {
      "epoch": 0.6054333764553687,
      "grad_norm": 21.25743293762207,
      "learning_rate": 8.917116705406973e-05,
      "loss": 18.5685,
      "step": 117
    },
    {
      "epoch": 0.610608020698577,
      "grad_norm": 20.935260772705078,
      "learning_rate": 8.89607573529878e-05,
      "loss": 17.7144,
      "step": 118
    },
    {
      "epoch": 0.6157826649417852,
      "grad_norm": 22.177967071533203,
      "learning_rate": 8.8748608465155e-05,
      "loss": 17.7657,
      "step": 119
    },
    {
      "epoch": 0.6209573091849935,
      "grad_norm": 23.44055938720703,
      "learning_rate": 8.853473125506803e-05,
      "loss": 17.5972,
      "step": 120
    },
    {
      "epoch": 0.6261319534282018,
      "grad_norm": 28.071083068847656,
      "learning_rate": 8.831913667573379e-05,
      "loss": 20.4785,
      "step": 121
    },
    {
      "epoch": 0.6313065976714101,
      "grad_norm": 41.40524673461914,
      "learning_rate": 8.810183576810856e-05,
      "loss": 22.6656,
      "step": 122
    },
    {
      "epoch": 0.6364812419146184,
      "grad_norm": 35.51282501220703,
      "learning_rate": 8.788283966053244e-05,
      "loss": 20.7936,
      "step": 123
    },
    {
      "epoch": 0.6416558861578266,
      "grad_norm": 36.20588302612305,
      "learning_rate": 8.766215956815959e-05,
      "loss": 20.6696,
      "step": 124
    },
    {
      "epoch": 0.6468305304010349,
      "grad_norm": 45.860923767089844,
      "learning_rate": 8.743980679238385e-05,
      "loss": 20.0276,
      "step": 125
    },
    {
      "epoch": 0.6468305304010349,
      "eval_loss": 0.3056069612503052,
      "eval_runtime": 2.4651,
      "eval_samples_per_second": 20.283,
      "eval_steps_per_second": 5.274,
      "step": 125
    },
    {
      "epoch": 0.6520051746442432,
      "grad_norm": 29.0579891204834,
      "learning_rate": 8.721579272025989e-05,
      "loss": 18.0009,
      "step": 126
    },
    {
      "epoch": 0.6571798188874515,
      "grad_norm": 24.840801239013672,
      "learning_rate": 8.699012882392018e-05,
      "loss": 18.2828,
      "step": 127
    },
    {
      "epoch": 0.6623544631306598,
      "grad_norm": 21.309526443481445,
      "learning_rate": 8.676282665998736e-05,
      "loss": 17.479,
      "step": 128
    },
    {
      "epoch": 0.6675291073738681,
      "grad_norm": 19.316205978393555,
      "learning_rate": 8.653389786898255e-05,
      "loss": 17.4983,
      "step": 129
    },
    {
      "epoch": 0.6727037516170763,
      "grad_norm": 19.56000328063965,
      "learning_rate": 8.630335417472909e-05,
      "loss": 17.2595,
      "step": 130
    },
    {
      "epoch": 0.6778783958602846,
      "grad_norm": 21.257116317749023,
      "learning_rate": 8.607120738375219e-05,
      "loss": 17.2479,
      "step": 131
    },
    {
      "epoch": 0.6830530401034929,
      "grad_norm": 22.04469108581543,
      "learning_rate": 8.583746938467436e-05,
      "loss": 17.3352,
      "step": 132
    },
    {
      "epoch": 0.6882276843467011,
      "grad_norm": 22.032026290893555,
      "learning_rate": 8.560215214760647e-05,
      "loss": 17.1702,
      "step": 133
    },
    {
      "epoch": 0.6934023285899095,
      "grad_norm": 35.43321228027344,
      "learning_rate": 8.53652677235348e-05,
      "loss": 21.7454,
      "step": 134
    },
    {
      "epoch": 0.6985769728331177,
      "grad_norm": 35.82448196411133,
      "learning_rate": 8.512682824370386e-05,
      "loss": 21.3486,
      "step": 135
    },
    {
      "epoch": 0.703751617076326,
      "grad_norm": 33.74680709838867,
      "learning_rate": 8.48868459189952e-05,
      "loss": 19.9599,
      "step": 136
    },
    {
      "epoch": 0.7089262613195343,
      "grad_norm": 34.64817428588867,
      "learning_rate": 8.464533303930195e-05,
      "loss": 20.4086,
      "step": 137
    },
    {
      "epoch": 0.7141009055627425,
      "grad_norm": 30.78453826904297,
      "learning_rate": 8.440230197289955e-05,
      "loss": 19.2218,
      "step": 138
    },
    {
      "epoch": 0.7192755498059509,
      "grad_norm": 21.971385955810547,
      "learning_rate": 8.415776516581229e-05,
      "loss": 17.5228,
      "step": 139
    },
    {
      "epoch": 0.7244501940491591,
      "grad_norm": 25.183547973632812,
      "learning_rate": 8.391173514117591e-05,
      "loss": 17.9851,
      "step": 140
    },
    {
      "epoch": 0.7296248382923674,
      "grad_norm": 21.053089141845703,
      "learning_rate": 8.366422449859635e-05,
      "loss": 17.2393,
      "step": 141
    },
    {
      "epoch": 0.7347994825355757,
      "grad_norm": 20.95396614074707,
      "learning_rate": 8.34152459135044e-05,
      "loss": 16.9388,
      "step": 142
    },
    {
      "epoch": 0.7399741267787839,
      "grad_norm": 20.432722091674805,
      "learning_rate": 8.316481213650668e-05,
      "loss": 16.9324,
      "step": 143
    },
    {
      "epoch": 0.7451487710219923,
      "grad_norm": 21.420108795166016,
      "learning_rate": 8.291293599273253e-05,
      "loss": 17.3103,
      "step": 144
    },
    {
      "epoch": 0.7503234152652005,
      "grad_norm": 22.44611167907715,
      "learning_rate": 8.265963038117736e-05,
      "loss": 16.4267,
      "step": 145
    },
    {
      "epoch": 0.7554980595084088,
      "grad_norm": 29.85533905029297,
      "learning_rate": 8.240490827404196e-05,
      "loss": 19.9061,
      "step": 146
    },
    {
      "epoch": 0.7606727037516171,
      "grad_norm": 36.1021614074707,
      "learning_rate": 8.21487827160682e-05,
      "loss": 21.6027,
      "step": 147
    },
    {
      "epoch": 0.7658473479948253,
      "grad_norm": 35.79337692260742,
      "learning_rate": 8.189126682387103e-05,
      "loss": 21.6038,
      "step": 148
    },
    {
      "epoch": 0.7710219922380336,
      "grad_norm": 39.69446563720703,
      "learning_rate": 8.163237378526669e-05,
      "loss": 21.0512,
      "step": 149
    },
    {
      "epoch": 0.7761966364812419,
      "grad_norm": 37.87336730957031,
      "learning_rate": 8.137211685859739e-05,
      "loss": 20.3793,
      "step": 150
    },
    {
      "epoch": 0.7761966364812419,
      "eval_loss": 0.29094862937927246,
      "eval_runtime": 2.466,
      "eval_samples_per_second": 20.276,
      "eval_steps_per_second": 5.272,
      "step": 150
    }
  ],
  "logging_steps": 1,
  "max_steps": 457,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 3,
  "save_steps": 50,
  "stateful_callbacks": {
    "EarlyStoppingCallback": {
      "args": {
        "early_stopping_patience": 1,
        "early_stopping_threshold": 0.0
      },
      "attributes": {
        "early_stopping_patience_counter": 0
      }
    },
    "TrainerControl": {
      "args": {
        "should_epoch_stop": false,
        "should_evaluate": false,
        "should_log": false,
        "should_save": true,
        "should_training_stop": false
      },
      "attributes": {}
    }
  },
  "total_flos": 3.500900642665267e+18,
  "train_batch_size": 1,
  "trial_name": null,
  "trial_params": null
}