|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 1.0, |
|
"eval_steps": 500, |
|
"global_step": 1405, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.0007117437722419929, |
|
"grad_norm": 7.60900351618248, |
|
"learning_rate": 0.0, |
|
"loss": 0.3226, |
|
"step": 1 |
|
}, |
|
{ |
|
"epoch": 0.0014234875444839859, |
|
"grad_norm": 5.560136785951512, |
|
"learning_rate": 3.685776662974123e-07, |
|
"loss": 0.2375, |
|
"step": 2 |
|
}, |
|
{ |
|
"epoch": 0.002135231316725979, |
|
"grad_norm": 11.276059972500056, |
|
"learning_rate": 5.841817796847145e-07, |
|
"loss": 0.3882, |
|
"step": 3 |
|
}, |
|
{ |
|
"epoch": 0.0028469750889679717, |
|
"grad_norm": 13.05602282686669, |
|
"learning_rate": 7.371553325948246e-07, |
|
"loss": 0.4181, |
|
"step": 4 |
|
}, |
|
{ |
|
"epoch": 0.0035587188612099642, |
|
"grad_norm": 16.506693089412153, |
|
"learning_rate": 8.558108385239805e-07, |
|
"loss": 0.643, |
|
"step": 5 |
|
}, |
|
{ |
|
"epoch": 0.004270462633451958, |
|
"grad_norm": 9.264657032728305, |
|
"learning_rate": 9.527594459821267e-07, |
|
"loss": 0.3291, |
|
"step": 6 |
|
}, |
|
{ |
|
"epoch": 0.00498220640569395, |
|
"grad_norm": 7.619801484573377, |
|
"learning_rate": 1.0347283256405455e-06, |
|
"loss": 0.3482, |
|
"step": 7 |
|
}, |
|
{ |
|
"epoch": 0.0056939501779359435, |
|
"grad_norm": 13.58337314966525, |
|
"learning_rate": 1.1057329988922369e-06, |
|
"loss": 0.5296, |
|
"step": 8 |
|
}, |
|
{ |
|
"epoch": 0.006405693950177936, |
|
"grad_norm": 12.954991407490938, |
|
"learning_rate": 1.168363559369429e-06, |
|
"loss": 0.4107, |
|
"step": 9 |
|
}, |
|
{ |
|
"epoch": 0.0071174377224199285, |
|
"grad_norm": 7.044442105752508, |
|
"learning_rate": 1.2243885048213931e-06, |
|
"loss": 0.251, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.007829181494661922, |
|
"grad_norm": 4.909048072229877, |
|
"learning_rate": 1.2750692327128147e-06, |
|
"loss": 0.1637, |
|
"step": 11 |
|
}, |
|
{ |
|
"epoch": 0.008540925266903915, |
|
"grad_norm": 10.090920444391536, |
|
"learning_rate": 1.3213371122795392e-06, |
|
"loss": 0.2977, |
|
"step": 12 |
|
}, |
|
{ |
|
"epoch": 0.009252669039145907, |
|
"grad_norm": 14.066506590693214, |
|
"learning_rate": 1.363899435586698e-06, |
|
"loss": 0.4403, |
|
"step": 13 |
|
}, |
|
{ |
|
"epoch": 0.0099644128113879, |
|
"grad_norm": 8.253082162037396, |
|
"learning_rate": 1.4033059919379577e-06, |
|
"loss": 0.3865, |
|
"step": 14 |
|
}, |
|
{ |
|
"epoch": 0.010676156583629894, |
|
"grad_norm": 6.319219959263539, |
|
"learning_rate": 1.439992618208695e-06, |
|
"loss": 0.1669, |
|
"step": 15 |
|
}, |
|
{ |
|
"epoch": 0.011387900355871887, |
|
"grad_norm": 14.359855834747316, |
|
"learning_rate": 1.4743106651896492e-06, |
|
"loss": 0.4247, |
|
"step": 16 |
|
}, |
|
{ |
|
"epoch": 0.012099644128113879, |
|
"grad_norm": 18.480766402036487, |
|
"learning_rate": 1.5065475151054406e-06, |
|
"loss": 0.3372, |
|
"step": 17 |
|
}, |
|
{ |
|
"epoch": 0.012811387900355872, |
|
"grad_norm": 9.522867087101119, |
|
"learning_rate": 1.536941225666841e-06, |
|
"loss": 0.3334, |
|
"step": 18 |
|
}, |
|
{ |
|
"epoch": 0.013523131672597865, |
|
"grad_norm": 12.335019039391405, |
|
"learning_rate": 1.5656912095056063e-06, |
|
"loss": 0.2071, |
|
"step": 19 |
|
}, |
|
{ |
|
"epoch": 0.014234875444839857, |
|
"grad_norm": 8.524794375269412, |
|
"learning_rate": 1.592966171118805e-06, |
|
"loss": -0.0933, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.01494661921708185, |
|
"grad_norm": 10.132122030975838, |
|
"learning_rate": 1.61891010532526e-06, |
|
"loss": 0.2318, |
|
"step": 21 |
|
}, |
|
{ |
|
"epoch": 0.015658362989323844, |
|
"grad_norm": 12.212529386799341, |
|
"learning_rate": 1.6436468990102273e-06, |
|
"loss": 0.3701, |
|
"step": 22 |
|
}, |
|
{ |
|
"epoch": 0.016370106761565837, |
|
"grad_norm": 5.9529833839373, |
|
"learning_rate": 1.6672839091152516e-06, |
|
"loss": 0.0561, |
|
"step": 23 |
|
}, |
|
{ |
|
"epoch": 0.01708185053380783, |
|
"grad_norm": 11.755479549489955, |
|
"learning_rate": 1.6899147785769513e-06, |
|
"loss": 0.274, |
|
"step": 24 |
|
}, |
|
{ |
|
"epoch": 0.017793594306049824, |
|
"grad_norm": 7.7205467449537295, |
|
"learning_rate": 1.711621677047961e-06, |
|
"loss": 0.231, |
|
"step": 25 |
|
}, |
|
{ |
|
"epoch": 0.018505338078291814, |
|
"grad_norm": 21.633394409156168, |
|
"learning_rate": 1.7324771018841105e-06, |
|
"loss": 0.5554, |
|
"step": 26 |
|
}, |
|
{ |
|
"epoch": 0.019217081850533807, |
|
"grad_norm": 6.9139962920558835, |
|
"learning_rate": 1.7525453390541434e-06, |
|
"loss": 0.1606, |
|
"step": 27 |
|
}, |
|
{ |
|
"epoch": 0.0199288256227758, |
|
"grad_norm": 10.241558041059887, |
|
"learning_rate": 1.7718836582353703e-06, |
|
"loss": 0.1679, |
|
"step": 28 |
|
}, |
|
{ |
|
"epoch": 0.020640569395017794, |
|
"grad_norm": 15.538854813374531, |
|
"learning_rate": 1.7905432981013013e-06, |
|
"loss": 0.2847, |
|
"step": 29 |
|
}, |
|
{ |
|
"epoch": 0.021352313167259787, |
|
"grad_norm": 6.217944529141481, |
|
"learning_rate": 1.8085702845061074e-06, |
|
"loss": 0.1358, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 0.02206405693950178, |
|
"grad_norm": 5.611328125, |
|
"learning_rate": 1.826006114461645e-06, |
|
"loss": 0.0723, |
|
"step": 31 |
|
}, |
|
{ |
|
"epoch": 0.022775800711743774, |
|
"grad_norm": 7.467791507779399, |
|
"learning_rate": 1.8428883314870616e-06, |
|
"loss": 0.0246, |
|
"step": 32 |
|
}, |
|
{ |
|
"epoch": 0.023487544483985764, |
|
"grad_norm": 9.448473077362943, |
|
"learning_rate": 1.8592510123975292e-06, |
|
"loss": 0.1425, |
|
"step": 33 |
|
}, |
|
{ |
|
"epoch": 0.024199288256227757, |
|
"grad_norm": 5.296569815426998, |
|
"learning_rate": 1.8751251814028527e-06, |
|
"loss": 0.2125, |
|
"step": 34 |
|
}, |
|
{ |
|
"epoch": 0.02491103202846975, |
|
"grad_norm": 16.286462809634553, |
|
"learning_rate": 1.8905391641645261e-06, |
|
"loss": 0.134, |
|
"step": 35 |
|
}, |
|
{ |
|
"epoch": 0.025622775800711744, |
|
"grad_norm": 6.165472103955093, |
|
"learning_rate": 1.9055188919642534e-06, |
|
"loss": 0.1923, |
|
"step": 36 |
|
}, |
|
{ |
|
"epoch": 0.026334519572953737, |
|
"grad_norm": 11.80539517960664, |
|
"learning_rate": 1.9200881641887184e-06, |
|
"loss": 0.3054, |
|
"step": 37 |
|
}, |
|
{ |
|
"epoch": 0.02704626334519573, |
|
"grad_norm": 8.523822835154286, |
|
"learning_rate": 1.9342688758030187e-06, |
|
"loss": 0.1712, |
|
"step": 38 |
|
}, |
|
{ |
|
"epoch": 0.027758007117437724, |
|
"grad_norm": 8.074927399961465, |
|
"learning_rate": 1.9480812152714124e-06, |
|
"loss": 0.1644, |
|
"step": 39 |
|
}, |
|
{ |
|
"epoch": 0.028469750889679714, |
|
"grad_norm": 8.360886709835688, |
|
"learning_rate": 1.9615438374162175e-06, |
|
"loss": 0.1798, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 0.029181494661921707, |
|
"grad_norm": 15.089155109836698, |
|
"learning_rate": 1.9746740149291565e-06, |
|
"loss": 0.2107, |
|
"step": 41 |
|
}, |
|
{ |
|
"epoch": 0.0298932384341637, |
|
"grad_norm": 6.37160626004211, |
|
"learning_rate": 1.9874877716226724e-06, |
|
"loss": 0.0778, |
|
"step": 42 |
|
}, |
|
{ |
|
"epoch": 0.030604982206405694, |
|
"grad_norm": 8.791224560461469, |
|
"learning_rate": 1.9999999999999995e-06, |
|
"loss": 0.0476, |
|
"step": 43 |
|
}, |
|
{ |
|
"epoch": 0.03131672597864769, |
|
"grad_norm": 5.854989569807006, |
|
"learning_rate": 2e-06, |
|
"loss": 0.2265, |
|
"step": 44 |
|
}, |
|
{ |
|
"epoch": 0.03202846975088968, |
|
"grad_norm": 10.330861154797649, |
|
"learning_rate": 1.9985315712187957e-06, |
|
"loss": 0.0709, |
|
"step": 45 |
|
}, |
|
{ |
|
"epoch": 0.032740213523131674, |
|
"grad_norm": 3.875388833503013, |
|
"learning_rate": 1.997063142437592e-06, |
|
"loss": 0.1164, |
|
"step": 46 |
|
}, |
|
{ |
|
"epoch": 0.03345195729537367, |
|
"grad_norm": 4.411229559661042, |
|
"learning_rate": 1.9955947136563876e-06, |
|
"loss": 0.1332, |
|
"step": 47 |
|
}, |
|
{ |
|
"epoch": 0.03416370106761566, |
|
"grad_norm": 6.6818738426669215, |
|
"learning_rate": 1.9941262848751834e-06, |
|
"loss": 0.0902, |
|
"step": 48 |
|
}, |
|
{ |
|
"epoch": 0.034875444839857654, |
|
"grad_norm": 6.317530978345379, |
|
"learning_rate": 1.992657856093979e-06, |
|
"loss": 0.1217, |
|
"step": 49 |
|
}, |
|
{ |
|
"epoch": 0.03558718861209965, |
|
"grad_norm": 5.011833112308298, |
|
"learning_rate": 1.9911894273127754e-06, |
|
"loss": 0.0623, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.036298932384341634, |
|
"grad_norm": 4.000249378060063, |
|
"learning_rate": 1.989720998531571e-06, |
|
"loss": 0.0499, |
|
"step": 51 |
|
}, |
|
{ |
|
"epoch": 0.03701067615658363, |
|
"grad_norm": 3.6701163767727008, |
|
"learning_rate": 1.988252569750367e-06, |
|
"loss": 0.0168, |
|
"step": 52 |
|
}, |
|
{ |
|
"epoch": 0.03772241992882562, |
|
"grad_norm": 6.081936227280345, |
|
"learning_rate": 1.9867841409691626e-06, |
|
"loss": 0.0552, |
|
"step": 53 |
|
}, |
|
{ |
|
"epoch": 0.038434163701067614, |
|
"grad_norm": 7.2095431450001835, |
|
"learning_rate": 1.985315712187959e-06, |
|
"loss": 0.1475, |
|
"step": 54 |
|
}, |
|
{ |
|
"epoch": 0.03914590747330961, |
|
"grad_norm": 3.911782215320487, |
|
"learning_rate": 1.9838472834067546e-06, |
|
"loss": 0.0324, |
|
"step": 55 |
|
}, |
|
{ |
|
"epoch": 0.0398576512455516, |
|
"grad_norm": 4.43042136812578, |
|
"learning_rate": 1.982378854625551e-06, |
|
"loss": 0.1017, |
|
"step": 56 |
|
}, |
|
{ |
|
"epoch": 0.040569395017793594, |
|
"grad_norm": 5.044943803185019, |
|
"learning_rate": 1.9809104258443466e-06, |
|
"loss": 0.1222, |
|
"step": 57 |
|
}, |
|
{ |
|
"epoch": 0.04128113879003559, |
|
"grad_norm": 3.9711826108138335, |
|
"learning_rate": 1.9794419970631423e-06, |
|
"loss": -0.0114, |
|
"step": 58 |
|
}, |
|
{ |
|
"epoch": 0.04199288256227758, |
|
"grad_norm": 5.835195552945358, |
|
"learning_rate": 1.977973568281938e-06, |
|
"loss": 0.0505, |
|
"step": 59 |
|
}, |
|
{ |
|
"epoch": 0.042704626334519574, |
|
"grad_norm": 6.459322781806197, |
|
"learning_rate": 1.9765051395007343e-06, |
|
"loss": 0.0874, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 0.04341637010676157, |
|
"grad_norm": 5.233507135667908, |
|
"learning_rate": 1.97503671071953e-06, |
|
"loss": 0.076, |
|
"step": 61 |
|
}, |
|
{ |
|
"epoch": 0.04412811387900356, |
|
"grad_norm": 5.151067730484437, |
|
"learning_rate": 1.973568281938326e-06, |
|
"loss": 0.0179, |
|
"step": 62 |
|
}, |
|
{ |
|
"epoch": 0.044839857651245554, |
|
"grad_norm": 3.957796133966113, |
|
"learning_rate": 1.972099853157122e-06, |
|
"loss": -0.0188, |
|
"step": 63 |
|
}, |
|
{ |
|
"epoch": 0.04555160142348755, |
|
"grad_norm": 5.406649536187206, |
|
"learning_rate": 1.9706314243759178e-06, |
|
"loss": 0.0788, |
|
"step": 64 |
|
}, |
|
{ |
|
"epoch": 0.046263345195729534, |
|
"grad_norm": 7.595276479001964, |
|
"learning_rate": 1.9691629955947135e-06, |
|
"loss": 0.1444, |
|
"step": 65 |
|
}, |
|
{ |
|
"epoch": 0.04697508896797153, |
|
"grad_norm": 3.711667023276038, |
|
"learning_rate": 1.9676945668135093e-06, |
|
"loss": 0.0428, |
|
"step": 66 |
|
}, |
|
{ |
|
"epoch": 0.04768683274021352, |
|
"grad_norm": 5.0012249399790605, |
|
"learning_rate": 1.9662261380323055e-06, |
|
"loss": 0.171, |
|
"step": 67 |
|
}, |
|
{ |
|
"epoch": 0.048398576512455514, |
|
"grad_norm": 4.020935110013036, |
|
"learning_rate": 1.9647577092511012e-06, |
|
"loss": 0.0764, |
|
"step": 68 |
|
}, |
|
{ |
|
"epoch": 0.04911032028469751, |
|
"grad_norm": 6.544660170834616, |
|
"learning_rate": 1.963289280469897e-06, |
|
"loss": 0.0735, |
|
"step": 69 |
|
}, |
|
{ |
|
"epoch": 0.0498220640569395, |
|
"grad_norm": 5.52421147860282, |
|
"learning_rate": 1.9618208516886928e-06, |
|
"loss": 0.0966, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 0.050533807829181494, |
|
"grad_norm": 5.826681249732675, |
|
"learning_rate": 1.960352422907489e-06, |
|
"loss": 0.0558, |
|
"step": 71 |
|
}, |
|
{ |
|
"epoch": 0.05124555160142349, |
|
"grad_norm": 3.651382466662001, |
|
"learning_rate": 1.9588839941262847e-06, |
|
"loss": 0.0579, |
|
"step": 72 |
|
}, |
|
{ |
|
"epoch": 0.05195729537366548, |
|
"grad_norm": 6.462907446997599, |
|
"learning_rate": 1.9574155653450805e-06, |
|
"loss": 0.2494, |
|
"step": 73 |
|
}, |
|
{ |
|
"epoch": 0.052669039145907474, |
|
"grad_norm": 6.418817168035706, |
|
"learning_rate": 1.9559471365638767e-06, |
|
"loss": 0.2365, |
|
"step": 74 |
|
}, |
|
{ |
|
"epoch": 0.05338078291814947, |
|
"grad_norm": 8.436266999752513, |
|
"learning_rate": 1.9544787077826725e-06, |
|
"loss": 0.186, |
|
"step": 75 |
|
}, |
|
{ |
|
"epoch": 0.05409252669039146, |
|
"grad_norm": 5.451772265172017, |
|
"learning_rate": 1.9530102790014682e-06, |
|
"loss": 0.0795, |
|
"step": 76 |
|
}, |
|
{ |
|
"epoch": 0.054804270462633455, |
|
"grad_norm": 6.3842834495967455, |
|
"learning_rate": 1.9515418502202644e-06, |
|
"loss": 0.0591, |
|
"step": 77 |
|
}, |
|
{ |
|
"epoch": 0.05551601423487545, |
|
"grad_norm": 7.865708940472592, |
|
"learning_rate": 1.95007342143906e-06, |
|
"loss": 0.117, |
|
"step": 78 |
|
}, |
|
{ |
|
"epoch": 0.056227758007117434, |
|
"grad_norm": 4.359122757809347, |
|
"learning_rate": 1.948604992657856e-06, |
|
"loss": 0.0383, |
|
"step": 79 |
|
}, |
|
{ |
|
"epoch": 0.05693950177935943, |
|
"grad_norm": 6.408372778537557, |
|
"learning_rate": 1.947136563876652e-06, |
|
"loss": 0.1241, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 0.05765124555160142, |
|
"grad_norm": 5.159773853580481, |
|
"learning_rate": 1.945668135095448e-06, |
|
"loss": 0.0865, |
|
"step": 81 |
|
}, |
|
{ |
|
"epoch": 0.058362989323843414, |
|
"grad_norm": 5.270421048450883, |
|
"learning_rate": 1.9441997063142437e-06, |
|
"loss": 0.1287, |
|
"step": 82 |
|
}, |
|
{ |
|
"epoch": 0.05907473309608541, |
|
"grad_norm": 7.587688522887674, |
|
"learning_rate": 1.9427312775330394e-06, |
|
"loss": 0.1413, |
|
"step": 83 |
|
}, |
|
{ |
|
"epoch": 0.0597864768683274, |
|
"grad_norm": 5.945961474414765, |
|
"learning_rate": 1.9412628487518356e-06, |
|
"loss": 0.1981, |
|
"step": 84 |
|
}, |
|
{ |
|
"epoch": 0.060498220640569395, |
|
"grad_norm": 5.193881969404905, |
|
"learning_rate": 1.9397944199706314e-06, |
|
"loss": 0.1441, |
|
"step": 85 |
|
}, |
|
{ |
|
"epoch": 0.06120996441281139, |
|
"grad_norm": 7.28816359886349, |
|
"learning_rate": 1.938325991189427e-06, |
|
"loss": 0.1343, |
|
"step": 86 |
|
}, |
|
{ |
|
"epoch": 0.06192170818505338, |
|
"grad_norm": 7.258030357417186, |
|
"learning_rate": 1.936857562408223e-06, |
|
"loss": -0.0681, |
|
"step": 87 |
|
}, |
|
{ |
|
"epoch": 0.06263345195729537, |
|
"grad_norm": 4.389772379889555, |
|
"learning_rate": 1.935389133627019e-06, |
|
"loss": 0.1312, |
|
"step": 88 |
|
}, |
|
{ |
|
"epoch": 0.06334519572953737, |
|
"grad_norm": 5.281648158125961, |
|
"learning_rate": 1.933920704845815e-06, |
|
"loss": -0.0927, |
|
"step": 89 |
|
}, |
|
{ |
|
"epoch": 0.06405693950177936, |
|
"grad_norm": 5.403255978657539, |
|
"learning_rate": 1.9324522760646106e-06, |
|
"loss": 0.0721, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 0.06476868327402135, |
|
"grad_norm": 5.316790654441754, |
|
"learning_rate": 1.9309838472834064e-06, |
|
"loss": 0.0725, |
|
"step": 91 |
|
}, |
|
{ |
|
"epoch": 0.06548042704626335, |
|
"grad_norm": 3.7253524767120534, |
|
"learning_rate": 1.9295154185022026e-06, |
|
"loss": 0.0371, |
|
"step": 92 |
|
}, |
|
{ |
|
"epoch": 0.06619217081850534, |
|
"grad_norm": 4.489100926703136, |
|
"learning_rate": 1.9280469897209984e-06, |
|
"loss": 0.073, |
|
"step": 93 |
|
}, |
|
{ |
|
"epoch": 0.06690391459074733, |
|
"grad_norm": 6.741548121116984, |
|
"learning_rate": 1.9265785609397945e-06, |
|
"loss": 0.0089, |
|
"step": 94 |
|
}, |
|
{ |
|
"epoch": 0.06761565836298933, |
|
"grad_norm": 8.5491531934284, |
|
"learning_rate": 1.9251101321585903e-06, |
|
"loss": 0.1794, |
|
"step": 95 |
|
}, |
|
{ |
|
"epoch": 0.06832740213523132, |
|
"grad_norm": 6.63521867473652, |
|
"learning_rate": 1.923641703377386e-06, |
|
"loss": 0.2012, |
|
"step": 96 |
|
}, |
|
{ |
|
"epoch": 0.06903914590747331, |
|
"grad_norm": 5.711657853102565, |
|
"learning_rate": 1.9221732745961823e-06, |
|
"loss": 0.018, |
|
"step": 97 |
|
}, |
|
{ |
|
"epoch": 0.06975088967971531, |
|
"grad_norm": 6.295723556615106, |
|
"learning_rate": 1.920704845814978e-06, |
|
"loss": 0.0415, |
|
"step": 98 |
|
}, |
|
{ |
|
"epoch": 0.0704626334519573, |
|
"grad_norm": 5.1015357678400886, |
|
"learning_rate": 1.919236417033774e-06, |
|
"loss": 0.1646, |
|
"step": 99 |
|
}, |
|
{ |
|
"epoch": 0.0711743772241993, |
|
"grad_norm": 8.01743609991442, |
|
"learning_rate": 1.9177679882525696e-06, |
|
"loss": 0.1194, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.07188612099644127, |
|
"grad_norm": 5.814410152154771, |
|
"learning_rate": 1.9162995594713658e-06, |
|
"loss": 0.1503, |
|
"step": 101 |
|
}, |
|
{ |
|
"epoch": 0.07259786476868327, |
|
"grad_norm": 3.963587970772288, |
|
"learning_rate": 1.9148311306901615e-06, |
|
"loss": 0.0283, |
|
"step": 102 |
|
}, |
|
{ |
|
"epoch": 0.07330960854092526, |
|
"grad_norm": 9.086552554015991, |
|
"learning_rate": 1.9133627019089573e-06, |
|
"loss": 0.0953, |
|
"step": 103 |
|
}, |
|
{ |
|
"epoch": 0.07402135231316725, |
|
"grad_norm": 6.49258146226826, |
|
"learning_rate": 1.911894273127753e-06, |
|
"loss": 0.1819, |
|
"step": 104 |
|
}, |
|
{ |
|
"epoch": 0.07473309608540925, |
|
"grad_norm": 4.830091946123574, |
|
"learning_rate": 1.9104258443465492e-06, |
|
"loss": 0.0733, |
|
"step": 105 |
|
}, |
|
{ |
|
"epoch": 0.07544483985765124, |
|
"grad_norm": 5.181409270596075, |
|
"learning_rate": 1.908957415565345e-06, |
|
"loss": 0.1348, |
|
"step": 106 |
|
}, |
|
{ |
|
"epoch": 0.07615658362989323, |
|
"grad_norm": 7.155432242100337, |
|
"learning_rate": 1.9074889867841408e-06, |
|
"loss": 0.1745, |
|
"step": 107 |
|
}, |
|
{ |
|
"epoch": 0.07686832740213523, |
|
"grad_norm": 5.385060321324789, |
|
"learning_rate": 1.9060205580029367e-06, |
|
"loss": 0.133, |
|
"step": 108 |
|
}, |
|
{ |
|
"epoch": 0.07758007117437722, |
|
"grad_norm": 8.457953634023596, |
|
"learning_rate": 1.9045521292217325e-06, |
|
"loss": 0.0449, |
|
"step": 109 |
|
}, |
|
{ |
|
"epoch": 0.07829181494661921, |
|
"grad_norm": 5.9295118705462775, |
|
"learning_rate": 1.9030837004405285e-06, |
|
"loss": 0.0189, |
|
"step": 110 |
|
}, |
|
{ |
|
"epoch": 0.07900355871886121, |
|
"grad_norm": 4.69031430002641, |
|
"learning_rate": 1.9016152716593243e-06, |
|
"loss": 0.1328, |
|
"step": 111 |
|
}, |
|
{ |
|
"epoch": 0.0797153024911032, |
|
"grad_norm": 7.986583187874881, |
|
"learning_rate": 1.9001468428781202e-06, |
|
"loss": 0.1729, |
|
"step": 112 |
|
}, |
|
{ |
|
"epoch": 0.0804270462633452, |
|
"grad_norm": 5.246412232049036, |
|
"learning_rate": 1.8986784140969162e-06, |
|
"loss": 0.1714, |
|
"step": 113 |
|
}, |
|
{ |
|
"epoch": 0.08113879003558719, |
|
"grad_norm": 7.792759973315551, |
|
"learning_rate": 1.8972099853157122e-06, |
|
"loss": 0.1794, |
|
"step": 114 |
|
}, |
|
{ |
|
"epoch": 0.08185053380782918, |
|
"grad_norm": 4.207402771649719, |
|
"learning_rate": 1.8957415565345082e-06, |
|
"loss": 0.0331, |
|
"step": 115 |
|
}, |
|
{ |
|
"epoch": 0.08256227758007118, |
|
"grad_norm": 5.116878121611436, |
|
"learning_rate": 1.894273127753304e-06, |
|
"loss": -0.0015, |
|
"step": 116 |
|
}, |
|
{ |
|
"epoch": 0.08327402135231317, |
|
"grad_norm": 3.6983379858284824, |
|
"learning_rate": 1.8928046989721e-06, |
|
"loss": -0.035, |
|
"step": 117 |
|
}, |
|
{ |
|
"epoch": 0.08398576512455516, |
|
"grad_norm": 5.951576331841862, |
|
"learning_rate": 1.8913362701908957e-06, |
|
"loss": 0.1008, |
|
"step": 118 |
|
}, |
|
{ |
|
"epoch": 0.08469750889679716, |
|
"grad_norm": 5.009799890233696, |
|
"learning_rate": 1.8898678414096916e-06, |
|
"loss": 0.1037, |
|
"step": 119 |
|
}, |
|
{ |
|
"epoch": 0.08540925266903915, |
|
"grad_norm": 4.965469332944383, |
|
"learning_rate": 1.8883994126284874e-06, |
|
"loss": 0.0348, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 0.08612099644128114, |
|
"grad_norm": 7.9142446995932, |
|
"learning_rate": 1.8869309838472834e-06, |
|
"loss": 0.1599, |
|
"step": 121 |
|
}, |
|
{ |
|
"epoch": 0.08683274021352314, |
|
"grad_norm": 3.3174167347213226, |
|
"learning_rate": 1.8854625550660792e-06, |
|
"loss": -0.1183, |
|
"step": 122 |
|
}, |
|
{ |
|
"epoch": 0.08754448398576513, |
|
"grad_norm": 5.90505921000642, |
|
"learning_rate": 1.8839941262848751e-06, |
|
"loss": 0.1535, |
|
"step": 123 |
|
}, |
|
{ |
|
"epoch": 0.08825622775800712, |
|
"grad_norm": 7.857140528690935, |
|
"learning_rate": 1.882525697503671e-06, |
|
"loss": 0.0556, |
|
"step": 124 |
|
}, |
|
{ |
|
"epoch": 0.08896797153024912, |
|
"grad_norm": 4.818522066694991, |
|
"learning_rate": 1.8810572687224669e-06, |
|
"loss": 0.1041, |
|
"step": 125 |
|
}, |
|
{ |
|
"epoch": 0.08967971530249111, |
|
"grad_norm": 9.055482028072397, |
|
"learning_rate": 1.8795888399412626e-06, |
|
"loss": 0.056, |
|
"step": 126 |
|
}, |
|
{ |
|
"epoch": 0.0903914590747331, |
|
"grad_norm": 6.0870744184366545, |
|
"learning_rate": 1.8781204111600586e-06, |
|
"loss": 0.1566, |
|
"step": 127 |
|
}, |
|
{ |
|
"epoch": 0.0911032028469751, |
|
"grad_norm": 6.217795754068048, |
|
"learning_rate": 1.8766519823788544e-06, |
|
"loss": 0.0989, |
|
"step": 128 |
|
}, |
|
{ |
|
"epoch": 0.09181494661921709, |
|
"grad_norm": 10.18800736550528, |
|
"learning_rate": 1.8751835535976504e-06, |
|
"loss": 0.071, |
|
"step": 129 |
|
}, |
|
{ |
|
"epoch": 0.09252669039145907, |
|
"grad_norm": 4.216140731359187, |
|
"learning_rate": 1.8737151248164461e-06, |
|
"loss": -0.0453, |
|
"step": 130 |
|
}, |
|
{ |
|
"epoch": 0.09323843416370106, |
|
"grad_norm": 7.185461136601148, |
|
"learning_rate": 1.8722466960352421e-06, |
|
"loss": 0.1436, |
|
"step": 131 |
|
}, |
|
{ |
|
"epoch": 0.09395017793594305, |
|
"grad_norm": 12.892273213757303, |
|
"learning_rate": 1.8707782672540383e-06, |
|
"loss": 0.0127, |
|
"step": 132 |
|
}, |
|
{ |
|
"epoch": 0.09466192170818505, |
|
"grad_norm": 5.811455089210124, |
|
"learning_rate": 1.869309838472834e-06, |
|
"loss": 0.0611, |
|
"step": 133 |
|
}, |
|
{ |
|
"epoch": 0.09537366548042704, |
|
"grad_norm": 8.637525668292405, |
|
"learning_rate": 1.86784140969163e-06, |
|
"loss": 0.1458, |
|
"step": 134 |
|
}, |
|
{ |
|
"epoch": 0.09608540925266904, |
|
"grad_norm": 4.6089392504571265, |
|
"learning_rate": 1.8663729809104258e-06, |
|
"loss": 0.0784, |
|
"step": 135 |
|
}, |
|
{ |
|
"epoch": 0.09679715302491103, |
|
"grad_norm": 3.9349535019101336, |
|
"learning_rate": 1.8649045521292218e-06, |
|
"loss": 0.0716, |
|
"step": 136 |
|
}, |
|
{ |
|
"epoch": 0.09750889679715302, |
|
"grad_norm": 7.3242760414425945, |
|
"learning_rate": 1.8634361233480175e-06, |
|
"loss": 0.0706, |
|
"step": 137 |
|
}, |
|
{ |
|
"epoch": 0.09822064056939502, |
|
"grad_norm": 8.010497834811732, |
|
"learning_rate": 1.8619676945668135e-06, |
|
"loss": 0.1117, |
|
"step": 138 |
|
}, |
|
{ |
|
"epoch": 0.09893238434163701, |
|
"grad_norm": 3.481371350459092, |
|
"learning_rate": 1.8604992657856093e-06, |
|
"loss": 0.1401, |
|
"step": 139 |
|
}, |
|
{ |
|
"epoch": 0.099644128113879, |
|
"grad_norm": 4.368546439955386, |
|
"learning_rate": 1.8590308370044053e-06, |
|
"loss": 0.1753, |
|
"step": 140 |
|
}, |
|
{ |
|
"epoch": 0.100355871886121, |
|
"grad_norm": 4.542224931382252, |
|
"learning_rate": 1.857562408223201e-06, |
|
"loss": 0.129, |
|
"step": 141 |
|
}, |
|
{ |
|
"epoch": 0.10106761565836299, |
|
"grad_norm": 4.088790801733064, |
|
"learning_rate": 1.856093979441997e-06, |
|
"loss": 0.0443, |
|
"step": 142 |
|
}, |
|
{ |
|
"epoch": 0.10177935943060498, |
|
"grad_norm": 3.4192080621509175, |
|
"learning_rate": 1.8546255506607928e-06, |
|
"loss": 0.0547, |
|
"step": 143 |
|
}, |
|
{ |
|
"epoch": 0.10249110320284698, |
|
"grad_norm": 7.379458873728003, |
|
"learning_rate": 1.8531571218795888e-06, |
|
"loss": 0.2053, |
|
"step": 144 |
|
}, |
|
{ |
|
"epoch": 0.10320284697508897, |
|
"grad_norm": 5.112633648829968, |
|
"learning_rate": 1.8516886930983845e-06, |
|
"loss": 0.0284, |
|
"step": 145 |
|
}, |
|
{ |
|
"epoch": 0.10391459074733096, |
|
"grad_norm": 9.952083611017523, |
|
"learning_rate": 1.8502202643171805e-06, |
|
"loss": 0.1943, |
|
"step": 146 |
|
}, |
|
{ |
|
"epoch": 0.10462633451957296, |
|
"grad_norm": 4.472282454017271, |
|
"learning_rate": 1.8487518355359763e-06, |
|
"loss": 0.0597, |
|
"step": 147 |
|
}, |
|
{ |
|
"epoch": 0.10533807829181495, |
|
"grad_norm": 4.735680126419607, |
|
"learning_rate": 1.8472834067547722e-06, |
|
"loss": 0.1466, |
|
"step": 148 |
|
}, |
|
{ |
|
"epoch": 0.10604982206405694, |
|
"grad_norm": 3.779905348102356, |
|
"learning_rate": 1.845814977973568e-06, |
|
"loss": 0.1324, |
|
"step": 149 |
|
}, |
|
{ |
|
"epoch": 0.10676156583629894, |
|
"grad_norm": 9.724957510651878, |
|
"learning_rate": 1.844346549192364e-06, |
|
"loss": 0.3093, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 0.10747330960854093, |
|
"grad_norm": 4.251906920827278, |
|
"learning_rate": 1.84287812041116e-06, |
|
"loss": 0.0364, |
|
"step": 151 |
|
}, |
|
{ |
|
"epoch": 0.10818505338078292, |
|
"grad_norm": 4.461130542219279, |
|
"learning_rate": 1.841409691629956e-06, |
|
"loss": 0.1279, |
|
"step": 152 |
|
}, |
|
{ |
|
"epoch": 0.10889679715302492, |
|
"grad_norm": 4.268606185166076, |
|
"learning_rate": 1.839941262848752e-06, |
|
"loss": 0.0549, |
|
"step": 153 |
|
}, |
|
{ |
|
"epoch": 0.10960854092526691, |
|
"grad_norm": 6.057791857897027, |
|
"learning_rate": 1.8384728340675477e-06, |
|
"loss": -0.0231, |
|
"step": 154 |
|
}, |
|
{ |
|
"epoch": 0.1103202846975089, |
|
"grad_norm": 5.236227043795447, |
|
"learning_rate": 1.8370044052863437e-06, |
|
"loss": 0.0098, |
|
"step": 155 |
|
}, |
|
{ |
|
"epoch": 0.1110320284697509, |
|
"grad_norm": 4.598051587320243, |
|
"learning_rate": 1.8355359765051394e-06, |
|
"loss": 0.0601, |
|
"step": 156 |
|
}, |
|
{ |
|
"epoch": 0.11174377224199289, |
|
"grad_norm": 9.861476861561522, |
|
"learning_rate": 1.8340675477239354e-06, |
|
"loss": 0.07, |
|
"step": 157 |
|
}, |
|
{ |
|
"epoch": 0.11245551601423487, |
|
"grad_norm": 6.225559819185409, |
|
"learning_rate": 1.8325991189427312e-06, |
|
"loss": 0.1472, |
|
"step": 158 |
|
}, |
|
{ |
|
"epoch": 0.11316725978647686, |
|
"grad_norm": 4.631070045753654, |
|
"learning_rate": 1.8311306901615271e-06, |
|
"loss": 0.0717, |
|
"step": 159 |
|
}, |
|
{ |
|
"epoch": 0.11387900355871886, |
|
"grad_norm": 5.736291549508864, |
|
"learning_rate": 1.829662261380323e-06, |
|
"loss": 0.1787, |
|
"step": 160 |
|
}, |
|
{ |
|
"epoch": 0.11459074733096085, |
|
"grad_norm": 4.815478307431745, |
|
"learning_rate": 1.8281938325991189e-06, |
|
"loss": 0.0704, |
|
"step": 161 |
|
}, |
|
{ |
|
"epoch": 0.11530249110320284, |
|
"grad_norm": 5.413176052520917, |
|
"learning_rate": 1.8267254038179147e-06, |
|
"loss": 0.1206, |
|
"step": 162 |
|
}, |
|
{ |
|
"epoch": 0.11601423487544484, |
|
"grad_norm": 4.795932739358852, |
|
"learning_rate": 1.8252569750367106e-06, |
|
"loss": 0.1241, |
|
"step": 163 |
|
}, |
|
{ |
|
"epoch": 0.11672597864768683, |
|
"grad_norm": 6.287620540244574, |
|
"learning_rate": 1.8237885462555064e-06, |
|
"loss": -0.0185, |
|
"step": 164 |
|
}, |
|
{ |
|
"epoch": 0.11743772241992882, |
|
"grad_norm": 5.088224444723123, |
|
"learning_rate": 1.8223201174743024e-06, |
|
"loss": 0.0659, |
|
"step": 165 |
|
}, |
|
{ |
|
"epoch": 0.11814946619217082, |
|
"grad_norm": 5.277869773642978, |
|
"learning_rate": 1.8208516886930981e-06, |
|
"loss": 0.1567, |
|
"step": 166 |
|
}, |
|
{ |
|
"epoch": 0.11886120996441281, |
|
"grad_norm": 4.098325312915435, |
|
"learning_rate": 1.8193832599118941e-06, |
|
"loss": 0.0174, |
|
"step": 167 |
|
}, |
|
{ |
|
"epoch": 0.1195729537366548, |
|
"grad_norm": 3.747692160944269, |
|
"learning_rate": 1.81791483113069e-06, |
|
"loss": 0.0087, |
|
"step": 168 |
|
}, |
|
{ |
|
"epoch": 0.1202846975088968, |
|
"grad_norm": 4.252810054108135, |
|
"learning_rate": 1.8164464023494859e-06, |
|
"loss": 0.0087, |
|
"step": 169 |
|
}, |
|
{ |
|
"epoch": 0.12099644128113879, |
|
"grad_norm": 4.828844134158512, |
|
"learning_rate": 1.8149779735682818e-06, |
|
"loss": 0.0951, |
|
"step": 170 |
|
}, |
|
{ |
|
"epoch": 0.12170818505338078, |
|
"grad_norm": 4.874472173824431, |
|
"learning_rate": 1.8135095447870778e-06, |
|
"loss": 0.1129, |
|
"step": 171 |
|
}, |
|
{ |
|
"epoch": 0.12241992882562278, |
|
"grad_norm": 7.430423222806325, |
|
"learning_rate": 1.8120411160058738e-06, |
|
"loss": 0.1398, |
|
"step": 172 |
|
}, |
|
{ |
|
"epoch": 0.12313167259786477, |
|
"grad_norm": 3.1268219786286453, |
|
"learning_rate": 1.8105726872246696e-06, |
|
"loss": -0.0363, |
|
"step": 173 |
|
}, |
|
{ |
|
"epoch": 0.12384341637010676, |
|
"grad_norm": 5.396860601762695, |
|
"learning_rate": 1.8091042584434655e-06, |
|
"loss": 0.1385, |
|
"step": 174 |
|
}, |
|
{ |
|
"epoch": 0.12455516014234876, |
|
"grad_norm": 4.718918348934545, |
|
"learning_rate": 1.8076358296622613e-06, |
|
"loss": 0.0925, |
|
"step": 175 |
|
}, |
|
{ |
|
"epoch": 0.12526690391459075, |
|
"grad_norm": 7.307204717516227, |
|
"learning_rate": 1.8061674008810573e-06, |
|
"loss": 0.2154, |
|
"step": 176 |
|
}, |
|
{ |
|
"epoch": 0.12597864768683273, |
|
"grad_norm": 5.0495562443417255, |
|
"learning_rate": 1.804698972099853e-06, |
|
"loss": 0.1373, |
|
"step": 177 |
|
}, |
|
{ |
|
"epoch": 0.12669039145907474, |
|
"grad_norm": 4.222659186010196, |
|
"learning_rate": 1.803230543318649e-06, |
|
"loss": 0.1066, |
|
"step": 178 |
|
}, |
|
{ |
|
"epoch": 0.12740213523131672, |
|
"grad_norm": 5.402167393915139, |
|
"learning_rate": 1.8017621145374448e-06, |
|
"loss": 0.0681, |
|
"step": 179 |
|
}, |
|
{ |
|
"epoch": 0.12811387900355872, |
|
"grad_norm": 4.224096682439072, |
|
"learning_rate": 1.8002936857562408e-06, |
|
"loss": 0.1081, |
|
"step": 180 |
|
}, |
|
{ |
|
"epoch": 0.1288256227758007, |
|
"grad_norm": 5.742877031304056, |
|
"learning_rate": 1.7988252569750365e-06, |
|
"loss": 0.0093, |
|
"step": 181 |
|
}, |
|
{ |
|
"epoch": 0.1295373665480427, |
|
"grad_norm": 5.540002826455837, |
|
"learning_rate": 1.7973568281938325e-06, |
|
"loss": 0.0098, |
|
"step": 182 |
|
}, |
|
{ |
|
"epoch": 0.1302491103202847, |
|
"grad_norm": 4.891124760744802, |
|
"learning_rate": 1.7958883994126283e-06, |
|
"loss": 0.1506, |
|
"step": 183 |
|
}, |
|
{ |
|
"epoch": 0.1309608540925267, |
|
"grad_norm": 2.99421547474528, |
|
"learning_rate": 1.7944199706314243e-06, |
|
"loss": -0.012, |
|
"step": 184 |
|
}, |
|
{ |
|
"epoch": 0.13167259786476868, |
|
"grad_norm": 8.379749374829832, |
|
"learning_rate": 1.7929515418502202e-06, |
|
"loss": 0.0833, |
|
"step": 185 |
|
}, |
|
{ |
|
"epoch": 0.13238434163701068, |
|
"grad_norm": 6.515892846619184, |
|
"learning_rate": 1.791483113069016e-06, |
|
"loss": 0.0955, |
|
"step": 186 |
|
}, |
|
{ |
|
"epoch": 0.13309608540925266, |
|
"grad_norm": 5.017827486261835, |
|
"learning_rate": 1.790014684287812e-06, |
|
"loss": 0.1188, |
|
"step": 187 |
|
}, |
|
{ |
|
"epoch": 0.13380782918149467, |
|
"grad_norm": 5.695263939586359, |
|
"learning_rate": 1.7885462555066077e-06, |
|
"loss": 0.124, |
|
"step": 188 |
|
}, |
|
{ |
|
"epoch": 0.13451957295373665, |
|
"grad_norm": 6.125125494469321, |
|
"learning_rate": 1.7870778267254037e-06, |
|
"loss": 0.0924, |
|
"step": 189 |
|
}, |
|
{ |
|
"epoch": 0.13523131672597866, |
|
"grad_norm": 4.521668926781665, |
|
"learning_rate": 1.7856093979441997e-06, |
|
"loss": -0.1061, |
|
"step": 190 |
|
}, |
|
{ |
|
"epoch": 0.13594306049822064, |
|
"grad_norm": 4.267143230844152, |
|
"learning_rate": 1.7841409691629957e-06, |
|
"loss": 0.0566, |
|
"step": 191 |
|
}, |
|
{ |
|
"epoch": 0.13665480427046264, |
|
"grad_norm": 6.08031214414718, |
|
"learning_rate": 1.7826725403817914e-06, |
|
"loss": 0.115, |
|
"step": 192 |
|
}, |
|
{ |
|
"epoch": 0.13736654804270462, |
|
"grad_norm": 5.940400710028165, |
|
"learning_rate": 1.7812041116005874e-06, |
|
"loss": 0.1274, |
|
"step": 193 |
|
}, |
|
{ |
|
"epoch": 0.13807829181494663, |
|
"grad_norm": 4.585486004779037, |
|
"learning_rate": 1.7797356828193832e-06, |
|
"loss": 0.1713, |
|
"step": 194 |
|
}, |
|
{ |
|
"epoch": 0.1387900355871886, |
|
"grad_norm": 4.136195163173087, |
|
"learning_rate": 1.7782672540381792e-06, |
|
"loss": 0.0155, |
|
"step": 195 |
|
}, |
|
{ |
|
"epoch": 0.13950177935943062, |
|
"grad_norm": 5.6989356134689615, |
|
"learning_rate": 1.776798825256975e-06, |
|
"loss": 0.0103, |
|
"step": 196 |
|
}, |
|
{ |
|
"epoch": 0.1402135231316726, |
|
"grad_norm": 6.1401351461541225, |
|
"learning_rate": 1.775330396475771e-06, |
|
"loss": 0.1273, |
|
"step": 197 |
|
}, |
|
{ |
|
"epoch": 0.1409252669039146, |
|
"grad_norm": 4.885723546305384, |
|
"learning_rate": 1.7738619676945667e-06, |
|
"loss": 0.027, |
|
"step": 198 |
|
}, |
|
{ |
|
"epoch": 0.14163701067615658, |
|
"grad_norm": 3.887840530837794, |
|
"learning_rate": 1.7723935389133626e-06, |
|
"loss": -0.0177, |
|
"step": 199 |
|
}, |
|
{ |
|
"epoch": 0.1423487544483986, |
|
"grad_norm": 6.78388018953942, |
|
"learning_rate": 1.7709251101321584e-06, |
|
"loss": 0.2093, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.14306049822064057, |
|
"grad_norm": 3.94780351917843, |
|
"learning_rate": 1.7694566813509544e-06, |
|
"loss": 0.1258, |
|
"step": 201 |
|
}, |
|
{ |
|
"epoch": 0.14377224199288255, |
|
"grad_norm": 3.8629567441046673, |
|
"learning_rate": 1.7679882525697504e-06, |
|
"loss": 0.0233, |
|
"step": 202 |
|
}, |
|
{ |
|
"epoch": 0.14448398576512456, |
|
"grad_norm": 4.979996147920275, |
|
"learning_rate": 1.7665198237885461e-06, |
|
"loss": 0.0793, |
|
"step": 203 |
|
}, |
|
{ |
|
"epoch": 0.14519572953736654, |
|
"grad_norm": 4.0538696169783215, |
|
"learning_rate": 1.765051395007342e-06, |
|
"loss": 0.0413, |
|
"step": 204 |
|
}, |
|
{ |
|
"epoch": 0.14590747330960854, |
|
"grad_norm": 6.589998597302671, |
|
"learning_rate": 1.7635829662261379e-06, |
|
"loss": 0.1296, |
|
"step": 205 |
|
}, |
|
{ |
|
"epoch": 0.14661921708185052, |
|
"grad_norm": 4.877422415596789, |
|
"learning_rate": 1.7621145374449338e-06, |
|
"loss": 0.0984, |
|
"step": 206 |
|
}, |
|
{ |
|
"epoch": 0.14733096085409253, |
|
"grad_norm": 4.852718393716642, |
|
"learning_rate": 1.7606461086637296e-06, |
|
"loss": 0.0614, |
|
"step": 207 |
|
}, |
|
{ |
|
"epoch": 0.1480427046263345, |
|
"grad_norm": 5.871070034362448, |
|
"learning_rate": 1.7591776798825256e-06, |
|
"loss": 0.0569, |
|
"step": 208 |
|
}, |
|
{ |
|
"epoch": 0.14875444839857652, |
|
"grad_norm": 6.958293374074386, |
|
"learning_rate": 1.7577092511013214e-06, |
|
"loss": 0.0621, |
|
"step": 209 |
|
}, |
|
{ |
|
"epoch": 0.1494661921708185, |
|
"grad_norm": 7.285285846283627, |
|
"learning_rate": 1.7562408223201175e-06, |
|
"loss": 0.0888, |
|
"step": 210 |
|
}, |
|
{ |
|
"epoch": 0.1501779359430605, |
|
"grad_norm": 5.471947133249475, |
|
"learning_rate": 1.7547723935389133e-06, |
|
"loss": 0.0383, |
|
"step": 211 |
|
}, |
|
{ |
|
"epoch": 0.15088967971530248, |
|
"grad_norm": 4.096639065092099, |
|
"learning_rate": 1.7533039647577093e-06, |
|
"loss": -0.0454, |
|
"step": 212 |
|
}, |
|
{ |
|
"epoch": 0.1516014234875445, |
|
"grad_norm": 3.473240875019082, |
|
"learning_rate": 1.751835535976505e-06, |
|
"loss": 0.0136, |
|
"step": 213 |
|
}, |
|
{ |
|
"epoch": 0.15231316725978647, |
|
"grad_norm": 4.26736649396595, |
|
"learning_rate": 1.750367107195301e-06, |
|
"loss": 0.0447, |
|
"step": 214 |
|
}, |
|
{ |
|
"epoch": 0.15302491103202848, |
|
"grad_norm": 3.2994470190796923, |
|
"learning_rate": 1.7488986784140968e-06, |
|
"loss": 0.0757, |
|
"step": 215 |
|
}, |
|
{ |
|
"epoch": 0.15373665480427046, |
|
"grad_norm": 8.31822386123182, |
|
"learning_rate": 1.7474302496328928e-06, |
|
"loss": 0.1598, |
|
"step": 216 |
|
}, |
|
{ |
|
"epoch": 0.15444839857651246, |
|
"grad_norm": 4.753324048290485, |
|
"learning_rate": 1.7459618208516885e-06, |
|
"loss": 0.0325, |
|
"step": 217 |
|
}, |
|
{ |
|
"epoch": 0.15516014234875444, |
|
"grad_norm": 5.741786563915158, |
|
"learning_rate": 1.7444933920704845e-06, |
|
"loss": 0.2178, |
|
"step": 218 |
|
}, |
|
{ |
|
"epoch": 0.15587188612099645, |
|
"grad_norm": 3.1404840997622285, |
|
"learning_rate": 1.7430249632892805e-06, |
|
"loss": 0.1023, |
|
"step": 219 |
|
}, |
|
{ |
|
"epoch": 0.15658362989323843, |
|
"grad_norm": 5.481284115553573, |
|
"learning_rate": 1.7415565345080763e-06, |
|
"loss": 0.0977, |
|
"step": 220 |
|
}, |
|
{ |
|
"epoch": 0.15729537366548044, |
|
"grad_norm": 5.366544771093536, |
|
"learning_rate": 1.7400881057268722e-06, |
|
"loss": 0.1473, |
|
"step": 221 |
|
}, |
|
{ |
|
"epoch": 0.15800711743772242, |
|
"grad_norm": 14.973850090188478, |
|
"learning_rate": 1.738619676945668e-06, |
|
"loss": 0.1034, |
|
"step": 222 |
|
}, |
|
{ |
|
"epoch": 0.15871886120996442, |
|
"grad_norm": 7.50503244046107, |
|
"learning_rate": 1.737151248164464e-06, |
|
"loss": 0.1243, |
|
"step": 223 |
|
}, |
|
{ |
|
"epoch": 0.1594306049822064, |
|
"grad_norm": 4.518201891315668, |
|
"learning_rate": 1.7356828193832597e-06, |
|
"loss": 0.092, |
|
"step": 224 |
|
}, |
|
{ |
|
"epoch": 0.1601423487544484, |
|
"grad_norm": 3.4314498370679942, |
|
"learning_rate": 1.7342143906020557e-06, |
|
"loss": 0.0721, |
|
"step": 225 |
|
}, |
|
{ |
|
"epoch": 0.1608540925266904, |
|
"grad_norm": 5.197626180947426, |
|
"learning_rate": 1.7327459618208515e-06, |
|
"loss": 0.0609, |
|
"step": 226 |
|
}, |
|
{ |
|
"epoch": 0.1615658362989324, |
|
"grad_norm": 5.547918860845338, |
|
"learning_rate": 1.7312775330396475e-06, |
|
"loss": 0.0444, |
|
"step": 227 |
|
}, |
|
{ |
|
"epoch": 0.16227758007117438, |
|
"grad_norm": 6.431239963890675, |
|
"learning_rate": 1.7298091042584432e-06, |
|
"loss": -0.0016, |
|
"step": 228 |
|
}, |
|
{ |
|
"epoch": 0.16298932384341638, |
|
"grad_norm": 4.553712298781429, |
|
"learning_rate": 1.7283406754772394e-06, |
|
"loss": 0.1477, |
|
"step": 229 |
|
}, |
|
{ |
|
"epoch": 0.16370106761565836, |
|
"grad_norm": 5.22540005699231, |
|
"learning_rate": 1.7268722466960352e-06, |
|
"loss": 0.0993, |
|
"step": 230 |
|
}, |
|
{ |
|
"epoch": 0.16441281138790034, |
|
"grad_norm": 6.437986781409808, |
|
"learning_rate": 1.7254038179148312e-06, |
|
"loss": 0.0359, |
|
"step": 231 |
|
}, |
|
{ |
|
"epoch": 0.16512455516014235, |
|
"grad_norm": 4.695253800264834, |
|
"learning_rate": 1.723935389133627e-06, |
|
"loss": 0.0645, |
|
"step": 232 |
|
}, |
|
{ |
|
"epoch": 0.16583629893238433, |
|
"grad_norm": 6.533685695462389, |
|
"learning_rate": 1.722466960352423e-06, |
|
"loss": 0.0512, |
|
"step": 233 |
|
}, |
|
{ |
|
"epoch": 0.16654804270462634, |
|
"grad_norm": 9.138116893479024, |
|
"learning_rate": 1.7209985315712187e-06, |
|
"loss": 0.0731, |
|
"step": 234 |
|
}, |
|
{ |
|
"epoch": 0.16725978647686832, |
|
"grad_norm": 5.624524583858941, |
|
"learning_rate": 1.7195301027900147e-06, |
|
"loss": 0.17, |
|
"step": 235 |
|
}, |
|
{ |
|
"epoch": 0.16797153024911032, |
|
"grad_norm": 3.2300353491770943, |
|
"learning_rate": 1.7180616740088106e-06, |
|
"loss": 0.0418, |
|
"step": 236 |
|
}, |
|
{ |
|
"epoch": 0.1686832740213523, |
|
"grad_norm": 3.1549379246525033, |
|
"learning_rate": 1.7165932452276064e-06, |
|
"loss": 0.0072, |
|
"step": 237 |
|
}, |
|
{ |
|
"epoch": 0.1693950177935943, |
|
"grad_norm": 6.45374448449067, |
|
"learning_rate": 1.7151248164464024e-06, |
|
"loss": 0.0965, |
|
"step": 238 |
|
}, |
|
{ |
|
"epoch": 0.1701067615658363, |
|
"grad_norm": 3.2046807209540225, |
|
"learning_rate": 1.7136563876651981e-06, |
|
"loss": 0.1052, |
|
"step": 239 |
|
}, |
|
{ |
|
"epoch": 0.1708185053380783, |
|
"grad_norm": 17.53155549951064, |
|
"learning_rate": 1.7121879588839941e-06, |
|
"loss": 0.0309, |
|
"step": 240 |
|
}, |
|
{ |
|
"epoch": 0.17153024911032028, |
|
"grad_norm": 7.999750610279058, |
|
"learning_rate": 1.7107195301027899e-06, |
|
"loss": 0.0794, |
|
"step": 241 |
|
}, |
|
{ |
|
"epoch": 0.17224199288256228, |
|
"grad_norm": 4.18793587764703, |
|
"learning_rate": 1.7092511013215859e-06, |
|
"loss": 0.1157, |
|
"step": 242 |
|
}, |
|
{ |
|
"epoch": 0.17295373665480426, |
|
"grad_norm": 5.043597689648659, |
|
"learning_rate": 1.7077826725403816e-06, |
|
"loss": 0.0871, |
|
"step": 243 |
|
}, |
|
{ |
|
"epoch": 0.17366548042704627, |
|
"grad_norm": 4.195142234600667, |
|
"learning_rate": 1.7063142437591776e-06, |
|
"loss": 0.0907, |
|
"step": 244 |
|
}, |
|
{ |
|
"epoch": 0.17437722419928825, |
|
"grad_norm": 7.052542629714854, |
|
"learning_rate": 1.7048458149779734e-06, |
|
"loss": 0.1538, |
|
"step": 245 |
|
}, |
|
{ |
|
"epoch": 0.17508896797153026, |
|
"grad_norm": 4.6646820116985115, |
|
"learning_rate": 1.7033773861967693e-06, |
|
"loss": 0.0998, |
|
"step": 246 |
|
}, |
|
{ |
|
"epoch": 0.17580071174377224, |
|
"grad_norm": 5.141792327153177, |
|
"learning_rate": 1.7019089574155651e-06, |
|
"loss": 0.0019, |
|
"step": 247 |
|
}, |
|
{ |
|
"epoch": 0.17651245551601424, |
|
"grad_norm": 5.1975182920191845, |
|
"learning_rate": 1.700440528634361e-06, |
|
"loss": 0.1805, |
|
"step": 248 |
|
}, |
|
{ |
|
"epoch": 0.17722419928825622, |
|
"grad_norm": 6.812105219890212, |
|
"learning_rate": 1.698972099853157e-06, |
|
"loss": 0.1567, |
|
"step": 249 |
|
}, |
|
{ |
|
"epoch": 0.17793594306049823, |
|
"grad_norm": 8.036900295351552, |
|
"learning_rate": 1.697503671071953e-06, |
|
"loss": 0.0388, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 0.1786476868327402, |
|
"grad_norm": 5.270335640108646, |
|
"learning_rate": 1.6960352422907488e-06, |
|
"loss": 0.0209, |
|
"step": 251 |
|
}, |
|
{ |
|
"epoch": 0.17935943060498222, |
|
"grad_norm": 4.691025692308499, |
|
"learning_rate": 1.6945668135095448e-06, |
|
"loss": 0.1134, |
|
"step": 252 |
|
}, |
|
{ |
|
"epoch": 0.1800711743772242, |
|
"grad_norm": 4.582434595019, |
|
"learning_rate": 1.6930983847283406e-06, |
|
"loss": 0.0534, |
|
"step": 253 |
|
}, |
|
{ |
|
"epoch": 0.1807829181494662, |
|
"grad_norm": 5.2674928516109505, |
|
"learning_rate": 1.6916299559471365e-06, |
|
"loss": 0.1076, |
|
"step": 254 |
|
}, |
|
{ |
|
"epoch": 0.18149466192170818, |
|
"grad_norm": 4.571391599369404, |
|
"learning_rate": 1.6901615271659325e-06, |
|
"loss": 0.0748, |
|
"step": 255 |
|
}, |
|
{ |
|
"epoch": 0.1822064056939502, |
|
"grad_norm": 6.935073015616605, |
|
"learning_rate": 1.6886930983847283e-06, |
|
"loss": 0.0991, |
|
"step": 256 |
|
}, |
|
{ |
|
"epoch": 0.18291814946619217, |
|
"grad_norm": 8.20458599340894, |
|
"learning_rate": 1.6872246696035242e-06, |
|
"loss": 0.0553, |
|
"step": 257 |
|
}, |
|
{ |
|
"epoch": 0.18362989323843418, |
|
"grad_norm": 3.884973795641647, |
|
"learning_rate": 1.68575624082232e-06, |
|
"loss": 0.1122, |
|
"step": 258 |
|
}, |
|
{ |
|
"epoch": 0.18434163701067616, |
|
"grad_norm": 7.161399887308854, |
|
"learning_rate": 1.684287812041116e-06, |
|
"loss": 0.0906, |
|
"step": 259 |
|
}, |
|
{ |
|
"epoch": 0.18505338078291814, |
|
"grad_norm": 5.799383314825413, |
|
"learning_rate": 1.6828193832599118e-06, |
|
"loss": 0.091, |
|
"step": 260 |
|
}, |
|
{ |
|
"epoch": 0.18576512455516014, |
|
"grad_norm": 8.574044346176157, |
|
"learning_rate": 1.6813509544787077e-06, |
|
"loss": 0.1818, |
|
"step": 261 |
|
}, |
|
{ |
|
"epoch": 0.18647686832740212, |
|
"grad_norm": 5.636631955055512, |
|
"learning_rate": 1.6798825256975035e-06, |
|
"loss": -0.0482, |
|
"step": 262 |
|
}, |
|
{ |
|
"epoch": 0.18718861209964413, |
|
"grad_norm": 4.843371961054096, |
|
"learning_rate": 1.6784140969162995e-06, |
|
"loss": 0.0977, |
|
"step": 263 |
|
}, |
|
{ |
|
"epoch": 0.1879003558718861, |
|
"grad_norm": 4.0177832121329375, |
|
"learning_rate": 1.6769456681350952e-06, |
|
"loss": 0.0209, |
|
"step": 264 |
|
}, |
|
{ |
|
"epoch": 0.18861209964412812, |
|
"grad_norm": 4.87016389073631, |
|
"learning_rate": 1.6754772393538912e-06, |
|
"loss": 0.1963, |
|
"step": 265 |
|
}, |
|
{ |
|
"epoch": 0.1893238434163701, |
|
"grad_norm": 5.152554013905234, |
|
"learning_rate": 1.674008810572687e-06, |
|
"loss": 0.0175, |
|
"step": 266 |
|
}, |
|
{ |
|
"epoch": 0.1900355871886121, |
|
"grad_norm": 5.382963624114512, |
|
"learning_rate": 1.672540381791483e-06, |
|
"loss": 0.0613, |
|
"step": 267 |
|
}, |
|
{ |
|
"epoch": 0.19074733096085408, |
|
"grad_norm": 5.0952696610386745, |
|
"learning_rate": 1.671071953010279e-06, |
|
"loss": 0.0462, |
|
"step": 268 |
|
}, |
|
{ |
|
"epoch": 0.1914590747330961, |
|
"grad_norm": 4.557392135239278, |
|
"learning_rate": 1.669603524229075e-06, |
|
"loss": 0.2422, |
|
"step": 269 |
|
}, |
|
{ |
|
"epoch": 0.19217081850533807, |
|
"grad_norm": 3.243416500403976, |
|
"learning_rate": 1.6681350954478707e-06, |
|
"loss": 0.0716, |
|
"step": 270 |
|
}, |
|
{ |
|
"epoch": 0.19288256227758008, |
|
"grad_norm": 4.752322181453295, |
|
"learning_rate": 1.6666666666666667e-06, |
|
"loss": 0.0106, |
|
"step": 271 |
|
}, |
|
{ |
|
"epoch": 0.19359430604982206, |
|
"grad_norm": 5.174592102487308, |
|
"learning_rate": 1.6651982378854626e-06, |
|
"loss": 0.0096, |
|
"step": 272 |
|
}, |
|
{ |
|
"epoch": 0.19430604982206406, |
|
"grad_norm": 6.572477476154677, |
|
"learning_rate": 1.6637298091042584e-06, |
|
"loss": 0.2208, |
|
"step": 273 |
|
}, |
|
{ |
|
"epoch": 0.19501779359430604, |
|
"grad_norm": 5.709859305283521, |
|
"learning_rate": 1.6622613803230544e-06, |
|
"loss": 0.1049, |
|
"step": 274 |
|
}, |
|
{ |
|
"epoch": 0.19572953736654805, |
|
"grad_norm": 5.854991524394585, |
|
"learning_rate": 1.6607929515418501e-06, |
|
"loss": 0.0919, |
|
"step": 275 |
|
}, |
|
{ |
|
"epoch": 0.19644128113879003, |
|
"grad_norm": 6.049764253421174, |
|
"learning_rate": 1.6593245227606461e-06, |
|
"loss": 0.0477, |
|
"step": 276 |
|
}, |
|
{ |
|
"epoch": 0.19715302491103204, |
|
"grad_norm": 7.285867561202631, |
|
"learning_rate": 1.6578560939794419e-06, |
|
"loss": 0.0226, |
|
"step": 277 |
|
}, |
|
{ |
|
"epoch": 0.19786476868327402, |
|
"grad_norm": 4.852360707346899, |
|
"learning_rate": 1.6563876651982379e-06, |
|
"loss": 0.0613, |
|
"step": 278 |
|
}, |
|
{ |
|
"epoch": 0.19857651245551602, |
|
"grad_norm": 7.958493322658308, |
|
"learning_rate": 1.6549192364170336e-06, |
|
"loss": 0.0974, |
|
"step": 279 |
|
}, |
|
{ |
|
"epoch": 0.199288256227758, |
|
"grad_norm": 12.052061950787715, |
|
"learning_rate": 1.6534508076358296e-06, |
|
"loss": 0.125, |
|
"step": 280 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"grad_norm": 4.141730855422617, |
|
"learning_rate": 1.6519823788546254e-06, |
|
"loss": 0.0366, |
|
"step": 281 |
|
}, |
|
{ |
|
"epoch": 0.200711743772242, |
|
"grad_norm": 4.70039039776032, |
|
"learning_rate": 1.6505139500734214e-06, |
|
"loss": 0.099, |
|
"step": 282 |
|
}, |
|
{ |
|
"epoch": 0.201423487544484, |
|
"grad_norm": 3.6386434376824224, |
|
"learning_rate": 1.6490455212922171e-06, |
|
"loss": 0.0729, |
|
"step": 283 |
|
}, |
|
{ |
|
"epoch": 0.20213523131672598, |
|
"grad_norm": 5.342530763972189, |
|
"learning_rate": 1.647577092511013e-06, |
|
"loss": 0.115, |
|
"step": 284 |
|
}, |
|
{ |
|
"epoch": 0.20284697508896798, |
|
"grad_norm": 4.117094151055721, |
|
"learning_rate": 1.6461086637298089e-06, |
|
"loss": -0.0421, |
|
"step": 285 |
|
}, |
|
{ |
|
"epoch": 0.20355871886120996, |
|
"grad_norm": 7.676379588638591, |
|
"learning_rate": 1.6446402349486048e-06, |
|
"loss": 0.1763, |
|
"step": 286 |
|
}, |
|
{ |
|
"epoch": 0.20427046263345194, |
|
"grad_norm": 6.232882136914352, |
|
"learning_rate": 1.6431718061674006e-06, |
|
"loss": 0.0297, |
|
"step": 287 |
|
}, |
|
{ |
|
"epoch": 0.20498220640569395, |
|
"grad_norm": 4.461775879651974, |
|
"learning_rate": 1.6417033773861968e-06, |
|
"loss": 0.1226, |
|
"step": 288 |
|
}, |
|
{ |
|
"epoch": 0.20569395017793593, |
|
"grad_norm": 6.945988041879261, |
|
"learning_rate": 1.6402349486049928e-06, |
|
"loss": 0.2332, |
|
"step": 289 |
|
}, |
|
{ |
|
"epoch": 0.20640569395017794, |
|
"grad_norm": 5.492669508987496, |
|
"learning_rate": 1.6387665198237885e-06, |
|
"loss": 0.0424, |
|
"step": 290 |
|
}, |
|
{ |
|
"epoch": 0.20711743772241992, |
|
"grad_norm": 3.6574823633839104, |
|
"learning_rate": 1.6372980910425845e-06, |
|
"loss": -0.1162, |
|
"step": 291 |
|
}, |
|
{ |
|
"epoch": 0.20782918149466192, |
|
"grad_norm": 4.45017248955224, |
|
"learning_rate": 1.6358296622613803e-06, |
|
"loss": 0.0168, |
|
"step": 292 |
|
}, |
|
{ |
|
"epoch": 0.2085409252669039, |
|
"grad_norm": 5.901019005144134, |
|
"learning_rate": 1.6343612334801763e-06, |
|
"loss": 0.1194, |
|
"step": 293 |
|
}, |
|
{ |
|
"epoch": 0.2092526690391459, |
|
"grad_norm": 5.770360970932692, |
|
"learning_rate": 1.632892804698972e-06, |
|
"loss": 0.0818, |
|
"step": 294 |
|
}, |
|
{ |
|
"epoch": 0.2099644128113879, |
|
"grad_norm": 4.54764594919165, |
|
"learning_rate": 1.631424375917768e-06, |
|
"loss": 0.0749, |
|
"step": 295 |
|
}, |
|
{ |
|
"epoch": 0.2106761565836299, |
|
"grad_norm": 4.338576714724069, |
|
"learning_rate": 1.6299559471365638e-06, |
|
"loss": 0.0211, |
|
"step": 296 |
|
}, |
|
{ |
|
"epoch": 0.21138790035587188, |
|
"grad_norm": 5.744409248117972, |
|
"learning_rate": 1.6284875183553597e-06, |
|
"loss": 0.0809, |
|
"step": 297 |
|
}, |
|
{ |
|
"epoch": 0.21209964412811388, |
|
"grad_norm": 5.62291661993109, |
|
"learning_rate": 1.6270190895741555e-06, |
|
"loss": 0.0373, |
|
"step": 298 |
|
}, |
|
{ |
|
"epoch": 0.21281138790035586, |
|
"grad_norm": 6.451351633319061, |
|
"learning_rate": 1.6255506607929515e-06, |
|
"loss": 0.1273, |
|
"step": 299 |
|
}, |
|
{ |
|
"epoch": 0.21352313167259787, |
|
"grad_norm": 5.775770735398924, |
|
"learning_rate": 1.6240822320117473e-06, |
|
"loss": 0.1488, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 0.21423487544483985, |
|
"grad_norm": 5.65199853440014, |
|
"learning_rate": 1.6226138032305432e-06, |
|
"loss": 0.0648, |
|
"step": 301 |
|
}, |
|
{ |
|
"epoch": 0.21494661921708186, |
|
"grad_norm": 5.095604494601722, |
|
"learning_rate": 1.621145374449339e-06, |
|
"loss": 0.0744, |
|
"step": 302 |
|
}, |
|
{ |
|
"epoch": 0.21565836298932384, |
|
"grad_norm": 5.413118442504562, |
|
"learning_rate": 1.619676945668135e-06, |
|
"loss": 0.061, |
|
"step": 303 |
|
}, |
|
{ |
|
"epoch": 0.21637010676156584, |
|
"grad_norm": 3.7007801986026054, |
|
"learning_rate": 1.6182085168869307e-06, |
|
"loss": 0.0895, |
|
"step": 304 |
|
}, |
|
{ |
|
"epoch": 0.21708185053380782, |
|
"grad_norm": 5.775824232856091, |
|
"learning_rate": 1.6167400881057267e-06, |
|
"loss": 0.0592, |
|
"step": 305 |
|
}, |
|
{ |
|
"epoch": 0.21779359430604983, |
|
"grad_norm": 4.671481623083791, |
|
"learning_rate": 1.6152716593245225e-06, |
|
"loss": 0.0348, |
|
"step": 306 |
|
}, |
|
{ |
|
"epoch": 0.2185053380782918, |
|
"grad_norm": 5.548421466861163, |
|
"learning_rate": 1.6138032305433187e-06, |
|
"loss": 0.0918, |
|
"step": 307 |
|
}, |
|
{ |
|
"epoch": 0.21921708185053382, |
|
"grad_norm": 4.07343007369804, |
|
"learning_rate": 1.6123348017621146e-06, |
|
"loss": 0.0971, |
|
"step": 308 |
|
}, |
|
{ |
|
"epoch": 0.2199288256227758, |
|
"grad_norm": 3.7358505334627647, |
|
"learning_rate": 1.6108663729809104e-06, |
|
"loss": 0.0479, |
|
"step": 309 |
|
}, |
|
{ |
|
"epoch": 0.2206405693950178, |
|
"grad_norm": 6.594222933183962, |
|
"learning_rate": 1.6093979441997064e-06, |
|
"loss": 0.0583, |
|
"step": 310 |
|
}, |
|
{ |
|
"epoch": 0.22135231316725978, |
|
"grad_norm": 4.517549414900124, |
|
"learning_rate": 1.6079295154185022e-06, |
|
"loss": 0.026, |
|
"step": 311 |
|
}, |
|
{ |
|
"epoch": 0.2220640569395018, |
|
"grad_norm": 3.893732997312208, |
|
"learning_rate": 1.6064610866372981e-06, |
|
"loss": -0.041, |
|
"step": 312 |
|
}, |
|
{ |
|
"epoch": 0.22277580071174377, |
|
"grad_norm": 6.014426372062505, |
|
"learning_rate": 1.604992657856094e-06, |
|
"loss": 0.0646, |
|
"step": 313 |
|
}, |
|
{ |
|
"epoch": 0.22348754448398578, |
|
"grad_norm": 7.267265807250511, |
|
"learning_rate": 1.6035242290748899e-06, |
|
"loss": 0.1517, |
|
"step": 314 |
|
}, |
|
{ |
|
"epoch": 0.22419928825622776, |
|
"grad_norm": 3.7282693021207227, |
|
"learning_rate": 1.6020558002936856e-06, |
|
"loss": 0.0652, |
|
"step": 315 |
|
}, |
|
{ |
|
"epoch": 0.22491103202846974, |
|
"grad_norm": 4.892638160809301, |
|
"learning_rate": 1.6005873715124816e-06, |
|
"loss": 0.212, |
|
"step": 316 |
|
}, |
|
{ |
|
"epoch": 0.22562277580071174, |
|
"grad_norm": 12.588780182154638, |
|
"learning_rate": 1.5991189427312774e-06, |
|
"loss": 0.2024, |
|
"step": 317 |
|
}, |
|
{ |
|
"epoch": 0.22633451957295372, |
|
"grad_norm": 4.509674058119238, |
|
"learning_rate": 1.5976505139500734e-06, |
|
"loss": 0.0002, |
|
"step": 318 |
|
}, |
|
{ |
|
"epoch": 0.22704626334519573, |
|
"grad_norm": 5.275333318077758, |
|
"learning_rate": 1.5961820851688691e-06, |
|
"loss": 0.0628, |
|
"step": 319 |
|
}, |
|
{ |
|
"epoch": 0.2277580071174377, |
|
"grad_norm": 4.985806920162633, |
|
"learning_rate": 1.5947136563876651e-06, |
|
"loss": 0.12, |
|
"step": 320 |
|
}, |
|
{ |
|
"epoch": 0.22846975088967972, |
|
"grad_norm": 3.368050519976615, |
|
"learning_rate": 1.5932452276064609e-06, |
|
"loss": 0.0697, |
|
"step": 321 |
|
}, |
|
{ |
|
"epoch": 0.2291814946619217, |
|
"grad_norm": 3.7965809900554333, |
|
"learning_rate": 1.5917767988252569e-06, |
|
"loss": 0.0518, |
|
"step": 322 |
|
}, |
|
{ |
|
"epoch": 0.2298932384341637, |
|
"grad_norm": 6.750969993690751, |
|
"learning_rate": 1.5903083700440526e-06, |
|
"loss": 0.0444, |
|
"step": 323 |
|
}, |
|
{ |
|
"epoch": 0.23060498220640568, |
|
"grad_norm": 3.7359230312448544, |
|
"learning_rate": 1.5888399412628486e-06, |
|
"loss": 0.1547, |
|
"step": 324 |
|
}, |
|
{ |
|
"epoch": 0.2313167259786477, |
|
"grad_norm": 5.383174446682282, |
|
"learning_rate": 1.5873715124816446e-06, |
|
"loss": 0.1029, |
|
"step": 325 |
|
}, |
|
{ |
|
"epoch": 0.23202846975088967, |
|
"grad_norm": 4.133656521949432, |
|
"learning_rate": 1.5859030837004403e-06, |
|
"loss": 0.1273, |
|
"step": 326 |
|
}, |
|
{ |
|
"epoch": 0.23274021352313168, |
|
"grad_norm": 5.0061001758801495, |
|
"learning_rate": 1.5844346549192365e-06, |
|
"loss": 0.0476, |
|
"step": 327 |
|
}, |
|
{ |
|
"epoch": 0.23345195729537366, |
|
"grad_norm": 5.067461949390494, |
|
"learning_rate": 1.5829662261380323e-06, |
|
"loss": 0.0065, |
|
"step": 328 |
|
}, |
|
{ |
|
"epoch": 0.23416370106761566, |
|
"grad_norm": 4.070687362776556, |
|
"learning_rate": 1.5814977973568283e-06, |
|
"loss": 0.0182, |
|
"step": 329 |
|
}, |
|
{ |
|
"epoch": 0.23487544483985764, |
|
"grad_norm": 4.104503688755299, |
|
"learning_rate": 1.580029368575624e-06, |
|
"loss": 0.0249, |
|
"step": 330 |
|
}, |
|
{ |
|
"epoch": 0.23558718861209965, |
|
"grad_norm": 6.173363151286035, |
|
"learning_rate": 1.57856093979442e-06, |
|
"loss": 0.1274, |
|
"step": 331 |
|
}, |
|
{ |
|
"epoch": 0.23629893238434163, |
|
"grad_norm": 5.123209943077641, |
|
"learning_rate": 1.5770925110132158e-06, |
|
"loss": 0.1077, |
|
"step": 332 |
|
}, |
|
{ |
|
"epoch": 0.23701067615658364, |
|
"grad_norm": 9.147892910509416, |
|
"learning_rate": 1.5756240822320118e-06, |
|
"loss": 0.0639, |
|
"step": 333 |
|
}, |
|
{ |
|
"epoch": 0.23772241992882562, |
|
"grad_norm": 9.078930082093752, |
|
"learning_rate": 1.5741556534508075e-06, |
|
"loss": 0.1176, |
|
"step": 334 |
|
}, |
|
{ |
|
"epoch": 0.23843416370106763, |
|
"grad_norm": 7.396135723397457, |
|
"learning_rate": 1.5726872246696035e-06, |
|
"loss": 0.1046, |
|
"step": 335 |
|
}, |
|
{ |
|
"epoch": 0.2391459074733096, |
|
"grad_norm": 17.515198129834094, |
|
"learning_rate": 1.5712187958883993e-06, |
|
"loss": 0.1535, |
|
"step": 336 |
|
}, |
|
{ |
|
"epoch": 0.2398576512455516, |
|
"grad_norm": 5.654849558750833, |
|
"learning_rate": 1.5697503671071952e-06, |
|
"loss": 0.1156, |
|
"step": 337 |
|
}, |
|
{ |
|
"epoch": 0.2405693950177936, |
|
"grad_norm": 4.6881638120471925, |
|
"learning_rate": 1.568281938325991e-06, |
|
"loss": 0.1043, |
|
"step": 338 |
|
}, |
|
{ |
|
"epoch": 0.2412811387900356, |
|
"grad_norm": 3.9981055541920023, |
|
"learning_rate": 1.566813509544787e-06, |
|
"loss": -0.074, |
|
"step": 339 |
|
}, |
|
{ |
|
"epoch": 0.24199288256227758, |
|
"grad_norm": 6.089583473932049, |
|
"learning_rate": 1.5653450807635827e-06, |
|
"loss": 0.1465, |
|
"step": 340 |
|
}, |
|
{ |
|
"epoch": 0.24270462633451959, |
|
"grad_norm": 4.313100800611475, |
|
"learning_rate": 1.5638766519823787e-06, |
|
"loss": 0.053, |
|
"step": 341 |
|
}, |
|
{ |
|
"epoch": 0.24341637010676156, |
|
"grad_norm": 4.889467144975206, |
|
"learning_rate": 1.5624082232011747e-06, |
|
"loss": 0.0717, |
|
"step": 342 |
|
}, |
|
{ |
|
"epoch": 0.24412811387900357, |
|
"grad_norm": 3.7378928242805607, |
|
"learning_rate": 1.5609397944199705e-06, |
|
"loss": 0.0617, |
|
"step": 343 |
|
}, |
|
{ |
|
"epoch": 0.24483985765124555, |
|
"grad_norm": 5.483036932234192, |
|
"learning_rate": 1.5594713656387664e-06, |
|
"loss": -0.0442, |
|
"step": 344 |
|
}, |
|
{ |
|
"epoch": 0.24555160142348753, |
|
"grad_norm": 4.64961948273496, |
|
"learning_rate": 1.5580029368575622e-06, |
|
"loss": 0.0449, |
|
"step": 345 |
|
}, |
|
{ |
|
"epoch": 0.24626334519572954, |
|
"grad_norm": 9.0044620896761, |
|
"learning_rate": 1.5565345080763584e-06, |
|
"loss": 0.0847, |
|
"step": 346 |
|
}, |
|
{ |
|
"epoch": 0.24697508896797152, |
|
"grad_norm": 4.30818314035339, |
|
"learning_rate": 1.5550660792951542e-06, |
|
"loss": 0.0968, |
|
"step": 347 |
|
}, |
|
{ |
|
"epoch": 0.24768683274021353, |
|
"grad_norm": 7.698780498292484, |
|
"learning_rate": 1.5535976505139501e-06, |
|
"loss": 0.0552, |
|
"step": 348 |
|
}, |
|
{ |
|
"epoch": 0.2483985765124555, |
|
"grad_norm": 4.735780211542353, |
|
"learning_rate": 1.552129221732746e-06, |
|
"loss": -0.021, |
|
"step": 349 |
|
}, |
|
{ |
|
"epoch": 0.2491103202846975, |
|
"grad_norm": 7.141409465679317, |
|
"learning_rate": 1.5506607929515419e-06, |
|
"loss": 0.0741, |
|
"step": 350 |
|
}, |
|
{ |
|
"epoch": 0.2498220640569395, |
|
"grad_norm": 3.889744793088473, |
|
"learning_rate": 1.5491923641703377e-06, |
|
"loss": 0.0057, |
|
"step": 351 |
|
}, |
|
{ |
|
"epoch": 0.2505338078291815, |
|
"grad_norm": 6.426034438875903, |
|
"learning_rate": 1.5477239353891336e-06, |
|
"loss": 0.0225, |
|
"step": 352 |
|
}, |
|
{ |
|
"epoch": 0.2512455516014235, |
|
"grad_norm": 5.831394781705116, |
|
"learning_rate": 1.5462555066079294e-06, |
|
"loss": 0.1369, |
|
"step": 353 |
|
}, |
|
{ |
|
"epoch": 0.25195729537366546, |
|
"grad_norm": 4.004979610319777, |
|
"learning_rate": 1.5447870778267254e-06, |
|
"loss": 0.131, |
|
"step": 354 |
|
}, |
|
{ |
|
"epoch": 0.2526690391459075, |
|
"grad_norm": 6.7718243465456744, |
|
"learning_rate": 1.5433186490455211e-06, |
|
"loss": 0.0178, |
|
"step": 355 |
|
}, |
|
{ |
|
"epoch": 0.25338078291814947, |
|
"grad_norm": 6.211271610510303, |
|
"learning_rate": 1.5418502202643171e-06, |
|
"loss": -0.0385, |
|
"step": 356 |
|
}, |
|
{ |
|
"epoch": 0.25409252669039145, |
|
"grad_norm": 7.959437774662272, |
|
"learning_rate": 1.5403817914831129e-06, |
|
"loss": 0.1653, |
|
"step": 357 |
|
}, |
|
{ |
|
"epoch": 0.25480427046263343, |
|
"grad_norm": 10.470130943200923, |
|
"learning_rate": 1.5389133627019089e-06, |
|
"loss": 0.4195, |
|
"step": 358 |
|
}, |
|
{ |
|
"epoch": 0.25551601423487547, |
|
"grad_norm": 6.235314472897717, |
|
"learning_rate": 1.5374449339207048e-06, |
|
"loss": 0.0351, |
|
"step": 359 |
|
}, |
|
{ |
|
"epoch": 0.25622775800711745, |
|
"grad_norm": 18.27060737890617, |
|
"learning_rate": 1.5359765051395006e-06, |
|
"loss": 0.1399, |
|
"step": 360 |
|
}, |
|
{ |
|
"epoch": 0.2569395017793594, |
|
"grad_norm": 6.088106486287744, |
|
"learning_rate": 1.5345080763582966e-06, |
|
"loss": 0.1531, |
|
"step": 361 |
|
}, |
|
{ |
|
"epoch": 0.2576512455516014, |
|
"grad_norm": 5.902550563054421, |
|
"learning_rate": 1.5330396475770923e-06, |
|
"loss": 0.0878, |
|
"step": 362 |
|
}, |
|
{ |
|
"epoch": 0.25836298932384344, |
|
"grad_norm": 3.8566639138632017, |
|
"learning_rate": 1.5315712187958883e-06, |
|
"loss": 0.1211, |
|
"step": 363 |
|
}, |
|
{ |
|
"epoch": 0.2590747330960854, |
|
"grad_norm": 3.6802199316647317, |
|
"learning_rate": 1.530102790014684e-06, |
|
"loss": 0.0758, |
|
"step": 364 |
|
}, |
|
{ |
|
"epoch": 0.2597864768683274, |
|
"grad_norm": 7.416574999067755, |
|
"learning_rate": 1.52863436123348e-06, |
|
"loss": 0.0767, |
|
"step": 365 |
|
}, |
|
{ |
|
"epoch": 0.2604982206405694, |
|
"grad_norm": 17.2285305706842, |
|
"learning_rate": 1.527165932452276e-06, |
|
"loss": 0.1486, |
|
"step": 366 |
|
}, |
|
{ |
|
"epoch": 0.2612099644128114, |
|
"grad_norm": 4.5616956027842255, |
|
"learning_rate": 1.525697503671072e-06, |
|
"loss": 0.0447, |
|
"step": 367 |
|
}, |
|
{ |
|
"epoch": 0.2619217081850534, |
|
"grad_norm": 3.569935985386615, |
|
"learning_rate": 1.5242290748898678e-06, |
|
"loss": 0.0119, |
|
"step": 368 |
|
}, |
|
{ |
|
"epoch": 0.26263345195729537, |
|
"grad_norm": 5.312914843350158, |
|
"learning_rate": 1.5227606461086638e-06, |
|
"loss": 0.0733, |
|
"step": 369 |
|
}, |
|
{ |
|
"epoch": 0.26334519572953735, |
|
"grad_norm": 4.058544639606025, |
|
"learning_rate": 1.5212922173274595e-06, |
|
"loss": 0.1414, |
|
"step": 370 |
|
}, |
|
{ |
|
"epoch": 0.2640569395017794, |
|
"grad_norm": 5.341206620154259, |
|
"learning_rate": 1.5198237885462555e-06, |
|
"loss": 0.1118, |
|
"step": 371 |
|
}, |
|
{ |
|
"epoch": 0.26476868327402137, |
|
"grad_norm": 5.4139085447762385, |
|
"learning_rate": 1.5183553597650513e-06, |
|
"loss": 0.0318, |
|
"step": 372 |
|
}, |
|
{ |
|
"epoch": 0.26548042704626335, |
|
"grad_norm": 5.787271858480889, |
|
"learning_rate": 1.5168869309838473e-06, |
|
"loss": 0.1029, |
|
"step": 373 |
|
}, |
|
{ |
|
"epoch": 0.2661921708185053, |
|
"grad_norm": 5.893029326858708, |
|
"learning_rate": 1.515418502202643e-06, |
|
"loss": 0.051, |
|
"step": 374 |
|
}, |
|
{ |
|
"epoch": 0.2669039145907473, |
|
"grad_norm": 5.2863059136227015, |
|
"learning_rate": 1.513950073421439e-06, |
|
"loss": 0.0402, |
|
"step": 375 |
|
}, |
|
{ |
|
"epoch": 0.26761565836298934, |
|
"grad_norm": 4.423267805643734, |
|
"learning_rate": 1.512481644640235e-06, |
|
"loss": 0.114, |
|
"step": 376 |
|
}, |
|
{ |
|
"epoch": 0.2683274021352313, |
|
"grad_norm": 3.5517169990953104, |
|
"learning_rate": 1.5110132158590307e-06, |
|
"loss": -0.0008, |
|
"step": 377 |
|
}, |
|
{ |
|
"epoch": 0.2690391459074733, |
|
"grad_norm": 3.0540230655261307, |
|
"learning_rate": 1.5095447870778267e-06, |
|
"loss": -0.0426, |
|
"step": 378 |
|
}, |
|
{ |
|
"epoch": 0.2697508896797153, |
|
"grad_norm": 4.542990162110976, |
|
"learning_rate": 1.5080763582966225e-06, |
|
"loss": 0.1026, |
|
"step": 379 |
|
}, |
|
{ |
|
"epoch": 0.2704626334519573, |
|
"grad_norm": 4.400804550133596, |
|
"learning_rate": 1.5066079295154185e-06, |
|
"loss": 0.132, |
|
"step": 380 |
|
}, |
|
{ |
|
"epoch": 0.2711743772241993, |
|
"grad_norm": 4.707977326473843, |
|
"learning_rate": 1.5051395007342142e-06, |
|
"loss": 0.0669, |
|
"step": 381 |
|
}, |
|
{ |
|
"epoch": 0.27188612099644127, |
|
"grad_norm": 5.231473477237746, |
|
"learning_rate": 1.5036710719530102e-06, |
|
"loss": 0.0588, |
|
"step": 382 |
|
}, |
|
{ |
|
"epoch": 0.27259786476868325, |
|
"grad_norm": 4.141758025995536, |
|
"learning_rate": 1.502202643171806e-06, |
|
"loss": 0.0349, |
|
"step": 383 |
|
}, |
|
{ |
|
"epoch": 0.2733096085409253, |
|
"grad_norm": 3.417607123726839, |
|
"learning_rate": 1.500734214390602e-06, |
|
"loss": -0.0088, |
|
"step": 384 |
|
}, |
|
{ |
|
"epoch": 0.27402135231316727, |
|
"grad_norm": 6.084464642962031, |
|
"learning_rate": 1.499265785609398e-06, |
|
"loss": 0.114, |
|
"step": 385 |
|
}, |
|
{ |
|
"epoch": 0.27473309608540925, |
|
"grad_norm": 9.337398551992413, |
|
"learning_rate": 1.497797356828194e-06, |
|
"loss": 0.041, |
|
"step": 386 |
|
}, |
|
{ |
|
"epoch": 0.2754448398576512, |
|
"grad_norm": 6.293725828334604, |
|
"learning_rate": 1.4963289280469897e-06, |
|
"loss": 0.1886, |
|
"step": 387 |
|
}, |
|
{ |
|
"epoch": 0.27615658362989326, |
|
"grad_norm": 2.8274617128621595, |
|
"learning_rate": 1.4948604992657856e-06, |
|
"loss": -0.0402, |
|
"step": 388 |
|
}, |
|
{ |
|
"epoch": 0.27686832740213524, |
|
"grad_norm": 6.682761251392663, |
|
"learning_rate": 1.4933920704845814e-06, |
|
"loss": 0.0407, |
|
"step": 389 |
|
}, |
|
{ |
|
"epoch": 0.2775800711743772, |
|
"grad_norm": 5.532712064282298, |
|
"learning_rate": 1.4919236417033774e-06, |
|
"loss": 0.0822, |
|
"step": 390 |
|
}, |
|
{ |
|
"epoch": 0.2782918149466192, |
|
"grad_norm": 6.354605270840461, |
|
"learning_rate": 1.4904552129221731e-06, |
|
"loss": 0.2162, |
|
"step": 391 |
|
}, |
|
{ |
|
"epoch": 0.27900355871886123, |
|
"grad_norm": 4.605656562580033, |
|
"learning_rate": 1.4889867841409691e-06, |
|
"loss": 0.0018, |
|
"step": 392 |
|
}, |
|
{ |
|
"epoch": 0.2797153024911032, |
|
"grad_norm": 5.623376400067919, |
|
"learning_rate": 1.4875183553597649e-06, |
|
"loss": 0.1569, |
|
"step": 393 |
|
}, |
|
{ |
|
"epoch": 0.2804270462633452, |
|
"grad_norm": 3.9692180687934897, |
|
"learning_rate": 1.4860499265785609e-06, |
|
"loss": -0.0416, |
|
"step": 394 |
|
}, |
|
{ |
|
"epoch": 0.28113879003558717, |
|
"grad_norm": 7.423001302286039, |
|
"learning_rate": 1.4845814977973568e-06, |
|
"loss": 0.026, |
|
"step": 395 |
|
}, |
|
{ |
|
"epoch": 0.2818505338078292, |
|
"grad_norm": 8.085510065695358, |
|
"learning_rate": 1.4831130690161526e-06, |
|
"loss": 0.0845, |
|
"step": 396 |
|
}, |
|
{ |
|
"epoch": 0.2825622775800712, |
|
"grad_norm": 4.9124011701975245, |
|
"learning_rate": 1.4816446402349486e-06, |
|
"loss": 0.0907, |
|
"step": 397 |
|
}, |
|
{ |
|
"epoch": 0.28327402135231317, |
|
"grad_norm": 4.4743904849478655, |
|
"learning_rate": 1.4801762114537444e-06, |
|
"loss": 0.0154, |
|
"step": 398 |
|
}, |
|
{ |
|
"epoch": 0.28398576512455515, |
|
"grad_norm": 4.688328173910629, |
|
"learning_rate": 1.4787077826725403e-06, |
|
"loss": 0.0273, |
|
"step": 399 |
|
}, |
|
{ |
|
"epoch": 0.2846975088967972, |
|
"grad_norm": 6.729777242479757, |
|
"learning_rate": 1.477239353891336e-06, |
|
"loss": 0.1049, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 0.28540925266903916, |
|
"grad_norm": 3.998078838569102, |
|
"learning_rate": 1.475770925110132e-06, |
|
"loss": 0.0952, |
|
"step": 401 |
|
}, |
|
{ |
|
"epoch": 0.28612099644128114, |
|
"grad_norm": 3.632400325239873, |
|
"learning_rate": 1.4743024963289278e-06, |
|
"loss": 0.0122, |
|
"step": 402 |
|
}, |
|
{ |
|
"epoch": 0.2868327402135231, |
|
"grad_norm": 5.621302597481837, |
|
"learning_rate": 1.4728340675477238e-06, |
|
"loss": 0.1393, |
|
"step": 403 |
|
}, |
|
{ |
|
"epoch": 0.2875444839857651, |
|
"grad_norm": 3.4926245499112936, |
|
"learning_rate": 1.4713656387665198e-06, |
|
"loss": -0.0072, |
|
"step": 404 |
|
}, |
|
{ |
|
"epoch": 0.28825622775800713, |
|
"grad_norm": 6.871401868269233, |
|
"learning_rate": 1.4698972099853158e-06, |
|
"loss": 0.0845, |
|
"step": 405 |
|
}, |
|
{ |
|
"epoch": 0.2889679715302491, |
|
"grad_norm": 7.371299203052198, |
|
"learning_rate": 1.4684287812041115e-06, |
|
"loss": 0.1009, |
|
"step": 406 |
|
}, |
|
{ |
|
"epoch": 0.2896797153024911, |
|
"grad_norm": 5.3878488662242034, |
|
"learning_rate": 1.4669603524229075e-06, |
|
"loss": 0.0183, |
|
"step": 407 |
|
}, |
|
{ |
|
"epoch": 0.29039145907473307, |
|
"grad_norm": 6.202672985144754, |
|
"learning_rate": 1.4654919236417033e-06, |
|
"loss": -0.0093, |
|
"step": 408 |
|
}, |
|
{ |
|
"epoch": 0.2911032028469751, |
|
"grad_norm": 4.22439807463946, |
|
"learning_rate": 1.4640234948604993e-06, |
|
"loss": -0.0559, |
|
"step": 409 |
|
}, |
|
{ |
|
"epoch": 0.2918149466192171, |
|
"grad_norm": 5.732779881282407, |
|
"learning_rate": 1.462555066079295e-06, |
|
"loss": 0.1191, |
|
"step": 410 |
|
}, |
|
{ |
|
"epoch": 0.29252669039145907, |
|
"grad_norm": 4.403590705236258, |
|
"learning_rate": 1.461086637298091e-06, |
|
"loss": 0.0391, |
|
"step": 411 |
|
}, |
|
{ |
|
"epoch": 0.29323843416370104, |
|
"grad_norm": 7.371522503842622, |
|
"learning_rate": 1.459618208516887e-06, |
|
"loss": 0.2122, |
|
"step": 412 |
|
}, |
|
{ |
|
"epoch": 0.2939501779359431, |
|
"grad_norm": 4.063849826456104, |
|
"learning_rate": 1.4581497797356827e-06, |
|
"loss": -0.0325, |
|
"step": 413 |
|
}, |
|
{ |
|
"epoch": 0.29466192170818506, |
|
"grad_norm": 6.537322657205629, |
|
"learning_rate": 1.4566813509544787e-06, |
|
"loss": 0.1573, |
|
"step": 414 |
|
}, |
|
{ |
|
"epoch": 0.29537366548042704, |
|
"grad_norm": 3.9070658327741112, |
|
"learning_rate": 1.4552129221732745e-06, |
|
"loss": 0.0689, |
|
"step": 415 |
|
}, |
|
{ |
|
"epoch": 0.296085409252669, |
|
"grad_norm": 6.406279177715566, |
|
"learning_rate": 1.4537444933920705e-06, |
|
"loss": 0.0146, |
|
"step": 416 |
|
}, |
|
{ |
|
"epoch": 0.29679715302491105, |
|
"grad_norm": 5.901355794529173, |
|
"learning_rate": 1.4522760646108662e-06, |
|
"loss": 0.046, |
|
"step": 417 |
|
}, |
|
{ |
|
"epoch": 0.29750889679715303, |
|
"grad_norm": 3.850309156415311, |
|
"learning_rate": 1.4508076358296622e-06, |
|
"loss": 0.0394, |
|
"step": 418 |
|
}, |
|
{ |
|
"epoch": 0.298220640569395, |
|
"grad_norm": 3.258587568922452, |
|
"learning_rate": 1.449339207048458e-06, |
|
"loss": 0.1155, |
|
"step": 419 |
|
}, |
|
{ |
|
"epoch": 0.298932384341637, |
|
"grad_norm": 3.745109038860105, |
|
"learning_rate": 1.447870778267254e-06, |
|
"loss": 0.0726, |
|
"step": 420 |
|
}, |
|
{ |
|
"epoch": 0.299644128113879, |
|
"grad_norm": 4.505449386351336, |
|
"learning_rate": 1.4464023494860497e-06, |
|
"loss": 0.0293, |
|
"step": 421 |
|
}, |
|
{ |
|
"epoch": 0.300355871886121, |
|
"grad_norm": 5.907561625814238, |
|
"learning_rate": 1.4449339207048457e-06, |
|
"loss": 0.1216, |
|
"step": 422 |
|
}, |
|
{ |
|
"epoch": 0.301067615658363, |
|
"grad_norm": 4.612488189930256, |
|
"learning_rate": 1.4434654919236415e-06, |
|
"loss": 0.12, |
|
"step": 423 |
|
}, |
|
{ |
|
"epoch": 0.30177935943060497, |
|
"grad_norm": 6.018073200585667, |
|
"learning_rate": 1.4419970631424377e-06, |
|
"loss": -0.0622, |
|
"step": 424 |
|
}, |
|
{ |
|
"epoch": 0.302491103202847, |
|
"grad_norm": 3.359965143931201, |
|
"learning_rate": 1.4405286343612334e-06, |
|
"loss": -0.0557, |
|
"step": 425 |
|
}, |
|
{ |
|
"epoch": 0.303202846975089, |
|
"grad_norm": 4.530239650290313, |
|
"learning_rate": 1.4390602055800294e-06, |
|
"loss": 0.0155, |
|
"step": 426 |
|
}, |
|
{ |
|
"epoch": 0.30391459074733096, |
|
"grad_norm": 3.923181080766024, |
|
"learning_rate": 1.4375917767988252e-06, |
|
"loss": 0.0844, |
|
"step": 427 |
|
}, |
|
{ |
|
"epoch": 0.30462633451957294, |
|
"grad_norm": 3.5202703684229815, |
|
"learning_rate": 1.4361233480176211e-06, |
|
"loss": -0.0512, |
|
"step": 428 |
|
}, |
|
{ |
|
"epoch": 0.305338078291815, |
|
"grad_norm": 3.7198987196394206, |
|
"learning_rate": 1.4346549192364171e-06, |
|
"loss": 0.0487, |
|
"step": 429 |
|
}, |
|
{ |
|
"epoch": 0.30604982206405695, |
|
"grad_norm": 4.361700060061493, |
|
"learning_rate": 1.4331864904552129e-06, |
|
"loss": 0.0465, |
|
"step": 430 |
|
}, |
|
{ |
|
"epoch": 0.30676156583629893, |
|
"grad_norm": 4.3036197508138105, |
|
"learning_rate": 1.4317180616740089e-06, |
|
"loss": 0.0604, |
|
"step": 431 |
|
}, |
|
{ |
|
"epoch": 0.3074733096085409, |
|
"grad_norm": 5.652510613520501, |
|
"learning_rate": 1.4302496328928046e-06, |
|
"loss": 0.1603, |
|
"step": 432 |
|
}, |
|
{ |
|
"epoch": 0.3081850533807829, |
|
"grad_norm": 4.460907997795395, |
|
"learning_rate": 1.4287812041116006e-06, |
|
"loss": 0.0773, |
|
"step": 433 |
|
}, |
|
{ |
|
"epoch": 0.3088967971530249, |
|
"grad_norm": 6.627971054944022, |
|
"learning_rate": 1.4273127753303964e-06, |
|
"loss": 0.1324, |
|
"step": 434 |
|
}, |
|
{ |
|
"epoch": 0.3096085409252669, |
|
"grad_norm": 6.483300692256294, |
|
"learning_rate": 1.4258443465491923e-06, |
|
"loss": 0.1187, |
|
"step": 435 |
|
}, |
|
{ |
|
"epoch": 0.3103202846975089, |
|
"grad_norm": 3.818791810351555, |
|
"learning_rate": 1.4243759177679881e-06, |
|
"loss": 0.1222, |
|
"step": 436 |
|
}, |
|
{ |
|
"epoch": 0.31103202846975087, |
|
"grad_norm": 4.205532589817094, |
|
"learning_rate": 1.422907488986784e-06, |
|
"loss": 0.04, |
|
"step": 437 |
|
}, |
|
{ |
|
"epoch": 0.3117437722419929, |
|
"grad_norm": 4.138905524462921, |
|
"learning_rate": 1.4214390602055799e-06, |
|
"loss": 0.1356, |
|
"step": 438 |
|
}, |
|
{ |
|
"epoch": 0.3124555160142349, |
|
"grad_norm": 4.560324163124626, |
|
"learning_rate": 1.4199706314243758e-06, |
|
"loss": -0.001, |
|
"step": 439 |
|
}, |
|
{ |
|
"epoch": 0.31316725978647686, |
|
"grad_norm": 5.088704049660316, |
|
"learning_rate": 1.4185022026431716e-06, |
|
"loss": 0.0828, |
|
"step": 440 |
|
}, |
|
{ |
|
"epoch": 0.31387900355871884, |
|
"grad_norm": 3.643900825951513, |
|
"learning_rate": 1.4170337738619676e-06, |
|
"loss": -0.0633, |
|
"step": 441 |
|
}, |
|
{ |
|
"epoch": 0.3145907473309609, |
|
"grad_norm": 12.367839552654106, |
|
"learning_rate": 1.4155653450807633e-06, |
|
"loss": 0.2273, |
|
"step": 442 |
|
}, |
|
{ |
|
"epoch": 0.31530249110320285, |
|
"grad_norm": 3.035800153655871, |
|
"learning_rate": 1.4140969162995595e-06, |
|
"loss": -0.0196, |
|
"step": 443 |
|
}, |
|
{ |
|
"epoch": 0.31601423487544483, |
|
"grad_norm": 18.763915734499722, |
|
"learning_rate": 1.4126284875183553e-06, |
|
"loss": 0.0782, |
|
"step": 444 |
|
}, |
|
{ |
|
"epoch": 0.3167259786476868, |
|
"grad_norm": 2.571197728361492, |
|
"learning_rate": 1.4111600587371513e-06, |
|
"loss": -0.0196, |
|
"step": 445 |
|
}, |
|
{ |
|
"epoch": 0.31743772241992885, |
|
"grad_norm": 5.409603600214178, |
|
"learning_rate": 1.4096916299559472e-06, |
|
"loss": 0.1013, |
|
"step": 446 |
|
}, |
|
{ |
|
"epoch": 0.3181494661921708, |
|
"grad_norm": 5.796783528801567, |
|
"learning_rate": 1.408223201174743e-06, |
|
"loss": 0.085, |
|
"step": 447 |
|
}, |
|
{ |
|
"epoch": 0.3188612099644128, |
|
"grad_norm": 5.821264734394937, |
|
"learning_rate": 1.406754772393539e-06, |
|
"loss": 0.0568, |
|
"step": 448 |
|
}, |
|
{ |
|
"epoch": 0.3195729537366548, |
|
"grad_norm": 3.001625097715217, |
|
"learning_rate": 1.4052863436123348e-06, |
|
"loss": 0.0876, |
|
"step": 449 |
|
}, |
|
{ |
|
"epoch": 0.3202846975088968, |
|
"grad_norm": 4.7979999428447355, |
|
"learning_rate": 1.4038179148311307e-06, |
|
"loss": 0.1001, |
|
"step": 450 |
|
}, |
|
{ |
|
"epoch": 0.3209964412811388, |
|
"grad_norm": 6.605368263206687, |
|
"learning_rate": 1.4023494860499265e-06, |
|
"loss": 0.1632, |
|
"step": 451 |
|
}, |
|
{ |
|
"epoch": 0.3217081850533808, |
|
"grad_norm": 3.502035638929594, |
|
"learning_rate": 1.4008810572687225e-06, |
|
"loss": -0.0367, |
|
"step": 452 |
|
}, |
|
{ |
|
"epoch": 0.32241992882562276, |
|
"grad_norm": 9.002808450564668, |
|
"learning_rate": 1.3994126284875182e-06, |
|
"loss": 0.0653, |
|
"step": 453 |
|
}, |
|
{ |
|
"epoch": 0.3231316725978648, |
|
"grad_norm": 5.094547700731088, |
|
"learning_rate": 1.3979441997063142e-06, |
|
"loss": 0.0839, |
|
"step": 454 |
|
}, |
|
{ |
|
"epoch": 0.3238434163701068, |
|
"grad_norm": 5.66990673708365, |
|
"learning_rate": 1.39647577092511e-06, |
|
"loss": 0.0375, |
|
"step": 455 |
|
}, |
|
{ |
|
"epoch": 0.32455516014234875, |
|
"grad_norm": 7.343498323064397, |
|
"learning_rate": 1.395007342143906e-06, |
|
"loss": 0.0998, |
|
"step": 456 |
|
}, |
|
{ |
|
"epoch": 0.32526690391459073, |
|
"grad_norm": 8.127266905066636, |
|
"learning_rate": 1.3935389133627017e-06, |
|
"loss": 0.1422, |
|
"step": 457 |
|
}, |
|
{ |
|
"epoch": 0.32597864768683277, |
|
"grad_norm": 3.800517277896503, |
|
"learning_rate": 1.3920704845814977e-06, |
|
"loss": -0.0181, |
|
"step": 458 |
|
}, |
|
{ |
|
"epoch": 0.32669039145907475, |
|
"grad_norm": 5.43134160815067, |
|
"learning_rate": 1.3906020558002935e-06, |
|
"loss": 0.0497, |
|
"step": 459 |
|
}, |
|
{ |
|
"epoch": 0.3274021352313167, |
|
"grad_norm": 3.8471828111327633, |
|
"learning_rate": 1.3891336270190894e-06, |
|
"loss": 0.0897, |
|
"step": 460 |
|
}, |
|
{ |
|
"epoch": 0.3281138790035587, |
|
"grad_norm": 6.528410517911757, |
|
"learning_rate": 1.3876651982378852e-06, |
|
"loss": -0.0511, |
|
"step": 461 |
|
}, |
|
{ |
|
"epoch": 0.3288256227758007, |
|
"grad_norm": 4.252585353992694, |
|
"learning_rate": 1.3861967694566812e-06, |
|
"loss": 0.0951, |
|
"step": 462 |
|
}, |
|
{ |
|
"epoch": 0.3295373665480427, |
|
"grad_norm": 8.229788789692552, |
|
"learning_rate": 1.3847283406754774e-06, |
|
"loss": 0.0933, |
|
"step": 463 |
|
}, |
|
{ |
|
"epoch": 0.3302491103202847, |
|
"grad_norm": 4.1539330011510485, |
|
"learning_rate": 1.3832599118942731e-06, |
|
"loss": 0.023, |
|
"step": 464 |
|
}, |
|
{ |
|
"epoch": 0.3309608540925267, |
|
"grad_norm": 4.6663483556543826, |
|
"learning_rate": 1.3817914831130691e-06, |
|
"loss": 0.007, |
|
"step": 465 |
|
}, |
|
{ |
|
"epoch": 0.33167259786476866, |
|
"grad_norm": 4.669672111746775, |
|
"learning_rate": 1.3803230543318649e-06, |
|
"loss": 0.0893, |
|
"step": 466 |
|
}, |
|
{ |
|
"epoch": 0.3323843416370107, |
|
"grad_norm": 5.133795657125543, |
|
"learning_rate": 1.3788546255506609e-06, |
|
"loss": 0.1242, |
|
"step": 467 |
|
}, |
|
{ |
|
"epoch": 0.3330960854092527, |
|
"grad_norm": 5.475945496459967, |
|
"learning_rate": 1.3773861967694566e-06, |
|
"loss": 0.1022, |
|
"step": 468 |
|
}, |
|
{ |
|
"epoch": 0.33380782918149465, |
|
"grad_norm": 4.605885985857843, |
|
"learning_rate": 1.3759177679882526e-06, |
|
"loss": 0.1267, |
|
"step": 469 |
|
}, |
|
{ |
|
"epoch": 0.33451957295373663, |
|
"grad_norm": 3.0454480081847795, |
|
"learning_rate": 1.3744493392070484e-06, |
|
"loss": 0.0601, |
|
"step": 470 |
|
}, |
|
{ |
|
"epoch": 0.33523131672597867, |
|
"grad_norm": 4.401587000661199, |
|
"learning_rate": 1.3729809104258444e-06, |
|
"loss": 0.0515, |
|
"step": 471 |
|
}, |
|
{ |
|
"epoch": 0.33594306049822065, |
|
"grad_norm": 13.88100598913882, |
|
"learning_rate": 1.3715124816446401e-06, |
|
"loss": 0.0645, |
|
"step": 472 |
|
}, |
|
{ |
|
"epoch": 0.3366548042704626, |
|
"grad_norm": 5.4928368826174845, |
|
"learning_rate": 1.370044052863436e-06, |
|
"loss": 0.079, |
|
"step": 473 |
|
}, |
|
{ |
|
"epoch": 0.3373665480427046, |
|
"grad_norm": 4.3823646683805215, |
|
"learning_rate": 1.3685756240822319e-06, |
|
"loss": 0.0301, |
|
"step": 474 |
|
}, |
|
{ |
|
"epoch": 0.33807829181494664, |
|
"grad_norm": 5.159143918902839, |
|
"learning_rate": 1.3671071953010278e-06, |
|
"loss": 0.0445, |
|
"step": 475 |
|
}, |
|
{ |
|
"epoch": 0.3387900355871886, |
|
"grad_norm": 5.394407322017402, |
|
"learning_rate": 1.3656387665198236e-06, |
|
"loss": -0.0628, |
|
"step": 476 |
|
}, |
|
{ |
|
"epoch": 0.3395017793594306, |
|
"grad_norm": 5.8944791534932754, |
|
"learning_rate": 1.3641703377386196e-06, |
|
"loss": 0.1409, |
|
"step": 477 |
|
}, |
|
{ |
|
"epoch": 0.3402135231316726, |
|
"grad_norm": 3.5826852049675066, |
|
"learning_rate": 1.3627019089574153e-06, |
|
"loss": 0.0186, |
|
"step": 478 |
|
}, |
|
{ |
|
"epoch": 0.3409252669039146, |
|
"grad_norm": 8.927906783297527, |
|
"learning_rate": 1.3612334801762113e-06, |
|
"loss": 0.1988, |
|
"step": 479 |
|
}, |
|
{ |
|
"epoch": 0.3416370106761566, |
|
"grad_norm": 3.6064993892760655, |
|
"learning_rate": 1.359765051395007e-06, |
|
"loss": -0.0085, |
|
"step": 480 |
|
}, |
|
{ |
|
"epoch": 0.3423487544483986, |
|
"grad_norm": 5.465970450161264, |
|
"learning_rate": 1.358296622613803e-06, |
|
"loss": 0.018, |
|
"step": 481 |
|
}, |
|
{ |
|
"epoch": 0.34306049822064055, |
|
"grad_norm": 5.639957969725356, |
|
"learning_rate": 1.3568281938325993e-06, |
|
"loss": 0.0933, |
|
"step": 482 |
|
}, |
|
{ |
|
"epoch": 0.3437722419928826, |
|
"grad_norm": 5.134464735383474, |
|
"learning_rate": 1.355359765051395e-06, |
|
"loss": 0.0109, |
|
"step": 483 |
|
}, |
|
{ |
|
"epoch": 0.34448398576512457, |
|
"grad_norm": 6.339479408912485, |
|
"learning_rate": 1.353891336270191e-06, |
|
"loss": 0.1694, |
|
"step": 484 |
|
}, |
|
{ |
|
"epoch": 0.34519572953736655, |
|
"grad_norm": 5.629471252167426, |
|
"learning_rate": 1.3524229074889868e-06, |
|
"loss": 0.1749, |
|
"step": 485 |
|
}, |
|
{ |
|
"epoch": 0.3459074733096085, |
|
"grad_norm": 4.1844924830495565, |
|
"learning_rate": 1.3509544787077827e-06, |
|
"loss": 0.0548, |
|
"step": 486 |
|
}, |
|
{ |
|
"epoch": 0.34661921708185056, |
|
"grad_norm": 3.81599741125119, |
|
"learning_rate": 1.3494860499265785e-06, |
|
"loss": 0.0581, |
|
"step": 487 |
|
}, |
|
{ |
|
"epoch": 0.34733096085409254, |
|
"grad_norm": 3.82408146519255, |
|
"learning_rate": 1.3480176211453745e-06, |
|
"loss": -0.0245, |
|
"step": 488 |
|
}, |
|
{ |
|
"epoch": 0.3480427046263345, |
|
"grad_norm": 4.118872049980593, |
|
"learning_rate": 1.3465491923641703e-06, |
|
"loss": -0.0963, |
|
"step": 489 |
|
}, |
|
{ |
|
"epoch": 0.3487544483985765, |
|
"grad_norm": 6.1088792058255, |
|
"learning_rate": 1.3450807635829662e-06, |
|
"loss": 0.0747, |
|
"step": 490 |
|
}, |
|
{ |
|
"epoch": 0.3494661921708185, |
|
"grad_norm": 6.388375406316636, |
|
"learning_rate": 1.343612334801762e-06, |
|
"loss": -0.0349, |
|
"step": 491 |
|
}, |
|
{ |
|
"epoch": 0.3501779359430605, |
|
"grad_norm": 5.8039027796462905, |
|
"learning_rate": 1.342143906020558e-06, |
|
"loss": 0.0393, |
|
"step": 492 |
|
}, |
|
{ |
|
"epoch": 0.3508896797153025, |
|
"grad_norm": 4.94103529672343, |
|
"learning_rate": 1.3406754772393537e-06, |
|
"loss": 0.0246, |
|
"step": 493 |
|
}, |
|
{ |
|
"epoch": 0.3516014234875445, |
|
"grad_norm": 4.054831441628558, |
|
"learning_rate": 1.3392070484581497e-06, |
|
"loss": -0.0507, |
|
"step": 494 |
|
}, |
|
{ |
|
"epoch": 0.35231316725978645, |
|
"grad_norm": 3.061671242195688, |
|
"learning_rate": 1.3377386196769455e-06, |
|
"loss": 0.0015, |
|
"step": 495 |
|
}, |
|
{ |
|
"epoch": 0.3530249110320285, |
|
"grad_norm": 6.231153836747014, |
|
"learning_rate": 1.3362701908957415e-06, |
|
"loss": 0.1093, |
|
"step": 496 |
|
}, |
|
{ |
|
"epoch": 0.35373665480427047, |
|
"grad_norm": 4.803948868211813, |
|
"learning_rate": 1.3348017621145372e-06, |
|
"loss": -0.0375, |
|
"step": 497 |
|
}, |
|
{ |
|
"epoch": 0.35444839857651245, |
|
"grad_norm": 7.2124323426879755, |
|
"learning_rate": 1.3333333333333332e-06, |
|
"loss": 0.0418, |
|
"step": 498 |
|
}, |
|
{ |
|
"epoch": 0.3551601423487544, |
|
"grad_norm": 12.051137049042497, |
|
"learning_rate": 1.3318649045521292e-06, |
|
"loss": 0.0088, |
|
"step": 499 |
|
}, |
|
{ |
|
"epoch": 0.35587188612099646, |
|
"grad_norm": 5.94286256858033, |
|
"learning_rate": 1.330396475770925e-06, |
|
"loss": 0.1674, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.35658362989323844, |
|
"grad_norm": 4.965540395103566, |
|
"learning_rate": 1.328928046989721e-06, |
|
"loss": 0.0542, |
|
"step": 501 |
|
}, |
|
{ |
|
"epoch": 0.3572953736654804, |
|
"grad_norm": 4.590895615840333, |
|
"learning_rate": 1.327459618208517e-06, |
|
"loss": 0.0241, |
|
"step": 502 |
|
}, |
|
{ |
|
"epoch": 0.3580071174377224, |
|
"grad_norm": 5.8626146642862595, |
|
"learning_rate": 1.3259911894273129e-06, |
|
"loss": -0.0145, |
|
"step": 503 |
|
}, |
|
{ |
|
"epoch": 0.35871886120996443, |
|
"grad_norm": 4.977928656796741, |
|
"learning_rate": 1.3245227606461086e-06, |
|
"loss": 0.0542, |
|
"step": 504 |
|
}, |
|
{ |
|
"epoch": 0.3594306049822064, |
|
"grad_norm": 3.974031673502917, |
|
"learning_rate": 1.3230543318649046e-06, |
|
"loss": 0.0275, |
|
"step": 505 |
|
}, |
|
{ |
|
"epoch": 0.3601423487544484, |
|
"grad_norm": 5.703999157106998, |
|
"learning_rate": 1.3215859030837004e-06, |
|
"loss": 0.0349, |
|
"step": 506 |
|
}, |
|
{ |
|
"epoch": 0.3608540925266904, |
|
"grad_norm": 4.052662367583191, |
|
"learning_rate": 1.3201174743024964e-06, |
|
"loss": 0.0043, |
|
"step": 507 |
|
}, |
|
{ |
|
"epoch": 0.3615658362989324, |
|
"grad_norm": 3.643033258471114, |
|
"learning_rate": 1.3186490455212921e-06, |
|
"loss": 0.1718, |
|
"step": 508 |
|
}, |
|
{ |
|
"epoch": 0.3622775800711744, |
|
"grad_norm": 5.142755038140959, |
|
"learning_rate": 1.3171806167400881e-06, |
|
"loss": 0.1742, |
|
"step": 509 |
|
}, |
|
{ |
|
"epoch": 0.36298932384341637, |
|
"grad_norm": 5.241172817397939, |
|
"learning_rate": 1.3157121879588839e-06, |
|
"loss": 0.0968, |
|
"step": 510 |
|
}, |
|
{ |
|
"epoch": 0.36370106761565835, |
|
"grad_norm": 5.1078352635046445, |
|
"learning_rate": 1.3142437591776798e-06, |
|
"loss": 0.1197, |
|
"step": 511 |
|
}, |
|
{ |
|
"epoch": 0.3644128113879004, |
|
"grad_norm": 6.225810427937807, |
|
"learning_rate": 1.3127753303964756e-06, |
|
"loss": 0.1229, |
|
"step": 512 |
|
}, |
|
{ |
|
"epoch": 0.36512455516014236, |
|
"grad_norm": 6.7014735750723196, |
|
"learning_rate": 1.3113069016152716e-06, |
|
"loss": 0.12, |
|
"step": 513 |
|
}, |
|
{ |
|
"epoch": 0.36583629893238434, |
|
"grad_norm": 4.627706612086469, |
|
"learning_rate": 1.3098384728340674e-06, |
|
"loss": 0.0387, |
|
"step": 514 |
|
}, |
|
{ |
|
"epoch": 0.3665480427046263, |
|
"grad_norm": 4.634265382361618, |
|
"learning_rate": 1.3083700440528633e-06, |
|
"loss": 0.068, |
|
"step": 515 |
|
}, |
|
{ |
|
"epoch": 0.36725978647686836, |
|
"grad_norm": 7.336944153509603, |
|
"learning_rate": 1.3069016152716593e-06, |
|
"loss": 0.044, |
|
"step": 516 |
|
}, |
|
{ |
|
"epoch": 0.36797153024911033, |
|
"grad_norm": 5.110609552864615, |
|
"learning_rate": 1.305433186490455e-06, |
|
"loss": 0.0432, |
|
"step": 517 |
|
}, |
|
{ |
|
"epoch": 0.3686832740213523, |
|
"grad_norm": 5.7487944292813875, |
|
"learning_rate": 1.303964757709251e-06, |
|
"loss": 0.0086, |
|
"step": 518 |
|
}, |
|
{ |
|
"epoch": 0.3693950177935943, |
|
"grad_norm": 3.9281649664133167, |
|
"learning_rate": 1.3024963289280468e-06, |
|
"loss": -0.0906, |
|
"step": 519 |
|
}, |
|
{ |
|
"epoch": 0.3701067615658363, |
|
"grad_norm": 5.635784576684936, |
|
"learning_rate": 1.3010279001468428e-06, |
|
"loss": 0.0388, |
|
"step": 520 |
|
}, |
|
{ |
|
"epoch": 0.3708185053380783, |
|
"grad_norm": 4.504403185332134, |
|
"learning_rate": 1.2995594713656388e-06, |
|
"loss": 0.0674, |
|
"step": 521 |
|
}, |
|
{ |
|
"epoch": 0.3715302491103203, |
|
"grad_norm": 5.796118838611627, |
|
"learning_rate": 1.2980910425844348e-06, |
|
"loss": 0.0867, |
|
"step": 522 |
|
}, |
|
{ |
|
"epoch": 0.37224199288256227, |
|
"grad_norm": 3.7423957018184564, |
|
"learning_rate": 1.2966226138032305e-06, |
|
"loss": 0.1033, |
|
"step": 523 |
|
}, |
|
{ |
|
"epoch": 0.37295373665480425, |
|
"grad_norm": 4.653161989863049, |
|
"learning_rate": 1.2951541850220265e-06, |
|
"loss": 0.1115, |
|
"step": 524 |
|
}, |
|
{ |
|
"epoch": 0.3736654804270463, |
|
"grad_norm": 5.487569286282674, |
|
"learning_rate": 1.2936857562408223e-06, |
|
"loss": 0.0015, |
|
"step": 525 |
|
}, |
|
{ |
|
"epoch": 0.37437722419928826, |
|
"grad_norm": 3.660972349527697, |
|
"learning_rate": 1.2922173274596182e-06, |
|
"loss": -0.0158, |
|
"step": 526 |
|
}, |
|
{ |
|
"epoch": 0.37508896797153024, |
|
"grad_norm": 14.386677916212754, |
|
"learning_rate": 1.290748898678414e-06, |
|
"loss": 0.1662, |
|
"step": 527 |
|
}, |
|
{ |
|
"epoch": 0.3758007117437722, |
|
"grad_norm": 4.804991772949289, |
|
"learning_rate": 1.28928046989721e-06, |
|
"loss": 0.1037, |
|
"step": 528 |
|
}, |
|
{ |
|
"epoch": 0.37651245551601425, |
|
"grad_norm": 4.285523891761131, |
|
"learning_rate": 1.2878120411160057e-06, |
|
"loss": 0.1347, |
|
"step": 529 |
|
}, |
|
{ |
|
"epoch": 0.37722419928825623, |
|
"grad_norm": 8.680000416276632, |
|
"learning_rate": 1.2863436123348017e-06, |
|
"loss": 0.1935, |
|
"step": 530 |
|
}, |
|
{ |
|
"epoch": 0.3779359430604982, |
|
"grad_norm": 6.3227238717346435, |
|
"learning_rate": 1.2848751835535975e-06, |
|
"loss": 0.1116, |
|
"step": 531 |
|
}, |
|
{ |
|
"epoch": 0.3786476868327402, |
|
"grad_norm": 4.369406257640907, |
|
"learning_rate": 1.2834067547723935e-06, |
|
"loss": 0.0217, |
|
"step": 532 |
|
}, |
|
{ |
|
"epoch": 0.37935943060498223, |
|
"grad_norm": 3.511049947393694, |
|
"learning_rate": 1.2819383259911892e-06, |
|
"loss": 0.0571, |
|
"step": 533 |
|
}, |
|
{ |
|
"epoch": 0.3800711743772242, |
|
"grad_norm": 3.4769751828887587, |
|
"learning_rate": 1.2804698972099852e-06, |
|
"loss": 0.0599, |
|
"step": 534 |
|
}, |
|
{ |
|
"epoch": 0.3807829181494662, |
|
"grad_norm": 4.425270529198389, |
|
"learning_rate": 1.2790014684287812e-06, |
|
"loss": -0.0344, |
|
"step": 535 |
|
}, |
|
{ |
|
"epoch": 0.38149466192170817, |
|
"grad_norm": 4.27135554896897, |
|
"learning_rate": 1.277533039647577e-06, |
|
"loss": 0.0781, |
|
"step": 536 |
|
}, |
|
{ |
|
"epoch": 0.3822064056939502, |
|
"grad_norm": 4.215674289333216, |
|
"learning_rate": 1.276064610866373e-06, |
|
"loss": 0.0871, |
|
"step": 537 |
|
}, |
|
{ |
|
"epoch": 0.3829181494661922, |
|
"grad_norm": 6.4279361532063515, |
|
"learning_rate": 1.2745961820851687e-06, |
|
"loss": 0.1274, |
|
"step": 538 |
|
}, |
|
{ |
|
"epoch": 0.38362989323843416, |
|
"grad_norm": 2.9956366278559794, |
|
"learning_rate": 1.2731277533039647e-06, |
|
"loss": 0.0482, |
|
"step": 539 |
|
}, |
|
{ |
|
"epoch": 0.38434163701067614, |
|
"grad_norm": 2.833506148808736, |
|
"learning_rate": 1.2716593245227604e-06, |
|
"loss": 0.0121, |
|
"step": 540 |
|
}, |
|
{ |
|
"epoch": 0.3850533807829182, |
|
"grad_norm": 4.398064985474303, |
|
"learning_rate": 1.2701908957415566e-06, |
|
"loss": 0.0557, |
|
"step": 541 |
|
}, |
|
{ |
|
"epoch": 0.38576512455516015, |
|
"grad_norm": 4.301774106985314, |
|
"learning_rate": 1.2687224669603524e-06, |
|
"loss": 0.1388, |
|
"step": 542 |
|
}, |
|
{ |
|
"epoch": 0.38647686832740213, |
|
"grad_norm": 4.016092829782654, |
|
"learning_rate": 1.2672540381791484e-06, |
|
"loss": -0.0178, |
|
"step": 543 |
|
}, |
|
{ |
|
"epoch": 0.3871886120996441, |
|
"grad_norm": 4.230056193093481, |
|
"learning_rate": 1.2657856093979441e-06, |
|
"loss": 0.0911, |
|
"step": 544 |
|
}, |
|
{ |
|
"epoch": 0.3879003558718861, |
|
"grad_norm": 4.648048910359669, |
|
"learning_rate": 1.2643171806167401e-06, |
|
"loss": 0.0079, |
|
"step": 545 |
|
}, |
|
{ |
|
"epoch": 0.38861209964412813, |
|
"grad_norm": 6.046269980431698, |
|
"learning_rate": 1.2628487518355359e-06, |
|
"loss": 0.01, |
|
"step": 546 |
|
}, |
|
{ |
|
"epoch": 0.3893238434163701, |
|
"grad_norm": 5.598650442721456, |
|
"learning_rate": 1.2613803230543319e-06, |
|
"loss": 0.1025, |
|
"step": 547 |
|
}, |
|
{ |
|
"epoch": 0.3900355871886121, |
|
"grad_norm": 4.154987344183164, |
|
"learning_rate": 1.2599118942731276e-06, |
|
"loss": 0.073, |
|
"step": 548 |
|
}, |
|
{ |
|
"epoch": 0.39074733096085407, |
|
"grad_norm": 5.151314702783577, |
|
"learning_rate": 1.2584434654919236e-06, |
|
"loss": 0.1204, |
|
"step": 549 |
|
}, |
|
{ |
|
"epoch": 0.3914590747330961, |
|
"grad_norm": 4.410394761804895, |
|
"learning_rate": 1.2569750367107194e-06, |
|
"loss": 0.0851, |
|
"step": 550 |
|
}, |
|
{ |
|
"epoch": 0.3921708185053381, |
|
"grad_norm": 4.99583108672324, |
|
"learning_rate": 1.2555066079295153e-06, |
|
"loss": 0.0927, |
|
"step": 551 |
|
}, |
|
{ |
|
"epoch": 0.39288256227758006, |
|
"grad_norm": 5.8725775736382095, |
|
"learning_rate": 1.2540381791483113e-06, |
|
"loss": 0.2121, |
|
"step": 552 |
|
}, |
|
{ |
|
"epoch": 0.39359430604982204, |
|
"grad_norm": 4.747630682859618, |
|
"learning_rate": 1.252569750367107e-06, |
|
"loss": -0.0295, |
|
"step": 553 |
|
}, |
|
{ |
|
"epoch": 0.3943060498220641, |
|
"grad_norm": 9.183776100778932, |
|
"learning_rate": 1.251101321585903e-06, |
|
"loss": 0.3017, |
|
"step": 554 |
|
}, |
|
{ |
|
"epoch": 0.39501779359430605, |
|
"grad_norm": 6.072579411018435, |
|
"learning_rate": 1.2496328928046988e-06, |
|
"loss": 0.1708, |
|
"step": 555 |
|
}, |
|
{ |
|
"epoch": 0.39572953736654803, |
|
"grad_norm": 4.213015126956102, |
|
"learning_rate": 1.2481644640234948e-06, |
|
"loss": 0.09, |
|
"step": 556 |
|
}, |
|
{ |
|
"epoch": 0.39644128113879, |
|
"grad_norm": 7.692972607852313, |
|
"learning_rate": 1.2466960352422906e-06, |
|
"loss": 0.0703, |
|
"step": 557 |
|
}, |
|
{ |
|
"epoch": 0.39715302491103205, |
|
"grad_norm": 3.336700137869538, |
|
"learning_rate": 1.2452276064610866e-06, |
|
"loss": 0.0718, |
|
"step": 558 |
|
}, |
|
{ |
|
"epoch": 0.39786476868327403, |
|
"grad_norm": 3.437064819532712, |
|
"learning_rate": 1.2437591776798823e-06, |
|
"loss": 0.1232, |
|
"step": 559 |
|
}, |
|
{ |
|
"epoch": 0.398576512455516, |
|
"grad_norm": 4.752532484654912, |
|
"learning_rate": 1.2422907488986785e-06, |
|
"loss": -0.0222, |
|
"step": 560 |
|
}, |
|
{ |
|
"epoch": 0.399288256227758, |
|
"grad_norm": 5.671494316167505, |
|
"learning_rate": 1.2408223201174743e-06, |
|
"loss": 0.1919, |
|
"step": 561 |
|
}, |
|
{ |
|
"epoch": 0.4, |
|
"grad_norm": 4.530732644899779, |
|
"learning_rate": 1.2393538913362703e-06, |
|
"loss": 0.0283, |
|
"step": 562 |
|
}, |
|
{ |
|
"epoch": 0.400711743772242, |
|
"grad_norm": 8.233660614468986, |
|
"learning_rate": 1.237885462555066e-06, |
|
"loss": 0.0892, |
|
"step": 563 |
|
}, |
|
{ |
|
"epoch": 0.401423487544484, |
|
"grad_norm": 9.502726514967256, |
|
"learning_rate": 1.236417033773862e-06, |
|
"loss": 0.0579, |
|
"step": 564 |
|
}, |
|
{ |
|
"epoch": 0.40213523131672596, |
|
"grad_norm": 3.5236594390328295, |
|
"learning_rate": 1.2349486049926578e-06, |
|
"loss": 0.0199, |
|
"step": 565 |
|
}, |
|
{ |
|
"epoch": 0.402846975088968, |
|
"grad_norm": 5.95870557924922, |
|
"learning_rate": 1.2334801762114537e-06, |
|
"loss": 0.0288, |
|
"step": 566 |
|
}, |
|
{ |
|
"epoch": 0.40355871886121, |
|
"grad_norm": 7.876154981414457, |
|
"learning_rate": 1.2320117474302495e-06, |
|
"loss": 0.0504, |
|
"step": 567 |
|
}, |
|
{ |
|
"epoch": 0.40427046263345195, |
|
"grad_norm": 5.71977049680604, |
|
"learning_rate": 1.2305433186490455e-06, |
|
"loss": 0.0537, |
|
"step": 568 |
|
}, |
|
{ |
|
"epoch": 0.40498220640569393, |
|
"grad_norm": 5.252680866045221, |
|
"learning_rate": 1.2290748898678415e-06, |
|
"loss": -0.0052, |
|
"step": 569 |
|
}, |
|
{ |
|
"epoch": 0.40569395017793597, |
|
"grad_norm": 4.443807498007278, |
|
"learning_rate": 1.2276064610866372e-06, |
|
"loss": 0.0806, |
|
"step": 570 |
|
}, |
|
{ |
|
"epoch": 0.40640569395017795, |
|
"grad_norm": 4.114830428160394, |
|
"learning_rate": 1.2261380323054332e-06, |
|
"loss": 0.0369, |
|
"step": 571 |
|
}, |
|
{ |
|
"epoch": 0.40711743772241993, |
|
"grad_norm": 5.210110211222593, |
|
"learning_rate": 1.224669603524229e-06, |
|
"loss": 0.1161, |
|
"step": 572 |
|
}, |
|
{ |
|
"epoch": 0.4078291814946619, |
|
"grad_norm": 4.043897557109054, |
|
"learning_rate": 1.223201174743025e-06, |
|
"loss": -0.0277, |
|
"step": 573 |
|
}, |
|
{ |
|
"epoch": 0.4085409252669039, |
|
"grad_norm": 6.136939420563002, |
|
"learning_rate": 1.2217327459618207e-06, |
|
"loss": 0.1014, |
|
"step": 574 |
|
}, |
|
{ |
|
"epoch": 0.4092526690391459, |
|
"grad_norm": 5.27770281063622, |
|
"learning_rate": 1.2202643171806167e-06, |
|
"loss": 0.2431, |
|
"step": 575 |
|
}, |
|
{ |
|
"epoch": 0.4099644128113879, |
|
"grad_norm": 3.3599898374494637, |
|
"learning_rate": 1.2187958883994125e-06, |
|
"loss": 0.0897, |
|
"step": 576 |
|
}, |
|
{ |
|
"epoch": 0.4106761565836299, |
|
"grad_norm": 4.979236213911304, |
|
"learning_rate": 1.2173274596182084e-06, |
|
"loss": 0.0831, |
|
"step": 577 |
|
}, |
|
{ |
|
"epoch": 0.41138790035587186, |
|
"grad_norm": 4.575722750914581, |
|
"learning_rate": 1.2158590308370042e-06, |
|
"loss": 0.1251, |
|
"step": 578 |
|
}, |
|
{ |
|
"epoch": 0.4120996441281139, |
|
"grad_norm": 3.7621845658093855, |
|
"learning_rate": 1.2143906020558002e-06, |
|
"loss": -0.0291, |
|
"step": 579 |
|
}, |
|
{ |
|
"epoch": 0.4128113879003559, |
|
"grad_norm": 4.218297411342295, |
|
"learning_rate": 1.2129221732745961e-06, |
|
"loss": 0.1321, |
|
"step": 580 |
|
}, |
|
{ |
|
"epoch": 0.41352313167259785, |
|
"grad_norm": 3.879794323572462, |
|
"learning_rate": 1.2114537444933921e-06, |
|
"loss": 0.0918, |
|
"step": 581 |
|
}, |
|
{ |
|
"epoch": 0.41423487544483983, |
|
"grad_norm": 4.052563296464422, |
|
"learning_rate": 1.2099853157121879e-06, |
|
"loss": 0.0771, |
|
"step": 582 |
|
}, |
|
{ |
|
"epoch": 0.41494661921708187, |
|
"grad_norm": 4.85871887683837, |
|
"learning_rate": 1.2085168869309839e-06, |
|
"loss": 0.0863, |
|
"step": 583 |
|
}, |
|
{ |
|
"epoch": 0.41565836298932385, |
|
"grad_norm": 4.707446979825805, |
|
"learning_rate": 1.2070484581497796e-06, |
|
"loss": 0.0187, |
|
"step": 584 |
|
}, |
|
{ |
|
"epoch": 0.41637010676156583, |
|
"grad_norm": 3.8785600613390097, |
|
"learning_rate": 1.2055800293685756e-06, |
|
"loss": -0.0032, |
|
"step": 585 |
|
}, |
|
{ |
|
"epoch": 0.4170818505338078, |
|
"grad_norm": 4.765160109560956, |
|
"learning_rate": 1.2041116005873716e-06, |
|
"loss": 0.0677, |
|
"step": 586 |
|
}, |
|
{ |
|
"epoch": 0.41779359430604984, |
|
"grad_norm": 4.455527851930796, |
|
"learning_rate": 1.2026431718061674e-06, |
|
"loss": 0.111, |
|
"step": 587 |
|
}, |
|
{ |
|
"epoch": 0.4185053380782918, |
|
"grad_norm": 3.1973105155389394, |
|
"learning_rate": 1.2011747430249633e-06, |
|
"loss": 0.1831, |
|
"step": 588 |
|
}, |
|
{ |
|
"epoch": 0.4192170818505338, |
|
"grad_norm": 7.28943877562424, |
|
"learning_rate": 1.199706314243759e-06, |
|
"loss": 0.0971, |
|
"step": 589 |
|
}, |
|
{ |
|
"epoch": 0.4199288256227758, |
|
"grad_norm": 6.135188134626812, |
|
"learning_rate": 1.198237885462555e-06, |
|
"loss": 0.0729, |
|
"step": 590 |
|
}, |
|
{ |
|
"epoch": 0.4206405693950178, |
|
"grad_norm": 3.7048101655046723, |
|
"learning_rate": 1.1967694566813508e-06, |
|
"loss": -0.1257, |
|
"step": 591 |
|
}, |
|
{ |
|
"epoch": 0.4213523131672598, |
|
"grad_norm": 4.813423563533292, |
|
"learning_rate": 1.1953010279001468e-06, |
|
"loss": 0.0854, |
|
"step": 592 |
|
}, |
|
{ |
|
"epoch": 0.4220640569395018, |
|
"grad_norm": 3.208585076962423, |
|
"learning_rate": 1.1938325991189426e-06, |
|
"loss": 0.0278, |
|
"step": 593 |
|
}, |
|
{ |
|
"epoch": 0.42277580071174375, |
|
"grad_norm": 5.694992998368377, |
|
"learning_rate": 1.1923641703377386e-06, |
|
"loss": 0.122, |
|
"step": 594 |
|
}, |
|
{ |
|
"epoch": 0.4234875444839858, |
|
"grad_norm": 6.8821335283366505, |
|
"learning_rate": 1.1908957415565343e-06, |
|
"loss": 0.0321, |
|
"step": 595 |
|
}, |
|
{ |
|
"epoch": 0.42419928825622777, |
|
"grad_norm": 4.215796899366015, |
|
"learning_rate": 1.1894273127753303e-06, |
|
"loss": 0.0321, |
|
"step": 596 |
|
}, |
|
{ |
|
"epoch": 0.42491103202846975, |
|
"grad_norm": 5.316432439892324, |
|
"learning_rate": 1.187958883994126e-06, |
|
"loss": 0.068, |
|
"step": 597 |
|
}, |
|
{ |
|
"epoch": 0.42562277580071173, |
|
"grad_norm": 5.511432729500699, |
|
"learning_rate": 1.186490455212922e-06, |
|
"loss": 0.0614, |
|
"step": 598 |
|
}, |
|
{ |
|
"epoch": 0.42633451957295376, |
|
"grad_norm": 5.582771310939926, |
|
"learning_rate": 1.185022026431718e-06, |
|
"loss": 0.2272, |
|
"step": 599 |
|
}, |
|
{ |
|
"epoch": 0.42704626334519574, |
|
"grad_norm": 5.323291364746015, |
|
"learning_rate": 1.183553597650514e-06, |
|
"loss": 0.1055, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 0.4277580071174377, |
|
"grad_norm": 2.7981336095292777, |
|
"learning_rate": 1.1820851688693098e-06, |
|
"loss": 0.0135, |
|
"step": 601 |
|
}, |
|
{ |
|
"epoch": 0.4284697508896797, |
|
"grad_norm": 5.2514595546013, |
|
"learning_rate": 1.1806167400881057e-06, |
|
"loss": 0.1438, |
|
"step": 602 |
|
}, |
|
{ |
|
"epoch": 0.4291814946619217, |
|
"grad_norm": 4.8052648678120855, |
|
"learning_rate": 1.1791483113069017e-06, |
|
"loss": 0.0248, |
|
"step": 603 |
|
}, |
|
{ |
|
"epoch": 0.4298932384341637, |
|
"grad_norm": 4.272397432563753, |
|
"learning_rate": 1.1776798825256975e-06, |
|
"loss": 0.1677, |
|
"step": 604 |
|
}, |
|
{ |
|
"epoch": 0.4306049822064057, |
|
"grad_norm": 6.343124283623192, |
|
"learning_rate": 1.1762114537444935e-06, |
|
"loss": 0.1471, |
|
"step": 605 |
|
}, |
|
{ |
|
"epoch": 0.4313167259786477, |
|
"grad_norm": 13.126869867733665, |
|
"learning_rate": 1.1747430249632892e-06, |
|
"loss": 0.1401, |
|
"step": 606 |
|
}, |
|
{ |
|
"epoch": 0.43202846975088965, |
|
"grad_norm": 4.4220643559003765, |
|
"learning_rate": 1.1732745961820852e-06, |
|
"loss": 0.0717, |
|
"step": 607 |
|
}, |
|
{ |
|
"epoch": 0.4327402135231317, |
|
"grad_norm": 5.323938419745406, |
|
"learning_rate": 1.171806167400881e-06, |
|
"loss": 0.0522, |
|
"step": 608 |
|
}, |
|
{ |
|
"epoch": 0.43345195729537367, |
|
"grad_norm": 4.280517515464142, |
|
"learning_rate": 1.170337738619677e-06, |
|
"loss": -0.0424, |
|
"step": 609 |
|
}, |
|
{ |
|
"epoch": 0.43416370106761565, |
|
"grad_norm": 4.523195250354239, |
|
"learning_rate": 1.1688693098384727e-06, |
|
"loss": 0.0748, |
|
"step": 610 |
|
}, |
|
{ |
|
"epoch": 0.43487544483985763, |
|
"grad_norm": 3.867967315038678, |
|
"learning_rate": 1.1674008810572687e-06, |
|
"loss": -0.0254, |
|
"step": 611 |
|
}, |
|
{ |
|
"epoch": 0.43558718861209966, |
|
"grad_norm": 5.799611229856745, |
|
"learning_rate": 1.1659324522760645e-06, |
|
"loss": 0.0168, |
|
"step": 612 |
|
}, |
|
{ |
|
"epoch": 0.43629893238434164, |
|
"grad_norm": 5.017825585689327, |
|
"learning_rate": 1.1644640234948604e-06, |
|
"loss": -0.0632, |
|
"step": 613 |
|
}, |
|
{ |
|
"epoch": 0.4370106761565836, |
|
"grad_norm": 4.292392681648742, |
|
"learning_rate": 1.1629955947136562e-06, |
|
"loss": 0.1748, |
|
"step": 614 |
|
}, |
|
{ |
|
"epoch": 0.4377224199288256, |
|
"grad_norm": 5.436171095243215, |
|
"learning_rate": 1.1615271659324522e-06, |
|
"loss": 0.1117, |
|
"step": 615 |
|
}, |
|
{ |
|
"epoch": 0.43843416370106764, |
|
"grad_norm": 4.987884529606562, |
|
"learning_rate": 1.160058737151248e-06, |
|
"loss": -0.0053, |
|
"step": 616 |
|
}, |
|
{ |
|
"epoch": 0.4391459074733096, |
|
"grad_norm": 4.723339551994701, |
|
"learning_rate": 1.158590308370044e-06, |
|
"loss": 0.0186, |
|
"step": 617 |
|
}, |
|
{ |
|
"epoch": 0.4398576512455516, |
|
"grad_norm": 4.6346798200417, |
|
"learning_rate": 1.15712187958884e-06, |
|
"loss": 0.0858, |
|
"step": 618 |
|
}, |
|
{ |
|
"epoch": 0.4405693950177936, |
|
"grad_norm": 5.337842922227848, |
|
"learning_rate": 1.1556534508076359e-06, |
|
"loss": 0.052, |
|
"step": 619 |
|
}, |
|
{ |
|
"epoch": 0.4412811387900356, |
|
"grad_norm": 3.4759681450774633, |
|
"learning_rate": 1.1541850220264319e-06, |
|
"loss": 0.0147, |
|
"step": 620 |
|
}, |
|
{ |
|
"epoch": 0.4419928825622776, |
|
"grad_norm": 5.502678479043237, |
|
"learning_rate": 1.1527165932452276e-06, |
|
"loss": 0.0753, |
|
"step": 621 |
|
}, |
|
{ |
|
"epoch": 0.44270462633451957, |
|
"grad_norm": 3.188013970319323, |
|
"learning_rate": 1.1512481644640236e-06, |
|
"loss": 0.0928, |
|
"step": 622 |
|
}, |
|
{ |
|
"epoch": 0.44341637010676155, |
|
"grad_norm": 5.628840512931552, |
|
"learning_rate": 1.1497797356828194e-06, |
|
"loss": 0.1731, |
|
"step": 623 |
|
}, |
|
{ |
|
"epoch": 0.4441281138790036, |
|
"grad_norm": 5.2497544004123124, |
|
"learning_rate": 1.1483113069016153e-06, |
|
"loss": 0.1112, |
|
"step": 624 |
|
}, |
|
{ |
|
"epoch": 0.44483985765124556, |
|
"grad_norm": 5.598846841337847, |
|
"learning_rate": 1.1468428781204111e-06, |
|
"loss": 0.0839, |
|
"step": 625 |
|
}, |
|
{ |
|
"epoch": 0.44555160142348754, |
|
"grad_norm": 3.8121356477329487, |
|
"learning_rate": 1.145374449339207e-06, |
|
"loss": 0.0858, |
|
"step": 626 |
|
}, |
|
{ |
|
"epoch": 0.4462633451957295, |
|
"grad_norm": 6.402138781801384, |
|
"learning_rate": 1.1439060205580029e-06, |
|
"loss": 0.0405, |
|
"step": 627 |
|
}, |
|
{ |
|
"epoch": 0.44697508896797156, |
|
"grad_norm": 3.5680096501361027, |
|
"learning_rate": 1.1424375917767988e-06, |
|
"loss": 0.0179, |
|
"step": 628 |
|
}, |
|
{ |
|
"epoch": 0.44768683274021354, |
|
"grad_norm": 12.93989459742168, |
|
"learning_rate": 1.1409691629955946e-06, |
|
"loss": 0.139, |
|
"step": 629 |
|
}, |
|
{ |
|
"epoch": 0.4483985765124555, |
|
"grad_norm": 4.992624756195558, |
|
"learning_rate": 1.1395007342143906e-06, |
|
"loss": 0.1577, |
|
"step": 630 |
|
}, |
|
{ |
|
"epoch": 0.4491103202846975, |
|
"grad_norm": 5.4319610446325255, |
|
"learning_rate": 1.1380323054331863e-06, |
|
"loss": 0.1754, |
|
"step": 631 |
|
}, |
|
{ |
|
"epoch": 0.4498220640569395, |
|
"grad_norm": 5.069115166733579, |
|
"learning_rate": 1.1365638766519823e-06, |
|
"loss": -0.0696, |
|
"step": 632 |
|
}, |
|
{ |
|
"epoch": 0.4505338078291815, |
|
"grad_norm": 5.741666310849707, |
|
"learning_rate": 1.135095447870778e-06, |
|
"loss": 0.1294, |
|
"step": 633 |
|
}, |
|
{ |
|
"epoch": 0.4512455516014235, |
|
"grad_norm": 5.463031329189953, |
|
"learning_rate": 1.133627019089574e-06, |
|
"loss": 0.0757, |
|
"step": 634 |
|
}, |
|
{ |
|
"epoch": 0.45195729537366547, |
|
"grad_norm": 6.8511844474163395, |
|
"learning_rate": 1.1321585903083698e-06, |
|
"loss": 0.0903, |
|
"step": 635 |
|
}, |
|
{ |
|
"epoch": 0.45266903914590745, |
|
"grad_norm": 4.259850474495963, |
|
"learning_rate": 1.1306901615271658e-06, |
|
"loss": -0.0736, |
|
"step": 636 |
|
}, |
|
{ |
|
"epoch": 0.4533807829181495, |
|
"grad_norm": 7.009460323527026, |
|
"learning_rate": 1.1292217327459616e-06, |
|
"loss": -0.0667, |
|
"step": 637 |
|
}, |
|
{ |
|
"epoch": 0.45409252669039146, |
|
"grad_norm": 2.704359268907955, |
|
"learning_rate": 1.1277533039647578e-06, |
|
"loss": -0.1515, |
|
"step": 638 |
|
}, |
|
{ |
|
"epoch": 0.45480427046263344, |
|
"grad_norm": 4.892012036861605, |
|
"learning_rate": 1.1262848751835537e-06, |
|
"loss": 0.1136, |
|
"step": 639 |
|
}, |
|
{ |
|
"epoch": 0.4555160142348754, |
|
"grad_norm": 4.420897035322563, |
|
"learning_rate": 1.1248164464023495e-06, |
|
"loss": 0.0102, |
|
"step": 640 |
|
}, |
|
{ |
|
"epoch": 0.45622775800711746, |
|
"grad_norm": 6.963193037787631, |
|
"learning_rate": 1.1233480176211455e-06, |
|
"loss": 0.0197, |
|
"step": 641 |
|
}, |
|
{ |
|
"epoch": 0.45693950177935944, |
|
"grad_norm": 4.10057023897775, |
|
"learning_rate": 1.1218795888399412e-06, |
|
"loss": -0.0319, |
|
"step": 642 |
|
}, |
|
{ |
|
"epoch": 0.4576512455516014, |
|
"grad_norm": 5.54032283159087, |
|
"learning_rate": 1.1204111600587372e-06, |
|
"loss": 0.1388, |
|
"step": 643 |
|
}, |
|
{ |
|
"epoch": 0.4583629893238434, |
|
"grad_norm": 5.050577134494581, |
|
"learning_rate": 1.118942731277533e-06, |
|
"loss": 0.0912, |
|
"step": 644 |
|
}, |
|
{ |
|
"epoch": 0.45907473309608543, |
|
"grad_norm": 4.9180573689518665, |
|
"learning_rate": 1.117474302496329e-06, |
|
"loss": 0.0964, |
|
"step": 645 |
|
}, |
|
{ |
|
"epoch": 0.4597864768683274, |
|
"grad_norm": 5.7503008556439825, |
|
"learning_rate": 1.1160058737151247e-06, |
|
"loss": 0.2445, |
|
"step": 646 |
|
}, |
|
{ |
|
"epoch": 0.4604982206405694, |
|
"grad_norm": 4.905400682546049, |
|
"learning_rate": 1.1145374449339207e-06, |
|
"loss": 0.206, |
|
"step": 647 |
|
}, |
|
{ |
|
"epoch": 0.46120996441281137, |
|
"grad_norm": 5.087415254904014, |
|
"learning_rate": 1.1130690161527165e-06, |
|
"loss": 0.0828, |
|
"step": 648 |
|
}, |
|
{ |
|
"epoch": 0.4619217081850534, |
|
"grad_norm": 5.023255815082948, |
|
"learning_rate": 1.1116005873715124e-06, |
|
"loss": 0.0484, |
|
"step": 649 |
|
}, |
|
{ |
|
"epoch": 0.4626334519572954, |
|
"grad_norm": 4.355133071075149, |
|
"learning_rate": 1.1101321585903082e-06, |
|
"loss": 0.0645, |
|
"step": 650 |
|
}, |
|
{ |
|
"epoch": 0.46334519572953736, |
|
"grad_norm": 9.497353436091503, |
|
"learning_rate": 1.1086637298091042e-06, |
|
"loss": 0.0136, |
|
"step": 651 |
|
}, |
|
{ |
|
"epoch": 0.46405693950177934, |
|
"grad_norm": 4.8079658054906735, |
|
"learning_rate": 1.1071953010279e-06, |
|
"loss": 0.0363, |
|
"step": 652 |
|
}, |
|
{ |
|
"epoch": 0.4647686832740214, |
|
"grad_norm": 3.4912562824852427, |
|
"learning_rate": 1.105726872246696e-06, |
|
"loss": -0.0103, |
|
"step": 653 |
|
}, |
|
{ |
|
"epoch": 0.46548042704626336, |
|
"grad_norm": 3.907574848573548, |
|
"learning_rate": 1.1042584434654917e-06, |
|
"loss": 0.0148, |
|
"step": 654 |
|
}, |
|
{ |
|
"epoch": 0.46619217081850534, |
|
"grad_norm": 4.75599773605203, |
|
"learning_rate": 1.1027900146842877e-06, |
|
"loss": 0.0955, |
|
"step": 655 |
|
}, |
|
{ |
|
"epoch": 0.4669039145907473, |
|
"grad_norm": 6.334151198466081, |
|
"learning_rate": 1.1013215859030837e-06, |
|
"loss": 0.2039, |
|
"step": 656 |
|
}, |
|
{ |
|
"epoch": 0.46761565836298935, |
|
"grad_norm": 4.128379362299159, |
|
"learning_rate": 1.0998531571218796e-06, |
|
"loss": -0.07, |
|
"step": 657 |
|
}, |
|
{ |
|
"epoch": 0.46832740213523133, |
|
"grad_norm": 3.9676377547638273, |
|
"learning_rate": 1.0983847283406756e-06, |
|
"loss": -0.0183, |
|
"step": 658 |
|
}, |
|
{ |
|
"epoch": 0.4690391459074733, |
|
"grad_norm": 4.269930835672946, |
|
"learning_rate": 1.0969162995594714e-06, |
|
"loss": -0.0271, |
|
"step": 659 |
|
}, |
|
{ |
|
"epoch": 0.4697508896797153, |
|
"grad_norm": 5.596920542742016, |
|
"learning_rate": 1.0954478707782674e-06, |
|
"loss": 0.1345, |
|
"step": 660 |
|
}, |
|
{ |
|
"epoch": 0.47046263345195727, |
|
"grad_norm": 3.7310194957083724, |
|
"learning_rate": 1.0939794419970631e-06, |
|
"loss": 0.0609, |
|
"step": 661 |
|
}, |
|
{ |
|
"epoch": 0.4711743772241993, |
|
"grad_norm": 4.239345659471919, |
|
"learning_rate": 1.092511013215859e-06, |
|
"loss": 0.0363, |
|
"step": 662 |
|
}, |
|
{ |
|
"epoch": 0.4718861209964413, |
|
"grad_norm": 4.978031151336516, |
|
"learning_rate": 1.0910425844346549e-06, |
|
"loss": -0.0304, |
|
"step": 663 |
|
}, |
|
{ |
|
"epoch": 0.47259786476868326, |
|
"grad_norm": 6.292115791239946, |
|
"learning_rate": 1.0895741556534508e-06, |
|
"loss": 0.0228, |
|
"step": 664 |
|
}, |
|
{ |
|
"epoch": 0.47330960854092524, |
|
"grad_norm": 3.388984670136675, |
|
"learning_rate": 1.0881057268722466e-06, |
|
"loss": -0.0092, |
|
"step": 665 |
|
}, |
|
{ |
|
"epoch": 0.4740213523131673, |
|
"grad_norm": 4.9208120984624095, |
|
"learning_rate": 1.0866372980910426e-06, |
|
"loss": 0.1315, |
|
"step": 666 |
|
}, |
|
{ |
|
"epoch": 0.47473309608540926, |
|
"grad_norm": 8.255729766326498, |
|
"learning_rate": 1.0851688693098383e-06, |
|
"loss": 0.1453, |
|
"step": 667 |
|
}, |
|
{ |
|
"epoch": 0.47544483985765124, |
|
"grad_norm": 4.201986733068396, |
|
"learning_rate": 1.0837004405286343e-06, |
|
"loss": 0.016, |
|
"step": 668 |
|
}, |
|
{ |
|
"epoch": 0.4761565836298932, |
|
"grad_norm": 5.843632824380879, |
|
"learning_rate": 1.08223201174743e-06, |
|
"loss": 0.0414, |
|
"step": 669 |
|
}, |
|
{ |
|
"epoch": 0.47686832740213525, |
|
"grad_norm": 6.714297784487186, |
|
"learning_rate": 1.080763582966226e-06, |
|
"loss": -0.0167, |
|
"step": 670 |
|
}, |
|
{ |
|
"epoch": 0.47758007117437723, |
|
"grad_norm": 4.545591402594477, |
|
"learning_rate": 1.0792951541850218e-06, |
|
"loss": 0.0521, |
|
"step": 671 |
|
}, |
|
{ |
|
"epoch": 0.4782918149466192, |
|
"grad_norm": 5.895020805238048, |
|
"learning_rate": 1.0778267254038178e-06, |
|
"loss": 0.1075, |
|
"step": 672 |
|
}, |
|
{ |
|
"epoch": 0.4790035587188612, |
|
"grad_norm": 3.4508361134951264, |
|
"learning_rate": 1.0763582966226136e-06, |
|
"loss": 0.0254, |
|
"step": 673 |
|
}, |
|
{ |
|
"epoch": 0.4797153024911032, |
|
"grad_norm": 4.771987420077502, |
|
"learning_rate": 1.0748898678414096e-06, |
|
"loss": 0.0573, |
|
"step": 674 |
|
}, |
|
{ |
|
"epoch": 0.4804270462633452, |
|
"grad_norm": 11.226430999686528, |
|
"learning_rate": 1.0734214390602055e-06, |
|
"loss": 0.1012, |
|
"step": 675 |
|
}, |
|
{ |
|
"epoch": 0.4811387900355872, |
|
"grad_norm": 10.846049392039056, |
|
"learning_rate": 1.0719530102790013e-06, |
|
"loss": 0.0998, |
|
"step": 676 |
|
}, |
|
{ |
|
"epoch": 0.48185053380782916, |
|
"grad_norm": 6.275120719569148, |
|
"learning_rate": 1.0704845814977975e-06, |
|
"loss": 0.0751, |
|
"step": 677 |
|
}, |
|
{ |
|
"epoch": 0.4825622775800712, |
|
"grad_norm": 4.128723544695201, |
|
"learning_rate": 1.0690161527165933e-06, |
|
"loss": 0.0578, |
|
"step": 678 |
|
}, |
|
{ |
|
"epoch": 0.4832740213523132, |
|
"grad_norm": 5.355283727030443, |
|
"learning_rate": 1.0675477239353892e-06, |
|
"loss": -0.0071, |
|
"step": 679 |
|
}, |
|
{ |
|
"epoch": 0.48398576512455516, |
|
"grad_norm": 5.164628498494913, |
|
"learning_rate": 1.066079295154185e-06, |
|
"loss": 0.056, |
|
"step": 680 |
|
}, |
|
{ |
|
"epoch": 0.48469750889679714, |
|
"grad_norm": 3.9710771841378723, |
|
"learning_rate": 1.064610866372981e-06, |
|
"loss": 0.0391, |
|
"step": 681 |
|
}, |
|
{ |
|
"epoch": 0.48540925266903917, |
|
"grad_norm": 5.315392458503421, |
|
"learning_rate": 1.0631424375917767e-06, |
|
"loss": 0.0435, |
|
"step": 682 |
|
}, |
|
{ |
|
"epoch": 0.48612099644128115, |
|
"grad_norm": 3.2116434105620204, |
|
"learning_rate": 1.0616740088105727e-06, |
|
"loss": 0.0427, |
|
"step": 683 |
|
}, |
|
{ |
|
"epoch": 0.48683274021352313, |
|
"grad_norm": 8.68683517437045, |
|
"learning_rate": 1.0602055800293685e-06, |
|
"loss": 0.0512, |
|
"step": 684 |
|
}, |
|
{ |
|
"epoch": 0.4875444839857651, |
|
"grad_norm": 3.513970150885388, |
|
"learning_rate": 1.0587371512481645e-06, |
|
"loss": -0.032, |
|
"step": 685 |
|
}, |
|
{ |
|
"epoch": 0.48825622775800714, |
|
"grad_norm": 4.866089723085183, |
|
"learning_rate": 1.0572687224669602e-06, |
|
"loss": 0.1657, |
|
"step": 686 |
|
}, |
|
{ |
|
"epoch": 0.4889679715302491, |
|
"grad_norm": 6.017385408509331, |
|
"learning_rate": 1.0558002936857562e-06, |
|
"loss": 0.0817, |
|
"step": 687 |
|
}, |
|
{ |
|
"epoch": 0.4896797153024911, |
|
"grad_norm": 5.83203779093739, |
|
"learning_rate": 1.054331864904552e-06, |
|
"loss": 0.1678, |
|
"step": 688 |
|
}, |
|
{ |
|
"epoch": 0.4903914590747331, |
|
"grad_norm": 4.722061712990318, |
|
"learning_rate": 1.052863436123348e-06, |
|
"loss": 0.1022, |
|
"step": 689 |
|
}, |
|
{ |
|
"epoch": 0.49110320284697506, |
|
"grad_norm": 10.584918006143333, |
|
"learning_rate": 1.0513950073421437e-06, |
|
"loss": 0.0488, |
|
"step": 690 |
|
}, |
|
{ |
|
"epoch": 0.4918149466192171, |
|
"grad_norm": 3.746498762239735, |
|
"learning_rate": 1.0499265785609397e-06, |
|
"loss": 0.0109, |
|
"step": 691 |
|
}, |
|
{ |
|
"epoch": 0.4925266903914591, |
|
"grad_norm": 5.269616671379547, |
|
"learning_rate": 1.0484581497797357e-06, |
|
"loss": 0.198, |
|
"step": 692 |
|
}, |
|
{ |
|
"epoch": 0.49323843416370106, |
|
"grad_norm": 4.065242839095878, |
|
"learning_rate": 1.0469897209985314e-06, |
|
"loss": 0.0446, |
|
"step": 693 |
|
}, |
|
{ |
|
"epoch": 0.49395017793594304, |
|
"grad_norm": 5.2828106803180095, |
|
"learning_rate": 1.0455212922173274e-06, |
|
"loss": 0.1978, |
|
"step": 694 |
|
}, |
|
{ |
|
"epoch": 0.49466192170818507, |
|
"grad_norm": 13.032941895937794, |
|
"learning_rate": 1.0440528634361232e-06, |
|
"loss": 0.206, |
|
"step": 695 |
|
}, |
|
{ |
|
"epoch": 0.49537366548042705, |
|
"grad_norm": 4.385566159174779, |
|
"learning_rate": 1.0425844346549194e-06, |
|
"loss": 0.0638, |
|
"step": 696 |
|
}, |
|
{ |
|
"epoch": 0.49608540925266903, |
|
"grad_norm": 4.175354381756935, |
|
"learning_rate": 1.0411160058737151e-06, |
|
"loss": -0.0326, |
|
"step": 697 |
|
}, |
|
{ |
|
"epoch": 0.496797153024911, |
|
"grad_norm": 4.094552463766955, |
|
"learning_rate": 1.0396475770925111e-06, |
|
"loss": 0.0364, |
|
"step": 698 |
|
}, |
|
{ |
|
"epoch": 0.49750889679715304, |
|
"grad_norm": 8.284794808093139, |
|
"learning_rate": 1.0381791483113069e-06, |
|
"loss": 0.1064, |
|
"step": 699 |
|
}, |
|
{ |
|
"epoch": 0.498220640569395, |
|
"grad_norm": 4.51657295176749, |
|
"learning_rate": 1.0367107195301028e-06, |
|
"loss": 0.0779, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 0.498932384341637, |
|
"grad_norm": 6.743114491694153, |
|
"learning_rate": 1.0352422907488986e-06, |
|
"loss": 0.0201, |
|
"step": 701 |
|
}, |
|
{ |
|
"epoch": 0.499644128113879, |
|
"grad_norm": 4.109578522838562, |
|
"learning_rate": 1.0337738619676946e-06, |
|
"loss": 0.1309, |
|
"step": 702 |
|
}, |
|
{ |
|
"epoch": 0.500355871886121, |
|
"grad_norm": 4.307896243759484, |
|
"learning_rate": 1.0323054331864904e-06, |
|
"loss": 0.0356, |
|
"step": 703 |
|
}, |
|
{ |
|
"epoch": 0.501067615658363, |
|
"grad_norm": 3.970264416207935, |
|
"learning_rate": 1.0308370044052863e-06, |
|
"loss": 0.0292, |
|
"step": 704 |
|
}, |
|
{ |
|
"epoch": 0.501779359430605, |
|
"grad_norm": 6.6073855027819475, |
|
"learning_rate": 1.029368575624082e-06, |
|
"loss": 0.0101, |
|
"step": 705 |
|
}, |
|
{ |
|
"epoch": 0.502491103202847, |
|
"grad_norm": 6.492944849966541, |
|
"learning_rate": 1.027900146842878e-06, |
|
"loss": 0.1188, |
|
"step": 706 |
|
}, |
|
{ |
|
"epoch": 0.503202846975089, |
|
"grad_norm": 6.040494327148118, |
|
"learning_rate": 1.0264317180616738e-06, |
|
"loss": 0.0784, |
|
"step": 707 |
|
}, |
|
{ |
|
"epoch": 0.5039145907473309, |
|
"grad_norm": 4.63609509021837, |
|
"learning_rate": 1.0249632892804698e-06, |
|
"loss": 0.0678, |
|
"step": 708 |
|
}, |
|
{ |
|
"epoch": 0.504626334519573, |
|
"grad_norm": 6.6077951116687155, |
|
"learning_rate": 1.0234948604992658e-06, |
|
"loss": 0.1119, |
|
"step": 709 |
|
}, |
|
{ |
|
"epoch": 0.505338078291815, |
|
"grad_norm": 4.34544526095902, |
|
"learning_rate": 1.0220264317180616e-06, |
|
"loss": 0.2166, |
|
"step": 710 |
|
}, |
|
{ |
|
"epoch": 0.5060498220640569, |
|
"grad_norm": 4.0070871511015875, |
|
"learning_rate": 1.0205580029368575e-06, |
|
"loss": 0.0856, |
|
"step": 711 |
|
}, |
|
{ |
|
"epoch": 0.5067615658362989, |
|
"grad_norm": 4.446734852837578, |
|
"learning_rate": 1.0190895741556533e-06, |
|
"loss": -0.079, |
|
"step": 712 |
|
}, |
|
{ |
|
"epoch": 0.507473309608541, |
|
"grad_norm": 5.756477025097774, |
|
"learning_rate": 1.0176211453744493e-06, |
|
"loss": 0.0168, |
|
"step": 713 |
|
}, |
|
{ |
|
"epoch": 0.5081850533807829, |
|
"grad_norm": 5.620792934912572, |
|
"learning_rate": 1.016152716593245e-06, |
|
"loss": 0.0884, |
|
"step": 714 |
|
}, |
|
{ |
|
"epoch": 0.5088967971530249, |
|
"grad_norm": 3.263840475241881, |
|
"learning_rate": 1.014684287812041e-06, |
|
"loss": -0.0387, |
|
"step": 715 |
|
}, |
|
{ |
|
"epoch": 0.5096085409252669, |
|
"grad_norm": 4.9965648294818354, |
|
"learning_rate": 1.013215859030837e-06, |
|
"loss": 0.182, |
|
"step": 716 |
|
}, |
|
{ |
|
"epoch": 0.5103202846975089, |
|
"grad_norm": 4.602115202839881, |
|
"learning_rate": 1.011747430249633e-06, |
|
"loss": 0.0814, |
|
"step": 717 |
|
}, |
|
{ |
|
"epoch": 0.5110320284697509, |
|
"grad_norm": 3.365418715251831, |
|
"learning_rate": 1.0102790014684287e-06, |
|
"loss": -0.0525, |
|
"step": 718 |
|
}, |
|
{ |
|
"epoch": 0.5117437722419929, |
|
"grad_norm": 3.1922592834677044, |
|
"learning_rate": 1.0088105726872247e-06, |
|
"loss": 0.09, |
|
"step": 719 |
|
}, |
|
{ |
|
"epoch": 0.5124555160142349, |
|
"grad_norm": 3.549032487795268, |
|
"learning_rate": 1.0073421439060205e-06, |
|
"loss": -0.0008, |
|
"step": 720 |
|
}, |
|
{ |
|
"epoch": 0.5131672597864768, |
|
"grad_norm": 4.877635536832963, |
|
"learning_rate": 1.0058737151248165e-06, |
|
"loss": 0.1009, |
|
"step": 721 |
|
}, |
|
{ |
|
"epoch": 0.5138790035587188, |
|
"grad_norm": 3.822705726451478, |
|
"learning_rate": 1.0044052863436122e-06, |
|
"loss": 0.0335, |
|
"step": 722 |
|
}, |
|
{ |
|
"epoch": 0.5145907473309609, |
|
"grad_norm": 5.089722146655097, |
|
"learning_rate": 1.0029368575624082e-06, |
|
"loss": 0.0302, |
|
"step": 723 |
|
}, |
|
{ |
|
"epoch": 0.5153024911032028, |
|
"grad_norm": 5.256543214582418, |
|
"learning_rate": 1.001468428781204e-06, |
|
"loss": 0.0756, |
|
"step": 724 |
|
}, |
|
{ |
|
"epoch": 0.5160142348754448, |
|
"grad_norm": 6.205469721275733, |
|
"learning_rate": 1e-06, |
|
"loss": 0.0743, |
|
"step": 725 |
|
}, |
|
{ |
|
"epoch": 0.5167259786476869, |
|
"grad_norm": 4.824095195469929, |
|
"learning_rate": 9.98531571218796e-07, |
|
"loss": 0.0462, |
|
"step": 726 |
|
}, |
|
{ |
|
"epoch": 0.5174377224199288, |
|
"grad_norm": 6.044532184026359, |
|
"learning_rate": 9.970631424375917e-07, |
|
"loss": 0.1479, |
|
"step": 727 |
|
}, |
|
{ |
|
"epoch": 0.5181494661921708, |
|
"grad_norm": 6.564234622404848, |
|
"learning_rate": 9.955947136563877e-07, |
|
"loss": 0.0437, |
|
"step": 728 |
|
}, |
|
{ |
|
"epoch": 0.5188612099644128, |
|
"grad_norm": 4.015261622511888, |
|
"learning_rate": 9.941262848751834e-07, |
|
"loss": 0.0073, |
|
"step": 729 |
|
}, |
|
{ |
|
"epoch": 0.5195729537366548, |
|
"grad_norm": 5.187190035096562, |
|
"learning_rate": 9.926578560939794e-07, |
|
"loss": 0.0647, |
|
"step": 730 |
|
}, |
|
{ |
|
"epoch": 0.5202846975088968, |
|
"grad_norm": 7.051144002527871, |
|
"learning_rate": 9.911894273127754e-07, |
|
"loss": 0.0051, |
|
"step": 731 |
|
}, |
|
{ |
|
"epoch": 0.5209964412811388, |
|
"grad_norm": 6.711230803398463, |
|
"learning_rate": 9.897209985315712e-07, |
|
"loss": 0.1652, |
|
"step": 732 |
|
}, |
|
{ |
|
"epoch": 0.5217081850533808, |
|
"grad_norm": 7.284132489226529, |
|
"learning_rate": 9.882525697503671e-07, |
|
"loss": 0.0454, |
|
"step": 733 |
|
}, |
|
{ |
|
"epoch": 0.5224199288256228, |
|
"grad_norm": 6.953294799102971, |
|
"learning_rate": 9.86784140969163e-07, |
|
"loss": 0.0806, |
|
"step": 734 |
|
}, |
|
{ |
|
"epoch": 0.5231316725978647, |
|
"grad_norm": 4.530157128444712, |
|
"learning_rate": 9.853157121879589e-07, |
|
"loss": -0.0047, |
|
"step": 735 |
|
}, |
|
{ |
|
"epoch": 0.5238434163701068, |
|
"grad_norm": 5.280898042948249, |
|
"learning_rate": 9.838472834067546e-07, |
|
"loss": 0.0573, |
|
"step": 736 |
|
}, |
|
{ |
|
"epoch": 0.5245551601423487, |
|
"grad_norm": 8.207179057748009, |
|
"learning_rate": 9.823788546255506e-07, |
|
"loss": 0.0707, |
|
"step": 737 |
|
}, |
|
{ |
|
"epoch": 0.5252669039145907, |
|
"grad_norm": 5.339631573829629, |
|
"learning_rate": 9.809104258443464e-07, |
|
"loss": 0.1164, |
|
"step": 738 |
|
}, |
|
{ |
|
"epoch": 0.5259786476868328, |
|
"grad_norm": 3.3373897348541077, |
|
"learning_rate": 9.794419970631424e-07, |
|
"loss": -0.0819, |
|
"step": 739 |
|
}, |
|
{ |
|
"epoch": 0.5266903914590747, |
|
"grad_norm": 4.900868373919202, |
|
"learning_rate": 9.779735682819383e-07, |
|
"loss": 0.1355, |
|
"step": 740 |
|
}, |
|
{ |
|
"epoch": 0.5274021352313167, |
|
"grad_norm": 7.232896295285823, |
|
"learning_rate": 9.765051395007341e-07, |
|
"loss": 0.2699, |
|
"step": 741 |
|
}, |
|
{ |
|
"epoch": 0.5281138790035588, |
|
"grad_norm": 3.8975046146309227, |
|
"learning_rate": 9.7503671071953e-07, |
|
"loss": 0.0535, |
|
"step": 742 |
|
}, |
|
{ |
|
"epoch": 0.5288256227758007, |
|
"grad_norm": 6.188097992331352, |
|
"learning_rate": 9.73568281938326e-07, |
|
"loss": -0.0402, |
|
"step": 743 |
|
}, |
|
{ |
|
"epoch": 0.5295373665480427, |
|
"grad_norm": 4.807307228512303, |
|
"learning_rate": 9.720998531571218e-07, |
|
"loss": 0.1625, |
|
"step": 744 |
|
}, |
|
{ |
|
"epoch": 0.5302491103202847, |
|
"grad_norm": 6.3360410557197016, |
|
"learning_rate": 9.706314243759178e-07, |
|
"loss": 0.0756, |
|
"step": 745 |
|
}, |
|
{ |
|
"epoch": 0.5309608540925267, |
|
"grad_norm": 5.461588852093009, |
|
"learning_rate": 9.691629955947136e-07, |
|
"loss": 0.1382, |
|
"step": 746 |
|
}, |
|
{ |
|
"epoch": 0.5316725978647687, |
|
"grad_norm": 4.912735849402141, |
|
"learning_rate": 9.676945668135096e-07, |
|
"loss": 0.1125, |
|
"step": 747 |
|
}, |
|
{ |
|
"epoch": 0.5323843416370106, |
|
"grad_norm": 5.822463161799121, |
|
"learning_rate": 9.662261380323053e-07, |
|
"loss": 0.1063, |
|
"step": 748 |
|
}, |
|
{ |
|
"epoch": 0.5330960854092527, |
|
"grad_norm": 6.120739447607458, |
|
"learning_rate": 9.647577092511013e-07, |
|
"loss": 0.0771, |
|
"step": 749 |
|
}, |
|
{ |
|
"epoch": 0.5338078291814946, |
|
"grad_norm": 3.4986944488443847, |
|
"learning_rate": 9.632892804698973e-07, |
|
"loss": 0.1074, |
|
"step": 750 |
|
}, |
|
{ |
|
"epoch": 0.5345195729537366, |
|
"grad_norm": 3.188142300386645, |
|
"learning_rate": 9.61820851688693e-07, |
|
"loss": 0.0394, |
|
"step": 751 |
|
}, |
|
{ |
|
"epoch": 0.5352313167259787, |
|
"grad_norm": 4.301724447408916, |
|
"learning_rate": 9.60352422907489e-07, |
|
"loss": 0.1679, |
|
"step": 752 |
|
}, |
|
{ |
|
"epoch": 0.5359430604982206, |
|
"grad_norm": 3.6983788573076413, |
|
"learning_rate": 9.588839941262848e-07, |
|
"loss": -0.0002, |
|
"step": 753 |
|
}, |
|
{ |
|
"epoch": 0.5366548042704626, |
|
"grad_norm": 6.6257306541739345, |
|
"learning_rate": 9.574155653450808e-07, |
|
"loss": 0.0013, |
|
"step": 754 |
|
}, |
|
{ |
|
"epoch": 0.5373665480427047, |
|
"grad_norm": 3.138478014976094, |
|
"learning_rate": 9.559471365638765e-07, |
|
"loss": 0.11, |
|
"step": 755 |
|
}, |
|
{ |
|
"epoch": 0.5380782918149466, |
|
"grad_norm": 8.610525175552773, |
|
"learning_rate": 9.544787077826725e-07, |
|
"loss": 0.1, |
|
"step": 756 |
|
}, |
|
{ |
|
"epoch": 0.5387900355871886, |
|
"grad_norm": 4.293998726605181, |
|
"learning_rate": 9.530102790014684e-07, |
|
"loss": 0.0206, |
|
"step": 757 |
|
}, |
|
{ |
|
"epoch": 0.5395017793594306, |
|
"grad_norm": 7.107177663089392, |
|
"learning_rate": 9.515418502202642e-07, |
|
"loss": 0.1868, |
|
"step": 758 |
|
}, |
|
{ |
|
"epoch": 0.5402135231316726, |
|
"grad_norm": 4.437300771619768, |
|
"learning_rate": 9.500734214390601e-07, |
|
"loss": -0.0052, |
|
"step": 759 |
|
}, |
|
{ |
|
"epoch": 0.5409252669039146, |
|
"grad_norm": 4.933126903201541, |
|
"learning_rate": 9.486049926578561e-07, |
|
"loss": 0.0428, |
|
"step": 760 |
|
}, |
|
{ |
|
"epoch": 0.5416370106761565, |
|
"grad_norm": 4.007325616898821, |
|
"learning_rate": 9.47136563876652e-07, |
|
"loss": 0.1194, |
|
"step": 761 |
|
}, |
|
{ |
|
"epoch": 0.5423487544483986, |
|
"grad_norm": 3.3928461490539035, |
|
"learning_rate": 9.456681350954478e-07, |
|
"loss": 0.0942, |
|
"step": 762 |
|
}, |
|
{ |
|
"epoch": 0.5430604982206406, |
|
"grad_norm": 3.6371352861702553, |
|
"learning_rate": 9.441997063142437e-07, |
|
"loss": -0.0607, |
|
"step": 763 |
|
}, |
|
{ |
|
"epoch": 0.5437722419928825, |
|
"grad_norm": 4.584237142517063, |
|
"learning_rate": 9.427312775330396e-07, |
|
"loss": 0.0438, |
|
"step": 764 |
|
}, |
|
{ |
|
"epoch": 0.5444839857651246, |
|
"grad_norm": 5.492813096427028, |
|
"learning_rate": 9.412628487518355e-07, |
|
"loss": 0.0376, |
|
"step": 765 |
|
}, |
|
{ |
|
"epoch": 0.5451957295373665, |
|
"grad_norm": 6.072503086181962, |
|
"learning_rate": 9.397944199706313e-07, |
|
"loss": 0.0891, |
|
"step": 766 |
|
}, |
|
{ |
|
"epoch": 0.5459074733096085, |
|
"grad_norm": 4.134151825824703, |
|
"learning_rate": 9.383259911894272e-07, |
|
"loss": 0.0644, |
|
"step": 767 |
|
}, |
|
{ |
|
"epoch": 0.5466192170818506, |
|
"grad_norm": 4.4160799800430155, |
|
"learning_rate": 9.368575624082231e-07, |
|
"loss": 0.0636, |
|
"step": 768 |
|
}, |
|
{ |
|
"epoch": 0.5473309608540925, |
|
"grad_norm": 4.714994439195608, |
|
"learning_rate": 9.353891336270191e-07, |
|
"loss": -0.0305, |
|
"step": 769 |
|
}, |
|
{ |
|
"epoch": 0.5480427046263345, |
|
"grad_norm": 4.771488172519467, |
|
"learning_rate": 9.33920704845815e-07, |
|
"loss": 0.039, |
|
"step": 770 |
|
}, |
|
{ |
|
"epoch": 0.5487544483985766, |
|
"grad_norm": 4.60214711541248, |
|
"learning_rate": 9.324522760646109e-07, |
|
"loss": 0.0347, |
|
"step": 771 |
|
}, |
|
{ |
|
"epoch": 0.5494661921708185, |
|
"grad_norm": 3.7325284215071153, |
|
"learning_rate": 9.309838472834068e-07, |
|
"loss": 0.0216, |
|
"step": 772 |
|
}, |
|
{ |
|
"epoch": 0.5501779359430605, |
|
"grad_norm": 4.241197558370274, |
|
"learning_rate": 9.295154185022026e-07, |
|
"loss": 0.1269, |
|
"step": 773 |
|
}, |
|
{ |
|
"epoch": 0.5508896797153024, |
|
"grad_norm": 5.679316407559781, |
|
"learning_rate": 9.280469897209985e-07, |
|
"loss": 0.0889, |
|
"step": 774 |
|
}, |
|
{ |
|
"epoch": 0.5516014234875445, |
|
"grad_norm": 4.815106367320797, |
|
"learning_rate": 9.265785609397944e-07, |
|
"loss": 0.1188, |
|
"step": 775 |
|
}, |
|
{ |
|
"epoch": 0.5523131672597865, |
|
"grad_norm": 3.5294291435075626, |
|
"learning_rate": 9.251101321585902e-07, |
|
"loss": 0.0955, |
|
"step": 776 |
|
}, |
|
{ |
|
"epoch": 0.5530249110320284, |
|
"grad_norm": 5.426602372172119, |
|
"learning_rate": 9.236417033773861e-07, |
|
"loss": 0.0026, |
|
"step": 777 |
|
}, |
|
{ |
|
"epoch": 0.5537366548042705, |
|
"grad_norm": 3.412274413984105, |
|
"learning_rate": 9.22173274596182e-07, |
|
"loss": -0.0744, |
|
"step": 778 |
|
}, |
|
{ |
|
"epoch": 0.5544483985765124, |
|
"grad_norm": 5.901877747777146, |
|
"learning_rate": 9.20704845814978e-07, |
|
"loss": 0.1465, |
|
"step": 779 |
|
}, |
|
{ |
|
"epoch": 0.5551601423487544, |
|
"grad_norm": 4.37856953642428, |
|
"learning_rate": 9.192364170337738e-07, |
|
"loss": 0.1163, |
|
"step": 780 |
|
}, |
|
{ |
|
"epoch": 0.5558718861209965, |
|
"grad_norm": 5.049603459869766, |
|
"learning_rate": 9.177679882525697e-07, |
|
"loss": 0.028, |
|
"step": 781 |
|
}, |
|
{ |
|
"epoch": 0.5565836298932384, |
|
"grad_norm": 5.211098732805948, |
|
"learning_rate": 9.162995594713656e-07, |
|
"loss": -0.0797, |
|
"step": 782 |
|
}, |
|
{ |
|
"epoch": 0.5572953736654804, |
|
"grad_norm": 3.208772620971025, |
|
"learning_rate": 9.148311306901615e-07, |
|
"loss": 0.0965, |
|
"step": 783 |
|
}, |
|
{ |
|
"epoch": 0.5580071174377225, |
|
"grad_norm": 5.775537586374198, |
|
"learning_rate": 9.133627019089573e-07, |
|
"loss": 0.079, |
|
"step": 784 |
|
}, |
|
{ |
|
"epoch": 0.5587188612099644, |
|
"grad_norm": 3.7980849610478558, |
|
"learning_rate": 9.118942731277532e-07, |
|
"loss": 0.0555, |
|
"step": 785 |
|
}, |
|
{ |
|
"epoch": 0.5594306049822064, |
|
"grad_norm": 3.7820751456345447, |
|
"learning_rate": 9.104258443465491e-07, |
|
"loss": 0.102, |
|
"step": 786 |
|
}, |
|
{ |
|
"epoch": 0.5601423487544483, |
|
"grad_norm": 4.53042552945143, |
|
"learning_rate": 9.08957415565345e-07, |
|
"loss": 0.0037, |
|
"step": 787 |
|
}, |
|
{ |
|
"epoch": 0.5608540925266904, |
|
"grad_norm": 5.130468846659909, |
|
"learning_rate": 9.074889867841409e-07, |
|
"loss": 0.1375, |
|
"step": 788 |
|
}, |
|
{ |
|
"epoch": 0.5615658362989324, |
|
"grad_norm": 5.0011692587301795, |
|
"learning_rate": 9.060205580029369e-07, |
|
"loss": 0.12, |
|
"step": 789 |
|
}, |
|
{ |
|
"epoch": 0.5622775800711743, |
|
"grad_norm": 4.990387450257397, |
|
"learning_rate": 9.045521292217328e-07, |
|
"loss": 0.058, |
|
"step": 790 |
|
}, |
|
{ |
|
"epoch": 0.5629893238434164, |
|
"grad_norm": 4.713852317506844, |
|
"learning_rate": 9.030837004405286e-07, |
|
"loss": 0.1319, |
|
"step": 791 |
|
}, |
|
{ |
|
"epoch": 0.5637010676156584, |
|
"grad_norm": 7.664848388446512, |
|
"learning_rate": 9.016152716593245e-07, |
|
"loss": 0.0903, |
|
"step": 792 |
|
}, |
|
{ |
|
"epoch": 0.5644128113879003, |
|
"grad_norm": 4.3792586172064505, |
|
"learning_rate": 9.001468428781204e-07, |
|
"loss": -0.0151, |
|
"step": 793 |
|
}, |
|
{ |
|
"epoch": 0.5651245551601424, |
|
"grad_norm": 3.2950223083204007, |
|
"learning_rate": 8.986784140969163e-07, |
|
"loss": 0.0394, |
|
"step": 794 |
|
}, |
|
{ |
|
"epoch": 0.5658362989323843, |
|
"grad_norm": 7.631142065486189, |
|
"learning_rate": 8.972099853157121e-07, |
|
"loss": 0.1386, |
|
"step": 795 |
|
}, |
|
{ |
|
"epoch": 0.5665480427046263, |
|
"grad_norm": 4.223643311945042, |
|
"learning_rate": 8.95741556534508e-07, |
|
"loss": -0.0308, |
|
"step": 796 |
|
}, |
|
{ |
|
"epoch": 0.5672597864768684, |
|
"grad_norm": 3.4974575344995174, |
|
"learning_rate": 8.942731277533039e-07, |
|
"loss": -0.0312, |
|
"step": 797 |
|
}, |
|
{ |
|
"epoch": 0.5679715302491103, |
|
"grad_norm": 3.028662137320869, |
|
"learning_rate": 8.928046989720998e-07, |
|
"loss": -0.027, |
|
"step": 798 |
|
}, |
|
{ |
|
"epoch": 0.5686832740213523, |
|
"grad_norm": 3.39632600982153, |
|
"learning_rate": 8.913362701908957e-07, |
|
"loss": -0.0008, |
|
"step": 799 |
|
}, |
|
{ |
|
"epoch": 0.5693950177935944, |
|
"grad_norm": 6.1897883951874535, |
|
"learning_rate": 8.898678414096916e-07, |
|
"loss": 0.0858, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 0.5701067615658363, |
|
"grad_norm": 4.702874566694659, |
|
"learning_rate": 8.883994126284875e-07, |
|
"loss": 0.0146, |
|
"step": 801 |
|
}, |
|
{ |
|
"epoch": 0.5708185053380783, |
|
"grad_norm": 4.7294983376418305, |
|
"learning_rate": 8.869309838472833e-07, |
|
"loss": 0.1296, |
|
"step": 802 |
|
}, |
|
{ |
|
"epoch": 0.5715302491103202, |
|
"grad_norm": 4.857229660765176, |
|
"learning_rate": 8.854625550660792e-07, |
|
"loss": 0.0874, |
|
"step": 803 |
|
}, |
|
{ |
|
"epoch": 0.5722419928825623, |
|
"grad_norm": 10.192813914978096, |
|
"learning_rate": 8.839941262848752e-07, |
|
"loss": 0.1704, |
|
"step": 804 |
|
}, |
|
{ |
|
"epoch": 0.5729537366548043, |
|
"grad_norm": 3.644774204999828, |
|
"learning_rate": 8.82525697503671e-07, |
|
"loss": 0.0573, |
|
"step": 805 |
|
}, |
|
{ |
|
"epoch": 0.5736654804270462, |
|
"grad_norm": 6.851142966150024, |
|
"learning_rate": 8.810572687224669e-07, |
|
"loss": 0.1111, |
|
"step": 806 |
|
}, |
|
{ |
|
"epoch": 0.5743772241992883, |
|
"grad_norm": 5.16915314842292, |
|
"learning_rate": 8.795888399412628e-07, |
|
"loss": 0.0982, |
|
"step": 807 |
|
}, |
|
{ |
|
"epoch": 0.5750889679715302, |
|
"grad_norm": 3.7247312512833877, |
|
"learning_rate": 8.781204111600588e-07, |
|
"loss": 0.0495, |
|
"step": 808 |
|
}, |
|
{ |
|
"epoch": 0.5758007117437722, |
|
"grad_norm": 6.8775044821222275, |
|
"learning_rate": 8.766519823788546e-07, |
|
"loss": 0.0514, |
|
"step": 809 |
|
}, |
|
{ |
|
"epoch": 0.5765124555160143, |
|
"grad_norm": 10.730556558500393, |
|
"learning_rate": 8.751835535976505e-07, |
|
"loss": 0.0751, |
|
"step": 810 |
|
}, |
|
{ |
|
"epoch": 0.5772241992882562, |
|
"grad_norm": 9.122296494576403, |
|
"learning_rate": 8.737151248164464e-07, |
|
"loss": 0.0662, |
|
"step": 811 |
|
}, |
|
{ |
|
"epoch": 0.5779359430604982, |
|
"grad_norm": 3.808482445288645, |
|
"learning_rate": 8.722466960352423e-07, |
|
"loss": 0.0866, |
|
"step": 812 |
|
}, |
|
{ |
|
"epoch": 0.5786476868327403, |
|
"grad_norm": 4.963997925652648, |
|
"learning_rate": 8.707782672540381e-07, |
|
"loss": 0.0274, |
|
"step": 813 |
|
}, |
|
{ |
|
"epoch": 0.5793594306049822, |
|
"grad_norm": 4.972806414354329, |
|
"learning_rate": 8.69309838472834e-07, |
|
"loss": 0.0145, |
|
"step": 814 |
|
}, |
|
{ |
|
"epoch": 0.5800711743772242, |
|
"grad_norm": 5.200450862998976, |
|
"learning_rate": 8.678414096916299e-07, |
|
"loss": 0.0583, |
|
"step": 815 |
|
}, |
|
{ |
|
"epoch": 0.5807829181494661, |
|
"grad_norm": 5.21945185710134, |
|
"learning_rate": 8.663729809104257e-07, |
|
"loss": 0.0849, |
|
"step": 816 |
|
}, |
|
{ |
|
"epoch": 0.5814946619217082, |
|
"grad_norm": 5.9386659782373865, |
|
"learning_rate": 8.649045521292216e-07, |
|
"loss": 0.2583, |
|
"step": 817 |
|
}, |
|
{ |
|
"epoch": 0.5822064056939502, |
|
"grad_norm": 6.670258158616344, |
|
"learning_rate": 8.634361233480176e-07, |
|
"loss": 0.0157, |
|
"step": 818 |
|
}, |
|
{ |
|
"epoch": 0.5829181494661921, |
|
"grad_norm": 4.226534746399752, |
|
"learning_rate": 8.619676945668135e-07, |
|
"loss": 0.1208, |
|
"step": 819 |
|
}, |
|
{ |
|
"epoch": 0.5836298932384342, |
|
"grad_norm": 4.928546368395039, |
|
"learning_rate": 8.604992657856093e-07, |
|
"loss": 0.0773, |
|
"step": 820 |
|
}, |
|
{ |
|
"epoch": 0.5843416370106762, |
|
"grad_norm": 2.968915914617463, |
|
"learning_rate": 8.590308370044053e-07, |
|
"loss": 0.1178, |
|
"step": 821 |
|
}, |
|
{ |
|
"epoch": 0.5850533807829181, |
|
"grad_norm": 8.789100694361455, |
|
"learning_rate": 8.575624082232012e-07, |
|
"loss": -0.0098, |
|
"step": 822 |
|
}, |
|
{ |
|
"epoch": 0.5857651245551602, |
|
"grad_norm": 3.883119628548879, |
|
"learning_rate": 8.560939794419971e-07, |
|
"loss": 0.1088, |
|
"step": 823 |
|
}, |
|
{ |
|
"epoch": 0.5864768683274021, |
|
"grad_norm": 5.206495321843375, |
|
"learning_rate": 8.546255506607929e-07, |
|
"loss": 0.0098, |
|
"step": 824 |
|
}, |
|
{ |
|
"epoch": 0.5871886120996441, |
|
"grad_norm": 3.9007823061659894, |
|
"learning_rate": 8.531571218795888e-07, |
|
"loss": 0.0431, |
|
"step": 825 |
|
}, |
|
{ |
|
"epoch": 0.5879003558718862, |
|
"grad_norm": 3.8238623728705603, |
|
"learning_rate": 8.516886930983847e-07, |
|
"loss": 0.0652, |
|
"step": 826 |
|
}, |
|
{ |
|
"epoch": 0.5886120996441281, |
|
"grad_norm": 5.147270197655345, |
|
"learning_rate": 8.502202643171805e-07, |
|
"loss": 0.0553, |
|
"step": 827 |
|
}, |
|
{ |
|
"epoch": 0.5893238434163701, |
|
"grad_norm": 3.1488070211968027, |
|
"learning_rate": 8.487518355359765e-07, |
|
"loss": 0.0409, |
|
"step": 828 |
|
}, |
|
{ |
|
"epoch": 0.5900355871886122, |
|
"grad_norm": 6.545808327390556, |
|
"learning_rate": 8.472834067547724e-07, |
|
"loss": 0.0596, |
|
"step": 829 |
|
}, |
|
{ |
|
"epoch": 0.5907473309608541, |
|
"grad_norm": 4.262885139494661, |
|
"learning_rate": 8.458149779735683e-07, |
|
"loss": 0.0387, |
|
"step": 830 |
|
}, |
|
{ |
|
"epoch": 0.5914590747330961, |
|
"grad_norm": 5.216472562808622, |
|
"learning_rate": 8.443465491923641e-07, |
|
"loss": 0.1205, |
|
"step": 831 |
|
}, |
|
{ |
|
"epoch": 0.592170818505338, |
|
"grad_norm": 3.274785094453674, |
|
"learning_rate": 8.4287812041116e-07, |
|
"loss": 0.052, |
|
"step": 832 |
|
}, |
|
{ |
|
"epoch": 0.5928825622775801, |
|
"grad_norm": 5.076651115735975, |
|
"learning_rate": 8.414096916299559e-07, |
|
"loss": 0.165, |
|
"step": 833 |
|
}, |
|
{ |
|
"epoch": 0.5935943060498221, |
|
"grad_norm": 4.67123133075057, |
|
"learning_rate": 8.399412628487518e-07, |
|
"loss": 0.0592, |
|
"step": 834 |
|
}, |
|
{ |
|
"epoch": 0.594306049822064, |
|
"grad_norm": 11.997396186622707, |
|
"learning_rate": 8.384728340675476e-07, |
|
"loss": 0.0962, |
|
"step": 835 |
|
}, |
|
{ |
|
"epoch": 0.5950177935943061, |
|
"grad_norm": 5.373216554936181, |
|
"learning_rate": 8.370044052863435e-07, |
|
"loss": 0.1828, |
|
"step": 836 |
|
}, |
|
{ |
|
"epoch": 0.595729537366548, |
|
"grad_norm": 3.297467942273627, |
|
"learning_rate": 8.355359765051395e-07, |
|
"loss": -0.0903, |
|
"step": 837 |
|
}, |
|
{ |
|
"epoch": 0.59644128113879, |
|
"grad_norm": 6.0804009184913, |
|
"learning_rate": 8.340675477239353e-07, |
|
"loss": 0.1039, |
|
"step": 838 |
|
}, |
|
{ |
|
"epoch": 0.5971530249110321, |
|
"grad_norm": 8.681008527677944, |
|
"learning_rate": 8.325991189427313e-07, |
|
"loss": -0.0249, |
|
"step": 839 |
|
}, |
|
{ |
|
"epoch": 0.597864768683274, |
|
"grad_norm": 6.081078133109813, |
|
"learning_rate": 8.311306901615272e-07, |
|
"loss": 0.0362, |
|
"step": 840 |
|
}, |
|
{ |
|
"epoch": 0.598576512455516, |
|
"grad_norm": 4.786545358004393, |
|
"learning_rate": 8.296622613803231e-07, |
|
"loss": 0.0963, |
|
"step": 841 |
|
}, |
|
{ |
|
"epoch": 0.599288256227758, |
|
"grad_norm": 4.020818179930494, |
|
"learning_rate": 8.281938325991189e-07, |
|
"loss": 0.1366, |
|
"step": 842 |
|
}, |
|
{ |
|
"epoch": 0.6, |
|
"grad_norm": 4.908818635550565, |
|
"learning_rate": 8.267254038179148e-07, |
|
"loss": 0.0357, |
|
"step": 843 |
|
}, |
|
{ |
|
"epoch": 0.600711743772242, |
|
"grad_norm": 6.085069626321275, |
|
"learning_rate": 8.252569750367107e-07, |
|
"loss": 0.0482, |
|
"step": 844 |
|
}, |
|
{ |
|
"epoch": 0.6014234875444839, |
|
"grad_norm": 4.059258444201513, |
|
"learning_rate": 8.237885462555065e-07, |
|
"loss": 0.042, |
|
"step": 845 |
|
}, |
|
{ |
|
"epoch": 0.602135231316726, |
|
"grad_norm": 5.8277267033597875, |
|
"learning_rate": 8.223201174743024e-07, |
|
"loss": 0.079, |
|
"step": 846 |
|
}, |
|
{ |
|
"epoch": 0.602846975088968, |
|
"grad_norm": 3.5099927757601828, |
|
"learning_rate": 8.208516886930984e-07, |
|
"loss": -0.0134, |
|
"step": 847 |
|
}, |
|
{ |
|
"epoch": 0.6035587188612099, |
|
"grad_norm": 4.192415227971293, |
|
"learning_rate": 8.193832599118943e-07, |
|
"loss": 0.0844, |
|
"step": 848 |
|
}, |
|
{ |
|
"epoch": 0.604270462633452, |
|
"grad_norm": 4.586599376946333, |
|
"learning_rate": 8.179148311306901e-07, |
|
"loss": 0.1048, |
|
"step": 849 |
|
}, |
|
{ |
|
"epoch": 0.604982206405694, |
|
"grad_norm": 3.7801229515464962, |
|
"learning_rate": 8.16446402349486e-07, |
|
"loss": -0.0248, |
|
"step": 850 |
|
}, |
|
{ |
|
"epoch": 0.6056939501779359, |
|
"grad_norm": 5.737160320657065, |
|
"learning_rate": 8.149779735682819e-07, |
|
"loss": 0.063, |
|
"step": 851 |
|
}, |
|
{ |
|
"epoch": 0.606405693950178, |
|
"grad_norm": 4.308955186916713, |
|
"learning_rate": 8.135095447870778e-07, |
|
"loss": 0.0559, |
|
"step": 852 |
|
}, |
|
{ |
|
"epoch": 0.6071174377224199, |
|
"grad_norm": 6.269661761200003, |
|
"learning_rate": 8.120411160058736e-07, |
|
"loss": 0.0686, |
|
"step": 853 |
|
}, |
|
{ |
|
"epoch": 0.6078291814946619, |
|
"grad_norm": 3.4091553502783563, |
|
"learning_rate": 8.105726872246695e-07, |
|
"loss": 0.0133, |
|
"step": 854 |
|
}, |
|
{ |
|
"epoch": 0.608540925266904, |
|
"grad_norm": 4.54197927443203, |
|
"learning_rate": 8.091042584434654e-07, |
|
"loss": 0.0049, |
|
"step": 855 |
|
}, |
|
{ |
|
"epoch": 0.6092526690391459, |
|
"grad_norm": 5.638036244806688, |
|
"learning_rate": 8.076358296622612e-07, |
|
"loss": -0.0274, |
|
"step": 856 |
|
}, |
|
{ |
|
"epoch": 0.6099644128113879, |
|
"grad_norm": 4.050464584642166, |
|
"learning_rate": 8.061674008810573e-07, |
|
"loss": -0.0498, |
|
"step": 857 |
|
}, |
|
{ |
|
"epoch": 0.61067615658363, |
|
"grad_norm": 6.185879581851871, |
|
"learning_rate": 8.046989720998532e-07, |
|
"loss": 0.095, |
|
"step": 858 |
|
}, |
|
{ |
|
"epoch": 0.6113879003558719, |
|
"grad_norm": 5.597479402845821, |
|
"learning_rate": 8.032305433186491e-07, |
|
"loss": 0.1485, |
|
"step": 859 |
|
}, |
|
{ |
|
"epoch": 0.6120996441281139, |
|
"grad_norm": 6.770712263540968, |
|
"learning_rate": 8.017621145374449e-07, |
|
"loss": 0.0033, |
|
"step": 860 |
|
}, |
|
{ |
|
"epoch": 0.6128113879003558, |
|
"grad_norm": 5.023930503182311, |
|
"learning_rate": 8.002936857562408e-07, |
|
"loss": 0.0971, |
|
"step": 861 |
|
}, |
|
{ |
|
"epoch": 0.6135231316725979, |
|
"grad_norm": 3.8390228232172072, |
|
"learning_rate": 7.988252569750367e-07, |
|
"loss": 0.0792, |
|
"step": 862 |
|
}, |
|
{ |
|
"epoch": 0.6142348754448399, |
|
"grad_norm": 4.63076731937446, |
|
"learning_rate": 7.973568281938326e-07, |
|
"loss": 0.082, |
|
"step": 863 |
|
}, |
|
{ |
|
"epoch": 0.6149466192170818, |
|
"grad_norm": 3.772994305664748, |
|
"learning_rate": 7.958883994126284e-07, |
|
"loss": -0.0717, |
|
"step": 864 |
|
}, |
|
{ |
|
"epoch": 0.6156583629893239, |
|
"grad_norm": 2.651596750679966, |
|
"learning_rate": 7.944199706314243e-07, |
|
"loss": -0.0746, |
|
"step": 865 |
|
}, |
|
{ |
|
"epoch": 0.6163701067615658, |
|
"grad_norm": 3.339891863916958, |
|
"learning_rate": 7.929515418502202e-07, |
|
"loss": 0.0358, |
|
"step": 866 |
|
}, |
|
{ |
|
"epoch": 0.6170818505338078, |
|
"grad_norm": 9.563176149885061, |
|
"learning_rate": 7.914831130690161e-07, |
|
"loss": 0.138, |
|
"step": 867 |
|
}, |
|
{ |
|
"epoch": 0.6177935943060499, |
|
"grad_norm": 4.018473168029476, |
|
"learning_rate": 7.90014684287812e-07, |
|
"loss": 0.115, |
|
"step": 868 |
|
}, |
|
{ |
|
"epoch": 0.6185053380782918, |
|
"grad_norm": 5.509934902464323, |
|
"learning_rate": 7.885462555066079e-07, |
|
"loss": 0.0952, |
|
"step": 869 |
|
}, |
|
{ |
|
"epoch": 0.6192170818505338, |
|
"grad_norm": 5.151079579500843, |
|
"learning_rate": 7.870778267254038e-07, |
|
"loss": 0.1107, |
|
"step": 870 |
|
}, |
|
{ |
|
"epoch": 0.6199288256227758, |
|
"grad_norm": 4.313174208860737, |
|
"learning_rate": 7.856093979441996e-07, |
|
"loss": 0.0696, |
|
"step": 871 |
|
}, |
|
{ |
|
"epoch": 0.6206405693950178, |
|
"grad_norm": 8.448847621010032, |
|
"learning_rate": 7.841409691629955e-07, |
|
"loss": 0.1063, |
|
"step": 872 |
|
}, |
|
{ |
|
"epoch": 0.6213523131672598, |
|
"grad_norm": 4.076951132014819, |
|
"learning_rate": 7.826725403817914e-07, |
|
"loss": 0.1106, |
|
"step": 873 |
|
}, |
|
{ |
|
"epoch": 0.6220640569395017, |
|
"grad_norm": 5.086643246204726, |
|
"learning_rate": 7.812041116005874e-07, |
|
"loss": -0.0188, |
|
"step": 874 |
|
}, |
|
{ |
|
"epoch": 0.6227758007117438, |
|
"grad_norm": 4.08189971565891, |
|
"learning_rate": 7.797356828193832e-07, |
|
"loss": 0.0276, |
|
"step": 875 |
|
}, |
|
{ |
|
"epoch": 0.6234875444839858, |
|
"grad_norm": 3.766159589500804, |
|
"learning_rate": 7.782672540381792e-07, |
|
"loss": 0.0417, |
|
"step": 876 |
|
}, |
|
{ |
|
"epoch": 0.6241992882562277, |
|
"grad_norm": 2.713107337287823, |
|
"learning_rate": 7.767988252569751e-07, |
|
"loss": -0.021, |
|
"step": 877 |
|
}, |
|
{ |
|
"epoch": 0.6249110320284698, |
|
"grad_norm": 7.909322631614341, |
|
"learning_rate": 7.753303964757709e-07, |
|
"loss": 0.1673, |
|
"step": 878 |
|
}, |
|
{ |
|
"epoch": 0.6256227758007118, |
|
"grad_norm": 4.289154107343657, |
|
"learning_rate": 7.738619676945668e-07, |
|
"loss": 0.214, |
|
"step": 879 |
|
}, |
|
{ |
|
"epoch": 0.6263345195729537, |
|
"grad_norm": 4.613836682997994, |
|
"learning_rate": 7.723935389133627e-07, |
|
"loss": 0.1429, |
|
"step": 880 |
|
}, |
|
{ |
|
"epoch": 0.6270462633451958, |
|
"grad_norm": 9.965118224978214, |
|
"learning_rate": 7.709251101321586e-07, |
|
"loss": 0.1514, |
|
"step": 881 |
|
}, |
|
{ |
|
"epoch": 0.6277580071174377, |
|
"grad_norm": 3.7370059908481235, |
|
"learning_rate": 7.694566813509544e-07, |
|
"loss": 0.1028, |
|
"step": 882 |
|
}, |
|
{ |
|
"epoch": 0.6284697508896797, |
|
"grad_norm": 4.3755263965548465, |
|
"learning_rate": 7.679882525697503e-07, |
|
"loss": -0.0689, |
|
"step": 883 |
|
}, |
|
{ |
|
"epoch": 0.6291814946619217, |
|
"grad_norm": 5.050525207402243, |
|
"learning_rate": 7.665198237885462e-07, |
|
"loss": 0.063, |
|
"step": 884 |
|
}, |
|
{ |
|
"epoch": 0.6298932384341637, |
|
"grad_norm": 4.915976050198276, |
|
"learning_rate": 7.65051395007342e-07, |
|
"loss": 0.1072, |
|
"step": 885 |
|
}, |
|
{ |
|
"epoch": 0.6306049822064057, |
|
"grad_norm": 4.7320990300783805, |
|
"learning_rate": 7.63582966226138e-07, |
|
"loss": 0.0415, |
|
"step": 886 |
|
}, |
|
{ |
|
"epoch": 0.6313167259786477, |
|
"grad_norm": 4.438720750124185, |
|
"learning_rate": 7.621145374449339e-07, |
|
"loss": 0.0805, |
|
"step": 887 |
|
}, |
|
{ |
|
"epoch": 0.6320284697508897, |
|
"grad_norm": 5.093607333004949, |
|
"learning_rate": 7.606461086637298e-07, |
|
"loss": -0.0102, |
|
"step": 888 |
|
}, |
|
{ |
|
"epoch": 0.6327402135231317, |
|
"grad_norm": 6.2013902520603885, |
|
"learning_rate": 7.591776798825256e-07, |
|
"loss": 0.0973, |
|
"step": 889 |
|
}, |
|
{ |
|
"epoch": 0.6334519572953736, |
|
"grad_norm": 4.721127952995933, |
|
"learning_rate": 7.577092511013215e-07, |
|
"loss": 0.1244, |
|
"step": 890 |
|
}, |
|
{ |
|
"epoch": 0.6341637010676157, |
|
"grad_norm": 5.7984725519352835, |
|
"learning_rate": 7.562408223201175e-07, |
|
"loss": 0.1645, |
|
"step": 891 |
|
}, |
|
{ |
|
"epoch": 0.6348754448398577, |
|
"grad_norm": 4.2837367945151135, |
|
"learning_rate": 7.547723935389134e-07, |
|
"loss": 0.0197, |
|
"step": 892 |
|
}, |
|
{ |
|
"epoch": 0.6355871886120996, |
|
"grad_norm": 3.7337004878385387, |
|
"learning_rate": 7.533039647577092e-07, |
|
"loss": 0.1216, |
|
"step": 893 |
|
}, |
|
{ |
|
"epoch": 0.6362989323843417, |
|
"grad_norm": 4.525227088920381, |
|
"learning_rate": 7.518355359765051e-07, |
|
"loss": 0.1481, |
|
"step": 894 |
|
}, |
|
{ |
|
"epoch": 0.6370106761565836, |
|
"grad_norm": 3.7562219501279537, |
|
"learning_rate": 7.50367107195301e-07, |
|
"loss": 0.0086, |
|
"step": 895 |
|
}, |
|
{ |
|
"epoch": 0.6377224199288256, |
|
"grad_norm": 4.288567074927515, |
|
"learning_rate": 7.48898678414097e-07, |
|
"loss": 0.128, |
|
"step": 896 |
|
}, |
|
{ |
|
"epoch": 0.6384341637010676, |
|
"grad_norm": 3.9364468816203844, |
|
"learning_rate": 7.474302496328928e-07, |
|
"loss": -0.0388, |
|
"step": 897 |
|
}, |
|
{ |
|
"epoch": 0.6391459074733096, |
|
"grad_norm": 5.361060586448568, |
|
"learning_rate": 7.459618208516887e-07, |
|
"loss": -0.0335, |
|
"step": 898 |
|
}, |
|
{ |
|
"epoch": 0.6398576512455516, |
|
"grad_norm": 3.238064490634461, |
|
"learning_rate": 7.444933920704846e-07, |
|
"loss": -0.0411, |
|
"step": 899 |
|
}, |
|
{ |
|
"epoch": 0.6405693950177936, |
|
"grad_norm": 3.8410604107512456, |
|
"learning_rate": 7.430249632892804e-07, |
|
"loss": 0.092, |
|
"step": 900 |
|
}, |
|
{ |
|
"epoch": 0.6412811387900356, |
|
"grad_norm": 8.013488841819695, |
|
"learning_rate": 7.415565345080763e-07, |
|
"loss": 0.0765, |
|
"step": 901 |
|
}, |
|
{ |
|
"epoch": 0.6419928825622776, |
|
"grad_norm": 3.4830077358338465, |
|
"learning_rate": 7.400881057268722e-07, |
|
"loss": 0.0321, |
|
"step": 902 |
|
}, |
|
{ |
|
"epoch": 0.6427046263345195, |
|
"grad_norm": 4.148883228099494, |
|
"learning_rate": 7.38619676945668e-07, |
|
"loss": 0.0473, |
|
"step": 903 |
|
}, |
|
{ |
|
"epoch": 0.6434163701067616, |
|
"grad_norm": 8.750289912189382, |
|
"learning_rate": 7.371512481644639e-07, |
|
"loss": 0.1622, |
|
"step": 904 |
|
}, |
|
{ |
|
"epoch": 0.6441281138790036, |
|
"grad_norm": 6.100827845697011, |
|
"learning_rate": 7.356828193832599e-07, |
|
"loss": 0.0693, |
|
"step": 905 |
|
}, |
|
{ |
|
"epoch": 0.6448398576512455, |
|
"grad_norm": 3.436611684981597, |
|
"learning_rate": 7.342143906020558e-07, |
|
"loss": 0.0338, |
|
"step": 906 |
|
}, |
|
{ |
|
"epoch": 0.6455516014234876, |
|
"grad_norm": 3.2463466945109105, |
|
"learning_rate": 7.327459618208516e-07, |
|
"loss": -0.0213, |
|
"step": 907 |
|
}, |
|
{ |
|
"epoch": 0.6462633451957296, |
|
"grad_norm": 3.4580185681843463, |
|
"learning_rate": 7.312775330396475e-07, |
|
"loss": -0.0004, |
|
"step": 908 |
|
}, |
|
{ |
|
"epoch": 0.6469750889679715, |
|
"grad_norm": 7.1902770938455, |
|
"learning_rate": 7.298091042584435e-07, |
|
"loss": 0.1418, |
|
"step": 909 |
|
}, |
|
{ |
|
"epoch": 0.6476868327402135, |
|
"grad_norm": 5.850461644925145, |
|
"learning_rate": 7.283406754772394e-07, |
|
"loss": 0.1297, |
|
"step": 910 |
|
}, |
|
{ |
|
"epoch": 0.6483985765124555, |
|
"grad_norm": 4.174847975835514, |
|
"learning_rate": 7.268722466960352e-07, |
|
"loss": 0.048, |
|
"step": 911 |
|
}, |
|
{ |
|
"epoch": 0.6491103202846975, |
|
"grad_norm": 5.945931320969125, |
|
"learning_rate": 7.254038179148311e-07, |
|
"loss": 0.0002, |
|
"step": 912 |
|
}, |
|
{ |
|
"epoch": 0.6498220640569395, |
|
"grad_norm": 5.4566388982409215, |
|
"learning_rate": 7.23935389133627e-07, |
|
"loss": 0.0795, |
|
"step": 913 |
|
}, |
|
{ |
|
"epoch": 0.6505338078291815, |
|
"grad_norm": 7.77405555606714, |
|
"learning_rate": 7.224669603524228e-07, |
|
"loss": -0.014, |
|
"step": 914 |
|
}, |
|
{ |
|
"epoch": 0.6512455516014235, |
|
"grad_norm": 7.794588598487911, |
|
"learning_rate": 7.209985315712188e-07, |
|
"loss": 0.0568, |
|
"step": 915 |
|
}, |
|
{ |
|
"epoch": 0.6519572953736655, |
|
"grad_norm": 3.9613239167558505, |
|
"learning_rate": 7.195301027900147e-07, |
|
"loss": 0.0538, |
|
"step": 916 |
|
}, |
|
{ |
|
"epoch": 0.6526690391459075, |
|
"grad_norm": 5.250170205399911, |
|
"learning_rate": 7.180616740088106e-07, |
|
"loss": -0.0189, |
|
"step": 917 |
|
}, |
|
{ |
|
"epoch": 0.6533807829181495, |
|
"grad_norm": 3.7272174507575935, |
|
"learning_rate": 7.165932452276064e-07, |
|
"loss": 0.1008, |
|
"step": 918 |
|
}, |
|
{ |
|
"epoch": 0.6540925266903914, |
|
"grad_norm": 5.5762382329676825, |
|
"learning_rate": 7.151248164464023e-07, |
|
"loss": 0.096, |
|
"step": 919 |
|
}, |
|
{ |
|
"epoch": 0.6548042704626335, |
|
"grad_norm": 4.35001234732443, |
|
"learning_rate": 7.136563876651982e-07, |
|
"loss": 0.0592, |
|
"step": 920 |
|
}, |
|
{ |
|
"epoch": 0.6555160142348755, |
|
"grad_norm": 3.0296201748619684, |
|
"learning_rate": 7.121879588839941e-07, |
|
"loss": 0.0638, |
|
"step": 921 |
|
}, |
|
{ |
|
"epoch": 0.6562277580071174, |
|
"grad_norm": 8.409568840523043, |
|
"learning_rate": 7.107195301027899e-07, |
|
"loss": 0.2424, |
|
"step": 922 |
|
}, |
|
{ |
|
"epoch": 0.6569395017793594, |
|
"grad_norm": 7.918188678263488, |
|
"learning_rate": 7.092511013215858e-07, |
|
"loss": -0.0219, |
|
"step": 923 |
|
}, |
|
{ |
|
"epoch": 0.6576512455516014, |
|
"grad_norm": 10.7701330393842, |
|
"learning_rate": 7.077826725403817e-07, |
|
"loss": 0.032, |
|
"step": 924 |
|
}, |
|
{ |
|
"epoch": 0.6583629893238434, |
|
"grad_norm": 4.218135308589257, |
|
"learning_rate": 7.063142437591776e-07, |
|
"loss": -0.0272, |
|
"step": 925 |
|
}, |
|
{ |
|
"epoch": 0.6590747330960854, |
|
"grad_norm": 5.331667977838016, |
|
"learning_rate": 7.048458149779736e-07, |
|
"loss": 0.0864, |
|
"step": 926 |
|
}, |
|
{ |
|
"epoch": 0.6597864768683274, |
|
"grad_norm": 5.3750991812138444, |
|
"learning_rate": 7.033773861967695e-07, |
|
"loss": 0.0726, |
|
"step": 927 |
|
}, |
|
{ |
|
"epoch": 0.6604982206405694, |
|
"grad_norm": 5.207399920428084, |
|
"learning_rate": 7.019089574155654e-07, |
|
"loss": 0.1336, |
|
"step": 928 |
|
}, |
|
{ |
|
"epoch": 0.6612099644128114, |
|
"grad_norm": 6.606289334933452, |
|
"learning_rate": 7.004405286343612e-07, |
|
"loss": 0.123, |
|
"step": 929 |
|
}, |
|
{ |
|
"epoch": 0.6619217081850534, |
|
"grad_norm": 5.007336574559658, |
|
"learning_rate": 6.989720998531571e-07, |
|
"loss": 0.1693, |
|
"step": 930 |
|
}, |
|
{ |
|
"epoch": 0.6626334519572954, |
|
"grad_norm": 4.513131585131538, |
|
"learning_rate": 6.97503671071953e-07, |
|
"loss": 0.0533, |
|
"step": 931 |
|
}, |
|
{ |
|
"epoch": 0.6633451957295373, |
|
"grad_norm": 6.118064534929691, |
|
"learning_rate": 6.960352422907489e-07, |
|
"loss": -0.0167, |
|
"step": 932 |
|
}, |
|
{ |
|
"epoch": 0.6640569395017794, |
|
"grad_norm": 4.967171664241232, |
|
"learning_rate": 6.945668135095447e-07, |
|
"loss": 0.1309, |
|
"step": 933 |
|
}, |
|
{ |
|
"epoch": 0.6647686832740214, |
|
"grad_norm": 6.141986259243848, |
|
"learning_rate": 6.930983847283406e-07, |
|
"loss": 0.0683, |
|
"step": 934 |
|
}, |
|
{ |
|
"epoch": 0.6654804270462633, |
|
"grad_norm": 2.843838533134232, |
|
"learning_rate": 6.916299559471366e-07, |
|
"loss": -0.0248, |
|
"step": 935 |
|
}, |
|
{ |
|
"epoch": 0.6661921708185053, |
|
"grad_norm": 6.254489354452603, |
|
"learning_rate": 6.901615271659324e-07, |
|
"loss": 0.2101, |
|
"step": 936 |
|
}, |
|
{ |
|
"epoch": 0.6669039145907474, |
|
"grad_norm": 5.187993037681643, |
|
"learning_rate": 6.886930983847283e-07, |
|
"loss": 0.0127, |
|
"step": 937 |
|
}, |
|
{ |
|
"epoch": 0.6676156583629893, |
|
"grad_norm": 4.579762865615799, |
|
"learning_rate": 6.872246696035242e-07, |
|
"loss": 0.0603, |
|
"step": 938 |
|
}, |
|
{ |
|
"epoch": 0.6683274021352313, |
|
"grad_norm": 5.244168812482439, |
|
"learning_rate": 6.857562408223201e-07, |
|
"loss": 0.0201, |
|
"step": 939 |
|
}, |
|
{ |
|
"epoch": 0.6690391459074733, |
|
"grad_norm": 3.816794681206833, |
|
"learning_rate": 6.842878120411159e-07, |
|
"loss": 0.0043, |
|
"step": 940 |
|
}, |
|
{ |
|
"epoch": 0.6697508896797153, |
|
"grad_norm": 3.5284472119876193, |
|
"learning_rate": 6.828193832599118e-07, |
|
"loss": 0.0502, |
|
"step": 941 |
|
}, |
|
{ |
|
"epoch": 0.6704626334519573, |
|
"grad_norm": 3.933868913771442, |
|
"learning_rate": 6.813509544787077e-07, |
|
"loss": -0.0462, |
|
"step": 942 |
|
}, |
|
{ |
|
"epoch": 0.6711743772241993, |
|
"grad_norm": 4.004455469191327, |
|
"learning_rate": 6.798825256975035e-07, |
|
"loss": 0.0586, |
|
"step": 943 |
|
}, |
|
{ |
|
"epoch": 0.6718861209964413, |
|
"grad_norm": 6.617395318921544, |
|
"learning_rate": 6.784140969162996e-07, |
|
"loss": 0.1136, |
|
"step": 944 |
|
}, |
|
{ |
|
"epoch": 0.6725978647686833, |
|
"grad_norm": 5.621511437204682, |
|
"learning_rate": 6.769456681350955e-07, |
|
"loss": 0.1687, |
|
"step": 945 |
|
}, |
|
{ |
|
"epoch": 0.6733096085409253, |
|
"grad_norm": 3.562357447934307, |
|
"learning_rate": 6.754772393538914e-07, |
|
"loss": 0.0862, |
|
"step": 946 |
|
}, |
|
{ |
|
"epoch": 0.6740213523131673, |
|
"grad_norm": 6.125694391242706, |
|
"learning_rate": 6.740088105726872e-07, |
|
"loss": 0.1892, |
|
"step": 947 |
|
}, |
|
{ |
|
"epoch": 0.6747330960854092, |
|
"grad_norm": 3.9863843451296472, |
|
"learning_rate": 6.725403817914831e-07, |
|
"loss": -0.0151, |
|
"step": 948 |
|
}, |
|
{ |
|
"epoch": 0.6754448398576512, |
|
"grad_norm": 4.2001625029598, |
|
"learning_rate": 6.71071953010279e-07, |
|
"loss": 0.0487, |
|
"step": 949 |
|
}, |
|
{ |
|
"epoch": 0.6761565836298933, |
|
"grad_norm": 4.935771398113328, |
|
"learning_rate": 6.696035242290749e-07, |
|
"loss": 0.0966, |
|
"step": 950 |
|
}, |
|
{ |
|
"epoch": 0.6768683274021352, |
|
"grad_norm": 3.858605435514549, |
|
"learning_rate": 6.681350954478707e-07, |
|
"loss": -0.0329, |
|
"step": 951 |
|
}, |
|
{ |
|
"epoch": 0.6775800711743772, |
|
"grad_norm": 3.871614515457192, |
|
"learning_rate": 6.666666666666666e-07, |
|
"loss": 0.0103, |
|
"step": 952 |
|
}, |
|
{ |
|
"epoch": 0.6782918149466192, |
|
"grad_norm": 4.820372050951537, |
|
"learning_rate": 6.651982378854625e-07, |
|
"loss": -0.0042, |
|
"step": 953 |
|
}, |
|
{ |
|
"epoch": 0.6790035587188612, |
|
"grad_norm": 4.562317308922618, |
|
"learning_rate": 6.637298091042585e-07, |
|
"loss": 0.1401, |
|
"step": 954 |
|
}, |
|
{ |
|
"epoch": 0.6797153024911032, |
|
"grad_norm": 8.34284882554388, |
|
"learning_rate": 6.622613803230543e-07, |
|
"loss": 0.1722, |
|
"step": 955 |
|
}, |
|
{ |
|
"epoch": 0.6804270462633452, |
|
"grad_norm": 3.5720720692334655, |
|
"learning_rate": 6.607929515418502e-07, |
|
"loss": 0.0558, |
|
"step": 956 |
|
}, |
|
{ |
|
"epoch": 0.6811387900355872, |
|
"grad_norm": 5.725857679596323, |
|
"learning_rate": 6.593245227606461e-07, |
|
"loss": 0.126, |
|
"step": 957 |
|
}, |
|
{ |
|
"epoch": 0.6818505338078292, |
|
"grad_norm": 10.288907153463756, |
|
"learning_rate": 6.578560939794419e-07, |
|
"loss": 0.2195, |
|
"step": 958 |
|
}, |
|
{ |
|
"epoch": 0.6825622775800712, |
|
"grad_norm": 5.933298009417249, |
|
"learning_rate": 6.563876651982378e-07, |
|
"loss": 0.119, |
|
"step": 959 |
|
}, |
|
{ |
|
"epoch": 0.6832740213523132, |
|
"grad_norm": 5.129065041215746, |
|
"learning_rate": 6.549192364170337e-07, |
|
"loss": -0.0546, |
|
"step": 960 |
|
}, |
|
{ |
|
"epoch": 0.6839857651245551, |
|
"grad_norm": 4.000375253241631, |
|
"learning_rate": 6.534508076358297e-07, |
|
"loss": 0.174, |
|
"step": 961 |
|
}, |
|
{ |
|
"epoch": 0.6846975088967971, |
|
"grad_norm": 7.894596482752509, |
|
"learning_rate": 6.519823788546255e-07, |
|
"loss": 0.1145, |
|
"step": 962 |
|
}, |
|
{ |
|
"epoch": 0.6854092526690392, |
|
"grad_norm": 4.82797863192124, |
|
"learning_rate": 6.505139500734214e-07, |
|
"loss": 0.0619, |
|
"step": 963 |
|
}, |
|
{ |
|
"epoch": 0.6861209964412811, |
|
"grad_norm": 6.221309269428655, |
|
"learning_rate": 6.490455212922174e-07, |
|
"loss": 0.0239, |
|
"step": 964 |
|
}, |
|
{ |
|
"epoch": 0.6868327402135231, |
|
"grad_norm": 4.86341008394864, |
|
"learning_rate": 6.475770925110132e-07, |
|
"loss": 0.086, |
|
"step": 965 |
|
}, |
|
{ |
|
"epoch": 0.6875444839857652, |
|
"grad_norm": 5.337886337127806, |
|
"learning_rate": 6.461086637298091e-07, |
|
"loss": 0.1969, |
|
"step": 966 |
|
}, |
|
{ |
|
"epoch": 0.6882562277580071, |
|
"grad_norm": 3.769358829646403, |
|
"learning_rate": 6.44640234948605e-07, |
|
"loss": 0.0967, |
|
"step": 967 |
|
}, |
|
{ |
|
"epoch": 0.6889679715302491, |
|
"grad_norm": 3.669919926032279, |
|
"learning_rate": 6.431718061674009e-07, |
|
"loss": 0.0377, |
|
"step": 968 |
|
}, |
|
{ |
|
"epoch": 0.6896797153024911, |
|
"grad_norm": 4.883374186443654, |
|
"learning_rate": 6.417033773861967e-07, |
|
"loss": 0.1519, |
|
"step": 969 |
|
}, |
|
{ |
|
"epoch": 0.6903914590747331, |
|
"grad_norm": 4.748723511086028, |
|
"learning_rate": 6.402349486049926e-07, |
|
"loss": 0.0907, |
|
"step": 970 |
|
}, |
|
{ |
|
"epoch": 0.6911032028469751, |
|
"grad_norm": 4.976284049252291, |
|
"learning_rate": 6.387665198237885e-07, |
|
"loss": -0.0896, |
|
"step": 971 |
|
}, |
|
{ |
|
"epoch": 0.691814946619217, |
|
"grad_norm": 5.283750585371445, |
|
"learning_rate": 6.372980910425843e-07, |
|
"loss": 0.0419, |
|
"step": 972 |
|
}, |
|
{ |
|
"epoch": 0.6925266903914591, |
|
"grad_norm": 5.255946107260031, |
|
"learning_rate": 6.358296622613802e-07, |
|
"loss": 0.0141, |
|
"step": 973 |
|
}, |
|
{ |
|
"epoch": 0.6932384341637011, |
|
"grad_norm": 5.006047787434039, |
|
"learning_rate": 6.343612334801762e-07, |
|
"loss": -0.0255, |
|
"step": 974 |
|
}, |
|
{ |
|
"epoch": 0.693950177935943, |
|
"grad_norm": 9.757667461016755, |
|
"learning_rate": 6.328928046989721e-07, |
|
"loss": 0.1332, |
|
"step": 975 |
|
}, |
|
{ |
|
"epoch": 0.6946619217081851, |
|
"grad_norm": 3.1321392144435065, |
|
"learning_rate": 6.314243759177679e-07, |
|
"loss": 0.081, |
|
"step": 976 |
|
}, |
|
{ |
|
"epoch": 0.695373665480427, |
|
"grad_norm": 2.8216884141577, |
|
"learning_rate": 6.299559471365638e-07, |
|
"loss": -0.0474, |
|
"step": 977 |
|
}, |
|
{ |
|
"epoch": 0.696085409252669, |
|
"grad_norm": 5.573286078737938, |
|
"learning_rate": 6.284875183553597e-07, |
|
"loss": 0.1186, |
|
"step": 978 |
|
}, |
|
{ |
|
"epoch": 0.6967971530249111, |
|
"grad_norm": 3.1582386059675924, |
|
"learning_rate": 6.270190895741557e-07, |
|
"loss": 0.0906, |
|
"step": 979 |
|
}, |
|
{ |
|
"epoch": 0.697508896797153, |
|
"grad_norm": 4.523244376004749, |
|
"learning_rate": 6.255506607929515e-07, |
|
"loss": 0.0386, |
|
"step": 980 |
|
}, |
|
{ |
|
"epoch": 0.698220640569395, |
|
"grad_norm": 3.4688155709332436, |
|
"learning_rate": 6.240822320117474e-07, |
|
"loss": 0.0308, |
|
"step": 981 |
|
}, |
|
{ |
|
"epoch": 0.698932384341637, |
|
"grad_norm": 5.411845227957125, |
|
"learning_rate": 6.226138032305433e-07, |
|
"loss": 0.0835, |
|
"step": 982 |
|
}, |
|
{ |
|
"epoch": 0.699644128113879, |
|
"grad_norm": 3.427770822010646, |
|
"learning_rate": 6.211453744493393e-07, |
|
"loss": 0.208, |
|
"step": 983 |
|
}, |
|
{ |
|
"epoch": 0.700355871886121, |
|
"grad_norm": 4.651382052221419, |
|
"learning_rate": 6.196769456681351e-07, |
|
"loss": 0.0119, |
|
"step": 984 |
|
}, |
|
{ |
|
"epoch": 0.701067615658363, |
|
"grad_norm": 4.988862029987204, |
|
"learning_rate": 6.18208516886931e-07, |
|
"loss": -0.0324, |
|
"step": 985 |
|
}, |
|
{ |
|
"epoch": 0.701779359430605, |
|
"grad_norm": 3.8284184557483334, |
|
"learning_rate": 6.167400881057269e-07, |
|
"loss": -0.0045, |
|
"step": 986 |
|
}, |
|
{ |
|
"epoch": 0.702491103202847, |
|
"grad_norm": 12.378991302169158, |
|
"learning_rate": 6.152716593245227e-07, |
|
"loss": 0.1294, |
|
"step": 987 |
|
}, |
|
{ |
|
"epoch": 0.703202846975089, |
|
"grad_norm": 6.56777945189323, |
|
"learning_rate": 6.138032305433186e-07, |
|
"loss": 0.0573, |
|
"step": 988 |
|
}, |
|
{ |
|
"epoch": 0.703914590747331, |
|
"grad_norm": 4.537732978493385, |
|
"learning_rate": 6.123348017621145e-07, |
|
"loss": 0.0434, |
|
"step": 989 |
|
}, |
|
{ |
|
"epoch": 0.7046263345195729, |
|
"grad_norm": 4.800209501780763, |
|
"learning_rate": 6.108663729809104e-07, |
|
"loss": 0.1148, |
|
"step": 990 |
|
}, |
|
{ |
|
"epoch": 0.7053380782918149, |
|
"grad_norm": 4.436723238161359, |
|
"learning_rate": 6.093979441997062e-07, |
|
"loss": 0.0949, |
|
"step": 991 |
|
}, |
|
{ |
|
"epoch": 0.706049822064057, |
|
"grad_norm": 4.622691635194972, |
|
"learning_rate": 6.079295154185021e-07, |
|
"loss": 0.1296, |
|
"step": 992 |
|
}, |
|
{ |
|
"epoch": 0.7067615658362989, |
|
"grad_norm": 3.1184266192212657, |
|
"learning_rate": 6.064610866372981e-07, |
|
"loss": -0.0653, |
|
"step": 993 |
|
}, |
|
{ |
|
"epoch": 0.7074733096085409, |
|
"grad_norm": 7.086422803719531, |
|
"learning_rate": 6.049926578560939e-07, |
|
"loss": 0.1266, |
|
"step": 994 |
|
}, |
|
{ |
|
"epoch": 0.708185053380783, |
|
"grad_norm": 4.147286982938081, |
|
"learning_rate": 6.035242290748898e-07, |
|
"loss": 0.0978, |
|
"step": 995 |
|
}, |
|
{ |
|
"epoch": 0.7088967971530249, |
|
"grad_norm": 3.494585071868179, |
|
"learning_rate": 6.020558002936858e-07, |
|
"loss": 0.0997, |
|
"step": 996 |
|
}, |
|
{ |
|
"epoch": 0.7096085409252669, |
|
"grad_norm": 5.382206897967533, |
|
"learning_rate": 6.005873715124817e-07, |
|
"loss": 0.0151, |
|
"step": 997 |
|
}, |
|
{ |
|
"epoch": 0.7103202846975089, |
|
"grad_norm": 6.120207176441689, |
|
"learning_rate": 5.991189427312775e-07, |
|
"loss": 0.0475, |
|
"step": 998 |
|
}, |
|
{ |
|
"epoch": 0.7110320284697509, |
|
"grad_norm": 4.805974921203533, |
|
"learning_rate": 5.976505139500734e-07, |
|
"loss": -0.0197, |
|
"step": 999 |
|
}, |
|
{ |
|
"epoch": 0.7117437722419929, |
|
"grad_norm": 11.14135465867464, |
|
"learning_rate": 5.961820851688693e-07, |
|
"loss": 0.1477, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 0.7124555160142348, |
|
"grad_norm": 6.639000548093658, |
|
"learning_rate": 5.947136563876652e-07, |
|
"loss": 0.1196, |
|
"step": 1001 |
|
}, |
|
{ |
|
"epoch": 0.7131672597864769, |
|
"grad_norm": 3.8551733953168674, |
|
"learning_rate": 5.93245227606461e-07, |
|
"loss": 0.0621, |
|
"step": 1002 |
|
}, |
|
{ |
|
"epoch": 0.7138790035587189, |
|
"grad_norm": 5.225696988138599, |
|
"learning_rate": 5.91776798825257e-07, |
|
"loss": 0.1545, |
|
"step": 1003 |
|
}, |
|
{ |
|
"epoch": 0.7145907473309608, |
|
"grad_norm": 4.353477946786258, |
|
"learning_rate": 5.903083700440529e-07, |
|
"loss": 0.0293, |
|
"step": 1004 |
|
}, |
|
{ |
|
"epoch": 0.7153024911032029, |
|
"grad_norm": 5.739092224858361, |
|
"learning_rate": 5.888399412628487e-07, |
|
"loss": 0.0309, |
|
"step": 1005 |
|
}, |
|
{ |
|
"epoch": 0.7160142348754448, |
|
"grad_norm": 4.288231941227374, |
|
"learning_rate": 5.873715124816446e-07, |
|
"loss": 0.1673, |
|
"step": 1006 |
|
}, |
|
{ |
|
"epoch": 0.7167259786476868, |
|
"grad_norm": 6.874966985449849, |
|
"learning_rate": 5.859030837004405e-07, |
|
"loss": 0.179, |
|
"step": 1007 |
|
}, |
|
{ |
|
"epoch": 0.7174377224199289, |
|
"grad_norm": 3.872807774783665, |
|
"learning_rate": 5.844346549192364e-07, |
|
"loss": -0.0374, |
|
"step": 1008 |
|
}, |
|
{ |
|
"epoch": 0.7181494661921708, |
|
"grad_norm": 4.472528527447646, |
|
"learning_rate": 5.829662261380322e-07, |
|
"loss": 0.1206, |
|
"step": 1009 |
|
}, |
|
{ |
|
"epoch": 0.7188612099644128, |
|
"grad_norm": 4.577847937835349, |
|
"learning_rate": 5.814977973568281e-07, |
|
"loss": 0.0201, |
|
"step": 1010 |
|
}, |
|
{ |
|
"epoch": 0.7195729537366548, |
|
"grad_norm": 4.076940605668476, |
|
"learning_rate": 5.80029368575624e-07, |
|
"loss": -0.0147, |
|
"step": 1011 |
|
}, |
|
{ |
|
"epoch": 0.7202846975088968, |
|
"grad_norm": 4.729777807779337, |
|
"learning_rate": 5.7856093979442e-07, |
|
"loss": 0.0282, |
|
"step": 1012 |
|
}, |
|
{ |
|
"epoch": 0.7209964412811388, |
|
"grad_norm": 4.258425271272895, |
|
"learning_rate": 5.770925110132159e-07, |
|
"loss": 0.0342, |
|
"step": 1013 |
|
}, |
|
{ |
|
"epoch": 0.7217081850533807, |
|
"grad_norm": 5.128062751945477, |
|
"learning_rate": 5.756240822320118e-07, |
|
"loss": 0.072, |
|
"step": 1014 |
|
}, |
|
{ |
|
"epoch": 0.7224199288256228, |
|
"grad_norm": 3.325966291631845, |
|
"learning_rate": 5.741556534508077e-07, |
|
"loss": 0.0912, |
|
"step": 1015 |
|
}, |
|
{ |
|
"epoch": 0.7231316725978648, |
|
"grad_norm": 4.127479963710312, |
|
"learning_rate": 5.726872246696035e-07, |
|
"loss": 0.0248, |
|
"step": 1016 |
|
}, |
|
{ |
|
"epoch": 0.7238434163701067, |
|
"grad_norm": 5.122925315168087, |
|
"learning_rate": 5.712187958883994e-07, |
|
"loss": 0.0543, |
|
"step": 1017 |
|
}, |
|
{ |
|
"epoch": 0.7245551601423488, |
|
"grad_norm": 5.82638564758251, |
|
"learning_rate": 5.697503671071953e-07, |
|
"loss": 0.0469, |
|
"step": 1018 |
|
}, |
|
{ |
|
"epoch": 0.7252669039145907, |
|
"grad_norm": 4.348666694898038, |
|
"learning_rate": 5.682819383259912e-07, |
|
"loss": 0.0599, |
|
"step": 1019 |
|
}, |
|
{ |
|
"epoch": 0.7259786476868327, |
|
"grad_norm": 6.187173237505027, |
|
"learning_rate": 5.66813509544787e-07, |
|
"loss": 0.0595, |
|
"step": 1020 |
|
}, |
|
{ |
|
"epoch": 0.7266903914590748, |
|
"grad_norm": 6.884889183288436, |
|
"learning_rate": 5.653450807635829e-07, |
|
"loss": 0.1994, |
|
"step": 1021 |
|
}, |
|
{ |
|
"epoch": 0.7274021352313167, |
|
"grad_norm": 4.530112288138082, |
|
"learning_rate": 5.638766519823789e-07, |
|
"loss": 0.0517, |
|
"step": 1022 |
|
}, |
|
{ |
|
"epoch": 0.7281138790035587, |
|
"grad_norm": 4.690687596197606, |
|
"learning_rate": 5.624082232011747e-07, |
|
"loss": 0.0067, |
|
"step": 1023 |
|
}, |
|
{ |
|
"epoch": 0.7288256227758008, |
|
"grad_norm": 4.500960353525001, |
|
"learning_rate": 5.609397944199706e-07, |
|
"loss": 0.0875, |
|
"step": 1024 |
|
}, |
|
{ |
|
"epoch": 0.7295373665480427, |
|
"grad_norm": 5.880851591036031, |
|
"learning_rate": 5.594713656387665e-07, |
|
"loss": 0.0519, |
|
"step": 1025 |
|
}, |
|
{ |
|
"epoch": 0.7302491103202847, |
|
"grad_norm": 3.5596931509583087, |
|
"learning_rate": 5.580029368575624e-07, |
|
"loss": 0.0276, |
|
"step": 1026 |
|
}, |
|
{ |
|
"epoch": 0.7309608540925266, |
|
"grad_norm": 8.68598760160492, |
|
"learning_rate": 5.565345080763582e-07, |
|
"loss": 0.047, |
|
"step": 1027 |
|
}, |
|
{ |
|
"epoch": 0.7316725978647687, |
|
"grad_norm": 3.402191684272178, |
|
"learning_rate": 5.550660792951541e-07, |
|
"loss": 0.0965, |
|
"step": 1028 |
|
}, |
|
{ |
|
"epoch": 0.7323843416370107, |
|
"grad_norm": 4.095134238999353, |
|
"learning_rate": 5.5359765051395e-07, |
|
"loss": 0.0581, |
|
"step": 1029 |
|
}, |
|
{ |
|
"epoch": 0.7330960854092526, |
|
"grad_norm": 4.794722580506972, |
|
"learning_rate": 5.521292217327459e-07, |
|
"loss": 0.0635, |
|
"step": 1030 |
|
}, |
|
{ |
|
"epoch": 0.7338078291814947, |
|
"grad_norm": 6.070597795338894, |
|
"learning_rate": 5.506607929515418e-07, |
|
"loss": 0.0225, |
|
"step": 1031 |
|
}, |
|
{ |
|
"epoch": 0.7345195729537367, |
|
"grad_norm": 4.129191494330407, |
|
"learning_rate": 5.491923641703378e-07, |
|
"loss": -0.0081, |
|
"step": 1032 |
|
}, |
|
{ |
|
"epoch": 0.7352313167259786, |
|
"grad_norm": 3.571695740107198, |
|
"learning_rate": 5.477239353891337e-07, |
|
"loss": 0.0014, |
|
"step": 1033 |
|
}, |
|
{ |
|
"epoch": 0.7359430604982207, |
|
"grad_norm": 5.510492373585215, |
|
"learning_rate": 5.462555066079295e-07, |
|
"loss": 0.1682, |
|
"step": 1034 |
|
}, |
|
{ |
|
"epoch": 0.7366548042704626, |
|
"grad_norm": 5.0145359461275065, |
|
"learning_rate": 5.447870778267254e-07, |
|
"loss": 0.1367, |
|
"step": 1035 |
|
}, |
|
{ |
|
"epoch": 0.7373665480427046, |
|
"grad_norm": 6.322219768330761, |
|
"learning_rate": 5.433186490455213e-07, |
|
"loss": 0.0956, |
|
"step": 1036 |
|
}, |
|
{ |
|
"epoch": 0.7380782918149467, |
|
"grad_norm": 5.761999463500739, |
|
"learning_rate": 5.418502202643172e-07, |
|
"loss": 0.1635, |
|
"step": 1037 |
|
}, |
|
{ |
|
"epoch": 0.7387900355871886, |
|
"grad_norm": 4.837240656719542, |
|
"learning_rate": 5.40381791483113e-07, |
|
"loss": 0.0648, |
|
"step": 1038 |
|
}, |
|
{ |
|
"epoch": 0.7395017793594306, |
|
"grad_norm": 4.030925175791179, |
|
"learning_rate": 5.389133627019089e-07, |
|
"loss": 0.1624, |
|
"step": 1039 |
|
}, |
|
{ |
|
"epoch": 0.7402135231316725, |
|
"grad_norm": 4.088053226654463, |
|
"learning_rate": 5.374449339207048e-07, |
|
"loss": 0.0263, |
|
"step": 1040 |
|
}, |
|
{ |
|
"epoch": 0.7409252669039146, |
|
"grad_norm": 3.348281926516349, |
|
"learning_rate": 5.359765051395006e-07, |
|
"loss": 0.029, |
|
"step": 1041 |
|
}, |
|
{ |
|
"epoch": 0.7416370106761566, |
|
"grad_norm": 4.1162599390988985, |
|
"learning_rate": 5.345080763582966e-07, |
|
"loss": -0.0179, |
|
"step": 1042 |
|
}, |
|
{ |
|
"epoch": 0.7423487544483985, |
|
"grad_norm": 5.81149250445998, |
|
"learning_rate": 5.330396475770925e-07, |
|
"loss": 0.1455, |
|
"step": 1043 |
|
}, |
|
{ |
|
"epoch": 0.7430604982206406, |
|
"grad_norm": 3.1102540580282114, |
|
"learning_rate": 5.315712187958884e-07, |
|
"loss": 0.0614, |
|
"step": 1044 |
|
}, |
|
{ |
|
"epoch": 0.7437722419928826, |
|
"grad_norm": 2.6760635115723153, |
|
"learning_rate": 5.301027900146842e-07, |
|
"loss": 0.0029, |
|
"step": 1045 |
|
}, |
|
{ |
|
"epoch": 0.7444839857651245, |
|
"grad_norm": 5.107803896444269, |
|
"learning_rate": 5.286343612334801e-07, |
|
"loss": -0.0163, |
|
"step": 1046 |
|
}, |
|
{ |
|
"epoch": 0.7451957295373666, |
|
"grad_norm": 3.5529373052269086, |
|
"learning_rate": 5.27165932452276e-07, |
|
"loss": 0.1096, |
|
"step": 1047 |
|
}, |
|
{ |
|
"epoch": 0.7459074733096085, |
|
"grad_norm": 6.545845624530535, |
|
"learning_rate": 5.256975036710719e-07, |
|
"loss": 0.1949, |
|
"step": 1048 |
|
}, |
|
{ |
|
"epoch": 0.7466192170818505, |
|
"grad_norm": 4.83801737539602, |
|
"learning_rate": 5.242290748898678e-07, |
|
"loss": 0.0639, |
|
"step": 1049 |
|
}, |
|
{ |
|
"epoch": 0.7473309608540926, |
|
"grad_norm": 3.6609832904083195, |
|
"learning_rate": 5.227606461086637e-07, |
|
"loss": 0.0275, |
|
"step": 1050 |
|
}, |
|
{ |
|
"epoch": 0.7480427046263345, |
|
"grad_norm": 3.259977284685812, |
|
"learning_rate": 5.212922173274597e-07, |
|
"loss": 0.1671, |
|
"step": 1051 |
|
}, |
|
{ |
|
"epoch": 0.7487544483985765, |
|
"grad_norm": 4.963751239762047, |
|
"learning_rate": 5.198237885462556e-07, |
|
"loss": 0.0218, |
|
"step": 1052 |
|
}, |
|
{ |
|
"epoch": 0.7494661921708186, |
|
"grad_norm": 4.204189690387292, |
|
"learning_rate": 5.183553597650514e-07, |
|
"loss": -0.0142, |
|
"step": 1053 |
|
}, |
|
{ |
|
"epoch": 0.7501779359430605, |
|
"grad_norm": 4.215976736044352, |
|
"learning_rate": 5.168869309838473e-07, |
|
"loss": 0.0902, |
|
"step": 1054 |
|
}, |
|
{ |
|
"epoch": 0.7508896797153025, |
|
"grad_norm": 4.1399549409929755, |
|
"learning_rate": 5.154185022026432e-07, |
|
"loss": 0.0241, |
|
"step": 1055 |
|
}, |
|
{ |
|
"epoch": 0.7516014234875444, |
|
"grad_norm": 3.879652737090344, |
|
"learning_rate": 5.13950073421439e-07, |
|
"loss": -0.009, |
|
"step": 1056 |
|
}, |
|
{ |
|
"epoch": 0.7523131672597865, |
|
"grad_norm": 4.668346897274869, |
|
"learning_rate": 5.124816446402349e-07, |
|
"loss": 0.097, |
|
"step": 1057 |
|
}, |
|
{ |
|
"epoch": 0.7530249110320285, |
|
"grad_norm": 4.588309667165746, |
|
"learning_rate": 5.110132158590308e-07, |
|
"loss": 0.0533, |
|
"step": 1058 |
|
}, |
|
{ |
|
"epoch": 0.7537366548042704, |
|
"grad_norm": 6.046628330926524, |
|
"learning_rate": 5.095447870778267e-07, |
|
"loss": 0.0351, |
|
"step": 1059 |
|
}, |
|
{ |
|
"epoch": 0.7544483985765125, |
|
"grad_norm": 3.739341465912661, |
|
"learning_rate": 5.080763582966225e-07, |
|
"loss": -0.0943, |
|
"step": 1060 |
|
}, |
|
{ |
|
"epoch": 0.7551601423487544, |
|
"grad_norm": 4.556068381207451, |
|
"learning_rate": 5.066079295154185e-07, |
|
"loss": 0.06, |
|
"step": 1061 |
|
}, |
|
{ |
|
"epoch": 0.7558718861209964, |
|
"grad_norm": 4.202334872149713, |
|
"learning_rate": 5.051395007342144e-07, |
|
"loss": -0.0082, |
|
"step": 1062 |
|
}, |
|
{ |
|
"epoch": 0.7565836298932385, |
|
"grad_norm": 5.125659993171668, |
|
"learning_rate": 5.036710719530102e-07, |
|
"loss": 0.0845, |
|
"step": 1063 |
|
}, |
|
{ |
|
"epoch": 0.7572953736654804, |
|
"grad_norm": 5.27421670313849, |
|
"learning_rate": 5.022026431718061e-07, |
|
"loss": -0.001, |
|
"step": 1064 |
|
}, |
|
{ |
|
"epoch": 0.7580071174377224, |
|
"grad_norm": 4.368187532248877, |
|
"learning_rate": 5.00734214390602e-07, |
|
"loss": 0.0348, |
|
"step": 1065 |
|
}, |
|
{ |
|
"epoch": 0.7587188612099645, |
|
"grad_norm": 3.855942902787289, |
|
"learning_rate": 4.99265785609398e-07, |
|
"loss": -0.0982, |
|
"step": 1066 |
|
}, |
|
{ |
|
"epoch": 0.7594306049822064, |
|
"grad_norm": 4.977053248284057, |
|
"learning_rate": 4.977973568281938e-07, |
|
"loss": 0.0883, |
|
"step": 1067 |
|
}, |
|
{ |
|
"epoch": 0.7601423487544484, |
|
"grad_norm": 4.289193239981424, |
|
"learning_rate": 4.963289280469897e-07, |
|
"loss": 0.0051, |
|
"step": 1068 |
|
}, |
|
{ |
|
"epoch": 0.7608540925266903, |
|
"grad_norm": 4.006762034148149, |
|
"learning_rate": 4.948604992657856e-07, |
|
"loss": -0.002, |
|
"step": 1069 |
|
}, |
|
{ |
|
"epoch": 0.7615658362989324, |
|
"grad_norm": 2.808128859625345, |
|
"learning_rate": 4.933920704845815e-07, |
|
"loss": -0.0264, |
|
"step": 1070 |
|
}, |
|
{ |
|
"epoch": 0.7622775800711744, |
|
"grad_norm": 5.882154612844199, |
|
"learning_rate": 4.919236417033773e-07, |
|
"loss": 0.069, |
|
"step": 1071 |
|
}, |
|
{ |
|
"epoch": 0.7629893238434163, |
|
"grad_norm": 4.794409301460062, |
|
"learning_rate": 4.904552129221732e-07, |
|
"loss": -0.0557, |
|
"step": 1072 |
|
}, |
|
{ |
|
"epoch": 0.7637010676156584, |
|
"grad_norm": 4.826240443348902, |
|
"learning_rate": 4.889867841409692e-07, |
|
"loss": 0.0804, |
|
"step": 1073 |
|
}, |
|
{ |
|
"epoch": 0.7644128113879004, |
|
"grad_norm": 3.0164349510884447, |
|
"learning_rate": 4.87518355359765e-07, |
|
"loss": -0.0245, |
|
"step": 1074 |
|
}, |
|
{ |
|
"epoch": 0.7651245551601423, |
|
"grad_norm": 4.518231863704466, |
|
"learning_rate": 4.860499265785609e-07, |
|
"loss": 0.0782, |
|
"step": 1075 |
|
}, |
|
{ |
|
"epoch": 0.7658362989323844, |
|
"grad_norm": 4.864847615787066, |
|
"learning_rate": 4.845814977973568e-07, |
|
"loss": 0.1056, |
|
"step": 1076 |
|
}, |
|
{ |
|
"epoch": 0.7665480427046263, |
|
"grad_norm": 4.773824273405901, |
|
"learning_rate": 4.831130690161527e-07, |
|
"loss": 0.0828, |
|
"step": 1077 |
|
}, |
|
{ |
|
"epoch": 0.7672597864768683, |
|
"grad_norm": 4.336462040695792, |
|
"learning_rate": 4.816446402349486e-07, |
|
"loss": 0.1055, |
|
"step": 1078 |
|
}, |
|
{ |
|
"epoch": 0.7679715302491104, |
|
"grad_norm": 3.805670076736972, |
|
"learning_rate": 4.801762114537445e-07, |
|
"loss": 0.0526, |
|
"step": 1079 |
|
}, |
|
{ |
|
"epoch": 0.7686832740213523, |
|
"grad_norm": 6.332442706210725, |
|
"learning_rate": 4.787077826725404e-07, |
|
"loss": 0.0388, |
|
"step": 1080 |
|
}, |
|
{ |
|
"epoch": 0.7693950177935943, |
|
"grad_norm": 6.059518926680791, |
|
"learning_rate": 4.772393538913363e-07, |
|
"loss": 0.005, |
|
"step": 1081 |
|
}, |
|
{ |
|
"epoch": 0.7701067615658364, |
|
"grad_norm": 5.162646216291588, |
|
"learning_rate": 4.757709251101321e-07, |
|
"loss": 0.0971, |
|
"step": 1082 |
|
}, |
|
{ |
|
"epoch": 0.7708185053380783, |
|
"grad_norm": 3.3748493867399048, |
|
"learning_rate": 4.7430249632892805e-07, |
|
"loss": 0.0727, |
|
"step": 1083 |
|
}, |
|
{ |
|
"epoch": 0.7715302491103203, |
|
"grad_norm": 3.933000567201699, |
|
"learning_rate": 4.728340675477239e-07, |
|
"loss": -0.0607, |
|
"step": 1084 |
|
}, |
|
{ |
|
"epoch": 0.7722419928825622, |
|
"grad_norm": 5.233340215172262, |
|
"learning_rate": 4.713656387665198e-07, |
|
"loss": 0.0636, |
|
"step": 1085 |
|
}, |
|
{ |
|
"epoch": 0.7729537366548043, |
|
"grad_norm": 4.78712650764478, |
|
"learning_rate": 4.6989720998531566e-07, |
|
"loss": 0.0701, |
|
"step": 1086 |
|
}, |
|
{ |
|
"epoch": 0.7736654804270463, |
|
"grad_norm": 3.20609767023997, |
|
"learning_rate": 4.6842878120411153e-07, |
|
"loss": 0.0033, |
|
"step": 1087 |
|
}, |
|
{ |
|
"epoch": 0.7743772241992882, |
|
"grad_norm": 4.914803987244276, |
|
"learning_rate": 4.669603524229075e-07, |
|
"loss": 0.0541, |
|
"step": 1088 |
|
}, |
|
{ |
|
"epoch": 0.7750889679715303, |
|
"grad_norm": 5.8612510017626835, |
|
"learning_rate": 4.654919236417034e-07, |
|
"loss": 0.1272, |
|
"step": 1089 |
|
}, |
|
{ |
|
"epoch": 0.7758007117437722, |
|
"grad_norm": 4.283141226610423, |
|
"learning_rate": 4.6402349486049925e-07, |
|
"loss": 0.1489, |
|
"step": 1090 |
|
}, |
|
{ |
|
"epoch": 0.7765124555160142, |
|
"grad_norm": 4.109306305865953, |
|
"learning_rate": 4.625550660792951e-07, |
|
"loss": -0.0282, |
|
"step": 1091 |
|
}, |
|
{ |
|
"epoch": 0.7772241992882563, |
|
"grad_norm": 12.656682211948667, |
|
"learning_rate": 4.61086637298091e-07, |
|
"loss": 0.0796, |
|
"step": 1092 |
|
}, |
|
{ |
|
"epoch": 0.7779359430604982, |
|
"grad_norm": 4.479936163992436, |
|
"learning_rate": 4.596182085168869e-07, |
|
"loss": -0.009, |
|
"step": 1093 |
|
}, |
|
{ |
|
"epoch": 0.7786476868327402, |
|
"grad_norm": 8.41345994752303, |
|
"learning_rate": 4.581497797356828e-07, |
|
"loss": 0.0569, |
|
"step": 1094 |
|
}, |
|
{ |
|
"epoch": 0.7793594306049823, |
|
"grad_norm": 3.2918309681766584, |
|
"learning_rate": 4.5668135095447866e-07, |
|
"loss": 0.0434, |
|
"step": 1095 |
|
}, |
|
{ |
|
"epoch": 0.7800711743772242, |
|
"grad_norm": 7.22334810268064, |
|
"learning_rate": 4.5521292217327454e-07, |
|
"loss": 0.0356, |
|
"step": 1096 |
|
}, |
|
{ |
|
"epoch": 0.7807829181494662, |
|
"grad_norm": 4.178584158182841, |
|
"learning_rate": 4.5374449339207046e-07, |
|
"loss": 0.0572, |
|
"step": 1097 |
|
}, |
|
{ |
|
"epoch": 0.7814946619217081, |
|
"grad_norm": 7.574573557719195, |
|
"learning_rate": 4.522760646108664e-07, |
|
"loss": 0.023, |
|
"step": 1098 |
|
}, |
|
{ |
|
"epoch": 0.7822064056939502, |
|
"grad_norm": 4.004238029317759, |
|
"learning_rate": 4.5080763582966226e-07, |
|
"loss": 0.0766, |
|
"step": 1099 |
|
}, |
|
{ |
|
"epoch": 0.7829181494661922, |
|
"grad_norm": 2.7957439160059487, |
|
"learning_rate": 4.4933920704845813e-07, |
|
"loss": 0.004, |
|
"step": 1100 |
|
}, |
|
{ |
|
"epoch": 0.7836298932384341, |
|
"grad_norm": 4.801530895880167, |
|
"learning_rate": 4.47870778267254e-07, |
|
"loss": -0.0173, |
|
"step": 1101 |
|
}, |
|
{ |
|
"epoch": 0.7843416370106762, |
|
"grad_norm": 5.397057098691664, |
|
"learning_rate": 4.464023494860499e-07, |
|
"loss": 0.214, |
|
"step": 1102 |
|
}, |
|
{ |
|
"epoch": 0.7850533807829182, |
|
"grad_norm": 4.48820921837597, |
|
"learning_rate": 4.449339207048458e-07, |
|
"loss": 0.0879, |
|
"step": 1103 |
|
}, |
|
{ |
|
"epoch": 0.7857651245551601, |
|
"grad_norm": 5.375386246398824, |
|
"learning_rate": 4.4346549192364167e-07, |
|
"loss": 0.0434, |
|
"step": 1104 |
|
}, |
|
{ |
|
"epoch": 0.7864768683274022, |
|
"grad_norm": 3.3797885690956395, |
|
"learning_rate": 4.419970631424376e-07, |
|
"loss": -0.1084, |
|
"step": 1105 |
|
}, |
|
{ |
|
"epoch": 0.7871886120996441, |
|
"grad_norm": 4.5641016826887055, |
|
"learning_rate": 4.4052863436123346e-07, |
|
"loss": 0.1856, |
|
"step": 1106 |
|
}, |
|
{ |
|
"epoch": 0.7879003558718861, |
|
"grad_norm": 5.042023680265534, |
|
"learning_rate": 4.390602055800294e-07, |
|
"loss": 0.0168, |
|
"step": 1107 |
|
}, |
|
{ |
|
"epoch": 0.7886120996441282, |
|
"grad_norm": 6.028319917002458, |
|
"learning_rate": 4.3759177679882526e-07, |
|
"loss": 0.1204, |
|
"step": 1108 |
|
}, |
|
{ |
|
"epoch": 0.7893238434163701, |
|
"grad_norm": 4.695297266575226, |
|
"learning_rate": 4.3612334801762113e-07, |
|
"loss": 0.0593, |
|
"step": 1109 |
|
}, |
|
{ |
|
"epoch": 0.7900355871886121, |
|
"grad_norm": 4.941555048093157, |
|
"learning_rate": 4.34654919236417e-07, |
|
"loss": -0.0071, |
|
"step": 1110 |
|
}, |
|
{ |
|
"epoch": 0.7907473309608541, |
|
"grad_norm": 3.807724259723377, |
|
"learning_rate": 4.3318649045521287e-07, |
|
"loss": -0.1208, |
|
"step": 1111 |
|
}, |
|
{ |
|
"epoch": 0.7914590747330961, |
|
"grad_norm": 5.959791240748352, |
|
"learning_rate": 4.317180616740088e-07, |
|
"loss": 0.0559, |
|
"step": 1112 |
|
}, |
|
{ |
|
"epoch": 0.7921708185053381, |
|
"grad_norm": 4.177561338603296, |
|
"learning_rate": 4.3024963289280467e-07, |
|
"loss": 0.0331, |
|
"step": 1113 |
|
}, |
|
{ |
|
"epoch": 0.79288256227758, |
|
"grad_norm": 4.257159317730894, |
|
"learning_rate": 4.287812041116006e-07, |
|
"loss": 0.0398, |
|
"step": 1114 |
|
}, |
|
{ |
|
"epoch": 0.7935943060498221, |
|
"grad_norm": 5.799847028622386, |
|
"learning_rate": 4.2731277533039646e-07, |
|
"loss": 0.0275, |
|
"step": 1115 |
|
}, |
|
{ |
|
"epoch": 0.7943060498220641, |
|
"grad_norm": 4.0517315683275745, |
|
"learning_rate": 4.2584434654919234e-07, |
|
"loss": -0.0107, |
|
"step": 1116 |
|
}, |
|
{ |
|
"epoch": 0.795017793594306, |
|
"grad_norm": 5.283166842239091, |
|
"learning_rate": 4.2437591776798826e-07, |
|
"loss": 0.1154, |
|
"step": 1117 |
|
}, |
|
{ |
|
"epoch": 0.7957295373665481, |
|
"grad_norm": 4.4525632269695095, |
|
"learning_rate": 4.2290748898678413e-07, |
|
"loss": 0.0285, |
|
"step": 1118 |
|
}, |
|
{ |
|
"epoch": 0.79644128113879, |
|
"grad_norm": 3.550895932467998, |
|
"learning_rate": 4.2143906020558e-07, |
|
"loss": -0.0089, |
|
"step": 1119 |
|
}, |
|
{ |
|
"epoch": 0.797153024911032, |
|
"grad_norm": 5.442417070057488, |
|
"learning_rate": 4.199706314243759e-07, |
|
"loss": 0.1652, |
|
"step": 1120 |
|
}, |
|
{ |
|
"epoch": 0.797864768683274, |
|
"grad_norm": 4.130251460513657, |
|
"learning_rate": 4.1850220264317175e-07, |
|
"loss": 0.0325, |
|
"step": 1121 |
|
}, |
|
{ |
|
"epoch": 0.798576512455516, |
|
"grad_norm": 6.459392468964495, |
|
"learning_rate": 4.1703377386196767e-07, |
|
"loss": 0.1389, |
|
"step": 1122 |
|
}, |
|
{ |
|
"epoch": 0.799288256227758, |
|
"grad_norm": 3.8430552281131902, |
|
"learning_rate": 4.155653450807636e-07, |
|
"loss": 0.0573, |
|
"step": 1123 |
|
}, |
|
{ |
|
"epoch": 0.8, |
|
"grad_norm": 4.7470455016002004, |
|
"learning_rate": 4.1409691629955947e-07, |
|
"loss": 0.0415, |
|
"step": 1124 |
|
}, |
|
{ |
|
"epoch": 0.800711743772242, |
|
"grad_norm": 4.652746123906856, |
|
"learning_rate": 4.1262848751835534e-07, |
|
"loss": 0.1245, |
|
"step": 1125 |
|
}, |
|
{ |
|
"epoch": 0.801423487544484, |
|
"grad_norm": 8.278977337768074, |
|
"learning_rate": 4.111600587371512e-07, |
|
"loss": 0.1232, |
|
"step": 1126 |
|
}, |
|
{ |
|
"epoch": 0.8021352313167259, |
|
"grad_norm": 7.110397902459385, |
|
"learning_rate": 4.0969162995594713e-07, |
|
"loss": 0.0488, |
|
"step": 1127 |
|
}, |
|
{ |
|
"epoch": 0.802846975088968, |
|
"grad_norm": 7.176907903098698, |
|
"learning_rate": 4.08223201174743e-07, |
|
"loss": 0.0988, |
|
"step": 1128 |
|
}, |
|
{ |
|
"epoch": 0.80355871886121, |
|
"grad_norm": 5.053592710724586, |
|
"learning_rate": 4.067547723935389e-07, |
|
"loss": 0.2123, |
|
"step": 1129 |
|
}, |
|
{ |
|
"epoch": 0.8042704626334519, |
|
"grad_norm": 4.508350465827772, |
|
"learning_rate": 4.0528634361233475e-07, |
|
"loss": -0.0135, |
|
"step": 1130 |
|
}, |
|
{ |
|
"epoch": 0.804982206405694, |
|
"grad_norm": 3.865882853833389, |
|
"learning_rate": 4.038179148311306e-07, |
|
"loss": -0.0057, |
|
"step": 1131 |
|
}, |
|
{ |
|
"epoch": 0.805693950177936, |
|
"grad_norm": 6.146207257118892, |
|
"learning_rate": 4.023494860499266e-07, |
|
"loss": 0.0603, |
|
"step": 1132 |
|
}, |
|
{ |
|
"epoch": 0.8064056939501779, |
|
"grad_norm": 6.3975962177776315, |
|
"learning_rate": 4.0088105726872247e-07, |
|
"loss": 0.0544, |
|
"step": 1133 |
|
}, |
|
{ |
|
"epoch": 0.80711743772242, |
|
"grad_norm": 4.223977474195932, |
|
"learning_rate": 3.9941262848751834e-07, |
|
"loss": -0.0547, |
|
"step": 1134 |
|
}, |
|
{ |
|
"epoch": 0.8078291814946619, |
|
"grad_norm": 3.5733018376494896, |
|
"learning_rate": 3.979441997063142e-07, |
|
"loss": 0.042, |
|
"step": 1135 |
|
}, |
|
{ |
|
"epoch": 0.8085409252669039, |
|
"grad_norm": 4.780337776475858, |
|
"learning_rate": 3.964757709251101e-07, |
|
"loss": 0.0426, |
|
"step": 1136 |
|
}, |
|
{ |
|
"epoch": 0.8092526690391459, |
|
"grad_norm": 3.389181647310211, |
|
"learning_rate": 3.95007342143906e-07, |
|
"loss": 0.1313, |
|
"step": 1137 |
|
}, |
|
{ |
|
"epoch": 0.8099644128113879, |
|
"grad_norm": 5.303930753908911, |
|
"learning_rate": 3.935389133627019e-07, |
|
"loss": 0.0883, |
|
"step": 1138 |
|
}, |
|
{ |
|
"epoch": 0.8106761565836299, |
|
"grad_norm": 5.272766160971037, |
|
"learning_rate": 3.9207048458149775e-07, |
|
"loss": 0.0267, |
|
"step": 1139 |
|
}, |
|
{ |
|
"epoch": 0.8113879003558719, |
|
"grad_norm": 5.986879944998904, |
|
"learning_rate": 3.906020558002937e-07, |
|
"loss": 0.0682, |
|
"step": 1140 |
|
}, |
|
{ |
|
"epoch": 0.8120996441281139, |
|
"grad_norm": 5.61959460469509, |
|
"learning_rate": 3.891336270190896e-07, |
|
"loss": 0.0423, |
|
"step": 1141 |
|
}, |
|
{ |
|
"epoch": 0.8128113879003559, |
|
"grad_norm": 6.0012730201640405, |
|
"learning_rate": 3.8766519823788547e-07, |
|
"loss": 0.1035, |
|
"step": 1142 |
|
}, |
|
{ |
|
"epoch": 0.8135231316725978, |
|
"grad_norm": 3.5381491453003573, |
|
"learning_rate": 3.8619676945668134e-07, |
|
"loss": 0.0021, |
|
"step": 1143 |
|
}, |
|
{ |
|
"epoch": 0.8142348754448399, |
|
"grad_norm": 5.831482766300704, |
|
"learning_rate": 3.847283406754772e-07, |
|
"loss": 0.0062, |
|
"step": 1144 |
|
}, |
|
{ |
|
"epoch": 0.8149466192170819, |
|
"grad_norm": 3.4472659016465896, |
|
"learning_rate": 3.832599118942731e-07, |
|
"loss": 0.1066, |
|
"step": 1145 |
|
}, |
|
{ |
|
"epoch": 0.8156583629893238, |
|
"grad_norm": 3.5852129833847344, |
|
"learning_rate": 3.81791483113069e-07, |
|
"loss": 0.055, |
|
"step": 1146 |
|
}, |
|
{ |
|
"epoch": 0.8163701067615659, |
|
"grad_norm": 3.9696159319224646, |
|
"learning_rate": 3.803230543318649e-07, |
|
"loss": 0.0708, |
|
"step": 1147 |
|
}, |
|
{ |
|
"epoch": 0.8170818505338078, |
|
"grad_norm": 6.360338292751588, |
|
"learning_rate": 3.7885462555066075e-07, |
|
"loss": -0.0076, |
|
"step": 1148 |
|
}, |
|
{ |
|
"epoch": 0.8177935943060498, |
|
"grad_norm": 5.288302435982548, |
|
"learning_rate": 3.773861967694567e-07, |
|
"loss": 0.026, |
|
"step": 1149 |
|
}, |
|
{ |
|
"epoch": 0.8185053380782918, |
|
"grad_norm": 7.675548660598207, |
|
"learning_rate": 3.7591776798825255e-07, |
|
"loss": 0.1351, |
|
"step": 1150 |
|
}, |
|
{ |
|
"epoch": 0.8192170818505338, |
|
"grad_norm": 7.627203951141249, |
|
"learning_rate": 3.744493392070485e-07, |
|
"loss": 0.0791, |
|
"step": 1151 |
|
}, |
|
{ |
|
"epoch": 0.8199288256227758, |
|
"grad_norm": 5.0203299157902865, |
|
"learning_rate": 3.7298091042584435e-07, |
|
"loss": 0.0489, |
|
"step": 1152 |
|
}, |
|
{ |
|
"epoch": 0.8206405693950178, |
|
"grad_norm": 9.704403017560951, |
|
"learning_rate": 3.715124816446402e-07, |
|
"loss": 0.2749, |
|
"step": 1153 |
|
}, |
|
{ |
|
"epoch": 0.8213523131672598, |
|
"grad_norm": 5.885301040818936, |
|
"learning_rate": 3.700440528634361e-07, |
|
"loss": 0.121, |
|
"step": 1154 |
|
}, |
|
{ |
|
"epoch": 0.8220640569395018, |
|
"grad_norm": 3.541970782219233, |
|
"learning_rate": 3.6857562408223196e-07, |
|
"loss": -0.0055, |
|
"step": 1155 |
|
}, |
|
{ |
|
"epoch": 0.8227758007117437, |
|
"grad_norm": 4.392345376503781, |
|
"learning_rate": 3.671071953010279e-07, |
|
"loss": 0.0054, |
|
"step": 1156 |
|
}, |
|
{ |
|
"epoch": 0.8234875444839858, |
|
"grad_norm": 3.801961623381928, |
|
"learning_rate": 3.6563876651982376e-07, |
|
"loss": 0.1504, |
|
"step": 1157 |
|
}, |
|
{ |
|
"epoch": 0.8241992882562278, |
|
"grad_norm": 3.9349614997734994, |
|
"learning_rate": 3.641703377386197e-07, |
|
"loss": -0.0417, |
|
"step": 1158 |
|
}, |
|
{ |
|
"epoch": 0.8249110320284697, |
|
"grad_norm": 5.0205635642024555, |
|
"learning_rate": 3.6270190895741555e-07, |
|
"loss": 0.0424, |
|
"step": 1159 |
|
}, |
|
{ |
|
"epoch": 0.8256227758007118, |
|
"grad_norm": 4.786843811164654, |
|
"learning_rate": 3.612334801762114e-07, |
|
"loss": 0.2124, |
|
"step": 1160 |
|
}, |
|
{ |
|
"epoch": 0.8263345195729538, |
|
"grad_norm": 5.1614117304304825, |
|
"learning_rate": 3.5976505139500735e-07, |
|
"loss": 0.0754, |
|
"step": 1161 |
|
}, |
|
{ |
|
"epoch": 0.8270462633451957, |
|
"grad_norm": 4.045083138987178, |
|
"learning_rate": 3.582966226138032e-07, |
|
"loss": 0.0696, |
|
"step": 1162 |
|
}, |
|
{ |
|
"epoch": 0.8277580071174377, |
|
"grad_norm": 4.064066423519107, |
|
"learning_rate": 3.568281938325991e-07, |
|
"loss": 0.0532, |
|
"step": 1163 |
|
}, |
|
{ |
|
"epoch": 0.8284697508896797, |
|
"grad_norm": 4.849379092712681, |
|
"learning_rate": 3.5535976505139496e-07, |
|
"loss": 0.0967, |
|
"step": 1164 |
|
}, |
|
{ |
|
"epoch": 0.8291814946619217, |
|
"grad_norm": 5.927734053233105, |
|
"learning_rate": 3.5389133627019083e-07, |
|
"loss": 0.0504, |
|
"step": 1165 |
|
}, |
|
{ |
|
"epoch": 0.8298932384341637, |
|
"grad_norm": 3.2137744557515866, |
|
"learning_rate": 3.524229074889868e-07, |
|
"loss": -0.0482, |
|
"step": 1166 |
|
}, |
|
{ |
|
"epoch": 0.8306049822064057, |
|
"grad_norm": 4.127693683925132, |
|
"learning_rate": 3.509544787077827e-07, |
|
"loss": -0.0798, |
|
"step": 1167 |
|
}, |
|
{ |
|
"epoch": 0.8313167259786477, |
|
"grad_norm": 6.74093443010439, |
|
"learning_rate": 3.4948604992657856e-07, |
|
"loss": 0.0956, |
|
"step": 1168 |
|
}, |
|
{ |
|
"epoch": 0.8320284697508897, |
|
"grad_norm": 6.502392255278353, |
|
"learning_rate": 3.4801762114537443e-07, |
|
"loss": 0.0345, |
|
"step": 1169 |
|
}, |
|
{ |
|
"epoch": 0.8327402135231317, |
|
"grad_norm": 9.415956678603605, |
|
"learning_rate": 3.465491923641703e-07, |
|
"loss": 0.026, |
|
"step": 1170 |
|
}, |
|
{ |
|
"epoch": 0.8334519572953737, |
|
"grad_norm": 4.913570312692537, |
|
"learning_rate": 3.450807635829662e-07, |
|
"loss": -0.0465, |
|
"step": 1171 |
|
}, |
|
{ |
|
"epoch": 0.8341637010676156, |
|
"grad_norm": 4.883220100174979, |
|
"learning_rate": 3.436123348017621e-07, |
|
"loss": 0.1425, |
|
"step": 1172 |
|
}, |
|
{ |
|
"epoch": 0.8348754448398576, |
|
"grad_norm": 4.353421209791031, |
|
"learning_rate": 3.4214390602055797e-07, |
|
"loss": 0.0989, |
|
"step": 1173 |
|
}, |
|
{ |
|
"epoch": 0.8355871886120997, |
|
"grad_norm": 5.625728136195165, |
|
"learning_rate": 3.4067547723935384e-07, |
|
"loss": 0.0988, |
|
"step": 1174 |
|
}, |
|
{ |
|
"epoch": 0.8362989323843416, |
|
"grad_norm": 5.402349045889487, |
|
"learning_rate": 3.392070484581498e-07, |
|
"loss": 0.0214, |
|
"step": 1175 |
|
}, |
|
{ |
|
"epoch": 0.8370106761565836, |
|
"grad_norm": 6.621572507580396, |
|
"learning_rate": 3.377386196769457e-07, |
|
"loss": 0.1132, |
|
"step": 1176 |
|
}, |
|
{ |
|
"epoch": 0.8377224199288256, |
|
"grad_norm": 6.275645626335489, |
|
"learning_rate": 3.3627019089574156e-07, |
|
"loss": 0.1082, |
|
"step": 1177 |
|
}, |
|
{ |
|
"epoch": 0.8384341637010676, |
|
"grad_norm": 3.2447733299427126, |
|
"learning_rate": 3.3480176211453743e-07, |
|
"loss": 0.0059, |
|
"step": 1178 |
|
}, |
|
{ |
|
"epoch": 0.8391459074733096, |
|
"grad_norm": 4.915025189200319, |
|
"learning_rate": 3.333333333333333e-07, |
|
"loss": 0.0367, |
|
"step": 1179 |
|
}, |
|
{ |
|
"epoch": 0.8398576512455516, |
|
"grad_norm": 5.089077918990016, |
|
"learning_rate": 3.318649045521292e-07, |
|
"loss": 0.1114, |
|
"step": 1180 |
|
}, |
|
{ |
|
"epoch": 0.8405693950177936, |
|
"grad_norm": 3.7565824593214563, |
|
"learning_rate": 3.303964757709251e-07, |
|
"loss": 0.0784, |
|
"step": 1181 |
|
}, |
|
{ |
|
"epoch": 0.8412811387900356, |
|
"grad_norm": 6.680757635403923, |
|
"learning_rate": 3.2892804698972097e-07, |
|
"loss": 0.0629, |
|
"step": 1182 |
|
}, |
|
{ |
|
"epoch": 0.8419928825622776, |
|
"grad_norm": 10.207636678370621, |
|
"learning_rate": 3.2745961820851684e-07, |
|
"loss": 0.1372, |
|
"step": 1183 |
|
}, |
|
{ |
|
"epoch": 0.8427046263345196, |
|
"grad_norm": 4.724435374922215, |
|
"learning_rate": 3.2599118942731276e-07, |
|
"loss": -0.0382, |
|
"step": 1184 |
|
}, |
|
{ |
|
"epoch": 0.8434163701067615, |
|
"grad_norm": 6.11825595065175, |
|
"learning_rate": 3.245227606461087e-07, |
|
"loss": 0.0118, |
|
"step": 1185 |
|
}, |
|
{ |
|
"epoch": 0.8441281138790035, |
|
"grad_norm": 4.777281267158339, |
|
"learning_rate": 3.2305433186490456e-07, |
|
"loss": 0.1171, |
|
"step": 1186 |
|
}, |
|
{ |
|
"epoch": 0.8448398576512456, |
|
"grad_norm": 3.8501119919378795, |
|
"learning_rate": 3.2158590308370043e-07, |
|
"loss": -0.0024, |
|
"step": 1187 |
|
}, |
|
{ |
|
"epoch": 0.8455516014234875, |
|
"grad_norm": 3.3414392776052138, |
|
"learning_rate": 3.201174743024963e-07, |
|
"loss": 0.1114, |
|
"step": 1188 |
|
}, |
|
{ |
|
"epoch": 0.8462633451957295, |
|
"grad_norm": 7.324431767735627, |
|
"learning_rate": 3.186490455212922e-07, |
|
"loss": 0.083, |
|
"step": 1189 |
|
}, |
|
{ |
|
"epoch": 0.8469750889679716, |
|
"grad_norm": 4.751681381589935, |
|
"learning_rate": 3.171806167400881e-07, |
|
"loss": 0.1198, |
|
"step": 1190 |
|
}, |
|
{ |
|
"epoch": 0.8476868327402135, |
|
"grad_norm": 4.526551224178008, |
|
"learning_rate": 3.1571218795888397e-07, |
|
"loss": 0.0667, |
|
"step": 1191 |
|
}, |
|
{ |
|
"epoch": 0.8483985765124555, |
|
"grad_norm": 5.568011350269368, |
|
"learning_rate": 3.1424375917767984e-07, |
|
"loss": 0.1299, |
|
"step": 1192 |
|
}, |
|
{ |
|
"epoch": 0.8491103202846975, |
|
"grad_norm": 4.716195128541494, |
|
"learning_rate": 3.1277533039647577e-07, |
|
"loss": 0.0358, |
|
"step": 1193 |
|
}, |
|
{ |
|
"epoch": 0.8498220640569395, |
|
"grad_norm": 4.930553523437846, |
|
"learning_rate": 3.1130690161527164e-07, |
|
"loss": 0.0242, |
|
"step": 1194 |
|
}, |
|
{ |
|
"epoch": 0.8505338078291815, |
|
"grad_norm": 4.167079498184484, |
|
"learning_rate": 3.0983847283406756e-07, |
|
"loss": 0.1109, |
|
"step": 1195 |
|
}, |
|
{ |
|
"epoch": 0.8512455516014235, |
|
"grad_norm": 8.865833115177155, |
|
"learning_rate": 3.0837004405286343e-07, |
|
"loss": 0.1126, |
|
"step": 1196 |
|
}, |
|
{ |
|
"epoch": 0.8519572953736655, |
|
"grad_norm": 6.24647575197954, |
|
"learning_rate": 3.069016152716593e-07, |
|
"loss": 0.1096, |
|
"step": 1197 |
|
}, |
|
{ |
|
"epoch": 0.8526690391459075, |
|
"grad_norm": 3.8633019858518334, |
|
"learning_rate": 3.054331864904552e-07, |
|
"loss": 0.0957, |
|
"step": 1198 |
|
}, |
|
{ |
|
"epoch": 0.8533807829181494, |
|
"grad_norm": 4.648285884147683, |
|
"learning_rate": 3.0396475770925105e-07, |
|
"loss": 0.0063, |
|
"step": 1199 |
|
}, |
|
{ |
|
"epoch": 0.8540925266903915, |
|
"grad_norm": 3.5454437460212467, |
|
"learning_rate": 3.0249632892804697e-07, |
|
"loss": 0.1057, |
|
"step": 1200 |
|
}, |
|
{ |
|
"epoch": 0.8548042704626334, |
|
"grad_norm": 3.751280248177442, |
|
"learning_rate": 3.010279001468429e-07, |
|
"loss": 0.0517, |
|
"step": 1201 |
|
}, |
|
{ |
|
"epoch": 0.8555160142348754, |
|
"grad_norm": 4.767039805037044, |
|
"learning_rate": 2.9955947136563877e-07, |
|
"loss": 0.1117, |
|
"step": 1202 |
|
}, |
|
{ |
|
"epoch": 0.8562277580071175, |
|
"grad_norm": 5.2435561097664944, |
|
"learning_rate": 2.9809104258443464e-07, |
|
"loss": -0.0266, |
|
"step": 1203 |
|
}, |
|
{ |
|
"epoch": 0.8569395017793594, |
|
"grad_norm": 4.347412541251751, |
|
"learning_rate": 2.966226138032305e-07, |
|
"loss": 0.157, |
|
"step": 1204 |
|
}, |
|
{ |
|
"epoch": 0.8576512455516014, |
|
"grad_norm": 3.863655095386486, |
|
"learning_rate": 2.9515418502202644e-07, |
|
"loss": 0.0768, |
|
"step": 1205 |
|
}, |
|
{ |
|
"epoch": 0.8583629893238434, |
|
"grad_norm": 5.329615051797061, |
|
"learning_rate": 2.936857562408223e-07, |
|
"loss": 0.0112, |
|
"step": 1206 |
|
}, |
|
{ |
|
"epoch": 0.8590747330960854, |
|
"grad_norm": 4.979521202755483, |
|
"learning_rate": 2.922173274596182e-07, |
|
"loss": 0.0948, |
|
"step": 1207 |
|
}, |
|
{ |
|
"epoch": 0.8597864768683274, |
|
"grad_norm": 3.8038100967899813, |
|
"learning_rate": 2.9074889867841405e-07, |
|
"loss": 0.1154, |
|
"step": 1208 |
|
}, |
|
{ |
|
"epoch": 0.8604982206405694, |
|
"grad_norm": 4.38046881966738, |
|
"learning_rate": 2.8928046989721e-07, |
|
"loss": -0.1282, |
|
"step": 1209 |
|
}, |
|
{ |
|
"epoch": 0.8612099644128114, |
|
"grad_norm": 3.1962156628258764, |
|
"learning_rate": 2.878120411160059e-07, |
|
"loss": -0.0098, |
|
"step": 1210 |
|
}, |
|
{ |
|
"epoch": 0.8619217081850534, |
|
"grad_norm": 3.725578258101522, |
|
"learning_rate": 2.8634361233480177e-07, |
|
"loss": 0.0316, |
|
"step": 1211 |
|
}, |
|
{ |
|
"epoch": 0.8626334519572953, |
|
"grad_norm": 3.162711450797923, |
|
"learning_rate": 2.8487518355359764e-07, |
|
"loss": 0.0854, |
|
"step": 1212 |
|
}, |
|
{ |
|
"epoch": 0.8633451957295374, |
|
"grad_norm": 6.603746980474714, |
|
"learning_rate": 2.834067547723935e-07, |
|
"loss": 0.1426, |
|
"step": 1213 |
|
}, |
|
{ |
|
"epoch": 0.8640569395017793, |
|
"grad_norm": 5.756610884728429, |
|
"learning_rate": 2.8193832599118944e-07, |
|
"loss": -0.0035, |
|
"step": 1214 |
|
}, |
|
{ |
|
"epoch": 0.8647686832740213, |
|
"grad_norm": 7.9170728345042845, |
|
"learning_rate": 2.804698972099853e-07, |
|
"loss": 0.001, |
|
"step": 1215 |
|
}, |
|
{ |
|
"epoch": 0.8654804270462634, |
|
"grad_norm": 3.5693745455760797, |
|
"learning_rate": 2.790014684287812e-07, |
|
"loss": -0.0611, |
|
"step": 1216 |
|
}, |
|
{ |
|
"epoch": 0.8661921708185053, |
|
"grad_norm": 5.3110198707848095, |
|
"learning_rate": 2.7753303964757705e-07, |
|
"loss": 0.0459, |
|
"step": 1217 |
|
}, |
|
{ |
|
"epoch": 0.8669039145907473, |
|
"grad_norm": 6.727355839358839, |
|
"learning_rate": 2.760646108663729e-07, |
|
"loss": 0.1669, |
|
"step": 1218 |
|
}, |
|
{ |
|
"epoch": 0.8676156583629894, |
|
"grad_norm": 6.640828064450381, |
|
"learning_rate": 2.745961820851689e-07, |
|
"loss": -0.019, |
|
"step": 1219 |
|
}, |
|
{ |
|
"epoch": 0.8683274021352313, |
|
"grad_norm": 3.3816289439070664, |
|
"learning_rate": 2.731277533039648e-07, |
|
"loss": -0.037, |
|
"step": 1220 |
|
}, |
|
{ |
|
"epoch": 0.8690391459074733, |
|
"grad_norm": 3.6714876011147255, |
|
"learning_rate": 2.7165932452276065e-07, |
|
"loss": 0.0014, |
|
"step": 1221 |
|
}, |
|
{ |
|
"epoch": 0.8697508896797153, |
|
"grad_norm": 4.3569622404376664, |
|
"learning_rate": 2.701908957415565e-07, |
|
"loss": 0.0987, |
|
"step": 1222 |
|
}, |
|
{ |
|
"epoch": 0.8704626334519573, |
|
"grad_norm": 6.344404816304247, |
|
"learning_rate": 2.687224669603524e-07, |
|
"loss": 0.0991, |
|
"step": 1223 |
|
}, |
|
{ |
|
"epoch": 0.8711743772241993, |
|
"grad_norm": 5.361162693827478, |
|
"learning_rate": 2.672540381791483e-07, |
|
"loss": 0.0084, |
|
"step": 1224 |
|
}, |
|
{ |
|
"epoch": 0.8718861209964412, |
|
"grad_norm": 7.6540302406353575, |
|
"learning_rate": 2.657856093979442e-07, |
|
"loss": 0.0212, |
|
"step": 1225 |
|
}, |
|
{ |
|
"epoch": 0.8725978647686833, |
|
"grad_norm": 5.700938351675065, |
|
"learning_rate": 2.6431718061674006e-07, |
|
"loss": 0.0438, |
|
"step": 1226 |
|
}, |
|
{ |
|
"epoch": 0.8733096085409253, |
|
"grad_norm": 8.474665866442852, |
|
"learning_rate": 2.6284875183553593e-07, |
|
"loss": 0.092, |
|
"step": 1227 |
|
}, |
|
{ |
|
"epoch": 0.8740213523131672, |
|
"grad_norm": 4.7571553996414595, |
|
"learning_rate": 2.6138032305433185e-07, |
|
"loss": 0.0078, |
|
"step": 1228 |
|
}, |
|
{ |
|
"epoch": 0.8747330960854093, |
|
"grad_norm": 3.8293542250786787, |
|
"learning_rate": 2.599118942731278e-07, |
|
"loss": 0.0439, |
|
"step": 1229 |
|
}, |
|
{ |
|
"epoch": 0.8754448398576512, |
|
"grad_norm": 2.71698186095146, |
|
"learning_rate": 2.5844346549192365e-07, |
|
"loss": -0.0106, |
|
"step": 1230 |
|
}, |
|
{ |
|
"epoch": 0.8761565836298932, |
|
"grad_norm": 4.79252959511404, |
|
"learning_rate": 2.569750367107195e-07, |
|
"loss": 0.0729, |
|
"step": 1231 |
|
}, |
|
{ |
|
"epoch": 0.8768683274021353, |
|
"grad_norm": 3.3103671134823385, |
|
"learning_rate": 2.555066079295154e-07, |
|
"loss": 0.0302, |
|
"step": 1232 |
|
}, |
|
{ |
|
"epoch": 0.8775800711743772, |
|
"grad_norm": 8.561876260356502, |
|
"learning_rate": 2.5403817914831126e-07, |
|
"loss": -0.0087, |
|
"step": 1233 |
|
}, |
|
{ |
|
"epoch": 0.8782918149466192, |
|
"grad_norm": 3.919649138416107, |
|
"learning_rate": 2.525697503671072e-07, |
|
"loss": 0.0727, |
|
"step": 1234 |
|
}, |
|
{ |
|
"epoch": 0.8790035587188612, |
|
"grad_norm": 3.8392293754828977, |
|
"learning_rate": 2.5110132158590306e-07, |
|
"loss": 0.0023, |
|
"step": 1235 |
|
}, |
|
{ |
|
"epoch": 0.8797153024911032, |
|
"grad_norm": 4.715863690361431, |
|
"learning_rate": 2.49632892804699e-07, |
|
"loss": 0.0673, |
|
"step": 1236 |
|
}, |
|
{ |
|
"epoch": 0.8804270462633452, |
|
"grad_norm": 4.228383008890401, |
|
"learning_rate": 2.4816446402349485e-07, |
|
"loss": 0.0424, |
|
"step": 1237 |
|
}, |
|
{ |
|
"epoch": 0.8811387900355871, |
|
"grad_norm": 5.061146307910219, |
|
"learning_rate": 2.466960352422907e-07, |
|
"loss": 0.1331, |
|
"step": 1238 |
|
}, |
|
{ |
|
"epoch": 0.8818505338078292, |
|
"grad_norm": 3.7345466095506494, |
|
"learning_rate": 2.452276064610866e-07, |
|
"loss": 0.2358, |
|
"step": 1239 |
|
}, |
|
{ |
|
"epoch": 0.8825622775800712, |
|
"grad_norm": 5.272468624826541, |
|
"learning_rate": 2.437591776798825e-07, |
|
"loss": 0.1534, |
|
"step": 1240 |
|
}, |
|
{ |
|
"epoch": 0.8832740213523131, |
|
"grad_norm": 3.1883609581358936, |
|
"learning_rate": 2.422907488986784e-07, |
|
"loss": 0.0114, |
|
"step": 1241 |
|
}, |
|
{ |
|
"epoch": 0.8839857651245552, |
|
"grad_norm": 5.573841149011958, |
|
"learning_rate": 2.408223201174743e-07, |
|
"loss": 0.0248, |
|
"step": 1242 |
|
}, |
|
{ |
|
"epoch": 0.8846975088967971, |
|
"grad_norm": 4.224567838156875, |
|
"learning_rate": 2.393538913362702e-07, |
|
"loss": 0.0828, |
|
"step": 1243 |
|
}, |
|
{ |
|
"epoch": 0.8854092526690391, |
|
"grad_norm": 3.9928614575669577, |
|
"learning_rate": 2.3788546255506606e-07, |
|
"loss": 0.0194, |
|
"step": 1244 |
|
}, |
|
{ |
|
"epoch": 0.8861209964412812, |
|
"grad_norm": 6.428441164422629, |
|
"learning_rate": 2.3641703377386196e-07, |
|
"loss": 0.2083, |
|
"step": 1245 |
|
}, |
|
{ |
|
"epoch": 0.8868327402135231, |
|
"grad_norm": 6.2312799481712, |
|
"learning_rate": 2.3494860499265783e-07, |
|
"loss": 0.0811, |
|
"step": 1246 |
|
}, |
|
{ |
|
"epoch": 0.8875444839857651, |
|
"grad_norm": 4.068910675860895, |
|
"learning_rate": 2.3348017621145376e-07, |
|
"loss": 0.0592, |
|
"step": 1247 |
|
}, |
|
{ |
|
"epoch": 0.8882562277580072, |
|
"grad_norm": 5.52784115335928, |
|
"learning_rate": 2.3201174743024963e-07, |
|
"loss": 0.089, |
|
"step": 1248 |
|
}, |
|
{ |
|
"epoch": 0.8889679715302491, |
|
"grad_norm": 4.580499547509674, |
|
"learning_rate": 2.305433186490455e-07, |
|
"loss": 0.1023, |
|
"step": 1249 |
|
}, |
|
{ |
|
"epoch": 0.8896797153024911, |
|
"grad_norm": 7.07961420893908, |
|
"learning_rate": 2.290748898678414e-07, |
|
"loss": 0.1685, |
|
"step": 1250 |
|
}, |
|
{ |
|
"epoch": 0.890391459074733, |
|
"grad_norm": 4.178254582429226, |
|
"learning_rate": 2.2760646108663727e-07, |
|
"loss": 0.0558, |
|
"step": 1251 |
|
}, |
|
{ |
|
"epoch": 0.8911032028469751, |
|
"grad_norm": 8.001441825637583, |
|
"learning_rate": 2.261380323054332e-07, |
|
"loss": 0.1452, |
|
"step": 1252 |
|
}, |
|
{ |
|
"epoch": 0.8918149466192171, |
|
"grad_norm": 3.946074459411681, |
|
"learning_rate": 2.2466960352422906e-07, |
|
"loss": 0.0363, |
|
"step": 1253 |
|
}, |
|
{ |
|
"epoch": 0.892526690391459, |
|
"grad_norm": 8.4028417775753, |
|
"learning_rate": 2.2320117474302496e-07, |
|
"loss": 0.0724, |
|
"step": 1254 |
|
}, |
|
{ |
|
"epoch": 0.8932384341637011, |
|
"grad_norm": 3.9318556520698187, |
|
"learning_rate": 2.2173274596182083e-07, |
|
"loss": -0.0137, |
|
"step": 1255 |
|
}, |
|
{ |
|
"epoch": 0.8939501779359431, |
|
"grad_norm": 3.458687974754859, |
|
"learning_rate": 2.2026431718061673e-07, |
|
"loss": -0.0946, |
|
"step": 1256 |
|
}, |
|
{ |
|
"epoch": 0.894661921708185, |
|
"grad_norm": 6.622453308056072, |
|
"learning_rate": 2.1879588839941263e-07, |
|
"loss": 0.172, |
|
"step": 1257 |
|
}, |
|
{ |
|
"epoch": 0.8953736654804271, |
|
"grad_norm": 3.9673919757254, |
|
"learning_rate": 2.173274596182085e-07, |
|
"loss": 0.0234, |
|
"step": 1258 |
|
}, |
|
{ |
|
"epoch": 0.896085409252669, |
|
"grad_norm": 5.118865458731527, |
|
"learning_rate": 2.158590308370044e-07, |
|
"loss": 0.0551, |
|
"step": 1259 |
|
}, |
|
{ |
|
"epoch": 0.896797153024911, |
|
"grad_norm": 6.76356471453516, |
|
"learning_rate": 2.143906020558003e-07, |
|
"loss": 0.1191, |
|
"step": 1260 |
|
}, |
|
{ |
|
"epoch": 0.8975088967971531, |
|
"grad_norm": 4.301543318112993, |
|
"learning_rate": 2.1292217327459617e-07, |
|
"loss": 0.1043, |
|
"step": 1261 |
|
}, |
|
{ |
|
"epoch": 0.898220640569395, |
|
"grad_norm": 4.590072093721571, |
|
"learning_rate": 2.1145374449339207e-07, |
|
"loss": 0.0605, |
|
"step": 1262 |
|
}, |
|
{ |
|
"epoch": 0.898932384341637, |
|
"grad_norm": 3.055191505785509, |
|
"learning_rate": 2.0998531571218794e-07, |
|
"loss": 0.031, |
|
"step": 1263 |
|
}, |
|
{ |
|
"epoch": 0.899644128113879, |
|
"grad_norm": 4.323248915562587, |
|
"learning_rate": 2.0851688693098384e-07, |
|
"loss": 0.0008, |
|
"step": 1264 |
|
}, |
|
{ |
|
"epoch": 0.900355871886121, |
|
"grad_norm": 4.7286854413693, |
|
"learning_rate": 2.0704845814977973e-07, |
|
"loss": -0.0549, |
|
"step": 1265 |
|
}, |
|
{ |
|
"epoch": 0.901067615658363, |
|
"grad_norm": 3.99503709950128, |
|
"learning_rate": 2.055800293685756e-07, |
|
"loss": 0.0984, |
|
"step": 1266 |
|
}, |
|
{ |
|
"epoch": 0.9017793594306049, |
|
"grad_norm": 6.636398298712216, |
|
"learning_rate": 2.041116005873715e-07, |
|
"loss": 0.082, |
|
"step": 1267 |
|
}, |
|
{ |
|
"epoch": 0.902491103202847, |
|
"grad_norm": 4.989754670558792, |
|
"learning_rate": 2.0264317180616737e-07, |
|
"loss": 0.0104, |
|
"step": 1268 |
|
}, |
|
{ |
|
"epoch": 0.903202846975089, |
|
"grad_norm": 6.410286278483913, |
|
"learning_rate": 2.011747430249633e-07, |
|
"loss": 0.0933, |
|
"step": 1269 |
|
}, |
|
{ |
|
"epoch": 0.9039145907473309, |
|
"grad_norm": 4.920601816354115, |
|
"learning_rate": 1.9970631424375917e-07, |
|
"loss": -0.0104, |
|
"step": 1270 |
|
}, |
|
{ |
|
"epoch": 0.904626334519573, |
|
"grad_norm": 4.892842822804742, |
|
"learning_rate": 1.9823788546255504e-07, |
|
"loss": 0.0739, |
|
"step": 1271 |
|
}, |
|
{ |
|
"epoch": 0.9053380782918149, |
|
"grad_norm": 6.840931769367717, |
|
"learning_rate": 1.9676945668135094e-07, |
|
"loss": 0.2031, |
|
"step": 1272 |
|
}, |
|
{ |
|
"epoch": 0.9060498220640569, |
|
"grad_norm": 4.812134766033649, |
|
"learning_rate": 1.9530102790014684e-07, |
|
"loss": 0.2425, |
|
"step": 1273 |
|
}, |
|
{ |
|
"epoch": 0.906761565836299, |
|
"grad_norm": 5.42050215080619, |
|
"learning_rate": 1.9383259911894274e-07, |
|
"loss": 0.0727, |
|
"step": 1274 |
|
}, |
|
{ |
|
"epoch": 0.9074733096085409, |
|
"grad_norm": 3.8417355600883645, |
|
"learning_rate": 1.923641703377386e-07, |
|
"loss": -0.0279, |
|
"step": 1275 |
|
}, |
|
{ |
|
"epoch": 0.9081850533807829, |
|
"grad_norm": 13.373758133606684, |
|
"learning_rate": 1.908957415565345e-07, |
|
"loss": -0.0461, |
|
"step": 1276 |
|
}, |
|
{ |
|
"epoch": 0.908896797153025, |
|
"grad_norm": 5.112087637912734, |
|
"learning_rate": 1.8942731277533038e-07, |
|
"loss": 0.0965, |
|
"step": 1277 |
|
}, |
|
{ |
|
"epoch": 0.9096085409252669, |
|
"grad_norm": 4.630412671872866, |
|
"learning_rate": 1.8795888399412628e-07, |
|
"loss": 0.1044, |
|
"step": 1278 |
|
}, |
|
{ |
|
"epoch": 0.9103202846975089, |
|
"grad_norm": 3.9406530683275625, |
|
"learning_rate": 1.8649045521292217e-07, |
|
"loss": 0.1613, |
|
"step": 1279 |
|
}, |
|
{ |
|
"epoch": 0.9110320284697508, |
|
"grad_norm": 6.648033551183105, |
|
"learning_rate": 1.8502202643171804e-07, |
|
"loss": -0.025, |
|
"step": 1280 |
|
}, |
|
{ |
|
"epoch": 0.9117437722419929, |
|
"grad_norm": 4.950505764486973, |
|
"learning_rate": 1.8355359765051394e-07, |
|
"loss": 0.1267, |
|
"step": 1281 |
|
}, |
|
{ |
|
"epoch": 0.9124555160142349, |
|
"grad_norm": 5.032867929697572, |
|
"learning_rate": 1.8208516886930984e-07, |
|
"loss": 0.0465, |
|
"step": 1282 |
|
}, |
|
{ |
|
"epoch": 0.9131672597864768, |
|
"grad_norm": 6.276816250394059, |
|
"learning_rate": 1.806167400881057e-07, |
|
"loss": 0.0473, |
|
"step": 1283 |
|
}, |
|
{ |
|
"epoch": 0.9138790035587189, |
|
"grad_norm": 2.9365357784700774, |
|
"learning_rate": 1.791483113069016e-07, |
|
"loss": -0.0447, |
|
"step": 1284 |
|
}, |
|
{ |
|
"epoch": 0.9145907473309609, |
|
"grad_norm": 3.8896844791827543, |
|
"learning_rate": 1.7767988252569748e-07, |
|
"loss": -0.0443, |
|
"step": 1285 |
|
}, |
|
{ |
|
"epoch": 0.9153024911032028, |
|
"grad_norm": 10.739547615966208, |
|
"learning_rate": 1.762114537444934e-07, |
|
"loss": 0.1637, |
|
"step": 1286 |
|
}, |
|
{ |
|
"epoch": 0.9160142348754449, |
|
"grad_norm": 5.993350158656473, |
|
"learning_rate": 1.7474302496328928e-07, |
|
"loss": 0.0432, |
|
"step": 1287 |
|
}, |
|
{ |
|
"epoch": 0.9167259786476868, |
|
"grad_norm": 6.71234933126412, |
|
"learning_rate": 1.7327459618208515e-07, |
|
"loss": 0.1618, |
|
"step": 1288 |
|
}, |
|
{ |
|
"epoch": 0.9174377224199288, |
|
"grad_norm": 6.316009150415693, |
|
"learning_rate": 1.7180616740088105e-07, |
|
"loss": 0.124, |
|
"step": 1289 |
|
}, |
|
{ |
|
"epoch": 0.9181494661921709, |
|
"grad_norm": 5.748994822339394, |
|
"learning_rate": 1.7033773861967692e-07, |
|
"loss": -0.0388, |
|
"step": 1290 |
|
}, |
|
{ |
|
"epoch": 0.9188612099644128, |
|
"grad_norm": 4.14975638536304, |
|
"learning_rate": 1.6886930983847284e-07, |
|
"loss": 0.0756, |
|
"step": 1291 |
|
}, |
|
{ |
|
"epoch": 0.9195729537366548, |
|
"grad_norm": 5.056815641283716, |
|
"learning_rate": 1.6740088105726871e-07, |
|
"loss": 0.0242, |
|
"step": 1292 |
|
}, |
|
{ |
|
"epoch": 0.9202846975088967, |
|
"grad_norm": 2.4390976389492653, |
|
"learning_rate": 1.659324522760646e-07, |
|
"loss": 0.043, |
|
"step": 1293 |
|
}, |
|
{ |
|
"epoch": 0.9209964412811388, |
|
"grad_norm": 3.907872953866281, |
|
"learning_rate": 1.6446402349486048e-07, |
|
"loss": 0.0113, |
|
"step": 1294 |
|
}, |
|
{ |
|
"epoch": 0.9217081850533808, |
|
"grad_norm": 5.101758967730574, |
|
"learning_rate": 1.6299559471365638e-07, |
|
"loss": 0.0432, |
|
"step": 1295 |
|
}, |
|
{ |
|
"epoch": 0.9224199288256227, |
|
"grad_norm": 5.3237433441686575, |
|
"learning_rate": 1.6152716593245228e-07, |
|
"loss": 0.1411, |
|
"step": 1296 |
|
}, |
|
{ |
|
"epoch": 0.9231316725978648, |
|
"grad_norm": 14.346407110236912, |
|
"learning_rate": 1.6005873715124815e-07, |
|
"loss": 0.0642, |
|
"step": 1297 |
|
}, |
|
{ |
|
"epoch": 0.9238434163701068, |
|
"grad_norm": 4.540641365625617, |
|
"learning_rate": 1.5859030837004405e-07, |
|
"loss": -0.0814, |
|
"step": 1298 |
|
}, |
|
{ |
|
"epoch": 0.9245551601423487, |
|
"grad_norm": 4.059461660539323, |
|
"learning_rate": 1.5712187958883992e-07, |
|
"loss": 0.1611, |
|
"step": 1299 |
|
}, |
|
{ |
|
"epoch": 0.9252669039145908, |
|
"grad_norm": 4.607153405634738, |
|
"learning_rate": 1.5565345080763582e-07, |
|
"loss": 0.1309, |
|
"step": 1300 |
|
}, |
|
{ |
|
"epoch": 0.9259786476868327, |
|
"grad_norm": 4.3752650589350015, |
|
"learning_rate": 1.5418502202643172e-07, |
|
"loss": 0.1112, |
|
"step": 1301 |
|
}, |
|
{ |
|
"epoch": 0.9266903914590747, |
|
"grad_norm": 4.68187610223401, |
|
"learning_rate": 1.527165932452276e-07, |
|
"loss": 0.0666, |
|
"step": 1302 |
|
}, |
|
{ |
|
"epoch": 0.9274021352313168, |
|
"grad_norm": 5.2790054878774315, |
|
"learning_rate": 1.5124816446402349e-07, |
|
"loss": 0.0326, |
|
"step": 1303 |
|
}, |
|
{ |
|
"epoch": 0.9281138790035587, |
|
"grad_norm": 4.7191667183286565, |
|
"learning_rate": 1.4977973568281938e-07, |
|
"loss": 0.0791, |
|
"step": 1304 |
|
}, |
|
{ |
|
"epoch": 0.9288256227758007, |
|
"grad_norm": 7.96740902856242, |
|
"learning_rate": 1.4831130690161526e-07, |
|
"loss": 0.2091, |
|
"step": 1305 |
|
}, |
|
{ |
|
"epoch": 0.9295373665480428, |
|
"grad_norm": 5.549196772831938, |
|
"learning_rate": 1.4684287812041115e-07, |
|
"loss": 0.093, |
|
"step": 1306 |
|
}, |
|
{ |
|
"epoch": 0.9302491103202847, |
|
"grad_norm": 3.030689305626086, |
|
"learning_rate": 1.4537444933920703e-07, |
|
"loss": -0.1049, |
|
"step": 1307 |
|
}, |
|
{ |
|
"epoch": 0.9309608540925267, |
|
"grad_norm": 4.364531936282188, |
|
"learning_rate": 1.4390602055800295e-07, |
|
"loss": 0.0521, |
|
"step": 1308 |
|
}, |
|
{ |
|
"epoch": 0.9316725978647686, |
|
"grad_norm": 3.8547858625772826, |
|
"learning_rate": 1.4243759177679882e-07, |
|
"loss": 0.1077, |
|
"step": 1309 |
|
}, |
|
{ |
|
"epoch": 0.9323843416370107, |
|
"grad_norm": 5.420010028843909, |
|
"learning_rate": 1.4096916299559472e-07, |
|
"loss": 0.0816, |
|
"step": 1310 |
|
}, |
|
{ |
|
"epoch": 0.9330960854092527, |
|
"grad_norm": 4.916350834072121, |
|
"learning_rate": 1.395007342143906e-07, |
|
"loss": 0.1923, |
|
"step": 1311 |
|
}, |
|
{ |
|
"epoch": 0.9338078291814946, |
|
"grad_norm": 5.0089287190817355, |
|
"learning_rate": 1.3803230543318646e-07, |
|
"loss": 0.0315, |
|
"step": 1312 |
|
}, |
|
{ |
|
"epoch": 0.9345195729537367, |
|
"grad_norm": 3.428274363182289, |
|
"learning_rate": 1.365638766519824e-07, |
|
"loss": 0.0982, |
|
"step": 1313 |
|
}, |
|
{ |
|
"epoch": 0.9352313167259787, |
|
"grad_norm": 4.359991652104497, |
|
"learning_rate": 1.3509544787077826e-07, |
|
"loss": 0.1368, |
|
"step": 1314 |
|
}, |
|
{ |
|
"epoch": 0.9359430604982206, |
|
"grad_norm": 4.532641387939283, |
|
"learning_rate": 1.3362701908957416e-07, |
|
"loss": 0.0313, |
|
"step": 1315 |
|
}, |
|
{ |
|
"epoch": 0.9366548042704627, |
|
"grad_norm": 4.1063156053014, |
|
"learning_rate": 1.3215859030837003e-07, |
|
"loss": 0.0529, |
|
"step": 1316 |
|
}, |
|
{ |
|
"epoch": 0.9373665480427046, |
|
"grad_norm": 4.411527678910333, |
|
"learning_rate": 1.3069016152716593e-07, |
|
"loss": 0.0312, |
|
"step": 1317 |
|
}, |
|
{ |
|
"epoch": 0.9380782918149466, |
|
"grad_norm": 5.7370203553214845, |
|
"learning_rate": 1.2922173274596182e-07, |
|
"loss": 0.02, |
|
"step": 1318 |
|
}, |
|
{ |
|
"epoch": 0.9387900355871887, |
|
"grad_norm": 3.874441168082076, |
|
"learning_rate": 1.277533039647577e-07, |
|
"loss": 0.1642, |
|
"step": 1319 |
|
}, |
|
{ |
|
"epoch": 0.9395017793594306, |
|
"grad_norm": 5.200612054163917, |
|
"learning_rate": 1.262848751835536e-07, |
|
"loss": -0.0318, |
|
"step": 1320 |
|
}, |
|
{ |
|
"epoch": 0.9402135231316726, |
|
"grad_norm": 6.360973410339866, |
|
"learning_rate": 1.248164464023495e-07, |
|
"loss": 0.1692, |
|
"step": 1321 |
|
}, |
|
{ |
|
"epoch": 0.9409252669039145, |
|
"grad_norm": 3.8992922654250495, |
|
"learning_rate": 1.2334801762114536e-07, |
|
"loss": -0.0379, |
|
"step": 1322 |
|
}, |
|
{ |
|
"epoch": 0.9416370106761566, |
|
"grad_norm": 4.993974679182252, |
|
"learning_rate": 1.2187958883994126e-07, |
|
"loss": 0.0495, |
|
"step": 1323 |
|
}, |
|
{ |
|
"epoch": 0.9423487544483986, |
|
"grad_norm": 4.408299395617472, |
|
"learning_rate": 1.2041116005873716e-07, |
|
"loss": 0.022, |
|
"step": 1324 |
|
}, |
|
{ |
|
"epoch": 0.9430604982206405, |
|
"grad_norm": 4.1691217945852745, |
|
"learning_rate": 1.1894273127753303e-07, |
|
"loss": -0.0083, |
|
"step": 1325 |
|
}, |
|
{ |
|
"epoch": 0.9437722419928826, |
|
"grad_norm": 3.5294179299244606, |
|
"learning_rate": 1.1747430249632892e-07, |
|
"loss": 0.0035, |
|
"step": 1326 |
|
}, |
|
{ |
|
"epoch": 0.9444839857651246, |
|
"grad_norm": 3.236758031475521, |
|
"learning_rate": 1.1600587371512481e-07, |
|
"loss": 0.0153, |
|
"step": 1327 |
|
}, |
|
{ |
|
"epoch": 0.9451957295373665, |
|
"grad_norm": 5.664962886409278, |
|
"learning_rate": 1.145374449339207e-07, |
|
"loss": 0.0604, |
|
"step": 1328 |
|
}, |
|
{ |
|
"epoch": 0.9459074733096086, |
|
"grad_norm": 5.32944058372216, |
|
"learning_rate": 1.130690161527166e-07, |
|
"loss": 0.0902, |
|
"step": 1329 |
|
}, |
|
{ |
|
"epoch": 0.9466192170818505, |
|
"grad_norm": 4.743520835196198, |
|
"learning_rate": 1.1160058737151248e-07, |
|
"loss": 0.0022, |
|
"step": 1330 |
|
}, |
|
{ |
|
"epoch": 0.9473309608540925, |
|
"grad_norm": 5.603546599883891, |
|
"learning_rate": 1.1013215859030837e-07, |
|
"loss": 0.0725, |
|
"step": 1331 |
|
}, |
|
{ |
|
"epoch": 0.9480427046263346, |
|
"grad_norm": 3.9105017144546097, |
|
"learning_rate": 1.0866372980910425e-07, |
|
"loss": -0.0624, |
|
"step": 1332 |
|
}, |
|
{ |
|
"epoch": 0.9487544483985765, |
|
"grad_norm": 3.991176887891522, |
|
"learning_rate": 1.0719530102790015e-07, |
|
"loss": 0.1552, |
|
"step": 1333 |
|
}, |
|
{ |
|
"epoch": 0.9494661921708185, |
|
"grad_norm": 4.159061943442253, |
|
"learning_rate": 1.0572687224669603e-07, |
|
"loss": -0.0307, |
|
"step": 1334 |
|
}, |
|
{ |
|
"epoch": 0.9501779359430605, |
|
"grad_norm": 4.900008197699716, |
|
"learning_rate": 1.0425844346549192e-07, |
|
"loss": 0.1238, |
|
"step": 1335 |
|
}, |
|
{ |
|
"epoch": 0.9508896797153025, |
|
"grad_norm": 5.8027748047438745, |
|
"learning_rate": 1.027900146842878e-07, |
|
"loss": -0.0756, |
|
"step": 1336 |
|
}, |
|
{ |
|
"epoch": 0.9516014234875445, |
|
"grad_norm": 5.125476908010209, |
|
"learning_rate": 1.0132158590308369e-07, |
|
"loss": 0.0641, |
|
"step": 1337 |
|
}, |
|
{ |
|
"epoch": 0.9523131672597864, |
|
"grad_norm": 4.25671529369216, |
|
"learning_rate": 9.985315712187959e-08, |
|
"loss": -0.006, |
|
"step": 1338 |
|
}, |
|
{ |
|
"epoch": 0.9530249110320285, |
|
"grad_norm": 4.7119212475657655, |
|
"learning_rate": 9.838472834067547e-08, |
|
"loss": -0.0246, |
|
"step": 1339 |
|
}, |
|
{ |
|
"epoch": 0.9537366548042705, |
|
"grad_norm": 8.596870438447308, |
|
"learning_rate": 9.691629955947137e-08, |
|
"loss": 0.1129, |
|
"step": 1340 |
|
}, |
|
{ |
|
"epoch": 0.9544483985765124, |
|
"grad_norm": 5.158601721630786, |
|
"learning_rate": 9.544787077826725e-08, |
|
"loss": 0.0471, |
|
"step": 1341 |
|
}, |
|
{ |
|
"epoch": 0.9551601423487545, |
|
"grad_norm": 4.435676952421707, |
|
"learning_rate": 9.397944199706314e-08, |
|
"loss": 0.0255, |
|
"step": 1342 |
|
}, |
|
{ |
|
"epoch": 0.9558718861209965, |
|
"grad_norm": 4.957502386310905, |
|
"learning_rate": 9.251101321585902e-08, |
|
"loss": 0.0724, |
|
"step": 1343 |
|
}, |
|
{ |
|
"epoch": 0.9565836298932384, |
|
"grad_norm": 3.944932976021612, |
|
"learning_rate": 9.104258443465492e-08, |
|
"loss": 0.0747, |
|
"step": 1344 |
|
}, |
|
{ |
|
"epoch": 0.9572953736654805, |
|
"grad_norm": 4.434897935933638, |
|
"learning_rate": 8.95741556534508e-08, |
|
"loss": 0.0596, |
|
"step": 1345 |
|
}, |
|
{ |
|
"epoch": 0.9580071174377224, |
|
"grad_norm": 5.776077843688367, |
|
"learning_rate": 8.81057268722467e-08, |
|
"loss": -0.0457, |
|
"step": 1346 |
|
}, |
|
{ |
|
"epoch": 0.9587188612099644, |
|
"grad_norm": 3.628808224506914, |
|
"learning_rate": 8.663729809104257e-08, |
|
"loss": 0.0567, |
|
"step": 1347 |
|
}, |
|
{ |
|
"epoch": 0.9594306049822064, |
|
"grad_norm": 3.6873894367776607, |
|
"learning_rate": 8.516886930983846e-08, |
|
"loss": 0.1175, |
|
"step": 1348 |
|
}, |
|
{ |
|
"epoch": 0.9601423487544484, |
|
"grad_norm": 5.519140231029694, |
|
"learning_rate": 8.370044052863436e-08, |
|
"loss": 0.0404, |
|
"step": 1349 |
|
}, |
|
{ |
|
"epoch": 0.9608540925266904, |
|
"grad_norm": 4.461927421017156, |
|
"learning_rate": 8.223201174743024e-08, |
|
"loss": 0.0566, |
|
"step": 1350 |
|
}, |
|
{ |
|
"epoch": 0.9615658362989323, |
|
"grad_norm": 5.333615692929449, |
|
"learning_rate": 8.076358296622614e-08, |
|
"loss": 0.129, |
|
"step": 1351 |
|
}, |
|
{ |
|
"epoch": 0.9622775800711744, |
|
"grad_norm": 3.538932343167442, |
|
"learning_rate": 7.929515418502202e-08, |
|
"loss": 0.0477, |
|
"step": 1352 |
|
}, |
|
{ |
|
"epoch": 0.9629893238434164, |
|
"grad_norm": 5.114559611196206, |
|
"learning_rate": 7.782672540381791e-08, |
|
"loss": 0.1573, |
|
"step": 1353 |
|
}, |
|
{ |
|
"epoch": 0.9637010676156583, |
|
"grad_norm": 4.230187404079494, |
|
"learning_rate": 7.63582966226138e-08, |
|
"loss": 0.0854, |
|
"step": 1354 |
|
}, |
|
{ |
|
"epoch": 0.9644128113879004, |
|
"grad_norm": 6.091438676748531, |
|
"learning_rate": 7.488986784140969e-08, |
|
"loss": -0.1022, |
|
"step": 1355 |
|
}, |
|
{ |
|
"epoch": 0.9651245551601424, |
|
"grad_norm": 3.569956288003313, |
|
"learning_rate": 7.342143906020558e-08, |
|
"loss": -0.0432, |
|
"step": 1356 |
|
}, |
|
{ |
|
"epoch": 0.9658362989323843, |
|
"grad_norm": 4.509091835357482, |
|
"learning_rate": 7.195301027900148e-08, |
|
"loss": 0.0026, |
|
"step": 1357 |
|
}, |
|
{ |
|
"epoch": 0.9665480427046264, |
|
"grad_norm": 3.621722877605747, |
|
"learning_rate": 7.048458149779736e-08, |
|
"loss": -0.0038, |
|
"step": 1358 |
|
}, |
|
{ |
|
"epoch": 0.9672597864768683, |
|
"grad_norm": 5.385886235182663, |
|
"learning_rate": 6.901615271659323e-08, |
|
"loss": 0.0568, |
|
"step": 1359 |
|
}, |
|
{ |
|
"epoch": 0.9679715302491103, |
|
"grad_norm": 5.242695404325819, |
|
"learning_rate": 6.754772393538913e-08, |
|
"loss": 0.1181, |
|
"step": 1360 |
|
}, |
|
{ |
|
"epoch": 0.9686832740213523, |
|
"grad_norm": 3.772667595014263, |
|
"learning_rate": 6.607929515418501e-08, |
|
"loss": 0.0671, |
|
"step": 1361 |
|
}, |
|
{ |
|
"epoch": 0.9693950177935943, |
|
"grad_norm": 4.239257250093547, |
|
"learning_rate": 6.461086637298091e-08, |
|
"loss": 0.0319, |
|
"step": 1362 |
|
}, |
|
{ |
|
"epoch": 0.9701067615658363, |
|
"grad_norm": 5.284811049505885, |
|
"learning_rate": 6.31424375917768e-08, |
|
"loss": 0.0672, |
|
"step": 1363 |
|
}, |
|
{ |
|
"epoch": 0.9708185053380783, |
|
"grad_norm": 4.001900698168065, |
|
"learning_rate": 6.167400881057268e-08, |
|
"loss": -0.0016, |
|
"step": 1364 |
|
}, |
|
{ |
|
"epoch": 0.9715302491103203, |
|
"grad_norm": 4.671011037902923, |
|
"learning_rate": 6.020558002936858e-08, |
|
"loss": 0.0601, |
|
"step": 1365 |
|
}, |
|
{ |
|
"epoch": 0.9722419928825623, |
|
"grad_norm": 3.676485713703232, |
|
"learning_rate": 5.873715124816446e-08, |
|
"loss": 0.053, |
|
"step": 1366 |
|
}, |
|
{ |
|
"epoch": 0.9729537366548042, |
|
"grad_norm": 3.878098264443752, |
|
"learning_rate": 5.726872246696035e-08, |
|
"loss": -0.0868, |
|
"step": 1367 |
|
}, |
|
{ |
|
"epoch": 0.9736654804270463, |
|
"grad_norm": 3.2959723660690217, |
|
"learning_rate": 5.580029368575624e-08, |
|
"loss": -0.0472, |
|
"step": 1368 |
|
}, |
|
{ |
|
"epoch": 0.9743772241992883, |
|
"grad_norm": 4.965343915355099, |
|
"learning_rate": 5.4331864904552125e-08, |
|
"loss": 0.1451, |
|
"step": 1369 |
|
}, |
|
{ |
|
"epoch": 0.9750889679715302, |
|
"grad_norm": 5.90877547589276, |
|
"learning_rate": 5.2863436123348017e-08, |
|
"loss": 0.1055, |
|
"step": 1370 |
|
}, |
|
{ |
|
"epoch": 0.9758007117437723, |
|
"grad_norm": 3.7198052712203014, |
|
"learning_rate": 5.13950073421439e-08, |
|
"loss": -0.0286, |
|
"step": 1371 |
|
}, |
|
{ |
|
"epoch": 0.9765124555160143, |
|
"grad_norm": 3.277868072070685, |
|
"learning_rate": 4.992657856093979e-08, |
|
"loss": -0.0904, |
|
"step": 1372 |
|
}, |
|
{ |
|
"epoch": 0.9772241992882562, |
|
"grad_norm": 4.233062202802242, |
|
"learning_rate": 4.8458149779735684e-08, |
|
"loss": 0.0484, |
|
"step": 1373 |
|
}, |
|
{ |
|
"epoch": 0.9779359430604982, |
|
"grad_norm": 3.2626225298676372, |
|
"learning_rate": 4.698972099853157e-08, |
|
"loss": 0.1087, |
|
"step": 1374 |
|
}, |
|
{ |
|
"epoch": 0.9786476868327402, |
|
"grad_norm": 5.509936287125614, |
|
"learning_rate": 4.552129221732746e-08, |
|
"loss": 0.0872, |
|
"step": 1375 |
|
}, |
|
{ |
|
"epoch": 0.9793594306049822, |
|
"grad_norm": 5.2997441356005135, |
|
"learning_rate": 4.405286343612335e-08, |
|
"loss": 0.1112, |
|
"step": 1376 |
|
}, |
|
{ |
|
"epoch": 0.9800711743772242, |
|
"grad_norm": 5.624439296327349, |
|
"learning_rate": 4.258443465491923e-08, |
|
"loss": 0.1325, |
|
"step": 1377 |
|
}, |
|
{ |
|
"epoch": 0.9807829181494662, |
|
"grad_norm": 4.796297616411571, |
|
"learning_rate": 4.111600587371512e-08, |
|
"loss": 0.058, |
|
"step": 1378 |
|
}, |
|
{ |
|
"epoch": 0.9814946619217082, |
|
"grad_norm": 5.256236051162191, |
|
"learning_rate": 3.964757709251101e-08, |
|
"loss": 0.0558, |
|
"step": 1379 |
|
}, |
|
{ |
|
"epoch": 0.9822064056939501, |
|
"grad_norm": 5.459748800458802, |
|
"learning_rate": 3.81791483113069e-08, |
|
"loss": 0.061, |
|
"step": 1380 |
|
}, |
|
{ |
|
"epoch": 0.9829181494661922, |
|
"grad_norm": 4.678092977838486, |
|
"learning_rate": 3.671071953010279e-08, |
|
"loss": 0.1689, |
|
"step": 1381 |
|
}, |
|
{ |
|
"epoch": 0.9836298932384342, |
|
"grad_norm": 6.37815001210416, |
|
"learning_rate": 3.524229074889868e-08, |
|
"loss": 0.1142, |
|
"step": 1382 |
|
}, |
|
{ |
|
"epoch": 0.9843416370106761, |
|
"grad_norm": 4.324133840276909, |
|
"learning_rate": 3.3773861967694565e-08, |
|
"loss": 0.0369, |
|
"step": 1383 |
|
}, |
|
{ |
|
"epoch": 0.9850533807829182, |
|
"grad_norm": 4.446104492002926, |
|
"learning_rate": 3.2305433186490456e-08, |
|
"loss": 0.1194, |
|
"step": 1384 |
|
}, |
|
{ |
|
"epoch": 0.9857651245551602, |
|
"grad_norm": 5.748282466289897, |
|
"learning_rate": 3.083700440528634e-08, |
|
"loss": 0.0976, |
|
"step": 1385 |
|
}, |
|
{ |
|
"epoch": 0.9864768683274021, |
|
"grad_norm": 3.3000657970920395, |
|
"learning_rate": 2.936857562408223e-08, |
|
"loss": 0.0993, |
|
"step": 1386 |
|
}, |
|
{ |
|
"epoch": 0.9871886120996441, |
|
"grad_norm": 6.565672480008891, |
|
"learning_rate": 2.790014684287812e-08, |
|
"loss": 0.0394, |
|
"step": 1387 |
|
}, |
|
{ |
|
"epoch": 0.9879003558718861, |
|
"grad_norm": 5.079375784962967, |
|
"learning_rate": 2.6431718061674008e-08, |
|
"loss": 0.042, |
|
"step": 1388 |
|
}, |
|
{ |
|
"epoch": 0.9886120996441281, |
|
"grad_norm": 3.9565078675418315, |
|
"learning_rate": 2.4963289280469896e-08, |
|
"loss": 0.109, |
|
"step": 1389 |
|
}, |
|
{ |
|
"epoch": 0.9893238434163701, |
|
"grad_norm": 3.839865233321451, |
|
"learning_rate": 2.3494860499265784e-08, |
|
"loss": 0.0618, |
|
"step": 1390 |
|
}, |
|
{ |
|
"epoch": 0.9900355871886121, |
|
"grad_norm": 5.350089077787284, |
|
"learning_rate": 2.2026431718061676e-08, |
|
"loss": 0.0817, |
|
"step": 1391 |
|
}, |
|
{ |
|
"epoch": 0.9907473309608541, |
|
"grad_norm": 6.152412264003426, |
|
"learning_rate": 2.055800293685756e-08, |
|
"loss": -0.0063, |
|
"step": 1392 |
|
}, |
|
{ |
|
"epoch": 0.9914590747330961, |
|
"grad_norm": 4.5760011921994455, |
|
"learning_rate": 1.908957415565345e-08, |
|
"loss": 0.0224, |
|
"step": 1393 |
|
}, |
|
{ |
|
"epoch": 0.9921708185053381, |
|
"grad_norm": 4.185879749727028, |
|
"learning_rate": 1.762114537444934e-08, |
|
"loss": 0.0795, |
|
"step": 1394 |
|
}, |
|
{ |
|
"epoch": 0.9928825622775801, |
|
"grad_norm": 3.633543010809335, |
|
"learning_rate": 1.6152716593245228e-08, |
|
"loss": 0.1394, |
|
"step": 1395 |
|
}, |
|
{ |
|
"epoch": 0.993594306049822, |
|
"grad_norm": 4.975818141202149, |
|
"learning_rate": 1.4684287812041114e-08, |
|
"loss": 0.0229, |
|
"step": 1396 |
|
}, |
|
{ |
|
"epoch": 0.994306049822064, |
|
"grad_norm": 5.877233527084385, |
|
"learning_rate": 1.3215859030837004e-08, |
|
"loss": 0.0481, |
|
"step": 1397 |
|
}, |
|
{ |
|
"epoch": 0.9950177935943061, |
|
"grad_norm": 5.1264733778517995, |
|
"learning_rate": 1.1747430249632892e-08, |
|
"loss": 0.0426, |
|
"step": 1398 |
|
}, |
|
{ |
|
"epoch": 0.995729537366548, |
|
"grad_norm": 3.3972353859001165, |
|
"learning_rate": 1.027900146842878e-08, |
|
"loss": -0.0471, |
|
"step": 1399 |
|
}, |
|
{ |
|
"epoch": 0.99644128113879, |
|
"grad_norm": 6.26148115386722, |
|
"learning_rate": 8.81057268722467e-09, |
|
"loss": 0.0972, |
|
"step": 1400 |
|
}, |
|
{ |
|
"epoch": 0.9971530249110321, |
|
"grad_norm": 5.05392313488687, |
|
"learning_rate": 7.342143906020557e-09, |
|
"loss": 0.0775, |
|
"step": 1401 |
|
}, |
|
{ |
|
"epoch": 0.997864768683274, |
|
"grad_norm": 6.8793645702833, |
|
"learning_rate": 5.873715124816446e-09, |
|
"loss": -0.0151, |
|
"step": 1402 |
|
}, |
|
{ |
|
"epoch": 0.998576512455516, |
|
"grad_norm": 5.4786247691333925, |
|
"learning_rate": 4.405286343612335e-09, |
|
"loss": 0.1371, |
|
"step": 1403 |
|
}, |
|
{ |
|
"epoch": 0.999288256227758, |
|
"grad_norm": 5.699875666282905, |
|
"learning_rate": 2.936857562408223e-09, |
|
"loss": -0.0478, |
|
"step": 1404 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"grad_norm": 2.8003625703125516, |
|
"learning_rate": 1.4684287812041115e-09, |
|
"loss": -0.082, |
|
"step": 1405 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"step": 1405, |
|
"total_flos": 252991429017600.0, |
|
"train_loss": 0.07296714245530635, |
|
"train_runtime": 16463.8109, |
|
"train_samples_per_second": 10.922, |
|
"train_steps_per_second": 0.085 |
|
} |
|
], |
|
"logging_steps": 1.0, |
|
"max_steps": 1405, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 1, |
|
"save_steps": 500, |
|
"total_flos": 252991429017600.0, |
|
"train_batch_size": 4, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|