{ "best_metric": null, "best_model_checkpoint": null, "epoch": 1.0, "eval_steps": 500, "global_step": 17336, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 5.7683433317951084e-05, "grad_norm": 0.3999878466129303, "learning_rate": 1.1534025374855825e-07, "loss": 1.182, "step": 1 }, { "epoch": 0.0002884171665897554, "grad_norm": 0.3274151384830475, "learning_rate": 5.767012687427913e-07, "loss": 1.0887, "step": 5 }, { "epoch": 0.0005768343331795108, "grad_norm": 0.42087307572364807, "learning_rate": 1.1534025374855826e-06, "loss": 1.2133, "step": 10 }, { "epoch": 0.0008652514997692663, "grad_norm": 0.48705124855041504, "learning_rate": 1.7301038062283738e-06, "loss": 1.1889, "step": 15 }, { "epoch": 0.0011536686663590216, "grad_norm": 0.3737321197986603, "learning_rate": 2.3068050749711653e-06, "loss": 1.2104, "step": 20 }, { "epoch": 0.001442085832948777, "grad_norm": 0.3640059232711792, "learning_rate": 2.8835063437139563e-06, "loss": 1.1713, "step": 25 }, { "epoch": 0.0017305029995385325, "grad_norm": 0.3122299611568451, "learning_rate": 3.4602076124567477e-06, "loss": 1.0437, "step": 30 }, { "epoch": 0.0020189201661282878, "grad_norm": 0.3902992010116577, "learning_rate": 4.036908881199539e-06, "loss": 1.2119, "step": 35 }, { "epoch": 0.0023073373327180432, "grad_norm": 0.3269282579421997, "learning_rate": 4.6136101499423305e-06, "loss": 1.1553, "step": 40 }, { "epoch": 0.0025957544993077987, "grad_norm": 0.3726448118686676, "learning_rate": 5.190311418685121e-06, "loss": 1.1356, "step": 45 }, { "epoch": 0.002884171665897554, "grad_norm": 0.32537516951560974, "learning_rate": 5.7670126874279126e-06, "loss": 1.1376, "step": 50 }, { "epoch": 0.0031725888324873096, "grad_norm": 0.2968432307243347, "learning_rate": 6.3437139561707036e-06, "loss": 1.1193, "step": 55 }, { "epoch": 0.003461005999077065, "grad_norm": 0.3939710855484009, "learning_rate": 6.920415224913495e-06, "loss": 1.1053, "step": 60 }, { "epoch": 0.0037494231656668205, "grad_norm": 0.31792372465133667, "learning_rate": 7.497116493656286e-06, "loss": 1.1071, "step": 65 }, { "epoch": 0.0040378403322565756, "grad_norm": 0.31301212310791016, "learning_rate": 8.073817762399077e-06, "loss": 1.0676, "step": 70 }, { "epoch": 0.0043262574988463314, "grad_norm": 0.3126572370529175, "learning_rate": 8.650519031141868e-06, "loss": 1.0958, "step": 75 }, { "epoch": 0.0046146746654360865, "grad_norm": 0.28482192754745483, "learning_rate": 9.227220299884661e-06, "loss": 1.0434, "step": 80 }, { "epoch": 0.004903091832025842, "grad_norm": 0.3318271338939667, "learning_rate": 9.803921568627451e-06, "loss": 0.9889, "step": 85 }, { "epoch": 0.005191508998615597, "grad_norm": 0.30128103494644165, "learning_rate": 1.0380622837370241e-05, "loss": 1.1145, "step": 90 }, { "epoch": 0.005479926165205353, "grad_norm": 0.3132428526878357, "learning_rate": 1.0957324106113035e-05, "loss": 1.0595, "step": 95 }, { "epoch": 0.005768343331795108, "grad_norm": 0.3268517255783081, "learning_rate": 1.1534025374855825e-05, "loss": 0.9886, "step": 100 }, { "epoch": 0.006056760498384864, "grad_norm": 0.33606696128845215, "learning_rate": 1.2110726643598615e-05, "loss": 1.0143, "step": 105 }, { "epoch": 0.006345177664974619, "grad_norm": 0.3881881535053253, "learning_rate": 1.2687427912341407e-05, "loss": 1.0, "step": 110 }, { "epoch": 0.006633594831564375, "grad_norm": 0.2971743047237396, "learning_rate": 1.3264129181084197e-05, "loss": 0.957, "step": 115 }, { "epoch": 0.00692201199815413, "grad_norm": 0.30101463198661804, "learning_rate": 1.384083044982699e-05, "loss": 1.0343, "step": 120 }, { "epoch": 0.007210429164743885, "grad_norm": 0.3259972333908081, "learning_rate": 1.4417531718569783e-05, "loss": 1.0182, "step": 125 }, { "epoch": 0.007498846331333641, "grad_norm": 0.2966211438179016, "learning_rate": 1.4994232987312573e-05, "loss": 1.047, "step": 130 }, { "epoch": 0.007787263497923396, "grad_norm": 0.3242364525794983, "learning_rate": 1.5570934256055363e-05, "loss": 1.0458, "step": 135 }, { "epoch": 0.008075680664513151, "grad_norm": 0.31030040979385376, "learning_rate": 1.6147635524798155e-05, "loss": 1.047, "step": 140 }, { "epoch": 0.008364097831102908, "grad_norm": 0.3165462911128998, "learning_rate": 1.6724336793540947e-05, "loss": 1.0784, "step": 145 }, { "epoch": 0.008652514997692663, "grad_norm": 0.3422790467739105, "learning_rate": 1.7301038062283735e-05, "loss": 1.0578, "step": 150 }, { "epoch": 0.008940932164282418, "grad_norm": 0.32128044962882996, "learning_rate": 1.787773933102653e-05, "loss": 1.0142, "step": 155 }, { "epoch": 0.009229349330872173, "grad_norm": 0.30257320404052734, "learning_rate": 1.8454440599769322e-05, "loss": 0.9874, "step": 160 }, { "epoch": 0.00951776649746193, "grad_norm": 0.30845504999160767, "learning_rate": 1.903114186851211e-05, "loss": 0.9731, "step": 165 }, { "epoch": 0.009806183664051685, "grad_norm": 0.36576882004737854, "learning_rate": 1.9607843137254903e-05, "loss": 1.0243, "step": 170 }, { "epoch": 0.01009460083064144, "grad_norm": 0.34204497933387756, "learning_rate": 2.0184544405997694e-05, "loss": 1.1213, "step": 175 }, { "epoch": 0.010383017997231195, "grad_norm": 0.357164204120636, "learning_rate": 2.0761245674740483e-05, "loss": 1.0324, "step": 180 }, { "epoch": 0.01067143516382095, "grad_norm": 0.3807251453399658, "learning_rate": 2.1337946943483278e-05, "loss": 0.9613, "step": 185 }, { "epoch": 0.010959852330410707, "grad_norm": 0.3466082215309143, "learning_rate": 2.191464821222607e-05, "loss": 1.0752, "step": 190 }, { "epoch": 0.011248269497000462, "grad_norm": 0.3457271456718445, "learning_rate": 2.249134948096886e-05, "loss": 1.0638, "step": 195 }, { "epoch": 0.011536686663590217, "grad_norm": 0.3821125626564026, "learning_rate": 2.306805074971165e-05, "loss": 1.0103, "step": 200 }, { "epoch": 0.011825103830179972, "grad_norm": 0.3460346758365631, "learning_rate": 2.3644752018454442e-05, "loss": 1.0103, "step": 205 }, { "epoch": 0.012113520996769728, "grad_norm": 0.3334082365036011, "learning_rate": 2.422145328719723e-05, "loss": 1.0671, "step": 210 }, { "epoch": 0.012401938163359483, "grad_norm": 0.3596220910549164, "learning_rate": 2.4798154555940022e-05, "loss": 0.9234, "step": 215 }, { "epoch": 0.012690355329949238, "grad_norm": 0.4396967887878418, "learning_rate": 2.5374855824682814e-05, "loss": 1.0464, "step": 220 }, { "epoch": 0.012978772496538993, "grad_norm": 0.3878267705440521, "learning_rate": 2.5951557093425606e-05, "loss": 1.0128, "step": 225 }, { "epoch": 0.01326718966312875, "grad_norm": 0.42701923847198486, "learning_rate": 2.6528258362168395e-05, "loss": 1.0255, "step": 230 }, { "epoch": 0.013555606829718505, "grad_norm": 0.41437554359436035, "learning_rate": 2.7104959630911193e-05, "loss": 0.9773, "step": 235 }, { "epoch": 0.01384402399630826, "grad_norm": 0.35881930589675903, "learning_rate": 2.768166089965398e-05, "loss": 1.0045, "step": 240 }, { "epoch": 0.014132441162898015, "grad_norm": 0.3705314099788666, "learning_rate": 2.8258362168396773e-05, "loss": 1.008, "step": 245 }, { "epoch": 0.01442085832948777, "grad_norm": 0.35067903995513916, "learning_rate": 2.8835063437139565e-05, "loss": 0.9855, "step": 250 }, { "epoch": 0.014709275496077527, "grad_norm": 0.3850333094596863, "learning_rate": 2.9411764705882354e-05, "loss": 1.0378, "step": 255 }, { "epoch": 0.014997692662667282, "grad_norm": 0.3616912066936493, "learning_rate": 2.9988465974625146e-05, "loss": 1.0055, "step": 260 }, { "epoch": 0.015286109829257037, "grad_norm": 0.3630939722061157, "learning_rate": 3.0565167243367934e-05, "loss": 0.9497, "step": 265 }, { "epoch": 0.015574526995846792, "grad_norm": 0.3719247579574585, "learning_rate": 3.1141868512110726e-05, "loss": 1.0438, "step": 270 }, { "epoch": 0.015862944162436547, "grad_norm": 0.36650553345680237, "learning_rate": 3.171856978085352e-05, "loss": 0.9474, "step": 275 }, { "epoch": 0.016151361329026302, "grad_norm": 0.386202335357666, "learning_rate": 3.229527104959631e-05, "loss": 0.9687, "step": 280 }, { "epoch": 0.01643977849561606, "grad_norm": 0.3516092896461487, "learning_rate": 3.28719723183391e-05, "loss": 0.9146, "step": 285 }, { "epoch": 0.016728195662205816, "grad_norm": 0.35326242446899414, "learning_rate": 3.344867358708189e-05, "loss": 0.9616, "step": 290 }, { "epoch": 0.01701661282879557, "grad_norm": 0.35251685976982117, "learning_rate": 3.4025374855824685e-05, "loss": 0.9696, "step": 295 }, { "epoch": 0.017305029995385326, "grad_norm": 0.3731367290019989, "learning_rate": 3.460207612456747e-05, "loss": 0.9631, "step": 300 }, { "epoch": 0.01759344716197508, "grad_norm": 0.36492493748664856, "learning_rate": 3.517877739331027e-05, "loss": 1.0923, "step": 305 }, { "epoch": 0.017881864328564836, "grad_norm": 0.37138622999191284, "learning_rate": 3.575547866205306e-05, "loss": 1.048, "step": 310 }, { "epoch": 0.01817028149515459, "grad_norm": 0.34608566761016846, "learning_rate": 3.633217993079585e-05, "loss": 1.0394, "step": 315 }, { "epoch": 0.018458698661744346, "grad_norm": 0.35083648562431335, "learning_rate": 3.6908881199538644e-05, "loss": 1.0003, "step": 320 }, { "epoch": 0.0187471158283341, "grad_norm": 0.33873873949050903, "learning_rate": 3.748558246828143e-05, "loss": 0.9683, "step": 325 }, { "epoch": 0.01903553299492386, "grad_norm": 0.33576804399490356, "learning_rate": 3.806228373702422e-05, "loss": 0.9974, "step": 330 }, { "epoch": 0.019323950161513614, "grad_norm": 0.3308757543563843, "learning_rate": 3.863898500576701e-05, "loss": 0.985, "step": 335 }, { "epoch": 0.01961236732810337, "grad_norm": 0.3734375536441803, "learning_rate": 3.9215686274509805e-05, "loss": 0.9665, "step": 340 }, { "epoch": 0.019900784494693124, "grad_norm": 0.3812713325023651, "learning_rate": 3.97923875432526e-05, "loss": 0.9961, "step": 345 }, { "epoch": 0.02018920166128288, "grad_norm": 0.3471347689628601, "learning_rate": 4.036908881199539e-05, "loss": 0.9386, "step": 350 }, { "epoch": 0.020477618827872635, "grad_norm": 0.34909528493881226, "learning_rate": 4.094579008073818e-05, "loss": 0.9794, "step": 355 }, { "epoch": 0.02076603599446239, "grad_norm": 0.34423884749412537, "learning_rate": 4.1522491349480966e-05, "loss": 1.0128, "step": 360 }, { "epoch": 0.021054453161052145, "grad_norm": 0.33039391040802, "learning_rate": 4.209919261822376e-05, "loss": 0.978, "step": 365 }, { "epoch": 0.0213428703276419, "grad_norm": 0.35275402665138245, "learning_rate": 4.2675893886966556e-05, "loss": 1.0011, "step": 370 }, { "epoch": 0.021631287494231658, "grad_norm": 0.3698658049106598, "learning_rate": 4.325259515570935e-05, "loss": 1.0207, "step": 375 }, { "epoch": 0.021919704660821413, "grad_norm": 0.37382742762565613, "learning_rate": 4.382929642445214e-05, "loss": 0.9759, "step": 380 }, { "epoch": 0.022208121827411168, "grad_norm": 0.3058774173259735, "learning_rate": 4.440599769319493e-05, "loss": 0.8755, "step": 385 }, { "epoch": 0.022496538994000923, "grad_norm": 0.33155399560928345, "learning_rate": 4.498269896193772e-05, "loss": 1.0688, "step": 390 }, { "epoch": 0.022784956160590678, "grad_norm": 0.3562370240688324, "learning_rate": 4.555940023068051e-05, "loss": 1.0279, "step": 395 }, { "epoch": 0.023073373327180433, "grad_norm": 0.3331949710845947, "learning_rate": 4.61361014994233e-05, "loss": 1.006, "step": 400 }, { "epoch": 0.023361790493770188, "grad_norm": 0.36301690340042114, "learning_rate": 4.671280276816609e-05, "loss": 1.0008, "step": 405 }, { "epoch": 0.023650207660359943, "grad_norm": 0.3455171585083008, "learning_rate": 4.7289504036908884e-05, "loss": 0.9911, "step": 410 }, { "epoch": 0.0239386248269497, "grad_norm": 0.33813151717185974, "learning_rate": 4.7866205305651676e-05, "loss": 0.9613, "step": 415 }, { "epoch": 0.024227041993539457, "grad_norm": 0.30761656165122986, "learning_rate": 4.844290657439446e-05, "loss": 1.0059, "step": 420 }, { "epoch": 0.024515459160129212, "grad_norm": 0.31521427631378174, "learning_rate": 4.901960784313725e-05, "loss": 1.0266, "step": 425 }, { "epoch": 0.024803876326718967, "grad_norm": 0.3083288073539734, "learning_rate": 4.9596309111880045e-05, "loss": 0.9526, "step": 430 }, { "epoch": 0.025092293493308722, "grad_norm": 0.3199276924133301, "learning_rate": 5.017301038062284e-05, "loss": 1.0201, "step": 435 }, { "epoch": 0.025380710659898477, "grad_norm": 0.3202233910560608, "learning_rate": 5.074971164936563e-05, "loss": 0.9186, "step": 440 }, { "epoch": 0.025669127826488232, "grad_norm": 0.3125661611557007, "learning_rate": 5.132641291810843e-05, "loss": 1.0323, "step": 445 }, { "epoch": 0.025957544993077987, "grad_norm": 0.3188762068748474, "learning_rate": 5.190311418685121e-05, "loss": 0.9921, "step": 450 }, { "epoch": 0.026245962159667742, "grad_norm": 0.3226945400238037, "learning_rate": 5.2479815455594004e-05, "loss": 1.0233, "step": 455 }, { "epoch": 0.0265343793262575, "grad_norm": 0.33580970764160156, "learning_rate": 5.305651672433679e-05, "loss": 0.9985, "step": 460 }, { "epoch": 0.026822796492847256, "grad_norm": 0.2981513440608978, "learning_rate": 5.363321799307959e-05, "loss": 0.9998, "step": 465 }, { "epoch": 0.02711121365943701, "grad_norm": 0.3163251280784607, "learning_rate": 5.4209919261822386e-05, "loss": 0.9028, "step": 470 }, { "epoch": 0.027399630826026766, "grad_norm": 0.30679088830947876, "learning_rate": 5.478662053056517e-05, "loss": 0.9625, "step": 475 }, { "epoch": 0.02768804799261652, "grad_norm": 0.3147639036178589, "learning_rate": 5.536332179930796e-05, "loss": 0.984, "step": 480 }, { "epoch": 0.027976465159206276, "grad_norm": 0.29801392555236816, "learning_rate": 5.594002306805075e-05, "loss": 0.9277, "step": 485 }, { "epoch": 0.02826488232579603, "grad_norm": 0.3048481047153473, "learning_rate": 5.651672433679355e-05, "loss": 1.011, "step": 490 }, { "epoch": 0.028553299492385786, "grad_norm": 0.3025212585926056, "learning_rate": 5.709342560553633e-05, "loss": 0.9997, "step": 495 }, { "epoch": 0.02884171665897554, "grad_norm": 0.29069867730140686, "learning_rate": 5.767012687427913e-05, "loss": 0.9725, "step": 500 }, { "epoch": 0.0291301338255653, "grad_norm": 0.2819552421569824, "learning_rate": 5.8246828143021916e-05, "loss": 0.9482, "step": 505 }, { "epoch": 0.029418550992155054, "grad_norm": 0.311065673828125, "learning_rate": 5.882352941176471e-05, "loss": 1.0066, "step": 510 }, { "epoch": 0.02970696815874481, "grad_norm": 0.3073347508907318, "learning_rate": 5.940023068050749e-05, "loss": 1.0394, "step": 515 }, { "epoch": 0.029995385325334564, "grad_norm": 0.29304638504981995, "learning_rate": 5.997693194925029e-05, "loss": 0.8912, "step": 520 }, { "epoch": 0.03028380249192432, "grad_norm": 0.29948490858078003, "learning_rate": 6.0553633217993076e-05, "loss": 1.071, "step": 525 }, { "epoch": 0.030572219658514074, "grad_norm": 0.3014158606529236, "learning_rate": 6.113033448673587e-05, "loss": 0.9749, "step": 530 }, { "epoch": 0.03086063682510383, "grad_norm": 0.30555838346481323, "learning_rate": 6.170703575547867e-05, "loss": 1.0307, "step": 535 }, { "epoch": 0.031149053991693584, "grad_norm": 0.29762470722198486, "learning_rate": 6.228373702422145e-05, "loss": 0.9906, "step": 540 }, { "epoch": 0.03143747115828334, "grad_norm": 0.30303990840911865, "learning_rate": 6.286043829296425e-05, "loss": 0.9647, "step": 545 }, { "epoch": 0.031725888324873094, "grad_norm": 0.293807715177536, "learning_rate": 6.343713956170704e-05, "loss": 0.9659, "step": 550 }, { "epoch": 0.03201430549146285, "grad_norm": 0.2783466577529907, "learning_rate": 6.401384083044983e-05, "loss": 0.9687, "step": 555 }, { "epoch": 0.032302722658052604, "grad_norm": 0.28931179642677307, "learning_rate": 6.459054209919262e-05, "loss": 1.015, "step": 560 }, { "epoch": 0.03259113982464236, "grad_norm": 0.29564398527145386, "learning_rate": 6.516724336793542e-05, "loss": 0.9644, "step": 565 }, { "epoch": 0.03287955699123212, "grad_norm": 0.28108495473861694, "learning_rate": 6.57439446366782e-05, "loss": 0.8925, "step": 570 }, { "epoch": 0.033167974157821876, "grad_norm": 0.29815274477005005, "learning_rate": 6.6320645905421e-05, "loss": 0.9802, "step": 575 }, { "epoch": 0.03345639132441163, "grad_norm": 0.2788611054420471, "learning_rate": 6.689734717416379e-05, "loss": 0.9828, "step": 580 }, { "epoch": 0.033744808491001387, "grad_norm": 0.292481392621994, "learning_rate": 6.747404844290659e-05, "loss": 0.9494, "step": 585 }, { "epoch": 0.03403322565759114, "grad_norm": 0.3360653221607208, "learning_rate": 6.805074971164937e-05, "loss": 0.9748, "step": 590 }, { "epoch": 0.0343216428241809, "grad_norm": 0.2787121534347534, "learning_rate": 6.862745098039216e-05, "loss": 1.0023, "step": 595 }, { "epoch": 0.03461005999077065, "grad_norm": 0.2681010663509369, "learning_rate": 6.920415224913494e-05, "loss": 1.0315, "step": 600 }, { "epoch": 0.03489847715736041, "grad_norm": 0.2744525372982025, "learning_rate": 6.978085351787774e-05, "loss": 1.0026, "step": 605 }, { "epoch": 0.03518689432395016, "grad_norm": 0.27870893478393555, "learning_rate": 7.035755478662054e-05, "loss": 1.0194, "step": 610 }, { "epoch": 0.03547531149053992, "grad_norm": 0.2868039309978485, "learning_rate": 7.093425605536332e-05, "loss": 1.0519, "step": 615 }, { "epoch": 0.03576372865712967, "grad_norm": 0.2700194716453552, "learning_rate": 7.151095732410612e-05, "loss": 1.0284, "step": 620 }, { "epoch": 0.03605214582371943, "grad_norm": 0.27872154116630554, "learning_rate": 7.20876585928489e-05, "loss": 0.9432, "step": 625 }, { "epoch": 0.03634056299030918, "grad_norm": 0.29088643193244934, "learning_rate": 7.26643598615917e-05, "loss": 0.9543, "step": 630 }, { "epoch": 0.03662898015689894, "grad_norm": 0.29298341274261475, "learning_rate": 7.324106113033449e-05, "loss": 0.9481, "step": 635 }, { "epoch": 0.03691739732348869, "grad_norm": 0.2777993083000183, "learning_rate": 7.381776239907729e-05, "loss": 1.0225, "step": 640 }, { "epoch": 0.03720581449007845, "grad_norm": 0.2685664892196655, "learning_rate": 7.439446366782007e-05, "loss": 0.9842, "step": 645 }, { "epoch": 0.0374942316566682, "grad_norm": 0.2854040861129761, "learning_rate": 7.497116493656286e-05, "loss": 1.0532, "step": 650 }, { "epoch": 0.03778264882325796, "grad_norm": 0.2788238525390625, "learning_rate": 7.554786620530564e-05, "loss": 1.0158, "step": 655 }, { "epoch": 0.03807106598984772, "grad_norm": 0.2755304276943207, "learning_rate": 7.612456747404844e-05, "loss": 0.9772, "step": 660 }, { "epoch": 0.038359483156437474, "grad_norm": 0.35789754986763, "learning_rate": 7.670126874279123e-05, "loss": 1.0072, "step": 665 }, { "epoch": 0.03864790032302723, "grad_norm": 0.26069143414497375, "learning_rate": 7.727797001153403e-05, "loss": 0.9633, "step": 670 }, { "epoch": 0.038936317489616984, "grad_norm": 0.25112205743789673, "learning_rate": 7.785467128027682e-05, "loss": 0.9779, "step": 675 }, { "epoch": 0.03922473465620674, "grad_norm": 0.2676317095756531, "learning_rate": 7.843137254901961e-05, "loss": 0.9924, "step": 680 }, { "epoch": 0.039513151822796494, "grad_norm": 0.26556289196014404, "learning_rate": 7.900807381776241e-05, "loss": 0.9615, "step": 685 }, { "epoch": 0.03980156898938625, "grad_norm": 0.2796500623226166, "learning_rate": 7.95847750865052e-05, "loss": 0.9579, "step": 690 }, { "epoch": 0.040089986155976004, "grad_norm": 0.2656737267971039, "learning_rate": 8.016147635524799e-05, "loss": 0.9664, "step": 695 }, { "epoch": 0.04037840332256576, "grad_norm": 0.27284595370292664, "learning_rate": 8.073817762399078e-05, "loss": 0.9255, "step": 700 }, { "epoch": 0.040666820489155514, "grad_norm": 0.27243107557296753, "learning_rate": 8.131487889273358e-05, "loss": 1.0724, "step": 705 }, { "epoch": 0.04095523765574527, "grad_norm": 0.2859233617782593, "learning_rate": 8.189158016147636e-05, "loss": 0.9977, "step": 710 }, { "epoch": 0.041243654822335024, "grad_norm": 0.25867760181427, "learning_rate": 8.246828143021915e-05, "loss": 0.9898, "step": 715 }, { "epoch": 0.04153207198892478, "grad_norm": 0.4606216549873352, "learning_rate": 8.304498269896193e-05, "loss": 0.9751, "step": 720 }, { "epoch": 0.041820489155514534, "grad_norm": 0.27268925309181213, "learning_rate": 8.362168396770473e-05, "loss": 0.961, "step": 725 }, { "epoch": 0.04210890632210429, "grad_norm": 0.27551740407943726, "learning_rate": 8.419838523644751e-05, "loss": 1.0218, "step": 730 }, { "epoch": 0.042397323488694044, "grad_norm": 0.26447197794914246, "learning_rate": 8.477508650519031e-05, "loss": 0.8937, "step": 735 }, { "epoch": 0.0426857406552838, "grad_norm": 0.27464747428894043, "learning_rate": 8.535178777393311e-05, "loss": 1.02, "step": 740 }, { "epoch": 0.04297415782187356, "grad_norm": 0.2542886435985565, "learning_rate": 8.59284890426759e-05, "loss": 1.0397, "step": 745 }, { "epoch": 0.043262574988463316, "grad_norm": 0.264526903629303, "learning_rate": 8.65051903114187e-05, "loss": 1.0214, "step": 750 }, { "epoch": 0.04355099215505307, "grad_norm": 0.28241610527038574, "learning_rate": 8.708189158016148e-05, "loss": 0.9854, "step": 755 }, { "epoch": 0.043839409321642826, "grad_norm": 0.2658286988735199, "learning_rate": 8.765859284890428e-05, "loss": 1.0165, "step": 760 }, { "epoch": 0.04412782648823258, "grad_norm": 0.2767401933670044, "learning_rate": 8.823529411764706e-05, "loss": 0.9447, "step": 765 }, { "epoch": 0.044416243654822336, "grad_norm": 0.2705742120742798, "learning_rate": 8.881199538638986e-05, "loss": 1.016, "step": 770 }, { "epoch": 0.04470466082141209, "grad_norm": 0.2611309885978699, "learning_rate": 8.938869665513265e-05, "loss": 0.9275, "step": 775 }, { "epoch": 0.044993077988001846, "grad_norm": 0.26546046137809753, "learning_rate": 8.996539792387543e-05, "loss": 0.9261, "step": 780 }, { "epoch": 0.0452814951545916, "grad_norm": 0.2639457583427429, "learning_rate": 9.054209919261822e-05, "loss": 1.137, "step": 785 }, { "epoch": 0.045569912321181356, "grad_norm": 0.2529033124446869, "learning_rate": 9.111880046136102e-05, "loss": 0.9829, "step": 790 }, { "epoch": 0.04585832948777111, "grad_norm": 0.2628052532672882, "learning_rate": 9.16955017301038e-05, "loss": 1.0721, "step": 795 }, { "epoch": 0.046146746654360866, "grad_norm": 0.24454466998577118, "learning_rate": 9.22722029988466e-05, "loss": 0.9685, "step": 800 }, { "epoch": 0.04643516382095062, "grad_norm": 0.2661277651786804, "learning_rate": 9.28489042675894e-05, "loss": 1.0083, "step": 805 }, { "epoch": 0.046723580987540377, "grad_norm": 0.2556845545768738, "learning_rate": 9.342560553633218e-05, "loss": 0.9418, "step": 810 }, { "epoch": 0.04701199815413013, "grad_norm": 0.26760879158973694, "learning_rate": 9.400230680507498e-05, "loss": 0.9693, "step": 815 }, { "epoch": 0.04730041532071989, "grad_norm": 0.27097398042678833, "learning_rate": 9.457900807381777e-05, "loss": 1.02, "step": 820 }, { "epoch": 0.04758883248730964, "grad_norm": 0.25728651881217957, "learning_rate": 9.515570934256057e-05, "loss": 1.0475, "step": 825 }, { "epoch": 0.0478772496538994, "grad_norm": 0.25373902916908264, "learning_rate": 9.573241061130335e-05, "loss": 1.0506, "step": 830 }, { "epoch": 0.04816566682048916, "grad_norm": 0.2508525252342224, "learning_rate": 9.630911188004614e-05, "loss": 0.9788, "step": 835 }, { "epoch": 0.048454083987078914, "grad_norm": 0.25410938262939453, "learning_rate": 9.688581314878892e-05, "loss": 1.0306, "step": 840 }, { "epoch": 0.04874250115366867, "grad_norm": 0.329357385635376, "learning_rate": 9.746251441753172e-05, "loss": 0.8915, "step": 845 }, { "epoch": 0.049030918320258424, "grad_norm": 0.2622867524623871, "learning_rate": 9.80392156862745e-05, "loss": 1.0084, "step": 850 }, { "epoch": 0.04931933548684818, "grad_norm": 0.24103546142578125, "learning_rate": 9.86159169550173e-05, "loss": 0.9618, "step": 855 }, { "epoch": 0.049607752653437934, "grad_norm": 0.24415351450443268, "learning_rate": 9.919261822376009e-05, "loss": 0.9831, "step": 860 }, { "epoch": 0.04989616982002769, "grad_norm": 0.2494598776102066, "learning_rate": 9.976931949250289e-05, "loss": 0.9796, "step": 865 }, { "epoch": 0.050184586986617444, "grad_norm": 0.25438565015792847, "learning_rate": 0.00010034602076124569, "loss": 0.962, "step": 870 }, { "epoch": 0.0504730041532072, "grad_norm": 0.2473691701889038, "learning_rate": 0.00010092272202998847, "loss": 0.9957, "step": 875 }, { "epoch": 0.050761421319796954, "grad_norm": 0.2563316524028778, "learning_rate": 0.00010149942329873126, "loss": 0.9428, "step": 880 }, { "epoch": 0.05104983848638671, "grad_norm": 0.2498437613248825, "learning_rate": 0.00010207612456747407, "loss": 1.0268, "step": 885 }, { "epoch": 0.051338255652976464, "grad_norm": 0.30138102173805237, "learning_rate": 0.00010265282583621685, "loss": 1.0179, "step": 890 }, { "epoch": 0.05162667281956622, "grad_norm": 0.2591732144355774, "learning_rate": 0.00010322952710495964, "loss": 1.0329, "step": 895 }, { "epoch": 0.051915089986155974, "grad_norm": 0.2612927258014679, "learning_rate": 0.00010380622837370242, "loss": 1.0217, "step": 900 }, { "epoch": 0.05220350715274573, "grad_norm": 0.2682620882987976, "learning_rate": 0.00010438292964244522, "loss": 0.9739, "step": 905 }, { "epoch": 0.052491924319335484, "grad_norm": 0.25226083397865295, "learning_rate": 0.00010495963091118801, "loss": 0.9301, "step": 910 }, { "epoch": 0.05278034148592524, "grad_norm": 0.2584647536277771, "learning_rate": 0.00010553633217993079, "loss": 0.948, "step": 915 }, { "epoch": 0.053068758652515, "grad_norm": 0.25511860847473145, "learning_rate": 0.00010611303344867358, "loss": 0.9927, "step": 920 }, { "epoch": 0.053357175819104756, "grad_norm": 0.24508269131183624, "learning_rate": 0.00010668973471741639, "loss": 0.9725, "step": 925 }, { "epoch": 0.05364559298569451, "grad_norm": 0.2486460655927658, "learning_rate": 0.00010726643598615918, "loss": 0.9572, "step": 930 }, { "epoch": 0.053934010152284266, "grad_norm": 0.2520204186439514, "learning_rate": 0.00010784313725490196, "loss": 1.0018, "step": 935 }, { "epoch": 0.05422242731887402, "grad_norm": 0.2417331486940384, "learning_rate": 0.00010841983852364477, "loss": 0.9141, "step": 940 }, { "epoch": 0.054510844485463776, "grad_norm": 0.2488359957933426, "learning_rate": 0.00010899653979238756, "loss": 1.0062, "step": 945 }, { "epoch": 0.05479926165205353, "grad_norm": 0.24121712148189545, "learning_rate": 0.00010957324106113034, "loss": 1.0109, "step": 950 }, { "epoch": 0.055087678818643286, "grad_norm": 0.23568566143512726, "learning_rate": 0.00011014994232987313, "loss": 1.0166, "step": 955 }, { "epoch": 0.05537609598523304, "grad_norm": 0.24867838621139526, "learning_rate": 0.00011072664359861593, "loss": 0.9547, "step": 960 }, { "epoch": 0.055664513151822796, "grad_norm": 0.25426313281059265, "learning_rate": 0.00011130334486735871, "loss": 0.8802, "step": 965 }, { "epoch": 0.05595293031841255, "grad_norm": 0.25565817952156067, "learning_rate": 0.0001118800461361015, "loss": 1.0254, "step": 970 }, { "epoch": 0.056241347485002306, "grad_norm": 0.2581862807273865, "learning_rate": 0.00011245674740484428, "loss": 0.9911, "step": 975 }, { "epoch": 0.05652976465159206, "grad_norm": 0.2568804919719696, "learning_rate": 0.0001130334486735871, "loss": 0.9868, "step": 980 }, { "epoch": 0.056818181818181816, "grad_norm": 0.24871297180652618, "learning_rate": 0.00011361014994232988, "loss": 0.9995, "step": 985 }, { "epoch": 0.05710659898477157, "grad_norm": 0.2451828271150589, "learning_rate": 0.00011418685121107266, "loss": 1.0186, "step": 990 }, { "epoch": 0.057395016151361326, "grad_norm": 0.22989226877689362, "learning_rate": 0.00011476355247981545, "loss": 0.9973, "step": 995 }, { "epoch": 0.05768343331795108, "grad_norm": 0.24537120759487152, "learning_rate": 0.00011534025374855826, "loss": 0.9782, "step": 1000 }, { "epoch": 0.05797185048454084, "grad_norm": 0.2385062724351883, "learning_rate": 0.00011591695501730105, "loss": 0.9829, "step": 1005 }, { "epoch": 0.0582602676511306, "grad_norm": 0.24060112237930298, "learning_rate": 0.00011649365628604383, "loss": 0.9604, "step": 1010 }, { "epoch": 0.05854868481772035, "grad_norm": 0.24133679270744324, "learning_rate": 0.00011707035755478663, "loss": 1.0636, "step": 1015 }, { "epoch": 0.05883710198431011, "grad_norm": 0.24426597356796265, "learning_rate": 0.00011764705882352942, "loss": 1.03, "step": 1020 }, { "epoch": 0.05912551915089986, "grad_norm": 0.24399541318416595, "learning_rate": 0.0001182237600922722, "loss": 1.0323, "step": 1025 }, { "epoch": 0.05941393631748962, "grad_norm": 0.23879031836986542, "learning_rate": 0.00011880046136101499, "loss": 0.919, "step": 1030 }, { "epoch": 0.059702353484079373, "grad_norm": 0.2616792619228363, "learning_rate": 0.0001193771626297578, "loss": 0.973, "step": 1035 }, { "epoch": 0.05999077065066913, "grad_norm": 0.2673472464084625, "learning_rate": 0.00011995386389850058, "loss": 1.034, "step": 1040 }, { "epoch": 0.060279187817258884, "grad_norm": 0.25476494431495667, "learning_rate": 0.00012053056516724337, "loss": 0.9774, "step": 1045 }, { "epoch": 0.06056760498384864, "grad_norm": 0.24083387851715088, "learning_rate": 0.00012110726643598615, "loss": 0.978, "step": 1050 }, { "epoch": 0.060856022150438394, "grad_norm": 0.24353915452957153, "learning_rate": 0.00012168396770472896, "loss": 0.9743, "step": 1055 }, { "epoch": 0.06114443931702815, "grad_norm": 0.21972060203552246, "learning_rate": 0.00012226066897347174, "loss": 0.9603, "step": 1060 }, { "epoch": 0.061432856483617904, "grad_norm": 0.2495606243610382, "learning_rate": 0.00012283737024221453, "loss": 0.9428, "step": 1065 }, { "epoch": 0.06172127365020766, "grad_norm": 0.243063822388649, "learning_rate": 0.00012341407151095733, "loss": 1.0545, "step": 1070 }, { "epoch": 0.062009690816797414, "grad_norm": 0.43066951632499695, "learning_rate": 0.00012399077277970013, "loss": 1.0169, "step": 1075 }, { "epoch": 0.06229810798338717, "grad_norm": 0.2651910185813904, "learning_rate": 0.0001245674740484429, "loss": 0.9391, "step": 1080 }, { "epoch": 0.06258652514997692, "grad_norm": 0.2392721027135849, "learning_rate": 0.0001251441753171857, "loss": 0.9285, "step": 1085 }, { "epoch": 0.06287494231656668, "grad_norm": 0.27125298976898193, "learning_rate": 0.0001257208765859285, "loss": 1.0027, "step": 1090 }, { "epoch": 0.06316335948315643, "grad_norm": 0.23103715479373932, "learning_rate": 0.0001262975778546713, "loss": 0.9483, "step": 1095 }, { "epoch": 0.06345177664974619, "grad_norm": 0.26006826758384705, "learning_rate": 0.00012687427912341407, "loss": 0.9914, "step": 1100 }, { "epoch": 0.06374019381633594, "grad_norm": 0.24031592905521393, "learning_rate": 0.00012745098039215687, "loss": 0.9377, "step": 1105 }, { "epoch": 0.0640286109829257, "grad_norm": 0.23456595838069916, "learning_rate": 0.00012802768166089967, "loss": 0.9399, "step": 1110 }, { "epoch": 0.06431702814951545, "grad_norm": 0.23771512508392334, "learning_rate": 0.00012860438292964244, "loss": 0.9292, "step": 1115 }, { "epoch": 0.06460544531610521, "grad_norm": 0.260166198015213, "learning_rate": 0.00012918108419838524, "loss": 1.0257, "step": 1120 }, { "epoch": 0.06489386248269496, "grad_norm": 0.2397325038909912, "learning_rate": 0.00012975778546712804, "loss": 0.9911, "step": 1125 }, { "epoch": 0.06518227964928472, "grad_norm": 0.2309105545282364, "learning_rate": 0.00013033448673587084, "loss": 0.945, "step": 1130 }, { "epoch": 0.06547069681587447, "grad_norm": 0.2553274631500244, "learning_rate": 0.0001309111880046136, "loss": 1.0365, "step": 1135 }, { "epoch": 0.06575911398246424, "grad_norm": 0.24227184057235718, "learning_rate": 0.0001314878892733564, "loss": 1.0088, "step": 1140 }, { "epoch": 0.066047531149054, "grad_norm": 0.24748703837394714, "learning_rate": 0.0001320645905420992, "loss": 0.9954, "step": 1145 }, { "epoch": 0.06633594831564375, "grad_norm": 0.237881138920784, "learning_rate": 0.000132641291810842, "loss": 1.0068, "step": 1150 }, { "epoch": 0.06662436548223351, "grad_norm": 0.2657199203968048, "learning_rate": 0.00013321799307958477, "loss": 1.0157, "step": 1155 }, { "epoch": 0.06691278264882326, "grad_norm": 0.22322721779346466, "learning_rate": 0.00013379469434832757, "loss": 0.9121, "step": 1160 }, { "epoch": 0.06720119981541302, "grad_norm": 0.2632087171077728, "learning_rate": 0.00013437139561707037, "loss": 1.0139, "step": 1165 }, { "epoch": 0.06748961698200277, "grad_norm": 0.23888643085956573, "learning_rate": 0.00013494809688581317, "loss": 0.9508, "step": 1170 }, { "epoch": 0.06777803414859253, "grad_norm": 0.2553633153438568, "learning_rate": 0.00013552479815455594, "loss": 0.9303, "step": 1175 }, { "epoch": 0.06806645131518228, "grad_norm": 0.23953106999397278, "learning_rate": 0.00013610149942329874, "loss": 1.0124, "step": 1180 }, { "epoch": 0.06835486848177204, "grad_norm": 0.25543469190597534, "learning_rate": 0.00013667820069204154, "loss": 1.0293, "step": 1185 }, { "epoch": 0.0686432856483618, "grad_norm": 0.261433869600296, "learning_rate": 0.0001372549019607843, "loss": 0.9565, "step": 1190 }, { "epoch": 0.06893170281495155, "grad_norm": 0.24583804607391357, "learning_rate": 0.0001378316032295271, "loss": 1.0264, "step": 1195 }, { "epoch": 0.0692201199815413, "grad_norm": 0.24933773279190063, "learning_rate": 0.00013840830449826988, "loss": 0.9649, "step": 1200 }, { "epoch": 0.06950853714813106, "grad_norm": 0.2605167329311371, "learning_rate": 0.0001389850057670127, "loss": 1.0164, "step": 1205 }, { "epoch": 0.06979695431472081, "grad_norm": 0.2488076090812683, "learning_rate": 0.00013956170703575548, "loss": 1.0082, "step": 1210 }, { "epoch": 0.07008537148131057, "grad_norm": 0.2312484085559845, "learning_rate": 0.00014013840830449828, "loss": 1.0003, "step": 1215 }, { "epoch": 0.07037378864790032, "grad_norm": 0.25844693183898926, "learning_rate": 0.00014071510957324108, "loss": 1.022, "step": 1220 }, { "epoch": 0.07066220581449008, "grad_norm": 0.24452704191207886, "learning_rate": 0.00014129181084198387, "loss": 0.9942, "step": 1225 }, { "epoch": 0.07095062298107983, "grad_norm": 0.25007927417755127, "learning_rate": 0.00014186851211072665, "loss": 0.9771, "step": 1230 }, { "epoch": 0.07123904014766959, "grad_norm": 0.22107909619808197, "learning_rate": 0.00014244521337946944, "loss": 0.9623, "step": 1235 }, { "epoch": 0.07152745731425934, "grad_norm": 0.25855445861816406, "learning_rate": 0.00014302191464821224, "loss": 1.0234, "step": 1240 }, { "epoch": 0.0718158744808491, "grad_norm": 0.24878598749637604, "learning_rate": 0.00014359861591695501, "loss": 0.978, "step": 1245 }, { "epoch": 0.07210429164743885, "grad_norm": 0.24573691189289093, "learning_rate": 0.0001441753171856978, "loss": 1.0046, "step": 1250 }, { "epoch": 0.07239270881402861, "grad_norm": 0.24604535102844238, "learning_rate": 0.00014475201845444058, "loss": 1.0427, "step": 1255 }, { "epoch": 0.07268112598061836, "grad_norm": 0.2441253662109375, "learning_rate": 0.0001453287197231834, "loss": 0.9979, "step": 1260 }, { "epoch": 0.07296954314720812, "grad_norm": 0.24777497351169586, "learning_rate": 0.00014590542099192618, "loss": 1.0299, "step": 1265 }, { "epoch": 0.07325796031379787, "grad_norm": 0.24094311892986298, "learning_rate": 0.00014648212226066898, "loss": 0.9391, "step": 1270 }, { "epoch": 0.07354637748038763, "grad_norm": 0.22625485062599182, "learning_rate": 0.00014705882352941178, "loss": 0.9858, "step": 1275 }, { "epoch": 0.07383479464697738, "grad_norm": 0.23727013170719147, "learning_rate": 0.00014763552479815458, "loss": 0.9819, "step": 1280 }, { "epoch": 0.07412321181356714, "grad_norm": 0.2502304017543793, "learning_rate": 0.00014821222606689735, "loss": 0.9641, "step": 1285 }, { "epoch": 0.0744116289801569, "grad_norm": 0.2629458010196686, "learning_rate": 0.00014878892733564015, "loss": 0.9894, "step": 1290 }, { "epoch": 0.07470004614674665, "grad_norm": 0.2599036693572998, "learning_rate": 0.00014936562860438295, "loss": 1.0051, "step": 1295 }, { "epoch": 0.0749884633133364, "grad_norm": 0.26761215925216675, "learning_rate": 0.00014994232987312572, "loss": 0.9867, "step": 1300 }, { "epoch": 0.07527688047992616, "grad_norm": 0.22773049771785736, "learning_rate": 0.00015051903114186852, "loss": 0.9696, "step": 1305 }, { "epoch": 0.07556529764651591, "grad_norm": 0.2541469633579254, "learning_rate": 0.0001510957324106113, "loss": 0.9657, "step": 1310 }, { "epoch": 0.07585371481310568, "grad_norm": 0.24339397251605988, "learning_rate": 0.00015167243367935411, "loss": 0.9592, "step": 1315 }, { "epoch": 0.07614213197969544, "grad_norm": 0.24885432422161102, "learning_rate": 0.00015224913494809689, "loss": 0.9516, "step": 1320 }, { "epoch": 0.07643054914628519, "grad_norm": 0.24829605221748352, "learning_rate": 0.00015282583621683968, "loss": 0.9578, "step": 1325 }, { "epoch": 0.07671896631287495, "grad_norm": 0.2368239462375641, "learning_rate": 0.00015340253748558246, "loss": 0.9982, "step": 1330 }, { "epoch": 0.0770073834794647, "grad_norm": 0.2484530210494995, "learning_rate": 0.00015397923875432528, "loss": 0.9453, "step": 1335 }, { "epoch": 0.07729580064605446, "grad_norm": 0.26049789786338806, "learning_rate": 0.00015455594002306805, "loss": 1.0236, "step": 1340 }, { "epoch": 0.07758421781264421, "grad_norm": 0.24843549728393555, "learning_rate": 0.00015513264129181085, "loss": 0.9244, "step": 1345 }, { "epoch": 0.07787263497923397, "grad_norm": 0.2523777484893799, "learning_rate": 0.00015570934256055365, "loss": 1.0428, "step": 1350 }, { "epoch": 0.07816105214582372, "grad_norm": 0.2532496750354767, "learning_rate": 0.00015628604382929645, "loss": 0.9755, "step": 1355 }, { "epoch": 0.07844946931241348, "grad_norm": 0.24413040280342102, "learning_rate": 0.00015686274509803922, "loss": 1.0127, "step": 1360 }, { "epoch": 0.07873788647900323, "grad_norm": 0.23477444052696228, "learning_rate": 0.00015743944636678202, "loss": 0.9863, "step": 1365 }, { "epoch": 0.07902630364559299, "grad_norm": 0.25841665267944336, "learning_rate": 0.00015801614763552482, "loss": 0.9707, "step": 1370 }, { "epoch": 0.07931472081218274, "grad_norm": 0.2560499310493469, "learning_rate": 0.0001585928489042676, "loss": 1.0088, "step": 1375 }, { "epoch": 0.0796031379787725, "grad_norm": 0.26023638248443604, "learning_rate": 0.0001591695501730104, "loss": 0.9853, "step": 1380 }, { "epoch": 0.07989155514536225, "grad_norm": 0.24000810086727142, "learning_rate": 0.00015974625144175316, "loss": 0.9841, "step": 1385 }, { "epoch": 0.08017997231195201, "grad_norm": 0.243475079536438, "learning_rate": 0.00016032295271049598, "loss": 0.8794, "step": 1390 }, { "epoch": 0.08046838947854176, "grad_norm": 0.2558750510215759, "learning_rate": 0.00016089965397923876, "loss": 0.9301, "step": 1395 }, { "epoch": 0.08075680664513152, "grad_norm": 0.24674943089485168, "learning_rate": 0.00016147635524798155, "loss": 0.9552, "step": 1400 }, { "epoch": 0.08104522381172127, "grad_norm": 0.23435547947883606, "learning_rate": 0.00016205305651672435, "loss": 0.9505, "step": 1405 }, { "epoch": 0.08133364097831103, "grad_norm": 0.24860598146915436, "learning_rate": 0.00016262975778546715, "loss": 1.0281, "step": 1410 }, { "epoch": 0.08162205814490078, "grad_norm": 0.24203436076641083, "learning_rate": 0.00016320645905420992, "loss": 0.9113, "step": 1415 }, { "epoch": 0.08191047531149054, "grad_norm": 0.2528266906738281, "learning_rate": 0.00016378316032295272, "loss": 0.9578, "step": 1420 }, { "epoch": 0.0821988924780803, "grad_norm": 0.29618388414382935, "learning_rate": 0.00016435986159169552, "loss": 0.982, "step": 1425 }, { "epoch": 0.08248730964467005, "grad_norm": 0.248749777674675, "learning_rate": 0.0001649365628604383, "loss": 0.9963, "step": 1430 }, { "epoch": 0.0827757268112598, "grad_norm": 0.25069600343704224, "learning_rate": 0.0001655132641291811, "loss": 1.0234, "step": 1435 }, { "epoch": 0.08306414397784956, "grad_norm": 0.24154260754585266, "learning_rate": 0.00016608996539792386, "loss": 0.976, "step": 1440 }, { "epoch": 0.08335256114443931, "grad_norm": 0.24604587256908417, "learning_rate": 0.0001666666666666667, "loss": 0.9341, "step": 1445 }, { "epoch": 0.08364097831102907, "grad_norm": 0.23897351324558258, "learning_rate": 0.00016724336793540946, "loss": 1.0146, "step": 1450 }, { "epoch": 0.08392939547761882, "grad_norm": 0.24604809284210205, "learning_rate": 0.00016782006920415226, "loss": 0.916, "step": 1455 }, { "epoch": 0.08421781264420858, "grad_norm": 0.2406589388847351, "learning_rate": 0.00016839677047289503, "loss": 0.9518, "step": 1460 }, { "epoch": 0.08450622981079833, "grad_norm": 0.24334654211997986, "learning_rate": 0.00016897347174163786, "loss": 0.9728, "step": 1465 }, { "epoch": 0.08479464697738809, "grad_norm": 0.2420976758003235, "learning_rate": 0.00016955017301038063, "loss": 0.983, "step": 1470 }, { "epoch": 0.08508306414397784, "grad_norm": 0.2604774832725525, "learning_rate": 0.00017012687427912343, "loss": 0.9632, "step": 1475 }, { "epoch": 0.0853714813105676, "grad_norm": 0.24979344010353088, "learning_rate": 0.00017070357554786622, "loss": 0.9323, "step": 1480 }, { "epoch": 0.08565989847715735, "grad_norm": 0.25925835967063904, "learning_rate": 0.000171280276816609, "loss": 1.0242, "step": 1485 }, { "epoch": 0.08594831564374712, "grad_norm": 0.2391650229692459, "learning_rate": 0.0001718569780853518, "loss": 0.9205, "step": 1490 }, { "epoch": 0.08623673281033688, "grad_norm": 0.2430115044116974, "learning_rate": 0.00017243367935409457, "loss": 0.9971, "step": 1495 }, { "epoch": 0.08652514997692663, "grad_norm": 0.23013629019260406, "learning_rate": 0.0001730103806228374, "loss": 0.9255, "step": 1500 }, { "epoch": 0.08681356714351639, "grad_norm": 0.24768148362636566, "learning_rate": 0.00017358708189158016, "loss": 0.9575, "step": 1505 }, { "epoch": 0.08710198431010614, "grad_norm": 0.24201525747776031, "learning_rate": 0.00017416378316032296, "loss": 0.9346, "step": 1510 }, { "epoch": 0.0873904014766959, "grad_norm": 0.24337361752986908, "learning_rate": 0.00017474048442906573, "loss": 0.9557, "step": 1515 }, { "epoch": 0.08767881864328565, "grad_norm": 0.2556352913379669, "learning_rate": 0.00017531718569780856, "loss": 0.9355, "step": 1520 }, { "epoch": 0.08796723580987541, "grad_norm": 0.2400965839624405, "learning_rate": 0.00017589388696655133, "loss": 1.035, "step": 1525 }, { "epoch": 0.08825565297646516, "grad_norm": 0.2564597427845001, "learning_rate": 0.00017647058823529413, "loss": 1.0211, "step": 1530 }, { "epoch": 0.08854407014305492, "grad_norm": 0.24977734684944153, "learning_rate": 0.00017704728950403693, "loss": 0.9954, "step": 1535 }, { "epoch": 0.08883248730964467, "grad_norm": 0.2484363615512848, "learning_rate": 0.00017762399077277973, "loss": 1.0266, "step": 1540 }, { "epoch": 0.08912090447623443, "grad_norm": 0.2474583089351654, "learning_rate": 0.0001782006920415225, "loss": 1.0453, "step": 1545 }, { "epoch": 0.08940932164282418, "grad_norm": 0.26252439618110657, "learning_rate": 0.0001787773933102653, "loss": 1.0519, "step": 1550 }, { "epoch": 0.08969773880941394, "grad_norm": 0.26378124952316284, "learning_rate": 0.0001793540945790081, "loss": 0.9504, "step": 1555 }, { "epoch": 0.08998615597600369, "grad_norm": 0.26493802666664124, "learning_rate": 0.00017993079584775087, "loss": 0.9936, "step": 1560 }, { "epoch": 0.09027457314259345, "grad_norm": 0.2636902928352356, "learning_rate": 0.00018050749711649367, "loss": 1.0266, "step": 1565 }, { "epoch": 0.0905629903091832, "grad_norm": 0.26412469148635864, "learning_rate": 0.00018108419838523644, "loss": 0.9843, "step": 1570 }, { "epoch": 0.09085140747577296, "grad_norm": 0.23820865154266357, "learning_rate": 0.00018166089965397926, "loss": 0.9403, "step": 1575 }, { "epoch": 0.09113982464236271, "grad_norm": 0.24650079011917114, "learning_rate": 0.00018223760092272203, "loss": 1.086, "step": 1580 }, { "epoch": 0.09142824180895247, "grad_norm": 0.2429857850074768, "learning_rate": 0.00018281430219146483, "loss": 0.9621, "step": 1585 }, { "epoch": 0.09171665897554222, "grad_norm": 0.23865139484405518, "learning_rate": 0.0001833910034602076, "loss": 0.9839, "step": 1590 }, { "epoch": 0.09200507614213198, "grad_norm": 0.2494489699602127, "learning_rate": 0.00018396770472895043, "loss": 1.0557, "step": 1595 }, { "epoch": 0.09229349330872173, "grad_norm": 0.247470885515213, "learning_rate": 0.0001845444059976932, "loss": 0.9255, "step": 1600 }, { "epoch": 0.09258191047531149, "grad_norm": 0.2614395022392273, "learning_rate": 0.000185121107266436, "loss": 0.9396, "step": 1605 }, { "epoch": 0.09287032764190124, "grad_norm": 0.2510371208190918, "learning_rate": 0.0001856978085351788, "loss": 0.9432, "step": 1610 }, { "epoch": 0.093158744808491, "grad_norm": 0.26177895069122314, "learning_rate": 0.00018627450980392157, "loss": 0.9958, "step": 1615 }, { "epoch": 0.09344716197508075, "grad_norm": 0.24554099142551422, "learning_rate": 0.00018685121107266437, "loss": 0.9845, "step": 1620 }, { "epoch": 0.09373557914167051, "grad_norm": 0.2630642354488373, "learning_rate": 0.00018742791234140714, "loss": 0.9659, "step": 1625 }, { "epoch": 0.09402399630826026, "grad_norm": 0.2493613213300705, "learning_rate": 0.00018800461361014997, "loss": 1.0318, "step": 1630 }, { "epoch": 0.09431241347485002, "grad_norm": 0.25541171431541443, "learning_rate": 0.00018858131487889274, "loss": 0.9631, "step": 1635 }, { "epoch": 0.09460083064143977, "grad_norm": 0.25047364830970764, "learning_rate": 0.00018915801614763554, "loss": 0.9596, "step": 1640 }, { "epoch": 0.09488924780802953, "grad_norm": 0.25506365299224854, "learning_rate": 0.0001897347174163783, "loss": 0.9745, "step": 1645 }, { "epoch": 0.09517766497461928, "grad_norm": 0.25513169169425964, "learning_rate": 0.00019031141868512113, "loss": 0.9668, "step": 1650 }, { "epoch": 0.09546608214120904, "grad_norm": 0.27256086468696594, "learning_rate": 0.0001908881199538639, "loss": 0.9768, "step": 1655 }, { "epoch": 0.0957544993077988, "grad_norm": 0.2694586217403412, "learning_rate": 0.0001914648212226067, "loss": 0.9661, "step": 1660 }, { "epoch": 0.09604291647438856, "grad_norm": 0.2510400414466858, "learning_rate": 0.00019204152249134948, "loss": 0.972, "step": 1665 }, { "epoch": 0.09633133364097832, "grad_norm": 0.25899869203567505, "learning_rate": 0.00019261822376009227, "loss": 0.932, "step": 1670 }, { "epoch": 0.09661975080756807, "grad_norm": 0.2670111060142517, "learning_rate": 0.00019319492502883507, "loss": 1.0586, "step": 1675 }, { "epoch": 0.09690816797415783, "grad_norm": 0.2525533437728882, "learning_rate": 0.00019377162629757784, "loss": 1.0007, "step": 1680 }, { "epoch": 0.09719658514074758, "grad_norm": 0.262040376663208, "learning_rate": 0.00019434832756632067, "loss": 0.9174, "step": 1685 }, { "epoch": 0.09748500230733734, "grad_norm": 0.2837670147418976, "learning_rate": 0.00019492502883506344, "loss": 0.8906, "step": 1690 }, { "epoch": 0.09777341947392709, "grad_norm": 0.2778575122356415, "learning_rate": 0.00019550173010380624, "loss": 0.9947, "step": 1695 }, { "epoch": 0.09806183664051685, "grad_norm": 0.24817965924739838, "learning_rate": 0.000196078431372549, "loss": 1.0096, "step": 1700 }, { "epoch": 0.0983502538071066, "grad_norm": 0.2699022591114044, "learning_rate": 0.00019665513264129184, "loss": 1.0408, "step": 1705 }, { "epoch": 0.09863867097369636, "grad_norm": 0.26346924901008606, "learning_rate": 0.0001972318339100346, "loss": 1.0206, "step": 1710 }, { "epoch": 0.09892708814028611, "grad_norm": 0.2642849087715149, "learning_rate": 0.0001978085351787774, "loss": 0.9985, "step": 1715 }, { "epoch": 0.09921550530687587, "grad_norm": 0.27539825439453125, "learning_rate": 0.00019838523644752018, "loss": 0.9614, "step": 1720 }, { "epoch": 0.09950392247346562, "grad_norm": 0.25085148215293884, "learning_rate": 0.000198961937716263, "loss": 0.9855, "step": 1725 }, { "epoch": 0.09979233964005538, "grad_norm": 0.2658745348453522, "learning_rate": 0.00019953863898500578, "loss": 0.9996, "step": 1730 }, { "epoch": 0.10008075680664513, "grad_norm": 0.2539004981517792, "learning_rate": 0.00019999999797274117, "loss": 0.9276, "step": 1735 }, { "epoch": 0.10036917397323489, "grad_norm": 0.2977031469345093, "learning_rate": 0.0001999999270186907, "loss": 0.991, "step": 1740 }, { "epoch": 0.10065759113982464, "grad_norm": 0.279045045375824, "learning_rate": 0.0001999997547017808, "loss": 0.9691, "step": 1745 }, { "epoch": 0.1009460083064144, "grad_norm": 0.2583720088005066, "learning_rate": 0.0001999994810221862, "loss": 0.9513, "step": 1750 }, { "epoch": 0.10123442547300415, "grad_norm": 0.2970728874206543, "learning_rate": 0.00019999910598018426, "loss": 0.9861, "step": 1755 }, { "epoch": 0.10152284263959391, "grad_norm": 0.25419795513153076, "learning_rate": 0.00019999862957615513, "loss": 1.0042, "step": 1760 }, { "epoch": 0.10181125980618366, "grad_norm": 0.26986587047576904, "learning_rate": 0.00019999805181058176, "loss": 0.9627, "step": 1765 }, { "epoch": 0.10209967697277342, "grad_norm": 0.2580127418041229, "learning_rate": 0.00019999737268404973, "loss": 1.0264, "step": 1770 }, { "epoch": 0.10238809413936317, "grad_norm": 0.25490108132362366, "learning_rate": 0.00019999659219724749, "loss": 0.9655, "step": 1775 }, { "epoch": 0.10267651130595293, "grad_norm": 0.2768772840499878, "learning_rate": 0.00019999571035096608, "loss": 1.0224, "step": 1780 }, { "epoch": 0.10296492847254268, "grad_norm": 0.25926682353019714, "learning_rate": 0.00019999472714609943, "loss": 0.9058, "step": 1785 }, { "epoch": 0.10325334563913244, "grad_norm": 0.2662297487258911, "learning_rate": 0.00019999364258364413, "loss": 0.9776, "step": 1790 }, { "epoch": 0.10354176280572219, "grad_norm": 0.2684202194213867, "learning_rate": 0.0001999924566646995, "loss": 0.9563, "step": 1795 }, { "epoch": 0.10383017997231195, "grad_norm": 0.25693777203559875, "learning_rate": 0.00019999116939046764, "loss": 1.0355, "step": 1800 }, { "epoch": 0.1041185971389017, "grad_norm": 0.24468928575515747, "learning_rate": 0.0001999897807622534, "loss": 1.0907, "step": 1805 }, { "epoch": 0.10440701430549146, "grad_norm": 0.25416669249534607, "learning_rate": 0.0001999882907814643, "loss": 1.0232, "step": 1810 }, { "epoch": 0.10469543147208121, "grad_norm": 0.27336063981056213, "learning_rate": 0.00019998669944961062, "loss": 0.9219, "step": 1815 }, { "epoch": 0.10498384863867097, "grad_norm": 0.26575300097465515, "learning_rate": 0.0001999850067683054, "loss": 0.9423, "step": 1820 }, { "epoch": 0.10527226580526072, "grad_norm": 0.2702259421348572, "learning_rate": 0.00019998321273926437, "loss": 1.0044, "step": 1825 }, { "epoch": 0.10556068297185048, "grad_norm": 0.25690004229545593, "learning_rate": 0.00019998131736430604, "loss": 0.9728, "step": 1830 }, { "epoch": 0.10584910013844025, "grad_norm": 0.27018973231315613, "learning_rate": 0.00019997932064535158, "loss": 1.0005, "step": 1835 }, { "epoch": 0.10613751730503, "grad_norm": 0.276635080575943, "learning_rate": 0.00019997722258442499, "loss": 0.9643, "step": 1840 }, { "epoch": 0.10642593447161976, "grad_norm": 0.2834376394748688, "learning_rate": 0.00019997502318365286, "loss": 0.9691, "step": 1845 }, { "epoch": 0.10671435163820951, "grad_norm": 0.2709560692310333, "learning_rate": 0.00019997272244526456, "loss": 0.9281, "step": 1850 }, { "epoch": 0.10700276880479927, "grad_norm": 0.2817580997943878, "learning_rate": 0.00019997032037159224, "loss": 1.0373, "step": 1855 }, { "epoch": 0.10729118597138902, "grad_norm": 0.27111148834228516, "learning_rate": 0.00019996781696507069, "loss": 1.0148, "step": 1860 }, { "epoch": 0.10757960313797878, "grad_norm": 0.25952383875846863, "learning_rate": 0.00019996521222823743, "loss": 0.9546, "step": 1865 }, { "epoch": 0.10786802030456853, "grad_norm": 0.2788892686367035, "learning_rate": 0.00019996250616373268, "loss": 0.9801, "step": 1870 }, { "epoch": 0.10815643747115829, "grad_norm": 1.3752487897872925, "learning_rate": 0.00019995969877429945, "loss": 0.9122, "step": 1875 }, { "epoch": 0.10844485463774804, "grad_norm": 0.2783893048763275, "learning_rate": 0.0001999567900627833, "loss": 1.0063, "step": 1880 }, { "epoch": 0.1087332718043378, "grad_norm": 0.27742305397987366, "learning_rate": 0.0001999537800321327, "loss": 0.9959, "step": 1885 }, { "epoch": 0.10902168897092755, "grad_norm": 0.2651984691619873, "learning_rate": 0.0001999506686853986, "loss": 1.007, "step": 1890 }, { "epoch": 0.10931010613751731, "grad_norm": 0.25715208053588867, "learning_rate": 0.0001999474560257348, "loss": 0.9855, "step": 1895 }, { "epoch": 0.10959852330410706, "grad_norm": 0.27990275621414185, "learning_rate": 0.00019994414205639775, "loss": 0.9599, "step": 1900 }, { "epoch": 0.10988694047069682, "grad_norm": 0.25654786825180054, "learning_rate": 0.00019994072678074655, "loss": 0.9955, "step": 1905 }, { "epoch": 0.11017535763728657, "grad_norm": 0.28725671768188477, "learning_rate": 0.00019993721020224308, "loss": 0.9419, "step": 1910 }, { "epoch": 0.11046377480387633, "grad_norm": 0.25918087363243103, "learning_rate": 0.00019993359232445176, "loss": 0.9585, "step": 1915 }, { "epoch": 0.11075219197046608, "grad_norm": 0.25459691882133484, "learning_rate": 0.0001999298731510399, "loss": 0.9382, "step": 1920 }, { "epoch": 0.11104060913705584, "grad_norm": 0.2630646526813507, "learning_rate": 0.00019992605268577727, "loss": 0.9103, "step": 1925 }, { "epoch": 0.11132902630364559, "grad_norm": 0.2786347270011902, "learning_rate": 0.00019992213093253643, "loss": 1.0174, "step": 1930 }, { "epoch": 0.11161744347023535, "grad_norm": 0.25533023476600647, "learning_rate": 0.00019991810789529257, "loss": 1.003, "step": 1935 }, { "epoch": 0.1119058606368251, "grad_norm": 0.2641088366508484, "learning_rate": 0.0001999139835781236, "loss": 0.9767, "step": 1940 }, { "epoch": 0.11219427780341486, "grad_norm": 0.2834392189979553, "learning_rate": 0.00019990975798521, "loss": 1.0413, "step": 1945 }, { "epoch": 0.11248269497000461, "grad_norm": 0.29145047068595886, "learning_rate": 0.00019990543112083503, "loss": 0.9319, "step": 1950 }, { "epoch": 0.11277111213659437, "grad_norm": 0.2648943364620209, "learning_rate": 0.00019990100298938442, "loss": 0.9541, "step": 1955 }, { "epoch": 0.11305952930318412, "grad_norm": 0.2761361002922058, "learning_rate": 0.00019989647359534672, "loss": 1.041, "step": 1960 }, { "epoch": 0.11334794646977388, "grad_norm": 0.26408612728118896, "learning_rate": 0.00019989184294331308, "loss": 0.9914, "step": 1965 }, { "epoch": 0.11363636363636363, "grad_norm": 0.28646019101142883, "learning_rate": 0.0001998871110379772, "loss": 1.0491, "step": 1970 }, { "epoch": 0.11392478080295339, "grad_norm": 0.28368857502937317, "learning_rate": 0.0001998822778841355, "loss": 1.0156, "step": 1975 }, { "epoch": 0.11421319796954314, "grad_norm": 0.2637539207935333, "learning_rate": 0.00019987734348668706, "loss": 0.9229, "step": 1980 }, { "epoch": 0.1145016151361329, "grad_norm": 0.3021569848060608, "learning_rate": 0.00019987230785063344, "loss": 1.0092, "step": 1985 }, { "epoch": 0.11479003230272265, "grad_norm": 0.2628127336502075, "learning_rate": 0.00019986717098107896, "loss": 0.9768, "step": 1990 }, { "epoch": 0.11507844946931241, "grad_norm": 0.2722758948802948, "learning_rate": 0.0001998619328832305, "loss": 1.0138, "step": 1995 }, { "epoch": 0.11536686663590216, "grad_norm": 0.261016845703125, "learning_rate": 0.00019985659356239758, "loss": 1.0576, "step": 2000 }, { "epoch": 0.11565528380249192, "grad_norm": 0.26313841342926025, "learning_rate": 0.0001998511530239922, "loss": 0.9934, "step": 2005 }, { "epoch": 0.11594370096908169, "grad_norm": 0.2713305354118347, "learning_rate": 0.00019984561127352914, "loss": 1.0219, "step": 2010 }, { "epoch": 0.11623211813567144, "grad_norm": 0.24656793475151062, "learning_rate": 0.00019983996831662566, "loss": 1.0266, "step": 2015 }, { "epoch": 0.1165205353022612, "grad_norm": 0.259756863117218, "learning_rate": 0.00019983422415900158, "loss": 1.0254, "step": 2020 }, { "epoch": 0.11680895246885095, "grad_norm": 0.2553316652774811, "learning_rate": 0.0001998283788064794, "loss": 0.9304, "step": 2025 }, { "epoch": 0.1170973696354407, "grad_norm": 0.27868157625198364, "learning_rate": 0.00019982243226498411, "loss": 1.0193, "step": 2030 }, { "epoch": 0.11738578680203046, "grad_norm": 0.2899937927722931, "learning_rate": 0.00019981638454054333, "loss": 0.8705, "step": 2035 }, { "epoch": 0.11767420396862022, "grad_norm": 0.2696991264820099, "learning_rate": 0.00019981023563928716, "loss": 0.9649, "step": 2040 }, { "epoch": 0.11796262113520997, "grad_norm": 0.26514795422554016, "learning_rate": 0.00019980398556744837, "loss": 0.9288, "step": 2045 }, { "epoch": 0.11825103830179973, "grad_norm": 0.2759961187839508, "learning_rate": 0.00019979763433136216, "loss": 0.9752, "step": 2050 }, { "epoch": 0.11853945546838948, "grad_norm": 0.24151116609573364, "learning_rate": 0.00019979118193746637, "loss": 0.9837, "step": 2055 }, { "epoch": 0.11882787263497924, "grad_norm": 0.2888840436935425, "learning_rate": 0.00019978462839230133, "loss": 1.0506, "step": 2060 }, { "epoch": 0.11911628980156899, "grad_norm": 0.258368581533432, "learning_rate": 0.00019977797370250986, "loss": 0.9598, "step": 2065 }, { "epoch": 0.11940470696815875, "grad_norm": 0.27287793159484863, "learning_rate": 0.0001997712178748374, "loss": 1.0012, "step": 2070 }, { "epoch": 0.1196931241347485, "grad_norm": 0.2549577057361603, "learning_rate": 0.00019976436091613184, "loss": 1.0228, "step": 2075 }, { "epoch": 0.11998154130133826, "grad_norm": 0.26153385639190674, "learning_rate": 0.0001997574028333436, "loss": 0.9636, "step": 2080 }, { "epoch": 0.12026995846792801, "grad_norm": 0.2726786136627197, "learning_rate": 0.00019975034363352556, "loss": 0.9345, "step": 2085 }, { "epoch": 0.12055837563451777, "grad_norm": 0.27283257246017456, "learning_rate": 0.0001997431833238332, "loss": 0.9742, "step": 2090 }, { "epoch": 0.12084679280110752, "grad_norm": 0.35528162121772766, "learning_rate": 0.00019973592191152437, "loss": 1.0162, "step": 2095 }, { "epoch": 0.12113520996769728, "grad_norm": 0.2918716371059418, "learning_rate": 0.00019972855940395947, "loss": 1.0201, "step": 2100 }, { "epoch": 0.12142362713428703, "grad_norm": 0.28611305356025696, "learning_rate": 0.00019972109580860132, "loss": 0.9773, "step": 2105 }, { "epoch": 0.12171204430087679, "grad_norm": 0.2829110026359558, "learning_rate": 0.00019971353113301527, "loss": 1.0952, "step": 2110 }, { "epoch": 0.12200046146746654, "grad_norm": 0.26948046684265137, "learning_rate": 0.0001997058653848691, "loss": 1.0, "step": 2115 }, { "epoch": 0.1222888786340563, "grad_norm": 0.259901762008667, "learning_rate": 0.00019969809857193306, "loss": 0.9584, "step": 2120 }, { "epoch": 0.12257729580064605, "grad_norm": 0.2724592387676239, "learning_rate": 0.00019969023070207973, "loss": 0.9421, "step": 2125 }, { "epoch": 0.12286571296723581, "grad_norm": 0.26687607169151306, "learning_rate": 0.0001996822617832843, "loss": 0.9197, "step": 2130 }, { "epoch": 0.12315413013382556, "grad_norm": 0.28086045384407043, "learning_rate": 0.00019967419182362429, "loss": 0.9574, "step": 2135 }, { "epoch": 0.12344254730041532, "grad_norm": 0.27749550342559814, "learning_rate": 0.0001996660208312796, "loss": 0.9948, "step": 2140 }, { "epoch": 0.12373096446700507, "grad_norm": 0.26183804869651794, "learning_rate": 0.00019965774881453263, "loss": 1.0297, "step": 2145 }, { "epoch": 0.12401938163359483, "grad_norm": 0.2577550411224365, "learning_rate": 0.00019964937578176816, "loss": 0.9852, "step": 2150 }, { "epoch": 0.12430779880018458, "grad_norm": 0.279245525598526, "learning_rate": 0.00019964090174147327, "loss": 0.9754, "step": 2155 }, { "epoch": 0.12459621596677434, "grad_norm": 0.2758920192718506, "learning_rate": 0.00019963232670223752, "loss": 0.9894, "step": 2160 }, { "epoch": 0.12488463313336409, "grad_norm": 0.29135221242904663, "learning_rate": 0.00019962365067275286, "loss": 0.9535, "step": 2165 }, { "epoch": 0.12517305029995385, "grad_norm": 0.2922044098377228, "learning_rate": 0.00019961487366181355, "loss": 0.9631, "step": 2170 }, { "epoch": 0.1254614674665436, "grad_norm": 0.2769213020801544, "learning_rate": 0.0001996059956783162, "loss": 1.0147, "step": 2175 }, { "epoch": 0.12574988463313336, "grad_norm": 0.26981329917907715, "learning_rate": 0.00019959701673125983, "loss": 1.0227, "step": 2180 }, { "epoch": 0.1260383017997231, "grad_norm": 0.27635276317596436, "learning_rate": 0.00019958793682974574, "loss": 0.9745, "step": 2185 }, { "epoch": 0.12632671896631287, "grad_norm": 0.28650912642478943, "learning_rate": 0.00019957875598297759, "loss": 1.0018, "step": 2190 }, { "epoch": 0.12661513613290262, "grad_norm": 0.26457536220550537, "learning_rate": 0.00019956947420026136, "loss": 1.0461, "step": 2195 }, { "epoch": 0.12690355329949238, "grad_norm": 0.28600943088531494, "learning_rate": 0.00019956009149100533, "loss": 0.9647, "step": 2200 }, { "epoch": 0.12719197046608213, "grad_norm": 0.2786143720149994, "learning_rate": 0.00019955060786472012, "loss": 0.9247, "step": 2205 }, { "epoch": 0.1274803876326719, "grad_norm": 0.2701742351055145, "learning_rate": 0.00019954102333101856, "loss": 0.9729, "step": 2210 }, { "epoch": 0.12776880479926164, "grad_norm": 0.28489363193511963, "learning_rate": 0.00019953133789961584, "loss": 0.9782, "step": 2215 }, { "epoch": 0.1280572219658514, "grad_norm": 0.26730117201805115, "learning_rate": 0.0001995215515803294, "loss": 0.9715, "step": 2220 }, { "epoch": 0.12834563913244115, "grad_norm": 0.28904202580451965, "learning_rate": 0.00019951166438307894, "loss": 0.9835, "step": 2225 }, { "epoch": 0.1286340562990309, "grad_norm": 0.2703316807746887, "learning_rate": 0.00019950167631788642, "loss": 0.9696, "step": 2230 }, { "epoch": 0.12892247346562066, "grad_norm": 0.28106942772865295, "learning_rate": 0.000199491587394876, "loss": 0.9521, "step": 2235 }, { "epoch": 0.12921089063221042, "grad_norm": 0.27781790494918823, "learning_rate": 0.00019948139762427416, "loss": 0.9942, "step": 2240 }, { "epoch": 0.12949930779880017, "grad_norm": 0.26624271273612976, "learning_rate": 0.00019947110701640952, "loss": 0.9662, "step": 2245 }, { "epoch": 0.12978772496538993, "grad_norm": 0.2619341015815735, "learning_rate": 0.000199460715581713, "loss": 0.9083, "step": 2250 }, { "epoch": 0.13007614213197968, "grad_norm": 0.2704095244407654, "learning_rate": 0.00019945022333071752, "loss": 1.0528, "step": 2255 }, { "epoch": 0.13036455929856944, "grad_norm": 0.2684679627418518, "learning_rate": 0.0001994396302740585, "loss": 0.9707, "step": 2260 }, { "epoch": 0.1306529764651592, "grad_norm": 0.2747194766998291, "learning_rate": 0.00019942893642247326, "loss": 0.9843, "step": 2265 }, { "epoch": 0.13094139363174895, "grad_norm": 0.27362656593322754, "learning_rate": 0.00019941814178680144, "loss": 1.0139, "step": 2270 }, { "epoch": 0.13122981079833873, "grad_norm": 0.2812444567680359, "learning_rate": 0.00019940724637798477, "loss": 0.9368, "step": 2275 }, { "epoch": 0.13151822796492849, "grad_norm": 0.2766329050064087, "learning_rate": 0.00019939625020706724, "loss": 0.9932, "step": 2280 }, { "epoch": 0.13180664513151824, "grad_norm": 0.27966558933258057, "learning_rate": 0.0001993851532851948, "loss": 0.9762, "step": 2285 }, { "epoch": 0.132095062298108, "grad_norm": 0.2908051908016205, "learning_rate": 0.00019937395562361564, "loss": 1.0419, "step": 2290 }, { "epoch": 0.13238347946469775, "grad_norm": 0.272611528635025, "learning_rate": 0.0001993626572336801, "loss": 0.9563, "step": 2295 }, { "epoch": 0.1326718966312875, "grad_norm": 0.2815663814544678, "learning_rate": 0.00019935125812684047, "loss": 0.9881, "step": 2300 }, { "epoch": 0.13296031379787726, "grad_norm": 0.28436169028282166, "learning_rate": 0.0001993397583146513, "loss": 1.0, "step": 2305 }, { "epoch": 0.13324873096446702, "grad_norm": 0.2669413983821869, "learning_rate": 0.00019932815780876904, "loss": 0.9727, "step": 2310 }, { "epoch": 0.13353714813105677, "grad_norm": 0.2894003093242645, "learning_rate": 0.00019931645662095237, "loss": 0.9613, "step": 2315 }, { "epoch": 0.13382556529764653, "grad_norm": 0.27110880613327026, "learning_rate": 0.00019930465476306197, "loss": 0.9912, "step": 2320 }, { "epoch": 0.13411398246423628, "grad_norm": 0.28134435415267944, "learning_rate": 0.0001992927522470605, "loss": 1.0183, "step": 2325 }, { "epoch": 0.13440239963082604, "grad_norm": 0.2562038004398346, "learning_rate": 0.00019928074908501272, "loss": 0.9604, "step": 2330 }, { "epoch": 0.1346908167974158, "grad_norm": 0.3024313747882843, "learning_rate": 0.0001992686452890854, "loss": 0.9825, "step": 2335 }, { "epoch": 0.13497923396400555, "grad_norm": 0.28540000319480896, "learning_rate": 0.00019925644087154734, "loss": 0.9882, "step": 2340 }, { "epoch": 0.1352676511305953, "grad_norm": 0.26603230834007263, "learning_rate": 0.0001992441358447692, "loss": 0.9883, "step": 2345 }, { "epoch": 0.13555606829718506, "grad_norm": 0.28682613372802734, "learning_rate": 0.00019923173022122378, "loss": 0.9404, "step": 2350 }, { "epoch": 0.1358444854637748, "grad_norm": 0.29595518112182617, "learning_rate": 0.00019921922401348576, "loss": 0.963, "step": 2355 }, { "epoch": 0.13613290263036457, "grad_norm": 0.2716725468635559, "learning_rate": 0.00019920661723423183, "loss": 0.9273, "step": 2360 }, { "epoch": 0.13642131979695432, "grad_norm": 0.2717735469341278, "learning_rate": 0.00019919390989624054, "loss": 0.9808, "step": 2365 }, { "epoch": 0.13670973696354408, "grad_norm": 0.3040509521961212, "learning_rate": 0.00019918110201239247, "loss": 1.0277, "step": 2370 }, { "epoch": 0.13699815413013383, "grad_norm": 0.2620179355144501, "learning_rate": 0.00019916819359567001, "loss": 1.0213, "step": 2375 }, { "epoch": 0.1372865712967236, "grad_norm": 0.3165864646434784, "learning_rate": 0.00019915518465915758, "loss": 0.9429, "step": 2380 }, { "epoch": 0.13757498846331334, "grad_norm": 0.311599999666214, "learning_rate": 0.0001991420752160414, "loss": 1.0413, "step": 2385 }, { "epoch": 0.1378634056299031, "grad_norm": 0.2847161293029785, "learning_rate": 0.00019912886527960954, "loss": 0.99, "step": 2390 }, { "epoch": 0.13815182279649285, "grad_norm": 0.2932097017765045, "learning_rate": 0.00019911555486325203, "loss": 1.0477, "step": 2395 }, { "epoch": 0.1384402399630826, "grad_norm": 0.269297331571579, "learning_rate": 0.0001991021439804607, "loss": 1.0069, "step": 2400 }, { "epoch": 0.13872865712967236, "grad_norm": 0.2708551287651062, "learning_rate": 0.00019908863264482917, "loss": 0.9499, "step": 2405 }, { "epoch": 0.13901707429626212, "grad_norm": 0.2815878093242645, "learning_rate": 0.00019907502087005297, "loss": 1.0067, "step": 2410 }, { "epoch": 0.13930549146285187, "grad_norm": 0.2791478633880615, "learning_rate": 0.00019906130866992935, "loss": 0.9474, "step": 2415 }, { "epoch": 0.13959390862944163, "grad_norm": 0.2578289806842804, "learning_rate": 0.00019904749605835742, "loss": 0.9546, "step": 2420 }, { "epoch": 0.13988232579603138, "grad_norm": 0.27858108282089233, "learning_rate": 0.00019903358304933805, "loss": 1.0216, "step": 2425 }, { "epoch": 0.14017074296262114, "grad_norm": 0.2818721830844879, "learning_rate": 0.00019901956965697387, "loss": 0.9856, "step": 2430 }, { "epoch": 0.1404591601292109, "grad_norm": 0.30525150895118713, "learning_rate": 0.0001990054558954693, "loss": 1.0186, "step": 2435 }, { "epoch": 0.14074757729580065, "grad_norm": 0.2578755021095276, "learning_rate": 0.00019899124177913041, "loss": 0.9917, "step": 2440 }, { "epoch": 0.1410359944623904, "grad_norm": 0.2822008728981018, "learning_rate": 0.0001989769273223651, "loss": 0.915, "step": 2445 }, { "epoch": 0.14132441162898016, "grad_norm": 0.279206782579422, "learning_rate": 0.00019896251253968288, "loss": 0.9978, "step": 2450 }, { "epoch": 0.1416128287955699, "grad_norm": 0.3062402009963989, "learning_rate": 0.000198947997445695, "loss": 0.9784, "step": 2455 }, { "epoch": 0.14190124596215967, "grad_norm": 0.28376179933547974, "learning_rate": 0.0001989333820551144, "loss": 0.8916, "step": 2460 }, { "epoch": 0.14218966312874942, "grad_norm": 0.3137090504169464, "learning_rate": 0.00019891866638275564, "loss": 1.0176, "step": 2465 }, { "epoch": 0.14247808029533918, "grad_norm": 0.27621030807495117, "learning_rate": 0.00019890385044353501, "loss": 0.9188, "step": 2470 }, { "epoch": 0.14276649746192893, "grad_norm": 0.26816585659980774, "learning_rate": 0.00019888893425247032, "loss": 0.9401, "step": 2475 }, { "epoch": 0.1430549146285187, "grad_norm": 0.2706138789653778, "learning_rate": 0.00019887391782468113, "loss": 0.9599, "step": 2480 }, { "epoch": 0.14334333179510844, "grad_norm": 0.27414408326148987, "learning_rate": 0.00019885880117538846, "loss": 0.9372, "step": 2485 }, { "epoch": 0.1436317489616982, "grad_norm": 0.26232287287712097, "learning_rate": 0.000198843584319915, "loss": 1.0162, "step": 2490 }, { "epoch": 0.14392016612828795, "grad_norm": 0.2994023263454437, "learning_rate": 0.00019882826727368508, "loss": 1.0131, "step": 2495 }, { "epoch": 0.1442085832948777, "grad_norm": 0.3080557584762573, "learning_rate": 0.0001988128500522244, "loss": 0.9965, "step": 2500 }, { "epoch": 0.14449700046146746, "grad_norm": 0.2668229341506958, "learning_rate": 0.00019879733267116035, "loss": 1.0265, "step": 2505 }, { "epoch": 0.14478541762805722, "grad_norm": 0.3170202672481537, "learning_rate": 0.00019878171514622187, "loss": 0.9301, "step": 2510 }, { "epoch": 0.14507383479464697, "grad_norm": 0.2696046829223633, "learning_rate": 0.0001987659974932392, "loss": 0.9447, "step": 2515 }, { "epoch": 0.14536225196123673, "grad_norm": 0.29331788420677185, "learning_rate": 0.00019875017972814435, "loss": 0.9869, "step": 2520 }, { "epoch": 0.14565066912782648, "grad_norm": 0.27732712030410767, "learning_rate": 0.0001987342618669706, "loss": 0.9317, "step": 2525 }, { "epoch": 0.14593908629441624, "grad_norm": 0.29539668560028076, "learning_rate": 0.00019871824392585276, "loss": 0.932, "step": 2530 }, { "epoch": 0.146227503461006, "grad_norm": 0.2826956510543823, "learning_rate": 0.00019870212592102711, "loss": 1.0275, "step": 2535 }, { "epoch": 0.14651592062759575, "grad_norm": 0.27761781215667725, "learning_rate": 0.00019868590786883134, "loss": 1.0548, "step": 2540 }, { "epoch": 0.1468043377941855, "grad_norm": 0.268969863653183, "learning_rate": 0.00019866958978570452, "loss": 0.8818, "step": 2545 }, { "epoch": 0.14709275496077526, "grad_norm": 0.2976461350917816, "learning_rate": 0.00019865317168818713, "loss": 0.962, "step": 2550 }, { "epoch": 0.147381172127365, "grad_norm": 0.2762574255466461, "learning_rate": 0.00019863665359292108, "loss": 1.0253, "step": 2555 }, { "epoch": 0.14766958929395477, "grad_norm": 0.24959316849708557, "learning_rate": 0.0001986200355166495, "loss": 0.952, "step": 2560 }, { "epoch": 0.14795800646054452, "grad_norm": 0.2605302035808563, "learning_rate": 0.0001986033174762171, "loss": 0.9412, "step": 2565 }, { "epoch": 0.14824642362713428, "grad_norm": 0.2719340920448303, "learning_rate": 0.0001985864994885697, "loss": 0.9876, "step": 2570 }, { "epoch": 0.14853484079372403, "grad_norm": 0.2838572561740875, "learning_rate": 0.00019856958157075445, "loss": 1.0004, "step": 2575 }, { "epoch": 0.1488232579603138, "grad_norm": 0.2722460627555847, "learning_rate": 0.00019855256373991993, "loss": 0.9117, "step": 2580 }, { "epoch": 0.14911167512690354, "grad_norm": 0.2888812720775604, "learning_rate": 0.0001985354460133159, "loss": 0.9098, "step": 2585 }, { "epoch": 0.1494000922934933, "grad_norm": 0.27125051617622375, "learning_rate": 0.00019851822840829338, "loss": 0.9125, "step": 2590 }, { "epoch": 0.14968850946008305, "grad_norm": 0.2893664240837097, "learning_rate": 0.0001985009109423046, "loss": 0.9997, "step": 2595 }, { "epoch": 0.1499769266266728, "grad_norm": 0.27496811747550964, "learning_rate": 0.0001984834936329031, "loss": 1.0189, "step": 2600 }, { "epoch": 0.15026534379326256, "grad_norm": 0.28814586997032166, "learning_rate": 0.00019846597649774358, "loss": 1.06, "step": 2605 }, { "epoch": 0.15055376095985232, "grad_norm": 0.35203614830970764, "learning_rate": 0.00019844835955458193, "loss": 1.0003, "step": 2610 }, { "epoch": 0.15084217812644207, "grad_norm": 0.2919403612613678, "learning_rate": 0.00019843064282127511, "loss": 0.9567, "step": 2615 }, { "epoch": 0.15113059529303183, "grad_norm": 0.2837710678577423, "learning_rate": 0.00019841282631578145, "loss": 0.9919, "step": 2620 }, { "epoch": 0.1514190124596216, "grad_norm": 0.319578617811203, "learning_rate": 0.0001983949100561602, "loss": 0.9816, "step": 2625 }, { "epoch": 0.15170742962621137, "grad_norm": 0.2784458100795746, "learning_rate": 0.00019837689406057183, "loss": 0.9575, "step": 2630 }, { "epoch": 0.15199584679280112, "grad_norm": 0.26964443922042847, "learning_rate": 0.00019835877834727787, "loss": 0.9494, "step": 2635 }, { "epoch": 0.15228426395939088, "grad_norm": 0.2933679521083832, "learning_rate": 0.00019834056293464093, "loss": 1.0165, "step": 2640 }, { "epoch": 0.15257268112598063, "grad_norm": 0.27530142664909363, "learning_rate": 0.00019832224784112473, "loss": 1.0242, "step": 2645 }, { "epoch": 0.15286109829257039, "grad_norm": 0.28296253085136414, "learning_rate": 0.00019830383308529393, "loss": 1.0447, "step": 2650 }, { "epoch": 0.15314951545916014, "grad_norm": 0.2897213399410248, "learning_rate": 0.0001982853186858143, "loss": 0.9933, "step": 2655 }, { "epoch": 0.1534379326257499, "grad_norm": 0.29725173115730286, "learning_rate": 0.00019826670466145262, "loss": 0.8896, "step": 2660 }, { "epoch": 0.15372634979233965, "grad_norm": 0.27441513538360596, "learning_rate": 0.0001982479910310765, "loss": 0.9831, "step": 2665 }, { "epoch": 0.1540147669589294, "grad_norm": 0.29334786534309387, "learning_rate": 0.00019822917781365474, "loss": 1.0099, "step": 2670 }, { "epoch": 0.15430318412551916, "grad_norm": 0.2920885682106018, "learning_rate": 0.00019821026502825687, "loss": 1.0279, "step": 2675 }, { "epoch": 0.15459160129210892, "grad_norm": 0.2887846529483795, "learning_rate": 0.00019819125269405352, "loss": 0.9975, "step": 2680 }, { "epoch": 0.15488001845869867, "grad_norm": 0.29183831810951233, "learning_rate": 0.00019817214083031614, "loss": 1.001, "step": 2685 }, { "epoch": 0.15516843562528843, "grad_norm": 0.26283201575279236, "learning_rate": 0.00019815292945641705, "loss": 0.9868, "step": 2690 }, { "epoch": 0.15545685279187818, "grad_norm": 0.2814032733440399, "learning_rate": 0.00019813361859182945, "loss": 0.9914, "step": 2695 }, { "epoch": 0.15574526995846794, "grad_norm": 0.28302204608917236, "learning_rate": 0.0001981142082561274, "loss": 0.8995, "step": 2700 }, { "epoch": 0.1560336871250577, "grad_norm": 0.2865697145462036, "learning_rate": 0.00019809469846898586, "loss": 0.955, "step": 2705 }, { "epoch": 0.15632210429164745, "grad_norm": 0.28486767411231995, "learning_rate": 0.0001980750892501804, "loss": 0.9249, "step": 2710 }, { "epoch": 0.1566105214582372, "grad_norm": 0.31295526027679443, "learning_rate": 0.00019805538061958765, "loss": 0.941, "step": 2715 }, { "epoch": 0.15689893862482696, "grad_norm": 0.30136919021606445, "learning_rate": 0.0001980355725971847, "loss": 0.9598, "step": 2720 }, { "epoch": 0.1571873557914167, "grad_norm": 0.2663707435131073, "learning_rate": 0.00019801566520304963, "loss": 0.9623, "step": 2725 }, { "epoch": 0.15747577295800647, "grad_norm": 0.2665877044200897, "learning_rate": 0.0001979956584573612, "loss": 0.9904, "step": 2730 }, { "epoch": 0.15776419012459622, "grad_norm": 0.2973937392234802, "learning_rate": 0.00019797555238039872, "loss": 0.9526, "step": 2735 }, { "epoch": 0.15805260729118598, "grad_norm": 0.2698862850666046, "learning_rate": 0.00019795534699254238, "loss": 0.9318, "step": 2740 }, { "epoch": 0.15834102445777573, "grad_norm": 0.28309038281440735, "learning_rate": 0.0001979350423142729, "loss": 0.9845, "step": 2745 }, { "epoch": 0.15862944162436549, "grad_norm": 0.29097744822502136, "learning_rate": 0.00019791463836617176, "loss": 0.9371, "step": 2750 }, { "epoch": 0.15891785879095524, "grad_norm": 0.27511849999427795, "learning_rate": 0.00019789413516892098, "loss": 1.0101, "step": 2755 }, { "epoch": 0.159206275957545, "grad_norm": 0.289734810590744, "learning_rate": 0.00019787353274330313, "loss": 1.0161, "step": 2760 }, { "epoch": 0.15949469312413475, "grad_norm": 0.2949714958667755, "learning_rate": 0.00019785283111020156, "loss": 1.039, "step": 2765 }, { "epoch": 0.1597831102907245, "grad_norm": 0.2833018898963928, "learning_rate": 0.00019783203029059997, "loss": 0.9582, "step": 2770 }, { "epoch": 0.16007152745731426, "grad_norm": 0.2823984920978546, "learning_rate": 0.00019781113030558267, "loss": 0.9568, "step": 2775 }, { "epoch": 0.16035994462390402, "grad_norm": 0.30174189805984497, "learning_rate": 0.00019779013117633454, "loss": 0.9622, "step": 2780 }, { "epoch": 0.16064836179049377, "grad_norm": 0.2764327824115753, "learning_rate": 0.0001977690329241409, "loss": 1.0071, "step": 2785 }, { "epoch": 0.16093677895708353, "grad_norm": 0.28060150146484375, "learning_rate": 0.00019774783557038755, "loss": 0.9681, "step": 2790 }, { "epoch": 0.16122519612367328, "grad_norm": 0.2678576111793518, "learning_rate": 0.00019772653913656076, "loss": 1.0248, "step": 2795 }, { "epoch": 0.16151361329026304, "grad_norm": 0.306606650352478, "learning_rate": 0.00019770514364424725, "loss": 1.0177, "step": 2800 }, { "epoch": 0.1618020304568528, "grad_norm": 0.29886582493782043, "learning_rate": 0.00019768364911513405, "loss": 0.9611, "step": 2805 }, { "epoch": 0.16209044762344255, "grad_norm": 0.2940407395362854, "learning_rate": 0.00019766205557100868, "loss": 0.9679, "step": 2810 }, { "epoch": 0.1623788647900323, "grad_norm": 0.27756741642951965, "learning_rate": 0.000197640363033759, "loss": 0.9272, "step": 2815 }, { "epoch": 0.16266728195662206, "grad_norm": 0.27457764744758606, "learning_rate": 0.0001976185715253732, "loss": 1.0172, "step": 2820 }, { "epoch": 0.1629556991232118, "grad_norm": 0.30826953053474426, "learning_rate": 0.00019759668106793975, "loss": 0.992, "step": 2825 }, { "epoch": 0.16324411628980157, "grad_norm": 0.2786210775375366, "learning_rate": 0.0001975746916836475, "loss": 0.9978, "step": 2830 }, { "epoch": 0.16353253345639132, "grad_norm": 0.2771185338497162, "learning_rate": 0.00019755260339478556, "loss": 0.9633, "step": 2835 }, { "epoch": 0.16382095062298108, "grad_norm": 0.2794210910797119, "learning_rate": 0.0001975304162237432, "loss": 0.9595, "step": 2840 }, { "epoch": 0.16410936778957083, "grad_norm": 0.2792012691497803, "learning_rate": 0.00019750813019301004, "loss": 1.0335, "step": 2845 }, { "epoch": 0.1643977849561606, "grad_norm": 0.304283082485199, "learning_rate": 0.00019748574532517586, "loss": 0.9989, "step": 2850 }, { "epoch": 0.16468620212275034, "grad_norm": 0.2838886082172394, "learning_rate": 0.00019746326164293056, "loss": 0.9652, "step": 2855 }, { "epoch": 0.1649746192893401, "grad_norm": 0.275785356760025, "learning_rate": 0.0001974406791690643, "loss": 0.9703, "step": 2860 }, { "epoch": 0.16526303645592985, "grad_norm": 0.3098074495792389, "learning_rate": 0.00019741799792646734, "loss": 1.0071, "step": 2865 }, { "epoch": 0.1655514536225196, "grad_norm": 0.2729983925819397, "learning_rate": 0.00019739521793813006, "loss": 0.9223, "step": 2870 }, { "epoch": 0.16583987078910936, "grad_norm": 0.286050409078598, "learning_rate": 0.0001973723392271429, "loss": 0.9947, "step": 2875 }, { "epoch": 0.16612828795569912, "grad_norm": 0.2677772641181946, "learning_rate": 0.00019734936181669638, "loss": 1.0642, "step": 2880 }, { "epoch": 0.16641670512228887, "grad_norm": 0.3227224051952362, "learning_rate": 0.00019732628573008114, "loss": 1.0097, "step": 2885 }, { "epoch": 0.16670512228887863, "grad_norm": 0.287588506937027, "learning_rate": 0.00019730311099068771, "loss": 1.0178, "step": 2890 }, { "epoch": 0.16699353945546838, "grad_norm": 0.30312904715538025, "learning_rate": 0.00019727983762200677, "loss": 0.9637, "step": 2895 }, { "epoch": 0.16728195662205814, "grad_norm": 0.2764815092086792, "learning_rate": 0.00019725646564762878, "loss": 0.9786, "step": 2900 }, { "epoch": 0.1675703737886479, "grad_norm": 0.3129001557826996, "learning_rate": 0.00019723299509124433, "loss": 0.9505, "step": 2905 }, { "epoch": 0.16785879095523765, "grad_norm": 0.29697930812835693, "learning_rate": 0.00019720942597664385, "loss": 0.9866, "step": 2910 }, { "epoch": 0.1681472081218274, "grad_norm": 0.30350685119628906, "learning_rate": 0.00019718575832771768, "loss": 0.9753, "step": 2915 }, { "epoch": 0.16843562528841716, "grad_norm": 0.2997938394546509, "learning_rate": 0.00019716199216845604, "loss": 1.0002, "step": 2920 }, { "epoch": 0.1687240424550069, "grad_norm": 0.2617998421192169, "learning_rate": 0.000197138127522949, "loss": 0.931, "step": 2925 }, { "epoch": 0.16901245962159667, "grad_norm": 0.2833821773529053, "learning_rate": 0.00019711416441538652, "loss": 1.0101, "step": 2930 }, { "epoch": 0.16930087678818642, "grad_norm": 0.287142813205719, "learning_rate": 0.00019709010287005825, "loss": 1.0126, "step": 2935 }, { "epoch": 0.16958929395477618, "grad_norm": 0.2692398428916931, "learning_rate": 0.00019706594291135366, "loss": 0.9623, "step": 2940 }, { "epoch": 0.16987771112136593, "grad_norm": 0.3134477436542511, "learning_rate": 0.00019704168456376205, "loss": 1.0175, "step": 2945 }, { "epoch": 0.1701661282879557, "grad_norm": 0.28351497650146484, "learning_rate": 0.0001970173278518724, "loss": 0.9537, "step": 2950 }, { "epoch": 0.17045454545454544, "grad_norm": 0.2851005494594574, "learning_rate": 0.00019699287280037332, "loss": 1.0136, "step": 2955 }, { "epoch": 0.1707429626211352, "grad_norm": 0.3006639778614044, "learning_rate": 0.00019696831943405324, "loss": 1.0825, "step": 2960 }, { "epoch": 0.17103137978772495, "grad_norm": 0.2862212359905243, "learning_rate": 0.0001969436677778001, "loss": 0.9826, "step": 2965 }, { "epoch": 0.1713197969543147, "grad_norm": 0.2898406684398651, "learning_rate": 0.0001969189178566016, "loss": 1.0052, "step": 2970 }, { "epoch": 0.1716082141209045, "grad_norm": 0.3075491487979889, "learning_rate": 0.000196894069695545, "loss": 0.9692, "step": 2975 }, { "epoch": 0.17189663128749424, "grad_norm": 0.28366634249687195, "learning_rate": 0.00019686912331981702, "loss": 0.993, "step": 2980 }, { "epoch": 0.172185048454084, "grad_norm": 0.2819202244281769, "learning_rate": 0.00019684407875470415, "loss": 1.0018, "step": 2985 }, { "epoch": 0.17247346562067375, "grad_norm": 0.34952133893966675, "learning_rate": 0.00019681893602559224, "loss": 0.982, "step": 2990 }, { "epoch": 0.1727618827872635, "grad_norm": 0.3122062087059021, "learning_rate": 0.0001967936951579667, "loss": 0.9914, "step": 2995 }, { "epoch": 0.17305029995385326, "grad_norm": 0.27795976400375366, "learning_rate": 0.00019676835617741249, "loss": 0.9665, "step": 3000 }, { "epoch": 0.17333871712044302, "grad_norm": 0.2866445779800415, "learning_rate": 0.0001967429191096138, "loss": 0.9751, "step": 3005 }, { "epoch": 0.17362713428703277, "grad_norm": 0.28401291370391846, "learning_rate": 0.0001967173839803545, "loss": 0.9746, "step": 3010 }, { "epoch": 0.17391555145362253, "grad_norm": 0.2761111855506897, "learning_rate": 0.00019669175081551773, "loss": 0.9802, "step": 3015 }, { "epoch": 0.17420396862021229, "grad_norm": 0.2995210587978363, "learning_rate": 0.00019666601964108598, "loss": 0.9399, "step": 3020 }, { "epoch": 0.17449238578680204, "grad_norm": 0.28632500767707825, "learning_rate": 0.00019664019048314116, "loss": 0.983, "step": 3025 }, { "epoch": 0.1747808029533918, "grad_norm": 0.2868204116821289, "learning_rate": 0.00019661426336786445, "loss": 0.9339, "step": 3030 }, { "epoch": 0.17506922011998155, "grad_norm": 0.2975151836872101, "learning_rate": 0.00019658823832153632, "loss": 0.9176, "step": 3035 }, { "epoch": 0.1753576372865713, "grad_norm": 0.32150018215179443, "learning_rate": 0.00019656211537053654, "loss": 1.0361, "step": 3040 }, { "epoch": 0.17564605445316106, "grad_norm": 0.27139896154403687, "learning_rate": 0.00019653589454134406, "loss": 0.9399, "step": 3045 }, { "epoch": 0.17593447161975082, "grad_norm": 0.29438334703445435, "learning_rate": 0.00019650957586053716, "loss": 0.9869, "step": 3050 }, { "epoch": 0.17622288878634057, "grad_norm": 0.26268067955970764, "learning_rate": 0.00019648315935479315, "loss": 1.037, "step": 3055 }, { "epoch": 0.17651130595293033, "grad_norm": 0.28784453868865967, "learning_rate": 0.00019645664505088864, "loss": 0.9737, "step": 3060 }, { "epoch": 0.17679972311952008, "grad_norm": 0.29806089401245117, "learning_rate": 0.00019643003297569923, "loss": 0.9884, "step": 3065 }, { "epoch": 0.17708814028610984, "grad_norm": 0.2772783935070038, "learning_rate": 0.00019640332315619977, "loss": 1.0022, "step": 3070 }, { "epoch": 0.1773765574526996, "grad_norm": 0.279499351978302, "learning_rate": 0.0001963765156194641, "loss": 1.0034, "step": 3075 }, { "epoch": 0.17766497461928935, "grad_norm": 0.29856428503990173, "learning_rate": 0.00019634961039266506, "loss": 1.0251, "step": 3080 }, { "epoch": 0.1779533917858791, "grad_norm": 0.2960283160209656, "learning_rate": 0.00019632260750307467, "loss": 0.9978, "step": 3085 }, { "epoch": 0.17824180895246886, "grad_norm": 0.3026635944843292, "learning_rate": 0.0001962955069780638, "loss": 0.9345, "step": 3090 }, { "epoch": 0.1785302261190586, "grad_norm": 0.30011415481567383, "learning_rate": 0.00019626830884510236, "loss": 1.0426, "step": 3095 }, { "epoch": 0.17881864328564837, "grad_norm": 0.31029802560806274, "learning_rate": 0.00019624101313175918, "loss": 1.0291, "step": 3100 }, { "epoch": 0.17910706045223812, "grad_norm": 0.29978078603744507, "learning_rate": 0.00019621361986570194, "loss": 0.9394, "step": 3105 }, { "epoch": 0.17939547761882788, "grad_norm": 0.298728346824646, "learning_rate": 0.00019618612907469732, "loss": 0.9875, "step": 3110 }, { "epoch": 0.17968389478541763, "grad_norm": 0.2664894461631775, "learning_rate": 0.00019615854078661077, "loss": 0.9905, "step": 3115 }, { "epoch": 0.17997231195200739, "grad_norm": 0.284242182970047, "learning_rate": 0.00019613085502940658, "loss": 1.1183, "step": 3120 }, { "epoch": 0.18026072911859714, "grad_norm": 0.29005903005599976, "learning_rate": 0.00019610307183114787, "loss": 0.9643, "step": 3125 }, { "epoch": 0.1805491462851869, "grad_norm": 0.31152260303497314, "learning_rate": 0.00019607519121999647, "loss": 0.955, "step": 3130 }, { "epoch": 0.18083756345177665, "grad_norm": 0.3107044994831085, "learning_rate": 0.00019604721322421303, "loss": 0.9592, "step": 3135 }, { "epoch": 0.1811259806183664, "grad_norm": 0.3071282207965851, "learning_rate": 0.00019601913787215683, "loss": 0.9844, "step": 3140 }, { "epoch": 0.18141439778495616, "grad_norm": 0.29717057943344116, "learning_rate": 0.00019599096519228585, "loss": 0.9394, "step": 3145 }, { "epoch": 0.18170281495154592, "grad_norm": 0.3277190625667572, "learning_rate": 0.0001959626952131568, "loss": 0.8651, "step": 3150 }, { "epoch": 0.18199123211813567, "grad_norm": 0.2847001254558563, "learning_rate": 0.00019593432796342496, "loss": 1.0355, "step": 3155 }, { "epoch": 0.18227964928472543, "grad_norm": 0.2961786091327667, "learning_rate": 0.00019590586347184417, "loss": 1.0553, "step": 3160 }, { "epoch": 0.18256806645131518, "grad_norm": 0.2928047478199005, "learning_rate": 0.00019587730176726686, "loss": 0.9886, "step": 3165 }, { "epoch": 0.18285648361790494, "grad_norm": 0.320328027009964, "learning_rate": 0.00019584864287864408, "loss": 0.9522, "step": 3170 }, { "epoch": 0.1831449007844947, "grad_norm": 0.2688181400299072, "learning_rate": 0.00019581988683502525, "loss": 1.0481, "step": 3175 }, { "epoch": 0.18343331795108445, "grad_norm": 0.31589022278785706, "learning_rate": 0.0001957910336655584, "loss": 0.9825, "step": 3180 }, { "epoch": 0.1837217351176742, "grad_norm": 0.30393970012664795, "learning_rate": 0.00019576208339948988, "loss": 0.9845, "step": 3185 }, { "epoch": 0.18401015228426396, "grad_norm": 0.27783116698265076, "learning_rate": 0.00019573303606616459, "loss": 0.9965, "step": 3190 }, { "epoch": 0.1842985694508537, "grad_norm": 0.29289042949676514, "learning_rate": 0.00019570389169502569, "loss": 0.9849, "step": 3195 }, { "epoch": 0.18458698661744347, "grad_norm": 0.28584346175193787, "learning_rate": 0.00019567465031561487, "loss": 1.0468, "step": 3200 }, { "epoch": 0.18487540378403322, "grad_norm": 0.2989406883716583, "learning_rate": 0.00019564531195757193, "loss": 0.9834, "step": 3205 }, { "epoch": 0.18516382095062298, "grad_norm": 0.3050430715084076, "learning_rate": 0.0001956158766506352, "loss": 1.0285, "step": 3210 }, { "epoch": 0.18545223811721273, "grad_norm": 0.30927765369415283, "learning_rate": 0.00019558634442464113, "loss": 0.911, "step": 3215 }, { "epoch": 0.18574065528380249, "grad_norm": 0.2875533401966095, "learning_rate": 0.00019555671530952445, "loss": 0.9708, "step": 3220 }, { "epoch": 0.18602907245039224, "grad_norm": 0.338565856218338, "learning_rate": 0.00019552698933531808, "loss": 0.9928, "step": 3225 }, { "epoch": 0.186317489616982, "grad_norm": 0.2844907343387604, "learning_rate": 0.00019549716653215318, "loss": 0.9989, "step": 3230 }, { "epoch": 0.18660590678357175, "grad_norm": 0.2826622426509857, "learning_rate": 0.00019546724693025896, "loss": 0.9663, "step": 3235 }, { "epoch": 0.1868943239501615, "grad_norm": 0.29726430773735046, "learning_rate": 0.00019543723055996282, "loss": 0.9864, "step": 3240 }, { "epoch": 0.18718274111675126, "grad_norm": 0.2944948673248291, "learning_rate": 0.0001954071174516903, "loss": 0.9907, "step": 3245 }, { "epoch": 0.18747115828334102, "grad_norm": 0.2960521876811981, "learning_rate": 0.00019537690763596487, "loss": 0.9947, "step": 3250 }, { "epoch": 0.18775957544993077, "grad_norm": 0.2907959520816803, "learning_rate": 0.0001953466011434081, "loss": 0.998, "step": 3255 }, { "epoch": 0.18804799261652053, "grad_norm": 0.27448657155036926, "learning_rate": 0.00019531619800473952, "loss": 0.9299, "step": 3260 }, { "epoch": 0.18833640978311028, "grad_norm": 0.2914285361766815, "learning_rate": 0.00019528569825077668, "loss": 0.9851, "step": 3265 }, { "epoch": 0.18862482694970004, "grad_norm": 0.28481677174568176, "learning_rate": 0.00019525510191243498, "loss": 1.0796, "step": 3270 }, { "epoch": 0.1889132441162898, "grad_norm": 0.3071490526199341, "learning_rate": 0.00019522440902072782, "loss": 1.0045, "step": 3275 }, { "epoch": 0.18920166128287955, "grad_norm": 0.31344813108444214, "learning_rate": 0.0001951936196067664, "loss": 1.0384, "step": 3280 }, { "epoch": 0.1894900784494693, "grad_norm": 0.29477670788764954, "learning_rate": 0.00019516273370175972, "loss": 0.9663, "step": 3285 }, { "epoch": 0.18977849561605906, "grad_norm": 0.36153990030288696, "learning_rate": 0.00019513175133701474, "loss": 0.9459, "step": 3290 }, { "epoch": 0.1900669127826488, "grad_norm": 0.29918980598449707, "learning_rate": 0.000195100672543936, "loss": 0.9239, "step": 3295 }, { "epoch": 0.19035532994923857, "grad_norm": 0.2978503108024597, "learning_rate": 0.00019506949735402588, "loss": 0.9286, "step": 3300 }, { "epoch": 0.19064374711582832, "grad_norm": 0.3202069103717804, "learning_rate": 0.00019503822579888453, "loss": 1.0259, "step": 3305 }, { "epoch": 0.19093216428241808, "grad_norm": 0.3225456774234772, "learning_rate": 0.00019500685791020968, "loss": 0.9501, "step": 3310 }, { "epoch": 0.19122058144900783, "grad_norm": 0.3228490948677063, "learning_rate": 0.00019497539371979674, "loss": 1.0353, "step": 3315 }, { "epoch": 0.1915089986155976, "grad_norm": 0.32369717955589294, "learning_rate": 0.00019494383325953875, "loss": 0.9616, "step": 3320 }, { "epoch": 0.19179741578218737, "grad_norm": 0.3090066909790039, "learning_rate": 0.0001949121765614263, "loss": 0.9646, "step": 3325 }, { "epoch": 0.19208583294877712, "grad_norm": 0.26542478799819946, "learning_rate": 0.00019488042365754758, "loss": 0.979, "step": 3330 }, { "epoch": 0.19237425011536688, "grad_norm": 0.2973325848579407, "learning_rate": 0.0001948485745800882, "loss": 0.9433, "step": 3335 }, { "epoch": 0.19266266728195663, "grad_norm": 0.30728423595428467, "learning_rate": 0.0001948166293613314, "loss": 0.9544, "step": 3340 }, { "epoch": 0.1929510844485464, "grad_norm": 0.27854323387145996, "learning_rate": 0.00019478458803365772, "loss": 0.9428, "step": 3345 }, { "epoch": 0.19323950161513614, "grad_norm": 0.27844732999801636, "learning_rate": 0.00019475245062954523, "loss": 1.0545, "step": 3350 }, { "epoch": 0.1935279187817259, "grad_norm": 0.28251299262046814, "learning_rate": 0.00019472021718156937, "loss": 0.9315, "step": 3355 }, { "epoch": 0.19381633594831565, "grad_norm": 0.2970223128795624, "learning_rate": 0.00019468788772240286, "loss": 1.0053, "step": 3360 }, { "epoch": 0.1941047531149054, "grad_norm": 0.29227715730667114, "learning_rate": 0.0001946554622848158, "loss": 1.0171, "step": 3365 }, { "epoch": 0.19439317028149516, "grad_norm": 0.3032057285308838, "learning_rate": 0.00019462294090167554, "loss": 1.0456, "step": 3370 }, { "epoch": 0.19468158744808492, "grad_norm": 0.2863052189350128, "learning_rate": 0.00019459032360594677, "loss": 0.9876, "step": 3375 }, { "epoch": 0.19497000461467467, "grad_norm": 0.29493972659111023, "learning_rate": 0.0001945576104306913, "loss": 0.9076, "step": 3380 }, { "epoch": 0.19525842178126443, "grad_norm": 0.28630873560905457, "learning_rate": 0.00019452480140906819, "loss": 0.9734, "step": 3385 }, { "epoch": 0.19554683894785418, "grad_norm": 0.28571903705596924, "learning_rate": 0.00019449189657433358, "loss": 1.0033, "step": 3390 }, { "epoch": 0.19583525611444394, "grad_norm": 0.3060779273509979, "learning_rate": 0.0001944588959598408, "loss": 0.9493, "step": 3395 }, { "epoch": 0.1961236732810337, "grad_norm": 0.28723883628845215, "learning_rate": 0.00019442579959904024, "loss": 0.9713, "step": 3400 }, { "epoch": 0.19641209044762345, "grad_norm": 0.29430314898490906, "learning_rate": 0.00019439260752547935, "loss": 0.9476, "step": 3405 }, { "epoch": 0.1967005076142132, "grad_norm": 0.3151422142982483, "learning_rate": 0.0001943593197728026, "loss": 1.0443, "step": 3410 }, { "epoch": 0.19698892478080296, "grad_norm": 0.32313892245292664, "learning_rate": 0.00019432593637475138, "loss": 0.9968, "step": 3415 }, { "epoch": 0.19727734194739271, "grad_norm": 0.27212581038475037, "learning_rate": 0.00019429245736516415, "loss": 0.9608, "step": 3420 }, { "epoch": 0.19756575911398247, "grad_norm": 0.28393295407295227, "learning_rate": 0.00019425888277797615, "loss": 1.025, "step": 3425 }, { "epoch": 0.19785417628057222, "grad_norm": 0.31957364082336426, "learning_rate": 0.00019422521264721962, "loss": 0.9412, "step": 3430 }, { "epoch": 0.19814259344716198, "grad_norm": 0.28313255310058594, "learning_rate": 0.0001941914470070236, "loss": 0.8898, "step": 3435 }, { "epoch": 0.19843101061375173, "grad_norm": 0.30928754806518555, "learning_rate": 0.00019415758589161385, "loss": 1.0036, "step": 3440 }, { "epoch": 0.1987194277803415, "grad_norm": 0.30498096346855164, "learning_rate": 0.00019412362933531307, "loss": 0.8956, "step": 3445 }, { "epoch": 0.19900784494693124, "grad_norm": 0.2994639277458191, "learning_rate": 0.0001940895773725406, "loss": 0.958, "step": 3450 }, { "epoch": 0.199296262113521, "grad_norm": 0.27892401814460754, "learning_rate": 0.00019405543003781251, "loss": 1.0444, "step": 3455 }, { "epoch": 0.19958467928011075, "grad_norm": 0.3064746558666229, "learning_rate": 0.00019402118736574155, "loss": 0.9791, "step": 3460 }, { "epoch": 0.1998730964467005, "grad_norm": 0.30257728695869446, "learning_rate": 0.00019398684939103707, "loss": 1.0427, "step": 3465 }, { "epoch": 0.20016151361329027, "grad_norm": 0.30800187587738037, "learning_rate": 0.00019395241614850504, "loss": 0.9739, "step": 3470 }, { "epoch": 0.20044993077988002, "grad_norm": 0.30980637669563293, "learning_rate": 0.00019391788767304804, "loss": 0.9858, "step": 3475 }, { "epoch": 0.20073834794646978, "grad_norm": 0.300564706325531, "learning_rate": 0.00019388326399966515, "loss": 1.0109, "step": 3480 }, { "epoch": 0.20102676511305953, "grad_norm": 0.2801668047904968, "learning_rate": 0.0001938485451634519, "loss": 0.9414, "step": 3485 }, { "epoch": 0.20131518227964929, "grad_norm": 0.32072213292121887, "learning_rate": 0.00019381373119960033, "loss": 1.0513, "step": 3490 }, { "epoch": 0.20160359944623904, "grad_norm": 0.3455287218093872, "learning_rate": 0.00019377882214339893, "loss": 0.9559, "step": 3495 }, { "epoch": 0.2018920166128288, "grad_norm": 0.29594236612319946, "learning_rate": 0.00019374381803023252, "loss": 1.0114, "step": 3500 }, { "epoch": 0.20218043377941855, "grad_norm": 0.29461246728897095, "learning_rate": 0.0001937087188955823, "loss": 0.9981, "step": 3505 }, { "epoch": 0.2024688509460083, "grad_norm": 0.29845237731933594, "learning_rate": 0.00019367352477502576, "loss": 0.9626, "step": 3510 }, { "epoch": 0.20275726811259806, "grad_norm": 0.29965266585350037, "learning_rate": 0.00019363823570423675, "loss": 0.9343, "step": 3515 }, { "epoch": 0.20304568527918782, "grad_norm": 0.3167986273765564, "learning_rate": 0.0001936028517189852, "loss": 0.9134, "step": 3520 }, { "epoch": 0.20333410244577757, "grad_norm": 0.2801003158092499, "learning_rate": 0.00019356737285513748, "loss": 0.9585, "step": 3525 }, { "epoch": 0.20362251961236733, "grad_norm": 0.34285858273506165, "learning_rate": 0.00019353179914865596, "loss": 1.0429, "step": 3530 }, { "epoch": 0.20391093677895708, "grad_norm": 0.2997133135795593, "learning_rate": 0.00019349613063559916, "loss": 0.9669, "step": 3535 }, { "epoch": 0.20419935394554684, "grad_norm": 0.2827471196651459, "learning_rate": 0.00019346036735212177, "loss": 1.0545, "step": 3540 }, { "epoch": 0.2044877711121366, "grad_norm": 0.29712873697280884, "learning_rate": 0.00019342450933447448, "loss": 0.8974, "step": 3545 }, { "epoch": 0.20477618827872635, "grad_norm": 0.30608931183815, "learning_rate": 0.00019338855661900405, "loss": 0.9705, "step": 3550 }, { "epoch": 0.2050646054453161, "grad_norm": 0.3042439818382263, "learning_rate": 0.00019335250924215318, "loss": 0.9509, "step": 3555 }, { "epoch": 0.20535302261190586, "grad_norm": 0.32983705401420593, "learning_rate": 0.00019331636724046058, "loss": 0.9299, "step": 3560 }, { "epoch": 0.2056414397784956, "grad_norm": 0.31326207518577576, "learning_rate": 0.0001932801306505608, "loss": 1.0091, "step": 3565 }, { "epoch": 0.20592985694508537, "grad_norm": 0.362664133310318, "learning_rate": 0.00019324379950918437, "loss": 1.0372, "step": 3570 }, { "epoch": 0.20621827411167512, "grad_norm": 0.30850011110305786, "learning_rate": 0.00019320737385315756, "loss": 1.007, "step": 3575 }, { "epoch": 0.20650669127826488, "grad_norm": 0.28833866119384766, "learning_rate": 0.00019317085371940246, "loss": 0.9142, "step": 3580 }, { "epoch": 0.20679510844485463, "grad_norm": 0.30341702699661255, "learning_rate": 0.00019313423914493703, "loss": 0.9421, "step": 3585 }, { "epoch": 0.20708352561144439, "grad_norm": 0.29995712637901306, "learning_rate": 0.00019309753016687477, "loss": 0.9276, "step": 3590 }, { "epoch": 0.20737194277803414, "grad_norm": 0.3257008492946625, "learning_rate": 0.00019306072682242505, "loss": 0.9618, "step": 3595 }, { "epoch": 0.2076603599446239, "grad_norm": 0.31132039427757263, "learning_rate": 0.00019302382914889284, "loss": 1.0191, "step": 3600 }, { "epoch": 0.20794877711121365, "grad_norm": 0.2903344929218292, "learning_rate": 0.00019298683718367864, "loss": 0.9276, "step": 3605 }, { "epoch": 0.2082371942778034, "grad_norm": 0.28606170415878296, "learning_rate": 0.00019294975096427862, "loss": 0.9942, "step": 3610 }, { "epoch": 0.20852561144439316, "grad_norm": 0.31145986914634705, "learning_rate": 0.00019291257052828447, "loss": 1.0452, "step": 3615 }, { "epoch": 0.20881402861098292, "grad_norm": 0.29645946621894836, "learning_rate": 0.00019287529591338333, "loss": 0.9606, "step": 3620 }, { "epoch": 0.20910244577757267, "grad_norm": 0.2780356705188751, "learning_rate": 0.0001928379271573579, "loss": 0.952, "step": 3625 }, { "epoch": 0.20939086294416243, "grad_norm": 0.31448647379875183, "learning_rate": 0.0001928004642980862, "loss": 0.938, "step": 3630 }, { "epoch": 0.20967928011075218, "grad_norm": 0.45712539553642273, "learning_rate": 0.0001927629073735417, "loss": 0.9825, "step": 3635 }, { "epoch": 0.20996769727734194, "grad_norm": 0.2840917408466339, "learning_rate": 0.00019272525642179323, "loss": 0.9532, "step": 3640 }, { "epoch": 0.2102561144439317, "grad_norm": 0.301581472158432, "learning_rate": 0.00019268751148100486, "loss": 0.9401, "step": 3645 }, { "epoch": 0.21054453161052145, "grad_norm": 0.2878675162792206, "learning_rate": 0.00019264967258943595, "loss": 0.96, "step": 3650 }, { "epoch": 0.2108329487771112, "grad_norm": 0.3223981261253357, "learning_rate": 0.0001926117397854412, "loss": 0.9317, "step": 3655 }, { "epoch": 0.21112136594370096, "grad_norm": 0.3064003884792328, "learning_rate": 0.0001925737131074703, "loss": 1.0191, "step": 3660 }, { "epoch": 0.2114097831102907, "grad_norm": 0.29931890964508057, "learning_rate": 0.0001925355925940683, "loss": 1.0221, "step": 3665 }, { "epoch": 0.2116982002768805, "grad_norm": 0.29575493931770325, "learning_rate": 0.00019249737828387522, "loss": 0.9803, "step": 3670 }, { "epoch": 0.21198661744347025, "grad_norm": 0.29677698016166687, "learning_rate": 0.0001924590702156262, "loss": 0.9743, "step": 3675 }, { "epoch": 0.21227503461006, "grad_norm": 0.28778114914894104, "learning_rate": 0.00019242066842815146, "loss": 1.0134, "step": 3680 }, { "epoch": 0.21256345177664976, "grad_norm": 0.3022085130214691, "learning_rate": 0.00019238217296037614, "loss": 1.0065, "step": 3685 }, { "epoch": 0.21285186894323951, "grad_norm": 0.28485235571861267, "learning_rate": 0.00019234358385132038, "loss": 1.0066, "step": 3690 }, { "epoch": 0.21314028610982927, "grad_norm": 0.2786906063556671, "learning_rate": 0.00019230490114009928, "loss": 0.9393, "step": 3695 }, { "epoch": 0.21342870327641902, "grad_norm": 0.30164533853530884, "learning_rate": 0.00019226612486592271, "loss": 0.8972, "step": 3700 }, { "epoch": 0.21371712044300878, "grad_norm": 0.303313672542572, "learning_rate": 0.00019222725506809547, "loss": 0.9892, "step": 3705 }, { "epoch": 0.21400553760959853, "grad_norm": 0.2972022593021393, "learning_rate": 0.00019218829178601713, "loss": 1.0382, "step": 3710 }, { "epoch": 0.2142939547761883, "grad_norm": 0.2921942174434662, "learning_rate": 0.00019214923505918202, "loss": 1.0, "step": 3715 }, { "epoch": 0.21458237194277804, "grad_norm": 0.2801063358783722, "learning_rate": 0.00019211008492717914, "loss": 0.9779, "step": 3720 }, { "epoch": 0.2148707891093678, "grad_norm": 0.2877048850059509, "learning_rate": 0.00019207084142969225, "loss": 1.0471, "step": 3725 }, { "epoch": 0.21515920627595755, "grad_norm": 0.3104129135608673, "learning_rate": 0.0001920315046064997, "loss": 0.9551, "step": 3730 }, { "epoch": 0.2154476234425473, "grad_norm": 0.28808075189590454, "learning_rate": 0.0001919920744974745, "loss": 0.9911, "step": 3735 }, { "epoch": 0.21573604060913706, "grad_norm": 0.2917241156101227, "learning_rate": 0.00019195255114258408, "loss": 0.9557, "step": 3740 }, { "epoch": 0.21602445777572682, "grad_norm": 0.3106626272201538, "learning_rate": 0.0001919129345818905, "loss": 0.9817, "step": 3745 }, { "epoch": 0.21631287494231657, "grad_norm": 0.30126625299453735, "learning_rate": 0.00019187322485555031, "loss": 0.996, "step": 3750 }, { "epoch": 0.21660129210890633, "grad_norm": 0.28781163692474365, "learning_rate": 0.0001918334220038144, "loss": 0.9824, "step": 3755 }, { "epoch": 0.21688970927549608, "grad_norm": 0.3098173141479492, "learning_rate": 0.00019179352606702813, "loss": 0.9516, "step": 3760 }, { "epoch": 0.21717812644208584, "grad_norm": 0.29839685559272766, "learning_rate": 0.00019175353708563117, "loss": 1.0092, "step": 3765 }, { "epoch": 0.2174665436086756, "grad_norm": 0.2970857322216034, "learning_rate": 0.00019171345510015758, "loss": 1.0161, "step": 3770 }, { "epoch": 0.21775496077526535, "grad_norm": 0.3346623480319977, "learning_rate": 0.00019167328015123558, "loss": 0.9373, "step": 3775 }, { "epoch": 0.2180433779418551, "grad_norm": 0.29829534888267517, "learning_rate": 0.0001916330122795877, "loss": 0.9769, "step": 3780 }, { "epoch": 0.21833179510844486, "grad_norm": 0.28773486614227295, "learning_rate": 0.00019159265152603064, "loss": 0.9643, "step": 3785 }, { "epoch": 0.21862021227503461, "grad_norm": 0.35629943013191223, "learning_rate": 0.00019155219793147522, "loss": 1.0356, "step": 3790 }, { "epoch": 0.21890862944162437, "grad_norm": 0.3033393323421478, "learning_rate": 0.00019151165153692644, "loss": 0.9561, "step": 3795 }, { "epoch": 0.21919704660821412, "grad_norm": 0.32361406087875366, "learning_rate": 0.00019147101238348326, "loss": 0.9953, "step": 3800 }, { "epoch": 0.21948546377480388, "grad_norm": 0.36144763231277466, "learning_rate": 0.00019143028051233873, "loss": 0.9526, "step": 3805 }, { "epoch": 0.21977388094139363, "grad_norm": 0.275502473115921, "learning_rate": 0.00019138945596477994, "loss": 0.9281, "step": 3810 }, { "epoch": 0.2200622981079834, "grad_norm": 0.32906025648117065, "learning_rate": 0.0001913485387821877, "loss": 0.9382, "step": 3815 }, { "epoch": 0.22035071527457314, "grad_norm": 0.28945717215538025, "learning_rate": 0.00019130752900603702, "loss": 1.0106, "step": 3820 }, { "epoch": 0.2206391324411629, "grad_norm": 0.30186885595321655, "learning_rate": 0.00019126642667789654, "loss": 0.9785, "step": 3825 }, { "epoch": 0.22092754960775265, "grad_norm": 0.27561014890670776, "learning_rate": 0.00019122523183942879, "loss": 1.0386, "step": 3830 }, { "epoch": 0.2212159667743424, "grad_norm": 0.3077535331249237, "learning_rate": 0.00019118394453239006, "loss": 1.0155, "step": 3835 }, { "epoch": 0.22150438394093216, "grad_norm": 0.3126158118247986, "learning_rate": 0.00019114256479863038, "loss": 0.9581, "step": 3840 }, { "epoch": 0.22179280110752192, "grad_norm": 0.275310754776001, "learning_rate": 0.00019110109268009347, "loss": 1.0005, "step": 3845 }, { "epoch": 0.22208121827411167, "grad_norm": 0.31148555874824524, "learning_rate": 0.00019105952821881668, "loss": 1.0132, "step": 3850 }, { "epoch": 0.22236963544070143, "grad_norm": 0.2904588580131531, "learning_rate": 0.00019101787145693098, "loss": 0.9738, "step": 3855 }, { "epoch": 0.22265805260729118, "grad_norm": 0.28576961159706116, "learning_rate": 0.00019097612243666086, "loss": 0.952, "step": 3860 }, { "epoch": 0.22294646977388094, "grad_norm": 0.31309062242507935, "learning_rate": 0.0001909342812003244, "loss": 0.9586, "step": 3865 }, { "epoch": 0.2232348869404707, "grad_norm": 0.30974939465522766, "learning_rate": 0.00019089234779033306, "loss": 0.9903, "step": 3870 }, { "epoch": 0.22352330410706045, "grad_norm": 0.2999672293663025, "learning_rate": 0.00019085032224919177, "loss": 0.9514, "step": 3875 }, { "epoch": 0.2238117212736502, "grad_norm": 0.2947905957698822, "learning_rate": 0.00019080820461949886, "loss": 0.9602, "step": 3880 }, { "epoch": 0.22410013844023996, "grad_norm": 0.2898218333721161, "learning_rate": 0.00019076599494394602, "loss": 1.0081, "step": 3885 }, { "epoch": 0.22438855560682971, "grad_norm": 0.27696529030799866, "learning_rate": 0.00019072369326531824, "loss": 0.9245, "step": 3890 }, { "epoch": 0.22467697277341947, "grad_norm": 0.30240634083747864, "learning_rate": 0.00019068129962649365, "loss": 0.9759, "step": 3895 }, { "epoch": 0.22496538994000922, "grad_norm": 0.3089566230773926, "learning_rate": 0.00019063881407044373, "loss": 0.9144, "step": 3900 }, { "epoch": 0.22525380710659898, "grad_norm": 0.3179451823234558, "learning_rate": 0.00019059623664023311, "loss": 1.0379, "step": 3905 }, { "epoch": 0.22554222427318874, "grad_norm": 0.29353705048561096, "learning_rate": 0.00019055356737901952, "loss": 1.0612, "step": 3910 }, { "epoch": 0.2258306414397785, "grad_norm": 0.317120224237442, "learning_rate": 0.00019051080633005372, "loss": 0.9758, "step": 3915 }, { "epoch": 0.22611905860636825, "grad_norm": 0.29097092151641846, "learning_rate": 0.00019046795353667965, "loss": 1.0292, "step": 3920 }, { "epoch": 0.226407475772958, "grad_norm": 0.3430017828941345, "learning_rate": 0.00019042500904233408, "loss": 0.9483, "step": 3925 }, { "epoch": 0.22669589293954776, "grad_norm": 0.31500279903411865, "learning_rate": 0.00019038197289054684, "loss": 0.9524, "step": 3930 }, { "epoch": 0.2269843101061375, "grad_norm": 0.2940191328525543, "learning_rate": 0.00019033884512494064, "loss": 0.9503, "step": 3935 }, { "epoch": 0.22727272727272727, "grad_norm": 0.37484368681907654, "learning_rate": 0.00019029562578923106, "loss": 0.9885, "step": 3940 }, { "epoch": 0.22756114443931702, "grad_norm": 0.3004748225212097, "learning_rate": 0.00019025231492722643, "loss": 0.9923, "step": 3945 }, { "epoch": 0.22784956160590678, "grad_norm": 0.30483829975128174, "learning_rate": 0.000190208912582828, "loss": 0.9507, "step": 3950 }, { "epoch": 0.22813797877249653, "grad_norm": 0.30382221937179565, "learning_rate": 0.0001901654188000296, "loss": 0.9534, "step": 3955 }, { "epoch": 0.22842639593908629, "grad_norm": 0.3174874484539032, "learning_rate": 0.0001901218336229178, "loss": 1.0329, "step": 3960 }, { "epoch": 0.22871481310567604, "grad_norm": 0.3040885925292969, "learning_rate": 0.00019007815709567183, "loss": 0.978, "step": 3965 }, { "epoch": 0.2290032302722658, "grad_norm": 0.3258110284805298, "learning_rate": 0.0001900343892625635, "loss": 1.0537, "step": 3970 }, { "epoch": 0.22929164743885555, "grad_norm": 0.2845390737056732, "learning_rate": 0.00018999053016795719, "loss": 0.9592, "step": 3975 }, { "epoch": 0.2295800646054453, "grad_norm": 0.3034794330596924, "learning_rate": 0.00018994657985630972, "loss": 0.9808, "step": 3980 }, { "epoch": 0.22986848177203506, "grad_norm": 0.3224650025367737, "learning_rate": 0.00018990253837217042, "loss": 0.9952, "step": 3985 }, { "epoch": 0.23015689893862482, "grad_norm": 0.3139420449733734, "learning_rate": 0.00018985840576018107, "loss": 0.9499, "step": 3990 }, { "epoch": 0.23044531610521457, "grad_norm": 0.2906229794025421, "learning_rate": 0.00018981418206507575, "loss": 0.9593, "step": 3995 }, { "epoch": 0.23073373327180433, "grad_norm": 0.28899601101875305, "learning_rate": 0.00018976986733168093, "loss": 1.0203, "step": 4000 }, { "epoch": 0.23102215043839408, "grad_norm": 0.3095887303352356, "learning_rate": 0.00018972546160491528, "loss": 1.0639, "step": 4005 }, { "epoch": 0.23131056760498384, "grad_norm": 0.2887146472930908, "learning_rate": 0.00018968096492978976, "loss": 0.9895, "step": 4010 }, { "epoch": 0.2315989847715736, "grad_norm": 0.2964550256729126, "learning_rate": 0.0001896363773514075, "loss": 0.9801, "step": 4015 }, { "epoch": 0.23188740193816337, "grad_norm": 0.319967657327652, "learning_rate": 0.0001895916989149638, "loss": 1.0451, "step": 4020 }, { "epoch": 0.23217581910475313, "grad_norm": 0.33027443289756775, "learning_rate": 0.000189546929665746, "loss": 1.0692, "step": 4025 }, { "epoch": 0.23246423627134288, "grad_norm": 0.29336920380592346, "learning_rate": 0.00018950206964913355, "loss": 0.9877, "step": 4030 }, { "epoch": 0.23275265343793264, "grad_norm": 0.29890185594558716, "learning_rate": 0.0001894571189105979, "loss": 0.9255, "step": 4035 }, { "epoch": 0.2330410706045224, "grad_norm": 0.30321866273880005, "learning_rate": 0.00018941207749570237, "loss": 1.0392, "step": 4040 }, { "epoch": 0.23332948777111215, "grad_norm": 0.3135153651237488, "learning_rate": 0.00018936694545010232, "loss": 0.9713, "step": 4045 }, { "epoch": 0.2336179049377019, "grad_norm": 0.29142776131629944, "learning_rate": 0.0001893217228195449, "loss": 1.0032, "step": 4050 }, { "epoch": 0.23390632210429166, "grad_norm": 0.3098786771297455, "learning_rate": 0.0001892764096498691, "loss": 1.0406, "step": 4055 }, { "epoch": 0.2341947392708814, "grad_norm": 0.3293483257293701, "learning_rate": 0.00018923100598700561, "loss": 1.0142, "step": 4060 }, { "epoch": 0.23448315643747117, "grad_norm": 0.2999286651611328, "learning_rate": 0.00018918551187697703, "loss": 0.9461, "step": 4065 }, { "epoch": 0.23477157360406092, "grad_norm": 0.3046085238456726, "learning_rate": 0.00018913992736589746, "loss": 0.9985, "step": 4070 }, { "epoch": 0.23505999077065068, "grad_norm": 0.2792486846446991, "learning_rate": 0.00018909425249997267, "loss": 0.99, "step": 4075 }, { "epoch": 0.23534840793724043, "grad_norm": 0.30676671862602234, "learning_rate": 0.0001890484873255001, "loss": 0.9918, "step": 4080 }, { "epoch": 0.2356368251038302, "grad_norm": 0.2913929522037506, "learning_rate": 0.00018900263188886864, "loss": 0.9612, "step": 4085 }, { "epoch": 0.23592524227041994, "grad_norm": 0.32379987835884094, "learning_rate": 0.00018895668623655873, "loss": 0.9277, "step": 4090 }, { "epoch": 0.2362136594370097, "grad_norm": 0.33832383155822754, "learning_rate": 0.00018891065041514224, "loss": 0.9477, "step": 4095 }, { "epoch": 0.23650207660359945, "grad_norm": 0.31692832708358765, "learning_rate": 0.0001888645244712824, "loss": 0.96, "step": 4100 }, { "epoch": 0.2367904937701892, "grad_norm": 0.3002290725708008, "learning_rate": 0.0001888183084517338, "loss": 0.9275, "step": 4105 }, { "epoch": 0.23707891093677896, "grad_norm": 0.27560725808143616, "learning_rate": 0.00018877200240334236, "loss": 1.0375, "step": 4110 }, { "epoch": 0.23736732810336872, "grad_norm": 0.3038588762283325, "learning_rate": 0.0001887256063730453, "loss": 1.0229, "step": 4115 }, { "epoch": 0.23765574526995847, "grad_norm": 0.2903522253036499, "learning_rate": 0.00018867912040787096, "loss": 1.0118, "step": 4120 }, { "epoch": 0.23794416243654823, "grad_norm": 0.3210826516151428, "learning_rate": 0.0001886325445549389, "loss": 0.9883, "step": 4125 }, { "epoch": 0.23823257960313798, "grad_norm": 0.375205934047699, "learning_rate": 0.00018858587886145975, "loss": 0.9813, "step": 4130 }, { "epoch": 0.23852099676972774, "grad_norm": 0.3124467134475708, "learning_rate": 0.0001885391233747352, "loss": 0.9015, "step": 4135 }, { "epoch": 0.2388094139363175, "grad_norm": 0.3262174725532532, "learning_rate": 0.00018849227814215805, "loss": 0.8783, "step": 4140 }, { "epoch": 0.23909783110290725, "grad_norm": 0.31825995445251465, "learning_rate": 0.00018844534321121195, "loss": 1.0335, "step": 4145 }, { "epoch": 0.239386248269497, "grad_norm": 0.3026926517486572, "learning_rate": 0.00018839831862947152, "loss": 0.9791, "step": 4150 }, { "epoch": 0.23967466543608676, "grad_norm": 0.32558876276016235, "learning_rate": 0.0001883512044446023, "loss": 1.0042, "step": 4155 }, { "epoch": 0.23996308260267651, "grad_norm": 0.2892070412635803, "learning_rate": 0.00018830400070436057, "loss": 0.8757, "step": 4160 }, { "epoch": 0.24025149976926627, "grad_norm": 0.31175941228866577, "learning_rate": 0.00018825670745659345, "loss": 0.986, "step": 4165 }, { "epoch": 0.24053991693585602, "grad_norm": 0.3003990054130554, "learning_rate": 0.00018820932474923873, "loss": 0.9732, "step": 4170 }, { "epoch": 0.24082833410244578, "grad_norm": 0.29946771264076233, "learning_rate": 0.00018816185263032496, "loss": 0.985, "step": 4175 }, { "epoch": 0.24111675126903553, "grad_norm": 0.29952332377433777, "learning_rate": 0.00018811429114797123, "loss": 0.9689, "step": 4180 }, { "epoch": 0.2414051684356253, "grad_norm": 0.3125024735927582, "learning_rate": 0.00018806664035038727, "loss": 0.971, "step": 4185 }, { "epoch": 0.24169358560221504, "grad_norm": 0.338549941778183, "learning_rate": 0.00018801890028587333, "loss": 0.9967, "step": 4190 }, { "epoch": 0.2419820027688048, "grad_norm": 0.3147549033164978, "learning_rate": 0.00018797107100282015, "loss": 1.0016, "step": 4195 }, { "epoch": 0.24227041993539455, "grad_norm": 0.28421711921691895, "learning_rate": 0.0001879231525497089, "loss": 0.9418, "step": 4200 }, { "epoch": 0.2425588371019843, "grad_norm": 0.3105412721633911, "learning_rate": 0.00018787514497511104, "loss": 1.0044, "step": 4205 }, { "epoch": 0.24284725426857406, "grad_norm": 0.2936135530471802, "learning_rate": 0.0001878270483276886, "loss": 0.9557, "step": 4210 }, { "epoch": 0.24313567143516382, "grad_norm": 0.3218764662742615, "learning_rate": 0.00018777886265619365, "loss": 0.9989, "step": 4215 }, { "epoch": 0.24342408860175357, "grad_norm": 0.29364484548568726, "learning_rate": 0.00018773058800946858, "loss": 0.9341, "step": 4220 }, { "epoch": 0.24371250576834333, "grad_norm": 0.29040706157684326, "learning_rate": 0.0001876822244364461, "loss": 0.9869, "step": 4225 }, { "epoch": 0.24400092293493308, "grad_norm": 0.31661713123321533, "learning_rate": 0.00018763377198614887, "loss": 0.9548, "step": 4230 }, { "epoch": 0.24428934010152284, "grad_norm": 0.30058059096336365, "learning_rate": 0.00018758523070768973, "loss": 0.9072, "step": 4235 }, { "epoch": 0.2445777572681126, "grad_norm": 0.3189939856529236, "learning_rate": 0.00018753660065027152, "loss": 0.9999, "step": 4240 }, { "epoch": 0.24486617443470235, "grad_norm": 0.3253864645957947, "learning_rate": 0.00018748788186318712, "loss": 0.9708, "step": 4245 }, { "epoch": 0.2451545916012921, "grad_norm": 0.307716429233551, "learning_rate": 0.00018743907439581933, "loss": 0.9375, "step": 4250 }, { "epoch": 0.24544300876788186, "grad_norm": 0.2934640049934387, "learning_rate": 0.00018739017829764082, "loss": 0.9647, "step": 4255 }, { "epoch": 0.24573142593447161, "grad_norm": 0.3377256393432617, "learning_rate": 0.0001873411936182141, "loss": 0.9755, "step": 4260 }, { "epoch": 0.24601984310106137, "grad_norm": 0.3084704875946045, "learning_rate": 0.0001872921204071915, "loss": 1.0172, "step": 4265 }, { "epoch": 0.24630826026765112, "grad_norm": 0.3088560402393341, "learning_rate": 0.000187242958714315, "loss": 0.9861, "step": 4270 }, { "epoch": 0.24659667743424088, "grad_norm": 0.28719452023506165, "learning_rate": 0.00018719370858941644, "loss": 0.9762, "step": 4275 }, { "epoch": 0.24688509460083063, "grad_norm": 0.31891629099845886, "learning_rate": 0.00018714437008241709, "loss": 1.0395, "step": 4280 }, { "epoch": 0.2471735117674204, "grad_norm": 0.32796710729599, "learning_rate": 0.000187094943243328, "loss": 0.967, "step": 4285 }, { "epoch": 0.24746192893401014, "grad_norm": 0.3454214930534363, "learning_rate": 0.00018704542812224956, "loss": 0.938, "step": 4290 }, { "epoch": 0.2477503461005999, "grad_norm": 0.2978779375553131, "learning_rate": 0.00018699582476937185, "loss": 0.981, "step": 4295 }, { "epoch": 0.24803876326718965, "grad_norm": 0.3401256501674652, "learning_rate": 0.00018694613323497422, "loss": 1.0089, "step": 4300 }, { "epoch": 0.2483271804337794, "grad_norm": 0.32507994771003723, "learning_rate": 0.0001868963535694255, "loss": 1.0443, "step": 4305 }, { "epoch": 0.24861559760036916, "grad_norm": 0.31554827094078064, "learning_rate": 0.0001868464858231838, "loss": 1.0414, "step": 4310 }, { "epoch": 0.24890401476695892, "grad_norm": 0.3291451632976532, "learning_rate": 0.00018679653004679655, "loss": 0.9676, "step": 4315 }, { "epoch": 0.24919243193354867, "grad_norm": 0.3101532459259033, "learning_rate": 0.0001867464862909004, "loss": 0.955, "step": 4320 }, { "epoch": 0.24948084910013843, "grad_norm": 0.29966261982917786, "learning_rate": 0.00018669635460622107, "loss": 0.9035, "step": 4325 }, { "epoch": 0.24976926626672818, "grad_norm": 0.2881443500518799, "learning_rate": 0.00018664613504357366, "loss": 0.9708, "step": 4330 }, { "epoch": 0.25005768343331797, "grad_norm": 0.29754626750946045, "learning_rate": 0.00018659582765386204, "loss": 1.0263, "step": 4335 }, { "epoch": 0.2503461005999077, "grad_norm": 0.3321414291858673, "learning_rate": 0.0001865454324880794, "loss": 0.9843, "step": 4340 }, { "epoch": 0.2506345177664975, "grad_norm": 0.32111719250679016, "learning_rate": 0.00018649494959730765, "loss": 1.0291, "step": 4345 }, { "epoch": 0.2509229349330872, "grad_norm": 0.3495931327342987, "learning_rate": 0.00018644437903271778, "loss": 1.0373, "step": 4350 }, { "epoch": 0.251211352099677, "grad_norm": 0.30436307191848755, "learning_rate": 0.0001863937208455696, "loss": 0.9767, "step": 4355 }, { "epoch": 0.2514997692662667, "grad_norm": 0.3309740126132965, "learning_rate": 0.00018634297508721167, "loss": 0.9387, "step": 4360 }, { "epoch": 0.2517881864328565, "grad_norm": 0.300322949886322, "learning_rate": 0.00018629214180908144, "loss": 1.0123, "step": 4365 }, { "epoch": 0.2520766035994462, "grad_norm": 0.3226313591003418, "learning_rate": 0.00018624122106270506, "loss": 0.9499, "step": 4370 }, { "epoch": 0.252365020766036, "grad_norm": 0.32126346230506897, "learning_rate": 0.00018619021289969717, "loss": 0.9617, "step": 4375 }, { "epoch": 0.25265343793262574, "grad_norm": 0.2929309010505676, "learning_rate": 0.00018613911737176125, "loss": 0.9452, "step": 4380 }, { "epoch": 0.2529418550992155, "grad_norm": 0.29882681369781494, "learning_rate": 0.00018608793453068914, "loss": 0.9957, "step": 4385 }, { "epoch": 0.25323027226580525, "grad_norm": 0.2783080041408539, "learning_rate": 0.0001860366644283613, "loss": 0.9397, "step": 4390 }, { "epoch": 0.25351868943239503, "grad_norm": 0.2922220230102539, "learning_rate": 0.00018598530711674667, "loss": 0.9619, "step": 4395 }, { "epoch": 0.25380710659898476, "grad_norm": 0.2756292223930359, "learning_rate": 0.00018593386264790243, "loss": 0.9608, "step": 4400 }, { "epoch": 0.25409552376557454, "grad_norm": 0.32587939500808716, "learning_rate": 0.00018588233107397429, "loss": 0.8999, "step": 4405 }, { "epoch": 0.25438394093216427, "grad_norm": 0.301612913608551, "learning_rate": 0.00018583071244719607, "loss": 0.909, "step": 4410 }, { "epoch": 0.25467235809875405, "grad_norm": 0.3122866153717041, "learning_rate": 0.00018577900681989, "loss": 0.9398, "step": 4415 }, { "epoch": 0.2549607752653438, "grad_norm": 0.30573856830596924, "learning_rate": 0.0001857272142444664, "loss": 0.9165, "step": 4420 }, { "epoch": 0.25524919243193356, "grad_norm": 0.29823189973831177, "learning_rate": 0.00018567533477342377, "loss": 0.9528, "step": 4425 }, { "epoch": 0.2555376095985233, "grad_norm": 0.3344714641571045, "learning_rate": 0.0001856233684593486, "loss": 0.9577, "step": 4430 }, { "epoch": 0.25582602676511307, "grad_norm": 0.29007846117019653, "learning_rate": 0.0001855713153549155, "loss": 0.944, "step": 4435 }, { "epoch": 0.2561144439317028, "grad_norm": 0.2928242087364197, "learning_rate": 0.00018551917551288706, "loss": 0.9878, "step": 4440 }, { "epoch": 0.2564028610982926, "grad_norm": 0.3003365695476532, "learning_rate": 0.0001854669489861137, "loss": 0.9784, "step": 4445 }, { "epoch": 0.2566912782648823, "grad_norm": 0.30604249238967896, "learning_rate": 0.0001854146358275338, "loss": 0.9803, "step": 4450 }, { "epoch": 0.2569796954314721, "grad_norm": 0.31301596760749817, "learning_rate": 0.00018536223609017348, "loss": 1.0573, "step": 4455 }, { "epoch": 0.2572681125980618, "grad_norm": 0.30836206674575806, "learning_rate": 0.00018530974982714667, "loss": 0.9928, "step": 4460 }, { "epoch": 0.2575565297646516, "grad_norm": 0.3122254014015198, "learning_rate": 0.00018525717709165498, "loss": 1.0245, "step": 4465 }, { "epoch": 0.2578449469312413, "grad_norm": 0.29952389001846313, "learning_rate": 0.0001852045179369877, "loss": 1.0159, "step": 4470 }, { "epoch": 0.2581333640978311, "grad_norm": 0.2811339199542999, "learning_rate": 0.00018515177241652163, "loss": 0.9483, "step": 4475 }, { "epoch": 0.25842178126442084, "grad_norm": 0.3140300512313843, "learning_rate": 0.0001850989405837212, "loss": 0.98, "step": 4480 }, { "epoch": 0.2587101984310106, "grad_norm": 0.3146283030509949, "learning_rate": 0.00018504602249213838, "loss": 1.0204, "step": 4485 }, { "epoch": 0.25899861559760035, "grad_norm": 0.28882843255996704, "learning_rate": 0.0001849930181954124, "loss": 0.995, "step": 4490 }, { "epoch": 0.25928703276419013, "grad_norm": 0.35614368319511414, "learning_rate": 0.00018493992774727005, "loss": 1.0179, "step": 4495 }, { "epoch": 0.25957544993077986, "grad_norm": 0.3043900728225708, "learning_rate": 0.00018488675120152532, "loss": 0.9413, "step": 4500 }, { "epoch": 0.25986386709736964, "grad_norm": 0.2888356149196625, "learning_rate": 0.00018483348861207953, "loss": 0.9917, "step": 4505 }, { "epoch": 0.26015228426395937, "grad_norm": 0.31191486120224, "learning_rate": 0.00018478014003292116, "loss": 0.9503, "step": 4510 }, { "epoch": 0.26044070143054915, "grad_norm": 0.2871573269367218, "learning_rate": 0.00018472670551812596, "loss": 1.0236, "step": 4515 }, { "epoch": 0.2607291185971389, "grad_norm": 0.3728832006454468, "learning_rate": 0.0001846731851218567, "loss": 1.0037, "step": 4520 }, { "epoch": 0.26101753576372866, "grad_norm": 0.27702075242996216, "learning_rate": 0.00018461957889836324, "loss": 0.9536, "step": 4525 }, { "epoch": 0.2613059529303184, "grad_norm": 0.2843487560749054, "learning_rate": 0.00018456588690198236, "loss": 0.974, "step": 4530 }, { "epoch": 0.26159437009690817, "grad_norm": 0.3026067912578583, "learning_rate": 0.0001845121091871379, "loss": 1.0121, "step": 4535 }, { "epoch": 0.2618827872634979, "grad_norm": 0.299246221780777, "learning_rate": 0.0001844582458083405, "loss": 0.9328, "step": 4540 }, { "epoch": 0.2621712044300877, "grad_norm": 0.29690268635749817, "learning_rate": 0.0001844042968201877, "loss": 0.9492, "step": 4545 }, { "epoch": 0.26245962159667746, "grad_norm": 0.29138097167015076, "learning_rate": 0.0001843502622773637, "loss": 0.9715, "step": 4550 }, { "epoch": 0.2627480387632672, "grad_norm": 0.2924482822418213, "learning_rate": 0.0001842961422346396, "loss": 0.9897, "step": 4555 }, { "epoch": 0.26303645592985697, "grad_norm": 0.28473740816116333, "learning_rate": 0.00018424193674687297, "loss": 1.0282, "step": 4560 }, { "epoch": 0.2633248730964467, "grad_norm": 0.3194859027862549, "learning_rate": 0.00018418764586900817, "loss": 0.9995, "step": 4565 }, { "epoch": 0.2636132902630365, "grad_norm": 0.31165921688079834, "learning_rate": 0.00018413326965607593, "loss": 1.0285, "step": 4570 }, { "epoch": 0.2639017074296262, "grad_norm": 0.28910648822784424, "learning_rate": 0.00018407880816319363, "loss": 0.9465, "step": 4575 }, { "epoch": 0.264190124596216, "grad_norm": 0.3027464747428894, "learning_rate": 0.00018402426144556504, "loss": 0.9554, "step": 4580 }, { "epoch": 0.2644785417628057, "grad_norm": 0.3191346824169159, "learning_rate": 0.0001839696295584803, "loss": 1.0284, "step": 4585 }, { "epoch": 0.2647669589293955, "grad_norm": 0.32781797647476196, "learning_rate": 0.0001839149125573159, "loss": 0.9761, "step": 4590 }, { "epoch": 0.26505537609598523, "grad_norm": 0.28181716799736023, "learning_rate": 0.0001838601104975346, "loss": 1.0894, "step": 4595 }, { "epoch": 0.265343793262575, "grad_norm": 0.35118234157562256, "learning_rate": 0.00018380522343468532, "loss": 0.9843, "step": 4600 }, { "epoch": 0.26563221042916474, "grad_norm": 0.30681881308555603, "learning_rate": 0.0001837502514244033, "loss": 1.0639, "step": 4605 }, { "epoch": 0.2659206275957545, "grad_norm": 0.3133811056613922, "learning_rate": 0.00018369519452240973, "loss": 1.0317, "step": 4610 }, { "epoch": 0.26620904476234425, "grad_norm": 0.3321933150291443, "learning_rate": 0.00018364005278451187, "loss": 0.9626, "step": 4615 }, { "epoch": 0.26649746192893403, "grad_norm": 0.30032068490982056, "learning_rate": 0.00018358482626660303, "loss": 1.0235, "step": 4620 }, { "epoch": 0.26678587909552376, "grad_norm": 0.315247118473053, "learning_rate": 0.00018352951502466244, "loss": 1.0141, "step": 4625 }, { "epoch": 0.26707429626211354, "grad_norm": 0.2941517233848572, "learning_rate": 0.0001834741191147552, "loss": 0.9924, "step": 4630 }, { "epoch": 0.26736271342870327, "grad_norm": 0.30521127581596375, "learning_rate": 0.00018341863859303218, "loss": 1.0182, "step": 4635 }, { "epoch": 0.26765113059529305, "grad_norm": 0.3334304392337799, "learning_rate": 0.00018336307351573018, "loss": 0.9819, "step": 4640 }, { "epoch": 0.2679395477618828, "grad_norm": 0.28640317916870117, "learning_rate": 0.00018330742393917143, "loss": 1.0039, "step": 4645 }, { "epoch": 0.26822796492847256, "grad_norm": 0.30890411138534546, "learning_rate": 0.00018325168991976408, "loss": 1.0092, "step": 4650 }, { "epoch": 0.2685163820950623, "grad_norm": 0.29789072275161743, "learning_rate": 0.00018319587151400174, "loss": 1.0011, "step": 4655 }, { "epoch": 0.26880479926165207, "grad_norm": 0.2906172275543213, "learning_rate": 0.00018313996877846361, "loss": 0.9535, "step": 4660 }, { "epoch": 0.2690932164282418, "grad_norm": 0.2868962585926056, "learning_rate": 0.00018308398176981433, "loss": 1.0084, "step": 4665 }, { "epoch": 0.2693816335948316, "grad_norm": 0.3024742007255554, "learning_rate": 0.00018302791054480394, "loss": 1.05, "step": 4670 }, { "epoch": 0.2696700507614213, "grad_norm": 0.29981881380081177, "learning_rate": 0.00018297175516026788, "loss": 0.9848, "step": 4675 }, { "epoch": 0.2699584679280111, "grad_norm": 0.303254634141922, "learning_rate": 0.00018291551567312694, "loss": 0.9698, "step": 4680 }, { "epoch": 0.2702468850946008, "grad_norm": 0.3180643618106842, "learning_rate": 0.0001828591921403871, "loss": 1.0005, "step": 4685 }, { "epoch": 0.2705353022611906, "grad_norm": 0.300870805978775, "learning_rate": 0.00018280278461913952, "loss": 0.9951, "step": 4690 }, { "epoch": 0.27082371942778033, "grad_norm": 0.30927881598472595, "learning_rate": 0.00018274629316656054, "loss": 0.9021, "step": 4695 }, { "epoch": 0.2711121365943701, "grad_norm": 0.310472697019577, "learning_rate": 0.00018268971783991152, "loss": 1.0217, "step": 4700 }, { "epoch": 0.27140055376095984, "grad_norm": 0.33175238966941833, "learning_rate": 0.00018263305869653892, "loss": 0.9618, "step": 4705 }, { "epoch": 0.2716889709275496, "grad_norm": 0.333126038312912, "learning_rate": 0.00018257631579387412, "loss": 1.0605, "step": 4710 }, { "epoch": 0.27197738809413935, "grad_norm": 0.32339242100715637, "learning_rate": 0.00018251948918943334, "loss": 1.0171, "step": 4715 }, { "epoch": 0.27226580526072913, "grad_norm": 0.28846561908721924, "learning_rate": 0.0001824625789408177, "loss": 0.9603, "step": 4720 }, { "epoch": 0.27255422242731886, "grad_norm": 0.2988503873348236, "learning_rate": 0.0001824055851057131, "loss": 0.9954, "step": 4725 }, { "epoch": 0.27284263959390864, "grad_norm": 0.2900153398513794, "learning_rate": 0.00018234850774189018, "loss": 0.8959, "step": 4730 }, { "epoch": 0.27313105676049837, "grad_norm": 0.3061988353729248, "learning_rate": 0.00018229134690720425, "loss": 0.9985, "step": 4735 }, { "epoch": 0.27341947392708815, "grad_norm": 0.323887437582016, "learning_rate": 0.00018223410265959516, "loss": 0.9946, "step": 4740 }, { "epoch": 0.2737078910936779, "grad_norm": 0.28910937905311584, "learning_rate": 0.00018217677505708737, "loss": 0.9593, "step": 4745 }, { "epoch": 0.27399630826026766, "grad_norm": 0.3210904896259308, "learning_rate": 0.00018211936415778984, "loss": 0.9201, "step": 4750 }, { "epoch": 0.2742847254268574, "grad_norm": 0.280989408493042, "learning_rate": 0.00018206187001989593, "loss": 0.9341, "step": 4755 }, { "epoch": 0.2745731425934472, "grad_norm": 0.3101036846637726, "learning_rate": 0.0001820042927016834, "loss": 0.9668, "step": 4760 }, { "epoch": 0.2748615597600369, "grad_norm": 0.3021515905857086, "learning_rate": 0.00018194663226151427, "loss": 0.9514, "step": 4765 }, { "epoch": 0.2751499769266267, "grad_norm": 0.30778107047080994, "learning_rate": 0.0001818888887578349, "loss": 0.967, "step": 4770 }, { "epoch": 0.2754383940932164, "grad_norm": 0.32916298508644104, "learning_rate": 0.00018183106224917576, "loss": 0.977, "step": 4775 }, { "epoch": 0.2757268112598062, "grad_norm": 0.3261403441429138, "learning_rate": 0.00018177315279415153, "loss": 0.9491, "step": 4780 }, { "epoch": 0.2760152284263959, "grad_norm": 0.3058185279369354, "learning_rate": 0.0001817151604514609, "loss": 0.985, "step": 4785 }, { "epoch": 0.2763036455929857, "grad_norm": 0.2961861491203308, "learning_rate": 0.00018165708527988664, "loss": 0.966, "step": 4790 }, { "epoch": 0.27659206275957543, "grad_norm": 0.306802362203598, "learning_rate": 0.0001815989273382954, "loss": 0.9942, "step": 4795 }, { "epoch": 0.2768804799261652, "grad_norm": 0.2971879839897156, "learning_rate": 0.00018154068668563782, "loss": 0.9362, "step": 4800 }, { "epoch": 0.27716889709275494, "grad_norm": 0.3133615255355835, "learning_rate": 0.00018148236338094833, "loss": 0.9792, "step": 4805 }, { "epoch": 0.2774573142593447, "grad_norm": 0.287922739982605, "learning_rate": 0.00018142395748334513, "loss": 0.8758, "step": 4810 }, { "epoch": 0.27774573142593445, "grad_norm": 0.31516680121421814, "learning_rate": 0.00018136546905203016, "loss": 0.9796, "step": 4815 }, { "epoch": 0.27803414859252423, "grad_norm": 0.2937026917934418, "learning_rate": 0.000181306898146289, "loss": 0.9357, "step": 4820 }, { "epoch": 0.27832256575911396, "grad_norm": 0.28911980986595154, "learning_rate": 0.00018124824482549086, "loss": 1.0377, "step": 4825 }, { "epoch": 0.27861098292570374, "grad_norm": 0.31102392077445984, "learning_rate": 0.00018118950914908843, "loss": 0.9278, "step": 4830 }, { "epoch": 0.27889940009229347, "grad_norm": 0.34064656496047974, "learning_rate": 0.00018113069117661797, "loss": 0.9204, "step": 4835 }, { "epoch": 0.27918781725888325, "grad_norm": 0.31624704599380493, "learning_rate": 0.00018107179096769901, "loss": 0.9523, "step": 4840 }, { "epoch": 0.279476234425473, "grad_norm": 0.2988069951534271, "learning_rate": 0.00018101280858203462, "loss": 1.02, "step": 4845 }, { "epoch": 0.27976465159206276, "grad_norm": 0.3373366892337799, "learning_rate": 0.00018095374407941104, "loss": 1.0102, "step": 4850 }, { "epoch": 0.2800530687586525, "grad_norm": 0.3316998779773712, "learning_rate": 0.00018089459751969778, "loss": 1.0128, "step": 4855 }, { "epoch": 0.2803414859252423, "grad_norm": 0.30349063873291016, "learning_rate": 0.0001808353689628475, "loss": 0.8867, "step": 4860 }, { "epoch": 0.280629903091832, "grad_norm": 0.33365175127983093, "learning_rate": 0.0001807760584688961, "loss": 0.9744, "step": 4865 }, { "epoch": 0.2809183202584218, "grad_norm": 0.3064230978488922, "learning_rate": 0.0001807166660979623, "loss": 0.9424, "step": 4870 }, { "epoch": 0.2812067374250115, "grad_norm": 0.29559001326560974, "learning_rate": 0.00018065719191024808, "loss": 0.999, "step": 4875 }, { "epoch": 0.2814951545916013, "grad_norm": 0.33819547295570374, "learning_rate": 0.00018059763596603814, "loss": 1.0021, "step": 4880 }, { "epoch": 0.281783571758191, "grad_norm": 0.321237713098526, "learning_rate": 0.00018053799832570014, "loss": 0.9694, "step": 4885 }, { "epoch": 0.2820719889247808, "grad_norm": 0.3277602791786194, "learning_rate": 0.0001804782790496846, "loss": 0.9753, "step": 4890 }, { "epoch": 0.2823604060913706, "grad_norm": 0.2939560115337372, "learning_rate": 0.00018041847819852468, "loss": 0.9314, "step": 4895 }, { "epoch": 0.2826488232579603, "grad_norm": 0.30268988013267517, "learning_rate": 0.00018035859583283626, "loss": 0.973, "step": 4900 }, { "epoch": 0.2829372404245501, "grad_norm": 0.30159807205200195, "learning_rate": 0.00018029863201331783, "loss": 0.912, "step": 4905 }, { "epoch": 0.2832256575911398, "grad_norm": 0.3018011152744293, "learning_rate": 0.00018023858680075061, "loss": 1.0129, "step": 4910 }, { "epoch": 0.2835140747577296, "grad_norm": 0.3190675973892212, "learning_rate": 0.0001801784602559981, "loss": 0.9403, "step": 4915 }, { "epoch": 0.28380249192431933, "grad_norm": 0.32579466700553894, "learning_rate": 0.00018011825244000632, "loss": 0.9585, "step": 4920 }, { "epoch": 0.2840909090909091, "grad_norm": 0.28825148940086365, "learning_rate": 0.00018005796341380372, "loss": 0.9275, "step": 4925 }, { "epoch": 0.28437932625749884, "grad_norm": 0.3098837435245514, "learning_rate": 0.00017999759323850098, "loss": 1.0372, "step": 4930 }, { "epoch": 0.2846677434240886, "grad_norm": 0.3242741823196411, "learning_rate": 0.0001799371419752911, "loss": 0.9727, "step": 4935 }, { "epoch": 0.28495616059067835, "grad_norm": 0.31144097447395325, "learning_rate": 0.0001798766096854493, "loss": 0.926, "step": 4940 }, { "epoch": 0.28524457775726814, "grad_norm": 0.3283907175064087, "learning_rate": 0.0001798159964303328, "loss": 1.0166, "step": 4945 }, { "epoch": 0.28553299492385786, "grad_norm": 0.32130008935928345, "learning_rate": 0.00017975530227138105, "loss": 0.9836, "step": 4950 }, { "epoch": 0.28582141209044765, "grad_norm": 0.3087569773197174, "learning_rate": 0.00017969452727011536, "loss": 1.0186, "step": 4955 }, { "epoch": 0.2861098292570374, "grad_norm": 0.29481446743011475, "learning_rate": 0.00017963367148813913, "loss": 1.0168, "step": 4960 }, { "epoch": 0.28639824642362716, "grad_norm": 0.30410903692245483, "learning_rate": 0.0001795727349871375, "loss": 1.0112, "step": 4965 }, { "epoch": 0.2866866635902169, "grad_norm": 0.30060312151908875, "learning_rate": 0.0001795117178288775, "loss": 1.0348, "step": 4970 }, { "epoch": 0.28697508075680667, "grad_norm": 0.3111194670200348, "learning_rate": 0.00017945062007520797, "loss": 1.0389, "step": 4975 }, { "epoch": 0.2872634979233964, "grad_norm": 0.3265964090824127, "learning_rate": 0.00017938944178805933, "loss": 0.9882, "step": 4980 }, { "epoch": 0.2875519150899862, "grad_norm": 0.29788386821746826, "learning_rate": 0.0001793281830294437, "loss": 0.9822, "step": 4985 }, { "epoch": 0.2878403322565759, "grad_norm": 0.29151782393455505, "learning_rate": 0.00017926684386145478, "loss": 0.9768, "step": 4990 }, { "epoch": 0.2881287494231657, "grad_norm": 0.30184727907180786, "learning_rate": 0.0001792054243462677, "loss": 0.9629, "step": 4995 }, { "epoch": 0.2884171665897554, "grad_norm": 0.34121939539909363, "learning_rate": 0.00017914392454613913, "loss": 1.0261, "step": 5000 }, { "epoch": 0.2887055837563452, "grad_norm": 0.29807743430137634, "learning_rate": 0.00017908234452340707, "loss": 0.9572, "step": 5005 }, { "epoch": 0.2889940009229349, "grad_norm": 0.31767940521240234, "learning_rate": 0.00017902068434049077, "loss": 0.9559, "step": 5010 }, { "epoch": 0.2892824180895247, "grad_norm": 0.33446744084358215, "learning_rate": 0.0001789589440598909, "loss": 1.0301, "step": 5015 }, { "epoch": 0.28957083525611443, "grad_norm": 0.30399268865585327, "learning_rate": 0.00017889712374418912, "loss": 1.0066, "step": 5020 }, { "epoch": 0.2898592524227042, "grad_norm": 0.31030428409576416, "learning_rate": 0.0001788352234560484, "loss": 0.9751, "step": 5025 }, { "epoch": 0.29014766958929394, "grad_norm": 0.30979663133621216, "learning_rate": 0.00017877324325821264, "loss": 1.0192, "step": 5030 }, { "epoch": 0.2904360867558837, "grad_norm": 0.31306302547454834, "learning_rate": 0.0001787111832135068, "loss": 1.018, "step": 5035 }, { "epoch": 0.29072450392247345, "grad_norm": 0.2913960814476013, "learning_rate": 0.00017864904338483676, "loss": 0.954, "step": 5040 }, { "epoch": 0.29101292108906324, "grad_norm": 0.3290291428565979, "learning_rate": 0.00017858682383518928, "loss": 1.0345, "step": 5045 }, { "epoch": 0.29130133825565296, "grad_norm": 0.29978567361831665, "learning_rate": 0.00017852452462763192, "loss": 0.9141, "step": 5050 }, { "epoch": 0.29158975542224275, "grad_norm": 0.3192616403102875, "learning_rate": 0.00017846214582531298, "loss": 1.0308, "step": 5055 }, { "epoch": 0.2918781725888325, "grad_norm": 0.30386027693748474, "learning_rate": 0.00017839968749146142, "loss": 0.9891, "step": 5060 }, { "epoch": 0.29216658975542226, "grad_norm": 0.31576067209243774, "learning_rate": 0.00017833714968938687, "loss": 0.8924, "step": 5065 }, { "epoch": 0.292455006922012, "grad_norm": 0.30237501859664917, "learning_rate": 0.0001782745324824795, "loss": 0.9355, "step": 5070 }, { "epoch": 0.29274342408860177, "grad_norm": 0.29534316062927246, "learning_rate": 0.00017821183593420988, "loss": 0.918, "step": 5075 }, { "epoch": 0.2930318412551915, "grad_norm": 0.3078969120979309, "learning_rate": 0.00017814906010812912, "loss": 1.018, "step": 5080 }, { "epoch": 0.2933202584217813, "grad_norm": 0.31464046239852905, "learning_rate": 0.00017808620506786865, "loss": 0.8985, "step": 5085 }, { "epoch": 0.293608675588371, "grad_norm": 0.28147396445274353, "learning_rate": 0.00017802327087714016, "loss": 0.9324, "step": 5090 }, { "epoch": 0.2938970927549608, "grad_norm": 0.30480238795280457, "learning_rate": 0.00017796025759973558, "loss": 0.9891, "step": 5095 }, { "epoch": 0.2941855099215505, "grad_norm": 0.2980847656726837, "learning_rate": 0.00017789716529952704, "loss": 0.9646, "step": 5100 }, { "epoch": 0.2944739270881403, "grad_norm": 0.3064950108528137, "learning_rate": 0.00017783399404046674, "loss": 0.9412, "step": 5105 }, { "epoch": 0.29476234425473, "grad_norm": 0.29476818442344666, "learning_rate": 0.00017777074388658693, "loss": 0.9941, "step": 5110 }, { "epoch": 0.2950507614213198, "grad_norm": 0.3126581609249115, "learning_rate": 0.00017770741490199979, "loss": 0.915, "step": 5115 }, { "epoch": 0.29533917858790953, "grad_norm": 0.29231584072113037, "learning_rate": 0.00017764400715089744, "loss": 0.986, "step": 5120 }, { "epoch": 0.2956275957544993, "grad_norm": 0.32379642128944397, "learning_rate": 0.00017758052069755188, "loss": 0.954, "step": 5125 }, { "epoch": 0.29591601292108904, "grad_norm": 0.34569108486175537, "learning_rate": 0.0001775169556063148, "loss": 0.9136, "step": 5130 }, { "epoch": 0.2962044300876788, "grad_norm": 0.3077993392944336, "learning_rate": 0.00017745331194161766, "loss": 0.9378, "step": 5135 }, { "epoch": 0.29649284725426855, "grad_norm": 0.29212331771850586, "learning_rate": 0.00017738958976797157, "loss": 0.9831, "step": 5140 }, { "epoch": 0.29678126442085834, "grad_norm": 0.30094853043556213, "learning_rate": 0.00017732578914996712, "loss": 1.0074, "step": 5145 }, { "epoch": 0.29706968158744806, "grad_norm": 0.2973226010799408, "learning_rate": 0.00017726191015227452, "loss": 0.9542, "step": 5150 }, { "epoch": 0.29735809875403785, "grad_norm": 0.31063079833984375, "learning_rate": 0.00017719795283964345, "loss": 1.0237, "step": 5155 }, { "epoch": 0.2976465159206276, "grad_norm": 0.3208075165748596, "learning_rate": 0.00017713391727690284, "loss": 1.0179, "step": 5160 }, { "epoch": 0.29793493308721736, "grad_norm": 0.31248340010643005, "learning_rate": 0.00017706980352896108, "loss": 0.9811, "step": 5165 }, { "epoch": 0.2982233502538071, "grad_norm": 0.3174075186252594, "learning_rate": 0.0001770056116608057, "loss": 0.9975, "step": 5170 }, { "epoch": 0.29851176742039687, "grad_norm": 0.2985789477825165, "learning_rate": 0.0001769413417375035, "loss": 0.953, "step": 5175 }, { "epoch": 0.2988001845869866, "grad_norm": 0.304510235786438, "learning_rate": 0.0001768769938242003, "loss": 1.0, "step": 5180 }, { "epoch": 0.2990886017535764, "grad_norm": 0.30035126209259033, "learning_rate": 0.00017681256798612112, "loss": 1.0334, "step": 5185 }, { "epoch": 0.2993770189201661, "grad_norm": 0.3091324269771576, "learning_rate": 0.0001767480642885698, "loss": 1.0038, "step": 5190 }, { "epoch": 0.2996654360867559, "grad_norm": 0.32166004180908203, "learning_rate": 0.00017668348279692921, "loss": 0.9949, "step": 5195 }, { "epoch": 0.2999538532533456, "grad_norm": 0.29622697830200195, "learning_rate": 0.00017661882357666105, "loss": 0.9714, "step": 5200 }, { "epoch": 0.3002422704199354, "grad_norm": 0.31370845437049866, "learning_rate": 0.00017655408669330576, "loss": 0.9996, "step": 5205 }, { "epoch": 0.3005306875865251, "grad_norm": 0.31638461351394653, "learning_rate": 0.00017648927221248264, "loss": 0.9975, "step": 5210 }, { "epoch": 0.3008191047531149, "grad_norm": 0.3722272217273712, "learning_rate": 0.00017642438019988945, "loss": 1.0313, "step": 5215 }, { "epoch": 0.30110752191970463, "grad_norm": 0.3506329357624054, "learning_rate": 0.00017635941072130268, "loss": 1.0279, "step": 5220 }, { "epoch": 0.3013959390862944, "grad_norm": 0.2909005582332611, "learning_rate": 0.0001762943638425773, "loss": 1.0225, "step": 5225 }, { "epoch": 0.30168435625288414, "grad_norm": 0.2934955060482025, "learning_rate": 0.00017622923962964672, "loss": 1.0326, "step": 5230 }, { "epoch": 0.3019727734194739, "grad_norm": 0.38688069581985474, "learning_rate": 0.00017616403814852278, "loss": 1.063, "step": 5235 }, { "epoch": 0.30226119058606365, "grad_norm": 0.28810983896255493, "learning_rate": 0.0001760987594652956, "loss": 0.9573, "step": 5240 }, { "epoch": 0.30254960775265344, "grad_norm": 0.3192325830459595, "learning_rate": 0.00017603340364613355, "loss": 0.9995, "step": 5245 }, { "epoch": 0.3028380249192432, "grad_norm": 0.3177827000617981, "learning_rate": 0.00017596797075728322, "loss": 0.9979, "step": 5250 }, { "epoch": 0.30312644208583295, "grad_norm": 0.3133276402950287, "learning_rate": 0.00017590246086506933, "loss": 0.9178, "step": 5255 }, { "epoch": 0.30341485925242273, "grad_norm": 0.31365862488746643, "learning_rate": 0.00017583687403589454, "loss": 1.0893, "step": 5260 }, { "epoch": 0.30370327641901246, "grad_norm": 0.2886902391910553, "learning_rate": 0.0001757712103362397, "loss": 0.9814, "step": 5265 }, { "epoch": 0.30399169358560224, "grad_norm": 0.3023292124271393, "learning_rate": 0.0001757054698326634, "loss": 0.9635, "step": 5270 }, { "epoch": 0.30428011075219197, "grad_norm": 0.30080515146255493, "learning_rate": 0.00017563965259180216, "loss": 1.0034, "step": 5275 }, { "epoch": 0.30456852791878175, "grad_norm": 0.34142324328422546, "learning_rate": 0.00017557375868037026, "loss": 1.0152, "step": 5280 }, { "epoch": 0.3048569450853715, "grad_norm": 0.3014017939567566, "learning_rate": 0.00017550778816515967, "loss": 0.9798, "step": 5285 }, { "epoch": 0.30514536225196126, "grad_norm": 0.4091859459877014, "learning_rate": 0.0001754417411130401, "loss": 1.0376, "step": 5290 }, { "epoch": 0.305433779418551, "grad_norm": 0.2926144301891327, "learning_rate": 0.00017537561759095873, "loss": 0.966, "step": 5295 }, { "epoch": 0.30572219658514077, "grad_norm": 0.30158326029777527, "learning_rate": 0.0001753094176659403, "loss": 0.9269, "step": 5300 }, { "epoch": 0.3060106137517305, "grad_norm": 0.32335394620895386, "learning_rate": 0.00017524314140508705, "loss": 0.9784, "step": 5305 }, { "epoch": 0.3062990309183203, "grad_norm": 0.29012882709503174, "learning_rate": 0.0001751767888755785, "loss": 0.8869, "step": 5310 }, { "epoch": 0.30658744808491, "grad_norm": 0.3356166183948517, "learning_rate": 0.00017511036014467157, "loss": 0.9995, "step": 5315 }, { "epoch": 0.3068758652514998, "grad_norm": 0.29851922392845154, "learning_rate": 0.00017504385527970028, "loss": 0.9666, "step": 5320 }, { "epoch": 0.3071642824180895, "grad_norm": 0.29468950629234314, "learning_rate": 0.00017497727434807598, "loss": 1.0196, "step": 5325 }, { "epoch": 0.3074526995846793, "grad_norm": 0.29625648260116577, "learning_rate": 0.00017491061741728702, "loss": 1.0007, "step": 5330 }, { "epoch": 0.30774111675126903, "grad_norm": 0.30475690960884094, "learning_rate": 0.00017484388455489883, "loss": 1.0158, "step": 5335 }, { "epoch": 0.3080295339178588, "grad_norm": 0.29841533303260803, "learning_rate": 0.00017477707582855384, "loss": 0.9383, "step": 5340 }, { "epoch": 0.30831795108444854, "grad_norm": 0.3112857937812805, "learning_rate": 0.00017471019130597127, "loss": 0.952, "step": 5345 }, { "epoch": 0.3086063682510383, "grad_norm": 0.30072787404060364, "learning_rate": 0.00017464323105494727, "loss": 0.9599, "step": 5350 }, { "epoch": 0.30889478541762805, "grad_norm": 0.33163365721702576, "learning_rate": 0.0001745761951433547, "loss": 1.03, "step": 5355 }, { "epoch": 0.30918320258421783, "grad_norm": 0.3288794159889221, "learning_rate": 0.00017450908363914316, "loss": 0.9898, "step": 5360 }, { "epoch": 0.30947161975080756, "grad_norm": 0.3404468595981598, "learning_rate": 0.0001744418966103388, "loss": 0.9744, "step": 5365 }, { "epoch": 0.30976003691739734, "grad_norm": 0.3131037950515747, "learning_rate": 0.00017437463412504437, "loss": 1.0509, "step": 5370 }, { "epoch": 0.31004845408398707, "grad_norm": 0.31082791090011597, "learning_rate": 0.00017430729625143908, "loss": 0.9927, "step": 5375 }, { "epoch": 0.31033687125057685, "grad_norm": 0.3165504038333893, "learning_rate": 0.00017423988305777864, "loss": 1.0446, "step": 5380 }, { "epoch": 0.3106252884171666, "grad_norm": 0.3058837652206421, "learning_rate": 0.00017417239461239498, "loss": 0.9513, "step": 5385 }, { "epoch": 0.31091370558375636, "grad_norm": 0.31753161549568176, "learning_rate": 0.0001741048309836964, "loss": 0.9855, "step": 5390 }, { "epoch": 0.3112021227503461, "grad_norm": 0.3206409513950348, "learning_rate": 0.00017403719224016735, "loss": 0.9444, "step": 5395 }, { "epoch": 0.31149053991693587, "grad_norm": 0.3047698438167572, "learning_rate": 0.00017396947845036844, "loss": 0.9291, "step": 5400 }, { "epoch": 0.3117789570835256, "grad_norm": 0.3192146122455597, "learning_rate": 0.0001739016896829364, "loss": 0.9296, "step": 5405 }, { "epoch": 0.3120673742501154, "grad_norm": 0.33970245718955994, "learning_rate": 0.00017383382600658388, "loss": 1.021, "step": 5410 }, { "epoch": 0.3123557914167051, "grad_norm": 0.3308781385421753, "learning_rate": 0.00017376588749009946, "loss": 0.979, "step": 5415 }, { "epoch": 0.3126442085832949, "grad_norm": 0.28747421503067017, "learning_rate": 0.0001736978742023477, "loss": 0.9705, "step": 5420 }, { "epoch": 0.3129326257498846, "grad_norm": 0.2859438359737396, "learning_rate": 0.0001736297862122688, "loss": 0.9193, "step": 5425 }, { "epoch": 0.3132210429164744, "grad_norm": 0.3263635039329529, "learning_rate": 0.00017356162358887875, "loss": 1.0189, "step": 5430 }, { "epoch": 0.31350946008306413, "grad_norm": 0.3261274993419647, "learning_rate": 0.0001734933864012692, "loss": 1.0329, "step": 5435 }, { "epoch": 0.3137978772496539, "grad_norm": 0.3366422653198242, "learning_rate": 0.00017342507471860733, "loss": 0.9987, "step": 5440 }, { "epoch": 0.31408629441624364, "grad_norm": 0.29917559027671814, "learning_rate": 0.00017335668861013592, "loss": 0.9922, "step": 5445 }, { "epoch": 0.3143747115828334, "grad_norm": 0.30047109723091125, "learning_rate": 0.0001732882281451731, "loss": 1.0203, "step": 5450 }, { "epoch": 0.31466312874942315, "grad_norm": 0.31822001934051514, "learning_rate": 0.00017321969339311241, "loss": 0.9877, "step": 5455 }, { "epoch": 0.31495154591601293, "grad_norm": 0.35328808426856995, "learning_rate": 0.0001731510844234227, "loss": 0.9792, "step": 5460 }, { "epoch": 0.31523996308260266, "grad_norm": 0.33993515372276306, "learning_rate": 0.00017308240130564802, "loss": 0.9535, "step": 5465 }, { "epoch": 0.31552838024919244, "grad_norm": 0.3094184994697571, "learning_rate": 0.0001730136441094076, "loss": 1.0092, "step": 5470 }, { "epoch": 0.31581679741578217, "grad_norm": 0.31005391478538513, "learning_rate": 0.00017294481290439575, "loss": 0.9881, "step": 5475 }, { "epoch": 0.31610521458237195, "grad_norm": 0.35270652174949646, "learning_rate": 0.00017287590776038177, "loss": 1.0174, "step": 5480 }, { "epoch": 0.3163936317489617, "grad_norm": 0.31024622917175293, "learning_rate": 0.00017280692874720998, "loss": 0.9835, "step": 5485 }, { "epoch": 0.31668204891555146, "grad_norm": 0.30939292907714844, "learning_rate": 0.0001727378759347995, "loss": 0.9864, "step": 5490 }, { "epoch": 0.3169704660821412, "grad_norm": 0.3189314305782318, "learning_rate": 0.00017266874939314434, "loss": 1.0451, "step": 5495 }, { "epoch": 0.31725888324873097, "grad_norm": 0.335095077753067, "learning_rate": 0.0001725995491923131, "loss": 1.0026, "step": 5500 }, { "epoch": 0.3175473004153207, "grad_norm": 0.31108853220939636, "learning_rate": 0.0001725302754024492, "loss": 1.0007, "step": 5505 }, { "epoch": 0.3178357175819105, "grad_norm": 0.30693233013153076, "learning_rate": 0.00017246092809377058, "loss": 0.9338, "step": 5510 }, { "epoch": 0.3181241347485002, "grad_norm": 0.28529733419418335, "learning_rate": 0.00017239150733656966, "loss": 0.9947, "step": 5515 }, { "epoch": 0.31841255191509, "grad_norm": 0.3156437277793884, "learning_rate": 0.0001723220132012134, "loss": 1.0326, "step": 5520 }, { "epoch": 0.3187009690816797, "grad_norm": 0.3074203431606293, "learning_rate": 0.0001722524457581431, "loss": 1.033, "step": 5525 }, { "epoch": 0.3189893862482695, "grad_norm": 0.2984369695186615, "learning_rate": 0.00017218280507787435, "loss": 0.9646, "step": 5530 }, { "epoch": 0.31927780341485923, "grad_norm": 0.31262287497520447, "learning_rate": 0.00017211309123099696, "loss": 1.0107, "step": 5535 }, { "epoch": 0.319566220581449, "grad_norm": 0.3307073712348938, "learning_rate": 0.00017204330428817496, "loss": 0.982, "step": 5540 }, { "epoch": 0.31985463774803874, "grad_norm": 0.31009113788604736, "learning_rate": 0.00017197344432014645, "loss": 0.943, "step": 5545 }, { "epoch": 0.3201430549146285, "grad_norm": 0.3452712297439575, "learning_rate": 0.00017190351139772348, "loss": 0.9966, "step": 5550 }, { "epoch": 0.32043147208121825, "grad_norm": 0.3429424464702606, "learning_rate": 0.0001718335055917922, "loss": 1.0072, "step": 5555 }, { "epoch": 0.32071988924780803, "grad_norm": 0.32212451100349426, "learning_rate": 0.00017176342697331246, "loss": 0.9886, "step": 5560 }, { "epoch": 0.32100830641439776, "grad_norm": 0.3044155538082123, "learning_rate": 0.00017169327561331808, "loss": 1.0059, "step": 5565 }, { "epoch": 0.32129672358098754, "grad_norm": 0.31964996457099915, "learning_rate": 0.00017162305158291655, "loss": 0.9924, "step": 5570 }, { "epoch": 0.32158514074757727, "grad_norm": 0.29949527978897095, "learning_rate": 0.0001715527549532889, "loss": 1.0092, "step": 5575 }, { "epoch": 0.32187355791416705, "grad_norm": 0.3664807081222534, "learning_rate": 0.00017148238579568995, "loss": 0.9933, "step": 5580 }, { "epoch": 0.3221619750807568, "grad_norm": 0.3005771338939667, "learning_rate": 0.0001714119441814479, "loss": 0.9662, "step": 5585 }, { "epoch": 0.32245039224734656, "grad_norm": 0.30175963044166565, "learning_rate": 0.00017134143018196447, "loss": 0.9698, "step": 5590 }, { "epoch": 0.32273880941393635, "grad_norm": 0.30512237548828125, "learning_rate": 0.00017127084386871466, "loss": 0.9445, "step": 5595 }, { "epoch": 0.3230272265805261, "grad_norm": 0.2964697480201721, "learning_rate": 0.00017120018531324689, "loss": 0.9598, "step": 5600 }, { "epoch": 0.32331564374711586, "grad_norm": 0.32373014092445374, "learning_rate": 0.0001711294545871827, "loss": 0.9872, "step": 5605 }, { "epoch": 0.3236040609137056, "grad_norm": 0.31650540232658386, "learning_rate": 0.00017105865176221684, "loss": 0.9516, "step": 5610 }, { "epoch": 0.32389247808029537, "grad_norm": 0.3336973190307617, "learning_rate": 0.00017098777691011718, "loss": 0.9658, "step": 5615 }, { "epoch": 0.3241808952468851, "grad_norm": 0.3417705297470093, "learning_rate": 0.00017091683010272447, "loss": 0.9986, "step": 5620 }, { "epoch": 0.3244693124134749, "grad_norm": 0.31307974457740784, "learning_rate": 0.00017084581141195253, "loss": 0.9374, "step": 5625 }, { "epoch": 0.3247577295800646, "grad_norm": 0.2898584306240082, "learning_rate": 0.00017077472090978798, "loss": 0.936, "step": 5630 }, { "epoch": 0.3250461467466544, "grad_norm": 0.32295459508895874, "learning_rate": 0.00017070355866829017, "loss": 0.9609, "step": 5635 }, { "epoch": 0.3253345639132441, "grad_norm": 0.2989657521247864, "learning_rate": 0.00017063232475959133, "loss": 0.9887, "step": 5640 }, { "epoch": 0.3256229810798339, "grad_norm": 0.31220418214797974, "learning_rate": 0.00017056101925589623, "loss": 1.0109, "step": 5645 }, { "epoch": 0.3259113982464236, "grad_norm": 0.33185145258903503, "learning_rate": 0.00017048964222948217, "loss": 1.0592, "step": 5650 }, { "epoch": 0.3261998154130134, "grad_norm": 0.32256993651390076, "learning_rate": 0.000170418193752699, "loss": 0.9829, "step": 5655 }, { "epoch": 0.32648823257960313, "grad_norm": 0.3156249523162842, "learning_rate": 0.00017034667389796904, "loss": 0.9038, "step": 5660 }, { "epoch": 0.3267766497461929, "grad_norm": 0.32424643635749817, "learning_rate": 0.0001702750827377869, "loss": 1.0174, "step": 5665 }, { "epoch": 0.32706506691278264, "grad_norm": 0.29936036467552185, "learning_rate": 0.00017020342034471944, "loss": 1.0131, "step": 5670 }, { "epoch": 0.3273534840793724, "grad_norm": 0.2940688133239746, "learning_rate": 0.0001701316867914058, "loss": 1.007, "step": 5675 }, { "epoch": 0.32764190124596215, "grad_norm": 0.33732593059539795, "learning_rate": 0.00017005988215055718, "loss": 0.9991, "step": 5680 }, { "epoch": 0.32793031841255194, "grad_norm": 0.2845425605773926, "learning_rate": 0.00016998800649495693, "loss": 0.9274, "step": 5685 }, { "epoch": 0.32821873557914166, "grad_norm": 0.3055282235145569, "learning_rate": 0.00016991605989746025, "loss": 1.0231, "step": 5690 }, { "epoch": 0.32850715274573145, "grad_norm": 0.33001506328582764, "learning_rate": 0.0001698440424309944, "loss": 1.0293, "step": 5695 }, { "epoch": 0.3287955699123212, "grad_norm": 0.32202020287513733, "learning_rate": 0.00016977195416855828, "loss": 0.9725, "step": 5700 }, { "epoch": 0.32908398707891096, "grad_norm": 0.3239056468009949, "learning_rate": 0.0001696997951832228, "loss": 1.0648, "step": 5705 }, { "epoch": 0.3293724042455007, "grad_norm": 0.2853987216949463, "learning_rate": 0.00016962756554813037, "loss": 0.9922, "step": 5710 }, { "epoch": 0.32966082141209047, "grad_norm": 0.2992650866508484, "learning_rate": 0.00016955526533649504, "loss": 0.9936, "step": 5715 }, { "epoch": 0.3299492385786802, "grad_norm": 0.29636356234550476, "learning_rate": 0.0001694828946216025, "loss": 0.9924, "step": 5720 }, { "epoch": 0.33023765574527, "grad_norm": 0.2977345585823059, "learning_rate": 0.00016941045347680973, "loss": 0.972, "step": 5725 }, { "epoch": 0.3305260729118597, "grad_norm": 0.2948797047138214, "learning_rate": 0.00016933794197554524, "loss": 0.967, "step": 5730 }, { "epoch": 0.3308144900784495, "grad_norm": 0.3199259340763092, "learning_rate": 0.00016926536019130884, "loss": 0.9491, "step": 5735 }, { "epoch": 0.3311029072450392, "grad_norm": 0.3436327278614044, "learning_rate": 0.00016919270819767152, "loss": 0.9865, "step": 5740 }, { "epoch": 0.331391324411629, "grad_norm": 0.2954476773738861, "learning_rate": 0.0001691199860682755, "loss": 0.9706, "step": 5745 }, { "epoch": 0.3316797415782187, "grad_norm": 0.30476078391075134, "learning_rate": 0.00016904719387683407, "loss": 0.9166, "step": 5750 }, { "epoch": 0.3319681587448085, "grad_norm": 0.3269798755645752, "learning_rate": 0.0001689743316971315, "loss": 0.9568, "step": 5755 }, { "epoch": 0.33225657591139823, "grad_norm": 0.29287829995155334, "learning_rate": 0.00016890139960302304, "loss": 0.9311, "step": 5760 }, { "epoch": 0.332544993077988, "grad_norm": 0.3217598497867584, "learning_rate": 0.00016882839766843485, "loss": 1.0023, "step": 5765 }, { "epoch": 0.33283341024457774, "grad_norm": 0.306896448135376, "learning_rate": 0.00016875532596736373, "loss": 0.936, "step": 5770 }, { "epoch": 0.3331218274111675, "grad_norm": 0.32418233156204224, "learning_rate": 0.00016868218457387736, "loss": 0.9939, "step": 5775 }, { "epoch": 0.33341024457775725, "grad_norm": 0.325740247964859, "learning_rate": 0.00016860897356211403, "loss": 0.9642, "step": 5780 }, { "epoch": 0.33369866174434704, "grad_norm": 0.29711437225341797, "learning_rate": 0.00016853569300628253, "loss": 0.943, "step": 5785 }, { "epoch": 0.33398707891093676, "grad_norm": 0.3324286937713623, "learning_rate": 0.00016846234298066218, "loss": 0.9789, "step": 5790 }, { "epoch": 0.33427549607752655, "grad_norm": 0.3122076690196991, "learning_rate": 0.00016838892355960274, "loss": 1.0296, "step": 5795 }, { "epoch": 0.3345639132441163, "grad_norm": 0.32273441553115845, "learning_rate": 0.0001683154348175243, "loss": 0.9853, "step": 5800 }, { "epoch": 0.33485233041070606, "grad_norm": 0.35843372344970703, "learning_rate": 0.00016824187682891714, "loss": 1.0195, "step": 5805 }, { "epoch": 0.3351407475772958, "grad_norm": 0.33089888095855713, "learning_rate": 0.00016816824966834183, "loss": 1.0346, "step": 5810 }, { "epoch": 0.33542916474388557, "grad_norm": 0.3273804783821106, "learning_rate": 0.00016809455341042906, "loss": 1.0994, "step": 5815 }, { "epoch": 0.3357175819104753, "grad_norm": 0.3188895583152771, "learning_rate": 0.00016802078812987948, "loss": 0.9853, "step": 5820 }, { "epoch": 0.3360059990770651, "grad_norm": 0.3157017230987549, "learning_rate": 0.00016794695390146374, "loss": 0.9587, "step": 5825 }, { "epoch": 0.3362944162436548, "grad_norm": 0.3376935124397278, "learning_rate": 0.0001678730508000224, "loss": 0.8833, "step": 5830 }, { "epoch": 0.3365828334102446, "grad_norm": 0.32190343737602234, "learning_rate": 0.00016779907890046575, "loss": 1.0592, "step": 5835 }, { "epoch": 0.3368712505768343, "grad_norm": 0.3182050883769989, "learning_rate": 0.00016772503827777396, "loss": 0.9834, "step": 5840 }, { "epoch": 0.3371596677434241, "grad_norm": 0.28917548060417175, "learning_rate": 0.00016765092900699675, "loss": 0.939, "step": 5845 }, { "epoch": 0.3374480849100138, "grad_norm": 0.31394827365875244, "learning_rate": 0.00016757675116325343, "loss": 0.9955, "step": 5850 }, { "epoch": 0.3377365020766036, "grad_norm": 0.29517030715942383, "learning_rate": 0.00016750250482173287, "loss": 0.9974, "step": 5855 }, { "epoch": 0.33802491924319333, "grad_norm": 0.30578872561454773, "learning_rate": 0.0001674281900576933, "loss": 1.0233, "step": 5860 }, { "epoch": 0.3383133364097831, "grad_norm": 0.3007958233356476, "learning_rate": 0.00016735380694646236, "loss": 0.9597, "step": 5865 }, { "epoch": 0.33860175357637284, "grad_norm": 0.3038438856601715, "learning_rate": 0.00016727935556343698, "loss": 0.9776, "step": 5870 }, { "epoch": 0.3388901707429626, "grad_norm": 0.31204402446746826, "learning_rate": 0.00016720483598408326, "loss": 0.9629, "step": 5875 }, { "epoch": 0.33917858790955235, "grad_norm": 0.3024327754974365, "learning_rate": 0.0001671302482839364, "loss": 0.9279, "step": 5880 }, { "epoch": 0.33946700507614214, "grad_norm": 0.31090736389160156, "learning_rate": 0.00016705559253860067, "loss": 0.95, "step": 5885 }, { "epoch": 0.33975542224273186, "grad_norm": 0.3600342571735382, "learning_rate": 0.00016698086882374939, "loss": 1.0251, "step": 5890 }, { "epoch": 0.34004383940932165, "grad_norm": 0.3187331557273865, "learning_rate": 0.00016690607721512465, "loss": 0.9718, "step": 5895 }, { "epoch": 0.3403322565759114, "grad_norm": 0.3226509988307953, "learning_rate": 0.00016683121778853746, "loss": 0.9604, "step": 5900 }, { "epoch": 0.34062067374250116, "grad_norm": 0.31832170486450195, "learning_rate": 0.00016675629061986747, "loss": 0.95, "step": 5905 }, { "epoch": 0.3409090909090909, "grad_norm": 0.29185858368873596, "learning_rate": 0.00016668129578506315, "loss": 1.0028, "step": 5910 }, { "epoch": 0.34119750807568067, "grad_norm": 0.30505648255348206, "learning_rate": 0.00016660623336014137, "loss": 0.9506, "step": 5915 }, { "epoch": 0.3414859252422704, "grad_norm": 0.30190521478652954, "learning_rate": 0.00016653110342118764, "loss": 0.9906, "step": 5920 }, { "epoch": 0.3417743424088602, "grad_norm": 0.28448954224586487, "learning_rate": 0.00016645590604435592, "loss": 0.9539, "step": 5925 }, { "epoch": 0.3420627595754499, "grad_norm": 0.3030353784561157, "learning_rate": 0.0001663806413058684, "loss": 1.0043, "step": 5930 }, { "epoch": 0.3423511767420397, "grad_norm": 0.321473091840744, "learning_rate": 0.00016630530928201566, "loss": 0.9228, "step": 5935 }, { "epoch": 0.3426395939086294, "grad_norm": 0.3062508702278137, "learning_rate": 0.00016622991004915645, "loss": 1.003, "step": 5940 }, { "epoch": 0.3429280110752192, "grad_norm": 0.30550527572631836, "learning_rate": 0.00016615444368371768, "loss": 0.9754, "step": 5945 }, { "epoch": 0.343216428241809, "grad_norm": 0.2936306297779083, "learning_rate": 0.00016607891026219418, "loss": 0.9352, "step": 5950 }, { "epoch": 0.3435048454083987, "grad_norm": 0.3158090114593506, "learning_rate": 0.0001660033098611489, "loss": 1.0072, "step": 5955 }, { "epoch": 0.3437932625749885, "grad_norm": 0.30227041244506836, "learning_rate": 0.00016592764255721264, "loss": 1.0412, "step": 5960 }, { "epoch": 0.3440816797415782, "grad_norm": 0.319016695022583, "learning_rate": 0.00016585190842708397, "loss": 0.9956, "step": 5965 }, { "epoch": 0.344370096908168, "grad_norm": 0.3120563328266144, "learning_rate": 0.00016577610754752925, "loss": 0.988, "step": 5970 }, { "epoch": 0.3446585140747577, "grad_norm": 0.31160399317741394, "learning_rate": 0.00016570023999538247, "loss": 0.9859, "step": 5975 }, { "epoch": 0.3449469312413475, "grad_norm": 0.33563148975372314, "learning_rate": 0.00016562430584754516, "loss": 0.9598, "step": 5980 }, { "epoch": 0.34523534840793724, "grad_norm": 0.30352646112442017, "learning_rate": 0.00016554830518098647, "loss": 1.0189, "step": 5985 }, { "epoch": 0.345523765574527, "grad_norm": 0.32421186566352844, "learning_rate": 0.00016547223807274287, "loss": 0.957, "step": 5990 }, { "epoch": 0.34581218274111675, "grad_norm": 0.304166316986084, "learning_rate": 0.00016539610459991816, "loss": 0.9022, "step": 5995 }, { "epoch": 0.34610059990770653, "grad_norm": 0.29363691806793213, "learning_rate": 0.00016531990483968357, "loss": 0.9502, "step": 6000 }, { "epoch": 0.34638901707429626, "grad_norm": 0.2799575626850128, "learning_rate": 0.00016524363886927734, "loss": 0.9679, "step": 6005 }, { "epoch": 0.34667743424088604, "grad_norm": 0.31635069847106934, "learning_rate": 0.00016516730676600493, "loss": 0.9555, "step": 6010 }, { "epoch": 0.34696585140747577, "grad_norm": 0.35151907801628113, "learning_rate": 0.00016509090860723874, "loss": 1.0035, "step": 6015 }, { "epoch": 0.34725426857406555, "grad_norm": 0.3108111321926117, "learning_rate": 0.00016501444447041824, "loss": 0.9622, "step": 6020 }, { "epoch": 0.3475426857406553, "grad_norm": 0.30233535170555115, "learning_rate": 0.00016493791443304974, "loss": 0.9351, "step": 6025 }, { "epoch": 0.34783110290724506, "grad_norm": 0.30143481492996216, "learning_rate": 0.00016486131857270628, "loss": 0.9925, "step": 6030 }, { "epoch": 0.3481195200738348, "grad_norm": 0.29362648725509644, "learning_rate": 0.00016478465696702767, "loss": 1.0382, "step": 6035 }, { "epoch": 0.34840793724042457, "grad_norm": 0.3204686641693115, "learning_rate": 0.00016470792969372039, "loss": 0.952, "step": 6040 }, { "epoch": 0.3486963544070143, "grad_norm": 0.3001445233821869, "learning_rate": 0.00016463113683055748, "loss": 1.04, "step": 6045 }, { "epoch": 0.3489847715736041, "grad_norm": 0.29392385482788086, "learning_rate": 0.00016455427845537835, "loss": 0.9467, "step": 6050 }, { "epoch": 0.3492731887401938, "grad_norm": 0.33031490445137024, "learning_rate": 0.000164477354646089, "loss": 0.9735, "step": 6055 }, { "epoch": 0.3495616059067836, "grad_norm": 0.3166663944721222, "learning_rate": 0.0001644003654806616, "loss": 1.0249, "step": 6060 }, { "epoch": 0.3498500230733733, "grad_norm": 0.33485808968544006, "learning_rate": 0.00016432331103713465, "loss": 0.9104, "step": 6065 }, { "epoch": 0.3501384402399631, "grad_norm": 0.3192949891090393, "learning_rate": 0.00016424619139361282, "loss": 1.1312, "step": 6070 }, { "epoch": 0.3504268574065528, "grad_norm": 0.29363909363746643, "learning_rate": 0.00016416900662826676, "loss": 0.9196, "step": 6075 }, { "epoch": 0.3507152745731426, "grad_norm": 0.310404896736145, "learning_rate": 0.00016409175681933328, "loss": 0.9595, "step": 6080 }, { "epoch": 0.35100369173973234, "grad_norm": 0.3029721677303314, "learning_rate": 0.00016401444204511504, "loss": 0.9954, "step": 6085 }, { "epoch": 0.3512921089063221, "grad_norm": 0.3040766716003418, "learning_rate": 0.00016393706238398056, "loss": 0.9765, "step": 6090 }, { "epoch": 0.35158052607291185, "grad_norm": 0.34396886825561523, "learning_rate": 0.00016385961791436416, "loss": 0.9751, "step": 6095 }, { "epoch": 0.35186894323950163, "grad_norm": 0.3235977590084076, "learning_rate": 0.00016378210871476577, "loss": 0.9789, "step": 6100 }, { "epoch": 0.35215736040609136, "grad_norm": 0.32465118169784546, "learning_rate": 0.000163704534863751, "loss": 0.9131, "step": 6105 }, { "epoch": 0.35244577757268114, "grad_norm": 0.3071576952934265, "learning_rate": 0.00016362689643995105, "loss": 1.0081, "step": 6110 }, { "epoch": 0.35273419473927087, "grad_norm": 0.34030023217201233, "learning_rate": 0.00016354919352206242, "loss": 0.9797, "step": 6115 }, { "epoch": 0.35302261190586065, "grad_norm": 0.31560373306274414, "learning_rate": 0.00016347142618884712, "loss": 0.9883, "step": 6120 }, { "epoch": 0.3533110290724504, "grad_norm": 0.3090599775314331, "learning_rate": 0.00016339359451913237, "loss": 0.9808, "step": 6125 }, { "epoch": 0.35359944623904016, "grad_norm": 0.3321547210216522, "learning_rate": 0.00016331569859181062, "loss": 0.9567, "step": 6130 }, { "epoch": 0.3538878634056299, "grad_norm": 0.32135429978370667, "learning_rate": 0.00016323773848583953, "loss": 0.9698, "step": 6135 }, { "epoch": 0.35417628057221967, "grad_norm": 0.29677772521972656, "learning_rate": 0.00016315971428024168, "loss": 0.9991, "step": 6140 }, { "epoch": 0.3544646977388094, "grad_norm": 0.3057997524738312, "learning_rate": 0.00016308162605410472, "loss": 0.9348, "step": 6145 }, { "epoch": 0.3547531149053992, "grad_norm": 0.2951048016548157, "learning_rate": 0.0001630034738865812, "loss": 0.9084, "step": 6150 }, { "epoch": 0.3550415320719889, "grad_norm": 0.3238579332828522, "learning_rate": 0.00016292525785688842, "loss": 0.9612, "step": 6155 }, { "epoch": 0.3553299492385787, "grad_norm": 0.31826281547546387, "learning_rate": 0.00016284697804430843, "loss": 1.004, "step": 6160 }, { "epoch": 0.3556183664051684, "grad_norm": 0.3389774262905121, "learning_rate": 0.00016276863452818798, "loss": 0.9539, "step": 6165 }, { "epoch": 0.3559067835717582, "grad_norm": 0.3698391318321228, "learning_rate": 0.00016269022738793832, "loss": 1.1102, "step": 6170 }, { "epoch": 0.35619520073834793, "grad_norm": 0.32285526394844055, "learning_rate": 0.0001626117567030352, "loss": 0.9831, "step": 6175 }, { "epoch": 0.3564836179049377, "grad_norm": 0.3242669105529785, "learning_rate": 0.00016253322255301887, "loss": 0.9603, "step": 6180 }, { "epoch": 0.35677203507152744, "grad_norm": 0.3193565607070923, "learning_rate": 0.00016245462501749384, "loss": 1.0302, "step": 6185 }, { "epoch": 0.3570604522381172, "grad_norm": 0.3204268217086792, "learning_rate": 0.0001623759641761289, "loss": 1.0044, "step": 6190 }, { "epoch": 0.35734886940470695, "grad_norm": 0.28855907917022705, "learning_rate": 0.00016229724010865688, "loss": 0.9842, "step": 6195 }, { "epoch": 0.35763728657129673, "grad_norm": 0.3026552200317383, "learning_rate": 0.00016221845289487492, "loss": 0.9917, "step": 6200 }, { "epoch": 0.35792570373788646, "grad_norm": 0.33038827776908875, "learning_rate": 0.000162139602614644, "loss": 1.0027, "step": 6205 }, { "epoch": 0.35821412090447624, "grad_norm": 0.2994578778743744, "learning_rate": 0.00016206068934788905, "loss": 0.9743, "step": 6210 }, { "epoch": 0.35850253807106597, "grad_norm": 0.29378122091293335, "learning_rate": 0.00016198171317459895, "loss": 0.8709, "step": 6215 }, { "epoch": 0.35879095523765575, "grad_norm": 0.29244738817214966, "learning_rate": 0.0001619026741748262, "loss": 0.8787, "step": 6220 }, { "epoch": 0.3590793724042455, "grad_norm": 0.28639066219329834, "learning_rate": 0.00016182357242868704, "loss": 1.0387, "step": 6225 }, { "epoch": 0.35936778957083526, "grad_norm": 0.33279699087142944, "learning_rate": 0.00016174440801636138, "loss": 0.9867, "step": 6230 }, { "epoch": 0.359656206737425, "grad_norm": 0.287534236907959, "learning_rate": 0.00016166518101809257, "loss": 1.0265, "step": 6235 }, { "epoch": 0.35994462390401477, "grad_norm": 0.30234718322753906, "learning_rate": 0.0001615858915141874, "loss": 0.9442, "step": 6240 }, { "epoch": 0.3602330410706045, "grad_norm": 0.3060368597507477, "learning_rate": 0.00016150653958501605, "loss": 0.9826, "step": 6245 }, { "epoch": 0.3605214582371943, "grad_norm": 0.3163430988788605, "learning_rate": 0.00016142712531101196, "loss": 0.9444, "step": 6250 }, { "epoch": 0.360809875403784, "grad_norm": 0.32550159096717834, "learning_rate": 0.00016134764877267176, "loss": 1.0238, "step": 6255 }, { "epoch": 0.3610982925703738, "grad_norm": 0.32287704944610596, "learning_rate": 0.0001612681100505552, "loss": 1.0156, "step": 6260 }, { "epoch": 0.3613867097369635, "grad_norm": 0.29501256346702576, "learning_rate": 0.00016118850922528508, "loss": 0.9461, "step": 6265 }, { "epoch": 0.3616751269035533, "grad_norm": 0.3390810191631317, "learning_rate": 0.00016110884637754713, "loss": 1.008, "step": 6270 }, { "epoch": 0.36196354407014303, "grad_norm": 0.33765143156051636, "learning_rate": 0.00016102912158808992, "loss": 0.9744, "step": 6275 }, { "epoch": 0.3622519612367328, "grad_norm": 0.3217780292034149, "learning_rate": 0.00016094933493772487, "loss": 0.985, "step": 6280 }, { "epoch": 0.36254037840332254, "grad_norm": 0.3047623038291931, "learning_rate": 0.00016086948650732605, "loss": 1.003, "step": 6285 }, { "epoch": 0.3628287955699123, "grad_norm": 0.35244515538215637, "learning_rate": 0.00016078957637783017, "loss": 0.9471, "step": 6290 }, { "epoch": 0.3631172127365021, "grad_norm": 0.3418562114238739, "learning_rate": 0.0001607096046302365, "loss": 0.9324, "step": 6295 }, { "epoch": 0.36340562990309183, "grad_norm": 0.32155272364616394, "learning_rate": 0.00016062957134560675, "loss": 0.9364, "step": 6300 }, { "epoch": 0.3636940470696816, "grad_norm": 0.320909321308136, "learning_rate": 0.00016054947660506494, "loss": 0.9888, "step": 6305 }, { "epoch": 0.36398246423627134, "grad_norm": 0.30325910449028015, "learning_rate": 0.0001604693204897975, "loss": 0.93, "step": 6310 }, { "epoch": 0.3642708814028611, "grad_norm": 0.3497485816478729, "learning_rate": 0.0001603891030810531, "loss": 1.0178, "step": 6315 }, { "epoch": 0.36455929856945085, "grad_norm": 0.3147721290588379, "learning_rate": 0.00016030882446014234, "loss": 0.9138, "step": 6320 }, { "epoch": 0.36484771573604063, "grad_norm": 0.31661319732666016, "learning_rate": 0.00016022848470843802, "loss": 0.9617, "step": 6325 }, { "epoch": 0.36513613290263036, "grad_norm": 0.3261774778366089, "learning_rate": 0.00016014808390737485, "loss": 0.93, "step": 6330 }, { "epoch": 0.36542455006922014, "grad_norm": 0.31109583377838135, "learning_rate": 0.00016006762213844947, "loss": 1.0262, "step": 6335 }, { "epoch": 0.36571296723580987, "grad_norm": 0.3141893446445465, "learning_rate": 0.00015998709948322027, "loss": 1.003, "step": 6340 }, { "epoch": 0.36600138440239965, "grad_norm": 0.30060815811157227, "learning_rate": 0.00015990651602330741, "loss": 0.965, "step": 6345 }, { "epoch": 0.3662898015689894, "grad_norm": 0.3138159215450287, "learning_rate": 0.00015982587184039263, "loss": 0.9993, "step": 6350 }, { "epoch": 0.36657821873557916, "grad_norm": 0.30857542157173157, "learning_rate": 0.00015974516701621925, "loss": 0.9592, "step": 6355 }, { "epoch": 0.3668666359021689, "grad_norm": 0.32973772287368774, "learning_rate": 0.00015966440163259202, "loss": 0.9751, "step": 6360 }, { "epoch": 0.3671550530687587, "grad_norm": 0.2922951281070709, "learning_rate": 0.00015958357577137712, "loss": 0.99, "step": 6365 }, { "epoch": 0.3674434702353484, "grad_norm": 0.31458890438079834, "learning_rate": 0.00015950268951450198, "loss": 1.0391, "step": 6370 }, { "epoch": 0.3677318874019382, "grad_norm": 0.2942887246608734, "learning_rate": 0.0001594217429439553, "loss": 0.8909, "step": 6375 }, { "epoch": 0.3680203045685279, "grad_norm": 0.35455599427223206, "learning_rate": 0.00015934073614178696, "loss": 0.9409, "step": 6380 }, { "epoch": 0.3683087217351177, "grad_norm": 0.32667621970176697, "learning_rate": 0.00015925966919010773, "loss": 0.9703, "step": 6385 }, { "epoch": 0.3685971389017074, "grad_norm": 0.3091600239276886, "learning_rate": 0.00015917854217108954, "loss": 0.9424, "step": 6390 }, { "epoch": 0.3688855560682972, "grad_norm": 0.2892536222934723, "learning_rate": 0.0001590973551669651, "loss": 0.9214, "step": 6395 }, { "epoch": 0.36917397323488693, "grad_norm": 0.3017813265323639, "learning_rate": 0.00015901610826002787, "loss": 0.9155, "step": 6400 }, { "epoch": 0.3694623904014767, "grad_norm": 0.34070533514022827, "learning_rate": 0.00015893480153263213, "loss": 0.9354, "step": 6405 }, { "epoch": 0.36975080756806644, "grad_norm": 0.3184935748577118, "learning_rate": 0.0001588534350671928, "loss": 1.0191, "step": 6410 }, { "epoch": 0.3700392247346562, "grad_norm": 0.3152550756931305, "learning_rate": 0.00015877200894618532, "loss": 0.946, "step": 6415 }, { "epoch": 0.37032764190124595, "grad_norm": 0.3296424448490143, "learning_rate": 0.00015869052325214554, "loss": 1.1301, "step": 6420 }, { "epoch": 0.37061605906783573, "grad_norm": 0.3112740218639374, "learning_rate": 0.0001586089780676698, "loss": 0.9555, "step": 6425 }, { "epoch": 0.37090447623442546, "grad_norm": 0.34751659631729126, "learning_rate": 0.00015852737347541465, "loss": 1.0707, "step": 6430 }, { "epoch": 0.37119289340101524, "grad_norm": 0.3217742145061493, "learning_rate": 0.00015844570955809694, "loss": 0.9849, "step": 6435 }, { "epoch": 0.37148131056760497, "grad_norm": 0.35857248306274414, "learning_rate": 0.00015836398639849355, "loss": 1.0154, "step": 6440 }, { "epoch": 0.37176972773419475, "grad_norm": 0.3333515226840973, "learning_rate": 0.00015828220407944154, "loss": 1.0059, "step": 6445 }, { "epoch": 0.3720581449007845, "grad_norm": 0.31444647908210754, "learning_rate": 0.00015820036268383785, "loss": 1.0227, "step": 6450 }, { "epoch": 0.37234656206737426, "grad_norm": 0.3097481429576874, "learning_rate": 0.0001581184622946393, "loss": 0.9426, "step": 6455 }, { "epoch": 0.372634979233964, "grad_norm": 0.32116419076919556, "learning_rate": 0.00015803650299486252, "loss": 0.9438, "step": 6460 }, { "epoch": 0.3729233964005538, "grad_norm": 0.3407098352909088, "learning_rate": 0.00015795448486758388, "loss": 1.0078, "step": 6465 }, { "epoch": 0.3732118135671435, "grad_norm": 0.3003779351711273, "learning_rate": 0.00015787240799593937, "loss": 0.9564, "step": 6470 }, { "epoch": 0.3735002307337333, "grad_norm": 0.28709009289741516, "learning_rate": 0.00015779027246312448, "loss": 0.9607, "step": 6475 }, { "epoch": 0.373788647900323, "grad_norm": 0.3018396496772766, "learning_rate": 0.00015770807835239424, "loss": 0.9747, "step": 6480 }, { "epoch": 0.3740770650669128, "grad_norm": 0.2960224151611328, "learning_rate": 0.00015762582574706298, "loss": 0.9753, "step": 6485 }, { "epoch": 0.3743654822335025, "grad_norm": 0.29029327630996704, "learning_rate": 0.00015754351473050435, "loss": 0.9303, "step": 6490 }, { "epoch": 0.3746538994000923, "grad_norm": 0.31783005595207214, "learning_rate": 0.00015746114538615124, "loss": 0.9661, "step": 6495 }, { "epoch": 0.37494231656668203, "grad_norm": 0.3153396248817444, "learning_rate": 0.0001573787177974956, "loss": 1.0327, "step": 6500 }, { "epoch": 0.3752307337332718, "grad_norm": 0.28863847255706787, "learning_rate": 0.00015729623204808847, "loss": 0.9343, "step": 6505 }, { "epoch": 0.37551915089986154, "grad_norm": 0.31009867787361145, "learning_rate": 0.00015721368822153986, "loss": 0.9591, "step": 6510 }, { "epoch": 0.3758075680664513, "grad_norm": 0.3038884401321411, "learning_rate": 0.00015713108640151853, "loss": 0.9929, "step": 6515 }, { "epoch": 0.37609598523304105, "grad_norm": 0.3314804434776306, "learning_rate": 0.0001570484266717522, "loss": 0.9816, "step": 6520 }, { "epoch": 0.37638440239963084, "grad_norm": 0.3055729568004608, "learning_rate": 0.0001569657091160271, "loss": 0.9554, "step": 6525 }, { "epoch": 0.37667281956622056, "grad_norm": 0.29258403182029724, "learning_rate": 0.00015688293381818823, "loss": 0.9179, "step": 6530 }, { "epoch": 0.37696123673281035, "grad_norm": 0.30928143858909607, "learning_rate": 0.00015680010086213908, "loss": 0.9057, "step": 6535 }, { "epoch": 0.3772496538994001, "grad_norm": 0.3039039373397827, "learning_rate": 0.0001567172103318415, "loss": 0.9327, "step": 6540 }, { "epoch": 0.37753807106598986, "grad_norm": 0.30910852551460266, "learning_rate": 0.00015663426231131585, "loss": 1.0292, "step": 6545 }, { "epoch": 0.3778264882325796, "grad_norm": 0.30116426944732666, "learning_rate": 0.00015655125688464062, "loss": 0.9728, "step": 6550 }, { "epoch": 0.37811490539916937, "grad_norm": 0.293541818857193, "learning_rate": 0.0001564681941359525, "loss": 0.9825, "step": 6555 }, { "epoch": 0.3784033225657591, "grad_norm": 0.3120487630367279, "learning_rate": 0.00015638507414944642, "loss": 1.0029, "step": 6560 }, { "epoch": 0.3786917397323489, "grad_norm": 0.32010599970817566, "learning_rate": 0.00015630189700937516, "loss": 0.8747, "step": 6565 }, { "epoch": 0.3789801568989386, "grad_norm": 0.2856518626213074, "learning_rate": 0.0001562186628000496, "loss": 0.9913, "step": 6570 }, { "epoch": 0.3792685740655284, "grad_norm": 0.2798760235309601, "learning_rate": 0.00015613537160583829, "loss": 0.9699, "step": 6575 }, { "epoch": 0.3795569912321181, "grad_norm": 0.2922796308994293, "learning_rate": 0.00015605202351116765, "loss": 0.9391, "step": 6580 }, { "epoch": 0.3798454083987079, "grad_norm": 0.30891719460487366, "learning_rate": 0.0001559686186005218, "loss": 0.9812, "step": 6585 }, { "epoch": 0.3801338255652976, "grad_norm": 0.29908978939056396, "learning_rate": 0.00015588515695844234, "loss": 0.9752, "step": 6590 }, { "epoch": 0.3804222427318874, "grad_norm": 0.31617826223373413, "learning_rate": 0.00015580163866952846, "loss": 0.9801, "step": 6595 }, { "epoch": 0.38071065989847713, "grad_norm": 0.31201690435409546, "learning_rate": 0.00015571806381843676, "loss": 0.9959, "step": 6600 }, { "epoch": 0.3809990770650669, "grad_norm": 0.3266178369522095, "learning_rate": 0.00015563443248988116, "loss": 0.9302, "step": 6605 }, { "epoch": 0.38128749423165664, "grad_norm": 0.3142068386077881, "learning_rate": 0.00015555074476863282, "loss": 0.9307, "step": 6610 }, { "epoch": 0.3815759113982464, "grad_norm": 0.3227985203266144, "learning_rate": 0.0001554670007395201, "loss": 0.917, "step": 6615 }, { "epoch": 0.38186432856483615, "grad_norm": 0.3386768698692322, "learning_rate": 0.00015538320048742835, "loss": 1.0017, "step": 6620 }, { "epoch": 0.38215274573142594, "grad_norm": 0.3295430541038513, "learning_rate": 0.0001552993440973, "loss": 1.0512, "step": 6625 }, { "epoch": 0.38244116289801566, "grad_norm": 0.2914939820766449, "learning_rate": 0.00015521543165413428, "loss": 0.9476, "step": 6630 }, { "epoch": 0.38272958006460545, "grad_norm": 0.3167172372341156, "learning_rate": 0.0001551314632429874, "loss": 1.0272, "step": 6635 }, { "epoch": 0.3830179972311952, "grad_norm": 0.3107517957687378, "learning_rate": 0.00015504743894897218, "loss": 0.9716, "step": 6640 }, { "epoch": 0.38330641439778496, "grad_norm": 0.3065870404243469, "learning_rate": 0.00015496335885725808, "loss": 1.0395, "step": 6645 }, { "epoch": 0.38359483156437474, "grad_norm": 0.3044165372848511, "learning_rate": 0.00015487922305307118, "loss": 0.9299, "step": 6650 }, { "epoch": 0.38388324873096447, "grad_norm": 0.29545021057128906, "learning_rate": 0.00015479503162169395, "loss": 0.9476, "step": 6655 }, { "epoch": 0.38417166589755425, "grad_norm": 0.3002990186214447, "learning_rate": 0.0001547107846484653, "loss": 0.9963, "step": 6660 }, { "epoch": 0.384460083064144, "grad_norm": 0.33107367157936096, "learning_rate": 0.00015462648221878052, "loss": 0.9646, "step": 6665 }, { "epoch": 0.38474850023073376, "grad_norm": 0.3228285610675812, "learning_rate": 0.00015454212441809095, "loss": 1.0694, "step": 6670 }, { "epoch": 0.3850369173973235, "grad_norm": 0.3043341040611267, "learning_rate": 0.00015445771133190412, "loss": 0.9703, "step": 6675 }, { "epoch": 0.38532533456391327, "grad_norm": 0.3010495901107788, "learning_rate": 0.00015437324304578363, "loss": 0.9275, "step": 6680 }, { "epoch": 0.385613751730503, "grad_norm": 0.3137162923812866, "learning_rate": 0.00015428871964534907, "loss": 1.0299, "step": 6685 }, { "epoch": 0.3859021688970928, "grad_norm": 0.3311811089515686, "learning_rate": 0.00015420414121627575, "loss": 0.9973, "step": 6690 }, { "epoch": 0.3861905860636825, "grad_norm": 0.2890627682209015, "learning_rate": 0.00015411950784429486, "loss": 0.9347, "step": 6695 }, { "epoch": 0.3864790032302723, "grad_norm": 0.32376575469970703, "learning_rate": 0.00015403481961519334, "loss": 0.9397, "step": 6700 }, { "epoch": 0.386767420396862, "grad_norm": 0.3138471245765686, "learning_rate": 0.0001539500766148136, "loss": 0.9889, "step": 6705 }, { "epoch": 0.3870558375634518, "grad_norm": 0.33765313029289246, "learning_rate": 0.00015386527892905365, "loss": 1.0173, "step": 6710 }, { "epoch": 0.3873442547300415, "grad_norm": 0.3095814287662506, "learning_rate": 0.0001537804266438669, "loss": 1.0448, "step": 6715 }, { "epoch": 0.3876326718966313, "grad_norm": 0.3238964080810547, "learning_rate": 0.0001536955198452621, "loss": 0.9843, "step": 6720 }, { "epoch": 0.38792108906322104, "grad_norm": 0.3234662711620331, "learning_rate": 0.00015361055861930328, "loss": 1.0203, "step": 6725 }, { "epoch": 0.3882095062298108, "grad_norm": 0.2965511679649353, "learning_rate": 0.0001535255430521097, "loss": 0.9069, "step": 6730 }, { "epoch": 0.38849792339640055, "grad_norm": 0.32197943329811096, "learning_rate": 0.00015344047322985555, "loss": 0.9222, "step": 6735 }, { "epoch": 0.38878634056299033, "grad_norm": 0.2778390645980835, "learning_rate": 0.00015335534923877013, "loss": 0.9485, "step": 6740 }, { "epoch": 0.38907475772958006, "grad_norm": 0.37352126836776733, "learning_rate": 0.0001532701711651376, "loss": 1.0752, "step": 6745 }, { "epoch": 0.38936317489616984, "grad_norm": 0.32387804985046387, "learning_rate": 0.000153184939095297, "loss": 0.9647, "step": 6750 }, { "epoch": 0.38965159206275957, "grad_norm": 0.30921655893325806, "learning_rate": 0.00015309965311564194, "loss": 0.9618, "step": 6755 }, { "epoch": 0.38994000922934935, "grad_norm": 0.3546212911605835, "learning_rate": 0.00015301431331262095, "loss": 1.0117, "step": 6760 }, { "epoch": 0.3902284263959391, "grad_norm": 0.29029014706611633, "learning_rate": 0.00015292891977273683, "loss": 0.9657, "step": 6765 }, { "epoch": 0.39051684356252886, "grad_norm": 0.3335913121700287, "learning_rate": 0.00015284347258254704, "loss": 0.9895, "step": 6770 }, { "epoch": 0.3908052607291186, "grad_norm": 0.2966814935207367, "learning_rate": 0.00015275797182866336, "loss": 0.9901, "step": 6775 }, { "epoch": 0.39109367789570837, "grad_norm": 0.29803818464279175, "learning_rate": 0.0001526724175977518, "loss": 0.9878, "step": 6780 }, { "epoch": 0.3913820950622981, "grad_norm": 0.30125004053115845, "learning_rate": 0.00015258680997653275, "loss": 1.0261, "step": 6785 }, { "epoch": 0.3916705122288879, "grad_norm": 0.33115720748901367, "learning_rate": 0.0001525011490517805, "loss": 0.9936, "step": 6790 }, { "epoch": 0.3919589293954776, "grad_norm": 0.3167159855365753, "learning_rate": 0.0001524154349103235, "loss": 0.9257, "step": 6795 }, { "epoch": 0.3922473465620674, "grad_norm": 0.3028007447719574, "learning_rate": 0.00015232966763904416, "loss": 0.9338, "step": 6800 }, { "epoch": 0.3925357637286571, "grad_norm": 0.28450897336006165, "learning_rate": 0.00015224384732487868, "loss": 0.9803, "step": 6805 }, { "epoch": 0.3928241808952469, "grad_norm": 0.30390694737434387, "learning_rate": 0.00015215797405481704, "loss": 0.929, "step": 6810 }, { "epoch": 0.3931125980618366, "grad_norm": 0.28544360399246216, "learning_rate": 0.00015207204791590288, "loss": 1.0024, "step": 6815 }, { "epoch": 0.3934010152284264, "grad_norm": 0.31118243932724, "learning_rate": 0.00015198606899523352, "loss": 0.9276, "step": 6820 }, { "epoch": 0.39368943239501614, "grad_norm": 0.32028988003730774, "learning_rate": 0.00015190003737995967, "loss": 0.9942, "step": 6825 }, { "epoch": 0.3939778495616059, "grad_norm": 0.3325750231742859, "learning_rate": 0.00015181395315728554, "loss": 0.9642, "step": 6830 }, { "epoch": 0.39426626672819565, "grad_norm": 0.33307701349258423, "learning_rate": 0.00015172781641446852, "loss": 0.9986, "step": 6835 }, { "epoch": 0.39455468389478543, "grad_norm": 0.31831297278404236, "learning_rate": 0.00015164162723881947, "loss": 0.9855, "step": 6840 }, { "epoch": 0.39484310106137516, "grad_norm": 0.28365767002105713, "learning_rate": 0.00015155538571770218, "loss": 0.9571, "step": 6845 }, { "epoch": 0.39513151822796494, "grad_norm": 0.3154856860637665, "learning_rate": 0.00015146909193853363, "loss": 0.9904, "step": 6850 }, { "epoch": 0.39541993539455467, "grad_norm": 0.3086288571357727, "learning_rate": 0.0001513827459887837, "loss": 0.965, "step": 6855 }, { "epoch": 0.39570835256114445, "grad_norm": 0.3205987513065338, "learning_rate": 0.0001512963479559752, "loss": 1.0125, "step": 6860 }, { "epoch": 0.3959967697277342, "grad_norm": 0.29472029209136963, "learning_rate": 0.00015120989792768367, "loss": 0.9909, "step": 6865 }, { "epoch": 0.39628518689432396, "grad_norm": 0.34999117255210876, "learning_rate": 0.00015112339599153746, "loss": 1.0217, "step": 6870 }, { "epoch": 0.3965736040609137, "grad_norm": 0.2855675518512726, "learning_rate": 0.00015103684223521742, "loss": 1.0462, "step": 6875 }, { "epoch": 0.39686202122750347, "grad_norm": 0.3382554352283478, "learning_rate": 0.00015095023674645698, "loss": 1.038, "step": 6880 }, { "epoch": 0.3971504383940932, "grad_norm": 0.3201339840888977, "learning_rate": 0.000150863579613042, "loss": 0.9419, "step": 6885 }, { "epoch": 0.397438855560683, "grad_norm": 0.3181384205818176, "learning_rate": 0.00015077687092281074, "loss": 0.984, "step": 6890 }, { "epoch": 0.3977272727272727, "grad_norm": 0.3145751357078552, "learning_rate": 0.00015069011076365357, "loss": 0.952, "step": 6895 }, { "epoch": 0.3980156898938625, "grad_norm": 0.2770393490791321, "learning_rate": 0.00015060329922351326, "loss": 0.9023, "step": 6900 }, { "epoch": 0.3983041070604522, "grad_norm": 0.3097230792045593, "learning_rate": 0.00015051643639038447, "loss": 0.9512, "step": 6905 }, { "epoch": 0.398592524227042, "grad_norm": 0.31388595700263977, "learning_rate": 0.0001504295223523139, "loss": 0.9373, "step": 6910 }, { "epoch": 0.3988809413936317, "grad_norm": 0.30969393253326416, "learning_rate": 0.0001503425571974002, "loss": 1.0089, "step": 6915 }, { "epoch": 0.3991693585602215, "grad_norm": 0.3253278434276581, "learning_rate": 0.00015025554101379379, "loss": 0.9958, "step": 6920 }, { "epoch": 0.39945777572681124, "grad_norm": 0.28762054443359375, "learning_rate": 0.00015016847388969683, "loss": 0.9883, "step": 6925 }, { "epoch": 0.399746192893401, "grad_norm": 0.31128495931625366, "learning_rate": 0.0001500813559133631, "loss": 0.9916, "step": 6930 }, { "epoch": 0.40003461005999075, "grad_norm": 0.3101608157157898, "learning_rate": 0.00014999418717309793, "loss": 0.9811, "step": 6935 }, { "epoch": 0.40032302722658053, "grad_norm": 0.3171060085296631, "learning_rate": 0.00014990696775725812, "loss": 0.9856, "step": 6940 }, { "epoch": 0.40061144439317026, "grad_norm": 0.33557629585266113, "learning_rate": 0.00014981969775425185, "loss": 1.0214, "step": 6945 }, { "epoch": 0.40089986155976004, "grad_norm": 0.2946593761444092, "learning_rate": 0.0001497323772525385, "loss": 0.9413, "step": 6950 }, { "epoch": 0.40118827872634977, "grad_norm": 0.39665666222572327, "learning_rate": 0.00014964500634062877, "loss": 1.0003, "step": 6955 }, { "epoch": 0.40147669589293955, "grad_norm": 0.28560730814933777, "learning_rate": 0.00014955758510708434, "loss": 0.9861, "step": 6960 }, { "epoch": 0.4017651130595293, "grad_norm": 0.30924665927886963, "learning_rate": 0.00014947011364051794, "loss": 1.0107, "step": 6965 }, { "epoch": 0.40205353022611906, "grad_norm": 0.2980976998806, "learning_rate": 0.00014938259202959317, "loss": 0.9834, "step": 6970 }, { "epoch": 0.4023419473927088, "grad_norm": 0.31071528792381287, "learning_rate": 0.00014929502036302458, "loss": 1.0827, "step": 6975 }, { "epoch": 0.40263036455929857, "grad_norm": 0.30230122804641724, "learning_rate": 0.00014920739872957732, "loss": 0.9203, "step": 6980 }, { "epoch": 0.4029187817258883, "grad_norm": 0.3271790146827698, "learning_rate": 0.0001491197272180673, "loss": 0.9462, "step": 6985 }, { "epoch": 0.4032071988924781, "grad_norm": 0.3099105954170227, "learning_rate": 0.00014903200591736087, "loss": 1.0032, "step": 6990 }, { "epoch": 0.40349561605906786, "grad_norm": 0.298093318939209, "learning_rate": 0.00014894423491637498, "loss": 0.9668, "step": 6995 }, { "epoch": 0.4037840332256576, "grad_norm": 0.3025701940059662, "learning_rate": 0.00014885641430407686, "loss": 0.9442, "step": 7000 }, { "epoch": 0.4040724503922474, "grad_norm": 0.30681777000427246, "learning_rate": 0.00014876854416948405, "loss": 1.0638, "step": 7005 }, { "epoch": 0.4043608675588371, "grad_norm": 0.2943170964717865, "learning_rate": 0.0001486806246016643, "loss": 1.0032, "step": 7010 }, { "epoch": 0.4046492847254269, "grad_norm": 0.35952475666999817, "learning_rate": 0.00014859265568973546, "loss": 0.9793, "step": 7015 }, { "epoch": 0.4049377018920166, "grad_norm": 0.30241405963897705, "learning_rate": 0.00014850463752286543, "loss": 0.9869, "step": 7020 }, { "epoch": 0.4052261190586064, "grad_norm": 0.30669084191322327, "learning_rate": 0.000148416570190272, "loss": 0.9314, "step": 7025 }, { "epoch": 0.4055145362251961, "grad_norm": 0.3244229853153229, "learning_rate": 0.00014832845378122276, "loss": 1.0062, "step": 7030 }, { "epoch": 0.4058029533917859, "grad_norm": 0.3314943313598633, "learning_rate": 0.0001482402883850351, "loss": 0.9591, "step": 7035 }, { "epoch": 0.40609137055837563, "grad_norm": 0.30962324142456055, "learning_rate": 0.00014815207409107608, "loss": 0.9297, "step": 7040 }, { "epoch": 0.4063797877249654, "grad_norm": 0.3165276050567627, "learning_rate": 0.00014806381098876227, "loss": 1.0911, "step": 7045 }, { "epoch": 0.40666820489155514, "grad_norm": 0.3096354901790619, "learning_rate": 0.00014797549916755975, "loss": 0.9584, "step": 7050 }, { "epoch": 0.4069566220581449, "grad_norm": 0.3194222152233124, "learning_rate": 0.00014788713871698397, "loss": 0.9737, "step": 7055 }, { "epoch": 0.40724503922473465, "grad_norm": 0.3206169605255127, "learning_rate": 0.0001477987297265997, "loss": 0.9935, "step": 7060 }, { "epoch": 0.40753345639132443, "grad_norm": 0.2967379689216614, "learning_rate": 0.00014771027228602086, "loss": 0.9805, "step": 7065 }, { "epoch": 0.40782187355791416, "grad_norm": 0.31947875022888184, "learning_rate": 0.0001476217664849105, "loss": 1.0187, "step": 7070 }, { "epoch": 0.40811029072450394, "grad_norm": 0.3055800497531891, "learning_rate": 0.00014753321241298072, "loss": 0.9962, "step": 7075 }, { "epoch": 0.40839870789109367, "grad_norm": 0.3292485475540161, "learning_rate": 0.00014744461015999248, "loss": 0.9679, "step": 7080 }, { "epoch": 0.40868712505768345, "grad_norm": 0.3264350891113281, "learning_rate": 0.00014735595981575568, "loss": 1.0154, "step": 7085 }, { "epoch": 0.4089755422242732, "grad_norm": 0.32917913794517517, "learning_rate": 0.00014726726147012889, "loss": 0.9888, "step": 7090 }, { "epoch": 0.40926395939086296, "grad_norm": 0.34813347458839417, "learning_rate": 0.00014717851521301933, "loss": 1.0196, "step": 7095 }, { "epoch": 0.4095523765574527, "grad_norm": 0.407846063375473, "learning_rate": 0.00014708972113438285, "loss": 1.0062, "step": 7100 }, { "epoch": 0.4098407937240425, "grad_norm": 0.3226325511932373, "learning_rate": 0.00014700087932422367, "loss": 0.9841, "step": 7105 }, { "epoch": 0.4101292108906322, "grad_norm": 0.28346356749534607, "learning_rate": 0.00014691198987259454, "loss": 0.9968, "step": 7110 }, { "epoch": 0.410417628057222, "grad_norm": 0.3126351237297058, "learning_rate": 0.00014682305286959631, "loss": 0.9506, "step": 7115 }, { "epoch": 0.4107060452238117, "grad_norm": 0.3126891851425171, "learning_rate": 0.00014673406840537824, "loss": 0.9417, "step": 7120 }, { "epoch": 0.4109944623904015, "grad_norm": 0.2949593961238861, "learning_rate": 0.00014664503657013756, "loss": 0.9393, "step": 7125 }, { "epoch": 0.4112828795569912, "grad_norm": 0.3290235996246338, "learning_rate": 0.00014655595745411955, "loss": 1.0241, "step": 7130 }, { "epoch": 0.411571296723581, "grad_norm": 0.3105536997318268, "learning_rate": 0.00014646683114761735, "loss": 0.911, "step": 7135 }, { "epoch": 0.41185971389017073, "grad_norm": 0.29566848278045654, "learning_rate": 0.00014637765774097206, "loss": 1.0272, "step": 7140 }, { "epoch": 0.4121481310567605, "grad_norm": 0.3105669915676117, "learning_rate": 0.00014628843732457248, "loss": 1.01, "step": 7145 }, { "epoch": 0.41243654822335024, "grad_norm": 0.2995404005050659, "learning_rate": 0.000146199169988855, "loss": 0.9107, "step": 7150 }, { "epoch": 0.41272496538994, "grad_norm": 0.31976473331451416, "learning_rate": 0.00014610985582430363, "loss": 1.0212, "step": 7155 }, { "epoch": 0.41301338255652975, "grad_norm": 0.32434025406837463, "learning_rate": 0.00014602049492144984, "loss": 0.9824, "step": 7160 }, { "epoch": 0.41330179972311953, "grad_norm": 0.2803219258785248, "learning_rate": 0.00014593108737087241, "loss": 0.9267, "step": 7165 }, { "epoch": 0.41359021688970926, "grad_norm": 0.2851792573928833, "learning_rate": 0.00014584163326319754, "loss": 0.9277, "step": 7170 }, { "epoch": 0.41387863405629904, "grad_norm": 0.32938867807388306, "learning_rate": 0.00014575213268909842, "loss": 0.9406, "step": 7175 }, { "epoch": 0.41416705122288877, "grad_norm": 0.3028603792190552, "learning_rate": 0.00014566258573929557, "loss": 0.9894, "step": 7180 }, { "epoch": 0.41445546838947855, "grad_norm": 0.2850208580493927, "learning_rate": 0.00014557299250455633, "loss": 1.0507, "step": 7185 }, { "epoch": 0.4147438855560683, "grad_norm": 0.331676721572876, "learning_rate": 0.0001454833530756951, "loss": 0.994, "step": 7190 }, { "epoch": 0.41503230272265806, "grad_norm": 0.30083078145980835, "learning_rate": 0.00014539366754357297, "loss": 1.009, "step": 7195 }, { "epoch": 0.4153207198892478, "grad_norm": 0.3384031653404236, "learning_rate": 0.0001453039359990979, "loss": 1.0258, "step": 7200 }, { "epoch": 0.4156091370558376, "grad_norm": 0.2972491979598999, "learning_rate": 0.0001452141585332243, "loss": 1.0282, "step": 7205 }, { "epoch": 0.4158975542224273, "grad_norm": 0.3435482382774353, "learning_rate": 0.00014512433523695332, "loss": 0.9686, "step": 7210 }, { "epoch": 0.4161859713890171, "grad_norm": 0.318953275680542, "learning_rate": 0.0001450344662013325, "loss": 0.9745, "step": 7215 }, { "epoch": 0.4164743885556068, "grad_norm": 0.3264816701412201, "learning_rate": 0.0001449445515174557, "loss": 0.9602, "step": 7220 }, { "epoch": 0.4167628057221966, "grad_norm": 0.30100223422050476, "learning_rate": 0.00014485459127646307, "loss": 0.9971, "step": 7225 }, { "epoch": 0.4170512228887863, "grad_norm": 0.29668816924095154, "learning_rate": 0.000144764585569541, "loss": 0.9988, "step": 7230 }, { "epoch": 0.4173396400553761, "grad_norm": 0.32644376158714294, "learning_rate": 0.00014467453448792188, "loss": 0.9821, "step": 7235 }, { "epoch": 0.41762805722196583, "grad_norm": 0.31165027618408203, "learning_rate": 0.00014458443812288415, "loss": 0.9882, "step": 7240 }, { "epoch": 0.4179164743885556, "grad_norm": 0.3170810043811798, "learning_rate": 0.00014449429656575205, "loss": 0.9978, "step": 7245 }, { "epoch": 0.41820489155514534, "grad_norm": 0.29307204484939575, "learning_rate": 0.00014440410990789582, "loss": 0.9341, "step": 7250 }, { "epoch": 0.4184933087217351, "grad_norm": 0.33344531059265137, "learning_rate": 0.00014431387824073125, "loss": 0.9938, "step": 7255 }, { "epoch": 0.41878172588832485, "grad_norm": 0.3290488123893738, "learning_rate": 0.00014422360165571976, "loss": 0.9463, "step": 7260 }, { "epoch": 0.41907014305491463, "grad_norm": 0.33802330493927, "learning_rate": 0.0001441332802443684, "loss": 0.9686, "step": 7265 }, { "epoch": 0.41935856022150436, "grad_norm": 0.3061620891094208, "learning_rate": 0.0001440429140982296, "loss": 0.9766, "step": 7270 }, { "epoch": 0.41964697738809414, "grad_norm": 0.30755650997161865, "learning_rate": 0.00014395250330890113, "loss": 0.9463, "step": 7275 }, { "epoch": 0.41993539455468387, "grad_norm": 0.3155378997325897, "learning_rate": 0.000143862047968026, "loss": 0.9407, "step": 7280 }, { "epoch": 0.42022381172127365, "grad_norm": 0.3173658847808838, "learning_rate": 0.00014377154816729246, "loss": 0.9581, "step": 7285 }, { "epoch": 0.4205122288878634, "grad_norm": 0.32038891315460205, "learning_rate": 0.00014368100399843366, "loss": 1.0172, "step": 7290 }, { "epoch": 0.42080064605445316, "grad_norm": 0.31820791959762573, "learning_rate": 0.0001435904155532279, "loss": 0.9354, "step": 7295 }, { "epoch": 0.4210890632210429, "grad_norm": 0.3380236029624939, "learning_rate": 0.00014349978292349825, "loss": 0.98, "step": 7300 }, { "epoch": 0.4213774803876327, "grad_norm": 0.28636041283607483, "learning_rate": 0.00014340910620111265, "loss": 1.0169, "step": 7305 }, { "epoch": 0.4216658975542224, "grad_norm": 0.27922528982162476, "learning_rate": 0.0001433183854779836, "loss": 0.9598, "step": 7310 }, { "epoch": 0.4219543147208122, "grad_norm": 0.2958151698112488, "learning_rate": 0.00014322762084606843, "loss": 0.9574, "step": 7315 }, { "epoch": 0.4222427318874019, "grad_norm": 0.322035551071167, "learning_rate": 0.00014313681239736865, "loss": 0.9757, "step": 7320 }, { "epoch": 0.4225311490539917, "grad_norm": 0.3313840627670288, "learning_rate": 0.00014304596022393052, "loss": 0.9766, "step": 7325 }, { "epoch": 0.4228195662205814, "grad_norm": 0.32730937004089355, "learning_rate": 0.00014295506441784435, "loss": 0.9779, "step": 7330 }, { "epoch": 0.4231079833871712, "grad_norm": 0.3087776005268097, "learning_rate": 0.0001428641250712449, "loss": 0.951, "step": 7335 }, { "epoch": 0.423396400553761, "grad_norm": 0.28657153248786926, "learning_rate": 0.00014277314227631086, "loss": 1.0397, "step": 7340 }, { "epoch": 0.4236848177203507, "grad_norm": 0.3487570881843567, "learning_rate": 0.00014268211612526515, "loss": 1.0699, "step": 7345 }, { "epoch": 0.4239732348869405, "grad_norm": 0.31562715768814087, "learning_rate": 0.00014259104671037452, "loss": 0.9947, "step": 7350 }, { "epoch": 0.4242616520535302, "grad_norm": 0.3040665090084076, "learning_rate": 0.00014249993412394958, "loss": 0.9742, "step": 7355 }, { "epoch": 0.42455006922012, "grad_norm": 0.28409066796302795, "learning_rate": 0.00014240877845834472, "loss": 0.9224, "step": 7360 }, { "epoch": 0.42483848638670973, "grad_norm": 0.3323284089565277, "learning_rate": 0.00014231757980595803, "loss": 0.8986, "step": 7365 }, { "epoch": 0.4251269035532995, "grad_norm": 0.33217912912368774, "learning_rate": 0.00014222633825923108, "loss": 0.9937, "step": 7370 }, { "epoch": 0.42541532071988925, "grad_norm": 0.31237930059432983, "learning_rate": 0.00014213505391064905, "loss": 1.0209, "step": 7375 }, { "epoch": 0.42570373788647903, "grad_norm": 0.33656054735183716, "learning_rate": 0.00014204372685274039, "loss": 1.0028, "step": 7380 }, { "epoch": 0.42599215505306876, "grad_norm": 0.31092768907546997, "learning_rate": 0.00014195235717807687, "loss": 0.9364, "step": 7385 }, { "epoch": 0.42628057221965854, "grad_norm": 0.3244108557701111, "learning_rate": 0.00014186094497927352, "loss": 1.0035, "step": 7390 }, { "epoch": 0.42656898938624827, "grad_norm": 0.31575846672058105, "learning_rate": 0.0001417694903489884, "loss": 0.9241, "step": 7395 }, { "epoch": 0.42685740655283805, "grad_norm": 0.2925664782524109, "learning_rate": 0.00014167799337992258, "loss": 1.0251, "step": 7400 }, { "epoch": 0.4271458237194278, "grad_norm": 0.31504228711128235, "learning_rate": 0.00014158645416482011, "loss": 0.9347, "step": 7405 }, { "epoch": 0.42743424088601756, "grad_norm": 0.2886911928653717, "learning_rate": 0.00014149487279646781, "loss": 0.903, "step": 7410 }, { "epoch": 0.4277226580526073, "grad_norm": 0.3230822682380676, "learning_rate": 0.00014140324936769524, "loss": 0.9672, "step": 7415 }, { "epoch": 0.42801107521919707, "grad_norm": 0.29172009229660034, "learning_rate": 0.00014131158397137462, "loss": 0.9792, "step": 7420 }, { "epoch": 0.4282994923857868, "grad_norm": 0.2988271415233612, "learning_rate": 0.00014121987670042064, "loss": 0.9907, "step": 7425 }, { "epoch": 0.4285879095523766, "grad_norm": 0.3402508795261383, "learning_rate": 0.00014112812764779053, "loss": 0.9922, "step": 7430 }, { "epoch": 0.4288763267189663, "grad_norm": 0.3000766932964325, "learning_rate": 0.00014103633690648376, "loss": 1.009, "step": 7435 }, { "epoch": 0.4291647438855561, "grad_norm": 0.287803590297699, "learning_rate": 0.00014094450456954218, "loss": 0.9878, "step": 7440 }, { "epoch": 0.4294531610521458, "grad_norm": 0.31101298332214355, "learning_rate": 0.00014085263073004972, "loss": 0.9197, "step": 7445 }, { "epoch": 0.4297415782187356, "grad_norm": 0.30686867237091064, "learning_rate": 0.00014076071548113238, "loss": 1.0046, "step": 7450 }, { "epoch": 0.4300299953853253, "grad_norm": 0.32540297508239746, "learning_rate": 0.00014066875891595811, "loss": 0.9943, "step": 7455 }, { "epoch": 0.4303184125519151, "grad_norm": 0.32309621572494507, "learning_rate": 0.0001405767611277369, "loss": 0.9379, "step": 7460 }, { "epoch": 0.43060682971850484, "grad_norm": 0.30645352602005005, "learning_rate": 0.0001404847222097203, "loss": 0.9759, "step": 7465 }, { "epoch": 0.4308952468850946, "grad_norm": 0.3069199025630951, "learning_rate": 0.00014039264225520175, "loss": 0.9785, "step": 7470 }, { "epoch": 0.43118366405168435, "grad_norm": 0.30918145179748535, "learning_rate": 0.00014030052135751613, "loss": 0.954, "step": 7475 }, { "epoch": 0.43147208121827413, "grad_norm": 0.32399114966392517, "learning_rate": 0.0001402083596100399, "loss": 1.0961, "step": 7480 }, { "epoch": 0.43176049838486386, "grad_norm": 0.33643585443496704, "learning_rate": 0.00014011615710619085, "loss": 1.0204, "step": 7485 }, { "epoch": 0.43204891555145364, "grad_norm": 0.32877838611602783, "learning_rate": 0.00014002391393942826, "loss": 0.97, "step": 7490 }, { "epoch": 0.43233733271804337, "grad_norm": 0.31330958008766174, "learning_rate": 0.00013993163020325242, "loss": 0.9539, "step": 7495 }, { "epoch": 0.43262574988463315, "grad_norm": 0.33290615677833557, "learning_rate": 0.00013983930599120487, "loss": 1.0575, "step": 7500 }, { "epoch": 0.4329141670512229, "grad_norm": 0.2965332567691803, "learning_rate": 0.00013974694139686812, "loss": 0.9635, "step": 7505 }, { "epoch": 0.43320258421781266, "grad_norm": 0.3181534707546234, "learning_rate": 0.0001396545365138657, "loss": 1.1166, "step": 7510 }, { "epoch": 0.4334910013844024, "grad_norm": 0.31753942370414734, "learning_rate": 0.00013956209143586181, "loss": 1.0099, "step": 7515 }, { "epoch": 0.43377941855099217, "grad_norm": 0.31996965408325195, "learning_rate": 0.00013946960625656153, "loss": 0.9755, "step": 7520 }, { "epoch": 0.4340678357175819, "grad_norm": 0.31612056493759155, "learning_rate": 0.00013937708106971056, "loss": 1.0283, "step": 7525 }, { "epoch": 0.4343562528841717, "grad_norm": 0.3391875922679901, "learning_rate": 0.00013928451596909516, "loss": 0.9188, "step": 7530 }, { "epoch": 0.4346446700507614, "grad_norm": 0.323889821767807, "learning_rate": 0.00013919191104854196, "loss": 0.9728, "step": 7535 }, { "epoch": 0.4349330872173512, "grad_norm": 0.3123781085014343, "learning_rate": 0.00013909926640191813, "loss": 1.0161, "step": 7540 }, { "epoch": 0.4352215043839409, "grad_norm": 0.3935023844242096, "learning_rate": 0.00013900658212313093, "loss": 1.0055, "step": 7545 }, { "epoch": 0.4355099215505307, "grad_norm": 0.30142349004745483, "learning_rate": 0.0001389138583061279, "loss": 1.0241, "step": 7550 }, { "epoch": 0.4357983387171204, "grad_norm": 0.3473080098628998, "learning_rate": 0.00013882109504489659, "loss": 0.911, "step": 7555 }, { "epoch": 0.4360867558837102, "grad_norm": 0.2962372601032257, "learning_rate": 0.00013872829243346453, "loss": 0.9448, "step": 7560 }, { "epoch": 0.43637517305029994, "grad_norm": 0.2840719521045685, "learning_rate": 0.00013863545056589925, "loss": 0.9958, "step": 7565 }, { "epoch": 0.4366635902168897, "grad_norm": 0.3180067837238312, "learning_rate": 0.00013854256953630797, "loss": 0.991, "step": 7570 }, { "epoch": 0.43695200738347945, "grad_norm": 0.37251415848731995, "learning_rate": 0.0001384496494388376, "loss": 0.9553, "step": 7575 }, { "epoch": 0.43724042455006923, "grad_norm": 0.36039602756500244, "learning_rate": 0.00013835669036767466, "loss": 0.9297, "step": 7580 }, { "epoch": 0.43752884171665896, "grad_norm": 0.30950820446014404, "learning_rate": 0.00013826369241704524, "loss": 0.9534, "step": 7585 }, { "epoch": 0.43781725888324874, "grad_norm": 0.3068082928657532, "learning_rate": 0.00013817065568121477, "loss": 0.9184, "step": 7590 }, { "epoch": 0.43810567604983847, "grad_norm": 0.30188676714897156, "learning_rate": 0.00013807758025448803, "loss": 0.9726, "step": 7595 }, { "epoch": 0.43839409321642825, "grad_norm": 0.290171355009079, "learning_rate": 0.00013798446623120893, "loss": 0.9274, "step": 7600 }, { "epoch": 0.438682510383018, "grad_norm": 0.3495076894760132, "learning_rate": 0.0001378913137057607, "loss": 0.9428, "step": 7605 }, { "epoch": 0.43897092754960776, "grad_norm": 0.313632994890213, "learning_rate": 0.00013779812277256537, "loss": 0.9952, "step": 7610 }, { "epoch": 0.4392593447161975, "grad_norm": 0.3400636911392212, "learning_rate": 0.00013770489352608404, "loss": 0.917, "step": 7615 }, { "epoch": 0.43954776188278727, "grad_norm": 0.33605697751045227, "learning_rate": 0.0001376116260608166, "loss": 0.9214, "step": 7620 }, { "epoch": 0.439836179049377, "grad_norm": 0.2898849546909332, "learning_rate": 0.0001375183204713017, "loss": 0.9064, "step": 7625 }, { "epoch": 0.4401245962159668, "grad_norm": 0.3333107829093933, "learning_rate": 0.0001374249768521166, "loss": 1.067, "step": 7630 }, { "epoch": 0.4404130133825565, "grad_norm": 0.3403453528881073, "learning_rate": 0.00013733159529787719, "loss": 0.9259, "step": 7635 }, { "epoch": 0.4407014305491463, "grad_norm": 0.294716477394104, "learning_rate": 0.0001372381759032377, "loss": 0.9791, "step": 7640 }, { "epoch": 0.440989847715736, "grad_norm": 0.3185860514640808, "learning_rate": 0.00013714471876289075, "loss": 0.9606, "step": 7645 }, { "epoch": 0.4412782648823258, "grad_norm": 0.2923915982246399, "learning_rate": 0.00013705122397156727, "loss": 0.9309, "step": 7650 }, { "epoch": 0.4415666820489155, "grad_norm": 0.32446566224098206, "learning_rate": 0.00013695769162403633, "loss": 0.9541, "step": 7655 }, { "epoch": 0.4418550992155053, "grad_norm": 0.3201291859149933, "learning_rate": 0.00013686412181510504, "loss": 0.9672, "step": 7660 }, { "epoch": 0.44214351638209504, "grad_norm": 0.2882368266582489, "learning_rate": 0.00013677051463961855, "loss": 0.94, "step": 7665 }, { "epoch": 0.4424319335486848, "grad_norm": 0.31610748171806335, "learning_rate": 0.0001366768701924598, "loss": 0.9958, "step": 7670 }, { "epoch": 0.44272035071527455, "grad_norm": 0.29429611563682556, "learning_rate": 0.00013658318856854955, "loss": 0.9828, "step": 7675 }, { "epoch": 0.44300876788186433, "grad_norm": 0.2986331284046173, "learning_rate": 0.0001364894698628462, "loss": 0.981, "step": 7680 }, { "epoch": 0.44329718504845406, "grad_norm": 0.3160644471645355, "learning_rate": 0.0001363957141703459, "loss": 1.0728, "step": 7685 }, { "epoch": 0.44358560221504384, "grad_norm": 0.3175942897796631, "learning_rate": 0.00013630192158608202, "loss": 0.953, "step": 7690 }, { "epoch": 0.4438740193816336, "grad_norm": 0.30581793189048767, "learning_rate": 0.00013620809220512558, "loss": 0.9689, "step": 7695 }, { "epoch": 0.44416243654822335, "grad_norm": 0.30399492383003235, "learning_rate": 0.00013611422612258477, "loss": 1.0377, "step": 7700 }, { "epoch": 0.44445085371481313, "grad_norm": 0.35260793566703796, "learning_rate": 0.00013602032343360497, "loss": 0.9977, "step": 7705 }, { "epoch": 0.44473927088140286, "grad_norm": 0.3687379062175751, "learning_rate": 0.00013592638423336875, "loss": 0.9886, "step": 7710 }, { "epoch": 0.44502768804799264, "grad_norm": 0.30002057552337646, "learning_rate": 0.00013583240861709563, "loss": 0.9398, "step": 7715 }, { "epoch": 0.44531610521458237, "grad_norm": 0.2873061001300812, "learning_rate": 0.00013573839668004202, "loss": 0.8944, "step": 7720 }, { "epoch": 0.44560452238117215, "grad_norm": 0.3006219267845154, "learning_rate": 0.00013564434851750119, "loss": 1.0028, "step": 7725 }, { "epoch": 0.4458929395477619, "grad_norm": 0.2903684973716736, "learning_rate": 0.00013555026422480313, "loss": 0.9164, "step": 7730 }, { "epoch": 0.44618135671435166, "grad_norm": 0.33015668392181396, "learning_rate": 0.00013545614389731442, "loss": 1.0059, "step": 7735 }, { "epoch": 0.4464697738809414, "grad_norm": 0.31042858958244324, "learning_rate": 0.00013536198763043823, "loss": 0.8928, "step": 7740 }, { "epoch": 0.4467581910475312, "grad_norm": 0.3223642408847809, "learning_rate": 0.00013526779551961403, "loss": 1.021, "step": 7745 }, { "epoch": 0.4470466082141209, "grad_norm": 0.3327777087688446, "learning_rate": 0.00013517356766031777, "loss": 0.9867, "step": 7750 }, { "epoch": 0.4473350253807107, "grad_norm": 0.28827667236328125, "learning_rate": 0.00013507930414806153, "loss": 0.9634, "step": 7755 }, { "epoch": 0.4476234425473004, "grad_norm": 0.3253006041049957, "learning_rate": 0.00013498500507839363, "loss": 1.0064, "step": 7760 }, { "epoch": 0.4479118597138902, "grad_norm": 0.3410976231098175, "learning_rate": 0.00013489067054689834, "loss": 1.0264, "step": 7765 }, { "epoch": 0.4482002768804799, "grad_norm": 0.3115012049674988, "learning_rate": 0.00013479630064919593, "loss": 0.9865, "step": 7770 }, { "epoch": 0.4484886940470697, "grad_norm": 0.33605247735977173, "learning_rate": 0.00013470189548094242, "loss": 0.9313, "step": 7775 }, { "epoch": 0.44877711121365943, "grad_norm": 0.32948291301727295, "learning_rate": 0.00013460745513782976, "loss": 0.9301, "step": 7780 }, { "epoch": 0.4490655283802492, "grad_norm": 0.2822820842266083, "learning_rate": 0.0001345129797155854, "loss": 0.987, "step": 7785 }, { "epoch": 0.44935394554683894, "grad_norm": 0.3184073567390442, "learning_rate": 0.0001344184693099724, "loss": 0.9474, "step": 7790 }, { "epoch": 0.4496423627134287, "grad_norm": 0.30305254459381104, "learning_rate": 0.0001343239240167893, "loss": 0.9738, "step": 7795 }, { "epoch": 0.44993077988001845, "grad_norm": 0.2888820171356201, "learning_rate": 0.00013422934393186994, "loss": 0.9428, "step": 7800 }, { "epoch": 0.45021919704660823, "grad_norm": 0.30135294795036316, "learning_rate": 0.0001341347291510835, "loss": 0.9629, "step": 7805 }, { "epoch": 0.45050761421319796, "grad_norm": 0.3297244608402252, "learning_rate": 0.0001340400797703343, "loss": 0.9554, "step": 7810 }, { "epoch": 0.45079603137978774, "grad_norm": 0.31398284435272217, "learning_rate": 0.0001339453958855617, "loss": 0.9741, "step": 7815 }, { "epoch": 0.45108444854637747, "grad_norm": 0.30296990275382996, "learning_rate": 0.00013385067759274014, "loss": 0.917, "step": 7820 }, { "epoch": 0.45137286571296725, "grad_norm": 0.3051854968070984, "learning_rate": 0.00013375592498787871, "loss": 1.0307, "step": 7825 }, { "epoch": 0.451661282879557, "grad_norm": 0.3125559687614441, "learning_rate": 0.00013366113816702164, "loss": 1.0156, "step": 7830 }, { "epoch": 0.45194970004614676, "grad_norm": 0.3818068504333496, "learning_rate": 0.00013356631722624744, "loss": 1.0099, "step": 7835 }, { "epoch": 0.4522381172127365, "grad_norm": 0.3067907989025116, "learning_rate": 0.0001334714622616695, "loss": 0.9715, "step": 7840 }, { "epoch": 0.4525265343793263, "grad_norm": 0.31763195991516113, "learning_rate": 0.00013337657336943555, "loss": 0.903, "step": 7845 }, { "epoch": 0.452814951545916, "grad_norm": 0.296339213848114, "learning_rate": 0.0001332816506457278, "loss": 0.9954, "step": 7850 }, { "epoch": 0.4531033687125058, "grad_norm": 0.29348504543304443, "learning_rate": 0.00013318669418676266, "loss": 0.9327, "step": 7855 }, { "epoch": 0.4533917858790955, "grad_norm": 0.3079511225223541, "learning_rate": 0.0001330917040887908, "loss": 1.0196, "step": 7860 }, { "epoch": 0.4536802030456853, "grad_norm": 0.29583096504211426, "learning_rate": 0.000132996680448097, "loss": 0.987, "step": 7865 }, { "epoch": 0.453968620212275, "grad_norm": 0.3352641761302948, "learning_rate": 0.00013290162336099996, "loss": 0.9933, "step": 7870 }, { "epoch": 0.4542570373788648, "grad_norm": 0.32410529255867004, "learning_rate": 0.00013280653292385233, "loss": 0.9673, "step": 7875 }, { "epoch": 0.45454545454545453, "grad_norm": 0.3224494755268097, "learning_rate": 0.00013271140923304064, "loss": 0.9507, "step": 7880 }, { "epoch": 0.4548338717120443, "grad_norm": 0.2842070460319519, "learning_rate": 0.00013261625238498496, "loss": 0.9414, "step": 7885 }, { "epoch": 0.45512228887863404, "grad_norm": 0.31733888387680054, "learning_rate": 0.00013252106247613914, "loss": 1.013, "step": 7890 }, { "epoch": 0.4554107060452238, "grad_norm": 0.33846980333328247, "learning_rate": 0.0001324258396029904, "loss": 0.964, "step": 7895 }, { "epoch": 0.45569912321181355, "grad_norm": 0.29883426427841187, "learning_rate": 0.00013233058386205948, "loss": 0.9998, "step": 7900 }, { "epoch": 0.45598754037840333, "grad_norm": 0.3294544219970703, "learning_rate": 0.0001322352953499004, "loss": 0.9731, "step": 7905 }, { "epoch": 0.45627595754499306, "grad_norm": 0.4528438150882721, "learning_rate": 0.00013213997416310034, "loss": 1.0049, "step": 7910 }, { "epoch": 0.45656437471158284, "grad_norm": 0.30099087953567505, "learning_rate": 0.0001320446203982797, "loss": 0.9577, "step": 7915 }, { "epoch": 0.45685279187817257, "grad_norm": 0.32508721947669983, "learning_rate": 0.00013194923415209183, "loss": 1.0045, "step": 7920 }, { "epoch": 0.45714120904476235, "grad_norm": 0.29300785064697266, "learning_rate": 0.00013185381552122303, "loss": 0.9194, "step": 7925 }, { "epoch": 0.4574296262113521, "grad_norm": 0.306942343711853, "learning_rate": 0.00013175836460239243, "loss": 0.9877, "step": 7930 }, { "epoch": 0.45771804337794186, "grad_norm": 0.3295309841632843, "learning_rate": 0.00013166288149235188, "loss": 0.9917, "step": 7935 }, { "epoch": 0.4580064605445316, "grad_norm": 0.5229211449623108, "learning_rate": 0.00013156736628788584, "loss": 0.9927, "step": 7940 }, { "epoch": 0.4582948777111214, "grad_norm": 0.3033890128135681, "learning_rate": 0.00013147181908581136, "loss": 0.9158, "step": 7945 }, { "epoch": 0.4585832948777111, "grad_norm": 0.3171187937259674, "learning_rate": 0.00013137623998297785, "loss": 0.9485, "step": 7950 }, { "epoch": 0.4588717120443009, "grad_norm": 0.31747764348983765, "learning_rate": 0.00013128062907626718, "loss": 0.9021, "step": 7955 }, { "epoch": 0.4591601292108906, "grad_norm": 0.3300734758377075, "learning_rate": 0.00013118498646259323, "loss": 0.9613, "step": 7960 }, { "epoch": 0.4594485463774804, "grad_norm": 0.31003138422966003, "learning_rate": 0.00013108931223890225, "loss": 0.9745, "step": 7965 }, { "epoch": 0.4597369635440701, "grad_norm": 0.2931799590587616, "learning_rate": 0.0001309936065021724, "loss": 0.9516, "step": 7970 }, { "epoch": 0.4600253807106599, "grad_norm": 0.3191987872123718, "learning_rate": 0.00013089786934941387, "loss": 0.9241, "step": 7975 }, { "epoch": 0.46031379787724963, "grad_norm": 0.28786250948905945, "learning_rate": 0.0001308021008776686, "loss": 0.9766, "step": 7980 }, { "epoch": 0.4606022150438394, "grad_norm": 0.30729734897613525, "learning_rate": 0.0001307063011840103, "loss": 1.004, "step": 7985 }, { "epoch": 0.46089063221042914, "grad_norm": 0.3125825822353363, "learning_rate": 0.00013061047036554444, "loss": 1.0104, "step": 7990 }, { "epoch": 0.4611790493770189, "grad_norm": 0.3065352141857147, "learning_rate": 0.0001305146085194079, "loss": 0.9858, "step": 7995 }, { "epoch": 0.46146746654360865, "grad_norm": 0.2968180477619171, "learning_rate": 0.00013041871574276905, "loss": 1.0341, "step": 8000 }, { "epoch": 0.46175588371019843, "grad_norm": 0.3416571021080017, "learning_rate": 0.0001303227921328276, "loss": 0.9738, "step": 8005 }, { "epoch": 0.46204430087678816, "grad_norm": 0.31280043721199036, "learning_rate": 0.00013022683778681458, "loss": 0.9728, "step": 8010 }, { "epoch": 0.46233271804337794, "grad_norm": 0.333219975233078, "learning_rate": 0.00013013085280199214, "loss": 0.9559, "step": 8015 }, { "epoch": 0.46262113520996767, "grad_norm": 0.3534589111804962, "learning_rate": 0.00013003483727565344, "loss": 0.9473, "step": 8020 }, { "epoch": 0.46290955237655745, "grad_norm": 0.312299519777298, "learning_rate": 0.00012993879130512263, "loss": 0.968, "step": 8025 }, { "epoch": 0.4631979695431472, "grad_norm": 0.304463267326355, "learning_rate": 0.00012984271498775473, "loss": 0.9619, "step": 8030 }, { "epoch": 0.46348638670973696, "grad_norm": 0.27623772621154785, "learning_rate": 0.00012974660842093554, "loss": 0.9282, "step": 8035 }, { "epoch": 0.46377480387632675, "grad_norm": 0.3356596529483795, "learning_rate": 0.00012965047170208145, "loss": 0.9826, "step": 8040 }, { "epoch": 0.4640632210429165, "grad_norm": 0.34293317794799805, "learning_rate": 0.00012955430492863948, "loss": 1.0262, "step": 8045 }, { "epoch": 0.46435163820950626, "grad_norm": 0.3161788582801819, "learning_rate": 0.00012945810819808715, "loss": 1.019, "step": 8050 }, { "epoch": 0.464640055376096, "grad_norm": 0.31269657611846924, "learning_rate": 0.00012936188160793218, "loss": 0.9221, "step": 8055 }, { "epoch": 0.46492847254268577, "grad_norm": 0.3549450933933258, "learning_rate": 0.00012926562525571273, "loss": 1.0017, "step": 8060 }, { "epoch": 0.4652168897092755, "grad_norm": 0.3124108910560608, "learning_rate": 0.00012916933923899702, "loss": 0.9945, "step": 8065 }, { "epoch": 0.4655053068758653, "grad_norm": 0.3089344799518585, "learning_rate": 0.00012907302365538348, "loss": 1.0043, "step": 8070 }, { "epoch": 0.465793724042455, "grad_norm": 0.3016572594642639, "learning_rate": 0.00012897667860250028, "loss": 0.932, "step": 8075 }, { "epoch": 0.4660821412090448, "grad_norm": 0.2821403741836548, "learning_rate": 0.0001288803041780057, "loss": 0.8898, "step": 8080 }, { "epoch": 0.4663705583756345, "grad_norm": 0.3156525790691376, "learning_rate": 0.00012878390047958761, "loss": 0.9543, "step": 8085 }, { "epoch": 0.4666589755422243, "grad_norm": 0.2793186902999878, "learning_rate": 0.0001286874676049637, "loss": 0.936, "step": 8090 }, { "epoch": 0.466947392708814, "grad_norm": 0.3266434669494629, "learning_rate": 0.00012859100565188104, "loss": 1.0449, "step": 8095 }, { "epoch": 0.4672358098754038, "grad_norm": 0.31635022163391113, "learning_rate": 0.00012849451471811643, "loss": 1.0126, "step": 8100 }, { "epoch": 0.46752422704199353, "grad_norm": 0.3141334652900696, "learning_rate": 0.0001283979949014758, "loss": 0.9654, "step": 8105 }, { "epoch": 0.4678126442085833, "grad_norm": 0.31797635555267334, "learning_rate": 0.00012830144629979456, "loss": 0.9323, "step": 8110 }, { "epoch": 0.46810106137517304, "grad_norm": 0.32226845622062683, "learning_rate": 0.00012820486901093717, "loss": 0.9622, "step": 8115 }, { "epoch": 0.4683894785417628, "grad_norm": 0.3159628212451935, "learning_rate": 0.00012810826313279717, "loss": 0.9863, "step": 8120 }, { "epoch": 0.46867789570835255, "grad_norm": 0.3085826337337494, "learning_rate": 0.00012801162876329713, "loss": 1.0229, "step": 8125 }, { "epoch": 0.46896631287494234, "grad_norm": 0.32204851508140564, "learning_rate": 0.00012791496600038854, "loss": 0.9969, "step": 8130 }, { "epoch": 0.46925473004153206, "grad_norm": 0.3067418038845062, "learning_rate": 0.00012781827494205147, "loss": 0.9431, "step": 8135 }, { "epoch": 0.46954314720812185, "grad_norm": 0.31123608350753784, "learning_rate": 0.00012772155568629499, "loss": 0.9736, "step": 8140 }, { "epoch": 0.4698315643747116, "grad_norm": 0.31212547421455383, "learning_rate": 0.00012762480833115644, "loss": 0.9642, "step": 8145 }, { "epoch": 0.47011998154130136, "grad_norm": 0.28204548358917236, "learning_rate": 0.00012752803297470187, "loss": 1.004, "step": 8150 }, { "epoch": 0.4704083987078911, "grad_norm": 0.3246347904205322, "learning_rate": 0.00012743122971502555, "loss": 0.9538, "step": 8155 }, { "epoch": 0.47069681587448087, "grad_norm": 0.3177697956562042, "learning_rate": 0.00012733439865025012, "loss": 0.9978, "step": 8160 }, { "epoch": 0.4709852330410706, "grad_norm": 0.32368218898773193, "learning_rate": 0.0001272375398785264, "loss": 0.995, "step": 8165 }, { "epoch": 0.4712736502076604, "grad_norm": 0.34451228380203247, "learning_rate": 0.0001271406534980333, "loss": 1.0219, "step": 8170 }, { "epoch": 0.4715620673742501, "grad_norm": 0.3153943121433258, "learning_rate": 0.00012704373960697766, "loss": 1.0028, "step": 8175 }, { "epoch": 0.4718504845408399, "grad_norm": 0.3561108112335205, "learning_rate": 0.0001269467983035943, "loss": 0.9848, "step": 8180 }, { "epoch": 0.4721389017074296, "grad_norm": 0.3053421080112457, "learning_rate": 0.00012684982968614567, "loss": 0.9592, "step": 8185 }, { "epoch": 0.4724273188740194, "grad_norm": 0.31110382080078125, "learning_rate": 0.00012675283385292212, "loss": 0.9648, "step": 8190 }, { "epoch": 0.4727157360406091, "grad_norm": 0.34353601932525635, "learning_rate": 0.00012665581090224136, "loss": 0.9924, "step": 8195 }, { "epoch": 0.4730041532071989, "grad_norm": 0.3134307265281677, "learning_rate": 0.00012655876093244878, "loss": 0.9842, "step": 8200 }, { "epoch": 0.47329257037378863, "grad_norm": 0.3005763590335846, "learning_rate": 0.00012646168404191704, "loss": 0.883, "step": 8205 }, { "epoch": 0.4735809875403784, "grad_norm": 0.31364625692367554, "learning_rate": 0.00012636458032904617, "loss": 1.0272, "step": 8210 }, { "epoch": 0.47386940470696814, "grad_norm": 0.32760295271873474, "learning_rate": 0.00012626744989226326, "loss": 0.9793, "step": 8215 }, { "epoch": 0.4741578218735579, "grad_norm": 0.31284964084625244, "learning_rate": 0.00012617029283002265, "loss": 0.9602, "step": 8220 }, { "epoch": 0.47444623904014765, "grad_norm": 0.28940585255622864, "learning_rate": 0.00012607310924080557, "loss": 0.9804, "step": 8225 }, { "epoch": 0.47473465620673744, "grad_norm": 0.3072567880153656, "learning_rate": 0.00012597589922312008, "loss": 0.8965, "step": 8230 }, { "epoch": 0.47502307337332716, "grad_norm": 0.2848748564720154, "learning_rate": 0.0001258786628755012, "loss": 0.8763, "step": 8235 }, { "epoch": 0.47531149053991695, "grad_norm": 0.31714004278182983, "learning_rate": 0.00012578140029651053, "loss": 0.9775, "step": 8240 }, { "epoch": 0.4755999077065067, "grad_norm": 0.2894527316093445, "learning_rate": 0.00012568411158473625, "loss": 1.0269, "step": 8245 }, { "epoch": 0.47588832487309646, "grad_norm": 0.3150068521499634, "learning_rate": 0.00012558679683879301, "loss": 1.0003, "step": 8250 }, { "epoch": 0.4761767420396862, "grad_norm": 0.3005748987197876, "learning_rate": 0.00012548945615732202, "loss": 1.0234, "step": 8255 }, { "epoch": 0.47646515920627597, "grad_norm": 0.33544933795928955, "learning_rate": 0.0001253920896389905, "loss": 0.9997, "step": 8260 }, { "epoch": 0.4767535763728657, "grad_norm": 0.31992655992507935, "learning_rate": 0.00012529469738249208, "loss": 0.9854, "step": 8265 }, { "epoch": 0.4770419935394555, "grad_norm": 0.3050898611545563, "learning_rate": 0.00012519727948654642, "loss": 0.9838, "step": 8270 }, { "epoch": 0.4773304107060452, "grad_norm": 0.31912678480148315, "learning_rate": 0.00012509983604989917, "loss": 0.982, "step": 8275 }, { "epoch": 0.477618827872635, "grad_norm": 0.3090691566467285, "learning_rate": 0.00012500236717132178, "loss": 0.9482, "step": 8280 }, { "epoch": 0.4779072450392247, "grad_norm": 0.2905727028846741, "learning_rate": 0.00012490487294961167, "loss": 0.9127, "step": 8285 }, { "epoch": 0.4781956622058145, "grad_norm": 0.3192508816719055, "learning_rate": 0.0001248073534835917, "loss": 0.9899, "step": 8290 }, { "epoch": 0.4784840793724042, "grad_norm": 0.29958266019821167, "learning_rate": 0.00012470980887211062, "loss": 0.9993, "step": 8295 }, { "epoch": 0.478772496538994, "grad_norm": 0.3005164861679077, "learning_rate": 0.0001246122392140424, "loss": 0.9824, "step": 8300 }, { "epoch": 0.47906091370558374, "grad_norm": 0.37904489040374756, "learning_rate": 0.00012451464460828656, "loss": 1.0149, "step": 8305 }, { "epoch": 0.4793493308721735, "grad_norm": 0.30801454186439514, "learning_rate": 0.00012441702515376786, "loss": 1.0023, "step": 8310 }, { "epoch": 0.47963774803876325, "grad_norm": 0.3129716217517853, "learning_rate": 0.00012431938094943618, "loss": 0.9613, "step": 8315 }, { "epoch": 0.47992616520535303, "grad_norm": 0.30742931365966797, "learning_rate": 0.0001242217120942666, "loss": 0.8988, "step": 8320 }, { "epoch": 0.48021458237194276, "grad_norm": 0.3507063090801239, "learning_rate": 0.00012412401868725913, "loss": 0.9813, "step": 8325 }, { "epoch": 0.48050299953853254, "grad_norm": 0.28657853603363037, "learning_rate": 0.00012402630082743868, "loss": 0.9277, "step": 8330 }, { "epoch": 0.48079141670512227, "grad_norm": 0.3005477786064148, "learning_rate": 0.00012392855861385492, "loss": 0.9476, "step": 8335 }, { "epoch": 0.48107983387171205, "grad_norm": 0.2988882064819336, "learning_rate": 0.00012383079214558227, "loss": 0.9513, "step": 8340 }, { "epoch": 0.4813682510383018, "grad_norm": 0.31848201155662537, "learning_rate": 0.0001237330015217196, "loss": 0.9201, "step": 8345 }, { "epoch": 0.48165666820489156, "grad_norm": 0.2883610725402832, "learning_rate": 0.00012363518684139043, "loss": 0.9438, "step": 8350 }, { "epoch": 0.4819450853714813, "grad_norm": 0.3046533167362213, "learning_rate": 0.0001235373482037426, "loss": 0.9581, "step": 8355 }, { "epoch": 0.48223350253807107, "grad_norm": 0.3131345510482788, "learning_rate": 0.00012343948570794815, "loss": 0.977, "step": 8360 }, { "epoch": 0.4825219197046608, "grad_norm": 0.3097611665725708, "learning_rate": 0.00012334159945320342, "loss": 0.944, "step": 8365 }, { "epoch": 0.4828103368712506, "grad_norm": 0.3010179102420807, "learning_rate": 0.00012324368953872883, "loss": 0.9699, "step": 8370 }, { "epoch": 0.4830987540378403, "grad_norm": 0.32022956013679504, "learning_rate": 0.00012314575606376863, "loss": 1.0071, "step": 8375 }, { "epoch": 0.4833871712044301, "grad_norm": 0.32526081800460815, "learning_rate": 0.00012304779912759118, "loss": 0.9378, "step": 8380 }, { "epoch": 0.4836755883710198, "grad_norm": 0.32581695914268494, "learning_rate": 0.00012294981882948844, "loss": 0.9235, "step": 8385 }, { "epoch": 0.4839640055376096, "grad_norm": 0.30687832832336426, "learning_rate": 0.00012285181526877615, "loss": 0.9252, "step": 8390 }, { "epoch": 0.4842524227041994, "grad_norm": 0.2729445993900299, "learning_rate": 0.00012275378854479358, "loss": 0.9161, "step": 8395 }, { "epoch": 0.4845408398707891, "grad_norm": 0.3410494029521942, "learning_rate": 0.00012265573875690344, "loss": 0.983, "step": 8400 }, { "epoch": 0.4848292570373789, "grad_norm": 0.3035064935684204, "learning_rate": 0.00012255766600449198, "loss": 0.934, "step": 8405 }, { "epoch": 0.4851176742039686, "grad_norm": 0.30714723467826843, "learning_rate": 0.0001224595703869685, "loss": 0.8976, "step": 8410 }, { "epoch": 0.4854060913705584, "grad_norm": 0.3508041799068451, "learning_rate": 0.00012236145200376566, "loss": 1.0483, "step": 8415 }, { "epoch": 0.48569450853714813, "grad_norm": 0.30225586891174316, "learning_rate": 0.0001222633109543392, "loss": 0.9137, "step": 8420 }, { "epoch": 0.4859829257037379, "grad_norm": 0.32091522216796875, "learning_rate": 0.0001221651473381676, "loss": 0.944, "step": 8425 }, { "epoch": 0.48627134287032764, "grad_norm": 0.3079957962036133, "learning_rate": 0.00012206696125475249, "loss": 1.0086, "step": 8430 }, { "epoch": 0.4865597600369174, "grad_norm": 0.28919950127601624, "learning_rate": 0.00012196875280361817, "loss": 1.0097, "step": 8435 }, { "epoch": 0.48684817720350715, "grad_norm": 0.296323299407959, "learning_rate": 0.00012187052208431158, "loss": 0.9956, "step": 8440 }, { "epoch": 0.48713659437009693, "grad_norm": 0.3021702468395233, "learning_rate": 0.00012177226919640223, "loss": 1.0361, "step": 8445 }, { "epoch": 0.48742501153668666, "grad_norm": 0.3154110908508301, "learning_rate": 0.0001216739942394822, "loss": 0.9982, "step": 8450 }, { "epoch": 0.48771342870327644, "grad_norm": 0.2946849763393402, "learning_rate": 0.0001215756973131658, "loss": 1.094, "step": 8455 }, { "epoch": 0.48800184586986617, "grad_norm": 0.3265274167060852, "learning_rate": 0.00012147737851708973, "loss": 0.9622, "step": 8460 }, { "epoch": 0.48829026303645595, "grad_norm": 0.30080366134643555, "learning_rate": 0.00012137903795091276, "loss": 0.9274, "step": 8465 }, { "epoch": 0.4885786802030457, "grad_norm": 0.3038170337677002, "learning_rate": 0.00012128067571431583, "loss": 0.9048, "step": 8470 }, { "epoch": 0.48886709736963546, "grad_norm": 0.3240787982940674, "learning_rate": 0.00012118229190700172, "loss": 1.0785, "step": 8475 }, { "epoch": 0.4891555145362252, "grad_norm": 0.3066551387310028, "learning_rate": 0.00012108388662869519, "loss": 1.0428, "step": 8480 }, { "epoch": 0.48944393170281497, "grad_norm": 0.3393528461456299, "learning_rate": 0.0001209854599791427, "loss": 0.94, "step": 8485 }, { "epoch": 0.4897323488694047, "grad_norm": 0.31477421522140503, "learning_rate": 0.0001208870120581124, "loss": 0.9802, "step": 8490 }, { "epoch": 0.4900207660359945, "grad_norm": 0.3190643787384033, "learning_rate": 0.00012078854296539397, "loss": 0.9995, "step": 8495 }, { "epoch": 0.4903091832025842, "grad_norm": 0.2997737526893616, "learning_rate": 0.00012069005280079862, "loss": 0.9562, "step": 8500 }, { "epoch": 0.490597600369174, "grad_norm": 0.2990228831768036, "learning_rate": 0.0001205915416641588, "loss": 0.9139, "step": 8505 }, { "epoch": 0.4908860175357637, "grad_norm": 0.3303566575050354, "learning_rate": 0.00012049300965532832, "loss": 0.9472, "step": 8510 }, { "epoch": 0.4911744347023535, "grad_norm": 0.34171241521835327, "learning_rate": 0.00012039445687418212, "loss": 1.0068, "step": 8515 }, { "epoch": 0.49146285186894323, "grad_norm": 0.32005879282951355, "learning_rate": 0.00012029588342061621, "loss": 0.9828, "step": 8520 }, { "epoch": 0.491751269035533, "grad_norm": 0.30151885747909546, "learning_rate": 0.00012019728939454748, "loss": 1.001, "step": 8525 }, { "epoch": 0.49203968620212274, "grad_norm": 0.3093133568763733, "learning_rate": 0.00012009867489591377, "loss": 0.9059, "step": 8530 }, { "epoch": 0.4923281033687125, "grad_norm": 0.3029720187187195, "learning_rate": 0.00012000004002467364, "loss": 0.9823, "step": 8535 }, { "epoch": 0.49261652053530225, "grad_norm": 0.3385627269744873, "learning_rate": 0.00011990138488080622, "loss": 0.9238, "step": 8540 }, { "epoch": 0.49290493770189203, "grad_norm": 0.32065922021865845, "learning_rate": 0.00011980270956431135, "loss": 0.9322, "step": 8545 }, { "epoch": 0.49319335486848176, "grad_norm": 0.3160153031349182, "learning_rate": 0.00011970401417520913, "loss": 0.9436, "step": 8550 }, { "epoch": 0.49348177203507154, "grad_norm": 0.28007084131240845, "learning_rate": 0.00011960529881354017, "loss": 0.8472, "step": 8555 }, { "epoch": 0.49377018920166127, "grad_norm": 0.3082873523235321, "learning_rate": 0.00011950656357936525, "loss": 0.8565, "step": 8560 }, { "epoch": 0.49405860636825105, "grad_norm": 0.2938149571418762, "learning_rate": 0.00011940780857276528, "loss": 0.9621, "step": 8565 }, { "epoch": 0.4943470235348408, "grad_norm": 0.3359840512275696, "learning_rate": 0.00011930903389384123, "loss": 0.9477, "step": 8570 }, { "epoch": 0.49463544070143056, "grad_norm": 0.3250032663345337, "learning_rate": 0.00011921023964271403, "loss": 0.9795, "step": 8575 }, { "epoch": 0.4949238578680203, "grad_norm": 0.3110741078853607, "learning_rate": 0.00011911142591952437, "loss": 0.955, "step": 8580 }, { "epoch": 0.49521227503461007, "grad_norm": 0.29950112104415894, "learning_rate": 0.00011901259282443285, "loss": 0.9587, "step": 8585 }, { "epoch": 0.4955006922011998, "grad_norm": 0.3032659590244293, "learning_rate": 0.0001189137404576195, "loss": 1.0214, "step": 8590 }, { "epoch": 0.4957891093677896, "grad_norm": 0.3259546458721161, "learning_rate": 0.00011881486891928404, "loss": 1.0018, "step": 8595 }, { "epoch": 0.4960775265343793, "grad_norm": 0.3006402850151062, "learning_rate": 0.00011871597830964551, "loss": 1.0101, "step": 8600 }, { "epoch": 0.4963659437009691, "grad_norm": 0.32679426670074463, "learning_rate": 0.00011861706872894236, "loss": 0.9485, "step": 8605 }, { "epoch": 0.4966543608675588, "grad_norm": 0.27988797426223755, "learning_rate": 0.00011851814027743223, "loss": 0.8054, "step": 8610 }, { "epoch": 0.4969427780341486, "grad_norm": 0.2938796579837799, "learning_rate": 0.00011841919305539194, "loss": 0.9298, "step": 8615 }, { "epoch": 0.49723119520073833, "grad_norm": 0.3216566741466522, "learning_rate": 0.00011832022716311722, "loss": 0.9831, "step": 8620 }, { "epoch": 0.4975196123673281, "grad_norm": 0.3104879856109619, "learning_rate": 0.0001182212427009229, "loss": 0.9916, "step": 8625 }, { "epoch": 0.49780802953391784, "grad_norm": 0.33819061517715454, "learning_rate": 0.00011812223976914243, "loss": 1.0035, "step": 8630 }, { "epoch": 0.4980964467005076, "grad_norm": 0.29182150959968567, "learning_rate": 0.00011802321846812816, "loss": 0.9055, "step": 8635 }, { "epoch": 0.49838486386709735, "grad_norm": 0.303713321685791, "learning_rate": 0.00011792417889825094, "loss": 0.8924, "step": 8640 }, { "epoch": 0.49867328103368713, "grad_norm": 0.3153342604637146, "learning_rate": 0.00011782512115990023, "loss": 0.9802, "step": 8645 }, { "epoch": 0.49896169820027686, "grad_norm": 0.3053840398788452, "learning_rate": 0.00011772604535348382, "loss": 0.9128, "step": 8650 }, { "epoch": 0.49925011536686664, "grad_norm": 0.3106091320514679, "learning_rate": 0.00011762695157942789, "loss": 1.0137, "step": 8655 }, { "epoch": 0.49953853253345637, "grad_norm": 0.325166255235672, "learning_rate": 0.00011752783993817675, "loss": 0.9791, "step": 8660 }, { "epoch": 0.49982694970004615, "grad_norm": 0.31137755513191223, "learning_rate": 0.00011742871053019294, "loss": 0.9528, "step": 8665 }, { "epoch": 0.5001153668666359, "grad_norm": 0.30196908116340637, "learning_rate": 0.00011732956345595682, "loss": 0.9264, "step": 8670 }, { "epoch": 0.5004037840332256, "grad_norm": 0.30455246567726135, "learning_rate": 0.00011723039881596686, "loss": 0.9306, "step": 8675 }, { "epoch": 0.5006922011998154, "grad_norm": 0.33094102144241333, "learning_rate": 0.00011713121671073924, "loss": 0.9644, "step": 8680 }, { "epoch": 0.5009806183664052, "grad_norm": 0.29138949513435364, "learning_rate": 0.00011703201724080783, "loss": 0.919, "step": 8685 }, { "epoch": 0.501269035532995, "grad_norm": 0.3106113374233246, "learning_rate": 0.00011693280050672417, "loss": 1.008, "step": 8690 }, { "epoch": 0.5015574526995846, "grad_norm": 0.30877065658569336, "learning_rate": 0.00011683356660905716, "loss": 0.9312, "step": 8695 }, { "epoch": 0.5018458698661744, "grad_norm": 0.3161839246749878, "learning_rate": 0.00011673431564839327, "loss": 0.921, "step": 8700 }, { "epoch": 0.5021342870327642, "grad_norm": 0.30622875690460205, "learning_rate": 0.00011663504772533617, "loss": 0.9326, "step": 8705 }, { "epoch": 0.502422704199354, "grad_norm": 0.32432737946510315, "learning_rate": 0.0001165357629405067, "loss": 1.0314, "step": 8710 }, { "epoch": 0.5027111213659436, "grad_norm": 0.3023039698600769, "learning_rate": 0.00011643646139454287, "loss": 0.9921, "step": 8715 }, { "epoch": 0.5029995385325334, "grad_norm": 0.31420597434043884, "learning_rate": 0.00011633714318809962, "loss": 0.9713, "step": 8720 }, { "epoch": 0.5032879556991232, "grad_norm": 0.3235868215560913, "learning_rate": 0.00011623780842184881, "loss": 0.9795, "step": 8725 }, { "epoch": 0.503576372865713, "grad_norm": 0.3257627785205841, "learning_rate": 0.00011613845719647909, "loss": 1.0116, "step": 8730 }, { "epoch": 0.5038647900323027, "grad_norm": 0.289419561624527, "learning_rate": 0.00011603908961269571, "loss": 0.924, "step": 8735 }, { "epoch": 0.5041532071988925, "grad_norm": 0.31289026141166687, "learning_rate": 0.00011593970577122067, "loss": 0.8984, "step": 8740 }, { "epoch": 0.5044416243654822, "grad_norm": 0.3069057762622833, "learning_rate": 0.00011584030577279223, "loss": 0.9594, "step": 8745 }, { "epoch": 0.504730041532072, "grad_norm": 0.3309262990951538, "learning_rate": 0.00011574088971816523, "loss": 1.0637, "step": 8750 }, { "epoch": 0.5050184586986618, "grad_norm": 0.343013733625412, "learning_rate": 0.00011564145770811068, "loss": 1.0467, "step": 8755 }, { "epoch": 0.5053068758652515, "grad_norm": 0.298500657081604, "learning_rate": 0.00011554200984341577, "loss": 0.9842, "step": 8760 }, { "epoch": 0.5055952930318413, "grad_norm": 0.294319212436676, "learning_rate": 0.00011544254622488378, "loss": 0.9636, "step": 8765 }, { "epoch": 0.505883710198431, "grad_norm": 0.30474424362182617, "learning_rate": 0.00011534306695333395, "loss": 0.9891, "step": 8770 }, { "epoch": 0.5061721273650208, "grad_norm": 0.3213635981082916, "learning_rate": 0.00011524357212960135, "loss": 0.9494, "step": 8775 }, { "epoch": 0.5064605445316105, "grad_norm": 0.30603140592575073, "learning_rate": 0.00011514406185453692, "loss": 0.9003, "step": 8780 }, { "epoch": 0.5067489616982003, "grad_norm": 0.305373877286911, "learning_rate": 0.00011504453622900717, "loss": 1.0101, "step": 8785 }, { "epoch": 0.5070373788647901, "grad_norm": 0.31912145018577576, "learning_rate": 0.00011494499535389418, "loss": 0.9679, "step": 8790 }, { "epoch": 0.5073257960313798, "grad_norm": 0.2848472595214844, "learning_rate": 0.00011484543933009549, "loss": 0.9735, "step": 8795 }, { "epoch": 0.5076142131979695, "grad_norm": 0.3449084460735321, "learning_rate": 0.00011474586825852405, "loss": 0.9184, "step": 8800 }, { "epoch": 0.5079026303645593, "grad_norm": 0.30281150341033936, "learning_rate": 0.00011464628224010797, "loss": 1.0068, "step": 8805 }, { "epoch": 0.5081910475311491, "grad_norm": 0.29601889848709106, "learning_rate": 0.00011454668137579059, "loss": 0.9992, "step": 8810 }, { "epoch": 0.5084794646977389, "grad_norm": 0.3145044147968292, "learning_rate": 0.00011444706576653024, "loss": 0.9517, "step": 8815 }, { "epoch": 0.5087678818643285, "grad_norm": 0.3319196403026581, "learning_rate": 0.00011434743551330028, "loss": 0.9928, "step": 8820 }, { "epoch": 0.5090562990309183, "grad_norm": 0.28969430923461914, "learning_rate": 0.00011424779071708878, "loss": 0.9698, "step": 8825 }, { "epoch": 0.5093447161975081, "grad_norm": 0.2996658384799957, "learning_rate": 0.00011414813147889868, "loss": 1.0052, "step": 8830 }, { "epoch": 0.5096331333640979, "grad_norm": 0.2955401539802551, "learning_rate": 0.0001140484578997475, "loss": 0.9902, "step": 8835 }, { "epoch": 0.5099215505306876, "grad_norm": 0.3238406777381897, "learning_rate": 0.00011394877008066731, "loss": 1.0101, "step": 8840 }, { "epoch": 0.5102099676972773, "grad_norm": 0.304666668176651, "learning_rate": 0.00011384906812270457, "loss": 0.9317, "step": 8845 }, { "epoch": 0.5104983848638671, "grad_norm": 0.2767610251903534, "learning_rate": 0.00011374935212692018, "loss": 0.9771, "step": 8850 }, { "epoch": 0.5107868020304569, "grad_norm": 0.3337164521217346, "learning_rate": 0.00011364962219438913, "loss": 0.9174, "step": 8855 }, { "epoch": 0.5110752191970466, "grad_norm": 0.3500116169452667, "learning_rate": 0.00011354987842620061, "loss": 0.9645, "step": 8860 }, { "epoch": 0.5113636363636364, "grad_norm": 0.3071758449077606, "learning_rate": 0.00011345012092345786, "loss": 0.9561, "step": 8865 }, { "epoch": 0.5116520535302261, "grad_norm": 0.3307574689388275, "learning_rate": 0.000113350349787278, "loss": 1.0172, "step": 8870 }, { "epoch": 0.5119404706968159, "grad_norm": 0.31679022312164307, "learning_rate": 0.00011325056511879197, "loss": 0.9626, "step": 8875 }, { "epoch": 0.5122288878634056, "grad_norm": 0.30507731437683105, "learning_rate": 0.00011315076701914449, "loss": 0.9616, "step": 8880 }, { "epoch": 0.5125173050299954, "grad_norm": 0.328744113445282, "learning_rate": 0.00011305095558949376, "loss": 0.9907, "step": 8885 }, { "epoch": 0.5128057221965852, "grad_norm": 0.3259487748146057, "learning_rate": 0.00011295113093101162, "loss": 1.0606, "step": 8890 }, { "epoch": 0.5130941393631749, "grad_norm": 0.30750012397766113, "learning_rate": 0.00011285129314488328, "loss": 0.9538, "step": 8895 }, { "epoch": 0.5133825565297646, "grad_norm": 0.3048967719078064, "learning_rate": 0.0001127514423323072, "loss": 0.982, "step": 8900 }, { "epoch": 0.5136709736963544, "grad_norm": 0.33238890767097473, "learning_rate": 0.00011265157859449513, "loss": 0.9261, "step": 8905 }, { "epoch": 0.5139593908629442, "grad_norm": 0.312454491853714, "learning_rate": 0.00011255170203267186, "loss": 0.9946, "step": 8910 }, { "epoch": 0.514247808029534, "grad_norm": 0.28780901432037354, "learning_rate": 0.0001124518127480753, "loss": 0.9761, "step": 8915 }, { "epoch": 0.5145362251961236, "grad_norm": 0.29829463362693787, "learning_rate": 0.000112351910841956, "loss": 0.9599, "step": 8920 }, { "epoch": 0.5148246423627134, "grad_norm": 0.289553701877594, "learning_rate": 0.0001122519964155776, "loss": 0.998, "step": 8925 }, { "epoch": 0.5151130595293032, "grad_norm": 0.33418598771095276, "learning_rate": 0.00011215206957021618, "loss": 1.0044, "step": 8930 }, { "epoch": 0.515401476695893, "grad_norm": 0.3013911843299866, "learning_rate": 0.00011205213040716063, "loss": 1.0204, "step": 8935 }, { "epoch": 0.5156898938624827, "grad_norm": 0.30099862813949585, "learning_rate": 0.00011195217902771212, "loss": 0.9752, "step": 8940 }, { "epoch": 0.5159783110290724, "grad_norm": 0.3052062690258026, "learning_rate": 0.00011185221553318438, "loss": 1.0301, "step": 8945 }, { "epoch": 0.5162667281956622, "grad_norm": 0.30699998140335083, "learning_rate": 0.0001117522400249033, "loss": 0.9504, "step": 8950 }, { "epoch": 0.516555145362252, "grad_norm": 0.3200739324092865, "learning_rate": 0.00011165225260420697, "loss": 0.9353, "step": 8955 }, { "epoch": 0.5168435625288417, "grad_norm": 0.28724315762519836, "learning_rate": 0.00011155225337244562, "loss": 0.9249, "step": 8960 }, { "epoch": 0.5171319796954315, "grad_norm": 0.312840074300766, "learning_rate": 0.00011145224243098138, "loss": 0.9696, "step": 8965 }, { "epoch": 0.5174203968620212, "grad_norm": 0.3123422861099243, "learning_rate": 0.00011135221988118825, "loss": 0.9377, "step": 8970 }, { "epoch": 0.517708814028611, "grad_norm": 0.3269249200820923, "learning_rate": 0.00011125218582445207, "loss": 1.0189, "step": 8975 }, { "epoch": 0.5179972311952007, "grad_norm": 0.34945985674858093, "learning_rate": 0.00011115214036217026, "loss": 0.9692, "step": 8980 }, { "epoch": 0.5182856483617905, "grad_norm": 0.30264851450920105, "learning_rate": 0.00011105208359575186, "loss": 0.9469, "step": 8985 }, { "epoch": 0.5185740655283803, "grad_norm": 0.3047720491886139, "learning_rate": 0.0001109520156266173, "loss": 0.9471, "step": 8990 }, { "epoch": 0.51886248269497, "grad_norm": 0.33226409554481506, "learning_rate": 0.00011085193655619845, "loss": 0.9561, "step": 8995 }, { "epoch": 0.5191508998615597, "grad_norm": 0.3194473385810852, "learning_rate": 0.00011075184648593838, "loss": 1.0039, "step": 9000 }, { "epoch": 0.5194393170281495, "grad_norm": 0.334067165851593, "learning_rate": 0.00011065174551729134, "loss": 0.9985, "step": 9005 }, { "epoch": 0.5197277341947393, "grad_norm": 0.3037133514881134, "learning_rate": 0.00011055163375172257, "loss": 1.0081, "step": 9010 }, { "epoch": 0.5200161513613291, "grad_norm": 0.3324849605560303, "learning_rate": 0.00011045151129070832, "loss": 0.9596, "step": 9015 }, { "epoch": 0.5203045685279187, "grad_norm": 0.29767823219299316, "learning_rate": 0.00011035137823573561, "loss": 0.8858, "step": 9020 }, { "epoch": 0.5205929856945085, "grad_norm": 0.3264126181602478, "learning_rate": 0.00011025123468830232, "loss": 0.9279, "step": 9025 }, { "epoch": 0.5208814028610983, "grad_norm": 0.28214511275291443, "learning_rate": 0.0001101510807499168, "loss": 0.9387, "step": 9030 }, { "epoch": 0.5211698200276881, "grad_norm": 0.30854344367980957, "learning_rate": 0.00011005091652209809, "loss": 0.91, "step": 9035 }, { "epoch": 0.5214582371942778, "grad_norm": 0.3331091105937958, "learning_rate": 0.00010995074210637557, "loss": 1.0045, "step": 9040 }, { "epoch": 0.5217466543608675, "grad_norm": 0.29864826798439026, "learning_rate": 0.00010985055760428893, "loss": 0.8801, "step": 9045 }, { "epoch": 0.5220350715274573, "grad_norm": 0.3084268867969513, "learning_rate": 0.00010975036311738818, "loss": 0.9623, "step": 9050 }, { "epoch": 0.5223234886940471, "grad_norm": 0.31782200932502747, "learning_rate": 0.00010965015874723332, "loss": 1.056, "step": 9055 }, { "epoch": 0.5226119058606368, "grad_norm": 0.29870155453681946, "learning_rate": 0.00010954994459539452, "loss": 0.9459, "step": 9060 }, { "epoch": 0.5229003230272266, "grad_norm": 0.32133176922798157, "learning_rate": 0.0001094497207634517, "loss": 0.9144, "step": 9065 }, { "epoch": 0.5231887401938163, "grad_norm": 0.2952311933040619, "learning_rate": 0.00010934948735299475, "loss": 0.9592, "step": 9070 }, { "epoch": 0.5234771573604061, "grad_norm": 0.32097524404525757, "learning_rate": 0.00010924924446562317, "loss": 1.0487, "step": 9075 }, { "epoch": 0.5237655745269958, "grad_norm": 0.3255499005317688, "learning_rate": 0.00010914899220294607, "loss": 0.9538, "step": 9080 }, { "epoch": 0.5240539916935856, "grad_norm": 0.3189411461353302, "learning_rate": 0.00010904873066658208, "loss": 1.0008, "step": 9085 }, { "epoch": 0.5243424088601754, "grad_norm": 0.29466235637664795, "learning_rate": 0.00010894845995815928, "loss": 0.999, "step": 9090 }, { "epoch": 0.5246308260267651, "grad_norm": 0.31723839044570923, "learning_rate": 0.00010884818017931495, "loss": 0.9266, "step": 9095 }, { "epoch": 0.5249192431933549, "grad_norm": 0.26466256380081177, "learning_rate": 0.00010874789143169568, "loss": 0.9945, "step": 9100 }, { "epoch": 0.5252076603599446, "grad_norm": 0.3435070514678955, "learning_rate": 0.00010864759381695701, "loss": 0.93, "step": 9105 }, { "epoch": 0.5254960775265344, "grad_norm": 0.3118496835231781, "learning_rate": 0.00010854728743676362, "loss": 0.9869, "step": 9110 }, { "epoch": 0.5257844946931242, "grad_norm": 0.2827567458152771, "learning_rate": 0.00010844697239278891, "loss": 0.9564, "step": 9115 }, { "epoch": 0.5260729118597139, "grad_norm": 0.27921804785728455, "learning_rate": 0.00010834664878671525, "loss": 0.9807, "step": 9120 }, { "epoch": 0.5263613290263036, "grad_norm": 0.2913965582847595, "learning_rate": 0.00010824631672023349, "loss": 0.9486, "step": 9125 }, { "epoch": 0.5266497461928934, "grad_norm": 0.3042261004447937, "learning_rate": 0.00010814597629504324, "loss": 0.9576, "step": 9130 }, { "epoch": 0.5269381633594832, "grad_norm": 0.31277990341186523, "learning_rate": 0.00010804562761285246, "loss": 0.9182, "step": 9135 }, { "epoch": 0.527226580526073, "grad_norm": 0.3407859802246094, "learning_rate": 0.00010794527077537755, "loss": 0.9203, "step": 9140 }, { "epoch": 0.5275149976926626, "grad_norm": 0.3242495059967041, "learning_rate": 0.00010784490588434309, "loss": 1.0222, "step": 9145 }, { "epoch": 0.5278034148592524, "grad_norm": 0.3136400282382965, "learning_rate": 0.00010774453304148192, "loss": 1.0051, "step": 9150 }, { "epoch": 0.5280918320258422, "grad_norm": 0.3262755572795868, "learning_rate": 0.00010764415234853484, "loss": 1.0198, "step": 9155 }, { "epoch": 0.528380249192432, "grad_norm": 0.3099469542503357, "learning_rate": 0.00010754376390725074, "loss": 0.9721, "step": 9160 }, { "epoch": 0.5286686663590217, "grad_norm": 0.3080025315284729, "learning_rate": 0.00010744336781938624, "loss": 0.9494, "step": 9165 }, { "epoch": 0.5289570835256114, "grad_norm": 0.4399293065071106, "learning_rate": 0.00010734296418670582, "loss": 0.9977, "step": 9170 }, { "epoch": 0.5292455006922012, "grad_norm": 0.3164156377315521, "learning_rate": 0.00010724255311098146, "loss": 0.9743, "step": 9175 }, { "epoch": 0.529533917858791, "grad_norm": 0.3243144750595093, "learning_rate": 0.00010714213469399283, "loss": 0.9734, "step": 9180 }, { "epoch": 0.5298223350253807, "grad_norm": 0.35423848032951355, "learning_rate": 0.00010704170903752695, "loss": 0.9779, "step": 9185 }, { "epoch": 0.5301107521919705, "grad_norm": 0.36763471364974976, "learning_rate": 0.00010694127624337826, "loss": 0.9587, "step": 9190 }, { "epoch": 0.5303991693585602, "grad_norm": 0.323824405670166, "learning_rate": 0.00010684083641334832, "loss": 1.007, "step": 9195 }, { "epoch": 0.53068758652515, "grad_norm": 0.3197695016860962, "learning_rate": 0.00010674038964924597, "loss": 0.9734, "step": 9200 }, { "epoch": 0.5309760036917397, "grad_norm": 0.31020426750183105, "learning_rate": 0.00010663993605288693, "loss": 1.0066, "step": 9205 }, { "epoch": 0.5312644208583295, "grad_norm": 0.30677443742752075, "learning_rate": 0.00010653947572609393, "loss": 0.9763, "step": 9210 }, { "epoch": 0.5315528380249193, "grad_norm": 0.28415751457214355, "learning_rate": 0.0001064390087706965, "loss": 1.0104, "step": 9215 }, { "epoch": 0.531841255191509, "grad_norm": 0.3487168252468109, "learning_rate": 0.0001063385352885309, "loss": 0.9149, "step": 9220 }, { "epoch": 0.5321296723580987, "grad_norm": 0.2908375561237335, "learning_rate": 0.00010623805538144, "loss": 0.8763, "step": 9225 }, { "epoch": 0.5324180895246885, "grad_norm": 0.30931004881858826, "learning_rate": 0.00010613756915127319, "loss": 0.9634, "step": 9230 }, { "epoch": 0.5327065066912783, "grad_norm": 0.28703534603118896, "learning_rate": 0.00010603707669988627, "loss": 0.9669, "step": 9235 }, { "epoch": 0.5329949238578681, "grad_norm": 0.3313189446926117, "learning_rate": 0.00010593657812914129, "loss": 1.0061, "step": 9240 }, { "epoch": 0.5332833410244577, "grad_norm": 0.2962750494480133, "learning_rate": 0.00010583607354090657, "loss": 0.9598, "step": 9245 }, { "epoch": 0.5335717581910475, "grad_norm": 0.32524964213371277, "learning_rate": 0.00010573556303705652, "loss": 1.0356, "step": 9250 }, { "epoch": 0.5338601753576373, "grad_norm": 0.31498393416404724, "learning_rate": 0.00010563504671947153, "loss": 0.9552, "step": 9255 }, { "epoch": 0.5341485925242271, "grad_norm": 0.2950249910354614, "learning_rate": 0.00010553452469003789, "loss": 0.9432, "step": 9260 }, { "epoch": 0.5344370096908168, "grad_norm": 0.27950915694236755, "learning_rate": 0.00010543399705064771, "loss": 1.0472, "step": 9265 }, { "epoch": 0.5347254268574065, "grad_norm": 0.3320115804672241, "learning_rate": 0.00010533346390319867, "loss": 1.057, "step": 9270 }, { "epoch": 0.5350138440239963, "grad_norm": 0.29796433448791504, "learning_rate": 0.00010523292534959419, "loss": 0.947, "step": 9275 }, { "epoch": 0.5353022611905861, "grad_norm": 0.3084598779678345, "learning_rate": 0.00010513238149174304, "loss": 0.9316, "step": 9280 }, { "epoch": 0.5355906783571758, "grad_norm": 0.2846716344356537, "learning_rate": 0.00010503183243155952, "loss": 0.9935, "step": 9285 }, { "epoch": 0.5358790955237656, "grad_norm": 0.33815798163414, "learning_rate": 0.00010493127827096298, "loss": 0.9659, "step": 9290 }, { "epoch": 0.5361675126903553, "grad_norm": 0.3921065628528595, "learning_rate": 0.00010483071911187818, "loss": 0.9826, "step": 9295 }, { "epoch": 0.5364559298569451, "grad_norm": 0.3282729983329773, "learning_rate": 0.00010473015505623477, "loss": 0.9255, "step": 9300 }, { "epoch": 0.5367443470235348, "grad_norm": 0.2944236099720001, "learning_rate": 0.00010462958620596745, "loss": 0.9622, "step": 9305 }, { "epoch": 0.5370327641901246, "grad_norm": 0.30026480555534363, "learning_rate": 0.00010452901266301574, "loss": 0.9721, "step": 9310 }, { "epoch": 0.5373211813567144, "grad_norm": 0.3067670464515686, "learning_rate": 0.00010442843452932394, "loss": 1.0303, "step": 9315 }, { "epoch": 0.5376095985233041, "grad_norm": 0.33017462491989136, "learning_rate": 0.000104327851906841, "loss": 0.9387, "step": 9320 }, { "epoch": 0.5378980156898938, "grad_norm": 0.2776556611061096, "learning_rate": 0.00010422726489752041, "loss": 0.8753, "step": 9325 }, { "epoch": 0.5381864328564836, "grad_norm": 0.316010981798172, "learning_rate": 0.00010412667360332013, "loss": 0.9669, "step": 9330 }, { "epoch": 0.5384748500230734, "grad_norm": 0.29539474844932556, "learning_rate": 0.00010402607812620244, "loss": 0.9677, "step": 9335 }, { "epoch": 0.5387632671896632, "grad_norm": 0.2884175479412079, "learning_rate": 0.00010392547856813384, "loss": 0.9596, "step": 9340 }, { "epoch": 0.5390516843562528, "grad_norm": 0.2999941110610962, "learning_rate": 0.00010382487503108503, "loss": 1.053, "step": 9345 }, { "epoch": 0.5393401015228426, "grad_norm": 0.352108895778656, "learning_rate": 0.00010372426761703067, "loss": 1.0039, "step": 9350 }, { "epoch": 0.5396285186894324, "grad_norm": 0.2817211449146271, "learning_rate": 0.00010362365642794943, "loss": 0.9056, "step": 9355 }, { "epoch": 0.5399169358560222, "grad_norm": 0.3019465506076813, "learning_rate": 0.00010352304156582376, "loss": 0.9113, "step": 9360 }, { "epoch": 0.5402053530226119, "grad_norm": 0.3267442286014557, "learning_rate": 0.00010342242313263974, "loss": 0.9913, "step": 9365 }, { "epoch": 0.5404937701892016, "grad_norm": 0.3325335383415222, "learning_rate": 0.0001033218012303873, "loss": 0.976, "step": 9370 }, { "epoch": 0.5407821873557914, "grad_norm": 0.3207154870033264, "learning_rate": 0.00010322117596105967, "loss": 0.9713, "step": 9375 }, { "epoch": 0.5410706045223812, "grad_norm": 0.2877350449562073, "learning_rate": 0.00010312054742665362, "loss": 0.9579, "step": 9380 }, { "epoch": 0.5413590216889709, "grad_norm": 0.3093964159488678, "learning_rate": 0.00010301991572916914, "loss": 0.9728, "step": 9385 }, { "epoch": 0.5416474388555607, "grad_norm": 0.30820104479789734, "learning_rate": 0.0001029192809706095, "loss": 0.9679, "step": 9390 }, { "epoch": 0.5419358560221504, "grad_norm": 0.3071676790714264, "learning_rate": 0.00010281864325298102, "loss": 0.9454, "step": 9395 }, { "epoch": 0.5422242731887402, "grad_norm": 0.3092137575149536, "learning_rate": 0.00010271800267829308, "loss": 0.9537, "step": 9400 }, { "epoch": 0.5425126903553299, "grad_norm": 0.3113473951816559, "learning_rate": 0.00010261735934855788, "loss": 0.997, "step": 9405 }, { "epoch": 0.5428011075219197, "grad_norm": 0.34891951084136963, "learning_rate": 0.00010251671336579048, "loss": 1.0035, "step": 9410 }, { "epoch": 0.5430895246885095, "grad_norm": 0.33309951424598694, "learning_rate": 0.00010241606483200857, "loss": 0.9692, "step": 9415 }, { "epoch": 0.5433779418550992, "grad_norm": 0.31338512897491455, "learning_rate": 0.00010231541384923248, "loss": 0.9677, "step": 9420 }, { "epoch": 0.5436663590216889, "grad_norm": 0.30394166707992554, "learning_rate": 0.00010221476051948502, "loss": 0.9955, "step": 9425 }, { "epoch": 0.5439547761882787, "grad_norm": 0.3012612462043762, "learning_rate": 0.0001021141049447913, "loss": 0.9789, "step": 9430 }, { "epoch": 0.5442431933548685, "grad_norm": 0.3203679919242859, "learning_rate": 0.00010201344722717881, "loss": 1.0433, "step": 9435 }, { "epoch": 0.5445316105214583, "grad_norm": 0.33140864968299866, "learning_rate": 0.00010191278746867714, "loss": 0.9415, "step": 9440 }, { "epoch": 0.544820027688048, "grad_norm": 0.3454248607158661, "learning_rate": 0.00010181212577131796, "loss": 1.0054, "step": 9445 }, { "epoch": 0.5451084448546377, "grad_norm": 0.31142881512641907, "learning_rate": 0.00010171146223713496, "loss": 0.9692, "step": 9450 }, { "epoch": 0.5453968620212275, "grad_norm": 0.29943177103996277, "learning_rate": 0.00010161079696816362, "loss": 0.9, "step": 9455 }, { "epoch": 0.5456852791878173, "grad_norm": 0.3128969669342041, "learning_rate": 0.00010151013006644128, "loss": 0.991, "step": 9460 }, { "epoch": 0.5459736963544071, "grad_norm": 0.32823264598846436, "learning_rate": 0.00010140946163400675, "loss": 0.9202, "step": 9465 }, { "epoch": 0.5462621135209967, "grad_norm": 0.31170547008514404, "learning_rate": 0.00010130879177290061, "loss": 0.9825, "step": 9470 }, { "epoch": 0.5465505306875865, "grad_norm": 0.31054195761680603, "learning_rate": 0.00010120812058516467, "loss": 0.9623, "step": 9475 }, { "epoch": 0.5468389478541763, "grad_norm": 0.2681087851524353, "learning_rate": 0.00010110744817284232, "loss": 0.8792, "step": 9480 }, { "epoch": 0.5471273650207661, "grad_norm": 0.40381041169166565, "learning_rate": 0.00010100677463797799, "loss": 0.9968, "step": 9485 }, { "epoch": 0.5474157821873558, "grad_norm": 0.2984217703342438, "learning_rate": 0.00010090610008261738, "loss": 0.9848, "step": 9490 }, { "epoch": 0.5477041993539455, "grad_norm": 0.312328040599823, "learning_rate": 0.00010080542460880711, "loss": 1.0204, "step": 9495 }, { "epoch": 0.5479926165205353, "grad_norm": 0.2800973057746887, "learning_rate": 0.00010070474831859486, "loss": 0.9493, "step": 9500 }, { "epoch": 0.5482810336871251, "grad_norm": 0.33222299814224243, "learning_rate": 0.00010060407131402902, "loss": 0.9683, "step": 9505 }, { "epoch": 0.5485694508537148, "grad_norm": 0.2953050434589386, "learning_rate": 0.0001005033936971588, "loss": 1.0451, "step": 9510 }, { "epoch": 0.5488578680203046, "grad_norm": 0.3120376765727997, "learning_rate": 0.00010040271557003394, "loss": 1.0147, "step": 9515 }, { "epoch": 0.5491462851868943, "grad_norm": 0.2867201268672943, "learning_rate": 0.00010030203703470477, "loss": 0.9954, "step": 9520 }, { "epoch": 0.5494347023534841, "grad_norm": 0.3178676962852478, "learning_rate": 0.00010020135819322203, "loss": 0.9652, "step": 9525 }, { "epoch": 0.5497231195200738, "grad_norm": 0.2976604104042053, "learning_rate": 0.00010010067914763668, "loss": 0.9695, "step": 9530 }, { "epoch": 0.5500115366866636, "grad_norm": 0.2862003445625305, "learning_rate": 0.0001, "loss": 1.0483, "step": 9535 }, { "epoch": 0.5502999538532534, "grad_norm": 0.32227590680122375, "learning_rate": 9.989932085236334e-05, "loss": 0.9444, "step": 9540 }, { "epoch": 0.5505883710198431, "grad_norm": 0.33216729760169983, "learning_rate": 9.979864180677801e-05, "loss": 0.9703, "step": 9545 }, { "epoch": 0.5508767881864328, "grad_norm": 0.31401535868644714, "learning_rate": 9.969796296529525e-05, "loss": 0.9568, "step": 9550 }, { "epoch": 0.5511652053530226, "grad_norm": 0.31950071454048157, "learning_rate": 9.959728442996606e-05, "loss": 0.9557, "step": 9555 }, { "epoch": 0.5514536225196124, "grad_norm": 0.2919849753379822, "learning_rate": 9.949660630284122e-05, "loss": 0.9718, "step": 9560 }, { "epoch": 0.5517420396862022, "grad_norm": 0.29922500252723694, "learning_rate": 9.939592868597097e-05, "loss": 0.9499, "step": 9565 }, { "epoch": 0.5520304568527918, "grad_norm": 0.32805556058883667, "learning_rate": 9.929525168140516e-05, "loss": 1.0231, "step": 9570 }, { "epoch": 0.5523188740193816, "grad_norm": 0.2967434525489807, "learning_rate": 9.919457539119293e-05, "loss": 0.9839, "step": 9575 }, { "epoch": 0.5526072911859714, "grad_norm": 0.3082091212272644, "learning_rate": 9.909389991738263e-05, "loss": 0.9543, "step": 9580 }, { "epoch": 0.5528957083525612, "grad_norm": 0.31558457016944885, "learning_rate": 9.899322536202205e-05, "loss": 0.929, "step": 9585 }, { "epoch": 0.5531841255191509, "grad_norm": 0.32369035482406616, "learning_rate": 9.889255182715769e-05, "loss": 0.9577, "step": 9590 }, { "epoch": 0.5534725426857406, "grad_norm": 0.30929917097091675, "learning_rate": 9.879187941483536e-05, "loss": 0.9378, "step": 9595 }, { "epoch": 0.5537609598523304, "grad_norm": 0.32841646671295166, "learning_rate": 9.869120822709946e-05, "loss": 0.9896, "step": 9600 }, { "epoch": 0.5540493770189202, "grad_norm": 0.31913015246391296, "learning_rate": 9.859053836599327e-05, "loss": 1.0478, "step": 9605 }, { "epoch": 0.5543377941855099, "grad_norm": 0.3005629777908325, "learning_rate": 9.848986993355877e-05, "loss": 1.0315, "step": 9610 }, { "epoch": 0.5546262113520997, "grad_norm": 0.3119855523109436, "learning_rate": 9.838920303183636e-05, "loss": 0.9761, "step": 9615 }, { "epoch": 0.5549146285186894, "grad_norm": 0.29266947507858276, "learning_rate": 9.828853776286505e-05, "loss": 0.921, "step": 9620 }, { "epoch": 0.5552030456852792, "grad_norm": 0.3018263876438141, "learning_rate": 9.818787422868204e-05, "loss": 0.967, "step": 9625 }, { "epoch": 0.5554914628518689, "grad_norm": 0.27815303206443787, "learning_rate": 9.808721253132289e-05, "loss": 0.9625, "step": 9630 }, { "epoch": 0.5557798800184587, "grad_norm": 0.34874555468559265, "learning_rate": 9.798655277282124e-05, "loss": 0.9527, "step": 9635 }, { "epoch": 0.5560682971850485, "grad_norm": 0.30870407819747925, "learning_rate": 9.78858950552087e-05, "loss": 0.9634, "step": 9640 }, { "epoch": 0.5563567143516382, "grad_norm": 0.3843367099761963, "learning_rate": 9.778523948051504e-05, "loss": 0.9582, "step": 9645 }, { "epoch": 0.5566451315182279, "grad_norm": 0.2970375716686249, "learning_rate": 9.768458615076751e-05, "loss": 0.9807, "step": 9650 }, { "epoch": 0.5569335486848177, "grad_norm": 0.29214388132095337, "learning_rate": 9.758393516799146e-05, "loss": 0.9638, "step": 9655 }, { "epoch": 0.5572219658514075, "grad_norm": 0.30512815713882446, "learning_rate": 9.748328663420952e-05, "loss": 0.9968, "step": 9660 }, { "epoch": 0.5575103830179973, "grad_norm": 0.35112571716308594, "learning_rate": 9.738264065144214e-05, "loss": 1.0067, "step": 9665 }, { "epoch": 0.5577988001845869, "grad_norm": 0.2951698899269104, "learning_rate": 9.728199732170696e-05, "loss": 0.9334, "step": 9670 }, { "epoch": 0.5580872173511767, "grad_norm": 0.30807235836982727, "learning_rate": 9.718135674701899e-05, "loss": 0.9796, "step": 9675 }, { "epoch": 0.5583756345177665, "grad_norm": 0.2801293730735779, "learning_rate": 9.708071902939054e-05, "loss": 0.9888, "step": 9680 }, { "epoch": 0.5586640516843563, "grad_norm": 0.3309718668460846, "learning_rate": 9.698008427083087e-05, "loss": 0.9936, "step": 9685 }, { "epoch": 0.558952468850946, "grad_norm": 0.2967337369918823, "learning_rate": 9.687945257334641e-05, "loss": 0.8834, "step": 9690 }, { "epoch": 0.5592408860175357, "grad_norm": 0.2973209321498871, "learning_rate": 9.677882403894036e-05, "loss": 0.8997, "step": 9695 }, { "epoch": 0.5595293031841255, "grad_norm": 0.28040575981140137, "learning_rate": 9.667819876961272e-05, "loss": 0.884, "step": 9700 }, { "epoch": 0.5598177203507153, "grad_norm": 0.3078805208206177, "learning_rate": 9.657757686736027e-05, "loss": 0.9495, "step": 9705 }, { "epoch": 0.560106137517305, "grad_norm": 0.3054788410663605, "learning_rate": 9.647695843417628e-05, "loss": 1.0202, "step": 9710 }, { "epoch": 0.5603945546838948, "grad_norm": 0.40618494153022766, "learning_rate": 9.637634357205058e-05, "loss": 0.9471, "step": 9715 }, { "epoch": 0.5606829718504845, "grad_norm": 0.33074966073036194, "learning_rate": 9.627573238296933e-05, "loss": 1.0419, "step": 9720 }, { "epoch": 0.5609713890170743, "grad_norm": 0.320569783449173, "learning_rate": 9.617512496891498e-05, "loss": 0.9851, "step": 9725 }, { "epoch": 0.561259806183664, "grad_norm": 0.2868152856826782, "learning_rate": 9.60745214318662e-05, "loss": 0.936, "step": 9730 }, { "epoch": 0.5615482233502538, "grad_norm": 0.3062056005001068, "learning_rate": 9.597392187379758e-05, "loss": 0.9793, "step": 9735 }, { "epoch": 0.5618366405168436, "grad_norm": 0.30484551191329956, "learning_rate": 9.58733263966799e-05, "loss": 0.9776, "step": 9740 }, { "epoch": 0.5621250576834333, "grad_norm": 0.32705235481262207, "learning_rate": 9.577273510247958e-05, "loss": 0.9882, "step": 9745 }, { "epoch": 0.562413474850023, "grad_norm": 0.3526208996772766, "learning_rate": 9.567214809315903e-05, "loss": 0.9331, "step": 9750 }, { "epoch": 0.5627018920166128, "grad_norm": 0.3613347113132477, "learning_rate": 9.557156547067607e-05, "loss": 1.0471, "step": 9755 }, { "epoch": 0.5629903091832026, "grad_norm": 0.29372262954711914, "learning_rate": 9.547098733698428e-05, "loss": 0.9677, "step": 9760 }, { "epoch": 0.5632787263497924, "grad_norm": 0.30436399579048157, "learning_rate": 9.537041379403258e-05, "loss": 0.9875, "step": 9765 }, { "epoch": 0.563567143516382, "grad_norm": 0.3027651607990265, "learning_rate": 9.526984494376524e-05, "loss": 1.0115, "step": 9770 }, { "epoch": 0.5638555606829718, "grad_norm": 0.3011035621166229, "learning_rate": 9.516928088812184e-05, "loss": 0.9937, "step": 9775 }, { "epoch": 0.5641439778495616, "grad_norm": 0.2939337193965912, "learning_rate": 9.5068721729037e-05, "loss": 0.9299, "step": 9780 }, { "epoch": 0.5644323950161514, "grad_norm": 0.2924240827560425, "learning_rate": 9.496816756844052e-05, "loss": 1.0545, "step": 9785 }, { "epoch": 0.5647208121827412, "grad_norm": 0.3084491789340973, "learning_rate": 9.486761850825694e-05, "loss": 0.9381, "step": 9790 }, { "epoch": 0.5650092293493308, "grad_norm": 0.29923975467681885, "learning_rate": 9.476707465040583e-05, "loss": 0.9886, "step": 9795 }, { "epoch": 0.5652976465159206, "grad_norm": 0.3646468222141266, "learning_rate": 9.466653609680137e-05, "loss": 0.973, "step": 9800 }, { "epoch": 0.5655860636825104, "grad_norm": 0.2971440553665161, "learning_rate": 9.456600294935231e-05, "loss": 1.0352, "step": 9805 }, { "epoch": 0.5658744808491002, "grad_norm": 0.27280595898628235, "learning_rate": 9.446547530996214e-05, "loss": 1.0609, "step": 9810 }, { "epoch": 0.5661628980156899, "grad_norm": 0.2964895963668823, "learning_rate": 9.436495328052846e-05, "loss": 0.9792, "step": 9815 }, { "epoch": 0.5664513151822796, "grad_norm": 0.30843856930732727, "learning_rate": 9.426443696294351e-05, "loss": 0.9715, "step": 9820 }, { "epoch": 0.5667397323488694, "grad_norm": 0.30792513489723206, "learning_rate": 9.416392645909347e-05, "loss": 0.9974, "step": 9825 }, { "epoch": 0.5670281495154592, "grad_norm": 0.29897308349609375, "learning_rate": 9.406342187085875e-05, "loss": 0.9381, "step": 9830 }, { "epoch": 0.5673165666820489, "grad_norm": 0.33265048265457153, "learning_rate": 9.396292330011377e-05, "loss": 0.9877, "step": 9835 }, { "epoch": 0.5676049838486387, "grad_norm": 0.29707449674606323, "learning_rate": 9.386243084872682e-05, "loss": 0.9378, "step": 9840 }, { "epoch": 0.5678934010152284, "grad_norm": 0.31178462505340576, "learning_rate": 9.376194461856001e-05, "loss": 0.9454, "step": 9845 }, { "epoch": 0.5681818181818182, "grad_norm": 0.26626142859458923, "learning_rate": 9.36614647114691e-05, "loss": 1.018, "step": 9850 }, { "epoch": 0.5684702353484079, "grad_norm": 0.4671937823295593, "learning_rate": 9.356099122930352e-05, "loss": 0.9548, "step": 9855 }, { "epoch": 0.5687586525149977, "grad_norm": 0.31160303950309753, "learning_rate": 9.34605242739061e-05, "loss": 1.0282, "step": 9860 }, { "epoch": 0.5690470696815875, "grad_norm": 0.2990851104259491, "learning_rate": 9.336006394711308e-05, "loss": 0.9297, "step": 9865 }, { "epoch": 0.5693354868481773, "grad_norm": 0.2787928879261017, "learning_rate": 9.325961035075405e-05, "loss": 0.938, "step": 9870 }, { "epoch": 0.5696239040147669, "grad_norm": 0.30769744515419006, "learning_rate": 9.315916358665166e-05, "loss": 0.9144, "step": 9875 }, { "epoch": 0.5699123211813567, "grad_norm": 0.30106931924819946, "learning_rate": 9.305872375662176e-05, "loss": 0.9833, "step": 9880 }, { "epoch": 0.5702007383479465, "grad_norm": 0.3208131492137909, "learning_rate": 9.295829096247304e-05, "loss": 1.0075, "step": 9885 }, { "epoch": 0.5704891555145363, "grad_norm": 0.31395384669303894, "learning_rate": 9.285786530600718e-05, "loss": 0.9917, "step": 9890 }, { "epoch": 0.5707775726811259, "grad_norm": 0.32120105624198914, "learning_rate": 9.275744688901858e-05, "loss": 0.9922, "step": 9895 }, { "epoch": 0.5710659898477157, "grad_norm": 0.30216944217681885, "learning_rate": 9.26570358132942e-05, "loss": 0.9661, "step": 9900 }, { "epoch": 0.5713544070143055, "grad_norm": 0.344277560710907, "learning_rate": 9.255663218061379e-05, "loss": 0.9521, "step": 9905 }, { "epoch": 0.5716428241808953, "grad_norm": 0.2767544388771057, "learning_rate": 9.245623609274928e-05, "loss": 0.8752, "step": 9910 }, { "epoch": 0.571931241347485, "grad_norm": 0.29667040705680847, "learning_rate": 9.235584765146519e-05, "loss": 0.9666, "step": 9915 }, { "epoch": 0.5722196585140747, "grad_norm": 0.2905004322528839, "learning_rate": 9.225546695851815e-05, "loss": 0.9477, "step": 9920 }, { "epoch": 0.5725080756806645, "grad_norm": 0.3022611439228058, "learning_rate": 9.215509411565695e-05, "loss": 0.9692, "step": 9925 }, { "epoch": 0.5727964928472543, "grad_norm": 0.31084057688713074, "learning_rate": 9.20547292246225e-05, "loss": 0.975, "step": 9930 }, { "epoch": 0.573084910013844, "grad_norm": 0.3119238615036011, "learning_rate": 9.195437238714755e-05, "loss": 0.9913, "step": 9935 }, { "epoch": 0.5733733271804338, "grad_norm": 0.2962389588356018, "learning_rate": 9.185402370495677e-05, "loss": 0.9217, "step": 9940 }, { "epoch": 0.5736617443470236, "grad_norm": 0.372999370098114, "learning_rate": 9.17536832797665e-05, "loss": 0.9033, "step": 9945 }, { "epoch": 0.5739501615136133, "grad_norm": 0.3164578974246979, "learning_rate": 9.165335121328477e-05, "loss": 0.9219, "step": 9950 }, { "epoch": 0.574238578680203, "grad_norm": 0.3055517375469208, "learning_rate": 9.155302760721112e-05, "loss": 0.9748, "step": 9955 }, { "epoch": 0.5745269958467928, "grad_norm": 0.32085710763931274, "learning_rate": 9.14527125632364e-05, "loss": 1.0123, "step": 9960 }, { "epoch": 0.5748154130133826, "grad_norm": 0.30235058069229126, "learning_rate": 9.135240618304301e-05, "loss": 0.9886, "step": 9965 }, { "epoch": 0.5751038301799724, "grad_norm": 0.3043424189090729, "learning_rate": 9.125210856830433e-05, "loss": 0.962, "step": 9970 }, { "epoch": 0.575392247346562, "grad_norm": 0.32040169835090637, "learning_rate": 9.115181982068506e-05, "loss": 0.9993, "step": 9975 }, { "epoch": 0.5756806645131518, "grad_norm": 0.3150070607662201, "learning_rate": 9.105154004184071e-05, "loss": 0.9307, "step": 9980 }, { "epoch": 0.5759690816797416, "grad_norm": 0.32180115580558777, "learning_rate": 9.095126933341793e-05, "loss": 0.9771, "step": 9985 }, { "epoch": 0.5762574988463314, "grad_norm": 0.3003450632095337, "learning_rate": 9.085100779705398e-05, "loss": 1.0007, "step": 9990 }, { "epoch": 0.576545916012921, "grad_norm": 0.3017977774143219, "learning_rate": 9.075075553437687e-05, "loss": 0.9714, "step": 9995 }, { "epoch": 0.5768343331795108, "grad_norm": 0.29309558868408203, "learning_rate": 9.065051264700527e-05, "loss": 0.8992, "step": 10000 }, { "epoch": 0.5771227503461006, "grad_norm": 0.2850026786327362, "learning_rate": 9.05502792365483e-05, "loss": 0.9094, "step": 10005 }, { "epoch": 0.5774111675126904, "grad_norm": 0.30001845955848694, "learning_rate": 9.045005540460552e-05, "loss": 0.9405, "step": 10010 }, { "epoch": 0.5776995846792801, "grad_norm": 0.29599326848983765, "learning_rate": 9.03498412527667e-05, "loss": 0.9898, "step": 10015 }, { "epoch": 0.5779880018458698, "grad_norm": 0.32731327414512634, "learning_rate": 9.024963688261186e-05, "loss": 1.0046, "step": 10020 }, { "epoch": 0.5782764190124596, "grad_norm": 0.28468936681747437, "learning_rate": 9.01494423957111e-05, "loss": 0.8711, "step": 10025 }, { "epoch": 0.5785648361790494, "grad_norm": 0.29854655265808105, "learning_rate": 9.004925789362446e-05, "loss": 0.9974, "step": 10030 }, { "epoch": 0.5788532533456391, "grad_norm": 0.28597164154052734, "learning_rate": 8.994908347790193e-05, "loss": 0.966, "step": 10035 }, { "epoch": 0.5791416705122289, "grad_norm": 0.293830007314682, "learning_rate": 8.984891925008321e-05, "loss": 0.9991, "step": 10040 }, { "epoch": 0.5794300876788187, "grad_norm": 0.30949392914772034, "learning_rate": 8.974876531169772e-05, "loss": 0.9751, "step": 10045 }, { "epoch": 0.5797185048454084, "grad_norm": 0.280025839805603, "learning_rate": 8.964862176426443e-05, "loss": 0.9497, "step": 10050 }, { "epoch": 0.5800069220119981, "grad_norm": 0.35741013288497925, "learning_rate": 8.954848870929171e-05, "loss": 0.9903, "step": 10055 }, { "epoch": 0.5802953391785879, "grad_norm": 0.3232552707195282, "learning_rate": 8.944836624827748e-05, "loss": 0.9788, "step": 10060 }, { "epoch": 0.5805837563451777, "grad_norm": 0.3030491769313812, "learning_rate": 8.93482544827087e-05, "loss": 0.9476, "step": 10065 }, { "epoch": 0.5808721735117675, "grad_norm": 0.2858760952949524, "learning_rate": 8.924815351406163e-05, "loss": 0.9144, "step": 10070 }, { "epoch": 0.5811605906783571, "grad_norm": 0.2993592321872711, "learning_rate": 8.914806344380156e-05, "loss": 0.9965, "step": 10075 }, { "epoch": 0.5814490078449469, "grad_norm": 0.2993376851081848, "learning_rate": 8.904798437338272e-05, "loss": 0.9078, "step": 10080 }, { "epoch": 0.5817374250115367, "grad_norm": 0.29207873344421387, "learning_rate": 8.89479164042482e-05, "loss": 0.9786, "step": 10085 }, { "epoch": 0.5820258421781265, "grad_norm": 0.29630357027053833, "learning_rate": 8.884785963782975e-05, "loss": 0.9935, "step": 10090 }, { "epoch": 0.5823142593447161, "grad_norm": 0.3105989694595337, "learning_rate": 8.874781417554797e-05, "loss": 0.9515, "step": 10095 }, { "epoch": 0.5826026765113059, "grad_norm": 0.31932732462882996, "learning_rate": 8.864778011881175e-05, "loss": 1.0112, "step": 10100 }, { "epoch": 0.5828910936778957, "grad_norm": 0.3012641668319702, "learning_rate": 8.854775756901866e-05, "loss": 0.9548, "step": 10105 }, { "epoch": 0.5831795108444855, "grad_norm": 0.3154539465904236, "learning_rate": 8.84477466275544e-05, "loss": 0.937, "step": 10110 }, { "epoch": 0.5834679280110752, "grad_norm": 0.31845125555992126, "learning_rate": 8.834774739579304e-05, "loss": 0.9737, "step": 10115 }, { "epoch": 0.583756345177665, "grad_norm": 0.29611679911613464, "learning_rate": 8.824775997509675e-05, "loss": 0.9389, "step": 10120 }, { "epoch": 0.5840447623442547, "grad_norm": 0.28183600306510925, "learning_rate": 8.814778446681563e-05, "loss": 0.9568, "step": 10125 }, { "epoch": 0.5843331795108445, "grad_norm": 0.2992686629295349, "learning_rate": 8.80478209722879e-05, "loss": 1.0136, "step": 10130 }, { "epoch": 0.5846215966774342, "grad_norm": 0.3107997477054596, "learning_rate": 8.794786959283938e-05, "loss": 0.9818, "step": 10135 }, { "epoch": 0.584910013844024, "grad_norm": 0.29976189136505127, "learning_rate": 8.784793042978384e-05, "loss": 1.0196, "step": 10140 }, { "epoch": 0.5851984310106138, "grad_norm": 0.26917529106140137, "learning_rate": 8.774800358442246e-05, "loss": 0.9463, "step": 10145 }, { "epoch": 0.5854868481772035, "grad_norm": 0.295770525932312, "learning_rate": 8.764808915804401e-05, "loss": 0.9743, "step": 10150 }, { "epoch": 0.5857752653437933, "grad_norm": 0.2809622883796692, "learning_rate": 8.754818725192475e-05, "loss": 0.9612, "step": 10155 }, { "epoch": 0.586063682510383, "grad_norm": 0.30057060718536377, "learning_rate": 8.744829796732812e-05, "loss": 0.9618, "step": 10160 }, { "epoch": 0.5863520996769728, "grad_norm": 0.31506556272506714, "learning_rate": 8.734842140550488e-05, "loss": 0.99, "step": 10165 }, { "epoch": 0.5866405168435626, "grad_norm": 0.2896447777748108, "learning_rate": 8.724855766769282e-05, "loss": 0.9272, "step": 10170 }, { "epoch": 0.5869289340101523, "grad_norm": 0.32177969813346863, "learning_rate": 8.714870685511676e-05, "loss": 0.9798, "step": 10175 }, { "epoch": 0.587217351176742, "grad_norm": 0.33434587717056274, "learning_rate": 8.70488690689884e-05, "loss": 1.0404, "step": 10180 }, { "epoch": 0.5875057683433318, "grad_norm": 0.28400492668151855, "learning_rate": 8.694904441050625e-05, "loss": 0.9206, "step": 10185 }, { "epoch": 0.5877941855099216, "grad_norm": 0.3033732771873474, "learning_rate": 8.684923298085555e-05, "loss": 0.9642, "step": 10190 }, { "epoch": 0.5880826026765114, "grad_norm": 0.3117521405220032, "learning_rate": 8.674943488120801e-05, "loss": 1.0299, "step": 10195 }, { "epoch": 0.588371019843101, "grad_norm": 0.2808005213737488, "learning_rate": 8.6649650212722e-05, "loss": 0.9186, "step": 10200 }, { "epoch": 0.5886594370096908, "grad_norm": 0.306206613779068, "learning_rate": 8.654987907654214e-05, "loss": 0.9317, "step": 10205 }, { "epoch": 0.5889478541762806, "grad_norm": 0.28762176632881165, "learning_rate": 8.645012157379941e-05, "loss": 0.9352, "step": 10210 }, { "epoch": 0.5892362713428704, "grad_norm": 0.3348996639251709, "learning_rate": 8.635037780561093e-05, "loss": 0.9577, "step": 10215 }, { "epoch": 0.58952468850946, "grad_norm": 0.3033459186553955, "learning_rate": 8.625064787307986e-05, "loss": 0.9789, "step": 10220 }, { "epoch": 0.5898131056760498, "grad_norm": 0.28775766491889954, "learning_rate": 8.615093187729544e-05, "loss": 0.9979, "step": 10225 }, { "epoch": 0.5901015228426396, "grad_norm": 0.312083899974823, "learning_rate": 8.605122991933271e-05, "loss": 0.8772, "step": 10230 }, { "epoch": 0.5903899400092294, "grad_norm": 0.3176785409450531, "learning_rate": 8.595154210025251e-05, "loss": 0.9501, "step": 10235 }, { "epoch": 0.5906783571758191, "grad_norm": 0.3222027122974396, "learning_rate": 8.585186852110134e-05, "loss": 0.9476, "step": 10240 }, { "epoch": 0.5909667743424089, "grad_norm": 0.2980990707874298, "learning_rate": 8.575220928291123e-05, "loss": 1.0012, "step": 10245 }, { "epoch": 0.5912551915089986, "grad_norm": 0.3284415006637573, "learning_rate": 8.565256448669976e-05, "loss": 0.9454, "step": 10250 }, { "epoch": 0.5915436086755884, "grad_norm": 0.41807207465171814, "learning_rate": 8.555293423346977e-05, "loss": 1.0065, "step": 10255 }, { "epoch": 0.5918320258421781, "grad_norm": 0.3130856156349182, "learning_rate": 8.545331862420944e-05, "loss": 1.0001, "step": 10260 }, { "epoch": 0.5921204430087679, "grad_norm": 0.3069097697734833, "learning_rate": 8.535371775989204e-05, "loss": 0.9449, "step": 10265 }, { "epoch": 0.5924088601753577, "grad_norm": 0.38203707337379456, "learning_rate": 8.525413174147598e-05, "loss": 0.9914, "step": 10270 }, { "epoch": 0.5926972773419474, "grad_norm": 0.3196433186531067, "learning_rate": 8.515456066990455e-05, "loss": 0.9902, "step": 10275 }, { "epoch": 0.5929856945085371, "grad_norm": 0.294821172952652, "learning_rate": 8.505500464610584e-05, "loss": 0.9215, "step": 10280 }, { "epoch": 0.5932741116751269, "grad_norm": 0.28446537256240845, "learning_rate": 8.495546377099287e-05, "loss": 1.0246, "step": 10285 }, { "epoch": 0.5935625288417167, "grad_norm": 0.2978392243385315, "learning_rate": 8.485593814546307e-05, "loss": 0.9981, "step": 10290 }, { "epoch": 0.5938509460083065, "grad_norm": 0.30145201086997986, "learning_rate": 8.475642787039867e-05, "loss": 0.9243, "step": 10295 }, { "epoch": 0.5941393631748961, "grad_norm": 0.3130795955657959, "learning_rate": 8.465693304666606e-05, "loss": 1.0306, "step": 10300 }, { "epoch": 0.5944277803414859, "grad_norm": 0.3145841062068939, "learning_rate": 8.455745377511626e-05, "loss": 1.0172, "step": 10305 }, { "epoch": 0.5947161975080757, "grad_norm": 0.3414532244205475, "learning_rate": 8.445799015658427e-05, "loss": 0.9902, "step": 10310 }, { "epoch": 0.5950046146746655, "grad_norm": 0.2802201807498932, "learning_rate": 8.435854229188934e-05, "loss": 0.9086, "step": 10315 }, { "epoch": 0.5952930318412551, "grad_norm": 0.31181442737579346, "learning_rate": 8.425911028183479e-05, "loss": 0.9267, "step": 10320 }, { "epoch": 0.5955814490078449, "grad_norm": 0.3048861026763916, "learning_rate": 8.415969422720778e-05, "loss": 0.965, "step": 10325 }, { "epoch": 0.5958698661744347, "grad_norm": 0.28428834676742554, "learning_rate": 8.406029422877937e-05, "loss": 0.8888, "step": 10330 }, { "epoch": 0.5961582833410245, "grad_norm": 0.3297795355319977, "learning_rate": 8.396091038730431e-05, "loss": 0.9079, "step": 10335 }, { "epoch": 0.5964467005076142, "grad_norm": 0.331345796585083, "learning_rate": 8.386154280352094e-05, "loss": 0.8413, "step": 10340 }, { "epoch": 0.596735117674204, "grad_norm": 0.4152957499027252, "learning_rate": 8.37621915781512e-05, "loss": 0.9664, "step": 10345 }, { "epoch": 0.5970235348407937, "grad_norm": 0.31456562876701355, "learning_rate": 8.366285681190039e-05, "loss": 0.9788, "step": 10350 }, { "epoch": 0.5973119520073835, "grad_norm": 0.29719334840774536, "learning_rate": 8.356353860545715e-05, "loss": 0.9616, "step": 10355 }, { "epoch": 0.5976003691739732, "grad_norm": 0.3752502501010895, "learning_rate": 8.34642370594933e-05, "loss": 0.9854, "step": 10360 }, { "epoch": 0.597888786340563, "grad_norm": 0.3314830958843231, "learning_rate": 8.336495227466385e-05, "loss": 0.92, "step": 10365 }, { "epoch": 0.5981772035071528, "grad_norm": 0.2938712239265442, "learning_rate": 8.326568435160677e-05, "loss": 0.8947, "step": 10370 }, { "epoch": 0.5984656206737425, "grad_norm": 0.28025558590888977, "learning_rate": 8.316643339094285e-05, "loss": 0.9338, "step": 10375 }, { "epoch": 0.5987540378403322, "grad_norm": 0.31624218821525574, "learning_rate": 8.306719949327588e-05, "loss": 0.9782, "step": 10380 }, { "epoch": 0.599042455006922, "grad_norm": 0.3080288767814636, "learning_rate": 8.296798275919217e-05, "loss": 0.9119, "step": 10385 }, { "epoch": 0.5993308721735118, "grad_norm": 0.31243014335632324, "learning_rate": 8.286878328926077e-05, "loss": 0.9489, "step": 10390 }, { "epoch": 0.5996192893401016, "grad_norm": 0.28355371952056885, "learning_rate": 8.276960118403314e-05, "loss": 0.9557, "step": 10395 }, { "epoch": 0.5999077065066912, "grad_norm": 0.30809932947158813, "learning_rate": 8.26704365440432e-05, "loss": 0.9632, "step": 10400 }, { "epoch": 0.600196123673281, "grad_norm": 0.32145076990127563, "learning_rate": 8.257128946980713e-05, "loss": 0.9607, "step": 10405 }, { "epoch": 0.6004845408398708, "grad_norm": 0.30059847235679626, "learning_rate": 8.247216006182326e-05, "loss": 0.9015, "step": 10410 }, { "epoch": 0.6007729580064606, "grad_norm": 0.2907862067222595, "learning_rate": 8.237304842057214e-05, "loss": 0.9672, "step": 10415 }, { "epoch": 0.6010613751730502, "grad_norm": 0.27866873145103455, "learning_rate": 8.227395464651618e-05, "loss": 0.9158, "step": 10420 }, { "epoch": 0.60134979233964, "grad_norm": 0.33694329857826233, "learning_rate": 8.217487884009979e-05, "loss": 0.9546, "step": 10425 }, { "epoch": 0.6016382095062298, "grad_norm": 0.28351861238479614, "learning_rate": 8.20758211017491e-05, "loss": 1.0306, "step": 10430 }, { "epoch": 0.6019266266728196, "grad_norm": 0.3070996105670929, "learning_rate": 8.197678153187185e-05, "loss": 1.0044, "step": 10435 }, { "epoch": 0.6022150438394093, "grad_norm": 0.29808667302131653, "learning_rate": 8.187776023085762e-05, "loss": 0.9833, "step": 10440 }, { "epoch": 0.602503461005999, "grad_norm": 0.2630424499511719, "learning_rate": 8.177875729907714e-05, "loss": 0.8795, "step": 10445 }, { "epoch": 0.6027918781725888, "grad_norm": 0.3080122470855713, "learning_rate": 8.167977283688282e-05, "loss": 0.9663, "step": 10450 }, { "epoch": 0.6030802953391786, "grad_norm": 0.3310147821903229, "learning_rate": 8.158080694460807e-05, "loss": 0.9654, "step": 10455 }, { "epoch": 0.6033687125057683, "grad_norm": 0.27639147639274597, "learning_rate": 8.148185972256778e-05, "loss": 0.9594, "step": 10460 }, { "epoch": 0.6036571296723581, "grad_norm": 0.298957496881485, "learning_rate": 8.138293127105768e-05, "loss": 0.9537, "step": 10465 }, { "epoch": 0.6039455468389479, "grad_norm": 0.28867480158805847, "learning_rate": 8.128402169035451e-05, "loss": 0.903, "step": 10470 }, { "epoch": 0.6042339640055376, "grad_norm": 0.3202032148838043, "learning_rate": 8.118513108071599e-05, "loss": 0.8776, "step": 10475 }, { "epoch": 0.6045223811721273, "grad_norm": 0.29487964510917664, "learning_rate": 8.108625954238051e-05, "loss": 0.9058, "step": 10480 }, { "epoch": 0.6048107983387171, "grad_norm": 0.29023751616477966, "learning_rate": 8.098740717556718e-05, "loss": 0.9536, "step": 10485 }, { "epoch": 0.6050992155053069, "grad_norm": 0.28486311435699463, "learning_rate": 8.088857408047562e-05, "loss": 0.8713, "step": 10490 }, { "epoch": 0.6053876326718967, "grad_norm": 0.30855536460876465, "learning_rate": 8.0789760357286e-05, "loss": 0.9776, "step": 10495 }, { "epoch": 0.6056760498384864, "grad_norm": 0.3332061767578125, "learning_rate": 8.06909661061588e-05, "loss": 0.9547, "step": 10500 }, { "epoch": 0.6059644670050761, "grad_norm": 0.3112873435020447, "learning_rate": 8.059219142723474e-05, "loss": 0.964, "step": 10505 }, { "epoch": 0.6062528841716659, "grad_norm": 0.3010444641113281, "learning_rate": 8.049343642063477e-05, "loss": 0.9752, "step": 10510 }, { "epoch": 0.6065413013382557, "grad_norm": 0.33293378353118896, "learning_rate": 8.039470118645982e-05, "loss": 0.9532, "step": 10515 }, { "epoch": 0.6068297185048455, "grad_norm": 0.2785302698612213, "learning_rate": 8.029598582479088e-05, "loss": 1.0026, "step": 10520 }, { "epoch": 0.6071181356714351, "grad_norm": 0.2951408326625824, "learning_rate": 8.019729043568866e-05, "loss": 0.9447, "step": 10525 }, { "epoch": 0.6074065528380249, "grad_norm": 0.3003128170967102, "learning_rate": 8.00986151191938e-05, "loss": 1.0167, "step": 10530 }, { "epoch": 0.6076949700046147, "grad_norm": 0.2881382405757904, "learning_rate": 7.999995997532641e-05, "loss": 0.9712, "step": 10535 }, { "epoch": 0.6079833871712045, "grad_norm": 0.3272612392902374, "learning_rate": 7.990132510408625e-05, "loss": 0.932, "step": 10540 }, { "epoch": 0.6082718043377942, "grad_norm": 0.29652857780456543, "learning_rate": 7.980271060545255e-05, "loss": 0.9897, "step": 10545 }, { "epoch": 0.6085602215043839, "grad_norm": 0.2992599308490753, "learning_rate": 7.970411657938381e-05, "loss": 0.9029, "step": 10550 }, { "epoch": 0.6088486386709737, "grad_norm": 0.285044401884079, "learning_rate": 7.960554312581789e-05, "loss": 0.9396, "step": 10555 }, { "epoch": 0.6091370558375635, "grad_norm": 0.2942273020744324, "learning_rate": 7.95069903446717e-05, "loss": 0.9674, "step": 10560 }, { "epoch": 0.6094254730041532, "grad_norm": 0.30003759264945984, "learning_rate": 7.940845833584123e-05, "loss": 0.9442, "step": 10565 }, { "epoch": 0.609713890170743, "grad_norm": 0.2985435426235199, "learning_rate": 7.930994719920142e-05, "loss": 0.9265, "step": 10570 }, { "epoch": 0.6100023073373327, "grad_norm": 0.29074397683143616, "learning_rate": 7.921145703460603e-05, "loss": 1.0108, "step": 10575 }, { "epoch": 0.6102907245039225, "grad_norm": 0.35004884004592896, "learning_rate": 7.911298794188761e-05, "loss": 0.9595, "step": 10580 }, { "epoch": 0.6105791416705122, "grad_norm": 0.3128565549850464, "learning_rate": 7.901454002085731e-05, "loss": 0.9111, "step": 10585 }, { "epoch": 0.610867558837102, "grad_norm": 0.34627801179885864, "learning_rate": 7.891611337130482e-05, "loss": 0.9754, "step": 10590 }, { "epoch": 0.6111559760036918, "grad_norm": 0.28770461678504944, "learning_rate": 7.881770809299833e-05, "loss": 0.9568, "step": 10595 }, { "epoch": 0.6114443931702815, "grad_norm": 0.43506577610969543, "learning_rate": 7.871932428568418e-05, "loss": 0.942, "step": 10600 }, { "epoch": 0.6117328103368712, "grad_norm": 0.2795199751853943, "learning_rate": 7.862096204908726e-05, "loss": 0.9539, "step": 10605 }, { "epoch": 0.612021227503461, "grad_norm": 0.28520259261131287, "learning_rate": 7.852262148291028e-05, "loss": 0.9625, "step": 10610 }, { "epoch": 0.6123096446700508, "grad_norm": 0.3216764032840729, "learning_rate": 7.842430268683422e-05, "loss": 1.0665, "step": 10615 }, { "epoch": 0.6125980618366406, "grad_norm": 0.29916471242904663, "learning_rate": 7.832600576051779e-05, "loss": 0.9015, "step": 10620 }, { "epoch": 0.6128864790032302, "grad_norm": 0.2716759443283081, "learning_rate": 7.822773080359778e-05, "loss": 0.9538, "step": 10625 }, { "epoch": 0.61317489616982, "grad_norm": 0.31317955255508423, "learning_rate": 7.812947791568845e-05, "loss": 1.0093, "step": 10630 }, { "epoch": 0.6134633133364098, "grad_norm": 0.3098663091659546, "learning_rate": 7.803124719638184e-05, "loss": 0.9008, "step": 10635 }, { "epoch": 0.6137517305029996, "grad_norm": 0.30039840936660767, "learning_rate": 7.793303874524752e-05, "loss": 0.9435, "step": 10640 }, { "epoch": 0.6140401476695893, "grad_norm": 0.28601938486099243, "learning_rate": 7.783485266183242e-05, "loss": 0.9447, "step": 10645 }, { "epoch": 0.614328564836179, "grad_norm": 0.31414878368377686, "learning_rate": 7.773668904566085e-05, "loss": 0.9465, "step": 10650 }, { "epoch": 0.6146169820027688, "grad_norm": 0.2823723554611206, "learning_rate": 7.763854799623433e-05, "loss": 0.9305, "step": 10655 }, { "epoch": 0.6149053991693586, "grad_norm": 0.28703320026397705, "learning_rate": 7.75404296130315e-05, "loss": 0.9711, "step": 10660 }, { "epoch": 0.6151938163359483, "grad_norm": 0.36116015911102295, "learning_rate": 7.744233399550806e-05, "loss": 0.967, "step": 10665 }, { "epoch": 0.6154822335025381, "grad_norm": 0.293555349111557, "learning_rate": 7.734426124309656e-05, "loss": 1.0009, "step": 10670 }, { "epoch": 0.6157706506691278, "grad_norm": 0.3458692729473114, "learning_rate": 7.724621145520645e-05, "loss": 1.0352, "step": 10675 }, { "epoch": 0.6160590678357176, "grad_norm": 0.2964923679828644, "learning_rate": 7.714818473122385e-05, "loss": 0.9484, "step": 10680 }, { "epoch": 0.6163474850023073, "grad_norm": 0.30109351873397827, "learning_rate": 7.705018117051157e-05, "loss": 0.9068, "step": 10685 }, { "epoch": 0.6166359021688971, "grad_norm": 0.26956018805503845, "learning_rate": 7.695220087240885e-05, "loss": 0.9494, "step": 10690 }, { "epoch": 0.6169243193354869, "grad_norm": 0.3016825020313263, "learning_rate": 7.685424393623137e-05, "loss": 0.9229, "step": 10695 }, { "epoch": 0.6172127365020766, "grad_norm": 0.284314900636673, "learning_rate": 7.675631046127123e-05, "loss": 0.9199, "step": 10700 }, { "epoch": 0.6175011536686663, "grad_norm": 0.2942012548446655, "learning_rate": 7.665840054679659e-05, "loss": 0.925, "step": 10705 }, { "epoch": 0.6177895708352561, "grad_norm": 0.33849096298217773, "learning_rate": 7.656051429205188e-05, "loss": 0.9594, "step": 10710 }, { "epoch": 0.6180779880018459, "grad_norm": 0.3064514398574829, "learning_rate": 7.646265179625743e-05, "loss": 0.9324, "step": 10715 }, { "epoch": 0.6183664051684357, "grad_norm": 0.3193131387233734, "learning_rate": 7.636481315860958e-05, "loss": 0.9513, "step": 10720 }, { "epoch": 0.6186548223350253, "grad_norm": 0.28184202313423157, "learning_rate": 7.626699847828042e-05, "loss": 0.9926, "step": 10725 }, { "epoch": 0.6189432395016151, "grad_norm": 0.28118598461151123, "learning_rate": 7.616920785441777e-05, "loss": 0.8637, "step": 10730 }, { "epoch": 0.6192316566682049, "grad_norm": 0.33061110973358154, "learning_rate": 7.607144138614509e-05, "loss": 0.9664, "step": 10735 }, { "epoch": 0.6195200738347947, "grad_norm": 0.2889274060726166, "learning_rate": 7.597369917256132e-05, "loss": 0.9826, "step": 10740 }, { "epoch": 0.6198084910013844, "grad_norm": 0.28369957208633423, "learning_rate": 7.587598131274088e-05, "loss": 0.9739, "step": 10745 }, { "epoch": 0.6200969081679741, "grad_norm": 0.28314968943595886, "learning_rate": 7.577828790573345e-05, "loss": 0.9097, "step": 10750 }, { "epoch": 0.6203853253345639, "grad_norm": 0.2938774824142456, "learning_rate": 7.568061905056383e-05, "loss": 0.9658, "step": 10755 }, { "epoch": 0.6206737425011537, "grad_norm": 0.270669549703598, "learning_rate": 7.55829748462322e-05, "loss": 0.934, "step": 10760 }, { "epoch": 0.6209621596677434, "grad_norm": 0.3004055321216583, "learning_rate": 7.548535539171343e-05, "loss": 0.96, "step": 10765 }, { "epoch": 0.6212505768343332, "grad_norm": 0.28886789083480835, "learning_rate": 7.538776078595762e-05, "loss": 0.9317, "step": 10770 }, { "epoch": 0.6215389940009229, "grad_norm": 0.3282342553138733, "learning_rate": 7.529019112788937e-05, "loss": 0.9816, "step": 10775 }, { "epoch": 0.6218274111675127, "grad_norm": 0.31142762303352356, "learning_rate": 7.519264651640829e-05, "loss": 0.9848, "step": 10780 }, { "epoch": 0.6221158283341024, "grad_norm": 0.31296736001968384, "learning_rate": 7.50951270503884e-05, "loss": 0.9546, "step": 10785 }, { "epoch": 0.6224042455006922, "grad_norm": 0.2920689582824707, "learning_rate": 7.499763282867823e-05, "loss": 1.0018, "step": 10790 }, { "epoch": 0.622692662667282, "grad_norm": 0.3036629259586334, "learning_rate": 7.490016395010087e-05, "loss": 1.0164, "step": 10795 }, { "epoch": 0.6229810798338717, "grad_norm": 0.3179445266723633, "learning_rate": 7.480272051345358e-05, "loss": 0.9614, "step": 10800 }, { "epoch": 0.6232694970004614, "grad_norm": 0.33129721879959106, "learning_rate": 7.470530261750793e-05, "loss": 0.9597, "step": 10805 }, { "epoch": 0.6235579141670512, "grad_norm": 0.27982205152511597, "learning_rate": 7.460791036100952e-05, "loss": 1.0011, "step": 10810 }, { "epoch": 0.623846331333641, "grad_norm": 0.2774750292301178, "learning_rate": 7.451054384267802e-05, "loss": 0.953, "step": 10815 }, { "epoch": 0.6241347485002308, "grad_norm": 0.3093210458755493, "learning_rate": 7.4413203161207e-05, "loss": 0.9847, "step": 10820 }, { "epoch": 0.6244231656668204, "grad_norm": 0.32347777485847473, "learning_rate": 7.431588841526378e-05, "loss": 1.0336, "step": 10825 }, { "epoch": 0.6247115828334102, "grad_norm": 0.2853950262069702, "learning_rate": 7.421859970348949e-05, "loss": 0.9333, "step": 10830 }, { "epoch": 0.625, "grad_norm": 0.31536683440208435, "learning_rate": 7.412133712449879e-05, "loss": 0.9859, "step": 10835 }, { "epoch": 0.6252884171665898, "grad_norm": 0.3622225522994995, "learning_rate": 7.402410077687993e-05, "loss": 0.9806, "step": 10840 }, { "epoch": 0.6255768343331796, "grad_norm": 0.27675193548202515, "learning_rate": 7.39268907591945e-05, "loss": 0.9244, "step": 10845 }, { "epoch": 0.6258652514997692, "grad_norm": 0.3075655698776245, "learning_rate": 7.382970716997736e-05, "loss": 0.9561, "step": 10850 }, { "epoch": 0.626153668666359, "grad_norm": 0.28471189737319946, "learning_rate": 7.373255010773677e-05, "loss": 1.0043, "step": 10855 }, { "epoch": 0.6264420858329488, "grad_norm": 0.3159666657447815, "learning_rate": 7.363541967095387e-05, "loss": 0.9872, "step": 10860 }, { "epoch": 0.6267305029995386, "grad_norm": 0.30512797832489014, "learning_rate": 7.353831595808298e-05, "loss": 0.9348, "step": 10865 }, { "epoch": 0.6270189201661283, "grad_norm": 0.29550230503082275, "learning_rate": 7.344123906755124e-05, "loss": 1.0045, "step": 10870 }, { "epoch": 0.627307337332718, "grad_norm": 0.2956281900405884, "learning_rate": 7.334418909775866e-05, "loss": 0.9955, "step": 10875 }, { "epoch": 0.6275957544993078, "grad_norm": 0.3497249484062195, "learning_rate": 7.324716614707793e-05, "loss": 0.9717, "step": 10880 }, { "epoch": 0.6278841716658976, "grad_norm": 0.3222239017486572, "learning_rate": 7.315017031385434e-05, "loss": 0.9774, "step": 10885 }, { "epoch": 0.6281725888324873, "grad_norm": 0.2839806377887726, "learning_rate": 7.305320169640575e-05, "loss": 0.8747, "step": 10890 }, { "epoch": 0.6284610059990771, "grad_norm": 0.28460511565208435, "learning_rate": 7.295626039302234e-05, "loss": 0.9222, "step": 10895 }, { "epoch": 0.6287494231656668, "grad_norm": 0.28321826457977295, "learning_rate": 7.285934650196672e-05, "loss": 0.9395, "step": 10900 }, { "epoch": 0.6290378403322566, "grad_norm": 0.30483925342559814, "learning_rate": 7.27624601214736e-05, "loss": 0.9941, "step": 10905 }, { "epoch": 0.6293262574988463, "grad_norm": 0.32150912284851074, "learning_rate": 7.266560134974989e-05, "loss": 0.9828, "step": 10910 }, { "epoch": 0.6296146746654361, "grad_norm": 0.2850598990917206, "learning_rate": 7.25687702849745e-05, "loss": 0.9422, "step": 10915 }, { "epoch": 0.6299030918320259, "grad_norm": 0.3211795687675476, "learning_rate": 7.247196702529815e-05, "loss": 0.9291, "step": 10920 }, { "epoch": 0.6301915089986156, "grad_norm": 0.28331828117370605, "learning_rate": 7.23751916688436e-05, "loss": 0.9529, "step": 10925 }, { "epoch": 0.6304799261652053, "grad_norm": 0.2783966660499573, "learning_rate": 7.227844431370502e-05, "loss": 0.9945, "step": 10930 }, { "epoch": 0.6307683433317951, "grad_norm": 0.28569692373275757, "learning_rate": 7.218172505794854e-05, "loss": 0.9894, "step": 10935 }, { "epoch": 0.6310567604983849, "grad_norm": 0.2852317988872528, "learning_rate": 7.208503399961149e-05, "loss": 0.9604, "step": 10940 }, { "epoch": 0.6313451776649747, "grad_norm": 0.29289114475250244, "learning_rate": 7.19883712367029e-05, "loss": 0.9199, "step": 10945 }, { "epoch": 0.6316335948315643, "grad_norm": 0.2972031831741333, "learning_rate": 7.189173686720287e-05, "loss": 0.988, "step": 10950 }, { "epoch": 0.6319220119981541, "grad_norm": 0.31643036007881165, "learning_rate": 7.179513098906286e-05, "loss": 0.9148, "step": 10955 }, { "epoch": 0.6322104291647439, "grad_norm": 0.31982874870300293, "learning_rate": 7.169855370020547e-05, "loss": 0.9133, "step": 10960 }, { "epoch": 0.6324988463313337, "grad_norm": 0.2920014560222626, "learning_rate": 7.160200509852419e-05, "loss": 0.9445, "step": 10965 }, { "epoch": 0.6327872634979234, "grad_norm": 0.31772705912590027, "learning_rate": 7.15054852818836e-05, "loss": 0.9335, "step": 10970 }, { "epoch": 0.6330756806645131, "grad_norm": 0.2917129099369049, "learning_rate": 7.140899434811898e-05, "loss": 0.9335, "step": 10975 }, { "epoch": 0.6333640978311029, "grad_norm": 0.3037589192390442, "learning_rate": 7.131253239503635e-05, "loss": 0.9397, "step": 10980 }, { "epoch": 0.6336525149976927, "grad_norm": 0.28663626313209534, "learning_rate": 7.121609952041241e-05, "loss": 0.9181, "step": 10985 }, { "epoch": 0.6339409321642824, "grad_norm": 0.288944810628891, "learning_rate": 7.111969582199431e-05, "loss": 0.9224, "step": 10990 }, { "epoch": 0.6342293493308722, "grad_norm": 0.31760546565055847, "learning_rate": 7.102332139749975e-05, "loss": 0.9432, "step": 10995 }, { "epoch": 0.6345177664974619, "grad_norm": 0.3294132947921753, "learning_rate": 7.092697634461654e-05, "loss": 0.9897, "step": 11000 }, { "epoch": 0.6348061836640517, "grad_norm": 0.3404971957206726, "learning_rate": 7.083066076100299e-05, "loss": 0.9885, "step": 11005 }, { "epoch": 0.6350946008306414, "grad_norm": 0.3096059262752533, "learning_rate": 7.073437474428732e-05, "loss": 0.9761, "step": 11010 }, { "epoch": 0.6353830179972312, "grad_norm": 0.29714658856391907, "learning_rate": 7.063811839206785e-05, "loss": 0.9564, "step": 11015 }, { "epoch": 0.635671435163821, "grad_norm": 0.3057291507720947, "learning_rate": 7.05418918019129e-05, "loss": 0.9389, "step": 11020 }, { "epoch": 0.6359598523304107, "grad_norm": 0.29941415786743164, "learning_rate": 7.044569507136052e-05, "loss": 0.954, "step": 11025 }, { "epoch": 0.6362482694970004, "grad_norm": 0.3494766056537628, "learning_rate": 7.034952829791858e-05, "loss": 0.995, "step": 11030 }, { "epoch": 0.6365366866635902, "grad_norm": 0.31812405586242676, "learning_rate": 7.025339157906448e-05, "loss": 0.985, "step": 11035 }, { "epoch": 0.63682510383018, "grad_norm": 0.3052515387535095, "learning_rate": 7.01572850122453e-05, "loss": 0.9408, "step": 11040 }, { "epoch": 0.6371135209967698, "grad_norm": 0.2897067368030548, "learning_rate": 7.00612086948774e-05, "loss": 0.8831, "step": 11045 }, { "epoch": 0.6374019381633594, "grad_norm": 0.30502036213874817, "learning_rate": 6.996516272434658e-05, "loss": 1.0114, "step": 11050 }, { "epoch": 0.6376903553299492, "grad_norm": 0.29145947098731995, "learning_rate": 6.986914719800788e-05, "loss": 0.9856, "step": 11055 }, { "epoch": 0.637978772496539, "grad_norm": 0.3128416836261749, "learning_rate": 6.97731622131854e-05, "loss": 0.9045, "step": 11060 }, { "epoch": 0.6382671896631288, "grad_norm": 0.3100086748600006, "learning_rate": 6.96772078671724e-05, "loss": 0.9582, "step": 11065 }, { "epoch": 0.6385556068297185, "grad_norm": 0.29209864139556885, "learning_rate": 6.9581284257231e-05, "loss": 0.9107, "step": 11070 }, { "epoch": 0.6388440239963082, "grad_norm": 0.31914541125297546, "learning_rate": 6.948539148059211e-05, "loss": 0.8921, "step": 11075 }, { "epoch": 0.639132441162898, "grad_norm": 0.3100963234901428, "learning_rate": 6.938952963445559e-05, "loss": 0.9396, "step": 11080 }, { "epoch": 0.6394208583294878, "grad_norm": 0.2907158136367798, "learning_rate": 6.929369881598968e-05, "loss": 0.9779, "step": 11085 }, { "epoch": 0.6397092754960775, "grad_norm": 0.3513832688331604, "learning_rate": 6.919789912233146e-05, "loss": 0.9685, "step": 11090 }, { "epoch": 0.6399976926626673, "grad_norm": 0.31142088770866394, "learning_rate": 6.910213065058615e-05, "loss": 0.9449, "step": 11095 }, { "epoch": 0.640286109829257, "grad_norm": 0.2722773849964142, "learning_rate": 6.900639349782762e-05, "loss": 0.8805, "step": 11100 }, { "epoch": 0.6405745269958468, "grad_norm": 0.3007947504520416, "learning_rate": 6.89106877610978e-05, "loss": 0.9022, "step": 11105 }, { "epoch": 0.6408629441624365, "grad_norm": 0.3130947947502136, "learning_rate": 6.88150135374068e-05, "loss": 1.0078, "step": 11110 }, { "epoch": 0.6411513613290263, "grad_norm": 0.2944614887237549, "learning_rate": 6.871937092373287e-05, "loss": 0.964, "step": 11115 }, { "epoch": 0.6414397784956161, "grad_norm": 0.31232336163520813, "learning_rate": 6.862376001702213e-05, "loss": 0.9654, "step": 11120 }, { "epoch": 0.6417281956622058, "grad_norm": 0.3049473464488983, "learning_rate": 6.852818091418865e-05, "loss": 0.9922, "step": 11125 }, { "epoch": 0.6420166128287955, "grad_norm": 0.28708407282829285, "learning_rate": 6.843263371211414e-05, "loss": 0.8918, "step": 11130 }, { "epoch": 0.6423050299953853, "grad_norm": 0.30203092098236084, "learning_rate": 6.833711850764813e-05, "loss": 0.9724, "step": 11135 }, { "epoch": 0.6425934471619751, "grad_norm": 0.2970055043697357, "learning_rate": 6.824163539760759e-05, "loss": 1.0243, "step": 11140 }, { "epoch": 0.6428818643285649, "grad_norm": 0.3000982999801636, "learning_rate": 6.814618447877698e-05, "loss": 0.9773, "step": 11145 }, { "epoch": 0.6431702814951545, "grad_norm": 0.29718953371047974, "learning_rate": 6.805076584790818e-05, "loss": 0.8987, "step": 11150 }, { "epoch": 0.6434586986617443, "grad_norm": 0.27283528447151184, "learning_rate": 6.79553796017203e-05, "loss": 0.9429, "step": 11155 }, { "epoch": 0.6437471158283341, "grad_norm": 0.2902205288410187, "learning_rate": 6.786002583689968e-05, "loss": 0.9464, "step": 11160 }, { "epoch": 0.6440355329949239, "grad_norm": 0.30263763666152954, "learning_rate": 6.776470465009965e-05, "loss": 0.9825, "step": 11165 }, { "epoch": 0.6443239501615136, "grad_norm": 0.31235364079475403, "learning_rate": 6.766941613794053e-05, "loss": 0.9986, "step": 11170 }, { "epoch": 0.6446123673281033, "grad_norm": 0.3030349612236023, "learning_rate": 6.757416039700963e-05, "loss": 0.9523, "step": 11175 }, { "epoch": 0.6449007844946931, "grad_norm": 0.29484447836875916, "learning_rate": 6.747893752386088e-05, "loss": 0.8984, "step": 11180 }, { "epoch": 0.6451892016612829, "grad_norm": 0.2747655510902405, "learning_rate": 6.738374761501505e-05, "loss": 0.9555, "step": 11185 }, { "epoch": 0.6454776188278727, "grad_norm": 0.2867422103881836, "learning_rate": 6.728859076695938e-05, "loss": 0.9511, "step": 11190 }, { "epoch": 0.6457660359944624, "grad_norm": 0.3082755208015442, "learning_rate": 6.719346707614766e-05, "loss": 0.9675, "step": 11195 }, { "epoch": 0.6460544531610521, "grad_norm": 0.2720474302768707, "learning_rate": 6.709837663900007e-05, "loss": 0.9483, "step": 11200 }, { "epoch": 0.6463428703276419, "grad_norm": 0.3050679862499237, "learning_rate": 6.700331955190303e-05, "loss": 0.8734, "step": 11205 }, { "epoch": 0.6466312874942317, "grad_norm": 0.3173052966594696, "learning_rate": 6.690829591120922e-05, "loss": 0.9713, "step": 11210 }, { "epoch": 0.6469197046608214, "grad_norm": 0.2943088710308075, "learning_rate": 6.681330581323735e-05, "loss": 0.9718, "step": 11215 }, { "epoch": 0.6472081218274112, "grad_norm": 0.3078162372112274, "learning_rate": 6.671834935427222e-05, "loss": 1.0134, "step": 11220 }, { "epoch": 0.647496538994001, "grad_norm": 0.30523622035980225, "learning_rate": 6.662342663056444e-05, "loss": 0.953, "step": 11225 }, { "epoch": 0.6477849561605907, "grad_norm": 0.31331026554107666, "learning_rate": 6.652853773833052e-05, "loss": 0.926, "step": 11230 }, { "epoch": 0.6480733733271804, "grad_norm": 0.32736653089523315, "learning_rate": 6.64336827737526e-05, "loss": 0.9555, "step": 11235 }, { "epoch": 0.6483617904937702, "grad_norm": 0.30823779106140137, "learning_rate": 6.633886183297838e-05, "loss": 0.9442, "step": 11240 }, { "epoch": 0.64865020766036, "grad_norm": 0.3050725758075714, "learning_rate": 6.624407501212128e-05, "loss": 0.8628, "step": 11245 }, { "epoch": 0.6489386248269498, "grad_norm": 0.289765864610672, "learning_rate": 6.614932240725989e-05, "loss": 1.0329, "step": 11250 }, { "epoch": 0.6492270419935394, "grad_norm": 0.29332610964775085, "learning_rate": 6.605460411443831e-05, "loss": 0.9276, "step": 11255 }, { "epoch": 0.6495154591601292, "grad_norm": 0.301318496465683, "learning_rate": 6.595992022966571e-05, "loss": 0.9452, "step": 11260 }, { "epoch": 0.649803876326719, "grad_norm": 0.30513182282447815, "learning_rate": 6.586527084891654e-05, "loss": 0.9093, "step": 11265 }, { "epoch": 0.6500922934933088, "grad_norm": 0.297073096036911, "learning_rate": 6.577065606813011e-05, "loss": 0.8822, "step": 11270 }, { "epoch": 0.6503807106598984, "grad_norm": 0.2968006432056427, "learning_rate": 6.567607598321074e-05, "loss": 0.9486, "step": 11275 }, { "epoch": 0.6506691278264882, "grad_norm": 0.2799467146396637, "learning_rate": 6.558153069002764e-05, "loss": 0.9775, "step": 11280 }, { "epoch": 0.650957544993078, "grad_norm": 0.3077338933944702, "learning_rate": 6.548702028441462e-05, "loss": 0.9383, "step": 11285 }, { "epoch": 0.6512459621596678, "grad_norm": 0.3266589641571045, "learning_rate": 6.539254486217026e-05, "loss": 1.0737, "step": 11290 }, { "epoch": 0.6515343793262575, "grad_norm": 0.2924719750881195, "learning_rate": 6.529810451905759e-05, "loss": 0.8547, "step": 11295 }, { "epoch": 0.6518227964928472, "grad_norm": 0.304733544588089, "learning_rate": 6.520369935080411e-05, "loss": 0.9368, "step": 11300 }, { "epoch": 0.652111213659437, "grad_norm": 0.3150797188282013, "learning_rate": 6.510932945310167e-05, "loss": 1.0042, "step": 11305 }, { "epoch": 0.6523996308260268, "grad_norm": 0.29850059747695923, "learning_rate": 6.501499492160636e-05, "loss": 0.9136, "step": 11310 }, { "epoch": 0.6526880479926165, "grad_norm": 0.3074968755245209, "learning_rate": 6.49206958519385e-05, "loss": 0.915, "step": 11315 }, { "epoch": 0.6529764651592063, "grad_norm": 0.31671762466430664, "learning_rate": 6.482643233968224e-05, "loss": 0.9417, "step": 11320 }, { "epoch": 0.653264882325796, "grad_norm": 0.3049052059650421, "learning_rate": 6.473220448038602e-05, "loss": 0.9117, "step": 11325 }, { "epoch": 0.6535532994923858, "grad_norm": 0.28879445791244507, "learning_rate": 6.463801236956184e-05, "loss": 0.9448, "step": 11330 }, { "epoch": 0.6538417166589755, "grad_norm": 0.2787480652332306, "learning_rate": 6.45438561026856e-05, "loss": 0.9127, "step": 11335 }, { "epoch": 0.6541301338255653, "grad_norm": 0.31340980529785156, "learning_rate": 6.44497357751969e-05, "loss": 0.9709, "step": 11340 }, { "epoch": 0.6544185509921551, "grad_norm": 0.2779744565486908, "learning_rate": 6.435565148249882e-05, "loss": 0.9482, "step": 11345 }, { "epoch": 0.6547069681587449, "grad_norm": 0.3121470510959625, "learning_rate": 6.426160331995801e-05, "loss": 0.9688, "step": 11350 }, { "epoch": 0.6549953853253345, "grad_norm": 0.29920753836631775, "learning_rate": 6.416759138290438e-05, "loss": 0.9957, "step": 11355 }, { "epoch": 0.6552838024919243, "grad_norm": 0.2805769741535187, "learning_rate": 6.407361576663124e-05, "loss": 0.9614, "step": 11360 }, { "epoch": 0.6555722196585141, "grad_norm": 0.31521645188331604, "learning_rate": 6.397967656639504e-05, "loss": 0.943, "step": 11365 }, { "epoch": 0.6558606368251039, "grad_norm": 0.29340264201164246, "learning_rate": 6.388577387741524e-05, "loss": 0.9442, "step": 11370 }, { "epoch": 0.6561490539916935, "grad_norm": 0.3037976324558258, "learning_rate": 6.379190779487443e-05, "loss": 0.9699, "step": 11375 }, { "epoch": 0.6564374711582833, "grad_norm": 0.32146474719047546, "learning_rate": 6.369807841391798e-05, "loss": 0.972, "step": 11380 }, { "epoch": 0.6567258883248731, "grad_norm": 0.2791215777397156, "learning_rate": 6.360428582965414e-05, "loss": 0.9761, "step": 11385 }, { "epoch": 0.6570143054914629, "grad_norm": 0.3133072853088379, "learning_rate": 6.351053013715383e-05, "loss": 1.0486, "step": 11390 }, { "epoch": 0.6573027226580526, "grad_norm": 0.320552796125412, "learning_rate": 6.341681143145048e-05, "loss": 0.9421, "step": 11395 }, { "epoch": 0.6575911398246423, "grad_norm": 0.2836511731147766, "learning_rate": 6.332312980754025e-05, "loss": 0.9323, "step": 11400 }, { "epoch": 0.6578795569912321, "grad_norm": 0.3099246919155121, "learning_rate": 6.322948536038144e-05, "loss": 1.0394, "step": 11405 }, { "epoch": 0.6581679741578219, "grad_norm": 0.3043697476387024, "learning_rate": 6.313587818489497e-05, "loss": 0.9704, "step": 11410 }, { "epoch": 0.6584563913244116, "grad_norm": 0.3238884508609772, "learning_rate": 6.304230837596365e-05, "loss": 0.9943, "step": 11415 }, { "epoch": 0.6587448084910014, "grad_norm": 0.26987263560295105, "learning_rate": 6.294877602843275e-05, "loss": 0.9566, "step": 11420 }, { "epoch": 0.6590332256575911, "grad_norm": 0.29144686460494995, "learning_rate": 6.285528123710929e-05, "loss": 1.0097, "step": 11425 }, { "epoch": 0.6593216428241809, "grad_norm": 0.28795817494392395, "learning_rate": 6.276182409676234e-05, "loss": 0.9627, "step": 11430 }, { "epoch": 0.6596100599907706, "grad_norm": 0.3085058331489563, "learning_rate": 6.266840470212285e-05, "loss": 0.9518, "step": 11435 }, { "epoch": 0.6598984771573604, "grad_norm": 0.2758595049381256, "learning_rate": 6.25750231478834e-05, "loss": 0.9786, "step": 11440 }, { "epoch": 0.6601868943239502, "grad_norm": 0.29992714524269104, "learning_rate": 6.248167952869833e-05, "loss": 0.9749, "step": 11445 }, { "epoch": 0.66047531149054, "grad_norm": 0.2847946286201477, "learning_rate": 6.238837393918341e-05, "loss": 1.0312, "step": 11450 }, { "epoch": 0.6607637286571296, "grad_norm": 0.3597642481327057, "learning_rate": 6.229510647391599e-05, "loss": 0.9781, "step": 11455 }, { "epoch": 0.6610521458237194, "grad_norm": 0.30279117822647095, "learning_rate": 6.220187722743466e-05, "loss": 0.9767, "step": 11460 }, { "epoch": 0.6613405629903092, "grad_norm": 0.3455253541469574, "learning_rate": 6.210868629423932e-05, "loss": 1.0143, "step": 11465 }, { "epoch": 0.661628980156899, "grad_norm": 0.2912869155406952, "learning_rate": 6.201553376879108e-05, "loss": 0.9544, "step": 11470 }, { "epoch": 0.6619173973234886, "grad_norm": 0.30142757296562195, "learning_rate": 6.192241974551198e-05, "loss": 0.9898, "step": 11475 }, { "epoch": 0.6622058144900784, "grad_norm": 0.3466334342956543, "learning_rate": 6.182934431878526e-05, "loss": 0.9499, "step": 11480 }, { "epoch": 0.6624942316566682, "grad_norm": 0.29196497797966003, "learning_rate": 6.173630758295479e-05, "loss": 0.9437, "step": 11485 }, { "epoch": 0.662782648823258, "grad_norm": 0.29323986172676086, "learning_rate": 6.164330963232535e-05, "loss": 0.8918, "step": 11490 }, { "epoch": 0.6630710659898477, "grad_norm": 0.3342483937740326, "learning_rate": 6.155035056116243e-05, "loss": 0.9651, "step": 11495 }, { "epoch": 0.6633594831564374, "grad_norm": 0.29042768478393555, "learning_rate": 6.145743046369205e-05, "loss": 0.9427, "step": 11500 }, { "epoch": 0.6636479003230272, "grad_norm": 0.2897365987300873, "learning_rate": 6.136454943410077e-05, "loss": 0.9875, "step": 11505 }, { "epoch": 0.663936317489617, "grad_norm": 0.2877280116081238, "learning_rate": 6.127170756653546e-05, "loss": 0.9621, "step": 11510 }, { "epoch": 0.6642247346562067, "grad_norm": 0.41063976287841797, "learning_rate": 6.117890495510345e-05, "loss": 0.9689, "step": 11515 }, { "epoch": 0.6645131518227965, "grad_norm": 0.300569087266922, "learning_rate": 6.108614169387215e-05, "loss": 0.9432, "step": 11520 }, { "epoch": 0.6648015689893862, "grad_norm": 0.300698459148407, "learning_rate": 6.099341787686908e-05, "loss": 0.9182, "step": 11525 }, { "epoch": 0.665089986155976, "grad_norm": 0.3228488564491272, "learning_rate": 6.090073359808188e-05, "loss": 0.9576, "step": 11530 }, { "epoch": 0.6653784033225658, "grad_norm": 0.27102410793304443, "learning_rate": 6.080808895145802e-05, "loss": 0.9485, "step": 11535 }, { "epoch": 0.6656668204891555, "grad_norm": 0.3214710056781769, "learning_rate": 6.071548403090488e-05, "loss": 1.0114, "step": 11540 }, { "epoch": 0.6659552376557453, "grad_norm": 0.32063308358192444, "learning_rate": 6.062291893028944e-05, "loss": 0.9854, "step": 11545 }, { "epoch": 0.666243654822335, "grad_norm": 0.2712962031364441, "learning_rate": 6.053039374343849e-05, "loss": 0.9318, "step": 11550 }, { "epoch": 0.6665320719889248, "grad_norm": 0.29538100957870483, "learning_rate": 6.043790856413825e-05, "loss": 1.0111, "step": 11555 }, { "epoch": 0.6668204891555145, "grad_norm": 0.3049154281616211, "learning_rate": 6.0345463486134325e-05, "loss": 0.9196, "step": 11560 }, { "epoch": 0.6671089063221043, "grad_norm": 0.3039052188396454, "learning_rate": 6.025305860313188e-05, "loss": 1.0061, "step": 11565 }, { "epoch": 0.6673973234886941, "grad_norm": 0.2950868308544159, "learning_rate": 6.0160694008795114e-05, "loss": 0.9581, "step": 11570 }, { "epoch": 0.6676857406552839, "grad_norm": 0.32517221570014954, "learning_rate": 6.0068369796747594e-05, "loss": 0.9702, "step": 11575 }, { "epoch": 0.6679741578218735, "grad_norm": 0.28849300742149353, "learning_rate": 5.9976086060571765e-05, "loss": 0.9907, "step": 11580 }, { "epoch": 0.6682625749884633, "grad_norm": 0.3085767924785614, "learning_rate": 5.988384289380915e-05, "loss": 0.9651, "step": 11585 }, { "epoch": 0.6685509921550531, "grad_norm": 0.2863781154155731, "learning_rate": 5.979164038996015e-05, "loss": 0.9576, "step": 11590 }, { "epoch": 0.6688394093216429, "grad_norm": 0.33025938272476196, "learning_rate": 5.9699478642483896e-05, "loss": 0.9488, "step": 11595 }, { "epoch": 0.6691278264882325, "grad_norm": 0.2808819115161896, "learning_rate": 5.960735774479826e-05, "loss": 0.8781, "step": 11600 }, { "epoch": 0.6694162436548223, "grad_norm": 0.3095723092556, "learning_rate": 5.951527779027968e-05, "loss": 1.0953, "step": 11605 }, { "epoch": 0.6697046608214121, "grad_norm": 0.2829914391040802, "learning_rate": 5.942323887226311e-05, "loss": 0.9082, "step": 11610 }, { "epoch": 0.6699930779880019, "grad_norm": 0.28427544236183167, "learning_rate": 5.933124108404189e-05, "loss": 0.9043, "step": 11615 }, { "epoch": 0.6702814951545916, "grad_norm": 0.30131834745407104, "learning_rate": 5.923928451886767e-05, "loss": 0.9115, "step": 11620 }, { "epoch": 0.6705699123211813, "grad_norm": 0.27953872084617615, "learning_rate": 5.914736926995034e-05, "loss": 0.9719, "step": 11625 }, { "epoch": 0.6708583294877711, "grad_norm": 0.2982015013694763, "learning_rate": 5.905549543045783e-05, "loss": 0.9212, "step": 11630 }, { "epoch": 0.6711467466543609, "grad_norm": 0.2859979271888733, "learning_rate": 5.8963663093516264e-05, "loss": 0.8745, "step": 11635 }, { "epoch": 0.6714351638209506, "grad_norm": 0.3366292119026184, "learning_rate": 5.887187235220948e-05, "loss": 0.9363, "step": 11640 }, { "epoch": 0.6717235809875404, "grad_norm": 0.317078560590744, "learning_rate": 5.8780123299579385e-05, "loss": 0.9509, "step": 11645 }, { "epoch": 0.6720119981541302, "grad_norm": 0.28258809447288513, "learning_rate": 5.868841602862541e-05, "loss": 0.9911, "step": 11650 }, { "epoch": 0.6723004153207199, "grad_norm": 0.2883169949054718, "learning_rate": 5.859675063230477e-05, "loss": 0.9584, "step": 11655 }, { "epoch": 0.6725888324873096, "grad_norm": 0.3138083517551422, "learning_rate": 5.8505127203532216e-05, "loss": 1.0294, "step": 11660 }, { "epoch": 0.6728772496538994, "grad_norm": 0.2859340012073517, "learning_rate": 5.841354583517991e-05, "loss": 0.9843, "step": 11665 }, { "epoch": 0.6731656668204892, "grad_norm": 0.31252771615982056, "learning_rate": 5.8322006620077426e-05, "loss": 0.9464, "step": 11670 }, { "epoch": 0.673454083987079, "grad_norm": 0.2721909284591675, "learning_rate": 5.823050965101162e-05, "loss": 0.938, "step": 11675 }, { "epoch": 0.6737425011536686, "grad_norm": 0.30047523975372314, "learning_rate": 5.8139055020726494e-05, "loss": 0.9684, "step": 11680 }, { "epoch": 0.6740309183202584, "grad_norm": 0.2787550985813141, "learning_rate": 5.804764282192314e-05, "loss": 0.9879, "step": 11685 }, { "epoch": 0.6743193354868482, "grad_norm": 0.3054908514022827, "learning_rate": 5.7956273147259645e-05, "loss": 0.9232, "step": 11690 }, { "epoch": 0.674607752653438, "grad_norm": 0.2886941730976105, "learning_rate": 5.786494608935098e-05, "loss": 0.9515, "step": 11695 }, { "epoch": 0.6748961698200276, "grad_norm": 0.29352888464927673, "learning_rate": 5.77736617407689e-05, "loss": 1.0245, "step": 11700 }, { "epoch": 0.6751845869866174, "grad_norm": 0.3170221447944641, "learning_rate": 5.768242019404198e-05, "loss": 0.9051, "step": 11705 }, { "epoch": 0.6754730041532072, "grad_norm": 0.2965846061706543, "learning_rate": 5.7591221541655285e-05, "loss": 0.9583, "step": 11710 }, { "epoch": 0.675761421319797, "grad_norm": 0.2991654574871063, "learning_rate": 5.750006587605043e-05, "loss": 0.938, "step": 11715 }, { "epoch": 0.6760498384863867, "grad_norm": 0.28856098651885986, "learning_rate": 5.74089532896255e-05, "loss": 0.9213, "step": 11720 }, { "epoch": 0.6763382556529764, "grad_norm": 0.6358820199966431, "learning_rate": 5.7317883874734823e-05, "loss": 1.0292, "step": 11725 }, { "epoch": 0.6766266728195662, "grad_norm": 0.2861112952232361, "learning_rate": 5.722685772368912e-05, "loss": 0.9703, "step": 11730 }, { "epoch": 0.676915089986156, "grad_norm": 0.29392847418785095, "learning_rate": 5.713587492875513e-05, "loss": 0.9174, "step": 11735 }, { "epoch": 0.6772035071527457, "grad_norm": 0.30904361605644226, "learning_rate": 5.704493558215567e-05, "loss": 0.9269, "step": 11740 }, { "epoch": 0.6774919243193355, "grad_norm": 0.30134832859039307, "learning_rate": 5.695403977606955e-05, "loss": 0.9305, "step": 11745 }, { "epoch": 0.6777803414859253, "grad_norm": 0.29707950353622437, "learning_rate": 5.6863187602631354e-05, "loss": 0.9669, "step": 11750 }, { "epoch": 0.678068758652515, "grad_norm": 0.2993502914905548, "learning_rate": 5.677237915393165e-05, "loss": 0.9547, "step": 11755 }, { "epoch": 0.6783571758191047, "grad_norm": 0.26072099804878235, "learning_rate": 5.668161452201639e-05, "loss": 0.9151, "step": 11760 }, { "epoch": 0.6786455929856945, "grad_norm": 0.30072999000549316, "learning_rate": 5.659089379888738e-05, "loss": 0.9176, "step": 11765 }, { "epoch": 0.6789340101522843, "grad_norm": 0.27792972326278687, "learning_rate": 5.650021707650173e-05, "loss": 0.8993, "step": 11770 }, { "epoch": 0.6792224273188741, "grad_norm": 0.3117034435272217, "learning_rate": 5.6409584446772135e-05, "loss": 0.9597, "step": 11775 }, { "epoch": 0.6795108444854637, "grad_norm": 0.29373329877853394, "learning_rate": 5.6318996001566384e-05, "loss": 0.8972, "step": 11780 }, { "epoch": 0.6797992616520535, "grad_norm": 0.26909390091896057, "learning_rate": 5.622845183270757e-05, "loss": 0.9559, "step": 11785 }, { "epoch": 0.6800876788186433, "grad_norm": 0.28705233335494995, "learning_rate": 5.613795203197401e-05, "loss": 0.9498, "step": 11790 }, { "epoch": 0.6803760959852331, "grad_norm": 0.31131601333618164, "learning_rate": 5.604749669109889e-05, "loss": 0.9168, "step": 11795 }, { "epoch": 0.6806645131518227, "grad_norm": 0.28067898750305176, "learning_rate": 5.5957085901770424e-05, "loss": 0.9545, "step": 11800 }, { "epoch": 0.6809529303184125, "grad_norm": 0.2953431010246277, "learning_rate": 5.5866719755631625e-05, "loss": 0.893, "step": 11805 }, { "epoch": 0.6812413474850023, "grad_norm": 0.2954767644405365, "learning_rate": 5.577639834428026e-05, "loss": 0.9665, "step": 11810 }, { "epoch": 0.6815297646515921, "grad_norm": 0.2888740599155426, "learning_rate": 5.5686121759268793e-05, "loss": 0.897, "step": 11815 }, { "epoch": 0.6818181818181818, "grad_norm": 0.2831745147705078, "learning_rate": 5.559589009210421e-05, "loss": 0.9831, "step": 11820 }, { "epoch": 0.6821065989847716, "grad_norm": 0.26822391152381897, "learning_rate": 5.550570343424797e-05, "loss": 1.0036, "step": 11825 }, { "epoch": 0.6823950161513613, "grad_norm": 0.3146960139274597, "learning_rate": 5.5415561877115876e-05, "loss": 0.9881, "step": 11830 }, { "epoch": 0.6826834333179511, "grad_norm": 0.3015124499797821, "learning_rate": 5.5325465512078154e-05, "loss": 1.0135, "step": 11835 }, { "epoch": 0.6829718504845408, "grad_norm": 0.28780919313430786, "learning_rate": 5.523541443045904e-05, "loss": 0.9213, "step": 11840 }, { "epoch": 0.6832602676511306, "grad_norm": 0.3086405098438263, "learning_rate": 5.514540872353693e-05, "loss": 0.9785, "step": 11845 }, { "epoch": 0.6835486848177204, "grad_norm": 0.28687992691993713, "learning_rate": 5.505544848254432e-05, "loss": 1.0139, "step": 11850 }, { "epoch": 0.6838371019843101, "grad_norm": 0.3073662519454956, "learning_rate": 5.496553379866753e-05, "loss": 0.9409, "step": 11855 }, { "epoch": 0.6841255191508998, "grad_norm": 0.29431843757629395, "learning_rate": 5.4875664763046705e-05, "loss": 0.9019, "step": 11860 }, { "epoch": 0.6844139363174896, "grad_norm": 0.31620967388153076, "learning_rate": 5.4785841466775726e-05, "loss": 0.9975, "step": 11865 }, { "epoch": 0.6847023534840794, "grad_norm": 0.2722678482532501, "learning_rate": 5.4696064000902146e-05, "loss": 0.9492, "step": 11870 }, { "epoch": 0.6849907706506692, "grad_norm": 0.28141093254089355, "learning_rate": 5.460633245642703e-05, "loss": 0.9502, "step": 11875 }, { "epoch": 0.6852791878172588, "grad_norm": 0.27788811922073364, "learning_rate": 5.451664692430493e-05, "loss": 0.9604, "step": 11880 }, { "epoch": 0.6855676049838486, "grad_norm": 0.3031676113605499, "learning_rate": 5.4427007495443684e-05, "loss": 1.0095, "step": 11885 }, { "epoch": 0.6858560221504384, "grad_norm": 0.30585047602653503, "learning_rate": 5.433741426070442e-05, "loss": 1.0091, "step": 11890 }, { "epoch": 0.6861444393170282, "grad_norm": 0.29617300629615784, "learning_rate": 5.424786731090157e-05, "loss": 1.0051, "step": 11895 }, { "epoch": 0.686432856483618, "grad_norm": 0.267132431268692, "learning_rate": 5.415836673680253e-05, "loss": 0.99, "step": 11900 }, { "epoch": 0.6867212736502076, "grad_norm": 0.29219532012939453, "learning_rate": 5.40689126291276e-05, "loss": 1.0008, "step": 11905 }, { "epoch": 0.6870096908167974, "grad_norm": 0.3108190894126892, "learning_rate": 5.3979505078550184e-05, "loss": 0.9479, "step": 11910 }, { "epoch": 0.6872981079833872, "grad_norm": 0.36880865693092346, "learning_rate": 5.389014417569635e-05, "loss": 1.0612, "step": 11915 }, { "epoch": 0.687586525149977, "grad_norm": 0.29092344641685486, "learning_rate": 5.380083001114503e-05, "loss": 1.0152, "step": 11920 }, { "epoch": 0.6878749423165667, "grad_norm": 0.30183884501457214, "learning_rate": 5.371156267542752e-05, "loss": 0.9995, "step": 11925 }, { "epoch": 0.6881633594831564, "grad_norm": 0.29405462741851807, "learning_rate": 5.362234225902794e-05, "loss": 0.9237, "step": 11930 }, { "epoch": 0.6884517766497462, "grad_norm": 0.2716309428215027, "learning_rate": 5.353316885238269e-05, "loss": 0.898, "step": 11935 }, { "epoch": 0.688740193816336, "grad_norm": 0.30954423546791077, "learning_rate": 5.3444042545880514e-05, "loss": 0.9184, "step": 11940 }, { "epoch": 0.6890286109829257, "grad_norm": 0.2786799967288971, "learning_rate": 5.3354963429862484e-05, "loss": 1.0208, "step": 11945 }, { "epoch": 0.6893170281495155, "grad_norm": 0.3064625561237335, "learning_rate": 5.3265931594621756e-05, "loss": 0.9567, "step": 11950 }, { "epoch": 0.6896054453161052, "grad_norm": 0.3363032639026642, "learning_rate": 5.317694713040369e-05, "loss": 1.0089, "step": 11955 }, { "epoch": 0.689893862482695, "grad_norm": 0.2882344722747803, "learning_rate": 5.3088010127405496e-05, "loss": 0.9548, "step": 11960 }, { "epoch": 0.6901822796492847, "grad_norm": 0.3060877323150635, "learning_rate": 5.299912067577635e-05, "loss": 0.9522, "step": 11965 }, { "epoch": 0.6904706968158745, "grad_norm": 0.3129549026489258, "learning_rate": 5.29102788656172e-05, "loss": 0.9276, "step": 11970 }, { "epoch": 0.6907591139824643, "grad_norm": 0.31678634881973267, "learning_rate": 5.282148478698068e-05, "loss": 1.0038, "step": 11975 }, { "epoch": 0.691047531149054, "grad_norm": 0.31607529520988464, "learning_rate": 5.273273852987113e-05, "loss": 0.885, "step": 11980 }, { "epoch": 0.6913359483156437, "grad_norm": 0.26990994811058044, "learning_rate": 5.2644040184244325e-05, "loss": 0.9675, "step": 11985 }, { "epoch": 0.6916243654822335, "grad_norm": 0.31181198358535767, "learning_rate": 5.255538984000753e-05, "loss": 0.9581, "step": 11990 }, { "epoch": 0.6919127826488233, "grad_norm": 0.2867415249347687, "learning_rate": 5.246678758701932e-05, "loss": 0.9137, "step": 11995 }, { "epoch": 0.6922011998154131, "grad_norm": 0.29689162969589233, "learning_rate": 5.237823351508953e-05, "loss": 1.0561, "step": 12000 }, { "epoch": 0.6924896169820027, "grad_norm": 0.30153143405914307, "learning_rate": 5.228972771397918e-05, "loss": 0.9205, "step": 12005 }, { "epoch": 0.6927780341485925, "grad_norm": 0.27744805812835693, "learning_rate": 5.2201270273400296e-05, "loss": 0.9878, "step": 12010 }, { "epoch": 0.6930664513151823, "grad_norm": 0.287998765707016, "learning_rate": 5.211286128301602e-05, "loss": 0.964, "step": 12015 }, { "epoch": 0.6933548684817721, "grad_norm": 0.3049549162387848, "learning_rate": 5.202450083244026e-05, "loss": 1.0198, "step": 12020 }, { "epoch": 0.6936432856483618, "grad_norm": 0.30203524231910706, "learning_rate": 5.193618901123776e-05, "loss": 0.9331, "step": 12025 }, { "epoch": 0.6939317028149515, "grad_norm": 0.3099840581417084, "learning_rate": 5.184792590892397e-05, "loss": 0.9736, "step": 12030 }, { "epoch": 0.6942201199815413, "grad_norm": 0.3033467233181, "learning_rate": 5.175971161496491e-05, "loss": 1.0176, "step": 12035 }, { "epoch": 0.6945085371481311, "grad_norm": 0.27871066331863403, "learning_rate": 5.167154621877728e-05, "loss": 0.8854, "step": 12040 }, { "epoch": 0.6947969543147208, "grad_norm": 0.2897888123989105, "learning_rate": 5.158342980972805e-05, "loss": 0.9439, "step": 12045 }, { "epoch": 0.6950853714813106, "grad_norm": 0.29491978883743286, "learning_rate": 5.14953624771346e-05, "loss": 0.959, "step": 12050 }, { "epoch": 0.6953737886479003, "grad_norm": 0.3069283068180084, "learning_rate": 5.140734431026453e-05, "loss": 0.992, "step": 12055 }, { "epoch": 0.6956622058144901, "grad_norm": 0.32911011576652527, "learning_rate": 5.131937539833571e-05, "loss": 1.0046, "step": 12060 }, { "epoch": 0.6959506229810798, "grad_norm": 0.29555436968803406, "learning_rate": 5.1231455830516004e-05, "loss": 0.9604, "step": 12065 }, { "epoch": 0.6962390401476696, "grad_norm": 0.28055113554000854, "learning_rate": 5.1143585695923166e-05, "loss": 0.9453, "step": 12070 }, { "epoch": 0.6965274573142594, "grad_norm": 0.32180923223495483, "learning_rate": 5.105576508362504e-05, "loss": 0.9524, "step": 12075 }, { "epoch": 0.6968158744808491, "grad_norm": 0.29000669717788696, "learning_rate": 5.09679940826391e-05, "loss": 0.9389, "step": 12080 }, { "epoch": 0.6971042916474388, "grad_norm": 0.3079163432121277, "learning_rate": 5.0880272781932744e-05, "loss": 0.9959, "step": 12085 }, { "epoch": 0.6973927088140286, "grad_norm": 0.2964383661746979, "learning_rate": 5.079260127042267e-05, "loss": 0.916, "step": 12090 }, { "epoch": 0.6976811259806184, "grad_norm": 0.2799603044986725, "learning_rate": 5.070497963697542e-05, "loss": 0.9438, "step": 12095 }, { "epoch": 0.6979695431472082, "grad_norm": 0.28270310163497925, "learning_rate": 5.061740797040684e-05, "loss": 0.9447, "step": 12100 }, { "epoch": 0.6982579603137978, "grad_norm": 0.28763705492019653, "learning_rate": 5.0529886359482105e-05, "loss": 0.9393, "step": 12105 }, { "epoch": 0.6985463774803876, "grad_norm": 0.3127501606941223, "learning_rate": 5.044241489291569e-05, "loss": 0.9411, "step": 12110 }, { "epoch": 0.6988347946469774, "grad_norm": 0.30937451124191284, "learning_rate": 5.035499365937122e-05, "loss": 0.9552, "step": 12115 }, { "epoch": 0.6991232118135672, "grad_norm": 0.2949715852737427, "learning_rate": 5.0267622747461487e-05, "loss": 0.9715, "step": 12120 }, { "epoch": 0.6994116289801569, "grad_norm": 0.32532167434692383, "learning_rate": 5.018030224574818e-05, "loss": 0.8913, "step": 12125 }, { "epoch": 0.6997000461467466, "grad_norm": 0.2869986593723297, "learning_rate": 5.009303224274191e-05, "loss": 0.996, "step": 12130 }, { "epoch": 0.6999884633133364, "grad_norm": 0.3281845450401306, "learning_rate": 5.000581282690211e-05, "loss": 1.0192, "step": 12135 }, { "epoch": 0.7002768804799262, "grad_norm": 0.2783426344394684, "learning_rate": 4.991864408663692e-05, "loss": 0.9268, "step": 12140 }, { "epoch": 0.7005652976465159, "grad_norm": 0.32887738943099976, "learning_rate": 4.983152611030323e-05, "loss": 1.0089, "step": 12145 }, { "epoch": 0.7008537148131057, "grad_norm": 0.2962915897369385, "learning_rate": 4.974445898620622e-05, "loss": 0.9788, "step": 12150 }, { "epoch": 0.7011421319796954, "grad_norm": 0.2988428473472595, "learning_rate": 4.965744280259982e-05, "loss": 0.8686, "step": 12155 }, { "epoch": 0.7014305491462852, "grad_norm": 0.29604843258857727, "learning_rate": 4.957047764768612e-05, "loss": 0.9572, "step": 12160 }, { "epoch": 0.7017189663128749, "grad_norm": 0.30213579535484314, "learning_rate": 4.9483563609615555e-05, "loss": 0.972, "step": 12165 }, { "epoch": 0.7020073834794647, "grad_norm": 0.29231953620910645, "learning_rate": 4.939670077648676e-05, "loss": 0.9307, "step": 12170 }, { "epoch": 0.7022958006460545, "grad_norm": 0.31731900572776794, "learning_rate": 4.930988923634641e-05, "loss": 0.9383, "step": 12175 }, { "epoch": 0.7025842178126442, "grad_norm": 0.3002810776233673, "learning_rate": 4.922312907718929e-05, "loss": 1.0575, "step": 12180 }, { "epoch": 0.7028726349792339, "grad_norm": 0.32030534744262695, "learning_rate": 4.913642038695802e-05, "loss": 1.0175, "step": 12185 }, { "epoch": 0.7031610521458237, "grad_norm": 0.30039533972740173, "learning_rate": 4.9049763253543054e-05, "loss": 0.9111, "step": 12190 }, { "epoch": 0.7034494693124135, "grad_norm": 0.29103514552116394, "learning_rate": 4.896315776478263e-05, "loss": 1.0018, "step": 12195 }, { "epoch": 0.7037378864790033, "grad_norm": 0.31603309512138367, "learning_rate": 4.8876604008462554e-05, "loss": 0.9354, "step": 12200 }, { "epoch": 0.7040263036455929, "grad_norm": 0.2983769476413727, "learning_rate": 4.879010207231632e-05, "loss": 0.9266, "step": 12205 }, { "epoch": 0.7043147208121827, "grad_norm": 0.2903640866279602, "learning_rate": 4.870365204402483e-05, "loss": 0.9105, "step": 12210 }, { "epoch": 0.7046031379787725, "grad_norm": 0.2937866449356079, "learning_rate": 4.8617254011216316e-05, "loss": 1.0096, "step": 12215 }, { "epoch": 0.7048915551453623, "grad_norm": 0.29428091645240784, "learning_rate": 4.8530908061466404e-05, "loss": 1.0238, "step": 12220 }, { "epoch": 0.705179972311952, "grad_norm": 0.2882111072540283, "learning_rate": 4.844461428229782e-05, "loss": 0.9895, "step": 12225 }, { "epoch": 0.7054683894785417, "grad_norm": 0.2777700126171112, "learning_rate": 4.835837276118058e-05, "loss": 0.9442, "step": 12230 }, { "epoch": 0.7057568066451315, "grad_norm": 0.30342546105384827, "learning_rate": 4.827218358553148e-05, "loss": 0.9846, "step": 12235 }, { "epoch": 0.7060452238117213, "grad_norm": 0.29388317465782166, "learning_rate": 4.8186046842714504e-05, "loss": 0.8987, "step": 12240 }, { "epoch": 0.7063336409783111, "grad_norm": 0.29445379972457886, "learning_rate": 4.8099962620040314e-05, "loss": 0.9868, "step": 12245 }, { "epoch": 0.7066220581449008, "grad_norm": 0.3062158226966858, "learning_rate": 4.801393100476651e-05, "loss": 0.9183, "step": 12250 }, { "epoch": 0.7069104753114905, "grad_norm": 0.26878073811531067, "learning_rate": 4.792795208409714e-05, "loss": 0.9274, "step": 12255 }, { "epoch": 0.7071988924780803, "grad_norm": 0.2894469201564789, "learning_rate": 4.784202594518298e-05, "loss": 0.9681, "step": 12260 }, { "epoch": 0.7074873096446701, "grad_norm": 0.33790189027786255, "learning_rate": 4.775615267512133e-05, "loss": 0.935, "step": 12265 }, { "epoch": 0.7077757268112598, "grad_norm": 0.293639212846756, "learning_rate": 4.767033236095585e-05, "loss": 0.9128, "step": 12270 }, { "epoch": 0.7080641439778496, "grad_norm": 0.3044992685317993, "learning_rate": 4.758456508967651e-05, "loss": 0.9461, "step": 12275 }, { "epoch": 0.7083525611444393, "grad_norm": 0.3080674111843109, "learning_rate": 4.749885094821951e-05, "loss": 1.0456, "step": 12280 }, { "epoch": 0.7086409783110291, "grad_norm": 0.2785399556159973, "learning_rate": 4.7413190023467266e-05, "loss": 0.9056, "step": 12285 }, { "epoch": 0.7089293954776188, "grad_norm": 0.27793818712234497, "learning_rate": 4.732758240224818e-05, "loss": 0.9389, "step": 12290 }, { "epoch": 0.7092178126442086, "grad_norm": 0.29727405309677124, "learning_rate": 4.7242028171336675e-05, "loss": 0.9914, "step": 12295 }, { "epoch": 0.7095062298107984, "grad_norm": 0.3035670220851898, "learning_rate": 4.715652741745298e-05, "loss": 0.9744, "step": 12300 }, { "epoch": 0.7097946469773881, "grad_norm": 0.33569151163101196, "learning_rate": 4.7071080227263164e-05, "loss": 0.9731, "step": 12305 }, { "epoch": 0.7100830641439778, "grad_norm": 0.29499197006225586, "learning_rate": 4.6985686687379103e-05, "loss": 0.9284, "step": 12310 }, { "epoch": 0.7103714813105676, "grad_norm": 0.27395883202552795, "learning_rate": 4.690034688435809e-05, "loss": 0.9208, "step": 12315 }, { "epoch": 0.7106598984771574, "grad_norm": 0.30385783314704895, "learning_rate": 4.6815060904703046e-05, "loss": 1.0277, "step": 12320 }, { "epoch": 0.7109483156437472, "grad_norm": 0.28144723176956177, "learning_rate": 4.672982883486243e-05, "loss": 0.9771, "step": 12325 }, { "epoch": 0.7112367328103368, "grad_norm": 0.3171987235546112, "learning_rate": 4.664465076122991e-05, "loss": 0.9382, "step": 12330 }, { "epoch": 0.7115251499769266, "grad_norm": 0.33855724334716797, "learning_rate": 4.655952677014449e-05, "loss": 0.9713, "step": 12335 }, { "epoch": 0.7118135671435164, "grad_norm": 0.30471786856651306, "learning_rate": 4.647445694789032e-05, "loss": 0.9294, "step": 12340 }, { "epoch": 0.7121019843101062, "grad_norm": 0.2840113937854767, "learning_rate": 4.638944138069672e-05, "loss": 0.9443, "step": 12345 }, { "epoch": 0.7123904014766959, "grad_norm": 0.3151952028274536, "learning_rate": 4.630448015473794e-05, "loss": 0.9651, "step": 12350 }, { "epoch": 0.7126788186432856, "grad_norm": 0.28496062755584717, "learning_rate": 4.621957335613316e-05, "loss": 0.9156, "step": 12355 }, { "epoch": 0.7129672358098754, "grad_norm": 0.27828526496887207, "learning_rate": 4.613472107094641e-05, "loss": 0.9541, "step": 12360 }, { "epoch": 0.7132556529764652, "grad_norm": 0.320968896150589, "learning_rate": 4.6049923385186414e-05, "loss": 1.021, "step": 12365 }, { "epoch": 0.7135440701430549, "grad_norm": 0.3339998424053192, "learning_rate": 4.596518038480667e-05, "loss": 1.0306, "step": 12370 }, { "epoch": 0.7138324873096447, "grad_norm": 0.29373204708099365, "learning_rate": 4.5880492155705134e-05, "loss": 0.9256, "step": 12375 }, { "epoch": 0.7141209044762344, "grad_norm": 0.28952670097351074, "learning_rate": 4.579585878372428e-05, "loss": 0.9118, "step": 12380 }, { "epoch": 0.7144093216428242, "grad_norm": 0.31708285212516785, "learning_rate": 4.571128035465096e-05, "loss": 0.9585, "step": 12385 }, { "epoch": 0.7146977388094139, "grad_norm": 0.27138781547546387, "learning_rate": 4.562675695421634e-05, "loss": 0.9503, "step": 12390 }, { "epoch": 0.7149861559760037, "grad_norm": 0.29679614305496216, "learning_rate": 4.554228866809591e-05, "loss": 1.0058, "step": 12395 }, { "epoch": 0.7152745731425935, "grad_norm": 0.30243879556655884, "learning_rate": 4.545787558190907e-05, "loss": 0.8985, "step": 12400 }, { "epoch": 0.7155629903091832, "grad_norm": 0.2855212986469269, "learning_rate": 4.53735177812195e-05, "loss": 1.0001, "step": 12405 }, { "epoch": 0.7158514074757729, "grad_norm": 0.29729998111724854, "learning_rate": 4.5289215351534666e-05, "loss": 0.9448, "step": 12410 }, { "epoch": 0.7161398246423627, "grad_norm": 0.2747268080711365, "learning_rate": 4.520496837830609e-05, "loss": 0.9207, "step": 12415 }, { "epoch": 0.7164282418089525, "grad_norm": 0.2757469117641449, "learning_rate": 4.512077694692888e-05, "loss": 0.9151, "step": 12420 }, { "epoch": 0.7167166589755423, "grad_norm": 0.2848719358444214, "learning_rate": 4.503664114274193e-05, "loss": 0.9135, "step": 12425 }, { "epoch": 0.7170050761421319, "grad_norm": 0.26633986830711365, "learning_rate": 4.495256105102784e-05, "loss": 0.9802, "step": 12430 }, { "epoch": 0.7172934933087217, "grad_norm": 0.29793140292167664, "learning_rate": 4.4868536757012614e-05, "loss": 0.9695, "step": 12435 }, { "epoch": 0.7175819104753115, "grad_norm": 0.2670263648033142, "learning_rate": 4.478456834586574e-05, "loss": 0.9149, "step": 12440 }, { "epoch": 0.7178703276419013, "grad_norm": 0.35273703932762146, "learning_rate": 4.4700655902700075e-05, "loss": 0.9942, "step": 12445 }, { "epoch": 0.718158744808491, "grad_norm": 0.29049214720726013, "learning_rate": 4.4616799512571675e-05, "loss": 0.9267, "step": 12450 }, { "epoch": 0.7184471619750807, "grad_norm": 0.2781081199645996, "learning_rate": 4.453299926047997e-05, "loss": 0.9169, "step": 12455 }, { "epoch": 0.7187355791416705, "grad_norm": 0.315525084733963, "learning_rate": 4.4449255231367183e-05, "loss": 1.0033, "step": 12460 }, { "epoch": 0.7190239963082603, "grad_norm": 0.2818520665168762, "learning_rate": 4.4365567510118854e-05, "loss": 0.9783, "step": 12465 }, { "epoch": 0.71931241347485, "grad_norm": 0.3043684959411621, "learning_rate": 4.428193618156322e-05, "loss": 1.0024, "step": 12470 }, { "epoch": 0.7196008306414398, "grad_norm": 0.29822173714637756, "learning_rate": 4.419836133047157e-05, "loss": 1.031, "step": 12475 }, { "epoch": 0.7198892478080295, "grad_norm": 0.27869507670402527, "learning_rate": 4.411484304155771e-05, "loss": 0.9294, "step": 12480 }, { "epoch": 0.7201776649746193, "grad_norm": 0.2925718426704407, "learning_rate": 4.403138139947822e-05, "loss": 0.9644, "step": 12485 }, { "epoch": 0.720466082141209, "grad_norm": 0.2630343735218048, "learning_rate": 4.394797648883236e-05, "loss": 0.9852, "step": 12490 }, { "epoch": 0.7207544993077988, "grad_norm": 0.2998071014881134, "learning_rate": 4.386462839416173e-05, "loss": 1.0253, "step": 12495 }, { "epoch": 0.7210429164743886, "grad_norm": 0.30434897541999817, "learning_rate": 4.378133719995044e-05, "loss": 0.9224, "step": 12500 }, { "epoch": 0.7213313336409783, "grad_norm": 0.30247268080711365, "learning_rate": 4.3698102990624825e-05, "loss": 1.0018, "step": 12505 }, { "epoch": 0.721619750807568, "grad_norm": 0.283896267414093, "learning_rate": 4.36149258505536e-05, "loss": 0.9589, "step": 12510 }, { "epoch": 0.7219081679741578, "grad_norm": 0.2718851566314697, "learning_rate": 4.353180586404752e-05, "loss": 0.8899, "step": 12515 }, { "epoch": 0.7221965851407476, "grad_norm": 0.3284825384616852, "learning_rate": 4.344874311535944e-05, "loss": 0.8988, "step": 12520 }, { "epoch": 0.7224850023073374, "grad_norm": 0.29092779755592346, "learning_rate": 4.336573768868418e-05, "loss": 0.8914, "step": 12525 }, { "epoch": 0.722773419473927, "grad_norm": 0.2990238666534424, "learning_rate": 4.3282789668158476e-05, "loss": 0.9355, "step": 12530 }, { "epoch": 0.7230618366405168, "grad_norm": 0.29451605677604675, "learning_rate": 4.319989913786093e-05, "loss": 0.9247, "step": 12535 }, { "epoch": 0.7233502538071066, "grad_norm": 0.29550743103027344, "learning_rate": 4.31170661818118e-05, "loss": 0.9232, "step": 12540 }, { "epoch": 0.7236386709736964, "grad_norm": 0.3342383801937103, "learning_rate": 4.303429088397293e-05, "loss": 0.9329, "step": 12545 }, { "epoch": 0.7239270881402861, "grad_norm": 0.28196507692337036, "learning_rate": 4.295157332824785e-05, "loss": 0.9368, "step": 12550 }, { "epoch": 0.7242155053068758, "grad_norm": 0.29440298676490784, "learning_rate": 4.2868913598481464e-05, "loss": 0.9863, "step": 12555 }, { "epoch": 0.7245039224734656, "grad_norm": 0.28593847155570984, "learning_rate": 4.27863117784602e-05, "loss": 0.9481, "step": 12560 }, { "epoch": 0.7247923396400554, "grad_norm": 0.2748074233531952, "learning_rate": 4.270376795191152e-05, "loss": 0.9292, "step": 12565 }, { "epoch": 0.7250807568066451, "grad_norm": 0.3184608817100525, "learning_rate": 4.262128220250441e-05, "loss": 0.9201, "step": 12570 }, { "epoch": 0.7253691739732349, "grad_norm": 0.27379128336906433, "learning_rate": 4.2538854613848776e-05, "loss": 0.923, "step": 12575 }, { "epoch": 0.7256575911398246, "grad_norm": 0.3166322410106659, "learning_rate": 4.245648526949567e-05, "loss": 0.9177, "step": 12580 }, { "epoch": 0.7259460083064144, "grad_norm": 0.3264232873916626, "learning_rate": 4.237417425293706e-05, "loss": 0.9677, "step": 12585 }, { "epoch": 0.7262344254730042, "grad_norm": 0.29863858222961426, "learning_rate": 4.229192164760576e-05, "loss": 1.0023, "step": 12590 }, { "epoch": 0.7265228426395939, "grad_norm": 0.3183000981807709, "learning_rate": 4.220972753687552e-05, "loss": 0.9889, "step": 12595 }, { "epoch": 0.7268112598061837, "grad_norm": 0.3113453686237335, "learning_rate": 4.212759200406065e-05, "loss": 0.9502, "step": 12600 }, { "epoch": 0.7270996769727734, "grad_norm": 0.3108140528202057, "learning_rate": 4.204551513241615e-05, "loss": 0.883, "step": 12605 }, { "epoch": 0.7273880941393632, "grad_norm": 0.2969469428062439, "learning_rate": 4.1963497005137516e-05, "loss": 0.9898, "step": 12610 }, { "epoch": 0.7276765113059529, "grad_norm": 0.3002479672431946, "learning_rate": 4.188153770536072e-05, "loss": 0.9679, "step": 12615 }, { "epoch": 0.7279649284725427, "grad_norm": 0.30599701404571533, "learning_rate": 4.179963731616221e-05, "loss": 0.937, "step": 12620 }, { "epoch": 0.7282533456391325, "grad_norm": 0.28612884879112244, "learning_rate": 4.171779592055848e-05, "loss": 0.9575, "step": 12625 }, { "epoch": 0.7285417628057222, "grad_norm": 0.2787284255027771, "learning_rate": 4.163601360150646e-05, "loss": 0.9889, "step": 12630 }, { "epoch": 0.7288301799723119, "grad_norm": 0.3069520890712738, "learning_rate": 4.155429044190311e-05, "loss": 0.9688, "step": 12635 }, { "epoch": 0.7291185971389017, "grad_norm": 0.26980647444725037, "learning_rate": 4.147262652458539e-05, "loss": 0.9243, "step": 12640 }, { "epoch": 0.7294070143054915, "grad_norm": 0.29087504744529724, "learning_rate": 4.139102193233025e-05, "loss": 0.977, "step": 12645 }, { "epoch": 0.7296954314720813, "grad_norm": 0.286493182182312, "learning_rate": 4.130947674785447e-05, "loss": 0.9924, "step": 12650 }, { "epoch": 0.7299838486386709, "grad_norm": 0.3031412661075592, "learning_rate": 4.1227991053814694e-05, "loss": 0.934, "step": 12655 }, { "epoch": 0.7302722658052607, "grad_norm": 0.2925845980644226, "learning_rate": 4.114656493280721e-05, "loss": 0.9979, "step": 12660 }, { "epoch": 0.7305606829718505, "grad_norm": 0.3840329349040985, "learning_rate": 4.106519846736788e-05, "loss": 1.0479, "step": 12665 }, { "epoch": 0.7308491001384403, "grad_norm": 0.2865562438964844, "learning_rate": 4.098389173997218e-05, "loss": 0.9629, "step": 12670 }, { "epoch": 0.73113751730503, "grad_norm": 0.270366907119751, "learning_rate": 4.090264483303493e-05, "loss": 0.9989, "step": 12675 }, { "epoch": 0.7314259344716197, "grad_norm": 0.27948349714279175, "learning_rate": 4.082145782891046e-05, "loss": 0.9558, "step": 12680 }, { "epoch": 0.7317143516382095, "grad_norm": 0.27884334325790405, "learning_rate": 4.074033080989227e-05, "loss": 0.9059, "step": 12685 }, { "epoch": 0.7320027688047993, "grad_norm": 0.30114471912384033, "learning_rate": 4.065926385821307e-05, "loss": 0.9401, "step": 12690 }, { "epoch": 0.732291185971389, "grad_norm": 0.2634121775627136, "learning_rate": 4.057825705604468e-05, "loss": 0.9164, "step": 12695 }, { "epoch": 0.7325796031379788, "grad_norm": 0.2946275472640991, "learning_rate": 4.049731048549804e-05, "loss": 0.9005, "step": 12700 }, { "epoch": 0.7328680203045685, "grad_norm": 0.28885963559150696, "learning_rate": 4.041642422862295e-05, "loss": 0.917, "step": 12705 }, { "epoch": 0.7331564374711583, "grad_norm": 0.28542929887771606, "learning_rate": 4.033559836740801e-05, "loss": 1.0206, "step": 12710 }, { "epoch": 0.733444854637748, "grad_norm": 0.2643488943576813, "learning_rate": 4.0254832983780786e-05, "loss": 0.9287, "step": 12715 }, { "epoch": 0.7337332718043378, "grad_norm": 0.28975099325180054, "learning_rate": 4.017412815960735e-05, "loss": 0.8696, "step": 12720 }, { "epoch": 0.7340216889709276, "grad_norm": 0.2887983024120331, "learning_rate": 4.0093483976692616e-05, "loss": 1.0434, "step": 12725 }, { "epoch": 0.7343101061375173, "grad_norm": 0.295878529548645, "learning_rate": 4.001290051677975e-05, "loss": 0.9562, "step": 12730 }, { "epoch": 0.734598523304107, "grad_norm": 0.29598087072372437, "learning_rate": 3.993237786155055e-05, "loss": 1.0612, "step": 12735 }, { "epoch": 0.7348869404706968, "grad_norm": 0.29324209690093994, "learning_rate": 3.985191609262519e-05, "loss": 0.877, "step": 12740 }, { "epoch": 0.7351753576372866, "grad_norm": 0.26347586512565613, "learning_rate": 3.9771515291562033e-05, "loss": 0.9648, "step": 12745 }, { "epoch": 0.7354637748038764, "grad_norm": 0.3339488208293915, "learning_rate": 3.969117553985772e-05, "loss": 0.9263, "step": 12750 }, { "epoch": 0.735752191970466, "grad_norm": 0.2952229678630829, "learning_rate": 3.961089691894692e-05, "loss": 0.9163, "step": 12755 }, { "epoch": 0.7360406091370558, "grad_norm": 0.27959832549095154, "learning_rate": 3.9530679510202476e-05, "loss": 0.9824, "step": 12760 }, { "epoch": 0.7363290263036456, "grad_norm": 0.2998709976673126, "learning_rate": 3.945052339493507e-05, "loss": 0.9003, "step": 12765 }, { "epoch": 0.7366174434702354, "grad_norm": 0.29380345344543457, "learning_rate": 3.9370428654393296e-05, "loss": 0.9567, "step": 12770 }, { "epoch": 0.7369058606368251, "grad_norm": 0.313002347946167, "learning_rate": 3.929039536976353e-05, "loss": 0.8868, "step": 12775 }, { "epoch": 0.7371942778034148, "grad_norm": 0.2953389883041382, "learning_rate": 3.921042362216983e-05, "loss": 1.0127, "step": 12780 }, { "epoch": 0.7374826949700046, "grad_norm": 0.3077605366706848, "learning_rate": 3.913051349267399e-05, "loss": 0.9811, "step": 12785 }, { "epoch": 0.7377711121365944, "grad_norm": 0.30430668592453003, "learning_rate": 3.905066506227515e-05, "loss": 0.963, "step": 12790 }, { "epoch": 0.7380595293031841, "grad_norm": 0.4236873388290405, "learning_rate": 3.897087841191009e-05, "loss": 0.9818, "step": 12795 }, { "epoch": 0.7383479464697739, "grad_norm": 0.3383219242095947, "learning_rate": 3.8891153622452904e-05, "loss": 0.9542, "step": 12800 }, { "epoch": 0.7386363636363636, "grad_norm": 0.2933284640312195, "learning_rate": 3.881149077471495e-05, "loss": 0.937, "step": 12805 }, { "epoch": 0.7389247808029534, "grad_norm": 0.2885740101337433, "learning_rate": 3.873188994944483e-05, "loss": 0.9057, "step": 12810 }, { "epoch": 0.7392131979695431, "grad_norm": 0.3335943818092346, "learning_rate": 3.8652351227328256e-05, "loss": 0.8971, "step": 12815 }, { "epoch": 0.7395016151361329, "grad_norm": 0.34636521339416504, "learning_rate": 3.857287468898806e-05, "loss": 0.9614, "step": 12820 }, { "epoch": 0.7397900323027227, "grad_norm": 0.3440823256969452, "learning_rate": 3.8493460414983976e-05, "loss": 0.9456, "step": 12825 }, { "epoch": 0.7400784494693124, "grad_norm": 0.2916651666164398, "learning_rate": 3.8414108485812613e-05, "loss": 0.9229, "step": 12830 }, { "epoch": 0.7403668666359021, "grad_norm": 0.2580519914627075, "learning_rate": 3.833481898190745e-05, "loss": 0.892, "step": 12835 }, { "epoch": 0.7406552838024919, "grad_norm": 0.28569847345352173, "learning_rate": 3.825559198363861e-05, "loss": 0.9541, "step": 12840 }, { "epoch": 0.7409437009690817, "grad_norm": 0.28932294249534607, "learning_rate": 3.8176427571312945e-05, "loss": 1.0493, "step": 12845 }, { "epoch": 0.7412321181356715, "grad_norm": 0.3193461000919342, "learning_rate": 3.8097325825173826e-05, "loss": 1.0026, "step": 12850 }, { "epoch": 0.7415205353022611, "grad_norm": 0.26970720291137695, "learning_rate": 3.801828682540107e-05, "loss": 0.8857, "step": 12855 }, { "epoch": 0.7418089524688509, "grad_norm": 0.2908310890197754, "learning_rate": 3.793931065211096e-05, "loss": 0.974, "step": 12860 }, { "epoch": 0.7420973696354407, "grad_norm": 0.28948134183883667, "learning_rate": 3.7860397385356006e-05, "loss": 0.9374, "step": 12865 }, { "epoch": 0.7423857868020305, "grad_norm": 0.3087465763092041, "learning_rate": 3.778154710512512e-05, "loss": 0.9918, "step": 12870 }, { "epoch": 0.7426742039686202, "grad_norm": 0.28556784987449646, "learning_rate": 3.770275989134312e-05, "loss": 0.9906, "step": 12875 }, { "epoch": 0.7429626211352099, "grad_norm": 0.30968794226646423, "learning_rate": 3.762403582387114e-05, "loss": 0.967, "step": 12880 }, { "epoch": 0.7432510383017997, "grad_norm": 0.30540212988853455, "learning_rate": 3.754537498250617e-05, "loss": 0.9226, "step": 12885 }, { "epoch": 0.7435394554683895, "grad_norm": 0.29100221395492554, "learning_rate": 3.746677744698114e-05, "loss": 0.9921, "step": 12890 }, { "epoch": 0.7438278726349792, "grad_norm": 0.26872479915618896, "learning_rate": 3.738824329696483e-05, "loss": 0.9524, "step": 12895 }, { "epoch": 0.744116289801569, "grad_norm": 0.2934696674346924, "learning_rate": 3.730977261206171e-05, "loss": 0.9088, "step": 12900 }, { "epoch": 0.7444047069681587, "grad_norm": 0.31584224104881287, "learning_rate": 3.723136547181205e-05, "loss": 1.0156, "step": 12905 }, { "epoch": 0.7446931241347485, "grad_norm": 0.2779346704483032, "learning_rate": 3.715302195569159e-05, "loss": 0.9704, "step": 12910 }, { "epoch": 0.7449815413013382, "grad_norm": 0.30584028363227844, "learning_rate": 3.7074742143111604e-05, "loss": 0.9977, "step": 12915 }, { "epoch": 0.745269958467928, "grad_norm": 0.29079389572143555, "learning_rate": 3.69965261134188e-05, "loss": 0.9664, "step": 12920 }, { "epoch": 0.7455583756345178, "grad_norm": 0.289608895778656, "learning_rate": 3.691837394589527e-05, "loss": 0.9142, "step": 12925 }, { "epoch": 0.7458467928011075, "grad_norm": 0.294889360666275, "learning_rate": 3.684028571975836e-05, "loss": 0.8972, "step": 12930 }, { "epoch": 0.7461352099676973, "grad_norm": 0.2971971035003662, "learning_rate": 3.6762261514160504e-05, "loss": 0.9482, "step": 12935 }, { "epoch": 0.746423627134287, "grad_norm": 0.28286072611808777, "learning_rate": 3.6684301408189406e-05, "loss": 0.9655, "step": 12940 }, { "epoch": 0.7467120443008768, "grad_norm": 0.2943524420261383, "learning_rate": 3.660640548086765e-05, "loss": 0.9522, "step": 12945 }, { "epoch": 0.7470004614674666, "grad_norm": 0.2993481457233429, "learning_rate": 3.652857381115293e-05, "loss": 0.9377, "step": 12950 }, { "epoch": 0.7472888786340564, "grad_norm": 0.3094017803668976, "learning_rate": 3.6450806477937625e-05, "loss": 0.9582, "step": 12955 }, { "epoch": 0.747577295800646, "grad_norm": 0.29749640822410583, "learning_rate": 3.637310356004897e-05, "loss": 0.9853, "step": 12960 }, { "epoch": 0.7478657129672358, "grad_norm": 0.2731906771659851, "learning_rate": 3.6295465136249006e-05, "loss": 0.9099, "step": 12965 }, { "epoch": 0.7481541301338256, "grad_norm": 0.30480098724365234, "learning_rate": 3.6217891285234265e-05, "loss": 0.9377, "step": 12970 }, { "epoch": 0.7484425473004154, "grad_norm": 0.3027295172214508, "learning_rate": 3.614038208563588e-05, "loss": 0.9813, "step": 12975 }, { "epoch": 0.748730964467005, "grad_norm": 0.30749472975730896, "learning_rate": 3.6062937616019433e-05, "loss": 0.96, "step": 12980 }, { "epoch": 0.7490193816335948, "grad_norm": 0.28358033299446106, "learning_rate": 3.598555795488496e-05, "loss": 0.9461, "step": 12985 }, { "epoch": 0.7493077988001846, "grad_norm": 0.28506404161453247, "learning_rate": 3.5908243180666734e-05, "loss": 0.9203, "step": 12990 }, { "epoch": 0.7495962159667744, "grad_norm": 0.27330875396728516, "learning_rate": 3.5830993371733254e-05, "loss": 0.9645, "step": 12995 }, { "epoch": 0.7498846331333641, "grad_norm": 0.2925858795642853, "learning_rate": 3.5753808606387226e-05, "loss": 0.8993, "step": 13000 }, { "epoch": 0.7501730502999538, "grad_norm": 0.2975315749645233, "learning_rate": 3.5676688962865344e-05, "loss": 0.9683, "step": 13005 }, { "epoch": 0.7504614674665436, "grad_norm": 0.25685515999794006, "learning_rate": 3.55996345193384e-05, "loss": 0.8821, "step": 13010 }, { "epoch": 0.7507498846331334, "grad_norm": 0.2893523871898651, "learning_rate": 3.5522645353911013e-05, "loss": 0.9397, "step": 13015 }, { "epoch": 0.7510383017997231, "grad_norm": 0.2877350449562073, "learning_rate": 3.544572154462165e-05, "loss": 0.9481, "step": 13020 }, { "epoch": 0.7513267189663129, "grad_norm": 0.4357761740684509, "learning_rate": 3.5368863169442556e-05, "loss": 0.9335, "step": 13025 }, { "epoch": 0.7516151361329027, "grad_norm": 0.3034375309944153, "learning_rate": 3.529207030627959e-05, "loss": 0.9487, "step": 13030 }, { "epoch": 0.7519035532994924, "grad_norm": 0.3313092291355133, "learning_rate": 3.5215343032972356e-05, "loss": 1.0163, "step": 13035 }, { "epoch": 0.7521919704660821, "grad_norm": 0.3019075393676758, "learning_rate": 3.513868142729373e-05, "loss": 0.9449, "step": 13040 }, { "epoch": 0.7524803876326719, "grad_norm": 0.2984979450702667, "learning_rate": 3.506208556695028e-05, "loss": 0.9403, "step": 13045 }, { "epoch": 0.7527688047992617, "grad_norm": 0.299967885017395, "learning_rate": 3.498555552958176e-05, "loss": 1.0028, "step": 13050 }, { "epoch": 0.7530572219658515, "grad_norm": 0.2753196954727173, "learning_rate": 3.490909139276127e-05, "loss": 0.8964, "step": 13055 }, { "epoch": 0.7533456391324411, "grad_norm": 0.3142189681529999, "learning_rate": 3.483269323399512e-05, "loss": 0.9908, "step": 13060 }, { "epoch": 0.7536340562990309, "grad_norm": 0.28006476163864136, "learning_rate": 3.475636113072266e-05, "loss": 0.9061, "step": 13065 }, { "epoch": 0.7539224734656207, "grad_norm": 0.3137742578983307, "learning_rate": 3.468009516031644e-05, "loss": 0.9464, "step": 13070 }, { "epoch": 0.7542108906322105, "grad_norm": 0.3132518231868744, "learning_rate": 3.4603895400081846e-05, "loss": 0.8994, "step": 13075 }, { "epoch": 0.7544993077988001, "grad_norm": 0.27635499835014343, "learning_rate": 3.452776192725717e-05, "loss": 0.9249, "step": 13080 }, { "epoch": 0.7547877249653899, "grad_norm": 0.29366254806518555, "learning_rate": 3.445169481901357e-05, "loss": 0.9538, "step": 13085 }, { "epoch": 0.7550761421319797, "grad_norm": 0.2829080820083618, "learning_rate": 3.437569415245483e-05, "loss": 1.0516, "step": 13090 }, { "epoch": 0.7553645592985695, "grad_norm": 0.30424532294273376, "learning_rate": 3.4299760004617573e-05, "loss": 0.934, "step": 13095 }, { "epoch": 0.7556529764651592, "grad_norm": 0.30195289850234985, "learning_rate": 3.422389245247075e-05, "loss": 0.9592, "step": 13100 }, { "epoch": 0.755941393631749, "grad_norm": 0.3074832856655121, "learning_rate": 3.414809157291603e-05, "loss": 1.0067, "step": 13105 }, { "epoch": 0.7562298107983387, "grad_norm": 0.2848927974700928, "learning_rate": 3.407235744278734e-05, "loss": 0.8393, "step": 13110 }, { "epoch": 0.7565182279649285, "grad_norm": 0.29484933614730835, "learning_rate": 3.3996690138851115e-05, "loss": 0.9396, "step": 13115 }, { "epoch": 0.7568066451315182, "grad_norm": 0.30370378494262695, "learning_rate": 3.3921089737805866e-05, "loss": 0.9017, "step": 13120 }, { "epoch": 0.757095062298108, "grad_norm": 0.30583134293556213, "learning_rate": 3.384555631628236e-05, "loss": 0.9263, "step": 13125 }, { "epoch": 0.7573834794646978, "grad_norm": 0.27707552909851074, "learning_rate": 3.3770089950843564e-05, "loss": 0.9476, "step": 13130 }, { "epoch": 0.7576718966312875, "grad_norm": 0.31780895590782166, "learning_rate": 3.3694690717984354e-05, "loss": 0.9377, "step": 13135 }, { "epoch": 0.7579603137978772, "grad_norm": 0.3167707026004791, "learning_rate": 3.361935869413163e-05, "loss": 0.9783, "step": 13140 }, { "epoch": 0.758248730964467, "grad_norm": 0.30130988359451294, "learning_rate": 3.354409395564412e-05, "loss": 1.0485, "step": 13145 }, { "epoch": 0.7585371481310568, "grad_norm": 0.30067166686058044, "learning_rate": 3.3468896578812344e-05, "loss": 0.955, "step": 13150 }, { "epoch": 0.7588255652976466, "grad_norm": 0.27905625104904175, "learning_rate": 3.3393766639858635e-05, "loss": 0.9141, "step": 13155 }, { "epoch": 0.7591139824642362, "grad_norm": 0.27241066098213196, "learning_rate": 3.331870421493688e-05, "loss": 0.9288, "step": 13160 }, { "epoch": 0.759402399630826, "grad_norm": 0.34241122007369995, "learning_rate": 3.324370938013252e-05, "loss": 1.0194, "step": 13165 }, { "epoch": 0.7596908167974158, "grad_norm": 0.3029732406139374, "learning_rate": 3.316878221146253e-05, "loss": 1.0148, "step": 13170 }, { "epoch": 0.7599792339640056, "grad_norm": 0.30240458250045776, "learning_rate": 3.3093922784875344e-05, "loss": 0.9751, "step": 13175 }, { "epoch": 0.7602676511305952, "grad_norm": 0.2856149673461914, "learning_rate": 3.301913117625065e-05, "loss": 0.9089, "step": 13180 }, { "epoch": 0.760556068297185, "grad_norm": 0.29663828015327454, "learning_rate": 3.2944407461399326e-05, "loss": 0.9934, "step": 13185 }, { "epoch": 0.7608444854637748, "grad_norm": 0.33323925733566284, "learning_rate": 3.286975171606362e-05, "loss": 0.92, "step": 13190 }, { "epoch": 0.7611329026303646, "grad_norm": 0.3022535741329193, "learning_rate": 3.279516401591677e-05, "loss": 0.9502, "step": 13195 }, { "epoch": 0.7614213197969543, "grad_norm": 0.29512476921081543, "learning_rate": 3.272064443656303e-05, "loss": 0.9373, "step": 13200 }, { "epoch": 0.761709736963544, "grad_norm": 0.2822323441505432, "learning_rate": 3.264619305353762e-05, "loss": 0.9714, "step": 13205 }, { "epoch": 0.7619981541301338, "grad_norm": 0.27018293738365173, "learning_rate": 3.257180994230671e-05, "loss": 0.9341, "step": 13210 }, { "epoch": 0.7622865712967236, "grad_norm": 0.2699947655200958, "learning_rate": 3.249749517826715e-05, "loss": 0.9173, "step": 13215 }, { "epoch": 0.7625749884633133, "grad_norm": 0.38015538454055786, "learning_rate": 3.2423248836746575e-05, "loss": 0.953, "step": 13220 }, { "epoch": 0.7628634056299031, "grad_norm": 0.293496698141098, "learning_rate": 3.234907099300327e-05, "loss": 0.9636, "step": 13225 }, { "epoch": 0.7631518227964929, "grad_norm": 0.3389447331428528, "learning_rate": 3.227496172222603e-05, "loss": 0.954, "step": 13230 }, { "epoch": 0.7634402399630826, "grad_norm": 0.2947312891483307, "learning_rate": 3.220092109953424e-05, "loss": 0.9586, "step": 13235 }, { "epoch": 0.7637286571296723, "grad_norm": 0.2841379940509796, "learning_rate": 3.212694919997764e-05, "loss": 1.0088, "step": 13240 }, { "epoch": 0.7640170742962621, "grad_norm": 0.30646151304244995, "learning_rate": 3.205304609853629e-05, "loss": 0.9978, "step": 13245 }, { "epoch": 0.7643054914628519, "grad_norm": 0.2783312201499939, "learning_rate": 3.197921187012055e-05, "loss": 0.9793, "step": 13250 }, { "epoch": 0.7645939086294417, "grad_norm": 0.267666757106781, "learning_rate": 3.190544658957094e-05, "loss": 0.9076, "step": 13255 }, { "epoch": 0.7648823257960313, "grad_norm": 0.26863396167755127, "learning_rate": 3.1831750331658196e-05, "loss": 0.8923, "step": 13260 }, { "epoch": 0.7651707429626211, "grad_norm": 0.29262346029281616, "learning_rate": 3.1758123171082874e-05, "loss": 0.9693, "step": 13265 }, { "epoch": 0.7654591601292109, "grad_norm": 0.30880671739578247, "learning_rate": 3.168456518247575e-05, "loss": 0.9445, "step": 13270 }, { "epoch": 0.7657475772958007, "grad_norm": 0.3033742308616638, "learning_rate": 3.161107644039728e-05, "loss": 0.9311, "step": 13275 }, { "epoch": 0.7660359944623903, "grad_norm": 0.2817222774028778, "learning_rate": 3.153765701933784e-05, "loss": 0.9141, "step": 13280 }, { "epoch": 0.7663244116289801, "grad_norm": 0.29499107599258423, "learning_rate": 3.1464306993717505e-05, "loss": 1.0122, "step": 13285 }, { "epoch": 0.7666128287955699, "grad_norm": 0.3005344569683075, "learning_rate": 3.1391026437885984e-05, "loss": 0.9728, "step": 13290 }, { "epoch": 0.7669012459621597, "grad_norm": 0.32257968187332153, "learning_rate": 3.1317815426122646e-05, "loss": 0.9155, "step": 13295 }, { "epoch": 0.7671896631287495, "grad_norm": 0.3038580119609833, "learning_rate": 3.12446740326363e-05, "loss": 1.0097, "step": 13300 }, { "epoch": 0.7674780802953391, "grad_norm": 0.30271029472351074, "learning_rate": 3.117160233156521e-05, "loss": 0.9174, "step": 13305 }, { "epoch": 0.7677664974619289, "grad_norm": 0.29580479860305786, "learning_rate": 3.109860039697699e-05, "loss": 1.0233, "step": 13310 }, { "epoch": 0.7680549146285187, "grad_norm": 0.3255249857902527, "learning_rate": 3.1025668302868505e-05, "loss": 1.0239, "step": 13315 }, { "epoch": 0.7683433317951085, "grad_norm": 0.2764644920825958, "learning_rate": 3.0952806123165945e-05, "loss": 0.906, "step": 13320 }, { "epoch": 0.7686317489616982, "grad_norm": 0.29159998893737793, "learning_rate": 3.0880013931724503e-05, "loss": 0.9322, "step": 13325 }, { "epoch": 0.768920166128288, "grad_norm": 0.2871926426887512, "learning_rate": 3.0807291802328494e-05, "loss": 0.9471, "step": 13330 }, { "epoch": 0.7692085832948777, "grad_norm": 0.2964244484901428, "learning_rate": 3.073463980869117e-05, "loss": 0.9872, "step": 13335 }, { "epoch": 0.7694970004614675, "grad_norm": 0.2808452844619751, "learning_rate": 3.066205802445477e-05, "loss": 0.9309, "step": 13340 }, { "epoch": 0.7697854176280572, "grad_norm": 0.3023243248462677, "learning_rate": 3.0589546523190325e-05, "loss": 0.9376, "step": 13345 }, { "epoch": 0.770073834794647, "grad_norm": 0.28334978222846985, "learning_rate": 3.0517105378397536e-05, "loss": 0.9403, "step": 13350 }, { "epoch": 0.7703622519612368, "grad_norm": 0.29692623019218445, "learning_rate": 3.044473466350496e-05, "loss": 0.9514, "step": 13355 }, { "epoch": 0.7706506691278265, "grad_norm": 0.28074130415916443, "learning_rate": 3.037243445186965e-05, "loss": 0.875, "step": 13360 }, { "epoch": 0.7709390862944162, "grad_norm": 0.2846575081348419, "learning_rate": 3.030020481677721e-05, "loss": 0.9103, "step": 13365 }, { "epoch": 0.771227503461006, "grad_norm": 0.2886015474796295, "learning_rate": 3.0228045831441733e-05, "loss": 0.9395, "step": 13370 }, { "epoch": 0.7715159206275958, "grad_norm": 0.31642863154411316, "learning_rate": 3.0155957569005634e-05, "loss": 0.9715, "step": 13375 }, { "epoch": 0.7718043377941856, "grad_norm": 0.27962034940719604, "learning_rate": 3.0083940102539763e-05, "loss": 0.9315, "step": 13380 }, { "epoch": 0.7720927549607752, "grad_norm": 0.3114453852176666, "learning_rate": 3.00119935050431e-05, "loss": 0.9689, "step": 13385 }, { "epoch": 0.772381172127365, "grad_norm": 0.2955044209957123, "learning_rate": 2.994011784944284e-05, "loss": 0.9821, "step": 13390 }, { "epoch": 0.7726695892939548, "grad_norm": 0.2887440621852875, "learning_rate": 2.9868313208594212e-05, "loss": 0.9542, "step": 13395 }, { "epoch": 0.7729580064605446, "grad_norm": 0.30911940336227417, "learning_rate": 2.9796579655280576e-05, "loss": 0.8929, "step": 13400 }, { "epoch": 0.7732464236271342, "grad_norm": 0.2785640060901642, "learning_rate": 2.9724917262213157e-05, "loss": 0.9291, "step": 13405 }, { "epoch": 0.773534840793724, "grad_norm": 0.30418962240219116, "learning_rate": 2.9653326102030964e-05, "loss": 0.9497, "step": 13410 }, { "epoch": 0.7738232579603138, "grad_norm": 0.27844002842903137, "learning_rate": 2.9581806247301e-05, "loss": 0.9347, "step": 13415 }, { "epoch": 0.7741116751269036, "grad_norm": 0.2980695962905884, "learning_rate": 2.9510357770517825e-05, "loss": 0.9666, "step": 13420 }, { "epoch": 0.7744000922934933, "grad_norm": 0.30434754490852356, "learning_rate": 2.9438980744103807e-05, "loss": 0.996, "step": 13425 }, { "epoch": 0.774688509460083, "grad_norm": 0.28719010949134827, "learning_rate": 2.9367675240408654e-05, "loss": 1.0049, "step": 13430 }, { "epoch": 0.7749769266266728, "grad_norm": 0.30200961232185364, "learning_rate": 2.9296441331709823e-05, "loss": 0.9612, "step": 13435 }, { "epoch": 0.7752653437932626, "grad_norm": 0.27808678150177, "learning_rate": 2.9225279090212067e-05, "loss": 0.9418, "step": 13440 }, { "epoch": 0.7755537609598523, "grad_norm": 0.29861539602279663, "learning_rate": 2.9154188588047504e-05, "loss": 0.8939, "step": 13445 }, { "epoch": 0.7758421781264421, "grad_norm": 0.29694679379463196, "learning_rate": 2.9083169897275552e-05, "loss": 0.9092, "step": 13450 }, { "epoch": 0.7761305952930319, "grad_norm": 0.30668821930885315, "learning_rate": 2.901222308988283e-05, "loss": 1.0086, "step": 13455 }, { "epoch": 0.7764190124596216, "grad_norm": 0.2928648591041565, "learning_rate": 2.894134823778315e-05, "loss": 0.9638, "step": 13460 }, { "epoch": 0.7767074296262113, "grad_norm": 0.2744688391685486, "learning_rate": 2.8870545412817306e-05, "loss": 0.9842, "step": 13465 }, { "epoch": 0.7769958467928011, "grad_norm": 0.313619464635849, "learning_rate": 2.8799814686753134e-05, "loss": 0.9996, "step": 13470 }, { "epoch": 0.7772842639593909, "grad_norm": 0.27718332409858704, "learning_rate": 2.8729156131285362e-05, "loss": 0.8795, "step": 13475 }, { "epoch": 0.7775726811259807, "grad_norm": 0.2873843312263489, "learning_rate": 2.8658569818035542e-05, "loss": 0.9017, "step": 13480 }, { "epoch": 0.7778610982925703, "grad_norm": 0.27686628699302673, "learning_rate": 2.8588055818552096e-05, "loss": 0.9119, "step": 13485 }, { "epoch": 0.7781495154591601, "grad_norm": 0.3248569965362549, "learning_rate": 2.851761420431006e-05, "loss": 0.9749, "step": 13490 }, { "epoch": 0.7784379326257499, "grad_norm": 0.293582558631897, "learning_rate": 2.8447245046711103e-05, "loss": 0.9441, "step": 13495 }, { "epoch": 0.7787263497923397, "grad_norm": 0.2862909436225891, "learning_rate": 2.8376948417083483e-05, "loss": 0.9887, "step": 13500 }, { "epoch": 0.7790147669589293, "grad_norm": 0.3073969781398773, "learning_rate": 2.8306724386681894e-05, "loss": 1.0304, "step": 13505 }, { "epoch": 0.7793031841255191, "grad_norm": 0.29679036140441895, "learning_rate": 2.823657302668755e-05, "loss": 0.9249, "step": 13510 }, { "epoch": 0.7795916012921089, "grad_norm": 0.28810662031173706, "learning_rate": 2.8166494408207812e-05, "loss": 0.9222, "step": 13515 }, { "epoch": 0.7798800184586987, "grad_norm": 0.2718978226184845, "learning_rate": 2.8096488602276528e-05, "loss": 0.929, "step": 13520 }, { "epoch": 0.7801684356252884, "grad_norm": 0.3145858645439148, "learning_rate": 2.8026555679853594e-05, "loss": 0.9618, "step": 13525 }, { "epoch": 0.7804568527918782, "grad_norm": 0.28058719635009766, "learning_rate": 2.7956695711825075e-05, "loss": 1.0025, "step": 13530 }, { "epoch": 0.7807452699584679, "grad_norm": 0.28933846950531006, "learning_rate": 2.7886908769003074e-05, "loss": 0.9186, "step": 13535 }, { "epoch": 0.7810336871250577, "grad_norm": 0.29064711928367615, "learning_rate": 2.7817194922125666e-05, "loss": 0.9462, "step": 13540 }, { "epoch": 0.7813221042916474, "grad_norm": 0.29872819781303406, "learning_rate": 2.774755424185691e-05, "loss": 0.9759, "step": 13545 }, { "epoch": 0.7816105214582372, "grad_norm": 0.28359314799308777, "learning_rate": 2.7677986798786615e-05, "loss": 0.9732, "step": 13550 }, { "epoch": 0.781898938624827, "grad_norm": 0.3036571443080902, "learning_rate": 2.7608492663430363e-05, "loss": 1.0159, "step": 13555 }, { "epoch": 0.7821873557914167, "grad_norm": 0.2676851749420166, "learning_rate": 2.753907190622944e-05, "loss": 0.9785, "step": 13560 }, { "epoch": 0.7824757729580064, "grad_norm": 0.3076855540275574, "learning_rate": 2.7469724597550805e-05, "loss": 0.9082, "step": 13565 }, { "epoch": 0.7827641901245962, "grad_norm": 0.28778043389320374, "learning_rate": 2.7400450807686938e-05, "loss": 0.9618, "step": 13570 }, { "epoch": 0.783052607291186, "grad_norm": 0.29155904054641724, "learning_rate": 2.7331250606855695e-05, "loss": 0.9694, "step": 13575 }, { "epoch": 0.7833410244577758, "grad_norm": 0.27802708745002747, "learning_rate": 2.726212406520051e-05, "loss": 0.8822, "step": 13580 }, { "epoch": 0.7836294416243654, "grad_norm": 0.3171776533126831, "learning_rate": 2.7193071252790013e-05, "loss": 0.9722, "step": 13585 }, { "epoch": 0.7839178587909552, "grad_norm": 0.2921862006187439, "learning_rate": 2.712409223961826e-05, "loss": 0.9562, "step": 13590 }, { "epoch": 0.784206275957545, "grad_norm": 0.29876551032066345, "learning_rate": 2.7055187095604296e-05, "loss": 0.8812, "step": 13595 }, { "epoch": 0.7844946931241348, "grad_norm": 0.33072373270988464, "learning_rate": 2.698635589059242e-05, "loss": 1.0232, "step": 13600 }, { "epoch": 0.7847831102907245, "grad_norm": 0.3147570788860321, "learning_rate": 2.6917598694351998e-05, "loss": 0.9484, "step": 13605 }, { "epoch": 0.7850715274573142, "grad_norm": 0.27354785799980164, "learning_rate": 2.6848915576577317e-05, "loss": 0.8926, "step": 13610 }, { "epoch": 0.785359944623904, "grad_norm": 0.34007906913757324, "learning_rate": 2.6780306606887605e-05, "loss": 0.9087, "step": 13615 }, { "epoch": 0.7856483617904938, "grad_norm": 0.29760247468948364, "learning_rate": 2.6711771854826905e-05, "loss": 0.9881, "step": 13620 }, { "epoch": 0.7859367789570835, "grad_norm": 0.28661879897117615, "learning_rate": 2.6643311389864088e-05, "loss": 0.9354, "step": 13625 }, { "epoch": 0.7862251961236733, "grad_norm": 0.3026251196861267, "learning_rate": 2.657492528139268e-05, "loss": 1.0238, "step": 13630 }, { "epoch": 0.786513613290263, "grad_norm": 0.30861523747444153, "learning_rate": 2.6506613598730833e-05, "loss": 1.0146, "step": 13635 }, { "epoch": 0.7868020304568528, "grad_norm": 0.2883349061012268, "learning_rate": 2.643837641112128e-05, "loss": 0.9967, "step": 13640 }, { "epoch": 0.7870904476234426, "grad_norm": 0.2896696627140045, "learning_rate": 2.6370213787731214e-05, "loss": 0.9344, "step": 13645 }, { "epoch": 0.7873788647900323, "grad_norm": 0.2672329545021057, "learning_rate": 2.630212579765231e-05, "loss": 0.9655, "step": 13650 }, { "epoch": 0.7876672819566221, "grad_norm": 0.28561004996299744, "learning_rate": 2.6234112509900532e-05, "loss": 0.9152, "step": 13655 }, { "epoch": 0.7879556991232118, "grad_norm": 0.2793280780315399, "learning_rate": 2.6166173993416154e-05, "loss": 0.9476, "step": 13660 }, { "epoch": 0.7882441162898016, "grad_norm": 0.2930983006954193, "learning_rate": 2.6098310317063634e-05, "loss": 0.9762, "step": 13665 }, { "epoch": 0.7885325334563913, "grad_norm": 0.3089233338832855, "learning_rate": 2.603052154963158e-05, "loss": 0.9858, "step": 13670 }, { "epoch": 0.7888209506229811, "grad_norm": 0.2855857312679291, "learning_rate": 2.59628077598327e-05, "loss": 0.9528, "step": 13675 }, { "epoch": 0.7891093677895709, "grad_norm": 0.2728000283241272, "learning_rate": 2.5895169016303623e-05, "loss": 0.9513, "step": 13680 }, { "epoch": 0.7893977849561606, "grad_norm": 0.29234278202056885, "learning_rate": 2.5827605387605035e-05, "loss": 1.0129, "step": 13685 }, { "epoch": 0.7896862021227503, "grad_norm": 0.3364224433898926, "learning_rate": 2.576011694222139e-05, "loss": 0.9246, "step": 13690 }, { "epoch": 0.7899746192893401, "grad_norm": 0.30209240317344666, "learning_rate": 2.5692703748560932e-05, "loss": 0.8882, "step": 13695 }, { "epoch": 0.7902630364559299, "grad_norm": 0.31578031182289124, "learning_rate": 2.5625365874955674e-05, "loss": 1.0172, "step": 13700 }, { "epoch": 0.7905514536225197, "grad_norm": 0.31756725907325745, "learning_rate": 2.5558103389661214e-05, "loss": 1.0161, "step": 13705 }, { "epoch": 0.7908398707891093, "grad_norm": 0.28036510944366455, "learning_rate": 2.5490916360856853e-05, "loss": 1.0237, "step": 13710 }, { "epoch": 0.7911282879556991, "grad_norm": 0.2866663336753845, "learning_rate": 2.5423804856645307e-05, "loss": 0.9911, "step": 13715 }, { "epoch": 0.7914167051222889, "grad_norm": 0.3004799783229828, "learning_rate": 2.5356768945052745e-05, "loss": 0.9495, "step": 13720 }, { "epoch": 0.7917051222888787, "grad_norm": 0.30194994807243347, "learning_rate": 2.528980869402875e-05, "loss": 0.952, "step": 13725 }, { "epoch": 0.7919935394554684, "grad_norm": 0.2838331162929535, "learning_rate": 2.522292417144617e-05, "loss": 1.0005, "step": 13730 }, { "epoch": 0.7922819566220581, "grad_norm": 0.2979831397533417, "learning_rate": 2.5156115445101193e-05, "loss": 1.1029, "step": 13735 }, { "epoch": 0.7925703737886479, "grad_norm": 0.25850915908813477, "learning_rate": 2.5089382582712994e-05, "loss": 0.916, "step": 13740 }, { "epoch": 0.7928587909552377, "grad_norm": 0.27513688802719116, "learning_rate": 2.502272565192405e-05, "loss": 0.8642, "step": 13745 }, { "epoch": 0.7931472081218274, "grad_norm": 0.28276485204696655, "learning_rate": 2.4956144720299712e-05, "loss": 0.8874, "step": 13750 }, { "epoch": 0.7934356252884172, "grad_norm": 0.3068575859069824, "learning_rate": 2.4889639855328473e-05, "loss": 0.9945, "step": 13755 }, { "epoch": 0.7937240424550069, "grad_norm": 0.28170597553253174, "learning_rate": 2.482321112442151e-05, "loss": 0.9553, "step": 13760 }, { "epoch": 0.7940124596215967, "grad_norm": 0.34394562244415283, "learning_rate": 2.4756858594912945e-05, "loss": 0.9402, "step": 13765 }, { "epoch": 0.7943008767881864, "grad_norm": 0.2997315227985382, "learning_rate": 2.4690582334059685e-05, "loss": 0.8787, "step": 13770 }, { "epoch": 0.7945892939547762, "grad_norm": 0.27617332339286804, "learning_rate": 2.4624382409041292e-05, "loss": 0.95, "step": 13775 }, { "epoch": 0.794877711121366, "grad_norm": 0.3137553334236145, "learning_rate": 2.455825888695994e-05, "loss": 0.9173, "step": 13780 }, { "epoch": 0.7951661282879557, "grad_norm": 0.3070252537727356, "learning_rate": 2.449221183484036e-05, "loss": 1.0014, "step": 13785 }, { "epoch": 0.7954545454545454, "grad_norm": 0.2886539697647095, "learning_rate": 2.4426241319629772e-05, "loss": 1.0092, "step": 13790 }, { "epoch": 0.7957429626211352, "grad_norm": 0.2871996760368347, "learning_rate": 2.436034740819786e-05, "loss": 0.934, "step": 13795 }, { "epoch": 0.796031379787725, "grad_norm": 0.3281834125518799, "learning_rate": 2.4294530167336615e-05, "loss": 0.91, "step": 13800 }, { "epoch": 0.7963197969543148, "grad_norm": 0.2982265055179596, "learning_rate": 2.422878966376032e-05, "loss": 0.958, "step": 13805 }, { "epoch": 0.7966082141209044, "grad_norm": 0.30917835235595703, "learning_rate": 2.4163125964105448e-05, "loss": 0.9478, "step": 13810 }, { "epoch": 0.7968966312874942, "grad_norm": 0.2909094989299774, "learning_rate": 2.4097539134930703e-05, "loss": 1.0116, "step": 13815 }, { "epoch": 0.797185048454084, "grad_norm": 0.2671252191066742, "learning_rate": 2.4032029242716826e-05, "loss": 0.913, "step": 13820 }, { "epoch": 0.7974734656206738, "grad_norm": 0.2950332760810852, "learning_rate": 2.3966596353866466e-05, "loss": 0.9618, "step": 13825 }, { "epoch": 0.7977618827872635, "grad_norm": 0.3227890729904175, "learning_rate": 2.390124053470443e-05, "loss": 0.9547, "step": 13830 }, { "epoch": 0.7980502999538532, "grad_norm": 0.28030773997306824, "learning_rate": 2.383596185147724e-05, "loss": 0.96, "step": 13835 }, { "epoch": 0.798338717120443, "grad_norm": 0.29495128989219666, "learning_rate": 2.3770760370353294e-05, "loss": 0.9523, "step": 13840 }, { "epoch": 0.7986271342870328, "grad_norm": 0.33828744292259216, "learning_rate": 2.3705636157422707e-05, "loss": 0.9811, "step": 13845 }, { "epoch": 0.7989155514536225, "grad_norm": 0.306768536567688, "learning_rate": 2.364058927869732e-05, "loss": 0.9979, "step": 13850 }, { "epoch": 0.7992039686202123, "grad_norm": 0.3236815333366394, "learning_rate": 2.357561980011057e-05, "loss": 0.9182, "step": 13855 }, { "epoch": 0.799492385786802, "grad_norm": 0.3143635392189026, "learning_rate": 2.3510727787517382e-05, "loss": 0.9045, "step": 13860 }, { "epoch": 0.7997808029533918, "grad_norm": 0.30166271328926086, "learning_rate": 2.3445913306694246e-05, "loss": 0.8859, "step": 13865 }, { "epoch": 0.8000692201199815, "grad_norm": 0.3122861087322235, "learning_rate": 2.3381176423338956e-05, "loss": 0.9755, "step": 13870 }, { "epoch": 0.8003576372865713, "grad_norm": 0.27881136536598206, "learning_rate": 2.33165172030708e-05, "loss": 0.941, "step": 13875 }, { "epoch": 0.8006460544531611, "grad_norm": 0.3169102072715759, "learning_rate": 2.325193571143024e-05, "loss": 0.881, "step": 13880 }, { "epoch": 0.8009344716197508, "grad_norm": 0.2874906659126282, "learning_rate": 2.3187432013878908e-05, "loss": 0.9404, "step": 13885 }, { "epoch": 0.8012228887863405, "grad_norm": 0.349324494600296, "learning_rate": 2.31230061757997e-05, "loss": 0.9942, "step": 13890 }, { "epoch": 0.8015113059529303, "grad_norm": 0.28128063678741455, "learning_rate": 2.30586582624965e-05, "loss": 0.9676, "step": 13895 }, { "epoch": 0.8017997231195201, "grad_norm": 0.27755919098854065, "learning_rate": 2.299438833919432e-05, "loss": 0.9676, "step": 13900 }, { "epoch": 0.8020881402861099, "grad_norm": 0.2768385410308838, "learning_rate": 2.2930196471038924e-05, "loss": 0.9379, "step": 13905 }, { "epoch": 0.8023765574526995, "grad_norm": 0.30479341745376587, "learning_rate": 2.286608272309716e-05, "loss": 0.9811, "step": 13910 }, { "epoch": 0.8026649746192893, "grad_norm": 0.2795366942882538, "learning_rate": 2.2802047160356576e-05, "loss": 1.0481, "step": 13915 }, { "epoch": 0.8029533917858791, "grad_norm": 0.32293620705604553, "learning_rate": 2.2738089847725497e-05, "loss": 0.9632, "step": 13920 }, { "epoch": 0.8032418089524689, "grad_norm": 0.34504541754722595, "learning_rate": 2.267421085003293e-05, "loss": 0.9606, "step": 13925 }, { "epoch": 0.8035302261190586, "grad_norm": 0.3101238012313843, "learning_rate": 2.2610410232028467e-05, "loss": 1.0869, "step": 13930 }, { "epoch": 0.8038186432856483, "grad_norm": 0.29028749465942383, "learning_rate": 2.254668805838236e-05, "loss": 0.9505, "step": 13935 }, { "epoch": 0.8041070604522381, "grad_norm": 0.29808133840560913, "learning_rate": 2.2483044393685215e-05, "loss": 0.9556, "step": 13940 }, { "epoch": 0.8043954776188279, "grad_norm": 0.27106401324272156, "learning_rate": 2.2419479302448144e-05, "loss": 0.898, "step": 13945 }, { "epoch": 0.8046838947854176, "grad_norm": 0.2756030559539795, "learning_rate": 2.235599284910258e-05, "loss": 0.9095, "step": 13950 }, { "epoch": 0.8049723119520074, "grad_norm": 0.2623896896839142, "learning_rate": 2.229258509800023e-05, "loss": 0.9346, "step": 13955 }, { "epoch": 0.8052607291185971, "grad_norm": 0.30703356862068176, "learning_rate": 2.2229256113413087e-05, "loss": 0.9422, "step": 13960 }, { "epoch": 0.8055491462851869, "grad_norm": 0.2994453012943268, "learning_rate": 2.2166005959533266e-05, "loss": 1.0257, "step": 13965 }, { "epoch": 0.8058375634517766, "grad_norm": 0.28820228576660156, "learning_rate": 2.210283470047296e-05, "loss": 0.9725, "step": 13970 }, { "epoch": 0.8061259806183664, "grad_norm": 0.2879176735877991, "learning_rate": 2.2039742400264406e-05, "loss": 0.9524, "step": 13975 }, { "epoch": 0.8064143977849562, "grad_norm": 0.28816553950309753, "learning_rate": 2.1976729122859864e-05, "loss": 0.9219, "step": 13980 }, { "epoch": 0.8067028149515459, "grad_norm": 0.30006927251815796, "learning_rate": 2.191379493213137e-05, "loss": 0.9587, "step": 13985 }, { "epoch": 0.8069912321181357, "grad_norm": 0.3044586777687073, "learning_rate": 2.185093989187087e-05, "loss": 0.978, "step": 13990 }, { "epoch": 0.8072796492847254, "grad_norm": 0.2731790840625763, "learning_rate": 2.1788164065790127e-05, "loss": 0.9413, "step": 13995 }, { "epoch": 0.8075680664513152, "grad_norm": 0.2864460349082947, "learning_rate": 2.1725467517520526e-05, "loss": 0.9536, "step": 14000 }, { "epoch": 0.807856483617905, "grad_norm": 0.2875843346118927, "learning_rate": 2.1662850310613147e-05, "loss": 0.8936, "step": 14005 }, { "epoch": 0.8081449007844947, "grad_norm": 0.286458283662796, "learning_rate": 2.1600312508538602e-05, "loss": 0.9558, "step": 14010 }, { "epoch": 0.8084333179510844, "grad_norm": 0.28374427556991577, "learning_rate": 2.1537854174687034e-05, "loss": 0.8465, "step": 14015 }, { "epoch": 0.8087217351176742, "grad_norm": 0.2845346927642822, "learning_rate": 2.1475475372368094e-05, "loss": 1.0074, "step": 14020 }, { "epoch": 0.809010152284264, "grad_norm": 0.3177453279495239, "learning_rate": 2.1413176164810732e-05, "loss": 0.9771, "step": 14025 }, { "epoch": 0.8092985694508538, "grad_norm": 0.2712036073207855, "learning_rate": 2.1350956615163254e-05, "loss": 0.9188, "step": 14030 }, { "epoch": 0.8095869866174434, "grad_norm": 0.274812787771225, "learning_rate": 2.1288816786493194e-05, "loss": 0.9834, "step": 14035 }, { "epoch": 0.8098754037840332, "grad_norm": 0.28117087483406067, "learning_rate": 2.1226756741787356e-05, "loss": 0.9579, "step": 14040 }, { "epoch": 0.810163820950623, "grad_norm": 0.2860463559627533, "learning_rate": 2.1164776543951635e-05, "loss": 0.8912, "step": 14045 }, { "epoch": 0.8104522381172128, "grad_norm": 0.29822874069213867, "learning_rate": 2.1102876255810887e-05, "loss": 0.9204, "step": 14050 }, { "epoch": 0.8107406552838025, "grad_norm": 0.2793227732181549, "learning_rate": 2.1041055940109133e-05, "loss": 0.9382, "step": 14055 }, { "epoch": 0.8110290724503922, "grad_norm": 0.2899545729160309, "learning_rate": 2.0979315659509223e-05, "loss": 0.9919, "step": 14060 }, { "epoch": 0.811317489616982, "grad_norm": 0.2842821180820465, "learning_rate": 2.091765547659298e-05, "loss": 0.8838, "step": 14065 }, { "epoch": 0.8116059067835718, "grad_norm": 0.282894492149353, "learning_rate": 2.085607545386088e-05, "loss": 0.9497, "step": 14070 }, { "epoch": 0.8118943239501615, "grad_norm": 0.31623557209968567, "learning_rate": 2.0794575653732308e-05, "loss": 0.9599, "step": 14075 }, { "epoch": 0.8121827411167513, "grad_norm": 0.2882692813873291, "learning_rate": 2.0733156138545252e-05, "loss": 0.9176, "step": 14080 }, { "epoch": 0.812471158283341, "grad_norm": 0.312507688999176, "learning_rate": 2.0671816970556312e-05, "loss": 1.0327, "step": 14085 }, { "epoch": 0.8127595754499308, "grad_norm": 0.27789467573165894, "learning_rate": 2.0610558211940702e-05, "loss": 0.9604, "step": 14090 }, { "epoch": 0.8130479926165205, "grad_norm": 0.2766311466693878, "learning_rate": 2.0549379924792045e-05, "loss": 1.0102, "step": 14095 }, { "epoch": 0.8133364097831103, "grad_norm": 0.2832636833190918, "learning_rate": 2.0488282171122498e-05, "loss": 0.9976, "step": 14100 }, { "epoch": 0.8136248269497001, "grad_norm": 0.314397931098938, "learning_rate": 2.042726501286253e-05, "loss": 0.944, "step": 14105 }, { "epoch": 0.8139132441162898, "grad_norm": 0.2753946781158447, "learning_rate": 2.036632851186091e-05, "loss": 0.866, "step": 14110 }, { "epoch": 0.8142016612828795, "grad_norm": 0.28425517678260803, "learning_rate": 2.0305472729884656e-05, "loss": 0.9753, "step": 14115 }, { "epoch": 0.8144900784494693, "grad_norm": 0.265170693397522, "learning_rate": 2.0244697728618966e-05, "loss": 0.9903, "step": 14120 }, { "epoch": 0.8147784956160591, "grad_norm": 0.3373314440250397, "learning_rate": 2.0184003569667198e-05, "loss": 0.929, "step": 14125 }, { "epoch": 0.8150669127826489, "grad_norm": 0.2899061143398285, "learning_rate": 2.0123390314550717e-05, "loss": 0.9916, "step": 14130 }, { "epoch": 0.8153553299492385, "grad_norm": 0.2940767705440521, "learning_rate": 2.0062858024708895e-05, "loss": 0.899, "step": 14135 }, { "epoch": 0.8156437471158283, "grad_norm": 0.29175153374671936, "learning_rate": 2.000240676149904e-05, "loss": 0.9465, "step": 14140 }, { "epoch": 0.8159321642824181, "grad_norm": 0.2850426733493805, "learning_rate": 1.9942036586196312e-05, "loss": 0.978, "step": 14145 }, { "epoch": 0.8162205814490079, "grad_norm": 0.2718311548233032, "learning_rate": 1.9881747559993703e-05, "loss": 0.887, "step": 14150 }, { "epoch": 0.8165089986155976, "grad_norm": 0.33359721302986145, "learning_rate": 1.9821539744001906e-05, "loss": 0.9738, "step": 14155 }, { "epoch": 0.8167974157821873, "grad_norm": 0.2917875349521637, "learning_rate": 1.976141319924939e-05, "loss": 0.9638, "step": 14160 }, { "epoch": 0.8170858329487771, "grad_norm": 0.2972804009914398, "learning_rate": 1.9701367986682152e-05, "loss": 0.9336, "step": 14165 }, { "epoch": 0.8173742501153669, "grad_norm": 0.30192670226097107, "learning_rate": 1.964140416716379e-05, "loss": 1.0037, "step": 14170 }, { "epoch": 0.8176626672819566, "grad_norm": 0.29572001099586487, "learning_rate": 1.9581521801475368e-05, "loss": 0.9069, "step": 14175 }, { "epoch": 0.8179510844485464, "grad_norm": 0.3252546787261963, "learning_rate": 1.9521720950315403e-05, "loss": 1.0215, "step": 14180 }, { "epoch": 0.8182395016151361, "grad_norm": 0.28887739777565, "learning_rate": 1.9462001674299846e-05, "loss": 0.9823, "step": 14185 }, { "epoch": 0.8185279187817259, "grad_norm": 0.30198100209236145, "learning_rate": 1.940236403396186e-05, "loss": 0.9403, "step": 14190 }, { "epoch": 0.8188163359483156, "grad_norm": 0.2884944975376129, "learning_rate": 1.934280808975193e-05, "loss": 0.9939, "step": 14195 }, { "epoch": 0.8191047531149054, "grad_norm": 0.2968595027923584, "learning_rate": 1.9283333902037694e-05, "loss": 0.9257, "step": 14200 }, { "epoch": 0.8193931702814952, "grad_norm": 0.2883051931858063, "learning_rate": 1.9223941531103918e-05, "loss": 0.9695, "step": 14205 }, { "epoch": 0.819681587448085, "grad_norm": 0.29692569375038147, "learning_rate": 1.9164631037152513e-05, "loss": 1.005, "step": 14210 }, { "epoch": 0.8199700046146746, "grad_norm": 0.26805904507637024, "learning_rate": 1.9105402480302237e-05, "loss": 0.9818, "step": 14215 }, { "epoch": 0.8202584217812644, "grad_norm": 0.27732157707214355, "learning_rate": 1.9046255920588985e-05, "loss": 0.9986, "step": 14220 }, { "epoch": 0.8205468389478542, "grad_norm": 0.314737468957901, "learning_rate": 1.8987191417965378e-05, "loss": 0.951, "step": 14225 }, { "epoch": 0.820835256114444, "grad_norm": 0.3076435625553131, "learning_rate": 1.8928209032301013e-05, "loss": 1.0224, "step": 14230 }, { "epoch": 0.8211236732810336, "grad_norm": 0.29111447930336, "learning_rate": 1.886930882338208e-05, "loss": 0.9591, "step": 14235 }, { "epoch": 0.8214120904476234, "grad_norm": 0.3361826241016388, "learning_rate": 1.8810490850911577e-05, "loss": 1.0308, "step": 14240 }, { "epoch": 0.8217005076142132, "grad_norm": 0.2926064431667328, "learning_rate": 1.8751755174509156e-05, "loss": 0.8994, "step": 14245 }, { "epoch": 0.821988924780803, "grad_norm": 0.33232754468917847, "learning_rate": 1.8693101853711004e-05, "loss": 0.9446, "step": 14250 }, { "epoch": 0.8222773419473927, "grad_norm": 0.3139682710170746, "learning_rate": 1.8634530947969853e-05, "loss": 1.0135, "step": 14255 }, { "epoch": 0.8225657591139824, "grad_norm": 0.2984192669391632, "learning_rate": 1.857604251665487e-05, "loss": 0.9311, "step": 14260 }, { "epoch": 0.8228541762805722, "grad_norm": 0.2784828841686249, "learning_rate": 1.851763661905167e-05, "loss": 0.9251, "step": 14265 }, { "epoch": 0.823142593447162, "grad_norm": 0.2821832597255707, "learning_rate": 1.845931331436219e-05, "loss": 0.9135, "step": 14270 }, { "epoch": 0.8234310106137517, "grad_norm": 0.2858486771583557, "learning_rate": 1.840107266170462e-05, "loss": 0.9792, "step": 14275 }, { "epoch": 0.8237194277803415, "grad_norm": 0.3098810613155365, "learning_rate": 1.8342914720113404e-05, "loss": 0.9468, "step": 14280 }, { "epoch": 0.8240078449469312, "grad_norm": 0.28920066356658936, "learning_rate": 1.828483954853911e-05, "loss": 0.9434, "step": 14285 }, { "epoch": 0.824296262113521, "grad_norm": 0.36170583963394165, "learning_rate": 1.822684720584852e-05, "loss": 0.9733, "step": 14290 }, { "epoch": 0.8245846792801107, "grad_norm": 0.2947959303855896, "learning_rate": 1.8168937750824278e-05, "loss": 1.0517, "step": 14295 }, { "epoch": 0.8248730964467005, "grad_norm": 0.264804482460022, "learning_rate": 1.8111111242165124e-05, "loss": 0.9247, "step": 14300 }, { "epoch": 0.8251615136132903, "grad_norm": 0.3069083094596863, "learning_rate": 1.8053367738485748e-05, "loss": 0.941, "step": 14305 }, { "epoch": 0.82544993077988, "grad_norm": 0.29265064001083374, "learning_rate": 1.7995707298316632e-05, "loss": 0.9741, "step": 14310 }, { "epoch": 0.8257383479464697, "grad_norm": 0.2899533212184906, "learning_rate": 1.7938129980104103e-05, "loss": 0.9867, "step": 14315 }, { "epoch": 0.8260267651130595, "grad_norm": 0.312055379152298, "learning_rate": 1.788063584221017e-05, "loss": 1.0349, "step": 14320 }, { "epoch": 0.8263151822796493, "grad_norm": 0.2941701412200928, "learning_rate": 1.7823224942912643e-05, "loss": 1.0064, "step": 14325 }, { "epoch": 0.8266035994462391, "grad_norm": 0.2945972979068756, "learning_rate": 1.776589734040487e-05, "loss": 0.9446, "step": 14330 }, { "epoch": 0.8268920166128289, "grad_norm": 0.35159391164779663, "learning_rate": 1.770865309279578e-05, "loss": 0.9587, "step": 14335 }, { "epoch": 0.8271804337794185, "grad_norm": 0.3027874529361725, "learning_rate": 1.7651492258109835e-05, "loss": 1.0183, "step": 14340 }, { "epoch": 0.8274688509460083, "grad_norm": 0.2887841761112213, "learning_rate": 1.7594414894286893e-05, "loss": 0.9893, "step": 14345 }, { "epoch": 0.8277572681125981, "grad_norm": 0.2893112003803253, "learning_rate": 1.7537421059182314e-05, "loss": 0.928, "step": 14350 }, { "epoch": 0.8280456852791879, "grad_norm": 0.28454893827438354, "learning_rate": 1.7480510810566685e-05, "loss": 0.959, "step": 14355 }, { "epoch": 0.8283341024457775, "grad_norm": 0.29442286491394043, "learning_rate": 1.74236842061259e-05, "loss": 0.9051, "step": 14360 }, { "epoch": 0.8286225196123673, "grad_norm": 0.31676793098449707, "learning_rate": 1.7366941303461083e-05, "loss": 1.0464, "step": 14365 }, { "epoch": 0.8289109367789571, "grad_norm": 0.27417492866516113, "learning_rate": 1.7310282160088465e-05, "loss": 0.9545, "step": 14370 }, { "epoch": 0.8291993539455469, "grad_norm": 0.3116649091243744, "learning_rate": 1.72537068334395e-05, "loss": 1.0463, "step": 14375 }, { "epoch": 0.8294877711121366, "grad_norm": 0.28560543060302734, "learning_rate": 1.7197215380860497e-05, "loss": 1.0247, "step": 14380 }, { "epoch": 0.8297761882787263, "grad_norm": 0.2737559378147125, "learning_rate": 1.7140807859612928e-05, "loss": 0.91, "step": 14385 }, { "epoch": 0.8300646054453161, "grad_norm": 0.2955358326435089, "learning_rate": 1.7084484326873062e-05, "loss": 0.8977, "step": 14390 }, { "epoch": 0.8303530226119059, "grad_norm": 0.2852216362953186, "learning_rate": 1.7028244839732144e-05, "loss": 0.9707, "step": 14395 }, { "epoch": 0.8306414397784956, "grad_norm": 0.3453192412853241, "learning_rate": 1.6972089455196115e-05, "loss": 0.9683, "step": 14400 }, { "epoch": 0.8309298569450854, "grad_norm": 0.29188206791877747, "learning_rate": 1.6916018230185704e-05, "loss": 0.9029, "step": 14405 }, { "epoch": 0.8312182741116751, "grad_norm": 0.2981170117855072, "learning_rate": 1.6860031221536398e-05, "loss": 0.9743, "step": 14410 }, { "epoch": 0.8315066912782649, "grad_norm": 0.28368160128593445, "learning_rate": 1.680412848599826e-05, "loss": 0.9265, "step": 14415 }, { "epoch": 0.8317951084448546, "grad_norm": 0.27159884572029114, "learning_rate": 1.674831008023594e-05, "loss": 0.9942, "step": 14420 }, { "epoch": 0.8320835256114444, "grad_norm": 0.27852651476860046, "learning_rate": 1.66925760608286e-05, "loss": 0.9342, "step": 14425 }, { "epoch": 0.8323719427780342, "grad_norm": 0.3071039319038391, "learning_rate": 1.6636926484269855e-05, "loss": 0.9772, "step": 14430 }, { "epoch": 0.832660359944624, "grad_norm": 0.35024166107177734, "learning_rate": 1.658136140696781e-05, "loss": 0.8992, "step": 14435 }, { "epoch": 0.8329487771112136, "grad_norm": 0.3019395172595978, "learning_rate": 1.6525880885244815e-05, "loss": 0.9288, "step": 14440 }, { "epoch": 0.8332371942778034, "grad_norm": 0.32100218534469604, "learning_rate": 1.6470484975337565e-05, "loss": 0.9542, "step": 14445 }, { "epoch": 0.8335256114443932, "grad_norm": 0.30146655440330505, "learning_rate": 1.641517373339696e-05, "loss": 0.9153, "step": 14450 }, { "epoch": 0.833814028610983, "grad_norm": 0.2922408878803253, "learning_rate": 1.6359947215488157e-05, "loss": 0.8254, "step": 14455 }, { "epoch": 0.8341024457775726, "grad_norm": 0.3297399580478668, "learning_rate": 1.6304805477590312e-05, "loss": 0.9451, "step": 14460 }, { "epoch": 0.8343908629441624, "grad_norm": 0.26936909556388855, "learning_rate": 1.6249748575596702e-05, "loss": 0.9624, "step": 14465 }, { "epoch": 0.8346792801107522, "grad_norm": 0.2955014407634735, "learning_rate": 1.6194776565314672e-05, "loss": 0.9156, "step": 14470 }, { "epoch": 0.834967697277342, "grad_norm": 0.27296435832977295, "learning_rate": 1.6139889502465434e-05, "loss": 1.0005, "step": 14475 }, { "epoch": 0.8352561144439317, "grad_norm": 0.2949640452861786, "learning_rate": 1.6085087442684122e-05, "loss": 0.9119, "step": 14480 }, { "epoch": 0.8355445316105214, "grad_norm": 0.28256770968437195, "learning_rate": 1.6030370441519704e-05, "loss": 0.991, "step": 14485 }, { "epoch": 0.8358329487771112, "grad_norm": 0.26805078983306885, "learning_rate": 1.597573855443497e-05, "loss": 0.8828, "step": 14490 }, { "epoch": 0.836121365943701, "grad_norm": 0.31226465106010437, "learning_rate": 1.592119183680638e-05, "loss": 0.9714, "step": 14495 }, { "epoch": 0.8364097831102907, "grad_norm": 0.31233757734298706, "learning_rate": 1.5866730343924085e-05, "loss": 0.96, "step": 14500 }, { "epoch": 0.8366982002768805, "grad_norm": 0.28462743759155273, "learning_rate": 1.581235413099187e-05, "loss": 0.9373, "step": 14505 }, { "epoch": 0.8369866174434702, "grad_norm": 0.2800358533859253, "learning_rate": 1.575806325312702e-05, "loss": 0.9018, "step": 14510 }, { "epoch": 0.83727503461006, "grad_norm": 0.2955632209777832, "learning_rate": 1.5703857765360407e-05, "loss": 0.876, "step": 14515 }, { "epoch": 0.8375634517766497, "grad_norm": 0.3308626115322113, "learning_rate": 1.5649737722636315e-05, "loss": 0.9169, "step": 14520 }, { "epoch": 0.8378518689432395, "grad_norm": 0.2876846492290497, "learning_rate": 1.5595703179812327e-05, "loss": 0.9658, "step": 14525 }, { "epoch": 0.8381402861098293, "grad_norm": 0.3003042936325073, "learning_rate": 1.554175419165951e-05, "loss": 0.9288, "step": 14530 }, { "epoch": 0.838428703276419, "grad_norm": 0.28002721071243286, "learning_rate": 1.5487890812862094e-05, "loss": 0.9352, "step": 14535 }, { "epoch": 0.8387171204430087, "grad_norm": 0.30450934171676636, "learning_rate": 1.5434113098017667e-05, "loss": 0.9025, "step": 14540 }, { "epoch": 0.8390055376095985, "grad_norm": 0.2632887065410614, "learning_rate": 1.5380421101636778e-05, "loss": 0.8946, "step": 14545 }, { "epoch": 0.8392939547761883, "grad_norm": 0.2949424088001251, "learning_rate": 1.5326814878143304e-05, "loss": 0.9088, "step": 14550 }, { "epoch": 0.8395823719427781, "grad_norm": 0.31812626123428345, "learning_rate": 1.5273294481874044e-05, "loss": 0.9816, "step": 14555 }, { "epoch": 0.8398707891093677, "grad_norm": 0.28734949231147766, "learning_rate": 1.5219859967078854e-05, "loss": 0.9553, "step": 14560 }, { "epoch": 0.8401592062759575, "grad_norm": 0.2892124652862549, "learning_rate": 1.5166511387920512e-05, "loss": 0.9617, "step": 14565 }, { "epoch": 0.8404476234425473, "grad_norm": 0.335213303565979, "learning_rate": 1.5113248798474689e-05, "loss": 0.9573, "step": 14570 }, { "epoch": 0.8407360406091371, "grad_norm": 0.29609566926956177, "learning_rate": 1.5060072252729963e-05, "loss": 0.8896, "step": 14575 }, { "epoch": 0.8410244577757268, "grad_norm": 0.30370384454727173, "learning_rate": 1.5006981804587595e-05, "loss": 0.9445, "step": 14580 }, { "epoch": 0.8413128749423165, "grad_norm": 0.32674163579940796, "learning_rate": 1.495397750786165e-05, "loss": 0.9271, "step": 14585 }, { "epoch": 0.8416012921089063, "grad_norm": 0.306863397359848, "learning_rate": 1.4901059416278806e-05, "loss": 0.9588, "step": 14590 }, { "epoch": 0.8418897092754961, "grad_norm": 0.29718735814094543, "learning_rate": 1.4848227583478392e-05, "loss": 0.9887, "step": 14595 }, { "epoch": 0.8421781264420858, "grad_norm": 0.2633509635925293, "learning_rate": 1.4795482063012367e-05, "loss": 0.9955, "step": 14600 }, { "epoch": 0.8424665436086756, "grad_norm": 0.297232061624527, "learning_rate": 1.4742822908345045e-05, "loss": 0.996, "step": 14605 }, { "epoch": 0.8427549607752653, "grad_norm": 0.27509748935699463, "learning_rate": 1.4690250172853348e-05, "loss": 0.9716, "step": 14610 }, { "epoch": 0.8430433779418551, "grad_norm": 0.28519466519355774, "learning_rate": 1.463776390982654e-05, "loss": 0.9296, "step": 14615 }, { "epoch": 0.8433317951084448, "grad_norm": 0.3120754063129425, "learning_rate": 1.4585364172466231e-05, "loss": 0.9693, "step": 14620 }, { "epoch": 0.8436202122750346, "grad_norm": 0.29427772760391235, "learning_rate": 1.4533051013886323e-05, "loss": 0.8632, "step": 14625 }, { "epoch": 0.8439086294416244, "grad_norm": 0.32537657022476196, "learning_rate": 1.4480824487112943e-05, "loss": 1.0125, "step": 14630 }, { "epoch": 0.8441970466082142, "grad_norm": 0.2881564199924469, "learning_rate": 1.44286846450845e-05, "loss": 0.9031, "step": 14635 }, { "epoch": 0.8444854637748038, "grad_norm": 0.2654680013656616, "learning_rate": 1.437663154065142e-05, "loss": 0.894, "step": 14640 }, { "epoch": 0.8447738809413936, "grad_norm": 0.27556008100509644, "learning_rate": 1.4324665226576261e-05, "loss": 0.9709, "step": 14645 }, { "epoch": 0.8450622981079834, "grad_norm": 0.27537065744400024, "learning_rate": 1.4272785755533601e-05, "loss": 0.9357, "step": 14650 }, { "epoch": 0.8453507152745732, "grad_norm": 0.28559306263923645, "learning_rate": 1.4220993180109987e-05, "loss": 0.9329, "step": 14655 }, { "epoch": 0.8456391324411628, "grad_norm": 0.281829297542572, "learning_rate": 1.4169287552803923e-05, "loss": 0.9157, "step": 14660 }, { "epoch": 0.8459275496077526, "grad_norm": 0.2907716929912567, "learning_rate": 1.411766892602574e-05, "loss": 0.9752, "step": 14665 }, { "epoch": 0.8462159667743424, "grad_norm": 0.2846973240375519, "learning_rate": 1.4066137352097575e-05, "loss": 0.9054, "step": 14670 }, { "epoch": 0.8465043839409322, "grad_norm": 0.2842434048652649, "learning_rate": 1.4014692883253333e-05, "loss": 0.9578, "step": 14675 }, { "epoch": 0.846792801107522, "grad_norm": 0.26932570338249207, "learning_rate": 1.396333557163868e-05, "loss": 0.8609, "step": 14680 }, { "epoch": 0.8470812182741116, "grad_norm": 0.27889811992645264, "learning_rate": 1.3912065469310886e-05, "loss": 0.9687, "step": 14685 }, { "epoch": 0.8473696354407014, "grad_norm": 0.30398595333099365, "learning_rate": 1.3860882628238781e-05, "loss": 0.9927, "step": 14690 }, { "epoch": 0.8476580526072912, "grad_norm": 0.2693740427494049, "learning_rate": 1.380978710030284e-05, "loss": 0.993, "step": 14695 }, { "epoch": 0.847946469773881, "grad_norm": 0.3081601560115814, "learning_rate": 1.3758778937294947e-05, "loss": 0.9079, "step": 14700 }, { "epoch": 0.8482348869404707, "grad_norm": 0.3294139802455902, "learning_rate": 1.3707858190918555e-05, "loss": 0.9586, "step": 14705 }, { "epoch": 0.8485233041070604, "grad_norm": 0.3140733242034912, "learning_rate": 1.365702491278833e-05, "loss": 0.8856, "step": 14710 }, { "epoch": 0.8488117212736502, "grad_norm": 0.29933077096939087, "learning_rate": 1.3606279154430435e-05, "loss": 0.9661, "step": 14715 }, { "epoch": 0.84910013844024, "grad_norm": 0.299584299325943, "learning_rate": 1.3555620967282235e-05, "loss": 0.9069, "step": 14720 }, { "epoch": 0.8493885556068297, "grad_norm": 0.30480751395225525, "learning_rate": 1.3505050402692366e-05, "loss": 0.8985, "step": 14725 }, { "epoch": 0.8496769727734195, "grad_norm": 0.33949342370033264, "learning_rate": 1.3454567511920634e-05, "loss": 0.962, "step": 14730 }, { "epoch": 0.8499653899400093, "grad_norm": 0.2982217073440552, "learning_rate": 1.3404172346137945e-05, "loss": 0.9534, "step": 14735 }, { "epoch": 0.850253807106599, "grad_norm": 0.26105549931526184, "learning_rate": 1.3353864956426366e-05, "loss": 0.9366, "step": 14740 }, { "epoch": 0.8505422242731887, "grad_norm": 0.30479708313941956, "learning_rate": 1.330364539377893e-05, "loss": 0.9505, "step": 14745 }, { "epoch": 0.8508306414397785, "grad_norm": 0.28560617566108704, "learning_rate": 1.3253513709099652e-05, "loss": 0.9081, "step": 14750 }, { "epoch": 0.8511190586063683, "grad_norm": 0.2818647623062134, "learning_rate": 1.3203469953203474e-05, "loss": 0.9252, "step": 14755 }, { "epoch": 0.8514074757729581, "grad_norm": 0.3095349073410034, "learning_rate": 1.3153514176816195e-05, "loss": 0.9276, "step": 14760 }, { "epoch": 0.8516958929395477, "grad_norm": 0.27864789962768555, "learning_rate": 1.3103646430574523e-05, "loss": 0.9634, "step": 14765 }, { "epoch": 0.8519843101061375, "grad_norm": 0.3282797932624817, "learning_rate": 1.305386676502578e-05, "loss": 0.9707, "step": 14770 }, { "epoch": 0.8522727272727273, "grad_norm": 0.2912672758102417, "learning_rate": 1.3004175230628169e-05, "loss": 0.9301, "step": 14775 }, { "epoch": 0.8525611444393171, "grad_norm": 0.28119948506355286, "learning_rate": 1.2954571877750443e-05, "loss": 0.9853, "step": 14780 }, { "epoch": 0.8528495616059067, "grad_norm": 0.3060617744922638, "learning_rate": 1.290505675667204e-05, "loss": 0.9772, "step": 14785 }, { "epoch": 0.8531379787724965, "grad_norm": 0.3374637961387634, "learning_rate": 1.2855629917582935e-05, "loss": 0.9944, "step": 14790 }, { "epoch": 0.8534263959390863, "grad_norm": 0.292756587266922, "learning_rate": 1.2806291410583593e-05, "loss": 0.9562, "step": 14795 }, { "epoch": 0.8537148131056761, "grad_norm": 0.292419970035553, "learning_rate": 1.2757041285685011e-05, "loss": 0.9447, "step": 14800 }, { "epoch": 0.8540032302722658, "grad_norm": 0.2827741503715515, "learning_rate": 1.2707879592808548e-05, "loss": 0.9309, "step": 14805 }, { "epoch": 0.8542916474388556, "grad_norm": 0.29757630825042725, "learning_rate": 1.2658806381785926e-05, "loss": 0.9113, "step": 14810 }, { "epoch": 0.8545800646054453, "grad_norm": 0.28717100620269775, "learning_rate": 1.2609821702359215e-05, "loss": 0.8644, "step": 14815 }, { "epoch": 0.8548684817720351, "grad_norm": 0.32317039370536804, "learning_rate": 1.2560925604180673e-05, "loss": 1.046, "step": 14820 }, { "epoch": 0.8551568989386248, "grad_norm": 0.3420310318470001, "learning_rate": 1.2512118136812878e-05, "loss": 1.0261, "step": 14825 }, { "epoch": 0.8554453161052146, "grad_norm": 0.2869220972061157, "learning_rate": 1.2463399349728488e-05, "loss": 0.9597, "step": 14830 }, { "epoch": 0.8557337332718044, "grad_norm": 0.31378278136253357, "learning_rate": 1.2414769292310301e-05, "loss": 0.9628, "step": 14835 }, { "epoch": 0.8560221504383941, "grad_norm": 0.27532947063446045, "learning_rate": 1.2366228013851156e-05, "loss": 0.9479, "step": 14840 }, { "epoch": 0.8563105676049838, "grad_norm": 0.3077858090400696, "learning_rate": 1.2317775563553902e-05, "loss": 0.9652, "step": 14845 }, { "epoch": 0.8565989847715736, "grad_norm": 0.31280088424682617, "learning_rate": 1.2269411990531421e-05, "loss": 0.9224, "step": 14850 }, { "epoch": 0.8568874019381634, "grad_norm": 0.3110673427581787, "learning_rate": 1.2221137343806377e-05, "loss": 0.9567, "step": 14855 }, { "epoch": 0.8571758191047532, "grad_norm": 0.29823023080825806, "learning_rate": 1.2172951672311427e-05, "loss": 0.9816, "step": 14860 }, { "epoch": 0.8574642362713428, "grad_norm": 0.30940476059913635, "learning_rate": 1.2124855024888937e-05, "loss": 0.9098, "step": 14865 }, { "epoch": 0.8577526534379326, "grad_norm": 0.3029901683330536, "learning_rate": 1.207684745029114e-05, "loss": 1.0217, "step": 14870 }, { "epoch": 0.8580410706045224, "grad_norm": 0.2684774398803711, "learning_rate": 1.2028928997179867e-05, "loss": 0.974, "step": 14875 }, { "epoch": 0.8583294877711122, "grad_norm": 0.31164249777793884, "learning_rate": 1.1981099714126654e-05, "loss": 0.9657, "step": 14880 }, { "epoch": 0.8586179049377018, "grad_norm": 0.2923769950866699, "learning_rate": 1.193335964961273e-05, "loss": 0.9379, "step": 14885 }, { "epoch": 0.8589063221042916, "grad_norm": 0.31549927592277527, "learning_rate": 1.1885708852028777e-05, "loss": 0.9436, "step": 14890 }, { "epoch": 0.8591947392708814, "grad_norm": 0.3174637258052826, "learning_rate": 1.1838147369675056e-05, "loss": 1.0007, "step": 14895 }, { "epoch": 0.8594831564374712, "grad_norm": 0.26647695899009705, "learning_rate": 1.1790675250761263e-05, "loss": 0.9489, "step": 14900 }, { "epoch": 0.8597715736040609, "grad_norm": 0.2883162498474121, "learning_rate": 1.1743292543406558e-05, "loss": 0.9913, "step": 14905 }, { "epoch": 0.8600599907706507, "grad_norm": 0.28514915704727173, "learning_rate": 1.1695999295639459e-05, "loss": 0.9422, "step": 14910 }, { "epoch": 0.8603484079372404, "grad_norm": 0.2868596911430359, "learning_rate": 1.1648795555397719e-05, "loss": 0.93, "step": 14915 }, { "epoch": 0.8606368251038302, "grad_norm": 0.29067856073379517, "learning_rate": 1.1601681370528484e-05, "loss": 0.9241, "step": 14920 }, { "epoch": 0.8609252422704199, "grad_norm": 0.31721532344818115, "learning_rate": 1.1554656788788054e-05, "loss": 0.9678, "step": 14925 }, { "epoch": 0.8612136594370097, "grad_norm": 0.28983375430107117, "learning_rate": 1.150772185784198e-05, "loss": 0.9476, "step": 14930 }, { "epoch": 0.8615020766035995, "grad_norm": 0.2977122664451599, "learning_rate": 1.1460876625264816e-05, "loss": 0.9909, "step": 14935 }, { "epoch": 0.8617904937701892, "grad_norm": 0.3428827226161957, "learning_rate": 1.1414121138540279e-05, "loss": 0.9896, "step": 14940 }, { "epoch": 0.8620789109367789, "grad_norm": 0.2909860908985138, "learning_rate": 1.1367455445061115e-05, "loss": 0.998, "step": 14945 }, { "epoch": 0.8623673281033687, "grad_norm": 0.31093189120292664, "learning_rate": 1.1320879592129052e-05, "loss": 0.938, "step": 14950 }, { "epoch": 0.8626557452699585, "grad_norm": 0.2798452377319336, "learning_rate": 1.1274393626954715e-05, "loss": 0.9822, "step": 14955 }, { "epoch": 0.8629441624365483, "grad_norm": 0.279249370098114, "learning_rate": 1.1227997596657636e-05, "loss": 0.8768, "step": 14960 }, { "epoch": 0.8632325796031379, "grad_norm": 0.32201087474823, "learning_rate": 1.1181691548266226e-05, "loss": 0.9705, "step": 14965 }, { "epoch": 0.8635209967697277, "grad_norm": 0.3062988817691803, "learning_rate": 1.1135475528717642e-05, "loss": 0.9492, "step": 14970 }, { "epoch": 0.8638094139363175, "grad_norm": 0.30123475193977356, "learning_rate": 1.108934958485779e-05, "loss": 0.9371, "step": 14975 }, { "epoch": 0.8640978311029073, "grad_norm": 0.3014609217643738, "learning_rate": 1.1043313763441277e-05, "loss": 0.9114, "step": 14980 }, { "epoch": 0.864386248269497, "grad_norm": 0.29384082555770874, "learning_rate": 1.0997368111131346e-05, "loss": 0.9757, "step": 14985 }, { "epoch": 0.8646746654360867, "grad_norm": 0.2640998363494873, "learning_rate": 1.0951512674499898e-05, "loss": 0.8896, "step": 14990 }, { "epoch": 0.8649630826026765, "grad_norm": 0.28632596135139465, "learning_rate": 1.090574750002733e-05, "loss": 1.004, "step": 14995 }, { "epoch": 0.8652514997692663, "grad_norm": 0.29420316219329834, "learning_rate": 1.0860072634102569e-05, "loss": 0.9623, "step": 15000 }, { "epoch": 0.865539916935856, "grad_norm": 0.2923297882080078, "learning_rate": 1.0814488123022992e-05, "loss": 0.993, "step": 15005 }, { "epoch": 0.8658283341024458, "grad_norm": 0.4265219569206238, "learning_rate": 1.0768994012994371e-05, "loss": 0.9131, "step": 15010 }, { "epoch": 0.8661167512690355, "grad_norm": 0.279988169670105, "learning_rate": 1.0723590350130951e-05, "loss": 0.9918, "step": 15015 }, { "epoch": 0.8664051684356253, "grad_norm": 0.32483842968940735, "learning_rate": 1.0678277180455109e-05, "loss": 0.9298, "step": 15020 }, { "epoch": 0.866693585602215, "grad_norm": 0.27693018317222595, "learning_rate": 1.0633054549897692e-05, "loss": 0.9287, "step": 15025 }, { "epoch": 0.8669820027688048, "grad_norm": 0.29998135566711426, "learning_rate": 1.0587922504297642e-05, "loss": 0.9638, "step": 15030 }, { "epoch": 0.8672704199353946, "grad_norm": 0.31143343448638916, "learning_rate": 1.0542881089402134e-05, "loss": 1.0159, "step": 15035 }, { "epoch": 0.8675588371019843, "grad_norm": 0.30745813250541687, "learning_rate": 1.049793035086647e-05, "loss": 0.966, "step": 15040 }, { "epoch": 0.8678472542685741, "grad_norm": 0.2762928009033203, "learning_rate": 1.0453070334254e-05, "loss": 0.8719, "step": 15045 }, { "epoch": 0.8681356714351638, "grad_norm": 0.3265017569065094, "learning_rate": 1.040830108503622e-05, "loss": 0.9099, "step": 15050 }, { "epoch": 0.8684240886017536, "grad_norm": 0.3012024760246277, "learning_rate": 1.0363622648592518e-05, "loss": 1.0014, "step": 15055 }, { "epoch": 0.8687125057683434, "grad_norm": 0.3051886558532715, "learning_rate": 1.031903507021027e-05, "loss": 0.9775, "step": 15060 }, { "epoch": 0.8690009229349331, "grad_norm": 0.2711678743362427, "learning_rate": 1.0274538395084754e-05, "loss": 0.8841, "step": 15065 }, { "epoch": 0.8692893401015228, "grad_norm": 0.28673282265663147, "learning_rate": 1.0230132668319082e-05, "loss": 0.9822, "step": 15070 }, { "epoch": 0.8695777572681126, "grad_norm": 0.28427520394325256, "learning_rate": 1.0185817934924257e-05, "loss": 0.9592, "step": 15075 }, { "epoch": 0.8698661744347024, "grad_norm": 0.27094730734825134, "learning_rate": 1.014159423981893e-05, "loss": 0.9069, "step": 15080 }, { "epoch": 0.8701545916012922, "grad_norm": 0.2950955033302307, "learning_rate": 1.0097461627829585e-05, "loss": 0.9891, "step": 15085 }, { "epoch": 0.8704430087678818, "grad_norm": 0.28739115595817566, "learning_rate": 1.0053420143690284e-05, "loss": 1.0194, "step": 15090 }, { "epoch": 0.8707314259344716, "grad_norm": 0.30966266989707947, "learning_rate": 1.0009469832042839e-05, "loss": 1.0324, "step": 15095 }, { "epoch": 0.8710198431010614, "grad_norm": 0.29282572865486145, "learning_rate": 9.965610737436515e-06, "loss": 0.9035, "step": 15100 }, { "epoch": 0.8713082602676512, "grad_norm": 0.2793741226196289, "learning_rate": 9.921842904328172e-06, "loss": 0.9867, "step": 15105 }, { "epoch": 0.8715966774342409, "grad_norm": 0.3010117709636688, "learning_rate": 9.87816637708221e-06, "loss": 0.9313, "step": 15110 }, { "epoch": 0.8718850946008306, "grad_norm": 0.2735041081905365, "learning_rate": 9.834581199970427e-06, "loss": 0.9388, "step": 15115 }, { "epoch": 0.8721735117674204, "grad_norm": 0.26067879796028137, "learning_rate": 9.791087417172019e-06, "loss": 0.8819, "step": 15120 }, { "epoch": 0.8724619289340102, "grad_norm": 0.2928871214389801, "learning_rate": 9.74768507277355e-06, "loss": 1.0077, "step": 15125 }, { "epoch": 0.8727503461005999, "grad_norm": 0.2853434085845947, "learning_rate": 9.704374210768952e-06, "loss": 0.997, "step": 15130 }, { "epoch": 0.8730387632671897, "grad_norm": 0.2779994010925293, "learning_rate": 9.661154875059364e-06, "loss": 0.8973, "step": 15135 }, { "epoch": 0.8733271804337794, "grad_norm": 0.35284411907196045, "learning_rate": 9.618027109453176e-06, "loss": 0.9867, "step": 15140 }, { "epoch": 0.8736155976003692, "grad_norm": 0.3049831986427307, "learning_rate": 9.574990957665941e-06, "loss": 0.9175, "step": 15145 }, { "epoch": 0.8739040147669589, "grad_norm": 0.29838353395462036, "learning_rate": 9.532046463320365e-06, "loss": 1.0109, "step": 15150 }, { "epoch": 0.8741924319335487, "grad_norm": 0.2807800769805908, "learning_rate": 9.489193669946273e-06, "loss": 0.9607, "step": 15155 }, { "epoch": 0.8744808491001385, "grad_norm": 0.3492079973220825, "learning_rate": 9.446432620980517e-06, "loss": 1.0417, "step": 15160 }, { "epoch": 0.8747692662667282, "grad_norm": 0.3055107295513153, "learning_rate": 9.403763359766892e-06, "loss": 0.968, "step": 15165 }, { "epoch": 0.8750576834333179, "grad_norm": 0.28376808762550354, "learning_rate": 9.361185929556282e-06, "loss": 0.9536, "step": 15170 }, { "epoch": 0.8753461005999077, "grad_norm": 0.26349642872810364, "learning_rate": 9.318700373506362e-06, "loss": 0.9339, "step": 15175 }, { "epoch": 0.8756345177664975, "grad_norm": 0.2932707369327545, "learning_rate": 9.276306734681805e-06, "loss": 0.9592, "step": 15180 }, { "epoch": 0.8759229349330873, "grad_norm": 0.27456268668174744, "learning_rate": 9.234005056053963e-06, "loss": 0.9042, "step": 15185 }, { "epoch": 0.8762113520996769, "grad_norm": 0.2756810486316681, "learning_rate": 9.191795380501134e-06, "loss": 0.939, "step": 15190 }, { "epoch": 0.8764997692662667, "grad_norm": 0.3087191879749298, "learning_rate": 9.14967775080824e-06, "loss": 0.9555, "step": 15195 }, { "epoch": 0.8767881864328565, "grad_norm": 0.29341921210289, "learning_rate": 9.107652209666973e-06, "loss": 0.9725, "step": 15200 }, { "epoch": 0.8770766035994463, "grad_norm": 0.28505462408065796, "learning_rate": 9.065718799675626e-06, "loss": 0.9523, "step": 15205 }, { "epoch": 0.877365020766036, "grad_norm": 0.28436028957366943, "learning_rate": 9.023877563339134e-06, "loss": 0.9626, "step": 15210 }, { "epoch": 0.8776534379326257, "grad_norm": 0.2829095423221588, "learning_rate": 8.982128543069025e-06, "loss": 0.9674, "step": 15215 }, { "epoch": 0.8779418550992155, "grad_norm": 0.29247331619262695, "learning_rate": 8.940471781183335e-06, "loss": 0.9893, "step": 15220 }, { "epoch": 0.8782302722658053, "grad_norm": 0.277696818113327, "learning_rate": 8.898907319906546e-06, "loss": 0.9762, "step": 15225 }, { "epoch": 0.878518689432395, "grad_norm": 0.32393842935562134, "learning_rate": 8.857435201369645e-06, "loss": 0.9658, "step": 15230 }, { "epoch": 0.8788071065989848, "grad_norm": 0.2773330807685852, "learning_rate": 8.816055467609963e-06, "loss": 0.9994, "step": 15235 }, { "epoch": 0.8790955237655745, "grad_norm": 0.29264041781425476, "learning_rate": 8.774768160571257e-06, "loss": 0.984, "step": 15240 }, { "epoch": 0.8793839409321643, "grad_norm": 0.3316905200481415, "learning_rate": 8.733573322103484e-06, "loss": 1.0043, "step": 15245 }, { "epoch": 0.879672358098754, "grad_norm": 0.3055762052536011, "learning_rate": 8.692470993962987e-06, "loss": 0.9468, "step": 15250 }, { "epoch": 0.8799607752653438, "grad_norm": 0.2926937937736511, "learning_rate": 8.651461217812295e-06, "loss": 0.9743, "step": 15255 }, { "epoch": 0.8802491924319336, "grad_norm": 0.3095841407775879, "learning_rate": 8.610544035220103e-06, "loss": 0.9596, "step": 15260 }, { "epoch": 0.8805376095985233, "grad_norm": 0.2798426151275635, "learning_rate": 8.569719487661276e-06, "loss": 0.8799, "step": 15265 }, { "epoch": 0.880826026765113, "grad_norm": 0.26808908581733704, "learning_rate": 8.528987616516748e-06, "loss": 0.9304, "step": 15270 }, { "epoch": 0.8811144439317028, "grad_norm": 0.283551424741745, "learning_rate": 8.48834846307357e-06, "loss": 0.9231, "step": 15275 }, { "epoch": 0.8814028610982926, "grad_norm": 0.26878780126571655, "learning_rate": 8.44780206852478e-06, "loss": 0.9712, "step": 15280 }, { "epoch": 0.8816912782648824, "grad_norm": 0.28564929962158203, "learning_rate": 8.40734847396938e-06, "loss": 0.9675, "step": 15285 }, { "epoch": 0.881979695431472, "grad_norm": 0.2827951908111572, "learning_rate": 8.366987720412322e-06, "loss": 0.9926, "step": 15290 }, { "epoch": 0.8822681125980618, "grad_norm": 0.2900889217853546, "learning_rate": 8.32671984876443e-06, "loss": 0.9621, "step": 15295 }, { "epoch": 0.8825565297646516, "grad_norm": 0.2751014828681946, "learning_rate": 8.286544899842441e-06, "loss": 0.9992, "step": 15300 }, { "epoch": 0.8828449469312414, "grad_norm": 0.35621145367622375, "learning_rate": 8.246462914368835e-06, "loss": 1.0027, "step": 15305 }, { "epoch": 0.883133364097831, "grad_norm": 0.27734360098838806, "learning_rate": 8.206473932971903e-06, "loss": 0.9242, "step": 15310 }, { "epoch": 0.8834217812644208, "grad_norm": 0.310468852519989, "learning_rate": 8.16657799618561e-06, "loss": 0.998, "step": 15315 }, { "epoch": 0.8837101984310106, "grad_norm": 0.2635403275489807, "learning_rate": 8.126775144449705e-06, "loss": 0.9703, "step": 15320 }, { "epoch": 0.8839986155976004, "grad_norm": 0.3096494674682617, "learning_rate": 8.087065418109519e-06, "loss": 0.9764, "step": 15325 }, { "epoch": 0.8842870327641901, "grad_norm": 0.3017280399799347, "learning_rate": 8.04744885741593e-06, "loss": 0.8677, "step": 15330 }, { "epoch": 0.8845754499307799, "grad_norm": 0.3181787431240082, "learning_rate": 8.007925502525527e-06, "loss": 0.9595, "step": 15335 }, { "epoch": 0.8848638670973696, "grad_norm": 0.2798701822757721, "learning_rate": 7.968495393500285e-06, "loss": 0.9224, "step": 15340 }, { "epoch": 0.8851522842639594, "grad_norm": 0.2835695743560791, "learning_rate": 7.92915857030776e-06, "loss": 0.9276, "step": 15345 }, { "epoch": 0.8854407014305491, "grad_norm": 0.28347131609916687, "learning_rate": 7.889915072820874e-06, "loss": 0.9064, "step": 15350 }, { "epoch": 0.8857291185971389, "grad_norm": 0.2914379835128784, "learning_rate": 7.850764940818e-06, "loss": 0.949, "step": 15355 }, { "epoch": 0.8860175357637287, "grad_norm": 0.326035737991333, "learning_rate": 7.811708213982883e-06, "loss": 0.9154, "step": 15360 }, { "epoch": 0.8863059529303184, "grad_norm": 0.25818875432014465, "learning_rate": 7.77274493190454e-06, "loss": 0.9831, "step": 15365 }, { "epoch": 0.8865943700969081, "grad_norm": 0.2907062768936157, "learning_rate": 7.733875134077307e-06, "loss": 1.0559, "step": 15370 }, { "epoch": 0.8868827872634979, "grad_norm": 0.2752329707145691, "learning_rate": 7.69509885990073e-06, "loss": 0.9661, "step": 15375 }, { "epoch": 0.8871712044300877, "grad_norm": 0.29247012734413147, "learning_rate": 7.656416148679612e-06, "loss": 0.9606, "step": 15380 }, { "epoch": 0.8874596215966775, "grad_norm": 0.3025994896888733, "learning_rate": 7.617827039623893e-06, "loss": 0.962, "step": 15385 }, { "epoch": 0.8877480387632672, "grad_norm": 0.30906012654304504, "learning_rate": 7.579331571848569e-06, "loss": 0.9593, "step": 15390 }, { "epoch": 0.8880364559298569, "grad_norm": 0.2803690731525421, "learning_rate": 7.540929784373818e-06, "loss": 0.8979, "step": 15395 }, { "epoch": 0.8883248730964467, "grad_norm": 0.2828715145587921, "learning_rate": 7.502621716124791e-06, "loss": 0.891, "step": 15400 }, { "epoch": 0.8886132902630365, "grad_norm": 0.29924464225769043, "learning_rate": 7.464407405931728e-06, "loss": 0.9579, "step": 15405 }, { "epoch": 0.8889017074296263, "grad_norm": 0.26582613587379456, "learning_rate": 7.4262868925296995e-06, "loss": 0.9038, "step": 15410 }, { "epoch": 0.8891901245962159, "grad_norm": 0.2724932134151459, "learning_rate": 7.388260214558829e-06, "loss": 0.9405, "step": 15415 }, { "epoch": 0.8894785417628057, "grad_norm": 0.2881430387496948, "learning_rate": 7.35032741056404e-06, "loss": 1.0487, "step": 15420 }, { "epoch": 0.8897669589293955, "grad_norm": 0.29068121314048767, "learning_rate": 7.3124885189951645e-06, "loss": 0.9252, "step": 15425 }, { "epoch": 0.8900553760959853, "grad_norm": 0.29160958528518677, "learning_rate": 7.274743578206788e-06, "loss": 0.9886, "step": 15430 }, { "epoch": 0.890343793262575, "grad_norm": 0.2769414782524109, "learning_rate": 7.237092626458297e-06, "loss": 1.0098, "step": 15435 }, { "epoch": 0.8906322104291647, "grad_norm": 0.28141847252845764, "learning_rate": 7.199535701913806e-06, "loss": 0.9594, "step": 15440 }, { "epoch": 0.8909206275957545, "grad_norm": 0.2850087881088257, "learning_rate": 7.16207284264212e-06, "loss": 0.9334, "step": 15445 }, { "epoch": 0.8912090447623443, "grad_norm": 0.2679743468761444, "learning_rate": 7.124704086616684e-06, "loss": 0.8826, "step": 15450 }, { "epoch": 0.891497461928934, "grad_norm": 0.283372163772583, "learning_rate": 7.0874294717155675e-06, "loss": 0.9705, "step": 15455 }, { "epoch": 0.8917858790955238, "grad_norm": 0.2819139063358307, "learning_rate": 7.05024903572139e-06, "loss": 0.9305, "step": 15460 }, { "epoch": 0.8920742962621135, "grad_norm": 0.2858351767063141, "learning_rate": 7.013162816321373e-06, "loss": 0.9842, "step": 15465 }, { "epoch": 0.8923627134287033, "grad_norm": 0.2785593867301941, "learning_rate": 6.976170851107178e-06, "loss": 1.0081, "step": 15470 }, { "epoch": 0.892651130595293, "grad_norm": 0.3193453848361969, "learning_rate": 6.939273177574945e-06, "loss": 0.9765, "step": 15475 }, { "epoch": 0.8929395477618828, "grad_norm": 0.34608909487724304, "learning_rate": 6.902469833125236e-06, "loss": 1.0135, "step": 15480 }, { "epoch": 0.8932279649284726, "grad_norm": 0.3069968521595001, "learning_rate": 6.865760855062997e-06, "loss": 0.9317, "step": 15485 }, { "epoch": 0.8935163820950623, "grad_norm": 0.2878912687301636, "learning_rate": 6.8291462805975535e-06, "loss": 0.9027, "step": 15490 }, { "epoch": 0.893804799261652, "grad_norm": 0.26937398314476013, "learning_rate": 6.792626146842462e-06, "loss": 0.8568, "step": 15495 }, { "epoch": 0.8940932164282418, "grad_norm": 0.3143722414970398, "learning_rate": 6.756200490815645e-06, "loss": 0.9664, "step": 15500 }, { "epoch": 0.8943816335948316, "grad_norm": 0.28063690662384033, "learning_rate": 6.7198693494392005e-06, "loss": 0.9556, "step": 15505 }, { "epoch": 0.8946700507614214, "grad_norm": 0.27173417806625366, "learning_rate": 6.683632759539449e-06, "loss": 0.9858, "step": 15510 }, { "epoch": 0.894958467928011, "grad_norm": 0.30416610836982727, "learning_rate": 6.647490757846841e-06, "loss": 0.9376, "step": 15515 }, { "epoch": 0.8952468850946008, "grad_norm": 0.3319179117679596, "learning_rate": 6.611443380995963e-06, "loss": 1.0079, "step": 15520 }, { "epoch": 0.8955353022611906, "grad_norm": 0.30170193314552307, "learning_rate": 6.57549066552553e-06, "loss": 0.9143, "step": 15525 }, { "epoch": 0.8958237194277804, "grad_norm": 0.2698569893836975, "learning_rate": 6.5396326478782465e-06, "loss": 0.9221, "step": 15530 }, { "epoch": 0.8961121365943701, "grad_norm": 0.2803119421005249, "learning_rate": 6.50386936440085e-06, "loss": 0.8905, "step": 15535 }, { "epoch": 0.8964005537609598, "grad_norm": 0.4507327377796173, "learning_rate": 6.468200851344042e-06, "loss": 0.9726, "step": 15540 }, { "epoch": 0.8966889709275496, "grad_norm": 0.290579229593277, "learning_rate": 6.432627144862513e-06, "loss": 1.0112, "step": 15545 }, { "epoch": 0.8969773880941394, "grad_norm": 0.29394960403442383, "learning_rate": 6.397148281014798e-06, "loss": 0.9204, "step": 15550 }, { "epoch": 0.8972658052607291, "grad_norm": 0.29478779435157776, "learning_rate": 6.361764295763284e-06, "loss": 0.9157, "step": 15555 }, { "epoch": 0.8975542224273189, "grad_norm": 0.2959185838699341, "learning_rate": 6.326475224974249e-06, "loss": 0.9941, "step": 15560 }, { "epoch": 0.8978426395939086, "grad_norm": 0.3056466281414032, "learning_rate": 6.291281104417712e-06, "loss": 0.9631, "step": 15565 }, { "epoch": 0.8981310567604984, "grad_norm": 0.3037865459918976, "learning_rate": 6.256181969767505e-06, "loss": 0.9736, "step": 15570 }, { "epoch": 0.8984194739270881, "grad_norm": 0.29159796237945557, "learning_rate": 6.22117785660109e-06, "loss": 0.9766, "step": 15575 }, { "epoch": 0.8987078910936779, "grad_norm": 0.29030078649520874, "learning_rate": 6.186268800399675e-06, "loss": 0.9445, "step": 15580 }, { "epoch": 0.8989963082602677, "grad_norm": 0.30294716358184814, "learning_rate": 6.1514548365481315e-06, "loss": 0.9299, "step": 15585 }, { "epoch": 0.8992847254268574, "grad_norm": 0.29575252532958984, "learning_rate": 6.116736000334888e-06, "loss": 0.9612, "step": 15590 }, { "epoch": 0.8995731425934471, "grad_norm": 0.3025875389575958, "learning_rate": 6.082112326951983e-06, "loss": 0.9427, "step": 15595 }, { "epoch": 0.8998615597600369, "grad_norm": 0.31641924381256104, "learning_rate": 6.047583851494965e-06, "loss": 1.0077, "step": 15600 }, { "epoch": 0.9001499769266267, "grad_norm": 0.2969208061695099, "learning_rate": 6.0131506089629586e-06, "loss": 0.9908, "step": 15605 }, { "epoch": 0.9004383940932165, "grad_norm": 0.2816386818885803, "learning_rate": 5.978812634258468e-06, "loss": 0.9903, "step": 15610 }, { "epoch": 0.9007268112598061, "grad_norm": 0.29898086190223694, "learning_rate": 5.9445699621874966e-06, "loss": 0.9487, "step": 15615 }, { "epoch": 0.9010152284263959, "grad_norm": 0.2998582720756531, "learning_rate": 5.910422627459411e-06, "loss": 0.9809, "step": 15620 }, { "epoch": 0.9013036455929857, "grad_norm": 0.31181856989860535, "learning_rate": 5.876370664686926e-06, "loss": 0.9769, "step": 15625 }, { "epoch": 0.9015920627595755, "grad_norm": 0.2707262337207794, "learning_rate": 5.842414108386151e-06, "loss": 0.9884, "step": 15630 }, { "epoch": 0.9018804799261652, "grad_norm": 0.3136463761329651, "learning_rate": 5.8085529929764345e-06, "loss": 1.0145, "step": 15635 }, { "epoch": 0.9021688970927549, "grad_norm": 0.3019693195819855, "learning_rate": 5.774787352780387e-06, "loss": 0.9954, "step": 15640 }, { "epoch": 0.9024573142593447, "grad_norm": 0.2765672206878662, "learning_rate": 5.741117222023862e-06, "loss": 0.9421, "step": 15645 }, { "epoch": 0.9027457314259345, "grad_norm": 0.31335878372192383, "learning_rate": 5.707542634835883e-06, "loss": 0.978, "step": 15650 }, { "epoch": 0.9030341485925242, "grad_norm": 0.2650074362754822, "learning_rate": 5.674063625248638e-06, "loss": 0.9065, "step": 15655 }, { "epoch": 0.903322565759114, "grad_norm": 0.31178587675094604, "learning_rate": 5.640680227197426e-06, "loss": 0.9972, "step": 15660 }, { "epoch": 0.9036109829257037, "grad_norm": 0.2522607445716858, "learning_rate": 5.607392474520667e-06, "loss": 0.8805, "step": 15665 }, { "epoch": 0.9038994000922935, "grad_norm": 0.27256470918655396, "learning_rate": 5.574200400959773e-06, "loss": 0.9331, "step": 15670 }, { "epoch": 0.9041878172588832, "grad_norm": 0.27973702549934387, "learning_rate": 5.541104040159217e-06, "loss": 0.9407, "step": 15675 }, { "epoch": 0.904476234425473, "grad_norm": 0.3060024678707123, "learning_rate": 5.5081034256664445e-06, "loss": 0.9476, "step": 15680 }, { "epoch": 0.9047646515920628, "grad_norm": 0.27370506525039673, "learning_rate": 5.475198590931829e-06, "loss": 0.9436, "step": 15685 }, { "epoch": 0.9050530687586525, "grad_norm": 0.27347618341445923, "learning_rate": 5.442389569308703e-06, "loss": 0.9515, "step": 15690 }, { "epoch": 0.9053414859252422, "grad_norm": 0.32937949895858765, "learning_rate": 5.4096763940532316e-06, "loss": 1.0078, "step": 15695 }, { "epoch": 0.905629903091832, "grad_norm": 0.28055548667907715, "learning_rate": 5.377059098324455e-06, "loss": 0.9161, "step": 15700 }, { "epoch": 0.9059183202584218, "grad_norm": 0.29839953780174255, "learning_rate": 5.344537715184228e-06, "loss": 0.9526, "step": 15705 }, { "epoch": 0.9062067374250116, "grad_norm": 0.27476581931114197, "learning_rate": 5.312112277597159e-06, "loss": 0.9429, "step": 15710 }, { "epoch": 0.9064951545916012, "grad_norm": 0.2815583050251007, "learning_rate": 5.279782818430656e-06, "loss": 0.9715, "step": 15715 }, { "epoch": 0.906783571758191, "grad_norm": 0.3266623914241791, "learning_rate": 5.247549370454763e-06, "loss": 1.0225, "step": 15720 }, { "epoch": 0.9070719889247808, "grad_norm": 0.28937751054763794, "learning_rate": 5.215411966342287e-06, "loss": 0.9117, "step": 15725 }, { "epoch": 0.9073604060913706, "grad_norm": 0.3165436387062073, "learning_rate": 5.183370638668616e-06, "loss": 0.9378, "step": 15730 }, { "epoch": 0.9076488232579604, "grad_norm": 0.2963590919971466, "learning_rate": 5.151425419911815e-06, "loss": 0.9014, "step": 15735 }, { "epoch": 0.90793724042455, "grad_norm": 0.3045542538166046, "learning_rate": 5.119576342452459e-06, "loss": 0.9728, "step": 15740 }, { "epoch": 0.9082256575911398, "grad_norm": 0.26070326566696167, "learning_rate": 5.0878234385737135e-06, "loss": 0.8845, "step": 15745 }, { "epoch": 0.9085140747577296, "grad_norm": 0.3005124032497406, "learning_rate": 5.056166740461265e-06, "loss": 1.0158, "step": 15750 }, { "epoch": 0.9088024919243194, "grad_norm": 0.28957071900367737, "learning_rate": 5.024606280203281e-06, "loss": 0.9114, "step": 15755 }, { "epoch": 0.9090909090909091, "grad_norm": 0.2658599019050598, "learning_rate": 4.993142089790337e-06, "loss": 0.935, "step": 15760 }, { "epoch": 0.9093793262574988, "grad_norm": 0.27619728446006775, "learning_rate": 4.961774201115487e-06, "loss": 0.9431, "step": 15765 }, { "epoch": 0.9096677434240886, "grad_norm": 0.28659436106681824, "learning_rate": 4.9305026459741224e-06, "loss": 0.968, "step": 15770 }, { "epoch": 0.9099561605906784, "grad_norm": 0.3058231472969055, "learning_rate": 4.89932745606404e-06, "loss": 0.9406, "step": 15775 }, { "epoch": 0.9102445777572681, "grad_norm": 0.28550755977630615, "learning_rate": 4.8682486629852975e-06, "loss": 0.9397, "step": 15780 }, { "epoch": 0.9105329949238579, "grad_norm": 0.30754169821739197, "learning_rate": 4.8372662982402835e-06, "loss": 0.9453, "step": 15785 }, { "epoch": 0.9108214120904476, "grad_norm": 0.29143068194389343, "learning_rate": 4.8063803932336114e-06, "loss": 0.9442, "step": 15790 }, { "epoch": 0.9111098292570374, "grad_norm": 0.31427714228630066, "learning_rate": 4.775590979272171e-06, "loss": 0.9886, "step": 15795 }, { "epoch": 0.9113982464236271, "grad_norm": 0.3036996126174927, "learning_rate": 4.74489808756502e-06, "loss": 0.9234, "step": 15800 }, { "epoch": 0.9116866635902169, "grad_norm": 0.31301310658454895, "learning_rate": 4.714301749223326e-06, "loss": 0.9896, "step": 15805 }, { "epoch": 0.9119750807568067, "grad_norm": 0.2925277054309845, "learning_rate": 4.683801995260484e-06, "loss": 0.997, "step": 15810 }, { "epoch": 0.9122634979233964, "grad_norm": 0.31284570693969727, "learning_rate": 4.653398856591917e-06, "loss": 1.0578, "step": 15815 }, { "epoch": 0.9125519150899861, "grad_norm": 0.3127239942550659, "learning_rate": 4.623092364035153e-06, "loss": 0.8718, "step": 15820 }, { "epoch": 0.9128403322565759, "grad_norm": 0.2932998836040497, "learning_rate": 4.592882548309707e-06, "loss": 0.9773, "step": 15825 }, { "epoch": 0.9131287494231657, "grad_norm": 0.30286481976509094, "learning_rate": 4.562769440037174e-06, "loss": 0.9534, "step": 15830 }, { "epoch": 0.9134171665897555, "grad_norm": 0.31667113304138184, "learning_rate": 4.532753069741058e-06, "loss": 0.9168, "step": 15835 }, { "epoch": 0.9137055837563451, "grad_norm": 0.32030534744262695, "learning_rate": 4.502833467846857e-06, "loss": 0.9492, "step": 15840 }, { "epoch": 0.9139940009229349, "grad_norm": 0.30892324447631836, "learning_rate": 4.473010664681932e-06, "loss": 0.9133, "step": 15845 }, { "epoch": 0.9142824180895247, "grad_norm": 0.2874343991279602, "learning_rate": 4.443284690475558e-06, "loss": 0.9352, "step": 15850 }, { "epoch": 0.9145708352561145, "grad_norm": 0.28388267755508423, "learning_rate": 4.413655575358866e-06, "loss": 0.9042, "step": 15855 }, { "epoch": 0.9148592524227042, "grad_norm": 0.315041720867157, "learning_rate": 4.384123349364788e-06, "loss": 0.942, "step": 15860 }, { "epoch": 0.9151476695892939, "grad_norm": 0.2852926552295685, "learning_rate": 4.354688042428057e-06, "loss": 0.9484, "step": 15865 }, { "epoch": 0.9154360867558837, "grad_norm": 0.2800235450267792, "learning_rate": 4.32534968438516e-06, "loss": 0.9049, "step": 15870 }, { "epoch": 0.9157245039224735, "grad_norm": 0.31681615114212036, "learning_rate": 4.296108304974311e-06, "loss": 0.9684, "step": 15875 }, { "epoch": 0.9160129210890632, "grad_norm": 0.31426697969436646, "learning_rate": 4.266963933835455e-06, "loss": 0.9265, "step": 15880 }, { "epoch": 0.916301338255653, "grad_norm": 0.26269903779029846, "learning_rate": 4.237916600510139e-06, "loss": 1.0063, "step": 15885 }, { "epoch": 0.9165897554222427, "grad_norm": 0.3267369568347931, "learning_rate": 4.208966334441633e-06, "loss": 0.9517, "step": 15890 }, { "epoch": 0.9168781725888325, "grad_norm": 0.282438188791275, "learning_rate": 4.180113164974764e-06, "loss": 0.9814, "step": 15895 }, { "epoch": 0.9171665897554222, "grad_norm": 0.2685856819152832, "learning_rate": 4.151357121355947e-06, "loss": 1.0062, "step": 15900 }, { "epoch": 0.917455006922012, "grad_norm": 0.28898438811302185, "learning_rate": 4.122698232733147e-06, "loss": 1.0028, "step": 15905 }, { "epoch": 0.9177434240886018, "grad_norm": 0.2767029404640198, "learning_rate": 4.0941365281558454e-06, "loss": 0.9812, "step": 15910 }, { "epoch": 0.9180318412551915, "grad_norm": 0.2602791488170624, "learning_rate": 4.065672036575052e-06, "loss": 1.1087, "step": 15915 }, { "epoch": 0.9183202584217812, "grad_norm": 0.3219699263572693, "learning_rate": 4.037304786843188e-06, "loss": 1.0031, "step": 15920 }, { "epoch": 0.918608675588371, "grad_norm": 0.26737821102142334, "learning_rate": 4.009034807714152e-06, "loss": 1.0295, "step": 15925 }, { "epoch": 0.9188970927549608, "grad_norm": 0.32514479756355286, "learning_rate": 3.980862127843199e-06, "loss": 1.0016, "step": 15930 }, { "epoch": 0.9191855099215506, "grad_norm": 0.33426961302757263, "learning_rate": 3.952786775786987e-06, "loss": 1.0007, "step": 15935 }, { "epoch": 0.9194739270881402, "grad_norm": 0.2924538850784302, "learning_rate": 3.924808780003531e-06, "loss": 0.9427, "step": 15940 }, { "epoch": 0.91976234425473, "grad_norm": 0.27361974120140076, "learning_rate": 3.896928168852143e-06, "loss": 0.9666, "step": 15945 }, { "epoch": 0.9200507614213198, "grad_norm": 0.3665061891078949, "learning_rate": 3.86914497059343e-06, "loss": 1.0528, "step": 15950 }, { "epoch": 0.9203391785879096, "grad_norm": 0.3001386225223541, "learning_rate": 3.841459213389232e-06, "loss": 0.9524, "step": 15955 }, { "epoch": 0.9206275957544993, "grad_norm": 0.35374686121940613, "learning_rate": 3.813870925302698e-06, "loss": 0.9395, "step": 15960 }, { "epoch": 0.920916012921089, "grad_norm": 0.2924344837665558, "learning_rate": 3.7863801342980845e-06, "loss": 1.0259, "step": 15965 }, { "epoch": 0.9212044300876788, "grad_norm": 0.3271997272968292, "learning_rate": 3.7589868682408434e-06, "loss": 1.0295, "step": 15970 }, { "epoch": 0.9214928472542686, "grad_norm": 0.3009391725063324, "learning_rate": 3.7316911548976543e-06, "loss": 1.0151, "step": 15975 }, { "epoch": 0.9217812644208583, "grad_norm": 0.287616491317749, "learning_rate": 3.7044930219362063e-06, "loss": 1.0131, "step": 15980 }, { "epoch": 0.9220696815874481, "grad_norm": 0.30860260128974915, "learning_rate": 3.677392496925347e-06, "loss": 0.9404, "step": 15985 }, { "epoch": 0.9223580987540378, "grad_norm": 0.2797485589981079, "learning_rate": 3.6503896073349587e-06, "loss": 0.9273, "step": 15990 }, { "epoch": 0.9226465159206276, "grad_norm": 0.26379159092903137, "learning_rate": 3.6234843805359353e-06, "loss": 0.9457, "step": 15995 }, { "epoch": 0.9229349330872173, "grad_norm": 0.29575875401496887, "learning_rate": 3.5966768438002507e-06, "loss": 0.9845, "step": 16000 }, { "epoch": 0.9232233502538071, "grad_norm": 0.2721844017505646, "learning_rate": 3.56996702430078e-06, "loss": 0.9169, "step": 16005 }, { "epoch": 0.9235117674203969, "grad_norm": 0.2789749205112457, "learning_rate": 3.5433549491113884e-06, "loss": 0.8933, "step": 16010 }, { "epoch": 0.9238001845869867, "grad_norm": 0.27569326758384705, "learning_rate": 3.516840645206854e-06, "loss": 0.9735, "step": 16015 }, { "epoch": 0.9240886017535763, "grad_norm": 0.2820409834384918, "learning_rate": 3.4904241394628557e-06, "loss": 0.9656, "step": 16020 }, { "epoch": 0.9243770189201661, "grad_norm": 0.37577053904533386, "learning_rate": 3.464105458655953e-06, "loss": 0.9673, "step": 16025 }, { "epoch": 0.9246654360867559, "grad_norm": 0.29112502932548523, "learning_rate": 3.4378846294634835e-06, "loss": 1.0414, "step": 16030 }, { "epoch": 0.9249538532533457, "grad_norm": 0.30589404702186584, "learning_rate": 3.4117616784637097e-06, "loss": 0.9485, "step": 16035 }, { "epoch": 0.9252422704199353, "grad_norm": 0.3032233715057373, "learning_rate": 3.3857366321355722e-06, "loss": 0.9903, "step": 16040 }, { "epoch": 0.9255306875865251, "grad_norm": 0.3229020833969116, "learning_rate": 3.3598095168588696e-06, "loss": 0.963, "step": 16045 }, { "epoch": 0.9258191047531149, "grad_norm": 0.2780771255493164, "learning_rate": 3.3339803589140352e-06, "loss": 0.9459, "step": 16050 }, { "epoch": 0.9261075219197047, "grad_norm": 0.2907694876194, "learning_rate": 3.3082491844822926e-06, "loss": 0.9683, "step": 16055 }, { "epoch": 0.9263959390862944, "grad_norm": 0.2736465632915497, "learning_rate": 3.2826160196455123e-06, "loss": 0.9295, "step": 16060 }, { "epoch": 0.9266843562528841, "grad_norm": 0.2800248861312866, "learning_rate": 3.2570808903862106e-06, "loss": 0.959, "step": 16065 }, { "epoch": 0.9269727734194739, "grad_norm": 0.27502167224884033, "learning_rate": 3.23164382258756e-06, "loss": 0.931, "step": 16070 }, { "epoch": 0.9272611905860637, "grad_norm": 0.29219675064086914, "learning_rate": 3.206304842033292e-06, "loss": 0.9285, "step": 16075 }, { "epoch": 0.9275496077526535, "grad_norm": 0.2969021797180176, "learning_rate": 3.181063974407772e-06, "loss": 0.9363, "step": 16080 }, { "epoch": 0.9278380249192432, "grad_norm": 0.3350149989128113, "learning_rate": 3.1559212452958674e-06, "loss": 0.9529, "step": 16085 }, { "epoch": 0.928126442085833, "grad_norm": 0.278353214263916, "learning_rate": 3.1308766801829926e-06, "loss": 0.9384, "step": 16090 }, { "epoch": 0.9284148592524227, "grad_norm": 0.27826988697052, "learning_rate": 3.1059303044550515e-06, "loss": 0.9324, "step": 16095 }, { "epoch": 0.9287032764190125, "grad_norm": 0.2794398367404938, "learning_rate": 3.081082143398395e-06, "loss": 0.9595, "step": 16100 }, { "epoch": 0.9289916935856022, "grad_norm": 0.29913151264190674, "learning_rate": 3.056332222199898e-06, "loss": 0.9629, "step": 16105 }, { "epoch": 0.929280110752192, "grad_norm": 0.2864798307418823, "learning_rate": 3.0316805659467705e-06, "loss": 0.881, "step": 16110 }, { "epoch": 0.9295685279187818, "grad_norm": 0.3133912980556488, "learning_rate": 3.0071271996266804e-06, "loss": 0.8897, "step": 16115 }, { "epoch": 0.9298569450853715, "grad_norm": 0.2826102674007416, "learning_rate": 2.9826721481276077e-06, "loss": 0.9238, "step": 16120 }, { "epoch": 0.9301453622519612, "grad_norm": 0.27337539196014404, "learning_rate": 2.958315436237935e-06, "loss": 0.9702, "step": 16125 }, { "epoch": 0.930433779418551, "grad_norm": 0.30676543712615967, "learning_rate": 2.934057088646336e-06, "loss": 0.9966, "step": 16130 }, { "epoch": 0.9307221965851408, "grad_norm": 0.2815453112125397, "learning_rate": 2.9098971299417634e-06, "loss": 0.9087, "step": 16135 }, { "epoch": 0.9310106137517306, "grad_norm": 0.26081717014312744, "learning_rate": 2.8858355846134944e-06, "loss": 0.9942, "step": 16140 }, { "epoch": 0.9312990309183202, "grad_norm": 0.27590805292129517, "learning_rate": 2.8618724770509864e-06, "loss": 0.9347, "step": 16145 }, { "epoch": 0.93158744808491, "grad_norm": 0.29033374786376953, "learning_rate": 2.8380078315439653e-06, "loss": 0.9639, "step": 16150 }, { "epoch": 0.9318758652514998, "grad_norm": 0.3115253150463104, "learning_rate": 2.814241672282336e-06, "loss": 0.987, "step": 16155 }, { "epoch": 0.9321642824180896, "grad_norm": 0.3148709237575531, "learning_rate": 2.790574023356163e-06, "loss": 0.9225, "step": 16160 }, { "epoch": 0.9324526995846792, "grad_norm": 0.2759745717048645, "learning_rate": 2.767004908755677e-06, "loss": 1.0109, "step": 16165 }, { "epoch": 0.932741116751269, "grad_norm": 0.2825234830379486, "learning_rate": 2.7435343523712242e-06, "loss": 0.9106, "step": 16170 }, { "epoch": 0.9330295339178588, "grad_norm": 0.2725534737110138, "learning_rate": 2.7201623779932516e-06, "loss": 0.9313, "step": 16175 }, { "epoch": 0.9333179510844486, "grad_norm": 0.29524463415145874, "learning_rate": 2.6968890093122754e-06, "loss": 1.0006, "step": 16180 }, { "epoch": 0.9336063682510383, "grad_norm": 0.2792085111141205, "learning_rate": 2.6737142699188587e-06, "loss": 0.9995, "step": 16185 }, { "epoch": 0.933894785417628, "grad_norm": 0.28851646184921265, "learning_rate": 2.650638183303611e-06, "loss": 0.932, "step": 16190 }, { "epoch": 0.9341832025842178, "grad_norm": 0.2972305119037628, "learning_rate": 2.62766077285711e-06, "loss": 0.9405, "step": 16195 }, { "epoch": 0.9344716197508076, "grad_norm": 0.2950534522533417, "learning_rate": 2.6047820618699592e-06, "loss": 0.9827, "step": 16200 }, { "epoch": 0.9347600369173973, "grad_norm": 0.2992311120033264, "learning_rate": 2.5820020735326632e-06, "loss": 0.9423, "step": 16205 }, { "epoch": 0.9350484540839871, "grad_norm": 0.2768268287181854, "learning_rate": 2.5593208309357187e-06, "loss": 0.9297, "step": 16210 }, { "epoch": 0.9353368712505769, "grad_norm": 0.2867565453052521, "learning_rate": 2.536738357069468e-06, "loss": 0.9676, "step": 16215 }, { "epoch": 0.9356252884171666, "grad_norm": 0.2953318655490875, "learning_rate": 2.514254674824168e-06, "loss": 0.986, "step": 16220 }, { "epoch": 0.9359137055837563, "grad_norm": 0.3065684139728546, "learning_rate": 2.491869806989966e-06, "loss": 0.9845, "step": 16225 }, { "epoch": 0.9362021227503461, "grad_norm": 0.29751312732696533, "learning_rate": 2.469583776256812e-06, "loss": 0.9883, "step": 16230 }, { "epoch": 0.9364905399169359, "grad_norm": 0.25876522064208984, "learning_rate": 2.447396605214469e-06, "loss": 0.9825, "step": 16235 }, { "epoch": 0.9367789570835257, "grad_norm": 0.28333285450935364, "learning_rate": 2.4253083163525038e-06, "loss": 0.895, "step": 16240 }, { "epoch": 0.9370673742501153, "grad_norm": 0.2942551076412201, "learning_rate": 2.4033189320602613e-06, "loss": 0.9891, "step": 16245 }, { "epoch": 0.9373557914167051, "grad_norm": 0.27687203884124756, "learning_rate": 2.3814284746268344e-06, "loss": 0.9201, "step": 16250 }, { "epoch": 0.9376442085832949, "grad_norm": 0.29299119114875793, "learning_rate": 2.359636966241019e-06, "loss": 0.9656, "step": 16255 }, { "epoch": 0.9379326257498847, "grad_norm": 0.2875472903251648, "learning_rate": 2.3379444289913342e-06, "loss": 0.9556, "step": 16260 }, { "epoch": 0.9382210429164743, "grad_norm": 0.282306432723999, "learning_rate": 2.3163508848659587e-06, "loss": 0.9693, "step": 16265 }, { "epoch": 0.9385094600830641, "grad_norm": 0.31084609031677246, "learning_rate": 2.2948563557527836e-06, "loss": 0.9276, "step": 16270 }, { "epoch": 0.9387978772496539, "grad_norm": 0.27823126316070557, "learning_rate": 2.273460863439236e-06, "loss": 0.9533, "step": 16275 }, { "epoch": 0.9390862944162437, "grad_norm": 0.2766770124435425, "learning_rate": 2.2521644296124466e-06, "loss": 0.9623, "step": 16280 }, { "epoch": 0.9393747115828334, "grad_norm": 0.2881390154361725, "learning_rate": 2.2309670758591138e-06, "loss": 0.9366, "step": 16285 }, { "epoch": 0.9396631287494231, "grad_norm": 0.27332398295402527, "learning_rate": 2.209868823665473e-06, "loss": 0.9053, "step": 16290 }, { "epoch": 0.9399515459160129, "grad_norm": 0.27232545614242554, "learning_rate": 2.1888696944173504e-06, "loss": 0.9867, "step": 16295 }, { "epoch": 0.9402399630826027, "grad_norm": 0.274514764547348, "learning_rate": 2.1679697094000638e-06, "loss": 0.9459, "step": 16300 }, { "epoch": 0.9405283802491924, "grad_norm": 0.28133681416511536, "learning_rate": 2.1471688897984675e-06, "loss": 0.9392, "step": 16305 }, { "epoch": 0.9408167974157822, "grad_norm": 0.30380240082740784, "learning_rate": 2.1264672566968736e-06, "loss": 0.9615, "step": 16310 }, { "epoch": 0.941105214582372, "grad_norm": 0.2733669877052307, "learning_rate": 2.105864831079063e-06, "loss": 0.9116, "step": 16315 }, { "epoch": 0.9413936317489617, "grad_norm": 0.28536126017570496, "learning_rate": 2.0853616338282644e-06, "loss": 0.95, "step": 16320 }, { "epoch": 0.9416820489155514, "grad_norm": 0.28393039107322693, "learning_rate": 2.064957685727109e-06, "loss": 0.9179, "step": 16325 }, { "epoch": 0.9419704660821412, "grad_norm": 0.3136070966720581, "learning_rate": 2.044653007457653e-06, "loss": 1.0152, "step": 16330 }, { "epoch": 0.942258883248731, "grad_norm": 0.28725332021713257, "learning_rate": 2.0244476196012995e-06, "loss": 0.9516, "step": 16335 }, { "epoch": 0.9425473004153208, "grad_norm": 0.28715264797210693, "learning_rate": 2.0043415426388324e-06, "loss": 0.9183, "step": 16340 }, { "epoch": 0.9428357175819104, "grad_norm": 0.2766190469264984, "learning_rate": 1.98433479695036e-06, "loss": 0.9351, "step": 16345 }, { "epoch": 0.9431241347485002, "grad_norm": 0.26775482296943665, "learning_rate": 1.964427402815294e-06, "loss": 0.9339, "step": 16350 }, { "epoch": 0.94341255191509, "grad_norm": 0.35428106784820557, "learning_rate": 1.9446193804123826e-06, "loss": 1.0776, "step": 16355 }, { "epoch": 0.9437009690816798, "grad_norm": 0.2934764325618744, "learning_rate": 1.924910749819586e-06, "loss": 0.9934, "step": 16360 }, { "epoch": 0.9439893862482694, "grad_norm": 0.29175451397895813, "learning_rate": 1.9053015310141587e-06, "loss": 0.9219, "step": 16365 }, { "epoch": 0.9442778034148592, "grad_norm": 0.279075562953949, "learning_rate": 1.8857917438725892e-06, "loss": 0.9532, "step": 16370 }, { "epoch": 0.944566220581449, "grad_norm": 0.2834916114807129, "learning_rate": 1.86638140817057e-06, "loss": 0.9375, "step": 16375 }, { "epoch": 0.9448546377480388, "grad_norm": 0.28975823521614075, "learning_rate": 1.8470705435829849e-06, "loss": 0.962, "step": 16380 }, { "epoch": 0.9451430549146285, "grad_norm": 0.27287888526916504, "learning_rate": 1.8278591696838765e-06, "loss": 0.9551, "step": 16385 }, { "epoch": 0.9454314720812182, "grad_norm": 0.2761301100254059, "learning_rate": 1.8087473059464788e-06, "loss": 0.9122, "step": 16390 }, { "epoch": 0.945719889247808, "grad_norm": 0.2874256372451782, "learning_rate": 1.7897349717431288e-06, "loss": 0.9413, "step": 16395 }, { "epoch": 0.9460083064143978, "grad_norm": 0.2810600697994232, "learning_rate": 1.770822186345289e-06, "loss": 0.9441, "step": 16400 }, { "epoch": 0.9462967235809875, "grad_norm": 0.26397740840911865, "learning_rate": 1.752008968923502e-06, "loss": 0.8909, "step": 16405 }, { "epoch": 0.9465851407475773, "grad_norm": 0.28463664650917053, "learning_rate": 1.7332953385474027e-06, "loss": 0.9269, "step": 16410 }, { "epoch": 0.946873557914167, "grad_norm": 0.29349541664123535, "learning_rate": 1.7146813141856955e-06, "loss": 0.9918, "step": 16415 }, { "epoch": 0.9471619750807568, "grad_norm": 0.2936592996120453, "learning_rate": 1.6961669147060765e-06, "loss": 1.0146, "step": 16420 }, { "epoch": 0.9474503922473465, "grad_norm": 0.28148314356803894, "learning_rate": 1.67775215887529e-06, "loss": 0.9788, "step": 16425 }, { "epoch": 0.9477388094139363, "grad_norm": 0.27716365456581116, "learning_rate": 1.6594370653590706e-06, "loss": 0.9918, "step": 16430 }, { "epoch": 0.9480272265805261, "grad_norm": 0.2905281186103821, "learning_rate": 1.641221652722158e-06, "loss": 0.9588, "step": 16435 }, { "epoch": 0.9483156437471159, "grad_norm": 0.2908460199832916, "learning_rate": 1.6231059394281934e-06, "loss": 0.9696, "step": 16440 }, { "epoch": 0.9486040609137056, "grad_norm": 0.3046049475669861, "learning_rate": 1.6050899438398104e-06, "loss": 0.9385, "step": 16445 }, { "epoch": 0.9488924780802953, "grad_norm": 0.2991294860839844, "learning_rate": 1.587173684218557e-06, "loss": 0.9193, "step": 16450 }, { "epoch": 0.9491808952468851, "grad_norm": 0.29674336314201355, "learning_rate": 1.5693571787248728e-06, "loss": 0.9054, "step": 16455 }, { "epoch": 0.9494693124134749, "grad_norm": 0.3032147288322449, "learning_rate": 1.55164044541809e-06, "loss": 0.9429, "step": 16460 }, { "epoch": 0.9497577295800647, "grad_norm": 0.3129865527153015, "learning_rate": 1.5340235022564098e-06, "loss": 1.0181, "step": 16465 }, { "epoch": 0.9500461467466543, "grad_norm": 0.3160928785800934, "learning_rate": 1.5165063670968926e-06, "loss": 0.9554, "step": 16470 }, { "epoch": 0.9503345639132441, "grad_norm": 0.28156229853630066, "learning_rate": 1.499089057695402e-06, "loss": 0.9426, "step": 16475 }, { "epoch": 0.9506229810798339, "grad_norm": 0.3097882866859436, "learning_rate": 1.4817715917066488e-06, "loss": 1.0092, "step": 16480 }, { "epoch": 0.9509113982464237, "grad_norm": 0.30563685297966003, "learning_rate": 1.464553986684114e-06, "loss": 0.949, "step": 16485 }, { "epoch": 0.9511998154130133, "grad_norm": 0.28421536087989807, "learning_rate": 1.4474362600800706e-06, "loss": 0.9152, "step": 16490 }, { "epoch": 0.9514882325796031, "grad_norm": 0.28444600105285645, "learning_rate": 1.4304184292455613e-06, "loss": 0.967, "step": 16495 }, { "epoch": 0.9517766497461929, "grad_norm": 0.2795342803001404, "learning_rate": 1.4135005114303435e-06, "loss": 0.9806, "step": 16500 }, { "epoch": 0.9520650669127827, "grad_norm": 0.3018958866596222, "learning_rate": 1.3966825237829106e-06, "loss": 0.9117, "step": 16505 }, { "epoch": 0.9523534840793724, "grad_norm": 0.27752962708473206, "learning_rate": 1.379964483350482e-06, "loss": 0.921, "step": 16510 }, { "epoch": 0.9526419012459622, "grad_norm": 0.28480061888694763, "learning_rate": 1.363346407078947e-06, "loss": 0.9557, "step": 16515 }, { "epoch": 0.9529303184125519, "grad_norm": 0.3103495240211487, "learning_rate": 1.3468283118128756e-06, "loss": 0.9517, "step": 16520 }, { "epoch": 0.9532187355791417, "grad_norm": 0.2942695617675781, "learning_rate": 1.3304102142954965e-06, "loss": 0.9146, "step": 16525 }, { "epoch": 0.9535071527457314, "grad_norm": 0.30698221921920776, "learning_rate": 1.314092131168665e-06, "loss": 0.96, "step": 16530 }, { "epoch": 0.9537955699123212, "grad_norm": 0.2886694073677063, "learning_rate": 1.2978740789728827e-06, "loss": 0.8702, "step": 16535 }, { "epoch": 0.954083987078911, "grad_norm": 0.29679936170578003, "learning_rate": 1.2817560741472445e-06, "loss": 0.8778, "step": 16540 }, { "epoch": 0.9543724042455007, "grad_norm": 0.3064761459827423, "learning_rate": 1.2657381330294149e-06, "loss": 0.8782, "step": 16545 }, { "epoch": 0.9546608214120904, "grad_norm": 0.26160743832588196, "learning_rate": 1.2498202718556617e-06, "loss": 0.9198, "step": 16550 }, { "epoch": 0.9549492385786802, "grad_norm": 0.283074289560318, "learning_rate": 1.2340025067608007e-06, "loss": 0.9356, "step": 16555 }, { "epoch": 0.95523765574527, "grad_norm": 0.27303585410118103, "learning_rate": 1.2182848537781622e-06, "loss": 0.91, "step": 16560 }, { "epoch": 0.9555260729118598, "grad_norm": 0.2848970890045166, "learning_rate": 1.2026673288396462e-06, "loss": 0.9014, "step": 16565 }, { "epoch": 0.9558144900784494, "grad_norm": 0.2902991473674774, "learning_rate": 1.187149947775612e-06, "loss": 0.9074, "step": 16570 }, { "epoch": 0.9561029072450392, "grad_norm": 0.2852989137172699, "learning_rate": 1.1717327263149447e-06, "loss": 0.943, "step": 16575 }, { "epoch": 0.956391324411629, "grad_norm": 0.286842405796051, "learning_rate": 1.1564156800849879e-06, "loss": 0.9744, "step": 16580 }, { "epoch": 0.9566797415782188, "grad_norm": 0.302713543176651, "learning_rate": 1.1411988246115556e-06, "loss": 0.9402, "step": 16585 }, { "epoch": 0.9569681587448085, "grad_norm": 0.31443941593170166, "learning_rate": 1.1260821753188987e-06, "loss": 0.9582, "step": 16590 }, { "epoch": 0.9572565759113982, "grad_norm": 0.28639456629753113, "learning_rate": 1.1110657475296827e-06, "loss": 0.927, "step": 16595 }, { "epoch": 0.957544993077988, "grad_norm": 0.27512067556381226, "learning_rate": 1.0961495564650092e-06, "loss": 0.9749, "step": 16600 }, { "epoch": 0.9578334102445778, "grad_norm": 0.2878815531730652, "learning_rate": 1.0813336172443622e-06, "loss": 0.9624, "step": 16605 }, { "epoch": 0.9581218274111675, "grad_norm": 0.31200575828552246, "learning_rate": 1.0666179448856174e-06, "loss": 0.9508, "step": 16610 }, { "epoch": 0.9584102445777573, "grad_norm": 0.2852694094181061, "learning_rate": 1.0520025543050094e-06, "loss": 0.951, "step": 16615 }, { "epoch": 0.958698661744347, "grad_norm": 0.29534223675727844, "learning_rate": 1.0374874603171326e-06, "loss": 0.9803, "step": 16620 }, { "epoch": 0.9589870789109368, "grad_norm": 0.3066651225090027, "learning_rate": 1.0230726776349063e-06, "loss": 0.9267, "step": 16625 }, { "epoch": 0.9592754960775265, "grad_norm": 0.2924138605594635, "learning_rate": 1.0087582208695768e-06, "loss": 0.9759, "step": 16630 }, { "epoch": 0.9595639132441163, "grad_norm": 0.2614937424659729, "learning_rate": 9.945441045306925e-07, "loss": 0.947, "step": 16635 }, { "epoch": 0.9598523304107061, "grad_norm": 0.3080776333808899, "learning_rate": 9.804303430261174e-07, "loss": 0.9668, "step": 16640 }, { "epoch": 0.9601407475772958, "grad_norm": 0.30279916524887085, "learning_rate": 9.664169506619525e-07, "loss": 0.9199, "step": 16645 }, { "epoch": 0.9604291647438855, "grad_norm": 0.29995423555374146, "learning_rate": 9.525039416425907e-07, "loss": 0.9283, "step": 16650 }, { "epoch": 0.9607175819104753, "grad_norm": 0.26539433002471924, "learning_rate": 9.386913300706735e-07, "loss": 0.9023, "step": 16655 }, { "epoch": 0.9610059990770651, "grad_norm": 0.4439990222454071, "learning_rate": 9.249791299470567e-07, "loss": 0.9354, "step": 16660 }, { "epoch": 0.9612944162436549, "grad_norm": 0.2836225926876068, "learning_rate": 9.113673551708446e-07, "loss": 0.9507, "step": 16665 }, { "epoch": 0.9615828334102445, "grad_norm": 0.2889987826347351, "learning_rate": 8.978560195393115e-07, "loss": 0.9704, "step": 16670 }, { "epoch": 0.9618712505768343, "grad_norm": 0.3012427091598511, "learning_rate": 8.844451367479689e-07, "loss": 0.9667, "step": 16675 }, { "epoch": 0.9621596677434241, "grad_norm": 0.27891865372657776, "learning_rate": 8.711347203904541e-07, "loss": 0.9804, "step": 16680 }, { "epoch": 0.9624480849100139, "grad_norm": 0.27315354347229004, "learning_rate": 8.57924783958608e-07, "loss": 0.9159, "step": 16685 }, { "epoch": 0.9627365020766036, "grad_norm": 0.2848674952983856, "learning_rate": 8.448153408424087e-07, "loss": 0.9553, "step": 16690 }, { "epoch": 0.9630249192431933, "grad_norm": 0.30129551887512207, "learning_rate": 8.318064043299823e-07, "loss": 0.8892, "step": 16695 }, { "epoch": 0.9633133364097831, "grad_norm": 0.2893418073654175, "learning_rate": 8.188979876075475e-07, "loss": 1.0384, "step": 16700 }, { "epoch": 0.9636017535763729, "grad_norm": 0.2639276385307312, "learning_rate": 8.060901037594714e-07, "loss": 0.923, "step": 16705 }, { "epoch": 0.9638901707429626, "grad_norm": 0.2888321876525879, "learning_rate": 7.933827657682025e-07, "loss": 0.9154, "step": 16710 }, { "epoch": 0.9641785879095524, "grad_norm": 0.29655885696411133, "learning_rate": 7.807759865142483e-07, "loss": 0.9545, "step": 16715 }, { "epoch": 0.9644670050761421, "grad_norm": 0.30424603819847107, "learning_rate": 7.682697787762317e-07, "loss": 0.9101, "step": 16720 }, { "epoch": 0.9647554222427319, "grad_norm": 0.295279860496521, "learning_rate": 7.558641552308121e-07, "loss": 0.9619, "step": 16725 }, { "epoch": 0.9650438394093216, "grad_norm": 0.29094138741493225, "learning_rate": 7.435591284526866e-07, "loss": 0.9636, "step": 16730 }, { "epoch": 0.9653322565759114, "grad_norm": 0.2868499755859375, "learning_rate": 7.31354710914589e-07, "loss": 1.0484, "step": 16735 }, { "epoch": 0.9656206737425012, "grad_norm": 0.28914302587509155, "learning_rate": 7.192509149872684e-07, "loss": 0.9048, "step": 16740 }, { "epoch": 0.9659090909090909, "grad_norm": 0.2850790023803711, "learning_rate": 7.072477529395105e-07, "loss": 0.933, "step": 16745 }, { "epoch": 0.9661975080756806, "grad_norm": 0.27555444836616516, "learning_rate": 6.953452369380497e-07, "loss": 0.9781, "step": 16750 }, { "epoch": 0.9664859252422704, "grad_norm": 0.3144330084323883, "learning_rate": 6.835433790476354e-07, "loss": 0.9763, "step": 16755 }, { "epoch": 0.9667743424088602, "grad_norm": 0.2599543631076813, "learning_rate": 6.718421912309758e-07, "loss": 0.8993, "step": 16760 }, { "epoch": 0.96706275957545, "grad_norm": 0.2996498644351959, "learning_rate": 6.602416853487392e-07, "loss": 0.9793, "step": 16765 }, { "epoch": 0.9673511767420396, "grad_norm": 0.28505122661590576, "learning_rate": 6.487418731595418e-07, "loss": 0.9164, "step": 16770 }, { "epoch": 0.9676395939086294, "grad_norm": 0.2904942035675049, "learning_rate": 6.373427663199261e-07, "loss": 0.96, "step": 16775 }, { "epoch": 0.9679280110752192, "grad_norm": 0.2883807122707367, "learning_rate": 6.260443763843493e-07, "loss": 0.9771, "step": 16780 }, { "epoch": 0.968216428241809, "grad_norm": 0.3035736382007599, "learning_rate": 6.148467148052172e-07, "loss": 0.9108, "step": 16785 }, { "epoch": 0.9685048454083988, "grad_norm": 0.2641626000404358, "learning_rate": 6.037497929327839e-07, "loss": 1.008, "step": 16790 }, { "epoch": 0.9687932625749884, "grad_norm": 0.28156375885009766, "learning_rate": 5.927536220152296e-07, "loss": 0.9439, "step": 16795 }, { "epoch": 0.9690816797415782, "grad_norm": 0.2757076621055603, "learning_rate": 5.818582131985939e-07, "loss": 0.9123, "step": 16800 }, { "epoch": 0.969370096908168, "grad_norm": 0.29132992029190063, "learning_rate": 5.710635775267759e-07, "loss": 0.8855, "step": 16805 }, { "epoch": 0.9696585140747578, "grad_norm": 0.27889660000801086, "learning_rate": 5.603697259415341e-07, "loss": 0.9758, "step": 16810 }, { "epoch": 0.9699469312413475, "grad_norm": 0.3405865430831909, "learning_rate": 5.497766692824868e-07, "loss": 1.0036, "step": 16815 }, { "epoch": 0.9702353484079372, "grad_norm": 0.30193057656288147, "learning_rate": 5.392844182870449e-07, "loss": 0.9067, "step": 16820 }, { "epoch": 0.970523765574527, "grad_norm": 0.2817526161670685, "learning_rate": 5.288929835904788e-07, "loss": 0.9047, "step": 16825 }, { "epoch": 0.9708121827411168, "grad_norm": 0.2961997091770172, "learning_rate": 5.186023757258407e-07, "loss": 0.9158, "step": 16830 }, { "epoch": 0.9711005999077065, "grad_norm": 0.2850496470928192, "learning_rate": 5.08412605123998e-07, "loss": 0.9862, "step": 16835 }, { "epoch": 0.9713890170742963, "grad_norm": 0.297963410615921, "learning_rate": 4.983236821135995e-07, "loss": 0.9943, "step": 16840 }, { "epoch": 0.971677434240886, "grad_norm": 0.2760219871997833, "learning_rate": 4.883356169210651e-07, "loss": 0.9675, "step": 16845 }, { "epoch": 0.9719658514074758, "grad_norm": 0.29743608832359314, "learning_rate": 4.784484196706073e-07, "loss": 0.9248, "step": 16850 }, { "epoch": 0.9722542685740655, "grad_norm": 0.37705740332603455, "learning_rate": 4.6866210038417625e-07, "loss": 0.9858, "step": 16855 }, { "epoch": 0.9725426857406553, "grad_norm": 0.29688891768455505, "learning_rate": 4.5897666898145896e-07, "loss": 0.9439, "step": 16860 }, { "epoch": 0.9728311029072451, "grad_norm": 0.3039180040359497, "learning_rate": 4.4939213527990245e-07, "loss": 1.0214, "step": 16865 }, { "epoch": 0.9731195200738348, "grad_norm": 0.28710174560546875, "learning_rate": 4.3990850899467975e-07, "loss": 0.9145, "step": 16870 }, { "epoch": 0.9734079372404245, "grad_norm": 0.29073718190193176, "learning_rate": 4.305257997386458e-07, "loss": 0.901, "step": 16875 }, { "epoch": 0.9736963544070143, "grad_norm": 0.2846122682094574, "learning_rate": 4.2124401702241524e-07, "loss": 0.9701, "step": 16880 }, { "epoch": 0.9739847715736041, "grad_norm": 0.27932944893836975, "learning_rate": 4.120631702542732e-07, "loss": 0.9515, "step": 16885 }, { "epoch": 0.9742731887401939, "grad_norm": 0.2845093309879303, "learning_rate": 4.029832687401758e-07, "loss": 0.8943, "step": 16890 }, { "epoch": 0.9745616059067835, "grad_norm": 0.281512051820755, "learning_rate": 3.940043216838052e-07, "loss": 0.8849, "step": 16895 }, { "epoch": 0.9748500230733733, "grad_norm": 0.30090752243995667, "learning_rate": 3.851263381864589e-07, "loss": 0.9418, "step": 16900 }, { "epoch": 0.9751384402399631, "grad_norm": 0.26722562313079834, "learning_rate": 3.7634932724713854e-07, "loss": 0.9539, "step": 16905 }, { "epoch": 0.9754268574065529, "grad_norm": 0.2945277988910675, "learning_rate": 3.67673297762483e-07, "loss": 0.9728, "step": 16910 }, { "epoch": 0.9757152745731426, "grad_norm": 0.3588777184486389, "learning_rate": 3.590982585267466e-07, "loss": 1.0383, "step": 16915 }, { "epoch": 0.9760036917397323, "grad_norm": 0.29528379440307617, "learning_rate": 3.506242182318653e-07, "loss": 1.0303, "step": 16920 }, { "epoch": 0.9762921089063221, "grad_norm": 0.3141199052333832, "learning_rate": 3.422511854673682e-07, "loss": 0.8857, "step": 16925 }, { "epoch": 0.9765805260729119, "grad_norm": 0.2811961770057678, "learning_rate": 3.339791687203997e-07, "loss": 0.9484, "step": 16930 }, { "epoch": 0.9768689432395016, "grad_norm": 0.2975912094116211, "learning_rate": 3.2580817637571923e-07, "loss": 0.9638, "step": 16935 }, { "epoch": 0.9771573604060914, "grad_norm": 0.3165859580039978, "learning_rate": 3.177382167156906e-07, "loss": 0.9067, "step": 16940 }, { "epoch": 0.9774457775726811, "grad_norm": 0.381736159324646, "learning_rate": 3.097692979202704e-07, "loss": 0.9496, "step": 16945 }, { "epoch": 0.9777341947392709, "grad_norm": 0.28172680735588074, "learning_rate": 3.019014280669641e-07, "loss": 0.8839, "step": 16950 }, { "epoch": 0.9780226119058606, "grad_norm": 0.3085029125213623, "learning_rate": 2.9413461513090324e-07, "loss": 0.8825, "step": 16955 }, { "epoch": 0.9783110290724504, "grad_norm": 0.3072126507759094, "learning_rate": 2.8646886698473484e-07, "loss": 0.9802, "step": 16960 }, { "epoch": 0.9785994462390402, "grad_norm": 0.2818610966205597, "learning_rate": 2.789041913986878e-07, "loss": 0.9383, "step": 16965 }, { "epoch": 0.9788878634056299, "grad_norm": 0.297048419713974, "learning_rate": 2.7144059604055085e-07, "loss": 0.9903, "step": 16970 }, { "epoch": 0.9791762805722196, "grad_norm": 0.30197322368621826, "learning_rate": 2.640780884756389e-07, "loss": 0.9497, "step": 16975 }, { "epoch": 0.9794646977388094, "grad_norm": 0.312307208776474, "learning_rate": 2.568166761668156e-07, "loss": 0.9123, "step": 16980 }, { "epoch": 0.9797531149053992, "grad_norm": 0.30723750591278076, "learning_rate": 2.496563664744378e-07, "loss": 0.9771, "step": 16985 }, { "epoch": 0.980041532071989, "grad_norm": 0.29737958312034607, "learning_rate": 2.4259716665641083e-07, "loss": 0.9366, "step": 16990 }, { "epoch": 0.9803299492385786, "grad_norm": 0.34504443407058716, "learning_rate": 2.3563908386816657e-07, "loss": 0.984, "step": 16995 }, { "epoch": 0.9806183664051684, "grad_norm": 0.2985767722129822, "learning_rate": 2.2878212516260766e-07, "loss": 0.9287, "step": 17000 }, { "epoch": 0.9809067835717582, "grad_norm": 0.27487707138061523, "learning_rate": 2.2202629749015213e-07, "loss": 0.9725, "step": 17005 }, { "epoch": 0.981195200738348, "grad_norm": 0.2767324149608612, "learning_rate": 2.1537160769870002e-07, "loss": 0.916, "step": 17010 }, { "epoch": 0.9814836179049377, "grad_norm": 0.2735516130924225, "learning_rate": 2.0881806253364444e-07, "loss": 0.9905, "step": 17015 }, { "epoch": 0.9817720350715274, "grad_norm": 0.2798011600971222, "learning_rate": 2.0236566863784944e-07, "loss": 0.884, "step": 17020 }, { "epoch": 0.9820604522381172, "grad_norm": 0.32747435569763184, "learning_rate": 1.9601443255164998e-07, "loss": 0.9432, "step": 17025 }, { "epoch": 0.982348869404707, "grad_norm": 0.2849380373954773, "learning_rate": 1.8976436071284076e-07, "loss": 0.9873, "step": 17030 }, { "epoch": 0.9826372865712967, "grad_norm": 0.2873431444168091, "learning_rate": 1.8361545945668747e-07, "loss": 0.9483, "step": 17035 }, { "epoch": 0.9829257037378865, "grad_norm": 0.30190160870552063, "learning_rate": 1.775677350159044e-07, "loss": 0.9605, "step": 17040 }, { "epoch": 0.9832141209044762, "grad_norm": 0.32146456837654114, "learning_rate": 1.7162119352062135e-07, "loss": 0.9293, "step": 17045 }, { "epoch": 0.983502538071066, "grad_norm": 0.26646754145622253, "learning_rate": 1.657758409984278e-07, "loss": 0.9107, "step": 17050 }, { "epoch": 0.9837909552376557, "grad_norm": 0.2890629470348358, "learning_rate": 1.6003168337437313e-07, "loss": 0.953, "step": 17055 }, { "epoch": 0.9840793724042455, "grad_norm": 0.3307664692401886, "learning_rate": 1.5438872647086655e-07, "loss": 1.0018, "step": 17060 }, { "epoch": 0.9843677895708353, "grad_norm": 0.29319411516189575, "learning_rate": 1.488469760077993e-07, "loss": 0.9824, "step": 17065 }, { "epoch": 0.984656206737425, "grad_norm": 0.2756195366382599, "learning_rate": 1.4340643760244464e-07, "loss": 0.924, "step": 17070 }, { "epoch": 0.9849446239040147, "grad_norm": 0.2855581045150757, "learning_rate": 1.3806711676950245e-07, "loss": 0.9318, "step": 17075 }, { "epoch": 0.9852330410706045, "grad_norm": 0.2845672369003296, "learning_rate": 1.328290189210435e-07, "loss": 0.8953, "step": 17080 }, { "epoch": 0.9855214582371943, "grad_norm": 0.2991209924221039, "learning_rate": 1.2769214936657615e-07, "loss": 0.9597, "step": 17085 }, { "epoch": 0.9858098754037841, "grad_norm": 0.29595181345939636, "learning_rate": 1.2265651331296869e-07, "loss": 0.9461, "step": 17090 }, { "epoch": 0.9860982925703737, "grad_norm": 0.31181660294532776, "learning_rate": 1.1772211586449367e-07, "loss": 0.9795, "step": 17095 }, { "epoch": 0.9863867097369635, "grad_norm": 0.292348176240921, "learning_rate": 1.1288896202281685e-07, "loss": 0.9391, "step": 17100 }, { "epoch": 0.9866751269035533, "grad_norm": 0.7884487509727478, "learning_rate": 1.0815705668694165e-07, "loss": 0.9307, "step": 17105 }, { "epoch": 0.9869635440701431, "grad_norm": 0.3149639070034027, "learning_rate": 1.0352640465327578e-07, "loss": 0.9954, "step": 17110 }, { "epoch": 0.9872519612367328, "grad_norm": 0.3055214583873749, "learning_rate": 9.899701061558687e-08, "loss": 0.9155, "step": 17115 }, { "epoch": 0.9875403784033225, "grad_norm": 0.28656816482543945, "learning_rate": 9.456887916499125e-08, "loss": 0.996, "step": 17120 }, { "epoch": 0.9878287955699123, "grad_norm": 0.3164063096046448, "learning_rate": 9.024201478998739e-08, "loss": 0.9331, "step": 17125 }, { "epoch": 0.9881172127365021, "grad_norm": 0.2873050570487976, "learning_rate": 8.601642187640036e-08, "loss": 1.055, "step": 17130 }, { "epoch": 0.9884056299030919, "grad_norm": 0.2891038656234741, "learning_rate": 8.189210470742614e-08, "loss": 0.9559, "step": 17135 }, { "epoch": 0.9886940470696816, "grad_norm": 0.2986387312412262, "learning_rate": 7.786906746358735e-08, "loss": 1.0243, "step": 17140 }, { "epoch": 0.9889824642362713, "grad_norm": 0.30982112884521484, "learning_rate": 7.394731422274426e-08, "loss": 0.987, "step": 17145 }, { "epoch": 0.9892708814028611, "grad_norm": 0.3040391504764557, "learning_rate": 7.012684896011702e-08, "loss": 0.9656, "step": 17150 }, { "epoch": 0.9895592985694509, "grad_norm": 0.2852766811847687, "learning_rate": 6.640767554823013e-08, "loss": 0.937, "step": 17155 }, { "epoch": 0.9898477157360406, "grad_norm": 0.2964860200881958, "learning_rate": 6.278979775694582e-08, "loss": 0.9073, "step": 17160 }, { "epoch": 0.9901361329026304, "grad_norm": 0.26632729172706604, "learning_rate": 5.927321925346396e-08, "loss": 0.9722, "step": 17165 }, { "epoch": 0.9904245500692201, "grad_norm": 0.29934900999069214, "learning_rate": 5.585794360226659e-08, "loss": 1.0155, "step": 17170 }, { "epoch": 0.9907129672358099, "grad_norm": 0.2905521094799042, "learning_rate": 5.254397426520674e-08, "loss": 0.9827, "step": 17175 }, { "epoch": 0.9910013844023996, "grad_norm": 0.2757057547569275, "learning_rate": 4.9331314601408495e-08, "loss": 0.8905, "step": 17180 }, { "epoch": 0.9912898015689894, "grad_norm": 0.3028772473335266, "learning_rate": 4.621996786731142e-08, "loss": 0.9571, "step": 17185 }, { "epoch": 0.9915782187355792, "grad_norm": 0.28152334690093994, "learning_rate": 4.320993721668165e-08, "loss": 0.9648, "step": 17190 }, { "epoch": 0.991866635902169, "grad_norm": 0.2736474871635437, "learning_rate": 4.030122570055639e-08, "loss": 0.91, "step": 17195 }, { "epoch": 0.9921550530687586, "grad_norm": 0.31447696685791016, "learning_rate": 3.7493836267310514e-08, "loss": 0.9955, "step": 17200 }, { "epoch": 0.9924434702353484, "grad_norm": 0.3125499188899994, "learning_rate": 3.4787771762578856e-08, "loss": 0.9984, "step": 17205 }, { "epoch": 0.9927318874019382, "grad_norm": 0.28617557883262634, "learning_rate": 3.218303492932284e-08, "loss": 0.9312, "step": 17210 }, { "epoch": 0.993020304568528, "grad_norm": 0.29386937618255615, "learning_rate": 2.9679628407763837e-08, "loss": 0.8687, "step": 17215 }, { "epoch": 0.9933087217351176, "grad_norm": 0.3066183626651764, "learning_rate": 2.7277554735449794e-08, "loss": 1.0048, "step": 17220 }, { "epoch": 0.9935971389017074, "grad_norm": 0.28535598516464233, "learning_rate": 2.4976816347177524e-08, "loss": 0.9516, "step": 17225 }, { "epoch": 0.9938855560682972, "grad_norm": 0.27699753642082214, "learning_rate": 2.2777415575037098e-08, "loss": 0.9985, "step": 17230 }, { "epoch": 0.994173973234887, "grad_norm": 0.27334800362586975, "learning_rate": 2.0679354648422968e-08, "loss": 0.9417, "step": 17235 }, { "epoch": 0.9944623904014767, "grad_norm": 0.2618880569934845, "learning_rate": 1.8682635693978433e-08, "loss": 0.9599, "step": 17240 }, { "epoch": 0.9947508075680664, "grad_norm": 0.2842215895652771, "learning_rate": 1.6787260735640075e-08, "loss": 0.9852, "step": 17245 }, { "epoch": 0.9950392247346562, "grad_norm": 0.29653632640838623, "learning_rate": 1.499323169462663e-08, "loss": 0.9443, "step": 17250 }, { "epoch": 0.995327641901246, "grad_norm": 0.2780844569206238, "learning_rate": 1.3300550389394595e-08, "loss": 0.9895, "step": 17255 }, { "epoch": 0.9956160590678357, "grad_norm": 0.2827605903148651, "learning_rate": 1.1709218535715938e-08, "loss": 0.9583, "step": 17260 }, { "epoch": 0.9959044762344255, "grad_norm": 0.2900019884109497, "learning_rate": 1.0219237746611487e-08, "loss": 0.932, "step": 17265 }, { "epoch": 0.9961928934010152, "grad_norm": 0.28944316506385803, "learning_rate": 8.83060953235093e-09, "loss": 0.9875, "step": 17270 }, { "epoch": 0.996481310567605, "grad_norm": 0.3088100254535675, "learning_rate": 7.543335300497223e-09, "loss": 0.9681, "step": 17275 }, { "epoch": 0.9967697277341947, "grad_norm": 0.31131041049957275, "learning_rate": 6.357416355884382e-09, "loss": 0.9911, "step": 17280 }, { "epoch": 0.9970581449007845, "grad_norm": 0.28186625242233276, "learning_rate": 5.272853900573082e-09, "loss": 0.8708, "step": 17285 }, { "epoch": 0.9973465620673743, "grad_norm": 0.27050358057022095, "learning_rate": 4.289649033928367e-09, "loss": 0.9871, "step": 17290 }, { "epoch": 0.997634979233964, "grad_norm": 0.27937865257263184, "learning_rate": 3.407802752530831e-09, "loss": 0.9746, "step": 17295 }, { "epoch": 0.9979233964005537, "grad_norm": 0.28143933415412903, "learning_rate": 2.627315950265441e-09, "loss": 0.983, "step": 17300 }, { "epoch": 0.9982118135671435, "grad_norm": 0.2851989269256592, "learning_rate": 1.9481894182549198e-09, "loss": 0.9577, "step": 17305 }, { "epoch": 0.9985002307337333, "grad_norm": 0.2806662917137146, "learning_rate": 1.3704238448708496e-09, "loss": 0.9316, "step": 17310 }, { "epoch": 0.9987886479003231, "grad_norm": 0.2934742271900177, "learning_rate": 8.940198157558755e-10, "loss": 0.9058, "step": 17315 }, { "epoch": 0.9990770650669127, "grad_norm": 0.29554542899131775, "learning_rate": 5.189778138237067e-10, "loss": 0.9034, "step": 17320 }, { "epoch": 0.9993654822335025, "grad_norm": 0.28154152631759644, "learning_rate": 2.452982192036046e-10, "loss": 0.8279, "step": 17325 }, { "epoch": 0.9996538994000923, "grad_norm": 0.28424975275993347, "learning_rate": 7.298130931809865e-11, "loss": 0.9575, "step": 17330 }, { "epoch": 0.9999423165666821, "grad_norm": 0.27066484093666077, "learning_rate": 2.0272588274750093e-12, "loss": 0.9597, "step": 17335 }, { "epoch": 1.0, "eval_loss": 0.9577949643135071, "eval_runtime": 1918.8672, "eval_samples_per_second": 8.0, "eval_steps_per_second": 1.0, "step": 17336 }, { "epoch": 1.0, "step": 17336, "total_flos": 1.2189365563134312e+19, "train_loss": 0.9574739795073309, "train_runtime": 64802.032, "train_samples_per_second": 2.14, "train_steps_per_second": 0.268 } ], "logging_steps": 5, "max_steps": 17336, "num_input_tokens_seen": 0, "num_train_epochs": 1, "save_steps": 100, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 1.2189365563134312e+19, "train_batch_size": 4, "trial_name": null, "trial_params": null }