{ "best_metric": null, "best_model_checkpoint": null, "epoch": 2.0, "eval_steps": 500, "global_step": 56255, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.0003555239534263621, "grad_norm": 6.5037431716918945, "learning_rate": 4.739336492890996e-08, "loss": 3.6777, "step": 10 }, { "epoch": 0.0007110479068527242, "grad_norm": 6.644636154174805, "learning_rate": 9.478672985781992e-08, "loss": 3.6844, "step": 20 }, { "epoch": 0.0010665718602790863, "grad_norm": 5.921571254730225, "learning_rate": 1.4218009478672986e-07, "loss": 3.6619, "step": 30 }, { "epoch": 0.0014220958137054485, "grad_norm": 5.546863555908203, "learning_rate": 1.8957345971563984e-07, "loss": 3.6723, "step": 40 }, { "epoch": 0.0017776197671318106, "grad_norm": 5.478545188903809, "learning_rate": 2.3696682464454978e-07, "loss": 3.6157, "step": 50 }, { "epoch": 0.0021331437205581727, "grad_norm": 5.220118045806885, "learning_rate": 2.843601895734597e-07, "loss": 3.5973, "step": 60 }, { "epoch": 0.002488667673984535, "grad_norm": 5.022663116455078, "learning_rate": 3.317535545023697e-07, "loss": 3.5078, "step": 70 }, { "epoch": 0.002844191627410897, "grad_norm": 5.178471088409424, "learning_rate": 3.791469194312797e-07, "loss": 3.4621, "step": 80 }, { "epoch": 0.003199715580837259, "grad_norm": 4.002021312713623, "learning_rate": 4.265402843601896e-07, "loss": 3.4312, "step": 90 }, { "epoch": 0.003555239534263621, "grad_norm": 4.136953830718994, "learning_rate": 4.7393364928909956e-07, "loss": 3.3377, "step": 100 }, { "epoch": 0.003910763487689983, "grad_norm": 3.798868179321289, "learning_rate": 5.213270142180095e-07, "loss": 3.2934, "step": 110 }, { "epoch": 0.004266287441116345, "grad_norm": 4.2911763191223145, "learning_rate": 5.687203791469194e-07, "loss": 3.1883, "step": 120 }, { "epoch": 0.0046218113945427075, "grad_norm": 3.2939631938934326, "learning_rate": 6.161137440758294e-07, "loss": 3.139, "step": 130 }, { "epoch": 0.00497733534796907, "grad_norm": 3.2117068767547607, "learning_rate": 6.635071090047394e-07, "loss": 3.1006, "step": 140 }, { "epoch": 0.005332859301395432, "grad_norm": 3.382498264312744, "learning_rate": 7.109004739336493e-07, "loss": 3.0482, "step": 150 }, { "epoch": 0.005688383254821794, "grad_norm": 2.9878997802734375, "learning_rate": 7.582938388625594e-07, "loss": 2.9764, "step": 160 }, { "epoch": 0.006043907208248156, "grad_norm": 3.0326039791107178, "learning_rate": 8.056872037914692e-07, "loss": 2.9183, "step": 170 }, { "epoch": 0.006399431161674518, "grad_norm": 3.024005174636841, "learning_rate": 8.530805687203792e-07, "loss": 2.8864, "step": 180 }, { "epoch": 0.00675495511510088, "grad_norm": 3.1701743602752686, "learning_rate": 9.004739336492892e-07, "loss": 2.828, "step": 190 }, { "epoch": 0.007110479068527242, "grad_norm": 2.661820888519287, "learning_rate": 9.478672985781991e-07, "loss": 2.7768, "step": 200 }, { "epoch": 0.0074660030219536044, "grad_norm": 3.0967164039611816, "learning_rate": 9.95260663507109e-07, "loss": 2.7001, "step": 210 }, { "epoch": 0.007821526975379967, "grad_norm": 2.645308017730713, "learning_rate": 1.042654028436019e-06, "loss": 2.6736, "step": 220 }, { "epoch": 0.008177050928806329, "grad_norm": 2.3344457149505615, "learning_rate": 1.090047393364929e-06, "loss": 2.6604, "step": 230 }, { "epoch": 0.00853257488223269, "grad_norm": 2.545344829559326, "learning_rate": 1.1374407582938388e-06, "loss": 2.6873, "step": 240 }, { "epoch": 0.008888098835659053, "grad_norm": 2.7679622173309326, "learning_rate": 1.184834123222749e-06, "loss": 2.6686, "step": 250 }, { "epoch": 0.009243622789085415, "grad_norm": 2.9236536026000977, "learning_rate": 1.2322274881516587e-06, "loss": 2.636, "step": 260 }, { "epoch": 0.009599146742511777, "grad_norm": 3.063015937805176, "learning_rate": 1.2796208530805687e-06, "loss": 2.5889, "step": 270 }, { "epoch": 0.00995467069593814, "grad_norm": 2.6119446754455566, "learning_rate": 1.3270142180094788e-06, "loss": 2.6461, "step": 280 }, { "epoch": 0.010310194649364501, "grad_norm": 2.787658214569092, "learning_rate": 1.3744075829383887e-06, "loss": 2.5946, "step": 290 }, { "epoch": 0.010665718602790863, "grad_norm": 2.616063117980957, "learning_rate": 1.4218009478672987e-06, "loss": 2.6025, "step": 300 }, { "epoch": 0.011021242556217226, "grad_norm": 2.5729634761810303, "learning_rate": 1.4691943127962086e-06, "loss": 2.538, "step": 310 }, { "epoch": 0.011376766509643588, "grad_norm": 2.5412399768829346, "learning_rate": 1.5165876777251187e-06, "loss": 2.5636, "step": 320 }, { "epoch": 0.01173229046306995, "grad_norm": 3.008772611618042, "learning_rate": 1.5639810426540287e-06, "loss": 2.5296, "step": 330 }, { "epoch": 0.012087814416496312, "grad_norm": 2.5369677543640137, "learning_rate": 1.6113744075829384e-06, "loss": 2.5443, "step": 340 }, { "epoch": 0.012443338369922674, "grad_norm": 2.5106494426727295, "learning_rate": 1.6587677725118483e-06, "loss": 2.5304, "step": 350 }, { "epoch": 0.012798862323349036, "grad_norm": 2.8165364265441895, "learning_rate": 1.7061611374407585e-06, "loss": 2.561, "step": 360 }, { "epoch": 0.013154386276775398, "grad_norm": 2.5253000259399414, "learning_rate": 1.7535545023696684e-06, "loss": 2.532, "step": 370 }, { "epoch": 0.01350991023020176, "grad_norm": 2.657996892929077, "learning_rate": 1.8009478672985784e-06, "loss": 2.5121, "step": 380 }, { "epoch": 0.013865434183628123, "grad_norm": 2.816077470779419, "learning_rate": 1.8483412322274883e-06, "loss": 2.4412, "step": 390 }, { "epoch": 0.014220958137054485, "grad_norm": 2.648855686187744, "learning_rate": 1.8957345971563982e-06, "loss": 2.4674, "step": 400 }, { "epoch": 0.014576482090480847, "grad_norm": 2.7675745487213135, "learning_rate": 1.943127962085308e-06, "loss": 2.4813, "step": 410 }, { "epoch": 0.014932006043907209, "grad_norm": 2.5133159160614014, "learning_rate": 1.990521327014218e-06, "loss": 2.5003, "step": 420 }, { "epoch": 0.015287529997333571, "grad_norm": 2.5450525283813477, "learning_rate": 2.037914691943128e-06, "loss": 2.4677, "step": 430 }, { "epoch": 0.015643053950759933, "grad_norm": 7.591505527496338, "learning_rate": 2.085308056872038e-06, "loss": 2.5193, "step": 440 }, { "epoch": 0.015998577904186295, "grad_norm": 2.441828727722168, "learning_rate": 2.1327014218009483e-06, "loss": 2.4628, "step": 450 }, { "epoch": 0.016354101857612657, "grad_norm": 2.5905909538269043, "learning_rate": 2.180094786729858e-06, "loss": 2.4269, "step": 460 }, { "epoch": 0.01670962581103902, "grad_norm": 2.608412981033325, "learning_rate": 2.2274881516587678e-06, "loss": 2.45, "step": 470 }, { "epoch": 0.01706514976446538, "grad_norm": 2.5817153453826904, "learning_rate": 2.2748815165876777e-06, "loss": 2.4327, "step": 480 }, { "epoch": 0.017420673717891744, "grad_norm": 2.6757760047912598, "learning_rate": 2.322274881516588e-06, "loss": 2.4466, "step": 490 }, { "epoch": 0.017776197671318106, "grad_norm": 2.745751142501831, "learning_rate": 2.369668246445498e-06, "loss": 2.4339, "step": 500 }, { "epoch": 0.018131721624744468, "grad_norm": 2.6839053630828857, "learning_rate": 2.417061611374408e-06, "loss": 2.4309, "step": 510 }, { "epoch": 0.01848724557817083, "grad_norm": 3.034766912460327, "learning_rate": 2.4644549763033174e-06, "loss": 2.3887, "step": 520 }, { "epoch": 0.018842769531597192, "grad_norm": 2.3895976543426514, "learning_rate": 2.5118483412322274e-06, "loss": 2.3988, "step": 530 }, { "epoch": 0.019198293485023554, "grad_norm": 2.550225257873535, "learning_rate": 2.5592417061611373e-06, "loss": 2.4171, "step": 540 }, { "epoch": 0.019553817438449916, "grad_norm": 2.5043528079986572, "learning_rate": 2.606635071090048e-06, "loss": 2.4244, "step": 550 }, { "epoch": 0.01990934139187628, "grad_norm": 2.4192793369293213, "learning_rate": 2.6540284360189576e-06, "loss": 2.4324, "step": 560 }, { "epoch": 0.02026486534530264, "grad_norm": 2.150911331176758, "learning_rate": 2.7014218009478675e-06, "loss": 2.4225, "step": 570 }, { "epoch": 0.020620389298729003, "grad_norm": 2.4253993034362793, "learning_rate": 2.7488151658767775e-06, "loss": 2.4464, "step": 580 }, { "epoch": 0.020975913252155365, "grad_norm": 2.2535018920898438, "learning_rate": 2.7962085308056874e-06, "loss": 2.3521, "step": 590 }, { "epoch": 0.021331437205581727, "grad_norm": 2.497767210006714, "learning_rate": 2.8436018957345973e-06, "loss": 2.3227, "step": 600 }, { "epoch": 0.02168696115900809, "grad_norm": 2.3069896697998047, "learning_rate": 2.8909952606635073e-06, "loss": 2.3832, "step": 610 }, { "epoch": 0.02204248511243445, "grad_norm": 2.4061696529388428, "learning_rate": 2.938388625592417e-06, "loss": 2.385, "step": 620 }, { "epoch": 0.022398009065860813, "grad_norm": 2.2939300537109375, "learning_rate": 2.985781990521327e-06, "loss": 2.3879, "step": 630 }, { "epoch": 0.022753533019287175, "grad_norm": 2.414767026901245, "learning_rate": 3.0331753554502375e-06, "loss": 2.3585, "step": 640 }, { "epoch": 0.023109056972713538, "grad_norm": 2.20217227935791, "learning_rate": 3.0805687203791474e-06, "loss": 2.4066, "step": 650 }, { "epoch": 0.0234645809261399, "grad_norm": 2.1113369464874268, "learning_rate": 3.1279620853080574e-06, "loss": 2.3501, "step": 660 }, { "epoch": 0.023820104879566262, "grad_norm": 2.116257905960083, "learning_rate": 3.1753554502369673e-06, "loss": 2.3473, "step": 670 }, { "epoch": 0.024175628832992624, "grad_norm": 2.11061692237854, "learning_rate": 3.222748815165877e-06, "loss": 2.367, "step": 680 }, { "epoch": 0.024531152786418986, "grad_norm": 2.093904733657837, "learning_rate": 3.2701421800947867e-06, "loss": 2.3376, "step": 690 }, { "epoch": 0.024886676739845348, "grad_norm": 2.0183255672454834, "learning_rate": 3.3175355450236967e-06, "loss": 2.376, "step": 700 }, { "epoch": 0.02524220069327171, "grad_norm": 2.031346559524536, "learning_rate": 3.3649289099526066e-06, "loss": 2.3569, "step": 710 }, { "epoch": 0.025597724646698072, "grad_norm": 2.0024542808532715, "learning_rate": 3.412322274881517e-06, "loss": 2.349, "step": 720 }, { "epoch": 0.025953248600124434, "grad_norm": 2.029694080352783, "learning_rate": 3.459715639810427e-06, "loss": 2.3457, "step": 730 }, { "epoch": 0.026308772553550797, "grad_norm": 2.160933017730713, "learning_rate": 3.507109004739337e-06, "loss": 2.296, "step": 740 }, { "epoch": 0.02666429650697716, "grad_norm": 2.0530309677124023, "learning_rate": 3.5545023696682468e-06, "loss": 2.3714, "step": 750 }, { "epoch": 0.02701982046040352, "grad_norm": 1.919304370880127, "learning_rate": 3.6018957345971567e-06, "loss": 2.3104, "step": 760 }, { "epoch": 0.027375344413829883, "grad_norm": 2.03427791595459, "learning_rate": 3.6492890995260666e-06, "loss": 2.2817, "step": 770 }, { "epoch": 0.027730868367256245, "grad_norm": 2.2166829109191895, "learning_rate": 3.6966824644549766e-06, "loss": 2.3678, "step": 780 }, { "epoch": 0.028086392320682607, "grad_norm": 2.0098068714141846, "learning_rate": 3.7440758293838865e-06, "loss": 2.3577, "step": 790 }, { "epoch": 0.02844191627410897, "grad_norm": 2.175006151199341, "learning_rate": 3.7914691943127964e-06, "loss": 2.317, "step": 800 }, { "epoch": 0.02879744022753533, "grad_norm": 2.008929491043091, "learning_rate": 3.838862559241707e-06, "loss": 2.3594, "step": 810 }, { "epoch": 0.029152964180961694, "grad_norm": 2.1698524951934814, "learning_rate": 3.886255924170616e-06, "loss": 2.298, "step": 820 }, { "epoch": 0.029508488134388056, "grad_norm": 2.0474202632904053, "learning_rate": 3.933649289099527e-06, "loss": 2.2345, "step": 830 }, { "epoch": 0.029864012087814418, "grad_norm": 1.8713438510894775, "learning_rate": 3.981042654028436e-06, "loss": 2.3007, "step": 840 }, { "epoch": 0.03021953604124078, "grad_norm": 2.2103753089904785, "learning_rate": 4.0284360189573465e-06, "loss": 2.2866, "step": 850 }, { "epoch": 0.030575059994667142, "grad_norm": 1.954866886138916, "learning_rate": 4.075829383886256e-06, "loss": 2.3095, "step": 860 }, { "epoch": 0.030930583948093504, "grad_norm": 2.025651454925537, "learning_rate": 4.123222748815166e-06, "loss": 2.3398, "step": 870 }, { "epoch": 0.031286107901519866, "grad_norm": 1.9596309661865234, "learning_rate": 4.170616113744076e-06, "loss": 2.3475, "step": 880 }, { "epoch": 0.03164163185494623, "grad_norm": 1.9388352632522583, "learning_rate": 4.218009478672986e-06, "loss": 2.2968, "step": 890 }, { "epoch": 0.03199715580837259, "grad_norm": 2.0771589279174805, "learning_rate": 4.265402843601897e-06, "loss": 2.274, "step": 900 }, { "epoch": 0.03235267976179895, "grad_norm": 1.8007633686065674, "learning_rate": 4.312796208530806e-06, "loss": 2.3144, "step": 910 }, { "epoch": 0.032708203715225315, "grad_norm": 1.8899132013320923, "learning_rate": 4.360189573459716e-06, "loss": 2.3019, "step": 920 }, { "epoch": 0.03306372766865168, "grad_norm": 1.913493037223816, "learning_rate": 4.407582938388626e-06, "loss": 2.3061, "step": 930 }, { "epoch": 0.03341925162207804, "grad_norm": 2.0034244060516357, "learning_rate": 4.4549763033175355e-06, "loss": 2.2956, "step": 940 }, { "epoch": 0.0337747755755044, "grad_norm": 1.9144295454025269, "learning_rate": 4.502369668246446e-06, "loss": 2.2693, "step": 950 }, { "epoch": 0.03413029952893076, "grad_norm": 2.0472214221954346, "learning_rate": 4.549763033175355e-06, "loss": 2.2076, "step": 960 }, { "epoch": 0.034485823482357125, "grad_norm": 2.0035147666931152, "learning_rate": 4.597156398104266e-06, "loss": 2.3414, "step": 970 }, { "epoch": 0.03484134743578349, "grad_norm": 2.1267082691192627, "learning_rate": 4.644549763033176e-06, "loss": 2.2974, "step": 980 }, { "epoch": 0.03519687138920985, "grad_norm": 2.001173257827759, "learning_rate": 4.691943127962086e-06, "loss": 2.2925, "step": 990 }, { "epoch": 0.03555239534263621, "grad_norm": 1.8909610509872437, "learning_rate": 4.739336492890996e-06, "loss": 2.3045, "step": 1000 }, { "epoch": 0.035907919296062574, "grad_norm": 1.8200558423995972, "learning_rate": 4.7867298578199055e-06, "loss": 2.2761, "step": 1010 }, { "epoch": 0.036263443249488936, "grad_norm": 1.8652522563934326, "learning_rate": 4.834123222748816e-06, "loss": 2.3357, "step": 1020 }, { "epoch": 0.0366189672029153, "grad_norm": 1.907865285873413, "learning_rate": 4.881516587677725e-06, "loss": 2.2691, "step": 1030 }, { "epoch": 0.03697449115634166, "grad_norm": 1.8563010692596436, "learning_rate": 4.928909952606635e-06, "loss": 2.2373, "step": 1040 }, { "epoch": 0.03733001510976802, "grad_norm": 1.8197952508926392, "learning_rate": 4.976303317535545e-06, "loss": 2.288, "step": 1050 }, { "epoch": 0.037685539063194384, "grad_norm": 1.8919203281402588, "learning_rate": 5.023696682464455e-06, "loss": 2.2458, "step": 1060 }, { "epoch": 0.038041063016620746, "grad_norm": 1.8901917934417725, "learning_rate": 5.071090047393366e-06, "loss": 2.2557, "step": 1070 }, { "epoch": 0.03839658697004711, "grad_norm": 1.9412710666656494, "learning_rate": 5.118483412322275e-06, "loss": 2.2927, "step": 1080 }, { "epoch": 0.03875211092347347, "grad_norm": 2.022618055343628, "learning_rate": 5.165876777251185e-06, "loss": 2.2815, "step": 1090 }, { "epoch": 0.03910763487689983, "grad_norm": 1.8631237745285034, "learning_rate": 5.213270142180096e-06, "loss": 2.3075, "step": 1100 }, { "epoch": 0.039463158830326195, "grad_norm": 2.0299785137176514, "learning_rate": 5.260663507109005e-06, "loss": 2.2475, "step": 1110 }, { "epoch": 0.03981868278375256, "grad_norm": 1.8889631032943726, "learning_rate": 5.308056872037915e-06, "loss": 2.2771, "step": 1120 }, { "epoch": 0.04017420673717892, "grad_norm": 1.81557297706604, "learning_rate": 5.355450236966825e-06, "loss": 2.3143, "step": 1130 }, { "epoch": 0.04052973069060528, "grad_norm": 1.985235333442688, "learning_rate": 5.402843601895735e-06, "loss": 2.2641, "step": 1140 }, { "epoch": 0.04088525464403164, "grad_norm": 2.0497684478759766, "learning_rate": 5.4502369668246446e-06, "loss": 2.2873, "step": 1150 }, { "epoch": 0.041240778597458005, "grad_norm": 2.0543909072875977, "learning_rate": 5.497630331753555e-06, "loss": 2.285, "step": 1160 }, { "epoch": 0.04159630255088437, "grad_norm": 2.0090322494506836, "learning_rate": 5.5450236966824644e-06, "loss": 2.251, "step": 1170 }, { "epoch": 0.04195182650431073, "grad_norm": 2.01086163520813, "learning_rate": 5.592417061611375e-06, "loss": 2.2004, "step": 1180 }, { "epoch": 0.04230735045773709, "grad_norm": 1.9381053447723389, "learning_rate": 5.639810426540285e-06, "loss": 2.2603, "step": 1190 }, { "epoch": 0.042662874411163454, "grad_norm": 1.9569153785705566, "learning_rate": 5.687203791469195e-06, "loss": 2.233, "step": 1200 }, { "epoch": 0.043018398364589816, "grad_norm": 1.9454693794250488, "learning_rate": 5.734597156398105e-06, "loss": 2.2415, "step": 1210 }, { "epoch": 0.04337392231801618, "grad_norm": 1.7950100898742676, "learning_rate": 5.7819905213270145e-06, "loss": 2.243, "step": 1220 }, { "epoch": 0.04372944627144254, "grad_norm": 1.8708484172821045, "learning_rate": 5.829383886255925e-06, "loss": 2.2722, "step": 1230 }, { "epoch": 0.0440849702248689, "grad_norm": 1.9645860195159912, "learning_rate": 5.876777251184834e-06, "loss": 2.2032, "step": 1240 }, { "epoch": 0.044440494178295264, "grad_norm": 1.973793625831604, "learning_rate": 5.924170616113745e-06, "loss": 2.2247, "step": 1250 }, { "epoch": 0.04479601813172163, "grad_norm": 1.8837801218032837, "learning_rate": 5.971563981042654e-06, "loss": 2.2531, "step": 1260 }, { "epoch": 0.04515154208514799, "grad_norm": 1.9478363990783691, "learning_rate": 6.018957345971565e-06, "loss": 2.1965, "step": 1270 }, { "epoch": 0.04550706603857435, "grad_norm": 2.0230274200439453, "learning_rate": 6.066350710900475e-06, "loss": 2.2398, "step": 1280 }, { "epoch": 0.04586258999200071, "grad_norm": 1.92470121383667, "learning_rate": 6.1137440758293845e-06, "loss": 2.2117, "step": 1290 }, { "epoch": 0.046218113945427075, "grad_norm": 1.9754289388656616, "learning_rate": 6.161137440758295e-06, "loss": 2.2447, "step": 1300 }, { "epoch": 0.04657363789885344, "grad_norm": 1.9050835371017456, "learning_rate": 6.208530805687204e-06, "loss": 2.257, "step": 1310 }, { "epoch": 0.0469291618522798, "grad_norm": 2.0105550289154053, "learning_rate": 6.255924170616115e-06, "loss": 2.2238, "step": 1320 }, { "epoch": 0.04728468580570616, "grad_norm": 1.8811384439468384, "learning_rate": 6.303317535545023e-06, "loss": 2.2305, "step": 1330 }, { "epoch": 0.047640209759132524, "grad_norm": 1.7932945489883423, "learning_rate": 6.350710900473935e-06, "loss": 2.2524, "step": 1340 }, { "epoch": 0.047995733712558886, "grad_norm": 1.950801134109497, "learning_rate": 6.398104265402843e-06, "loss": 2.2003, "step": 1350 }, { "epoch": 0.04835125766598525, "grad_norm": 1.797852635383606, "learning_rate": 6.445497630331754e-06, "loss": 2.2409, "step": 1360 }, { "epoch": 0.04870678161941161, "grad_norm": 1.877571702003479, "learning_rate": 6.492890995260665e-06, "loss": 2.2534, "step": 1370 }, { "epoch": 0.04906230557283797, "grad_norm": 1.7737246751785278, "learning_rate": 6.5402843601895735e-06, "loss": 2.232, "step": 1380 }, { "epoch": 0.049417829526264334, "grad_norm": 1.8201391696929932, "learning_rate": 6.587677725118484e-06, "loss": 2.2015, "step": 1390 }, { "epoch": 0.049773353479690696, "grad_norm": 2.060864210128784, "learning_rate": 6.635071090047393e-06, "loss": 2.2239, "step": 1400 }, { "epoch": 0.05012887743311706, "grad_norm": 1.956697702407837, "learning_rate": 6.682464454976304e-06, "loss": 2.2289, "step": 1410 }, { "epoch": 0.05048440138654342, "grad_norm": 1.8469531536102295, "learning_rate": 6.729857819905213e-06, "loss": 2.2408, "step": 1420 }, { "epoch": 0.05083992533996978, "grad_norm": 1.8741036653518677, "learning_rate": 6.777251184834124e-06, "loss": 2.2032, "step": 1430 }, { "epoch": 0.051195449293396145, "grad_norm": 1.8755990266799927, "learning_rate": 6.824644549763034e-06, "loss": 2.2485, "step": 1440 }, { "epoch": 0.05155097324682251, "grad_norm": 1.8328008651733398, "learning_rate": 6.8720379146919435e-06, "loss": 2.2939, "step": 1450 }, { "epoch": 0.05190649720024887, "grad_norm": 1.8831439018249512, "learning_rate": 6.919431279620854e-06, "loss": 2.2182, "step": 1460 }, { "epoch": 0.05226202115367523, "grad_norm": 1.965472936630249, "learning_rate": 6.966824644549763e-06, "loss": 2.2382, "step": 1470 }, { "epoch": 0.05261754510710159, "grad_norm": 2.070134162902832, "learning_rate": 7.014218009478674e-06, "loss": 2.1926, "step": 1480 }, { "epoch": 0.052973069060527955, "grad_norm": 1.9419405460357666, "learning_rate": 7.061611374407583e-06, "loss": 2.2103, "step": 1490 }, { "epoch": 0.05332859301395432, "grad_norm": 1.9228605031967163, "learning_rate": 7.1090047393364935e-06, "loss": 2.2038, "step": 1500 }, { "epoch": 0.05368411696738068, "grad_norm": 1.8788220882415771, "learning_rate": 7.156398104265403e-06, "loss": 2.1504, "step": 1510 }, { "epoch": 0.05403964092080704, "grad_norm": 1.8219408988952637, "learning_rate": 7.203791469194313e-06, "loss": 2.246, "step": 1520 }, { "epoch": 0.054395164874233404, "grad_norm": 2.0067989826202393, "learning_rate": 7.251184834123224e-06, "loss": 2.1704, "step": 1530 }, { "epoch": 0.054750688827659766, "grad_norm": 1.7641377449035645, "learning_rate": 7.298578199052133e-06, "loss": 2.1625, "step": 1540 }, { "epoch": 0.05510621278108613, "grad_norm": 1.803442358970642, "learning_rate": 7.345971563981044e-06, "loss": 2.225, "step": 1550 }, { "epoch": 0.05546173673451249, "grad_norm": 1.9596740007400513, "learning_rate": 7.393364928909953e-06, "loss": 2.2086, "step": 1560 }, { "epoch": 0.05581726068793885, "grad_norm": 1.8549288511276245, "learning_rate": 7.4407582938388635e-06, "loss": 2.2154, "step": 1570 }, { "epoch": 0.056172784641365214, "grad_norm": 1.7597678899765015, "learning_rate": 7.488151658767773e-06, "loss": 2.1198, "step": 1580 }, { "epoch": 0.056528308594791576, "grad_norm": 1.9706618785858154, "learning_rate": 7.535545023696683e-06, "loss": 2.2277, "step": 1590 }, { "epoch": 0.05688383254821794, "grad_norm": 1.8696203231811523, "learning_rate": 7.582938388625593e-06, "loss": 2.2775, "step": 1600 }, { "epoch": 0.0572393565016443, "grad_norm": 1.9137041568756104, "learning_rate": 7.630331753554503e-06, "loss": 2.2055, "step": 1610 }, { "epoch": 0.05759488045507066, "grad_norm": 1.9089804887771606, "learning_rate": 7.677725118483414e-06, "loss": 2.2311, "step": 1620 }, { "epoch": 0.057950404408497025, "grad_norm": 1.8806320428848267, "learning_rate": 7.725118483412322e-06, "loss": 2.2372, "step": 1630 }, { "epoch": 0.05830592836192339, "grad_norm": 1.775240182876587, "learning_rate": 7.772511848341233e-06, "loss": 2.1764, "step": 1640 }, { "epoch": 0.05866145231534975, "grad_norm": 1.8922443389892578, "learning_rate": 7.819905213270143e-06, "loss": 2.168, "step": 1650 }, { "epoch": 0.05901697626877611, "grad_norm": 1.7949473857879639, "learning_rate": 7.867298578199053e-06, "loss": 2.2688, "step": 1660 }, { "epoch": 0.05937250022220247, "grad_norm": 1.8707879781723022, "learning_rate": 7.914691943127962e-06, "loss": 2.2207, "step": 1670 }, { "epoch": 0.059728024175628835, "grad_norm": 1.814133882522583, "learning_rate": 7.962085308056872e-06, "loss": 2.2056, "step": 1680 }, { "epoch": 0.0600835481290552, "grad_norm": 1.8329755067825317, "learning_rate": 8.009478672985783e-06, "loss": 2.1702, "step": 1690 }, { "epoch": 0.06043907208248156, "grad_norm": 1.839277744293213, "learning_rate": 8.056872037914693e-06, "loss": 2.2376, "step": 1700 }, { "epoch": 0.06079459603590792, "grad_norm": 1.8632655143737793, "learning_rate": 8.104265402843603e-06, "loss": 2.232, "step": 1710 }, { "epoch": 0.061150119989334284, "grad_norm": 1.8680291175842285, "learning_rate": 8.151658767772512e-06, "loss": 2.2177, "step": 1720 }, { "epoch": 0.061505643942760646, "grad_norm": 1.8234357833862305, "learning_rate": 8.199052132701422e-06, "loss": 2.1791, "step": 1730 }, { "epoch": 0.06186116789618701, "grad_norm": 1.8661161661148071, "learning_rate": 8.246445497630333e-06, "loss": 2.1528, "step": 1740 }, { "epoch": 0.06221669184961337, "grad_norm": 1.90630042552948, "learning_rate": 8.293838862559243e-06, "loss": 2.1641, "step": 1750 }, { "epoch": 0.06257221580303973, "grad_norm": 1.873467206954956, "learning_rate": 8.341232227488152e-06, "loss": 2.1503, "step": 1760 }, { "epoch": 0.0629277397564661, "grad_norm": 1.8596463203430176, "learning_rate": 8.388625592417062e-06, "loss": 2.1719, "step": 1770 }, { "epoch": 0.06328326370989246, "grad_norm": 1.8411222696304321, "learning_rate": 8.436018957345973e-06, "loss": 2.21, "step": 1780 }, { "epoch": 0.06363878766331882, "grad_norm": 1.8672677278518677, "learning_rate": 8.483412322274883e-06, "loss": 2.1558, "step": 1790 }, { "epoch": 0.06399431161674518, "grad_norm": 1.9354828596115112, "learning_rate": 8.530805687203793e-06, "loss": 2.2281, "step": 1800 }, { "epoch": 0.06434983557017154, "grad_norm": 1.8663649559020996, "learning_rate": 8.578199052132702e-06, "loss": 2.2147, "step": 1810 }, { "epoch": 0.0647053595235979, "grad_norm": 1.8387905359268188, "learning_rate": 8.625592417061612e-06, "loss": 2.2229, "step": 1820 }, { "epoch": 0.06506088347702427, "grad_norm": 1.741399884223938, "learning_rate": 8.672985781990521e-06, "loss": 2.1799, "step": 1830 }, { "epoch": 0.06541640743045063, "grad_norm": 1.9359266757965088, "learning_rate": 8.720379146919431e-06, "loss": 2.2001, "step": 1840 }, { "epoch": 0.06577193138387699, "grad_norm": 1.9217205047607422, "learning_rate": 8.767772511848342e-06, "loss": 2.1804, "step": 1850 }, { "epoch": 0.06612745533730335, "grad_norm": 1.98513662815094, "learning_rate": 8.815165876777252e-06, "loss": 2.1211, "step": 1860 }, { "epoch": 0.06648297929072972, "grad_norm": 1.9897305965423584, "learning_rate": 8.862559241706162e-06, "loss": 2.2303, "step": 1870 }, { "epoch": 0.06683850324415608, "grad_norm": 1.8167400360107422, "learning_rate": 8.909952606635071e-06, "loss": 2.2092, "step": 1880 }, { "epoch": 0.06719402719758244, "grad_norm": 1.842371940612793, "learning_rate": 8.957345971563981e-06, "loss": 2.2234, "step": 1890 }, { "epoch": 0.0675495511510088, "grad_norm": 1.8326691389083862, "learning_rate": 9.004739336492892e-06, "loss": 2.1621, "step": 1900 }, { "epoch": 0.06790507510443516, "grad_norm": 1.914857029914856, "learning_rate": 9.052132701421802e-06, "loss": 2.1706, "step": 1910 }, { "epoch": 0.06826059905786153, "grad_norm": 1.7782554626464844, "learning_rate": 9.09952606635071e-06, "loss": 2.176, "step": 1920 }, { "epoch": 0.06861612301128789, "grad_norm": 1.8087050914764404, "learning_rate": 9.146919431279621e-06, "loss": 2.1664, "step": 1930 }, { "epoch": 0.06897164696471425, "grad_norm": 1.9598517417907715, "learning_rate": 9.194312796208532e-06, "loss": 2.1703, "step": 1940 }, { "epoch": 0.06932717091814061, "grad_norm": 1.9140313863754272, "learning_rate": 9.241706161137442e-06, "loss": 2.183, "step": 1950 }, { "epoch": 0.06968269487156697, "grad_norm": 1.920876383781433, "learning_rate": 9.289099526066352e-06, "loss": 2.2278, "step": 1960 }, { "epoch": 0.07003821882499334, "grad_norm": 1.7880984544754028, "learning_rate": 9.336492890995261e-06, "loss": 2.223, "step": 1970 }, { "epoch": 0.0703937427784197, "grad_norm": 1.8553565740585327, "learning_rate": 9.383886255924171e-06, "loss": 2.1544, "step": 1980 }, { "epoch": 0.07074926673184606, "grad_norm": 1.8793102502822876, "learning_rate": 9.431279620853082e-06, "loss": 2.1707, "step": 1990 }, { "epoch": 0.07110479068527242, "grad_norm": 1.7974066734313965, "learning_rate": 9.478672985781992e-06, "loss": 2.1662, "step": 2000 }, { "epoch": 0.07146031463869879, "grad_norm": 1.857040524482727, "learning_rate": 9.5260663507109e-06, "loss": 2.1379, "step": 2010 }, { "epoch": 0.07181583859212515, "grad_norm": 1.8388969898223877, "learning_rate": 9.573459715639811e-06, "loss": 2.1987, "step": 2020 }, { "epoch": 0.07217136254555151, "grad_norm": 2.041085958480835, "learning_rate": 9.620853080568721e-06, "loss": 2.157, "step": 2030 }, { "epoch": 0.07252688649897787, "grad_norm": 1.8348536491394043, "learning_rate": 9.668246445497632e-06, "loss": 2.1598, "step": 2040 }, { "epoch": 0.07288241045240423, "grad_norm": 1.81992769241333, "learning_rate": 9.715639810426542e-06, "loss": 2.1749, "step": 2050 }, { "epoch": 0.0732379344058306, "grad_norm": 1.9568251371383667, "learning_rate": 9.76303317535545e-06, "loss": 2.1719, "step": 2060 }, { "epoch": 0.07359345835925696, "grad_norm": 1.852169394493103, "learning_rate": 9.810426540284361e-06, "loss": 2.0978, "step": 2070 }, { "epoch": 0.07394898231268332, "grad_norm": 1.8278820514678955, "learning_rate": 9.85781990521327e-06, "loss": 2.1978, "step": 2080 }, { "epoch": 0.07430450626610968, "grad_norm": 2.0013930797576904, "learning_rate": 9.905213270142182e-06, "loss": 2.2025, "step": 2090 }, { "epoch": 0.07466003021953604, "grad_norm": 1.822750210762024, "learning_rate": 9.95260663507109e-06, "loss": 2.1063, "step": 2100 }, { "epoch": 0.0750155541729624, "grad_norm": 1.9742094278335571, "learning_rate": 1e-05, "loss": 2.173, "step": 2110 }, { "epoch": 0.07537107812638877, "grad_norm": 1.7482434511184692, "learning_rate": 1.004739336492891e-05, "loss": 2.1546, "step": 2120 }, { "epoch": 0.07572660207981513, "grad_norm": 1.820383071899414, "learning_rate": 1.0094786729857822e-05, "loss": 2.1565, "step": 2130 }, { "epoch": 0.07608212603324149, "grad_norm": 1.7688885927200317, "learning_rate": 1.0142180094786732e-05, "loss": 2.1533, "step": 2140 }, { "epoch": 0.07643764998666785, "grad_norm": 1.9063228368759155, "learning_rate": 1.018957345971564e-05, "loss": 2.1278, "step": 2150 }, { "epoch": 0.07679317394009422, "grad_norm": 1.819233775138855, "learning_rate": 1.023696682464455e-05, "loss": 2.1686, "step": 2160 }, { "epoch": 0.07714869789352058, "grad_norm": 1.8698222637176514, "learning_rate": 1.0284360189573461e-05, "loss": 2.1222, "step": 2170 }, { "epoch": 0.07750422184694694, "grad_norm": 1.7308324575424194, "learning_rate": 1.033175355450237e-05, "loss": 2.1716, "step": 2180 }, { "epoch": 0.0778597458003733, "grad_norm": 1.8845312595367432, "learning_rate": 1.037914691943128e-05, "loss": 2.157, "step": 2190 }, { "epoch": 0.07821526975379967, "grad_norm": 1.7712396383285522, "learning_rate": 1.0426540284360192e-05, "loss": 2.1132, "step": 2200 }, { "epoch": 0.07857079370722603, "grad_norm": 1.8825665712356567, "learning_rate": 1.0473933649289101e-05, "loss": 2.2037, "step": 2210 }, { "epoch": 0.07892631766065239, "grad_norm": 1.7894903421401978, "learning_rate": 1.052132701421801e-05, "loss": 2.1479, "step": 2220 }, { "epoch": 0.07928184161407875, "grad_norm": 1.774488925933838, "learning_rate": 1.056872037914692e-05, "loss": 2.1836, "step": 2230 }, { "epoch": 0.07963736556750511, "grad_norm": 1.8073627948760986, "learning_rate": 1.061611374407583e-05, "loss": 2.1312, "step": 2240 }, { "epoch": 0.07999288952093148, "grad_norm": 1.863756537437439, "learning_rate": 1.066350710900474e-05, "loss": 2.1381, "step": 2250 }, { "epoch": 0.08034841347435784, "grad_norm": 1.8275458812713623, "learning_rate": 1.071090047393365e-05, "loss": 2.1564, "step": 2260 }, { "epoch": 0.0807039374277842, "grad_norm": 1.8978358507156372, "learning_rate": 1.075829383886256e-05, "loss": 2.1592, "step": 2270 }, { "epoch": 0.08105946138121056, "grad_norm": 1.9047290086746216, "learning_rate": 1.080568720379147e-05, "loss": 2.1967, "step": 2280 }, { "epoch": 0.08141498533463692, "grad_norm": 1.7772575616836548, "learning_rate": 1.085308056872038e-05, "loss": 2.1896, "step": 2290 }, { "epoch": 0.08177050928806329, "grad_norm": 1.8947906494140625, "learning_rate": 1.0900473933649289e-05, "loss": 2.1464, "step": 2300 }, { "epoch": 0.08212603324148965, "grad_norm": 1.8788243532180786, "learning_rate": 1.0947867298578201e-05, "loss": 2.196, "step": 2310 }, { "epoch": 0.08248155719491601, "grad_norm": 1.8443186283111572, "learning_rate": 1.099526066350711e-05, "loss": 2.1347, "step": 2320 }, { "epoch": 0.08283708114834237, "grad_norm": 1.757403016090393, "learning_rate": 1.104265402843602e-05, "loss": 2.1551, "step": 2330 }, { "epoch": 0.08319260510176874, "grad_norm": 1.8618059158325195, "learning_rate": 1.1090047393364929e-05, "loss": 2.1738, "step": 2340 }, { "epoch": 0.0835481290551951, "grad_norm": 1.759847640991211, "learning_rate": 1.1137440758293841e-05, "loss": 2.1513, "step": 2350 }, { "epoch": 0.08390365300862146, "grad_norm": 1.8621257543563843, "learning_rate": 1.118483412322275e-05, "loss": 2.1029, "step": 2360 }, { "epoch": 0.08425917696204782, "grad_norm": 1.7522389888763428, "learning_rate": 1.1232227488151658e-05, "loss": 2.1761, "step": 2370 }, { "epoch": 0.08461470091547418, "grad_norm": 1.8118172883987427, "learning_rate": 1.127962085308057e-05, "loss": 2.1581, "step": 2380 }, { "epoch": 0.08497022486890055, "grad_norm": 1.8402385711669922, "learning_rate": 1.132701421800948e-05, "loss": 2.1761, "step": 2390 }, { "epoch": 0.08532574882232691, "grad_norm": 1.7629215717315674, "learning_rate": 1.137440758293839e-05, "loss": 2.1369, "step": 2400 }, { "epoch": 0.08568127277575327, "grad_norm": 1.7961244583129883, "learning_rate": 1.1421800947867298e-05, "loss": 2.1293, "step": 2410 }, { "epoch": 0.08603679672917963, "grad_norm": 1.9245836734771729, "learning_rate": 1.146919431279621e-05, "loss": 2.1529, "step": 2420 }, { "epoch": 0.086392320682606, "grad_norm": 1.871652364730835, "learning_rate": 1.1516587677725119e-05, "loss": 2.1264, "step": 2430 }, { "epoch": 0.08674784463603236, "grad_norm": 1.7594822645187378, "learning_rate": 1.1563981042654029e-05, "loss": 2.1433, "step": 2440 }, { "epoch": 0.08710336858945872, "grad_norm": 1.9148240089416504, "learning_rate": 1.1611374407582941e-05, "loss": 2.1479, "step": 2450 }, { "epoch": 0.08745889254288508, "grad_norm": 1.8528403043746948, "learning_rate": 1.165876777251185e-05, "loss": 2.1668, "step": 2460 }, { "epoch": 0.08781441649631144, "grad_norm": 1.8765084743499756, "learning_rate": 1.1706161137440758e-05, "loss": 2.1776, "step": 2470 }, { "epoch": 0.0881699404497378, "grad_norm": 1.8227301836013794, "learning_rate": 1.1753554502369669e-05, "loss": 2.1451, "step": 2480 }, { "epoch": 0.08852546440316417, "grad_norm": 1.866655945777893, "learning_rate": 1.180094786729858e-05, "loss": 2.1236, "step": 2490 }, { "epoch": 0.08888098835659053, "grad_norm": 1.8193955421447754, "learning_rate": 1.184834123222749e-05, "loss": 2.11, "step": 2500 }, { "epoch": 0.08923651231001689, "grad_norm": 1.866882085800171, "learning_rate": 1.1895734597156398e-05, "loss": 2.1669, "step": 2510 }, { "epoch": 0.08959203626344325, "grad_norm": 1.7875139713287354, "learning_rate": 1.1943127962085309e-05, "loss": 2.1876, "step": 2520 }, { "epoch": 0.08994756021686962, "grad_norm": 1.8214070796966553, "learning_rate": 1.1990521327014219e-05, "loss": 2.173, "step": 2530 }, { "epoch": 0.09030308417029598, "grad_norm": 1.7724765539169312, "learning_rate": 1.203791469194313e-05, "loss": 2.1776, "step": 2540 }, { "epoch": 0.09065860812372234, "grad_norm": 1.9167289733886719, "learning_rate": 1.2085308056872038e-05, "loss": 2.1499, "step": 2550 }, { "epoch": 0.0910141320771487, "grad_norm": 1.8983221054077148, "learning_rate": 1.213270142180095e-05, "loss": 2.142, "step": 2560 }, { "epoch": 0.09136965603057506, "grad_norm": 1.8105605840682983, "learning_rate": 1.2180094786729859e-05, "loss": 2.1533, "step": 2570 }, { "epoch": 0.09172517998400143, "grad_norm": 1.761996865272522, "learning_rate": 1.2227488151658769e-05, "loss": 2.1532, "step": 2580 }, { "epoch": 0.09208070393742779, "grad_norm": 1.8852143287658691, "learning_rate": 1.2274881516587678e-05, "loss": 2.1707, "step": 2590 }, { "epoch": 0.09243622789085415, "grad_norm": 1.7447150945663452, "learning_rate": 1.232227488151659e-05, "loss": 2.1333, "step": 2600 }, { "epoch": 0.09279175184428051, "grad_norm": 2.3988707065582275, "learning_rate": 1.2369668246445498e-05, "loss": 2.153, "step": 2610 }, { "epoch": 0.09314727579770687, "grad_norm": 1.8021430969238281, "learning_rate": 1.2417061611374409e-05, "loss": 2.0996, "step": 2620 }, { "epoch": 0.09350279975113324, "grad_norm": 1.8730841875076294, "learning_rate": 1.2464454976303319e-05, "loss": 2.1489, "step": 2630 }, { "epoch": 0.0938583237045596, "grad_norm": 1.8202563524246216, "learning_rate": 1.251184834123223e-05, "loss": 2.1429, "step": 2640 }, { "epoch": 0.09421384765798596, "grad_norm": 1.9044020175933838, "learning_rate": 1.2559241706161138e-05, "loss": 2.1005, "step": 2650 }, { "epoch": 0.09456937161141232, "grad_norm": 1.7460588216781616, "learning_rate": 1.2606635071090047e-05, "loss": 2.1891, "step": 2660 }, { "epoch": 0.09492489556483868, "grad_norm": 1.8756009340286255, "learning_rate": 1.2654028436018959e-05, "loss": 2.1536, "step": 2670 }, { "epoch": 0.09528041951826505, "grad_norm": 1.7713373899459839, "learning_rate": 1.270142180094787e-05, "loss": 2.1297, "step": 2680 }, { "epoch": 0.09563594347169141, "grad_norm": 1.802303433418274, "learning_rate": 1.2748815165876778e-05, "loss": 2.1254, "step": 2690 }, { "epoch": 0.09599146742511777, "grad_norm": 1.7659459114074707, "learning_rate": 1.2796208530805687e-05, "loss": 2.1714, "step": 2700 }, { "epoch": 0.09634699137854413, "grad_norm": 1.8053789138793945, "learning_rate": 1.2843601895734599e-05, "loss": 2.0936, "step": 2710 }, { "epoch": 0.0967025153319705, "grad_norm": 1.755724310874939, "learning_rate": 1.2890995260663507e-05, "loss": 2.1576, "step": 2720 }, { "epoch": 0.09705803928539686, "grad_norm": 1.7820082902908325, "learning_rate": 1.2938388625592418e-05, "loss": 2.1376, "step": 2730 }, { "epoch": 0.09741356323882322, "grad_norm": 1.8214658498764038, "learning_rate": 1.298578199052133e-05, "loss": 2.1255, "step": 2740 }, { "epoch": 0.09776908719224958, "grad_norm": 1.6887913942337036, "learning_rate": 1.3033175355450238e-05, "loss": 2.1164, "step": 2750 }, { "epoch": 0.09812461114567594, "grad_norm": 1.791401743888855, "learning_rate": 1.3080568720379147e-05, "loss": 2.0981, "step": 2760 }, { "epoch": 0.0984801350991023, "grad_norm": 2.0565669536590576, "learning_rate": 1.3127962085308057e-05, "loss": 2.1544, "step": 2770 }, { "epoch": 0.09883565905252867, "grad_norm": 1.7514798641204834, "learning_rate": 1.3175355450236968e-05, "loss": 2.1674, "step": 2780 }, { "epoch": 0.09919118300595503, "grad_norm": 1.9238693714141846, "learning_rate": 1.3222748815165878e-05, "loss": 2.1556, "step": 2790 }, { "epoch": 0.09954670695938139, "grad_norm": 1.700263500213623, "learning_rate": 1.3270142180094787e-05, "loss": 2.1248, "step": 2800 }, { "epoch": 0.09990223091280775, "grad_norm": 1.8183735609054565, "learning_rate": 1.3317535545023699e-05, "loss": 2.0735, "step": 2810 }, { "epoch": 0.10025775486623412, "grad_norm": 1.8340328931808472, "learning_rate": 1.3364928909952607e-05, "loss": 2.1051, "step": 2820 }, { "epoch": 0.10061327881966048, "grad_norm": 1.866182565689087, "learning_rate": 1.3412322274881518e-05, "loss": 2.1201, "step": 2830 }, { "epoch": 0.10096880277308684, "grad_norm": 2.0255119800567627, "learning_rate": 1.3459715639810426e-05, "loss": 2.1717, "step": 2840 }, { "epoch": 0.1013243267265132, "grad_norm": 1.772624135017395, "learning_rate": 1.3507109004739339e-05, "loss": 2.1187, "step": 2850 }, { "epoch": 0.10167985067993957, "grad_norm": 1.7913185358047485, "learning_rate": 1.3554502369668247e-05, "loss": 2.1251, "step": 2860 }, { "epoch": 0.10203537463336593, "grad_norm": 1.756658911705017, "learning_rate": 1.3601895734597158e-05, "loss": 2.1102, "step": 2870 }, { "epoch": 0.10239089858679229, "grad_norm": 1.7615718841552734, "learning_rate": 1.3649289099526068e-05, "loss": 2.1116, "step": 2880 }, { "epoch": 0.10274642254021865, "grad_norm": 1.9718692302703857, "learning_rate": 1.3696682464454978e-05, "loss": 2.1488, "step": 2890 }, { "epoch": 0.10310194649364501, "grad_norm": 1.7149349451065063, "learning_rate": 1.3744075829383887e-05, "loss": 2.1533, "step": 2900 }, { "epoch": 0.10345747044707138, "grad_norm": 1.900169849395752, "learning_rate": 1.3791469194312797e-05, "loss": 2.1513, "step": 2910 }, { "epoch": 0.10381299440049774, "grad_norm": 1.909778118133545, "learning_rate": 1.3838862559241708e-05, "loss": 2.1273, "step": 2920 }, { "epoch": 0.1041685183539241, "grad_norm": 1.7948297262191772, "learning_rate": 1.3886255924170618e-05, "loss": 2.0679, "step": 2930 }, { "epoch": 0.10452404230735046, "grad_norm": 1.755152940750122, "learning_rate": 1.3933649289099527e-05, "loss": 2.0874, "step": 2940 }, { "epoch": 0.10487956626077682, "grad_norm": 1.7649750709533691, "learning_rate": 1.3981042654028435e-05, "loss": 2.1234, "step": 2950 }, { "epoch": 0.10523509021420319, "grad_norm": 1.7477363348007202, "learning_rate": 1.4028436018957347e-05, "loss": 2.175, "step": 2960 }, { "epoch": 0.10559061416762955, "grad_norm": 1.8121856451034546, "learning_rate": 1.4075829383886258e-05, "loss": 2.0937, "step": 2970 }, { "epoch": 0.10594613812105591, "grad_norm": 1.7684760093688965, "learning_rate": 1.4123222748815166e-05, "loss": 2.1086, "step": 2980 }, { "epoch": 0.10630166207448227, "grad_norm": 1.7173081636428833, "learning_rate": 1.4170616113744078e-05, "loss": 2.1222, "step": 2990 }, { "epoch": 0.10665718602790863, "grad_norm": 1.7345879077911377, "learning_rate": 1.4218009478672987e-05, "loss": 2.1364, "step": 3000 }, { "epoch": 0.107012709981335, "grad_norm": 1.7358227968215942, "learning_rate": 1.4265402843601896e-05, "loss": 2.1226, "step": 3010 }, { "epoch": 0.10736823393476136, "grad_norm": 1.8484522104263306, "learning_rate": 1.4312796208530806e-05, "loss": 2.1451, "step": 3020 }, { "epoch": 0.10772375788818772, "grad_norm": 1.7787588834762573, "learning_rate": 1.4360189573459718e-05, "loss": 2.1015, "step": 3030 }, { "epoch": 0.10807928184161408, "grad_norm": 1.7221862077713013, "learning_rate": 1.4407582938388627e-05, "loss": 2.1076, "step": 3040 }, { "epoch": 0.10843480579504045, "grad_norm": 1.825199842453003, "learning_rate": 1.4454976303317535e-05, "loss": 2.1341, "step": 3050 }, { "epoch": 0.10879032974846681, "grad_norm": 1.7941467761993408, "learning_rate": 1.4502369668246448e-05, "loss": 2.0934, "step": 3060 }, { "epoch": 0.10914585370189317, "grad_norm": 1.7535361051559448, "learning_rate": 1.4549763033175356e-05, "loss": 2.1409, "step": 3070 }, { "epoch": 0.10950137765531953, "grad_norm": 1.778220295906067, "learning_rate": 1.4597156398104267e-05, "loss": 2.0991, "step": 3080 }, { "epoch": 0.1098569016087459, "grad_norm": 1.8794920444488525, "learning_rate": 1.4644549763033175e-05, "loss": 2.1184, "step": 3090 }, { "epoch": 0.11021242556217226, "grad_norm": 1.7211918830871582, "learning_rate": 1.4691943127962087e-05, "loss": 2.1462, "step": 3100 }, { "epoch": 0.11056794951559862, "grad_norm": 1.821576476097107, "learning_rate": 1.4739336492890996e-05, "loss": 2.1064, "step": 3110 }, { "epoch": 0.11092347346902498, "grad_norm": 1.7056803703308105, "learning_rate": 1.4786729857819906e-05, "loss": 2.1186, "step": 3120 }, { "epoch": 0.11127899742245134, "grad_norm": 1.8238040208816528, "learning_rate": 1.4834123222748817e-05, "loss": 2.0878, "step": 3130 }, { "epoch": 0.1116345213758777, "grad_norm": 1.8366920948028564, "learning_rate": 1.4881516587677727e-05, "loss": 2.0733, "step": 3140 }, { "epoch": 0.11199004532930407, "grad_norm": 1.7596580982208252, "learning_rate": 1.4928909952606636e-05, "loss": 2.1166, "step": 3150 }, { "epoch": 0.11234556928273043, "grad_norm": 1.6376357078552246, "learning_rate": 1.4976303317535546e-05, "loss": 2.1283, "step": 3160 }, { "epoch": 0.11270109323615679, "grad_norm": 1.7873460054397583, "learning_rate": 1.5023696682464456e-05, "loss": 2.1222, "step": 3170 }, { "epoch": 0.11305661718958315, "grad_norm": 1.809497356414795, "learning_rate": 1.5071090047393367e-05, "loss": 2.1203, "step": 3180 }, { "epoch": 0.11341214114300951, "grad_norm": 1.7785967588424683, "learning_rate": 1.5118483412322275e-05, "loss": 2.1825, "step": 3190 }, { "epoch": 0.11376766509643588, "grad_norm": 1.6852540969848633, "learning_rate": 1.5165876777251186e-05, "loss": 2.0729, "step": 3200 }, { "epoch": 0.11412318904986224, "grad_norm": 1.8209391832351685, "learning_rate": 1.5213270142180096e-05, "loss": 2.1747, "step": 3210 }, { "epoch": 0.1144787130032886, "grad_norm": 1.730448603630066, "learning_rate": 1.5260663507109007e-05, "loss": 2.0646, "step": 3220 }, { "epoch": 0.11483423695671496, "grad_norm": 1.8416671752929688, "learning_rate": 1.5308056872037915e-05, "loss": 2.1426, "step": 3230 }, { "epoch": 0.11518976091014133, "grad_norm": 1.871046781539917, "learning_rate": 1.5355450236966827e-05, "loss": 2.1861, "step": 3240 }, { "epoch": 0.11554528486356769, "grad_norm": 1.7909883260726929, "learning_rate": 1.5402843601895736e-05, "loss": 2.0659, "step": 3250 }, { "epoch": 0.11590080881699405, "grad_norm": 1.7126774787902832, "learning_rate": 1.5450236966824645e-05, "loss": 2.1062, "step": 3260 }, { "epoch": 0.11625633277042041, "grad_norm": 1.8236619234085083, "learning_rate": 1.5497630331753553e-05, "loss": 2.0713, "step": 3270 }, { "epoch": 0.11661185672384677, "grad_norm": 1.7866365909576416, "learning_rate": 1.5545023696682465e-05, "loss": 2.0975, "step": 3280 }, { "epoch": 0.11696738067727314, "grad_norm": 1.8846007585525513, "learning_rate": 1.5592417061611377e-05, "loss": 2.0856, "step": 3290 }, { "epoch": 0.1173229046306995, "grad_norm": 1.8732142448425293, "learning_rate": 1.5639810426540286e-05, "loss": 2.0821, "step": 3300 }, { "epoch": 0.11767842858412586, "grad_norm": 1.805627465248108, "learning_rate": 1.5687203791469198e-05, "loss": 2.0877, "step": 3310 }, { "epoch": 0.11803395253755222, "grad_norm": 1.6648415327072144, "learning_rate": 1.5734597156398107e-05, "loss": 2.1029, "step": 3320 }, { "epoch": 0.11838947649097858, "grad_norm": 1.9431143999099731, "learning_rate": 1.5781990521327015e-05, "loss": 2.1317, "step": 3330 }, { "epoch": 0.11874500044440495, "grad_norm": 1.7122180461883545, "learning_rate": 1.5829383886255924e-05, "loss": 2.1403, "step": 3340 }, { "epoch": 0.11910052439783131, "grad_norm": 1.8386825323104858, "learning_rate": 1.5876777251184836e-05, "loss": 2.1307, "step": 3350 }, { "epoch": 0.11945604835125767, "grad_norm": 1.8558971881866455, "learning_rate": 1.5924170616113745e-05, "loss": 2.0973, "step": 3360 }, { "epoch": 0.11981157230468403, "grad_norm": 1.8701891899108887, "learning_rate": 1.5971563981042653e-05, "loss": 2.0487, "step": 3370 }, { "epoch": 0.1201670962581104, "grad_norm": 1.7447658777236938, "learning_rate": 1.6018957345971565e-05, "loss": 2.0547, "step": 3380 }, { "epoch": 0.12052262021153676, "grad_norm": 1.8656021356582642, "learning_rate": 1.6066350710900474e-05, "loss": 2.1156, "step": 3390 }, { "epoch": 0.12087814416496312, "grad_norm": 1.718123197555542, "learning_rate": 1.6113744075829386e-05, "loss": 2.0908, "step": 3400 }, { "epoch": 0.12123366811838948, "grad_norm": 1.6136643886566162, "learning_rate": 1.6161137440758295e-05, "loss": 2.0786, "step": 3410 }, { "epoch": 0.12158919207181584, "grad_norm": 1.748200535774231, "learning_rate": 1.6208530805687207e-05, "loss": 2.0935, "step": 3420 }, { "epoch": 0.1219447160252422, "grad_norm": 1.918379306793213, "learning_rate": 1.6255924170616116e-05, "loss": 2.1453, "step": 3430 }, { "epoch": 0.12230023997866857, "grad_norm": 1.996518611907959, "learning_rate": 1.6303317535545024e-05, "loss": 2.1181, "step": 3440 }, { "epoch": 0.12265576393209493, "grad_norm": 1.7767255306243896, "learning_rate": 1.6350710900473933e-05, "loss": 2.1108, "step": 3450 }, { "epoch": 0.12301128788552129, "grad_norm": 1.7845118045806885, "learning_rate": 1.6398104265402845e-05, "loss": 2.0712, "step": 3460 }, { "epoch": 0.12336681183894765, "grad_norm": 1.7948838472366333, "learning_rate": 1.6445497630331754e-05, "loss": 2.0738, "step": 3470 }, { "epoch": 0.12372233579237402, "grad_norm": 1.703697919845581, "learning_rate": 1.6492890995260666e-05, "loss": 2.112, "step": 3480 }, { "epoch": 0.12407785974580038, "grad_norm": 1.8490139245986938, "learning_rate": 1.6540284360189574e-05, "loss": 2.0903, "step": 3490 }, { "epoch": 0.12443338369922674, "grad_norm": 1.7895227670669556, "learning_rate": 1.6587677725118486e-05, "loss": 2.0949, "step": 3500 }, { "epoch": 0.1247889076526531, "grad_norm": 1.7390356063842773, "learning_rate": 1.6635071090047395e-05, "loss": 2.0782, "step": 3510 }, { "epoch": 0.12514443160607946, "grad_norm": 1.760340690612793, "learning_rate": 1.6682464454976304e-05, "loss": 2.0841, "step": 3520 }, { "epoch": 0.12549995555950583, "grad_norm": 1.7745305299758911, "learning_rate": 1.6729857819905216e-05, "loss": 2.0654, "step": 3530 }, { "epoch": 0.1258554795129322, "grad_norm": 1.7406567335128784, "learning_rate": 1.6777251184834124e-05, "loss": 2.0701, "step": 3540 }, { "epoch": 0.12621100346635855, "grad_norm": 1.71855628490448, "learning_rate": 1.6824644549763033e-05, "loss": 2.158, "step": 3550 }, { "epoch": 0.1265665274197849, "grad_norm": 1.7532312870025635, "learning_rate": 1.6872037914691945e-05, "loss": 2.104, "step": 3560 }, { "epoch": 0.12692205137321128, "grad_norm": 1.8171640634536743, "learning_rate": 1.6919431279620854e-05, "loss": 2.1028, "step": 3570 }, { "epoch": 0.12727757532663764, "grad_norm": 1.8336002826690674, "learning_rate": 1.6966824644549766e-05, "loss": 2.06, "step": 3580 }, { "epoch": 0.127633099280064, "grad_norm": 1.8022377490997314, "learning_rate": 1.7014218009478674e-05, "loss": 2.0928, "step": 3590 }, { "epoch": 0.12798862323349036, "grad_norm": 1.7392102479934692, "learning_rate": 1.7061611374407587e-05, "loss": 2.118, "step": 3600 }, { "epoch": 0.12834414718691672, "grad_norm": 1.7536499500274658, "learning_rate": 1.7109004739336495e-05, "loss": 2.0753, "step": 3610 }, { "epoch": 0.12869967114034309, "grad_norm": 1.6055700778961182, "learning_rate": 1.7156398104265404e-05, "loss": 2.0485, "step": 3620 }, { "epoch": 0.12905519509376945, "grad_norm": 1.7383241653442383, "learning_rate": 1.7203791469194316e-05, "loss": 2.1041, "step": 3630 }, { "epoch": 0.1294107190471958, "grad_norm": 1.7801284790039062, "learning_rate": 1.7251184834123225e-05, "loss": 2.1046, "step": 3640 }, { "epoch": 0.12976624300062217, "grad_norm": 1.816885232925415, "learning_rate": 1.7298578199052133e-05, "loss": 2.1035, "step": 3650 }, { "epoch": 0.13012176695404853, "grad_norm": 1.8502243757247925, "learning_rate": 1.7345971563981042e-05, "loss": 2.0305, "step": 3660 }, { "epoch": 0.1304772909074749, "grad_norm": 1.803113341331482, "learning_rate": 1.7393364928909954e-05, "loss": 2.0559, "step": 3670 }, { "epoch": 0.13083281486090126, "grad_norm": 1.7713572978973389, "learning_rate": 1.7440758293838863e-05, "loss": 2.0995, "step": 3680 }, { "epoch": 0.13118833881432762, "grad_norm": 1.7642992734909058, "learning_rate": 1.7488151658767775e-05, "loss": 2.1226, "step": 3690 }, { "epoch": 0.13154386276775398, "grad_norm": 1.766026258468628, "learning_rate": 1.7535545023696683e-05, "loss": 2.0462, "step": 3700 }, { "epoch": 0.13189938672118035, "grad_norm": 1.7639282941818237, "learning_rate": 1.7582938388625595e-05, "loss": 2.0673, "step": 3710 }, { "epoch": 0.1322549106746067, "grad_norm": 1.7410707473754883, "learning_rate": 1.7630331753554504e-05, "loss": 2.0288, "step": 3720 }, { "epoch": 0.13261043462803307, "grad_norm": 1.8906439542770386, "learning_rate": 1.7677725118483413e-05, "loss": 2.1113, "step": 3730 }, { "epoch": 0.13296595858145943, "grad_norm": 1.8331823348999023, "learning_rate": 1.7725118483412325e-05, "loss": 2.1004, "step": 3740 }, { "epoch": 0.1333214825348858, "grad_norm": 1.6593939065933228, "learning_rate": 1.7772511848341233e-05, "loss": 2.1035, "step": 3750 }, { "epoch": 0.13367700648831216, "grad_norm": 1.7040388584136963, "learning_rate": 1.7819905213270142e-05, "loss": 2.091, "step": 3760 }, { "epoch": 0.13403253044173852, "grad_norm": 1.8393237590789795, "learning_rate": 1.7867298578199054e-05, "loss": 2.1337, "step": 3770 }, { "epoch": 0.13438805439516488, "grad_norm": 1.8253166675567627, "learning_rate": 1.7914691943127963e-05, "loss": 2.1284, "step": 3780 }, { "epoch": 0.13474357834859124, "grad_norm": 1.8016252517700195, "learning_rate": 1.7962085308056875e-05, "loss": 2.0431, "step": 3790 }, { "epoch": 0.1350991023020176, "grad_norm": 1.87069833278656, "learning_rate": 1.8009478672985784e-05, "loss": 2.1197, "step": 3800 }, { "epoch": 0.13545462625544397, "grad_norm": 1.6724790334701538, "learning_rate": 1.8056872037914696e-05, "loss": 2.0782, "step": 3810 }, { "epoch": 0.13581015020887033, "grad_norm": 1.9466593265533447, "learning_rate": 1.8104265402843604e-05, "loss": 2.0586, "step": 3820 }, { "epoch": 0.1361656741622967, "grad_norm": 1.7319644689559937, "learning_rate": 1.8151658767772513e-05, "loss": 2.0381, "step": 3830 }, { "epoch": 0.13652119811572305, "grad_norm": 1.7674616575241089, "learning_rate": 1.819905213270142e-05, "loss": 2.0909, "step": 3840 }, { "epoch": 0.13687672206914941, "grad_norm": 1.7459237575531006, "learning_rate": 1.8246445497630334e-05, "loss": 2.1042, "step": 3850 }, { "epoch": 0.13723224602257578, "grad_norm": 1.760031819343567, "learning_rate": 1.8293838862559242e-05, "loss": 2.0488, "step": 3860 }, { "epoch": 0.13758776997600214, "grad_norm": 1.712561011314392, "learning_rate": 1.8341232227488154e-05, "loss": 2.0586, "step": 3870 }, { "epoch": 0.1379432939294285, "grad_norm": 1.7915306091308594, "learning_rate": 1.8388625592417063e-05, "loss": 2.0766, "step": 3880 }, { "epoch": 0.13829881788285486, "grad_norm": 1.7054728269577026, "learning_rate": 1.8436018957345975e-05, "loss": 2.0715, "step": 3890 }, { "epoch": 0.13865434183628123, "grad_norm": 1.7007368803024292, "learning_rate": 1.8483412322274884e-05, "loss": 2.095, "step": 3900 }, { "epoch": 0.1390098657897076, "grad_norm": 1.7768644094467163, "learning_rate": 1.8530805687203792e-05, "loss": 2.0839, "step": 3910 }, { "epoch": 0.13936538974313395, "grad_norm": 1.7239347696304321, "learning_rate": 1.8578199052132704e-05, "loss": 2.0959, "step": 3920 }, { "epoch": 0.1397209136965603, "grad_norm": 1.720320463180542, "learning_rate": 1.8625592417061613e-05, "loss": 2.1016, "step": 3930 }, { "epoch": 0.14007643764998667, "grad_norm": 1.6920055150985718, "learning_rate": 1.8672985781990522e-05, "loss": 2.0696, "step": 3940 }, { "epoch": 0.14043196160341304, "grad_norm": 1.734519124031067, "learning_rate": 1.872037914691943e-05, "loss": 2.0701, "step": 3950 }, { "epoch": 0.1407874855568394, "grad_norm": 1.675017237663269, "learning_rate": 1.8767772511848342e-05, "loss": 2.0882, "step": 3960 }, { "epoch": 0.14114300951026576, "grad_norm": 1.674024224281311, "learning_rate": 1.881516587677725e-05, "loss": 2.0911, "step": 3970 }, { "epoch": 0.14149853346369212, "grad_norm": 1.8348963260650635, "learning_rate": 1.8862559241706163e-05, "loss": 2.0811, "step": 3980 }, { "epoch": 0.14185405741711848, "grad_norm": 1.7325314283370972, "learning_rate": 1.8909952606635075e-05, "loss": 2.1263, "step": 3990 }, { "epoch": 0.14220958137054485, "grad_norm": 1.7979702949523926, "learning_rate": 1.8957345971563984e-05, "loss": 2.0669, "step": 4000 }, { "epoch": 0.1425651053239712, "grad_norm": 1.8183528184890747, "learning_rate": 1.9004739336492893e-05, "loss": 2.0812, "step": 4010 }, { "epoch": 0.14292062927739757, "grad_norm": 1.834951400756836, "learning_rate": 1.90521327014218e-05, "loss": 2.1108, "step": 4020 }, { "epoch": 0.14327615323082393, "grad_norm": 1.8123762607574463, "learning_rate": 1.9099526066350713e-05, "loss": 2.1003, "step": 4030 }, { "epoch": 0.1436316771842503, "grad_norm": 1.7275183200836182, "learning_rate": 1.9146919431279622e-05, "loss": 2.0628, "step": 4040 }, { "epoch": 0.14398720113767666, "grad_norm": 1.761306643486023, "learning_rate": 1.919431279620853e-05, "loss": 2.0798, "step": 4050 }, { "epoch": 0.14434272509110302, "grad_norm": 1.7983522415161133, "learning_rate": 1.9241706161137443e-05, "loss": 2.0592, "step": 4060 }, { "epoch": 0.14469824904452938, "grad_norm": 1.7820279598236084, "learning_rate": 1.928909952606635e-05, "loss": 2.1256, "step": 4070 }, { "epoch": 0.14505377299795574, "grad_norm": 1.7048124074935913, "learning_rate": 1.9336492890995263e-05, "loss": 2.0455, "step": 4080 }, { "epoch": 0.1454092969513821, "grad_norm": 1.6913201808929443, "learning_rate": 1.9383886255924172e-05, "loss": 2.0553, "step": 4090 }, { "epoch": 0.14576482090480847, "grad_norm": 1.7552233934402466, "learning_rate": 1.9431279620853084e-05, "loss": 2.0603, "step": 4100 }, { "epoch": 0.14612034485823483, "grad_norm": 1.876004934310913, "learning_rate": 1.9478672985781993e-05, "loss": 2.055, "step": 4110 }, { "epoch": 0.1464758688116612, "grad_norm": 1.7505298852920532, "learning_rate": 1.95260663507109e-05, "loss": 2.0663, "step": 4120 }, { "epoch": 0.14683139276508755, "grad_norm": 1.703034520149231, "learning_rate": 1.957345971563981e-05, "loss": 2.057, "step": 4130 }, { "epoch": 0.14718691671851392, "grad_norm": 1.696067214012146, "learning_rate": 1.9620853080568722e-05, "loss": 2.056, "step": 4140 }, { "epoch": 0.14754244067194028, "grad_norm": 1.7719448804855347, "learning_rate": 1.966824644549763e-05, "loss": 2.0528, "step": 4150 }, { "epoch": 0.14789796462536664, "grad_norm": 1.7029516696929932, "learning_rate": 1.971563981042654e-05, "loss": 2.0723, "step": 4160 }, { "epoch": 0.148253488578793, "grad_norm": 1.654297113418579, "learning_rate": 1.976303317535545e-05, "loss": 2.1012, "step": 4170 }, { "epoch": 0.14860901253221936, "grad_norm": 1.712504506111145, "learning_rate": 1.9810426540284364e-05, "loss": 2.0764, "step": 4180 }, { "epoch": 0.14896453648564573, "grad_norm": 1.783565878868103, "learning_rate": 1.9857819905213272e-05, "loss": 2.0711, "step": 4190 }, { "epoch": 0.1493200604390721, "grad_norm": 1.6996036767959595, "learning_rate": 1.990521327014218e-05, "loss": 2.0745, "step": 4200 }, { "epoch": 0.14967558439249845, "grad_norm": 1.9073193073272705, "learning_rate": 1.9952606635071093e-05, "loss": 2.0869, "step": 4210 }, { "epoch": 0.1500311083459248, "grad_norm": 1.7565745115280151, "learning_rate": 2e-05, "loss": 2.0942, "step": 4220 }, { "epoch": 0.15038663229935118, "grad_norm": 1.7577910423278809, "learning_rate": 1.9999999232031353e-05, "loss": 2.0911, "step": 4230 }, { "epoch": 0.15074215625277754, "grad_norm": 1.8030418157577515, "learning_rate": 1.999999692812552e-05, "loss": 2.0636, "step": 4240 }, { "epoch": 0.1510976802062039, "grad_norm": 1.677435040473938, "learning_rate": 1.9999993088282862e-05, "loss": 2.0763, "step": 4250 }, { "epoch": 0.15145320415963026, "grad_norm": 1.6775643825531006, "learning_rate": 1.9999987712503962e-05, "loss": 2.0756, "step": 4260 }, { "epoch": 0.15180872811305662, "grad_norm": 1.6850625276565552, "learning_rate": 1.9999980800789655e-05, "loss": 2.0389, "step": 4270 }, { "epoch": 0.15216425206648299, "grad_norm": 1.7442346811294556, "learning_rate": 1.9999972353140994e-05, "loss": 2.0205, "step": 4280 }, { "epoch": 0.15251977601990935, "grad_norm": 1.7059242725372314, "learning_rate": 1.9999962369559283e-05, "loss": 2.1182, "step": 4290 }, { "epoch": 0.1528752999733357, "grad_norm": 1.754390835762024, "learning_rate": 1.999995085004605e-05, "loss": 2.1102, "step": 4300 }, { "epoch": 0.15323082392676207, "grad_norm": 1.7949542999267578, "learning_rate": 1.9999937794603067e-05, "loss": 2.0428, "step": 4310 }, { "epoch": 0.15358634788018843, "grad_norm": 1.7033107280731201, "learning_rate": 1.999992320323234e-05, "loss": 2.0979, "step": 4320 }, { "epoch": 0.1539418718336148, "grad_norm": 1.6337509155273438, "learning_rate": 1.9999907075936108e-05, "loss": 2.0603, "step": 4330 }, { "epoch": 0.15429739578704116, "grad_norm": 1.660717248916626, "learning_rate": 1.999988941271685e-05, "loss": 1.9921, "step": 4340 }, { "epoch": 0.15465291974046752, "grad_norm": 1.606544017791748, "learning_rate": 1.9999870213577273e-05, "loss": 2.0588, "step": 4350 }, { "epoch": 0.15500844369389388, "grad_norm": 1.9054453372955322, "learning_rate": 1.9999849478520335e-05, "loss": 2.0657, "step": 4360 }, { "epoch": 0.15536396764732024, "grad_norm": 1.735126256942749, "learning_rate": 1.999982720754922e-05, "loss": 2.0433, "step": 4370 }, { "epoch": 0.1557194916007466, "grad_norm": 1.8115259408950806, "learning_rate": 1.999980340066734e-05, "loss": 2.0617, "step": 4380 }, { "epoch": 0.15607501555417297, "grad_norm": 1.7404751777648926, "learning_rate": 1.9999778057878355e-05, "loss": 2.1696, "step": 4390 }, { "epoch": 0.15643053950759933, "grad_norm": 1.8304661512374878, "learning_rate": 1.9999751179186165e-05, "loss": 2.0745, "step": 4400 }, { "epoch": 0.1567860634610257, "grad_norm": 1.7348082065582275, "learning_rate": 1.999972276459489e-05, "loss": 2.0759, "step": 4410 }, { "epoch": 0.15714158741445206, "grad_norm": 1.7998183965682983, "learning_rate": 1.99996928141089e-05, "loss": 2.0714, "step": 4420 }, { "epoch": 0.15749711136787842, "grad_norm": 1.7887214422225952, "learning_rate": 1.999966132773279e-05, "loss": 2.0458, "step": 4430 }, { "epoch": 0.15785263532130478, "grad_norm": 1.6847667694091797, "learning_rate": 1.9999628305471398e-05, "loss": 2.0631, "step": 4440 }, { "epoch": 0.15820815927473114, "grad_norm": 1.6815139055252075, "learning_rate": 1.99995937473298e-05, "loss": 2.0624, "step": 4450 }, { "epoch": 0.1585636832281575, "grad_norm": 1.7544565200805664, "learning_rate": 1.9999557653313297e-05, "loss": 2.0384, "step": 4460 }, { "epoch": 0.15891920718158387, "grad_norm": 1.6978838443756104, "learning_rate": 1.9999520023427436e-05, "loss": 2.1006, "step": 4470 }, { "epoch": 0.15927473113501023, "grad_norm": 1.7811368703842163, "learning_rate": 1.9999480857677996e-05, "loss": 1.9965, "step": 4480 }, { "epoch": 0.1596302550884366, "grad_norm": 1.7523053884506226, "learning_rate": 1.9999440156070994e-05, "loss": 2.0728, "step": 4490 }, { "epoch": 0.15998577904186295, "grad_norm": 1.7489559650421143, "learning_rate": 1.999939791861268e-05, "loss": 2.0528, "step": 4500 }, { "epoch": 0.16034130299528931, "grad_norm": 1.6864522695541382, "learning_rate": 1.9999354145309547e-05, "loss": 2.0547, "step": 4510 }, { "epoch": 0.16069682694871568, "grad_norm": 1.8162442445755005, "learning_rate": 1.999930883616831e-05, "loss": 2.0844, "step": 4520 }, { "epoch": 0.16105235090214204, "grad_norm": 1.7836850881576538, "learning_rate": 1.9999261991195932e-05, "loss": 2.0555, "step": 4530 }, { "epoch": 0.1614078748555684, "grad_norm": 1.7825000286102295, "learning_rate": 1.999921361039961e-05, "loss": 2.1001, "step": 4540 }, { "epoch": 0.16176339880899476, "grad_norm": 1.6806426048278809, "learning_rate": 1.9999163693786773e-05, "loss": 2.0538, "step": 4550 }, { "epoch": 0.16211892276242112, "grad_norm": 1.753866195678711, "learning_rate": 1.9999112241365087e-05, "loss": 2.066, "step": 4560 }, { "epoch": 0.1624744467158475, "grad_norm": 1.8063266277313232, "learning_rate": 1.9999059253142455e-05, "loss": 2.0863, "step": 4570 }, { "epoch": 0.16282997066927385, "grad_norm": 1.6922051906585693, "learning_rate": 1.9999004729127015e-05, "loss": 2.0699, "step": 4580 }, { "epoch": 0.1631854946227002, "grad_norm": 1.783348798751831, "learning_rate": 1.9998948669327146e-05, "loss": 2.0548, "step": 4590 }, { "epoch": 0.16354101857612657, "grad_norm": 1.8088620901107788, "learning_rate": 1.9998891073751455e-05, "loss": 2.036, "step": 4600 }, { "epoch": 0.16389654252955294, "grad_norm": 1.6807172298431396, "learning_rate": 1.9998831942408786e-05, "loss": 2.0789, "step": 4610 }, { "epoch": 0.1642520664829793, "grad_norm": 1.806488275527954, "learning_rate": 1.9998771275308225e-05, "loss": 2.0311, "step": 4620 }, { "epoch": 0.16460759043640566, "grad_norm": 1.6912070512771606, "learning_rate": 1.999870907245909e-05, "loss": 2.0759, "step": 4630 }, { "epoch": 0.16496311438983202, "grad_norm": 1.8495771884918213, "learning_rate": 1.9998645333870933e-05, "loss": 2.0675, "step": 4640 }, { "epoch": 0.16531863834325838, "grad_norm": 1.8063349723815918, "learning_rate": 1.9998580059553545e-05, "loss": 2.0519, "step": 4650 }, { "epoch": 0.16567416229668475, "grad_norm": 1.629701018333435, "learning_rate": 1.999851324951695e-05, "loss": 2.05, "step": 4660 }, { "epoch": 0.1660296862501111, "grad_norm": 1.757324457168579, "learning_rate": 1.9998444903771414e-05, "loss": 2.0642, "step": 4670 }, { "epoch": 0.16638521020353747, "grad_norm": 1.7033988237380981, "learning_rate": 1.999837502232743e-05, "loss": 2.0087, "step": 4680 }, { "epoch": 0.16674073415696383, "grad_norm": 1.8296514749526978, "learning_rate": 1.9998303605195733e-05, "loss": 2.0361, "step": 4690 }, { "epoch": 0.1670962581103902, "grad_norm": 1.7206568717956543, "learning_rate": 1.999823065238729e-05, "loss": 2.031, "step": 4700 }, { "epoch": 0.16745178206381656, "grad_norm": 1.7555227279663086, "learning_rate": 1.999815616391331e-05, "loss": 2.0715, "step": 4710 }, { "epoch": 0.16780730601724292, "grad_norm": 1.6362791061401367, "learning_rate": 1.9998080139785233e-05, "loss": 2.0709, "step": 4720 }, { "epoch": 0.16816282997066928, "grad_norm": 1.6888320446014404, "learning_rate": 1.999800258001473e-05, "loss": 2.1093, "step": 4730 }, { "epoch": 0.16851835392409564, "grad_norm": 1.8050616979599, "learning_rate": 1.9997923484613726e-05, "loss": 1.9949, "step": 4740 }, { "epoch": 0.168873877877522, "grad_norm": 1.8730134963989258, "learning_rate": 1.9997842853594358e-05, "loss": 2.0634, "step": 4750 }, { "epoch": 0.16922940183094837, "grad_norm": 1.7403571605682373, "learning_rate": 1.999776068696902e-05, "loss": 2.0523, "step": 4760 }, { "epoch": 0.16958492578437473, "grad_norm": 1.7689752578735352, "learning_rate": 1.999767698475032e-05, "loss": 2.0308, "step": 4770 }, { "epoch": 0.1699404497378011, "grad_norm": 1.739279866218567, "learning_rate": 1.9997591746951124e-05, "loss": 2.0456, "step": 4780 }, { "epoch": 0.17029597369122745, "grad_norm": 1.750306248664856, "learning_rate": 1.9997504973584522e-05, "loss": 2.0603, "step": 4790 }, { "epoch": 0.17065149764465382, "grad_norm": 1.6228183507919312, "learning_rate": 1.9997416664663836e-05, "loss": 2.0419, "step": 4800 }, { "epoch": 0.17100702159808018, "grad_norm": 1.744736671447754, "learning_rate": 1.9997326820202637e-05, "loss": 2.085, "step": 4810 }, { "epoch": 0.17136254555150654, "grad_norm": 1.699845314025879, "learning_rate": 1.9997235440214727e-05, "loss": 2.0595, "step": 4820 }, { "epoch": 0.1717180695049329, "grad_norm": 1.7860050201416016, "learning_rate": 1.9997142524714133e-05, "loss": 2.0442, "step": 4830 }, { "epoch": 0.17207359345835926, "grad_norm": 1.6302872896194458, "learning_rate": 1.999704807371513e-05, "loss": 1.9743, "step": 4840 }, { "epoch": 0.17242911741178563, "grad_norm": 1.6316944360733032, "learning_rate": 1.9996952087232224e-05, "loss": 2.0304, "step": 4850 }, { "epoch": 0.172784641365212, "grad_norm": 1.5872083902359009, "learning_rate": 1.999685456528016e-05, "loss": 2.0263, "step": 4860 }, { "epoch": 0.17314016531863835, "grad_norm": 1.7513489723205566, "learning_rate": 1.9996755507873913e-05, "loss": 2.0782, "step": 4870 }, { "epoch": 0.1734956892720647, "grad_norm": 1.7436871528625488, "learning_rate": 1.9996654915028702e-05, "loss": 2.028, "step": 4880 }, { "epoch": 0.17385121322549107, "grad_norm": 1.938206672668457, "learning_rate": 1.9996552786759976e-05, "loss": 2.0195, "step": 4890 }, { "epoch": 0.17420673717891744, "grad_norm": 1.8229395151138306, "learning_rate": 1.999644912308342e-05, "loss": 2.0548, "step": 4900 }, { "epoch": 0.1745622611323438, "grad_norm": 1.8051745891571045, "learning_rate": 1.9996343924014955e-05, "loss": 2.0011, "step": 4910 }, { "epoch": 0.17491778508577016, "grad_norm": 1.7209690809249878, "learning_rate": 1.9996237189570745e-05, "loss": 2.0255, "step": 4920 }, { "epoch": 0.17527330903919652, "grad_norm": 1.8421382904052734, "learning_rate": 1.999612891976718e-05, "loss": 2.0211, "step": 4930 }, { "epoch": 0.17562883299262289, "grad_norm": 1.680746078491211, "learning_rate": 1.9996019114620883e-05, "loss": 2.0211, "step": 4940 }, { "epoch": 0.17598435694604925, "grad_norm": 1.7081247568130493, "learning_rate": 1.9995907774148732e-05, "loss": 2.0696, "step": 4950 }, { "epoch": 0.1763398808994756, "grad_norm": 1.6198205947875977, "learning_rate": 1.999579489836782e-05, "loss": 2.0525, "step": 4960 }, { "epoch": 0.17669540485290197, "grad_norm": 1.6535507440567017, "learning_rate": 1.999568048729548e-05, "loss": 2.0414, "step": 4970 }, { "epoch": 0.17705092880632833, "grad_norm": 1.5443240404129028, "learning_rate": 1.9995564540949298e-05, "loss": 2.0826, "step": 4980 }, { "epoch": 0.1774064527597547, "grad_norm": 1.733921766281128, "learning_rate": 1.9995447059347072e-05, "loss": 2.0257, "step": 4990 }, { "epoch": 0.17776197671318106, "grad_norm": 1.6675341129302979, "learning_rate": 1.999532804250685e-05, "loss": 2.0282, "step": 5000 }, { "epoch": 0.17811750066660742, "grad_norm": 1.6987435817718506, "learning_rate": 1.9995207490446913e-05, "loss": 2.0103, "step": 5010 }, { "epoch": 0.17847302462003378, "grad_norm": 1.7035290002822876, "learning_rate": 1.9995085403185772e-05, "loss": 2.0162, "step": 5020 }, { "epoch": 0.17882854857346014, "grad_norm": 1.6742795705795288, "learning_rate": 1.9994961780742188e-05, "loss": 1.9878, "step": 5030 }, { "epoch": 0.1791840725268865, "grad_norm": 1.6395647525787354, "learning_rate": 1.999483662313514e-05, "loss": 2.0434, "step": 5040 }, { "epoch": 0.17953959648031287, "grad_norm": 1.7217377424240112, "learning_rate": 1.9994709930383857e-05, "loss": 2.0372, "step": 5050 }, { "epoch": 0.17989512043373923, "grad_norm": 1.750941514968872, "learning_rate": 1.9994581702507793e-05, "loss": 2.0878, "step": 5060 }, { "epoch": 0.1802506443871656, "grad_norm": 1.6906191110610962, "learning_rate": 1.999445193952665e-05, "loss": 2.025, "step": 5070 }, { "epoch": 0.18060616834059195, "grad_norm": 1.736352801322937, "learning_rate": 1.999432064146035e-05, "loss": 2.0176, "step": 5080 }, { "epoch": 0.18096169229401832, "grad_norm": 1.7025351524353027, "learning_rate": 1.9994187808329068e-05, "loss": 2.0428, "step": 5090 }, { "epoch": 0.18131721624744468, "grad_norm": 1.745902419090271, "learning_rate": 1.99940534401532e-05, "loss": 2.0525, "step": 5100 }, { "epoch": 0.18167274020087104, "grad_norm": 1.695774793624878, "learning_rate": 1.9993917536953387e-05, "loss": 2.0147, "step": 5110 }, { "epoch": 0.1820282641542974, "grad_norm": 1.876430630683899, "learning_rate": 1.9993780098750506e-05, "loss": 2.0695, "step": 5120 }, { "epoch": 0.18238378810772377, "grad_norm": 1.6322757005691528, "learning_rate": 1.999364112556566e-05, "loss": 2.1169, "step": 5130 }, { "epoch": 0.18273931206115013, "grad_norm": 1.739761471748352, "learning_rate": 1.9993500617420202e-05, "loss": 2.0632, "step": 5140 }, { "epoch": 0.1830948360145765, "grad_norm": 1.6902257204055786, "learning_rate": 1.9993358574335703e-05, "loss": 1.982, "step": 5150 }, { "epoch": 0.18345035996800285, "grad_norm": 1.7146416902542114, "learning_rate": 1.999321499633399e-05, "loss": 2.0088, "step": 5160 }, { "epoch": 0.18380588392142921, "grad_norm": 1.7331721782684326, "learning_rate": 1.999306988343711e-05, "loss": 2.0219, "step": 5170 }, { "epoch": 0.18416140787485558, "grad_norm": 1.7011710405349731, "learning_rate": 1.9992923235667354e-05, "loss": 1.9641, "step": 5180 }, { "epoch": 0.18451693182828194, "grad_norm": 1.6844632625579834, "learning_rate": 1.9992775053047245e-05, "loss": 2.008, "step": 5190 }, { "epoch": 0.1848724557817083, "grad_norm": 1.6663172245025635, "learning_rate": 1.999262533559954e-05, "loss": 2.0401, "step": 5200 }, { "epoch": 0.18522797973513466, "grad_norm": 1.7183222770690918, "learning_rate": 1.9992474083347243e-05, "loss": 2.0093, "step": 5210 }, { "epoch": 0.18558350368856102, "grad_norm": 1.7464849948883057, "learning_rate": 1.9992321296313574e-05, "loss": 2.0489, "step": 5220 }, { "epoch": 0.1859390276419874, "grad_norm": 1.7614524364471436, "learning_rate": 1.9992166974522012e-05, "loss": 1.9959, "step": 5230 }, { "epoch": 0.18629455159541375, "grad_norm": 1.7039707899093628, "learning_rate": 1.999201111799625e-05, "loss": 2.0562, "step": 5240 }, { "epoch": 0.1866500755488401, "grad_norm": 1.6510025262832642, "learning_rate": 1.9991853726760234e-05, "loss": 2.0293, "step": 5250 }, { "epoch": 0.18700559950226647, "grad_norm": 1.7329883575439453, "learning_rate": 1.999169480083813e-05, "loss": 2.0111, "step": 5260 }, { "epoch": 0.18736112345569284, "grad_norm": 1.5895287990570068, "learning_rate": 1.999153434025436e-05, "loss": 2.0381, "step": 5270 }, { "epoch": 0.1877166474091192, "grad_norm": 1.7224647998809814, "learning_rate": 1.9991372345033558e-05, "loss": 2.0457, "step": 5280 }, { "epoch": 0.18807217136254556, "grad_norm": 1.6291033029556274, "learning_rate": 1.999120881520061e-05, "loss": 1.9931, "step": 5290 }, { "epoch": 0.18842769531597192, "grad_norm": 1.7096868753433228, "learning_rate": 1.9991043750780636e-05, "loss": 2.0432, "step": 5300 }, { "epoch": 0.18878321926939828, "grad_norm": 1.709851861000061, "learning_rate": 1.9990877151798983e-05, "loss": 2.0747, "step": 5310 }, { "epoch": 0.18913874322282465, "grad_norm": 1.6630563735961914, "learning_rate": 1.9990709018281244e-05, "loss": 2.0401, "step": 5320 }, { "epoch": 0.189494267176251, "grad_norm": 1.7594939470291138, "learning_rate": 1.9990539350253244e-05, "loss": 2.0068, "step": 5330 }, { "epoch": 0.18984979112967737, "grad_norm": 1.6987512111663818, "learning_rate": 1.999036814774104e-05, "loss": 2.0458, "step": 5340 }, { "epoch": 0.19020531508310373, "grad_norm": 1.7822080850601196, "learning_rate": 1.999019541077093e-05, "loss": 2.0003, "step": 5350 }, { "epoch": 0.1905608390365301, "grad_norm": 1.6965094804763794, "learning_rate": 1.9990021139369437e-05, "loss": 2.0074, "step": 5360 }, { "epoch": 0.19091636298995646, "grad_norm": 1.7385228872299194, "learning_rate": 1.998984533356334e-05, "loss": 2.007, "step": 5370 }, { "epoch": 0.19127188694338282, "grad_norm": 1.8442659378051758, "learning_rate": 1.9989667993379636e-05, "loss": 2.0201, "step": 5380 }, { "epoch": 0.19162741089680918, "grad_norm": 1.6964536905288696, "learning_rate": 1.9989489118845565e-05, "loss": 2.0059, "step": 5390 }, { "epoch": 0.19198293485023554, "grad_norm": 1.6384342908859253, "learning_rate": 1.99893087099886e-05, "loss": 2.035, "step": 5400 }, { "epoch": 0.1923384588036619, "grad_norm": 1.866665005683899, "learning_rate": 1.998912676683645e-05, "loss": 2.039, "step": 5410 }, { "epoch": 0.19269398275708827, "grad_norm": 1.658677101135254, "learning_rate": 1.998894328941706e-05, "loss": 2.0323, "step": 5420 }, { "epoch": 0.19304950671051463, "grad_norm": 1.708578109741211, "learning_rate": 1.9988758277758613e-05, "loss": 1.9997, "step": 5430 }, { "epoch": 0.193405030663941, "grad_norm": 1.7182897329330444, "learning_rate": 1.9988571731889522e-05, "loss": 2.0486, "step": 5440 }, { "epoch": 0.19376055461736735, "grad_norm": 1.7253814935684204, "learning_rate": 1.9988383651838447e-05, "loss": 2.068, "step": 5450 }, { "epoch": 0.19411607857079372, "grad_norm": 1.7060468196868896, "learning_rate": 1.9988194037634267e-05, "loss": 2.0753, "step": 5460 }, { "epoch": 0.19447160252422008, "grad_norm": 1.7148746252059937, "learning_rate": 1.998800288930611e-05, "loss": 2.1006, "step": 5470 }, { "epoch": 0.19482712647764644, "grad_norm": 1.7313423156738281, "learning_rate": 1.9987810206883334e-05, "loss": 2.0498, "step": 5480 }, { "epoch": 0.1951826504310728, "grad_norm": 1.7278350591659546, "learning_rate": 1.9987615990395536e-05, "loss": 2.0793, "step": 5490 }, { "epoch": 0.19553817438449916, "grad_norm": 1.7059909105300903, "learning_rate": 1.998742023987254e-05, "loss": 2.0506, "step": 5500 }, { "epoch": 0.19589369833792553, "grad_norm": 1.6768639087677002, "learning_rate": 1.9987222955344423e-05, "loss": 2.0102, "step": 5510 }, { "epoch": 0.1962492222913519, "grad_norm": 1.6817662715911865, "learning_rate": 1.998702413684148e-05, "loss": 2.0643, "step": 5520 }, { "epoch": 0.19660474624477825, "grad_norm": 1.687806248664856, "learning_rate": 1.9986823784394244e-05, "loss": 2.0429, "step": 5530 }, { "epoch": 0.1969602701982046, "grad_norm": 1.6257810592651367, "learning_rate": 1.9986621898033496e-05, "loss": 2.0082, "step": 5540 }, { "epoch": 0.19731579415163097, "grad_norm": 1.6978223323822021, "learning_rate": 1.9986418477790237e-05, "loss": 2.0416, "step": 5550 }, { "epoch": 0.19767131810505734, "grad_norm": 1.6773324012756348, "learning_rate": 1.998621352369572e-05, "loss": 2.0248, "step": 5560 }, { "epoch": 0.1980268420584837, "grad_norm": 1.616140365600586, "learning_rate": 1.998600703578142e-05, "loss": 1.9969, "step": 5570 }, { "epoch": 0.19838236601191006, "grad_norm": 1.7479193210601807, "learning_rate": 1.9985799014079048e-05, "loss": 2.0435, "step": 5580 }, { "epoch": 0.19873788996533642, "grad_norm": 1.7679039239883423, "learning_rate": 1.998558945862056e-05, "loss": 2.0311, "step": 5590 }, { "epoch": 0.19909341391876278, "grad_norm": 1.6383432149887085, "learning_rate": 1.9985378369438143e-05, "loss": 2.0143, "step": 5600 }, { "epoch": 0.19944893787218915, "grad_norm": 1.709038257598877, "learning_rate": 1.9985165746564215e-05, "loss": 1.9876, "step": 5610 }, { "epoch": 0.1998044618256155, "grad_norm": 1.638545274734497, "learning_rate": 1.9984951590031437e-05, "loss": 2.0268, "step": 5620 }, { "epoch": 0.20015998577904187, "grad_norm": 1.6110061407089233, "learning_rate": 1.9984735899872703e-05, "loss": 2.0289, "step": 5630 }, { "epoch": 0.20051550973246823, "grad_norm": 1.7065180540084839, "learning_rate": 1.9984518676121137e-05, "loss": 1.9918, "step": 5640 }, { "epoch": 0.2008710336858946, "grad_norm": 1.667405366897583, "learning_rate": 1.9984299918810108e-05, "loss": 2.0055, "step": 5650 }, { "epoch": 0.20122655763932096, "grad_norm": 1.665473461151123, "learning_rate": 1.998407962797321e-05, "loss": 2.0696, "step": 5660 }, { "epoch": 0.20158208159274732, "grad_norm": 1.6744557619094849, "learning_rate": 1.9983857803644283e-05, "loss": 2.0162, "step": 5670 }, { "epoch": 0.20193760554617368, "grad_norm": 1.6898198127746582, "learning_rate": 1.9983634445857397e-05, "loss": 2.0127, "step": 5680 }, { "epoch": 0.20229312949960004, "grad_norm": 1.6509838104248047, "learning_rate": 1.998340955464686e-05, "loss": 2.0387, "step": 5690 }, { "epoch": 0.2026486534530264, "grad_norm": 1.6415313482284546, "learning_rate": 1.998318313004721e-05, "loss": 1.9999, "step": 5700 }, { "epoch": 0.20300417740645277, "grad_norm": 1.5902107954025269, "learning_rate": 1.9982955172093227e-05, "loss": 2.0019, "step": 5710 }, { "epoch": 0.20335970135987913, "grad_norm": 1.6910830736160278, "learning_rate": 1.9982725680819922e-05, "loss": 2.0246, "step": 5720 }, { "epoch": 0.2037152253133055, "grad_norm": 1.7578151226043701, "learning_rate": 1.9982494656262544e-05, "loss": 2.0488, "step": 5730 }, { "epoch": 0.20407074926673185, "grad_norm": 1.7221277952194214, "learning_rate": 1.9982262098456582e-05, "loss": 2.0274, "step": 5740 }, { "epoch": 0.20442627322015822, "grad_norm": 1.6714609861373901, "learning_rate": 1.9982028007437745e-05, "loss": 1.9624, "step": 5750 }, { "epoch": 0.20478179717358458, "grad_norm": 1.7402448654174805, "learning_rate": 1.9981792383242e-05, "loss": 2.0711, "step": 5760 }, { "epoch": 0.20513732112701094, "grad_norm": 1.6645225286483765, "learning_rate": 1.9981555225905526e-05, "loss": 2.0254, "step": 5770 }, { "epoch": 0.2054928450804373, "grad_norm": 1.6826421022415161, "learning_rate": 1.9981316535464758e-05, "loss": 1.9941, "step": 5780 }, { "epoch": 0.20584836903386367, "grad_norm": 1.8125617504119873, "learning_rate": 1.998107631195635e-05, "loss": 1.9914, "step": 5790 }, { "epoch": 0.20620389298729003, "grad_norm": 1.7178610563278198, "learning_rate": 1.9980834555417203e-05, "loss": 2.0078, "step": 5800 }, { "epoch": 0.2065594169407164, "grad_norm": 1.7131744623184204, "learning_rate": 1.998059126588445e-05, "loss": 2.0586, "step": 5810 }, { "epoch": 0.20691494089414275, "grad_norm": 1.646399974822998, "learning_rate": 1.9980346443395454e-05, "loss": 1.9898, "step": 5820 }, { "epoch": 0.2072704648475691, "grad_norm": 1.6671161651611328, "learning_rate": 1.9980100087987826e-05, "loss": 1.9859, "step": 5830 }, { "epoch": 0.20762598880099548, "grad_norm": 1.600407361984253, "learning_rate": 1.9979852199699402e-05, "loss": 2.0468, "step": 5840 }, { "epoch": 0.20798151275442184, "grad_norm": 1.7410165071487427, "learning_rate": 1.9979602778568246e-05, "loss": 1.9843, "step": 5850 }, { "epoch": 0.2083370367078482, "grad_norm": 1.652017593383789, "learning_rate": 1.9979351824632683e-05, "loss": 2.0221, "step": 5860 }, { "epoch": 0.20869256066127456, "grad_norm": 1.7493135929107666, "learning_rate": 1.997909933793125e-05, "loss": 2.0233, "step": 5870 }, { "epoch": 0.20904808461470092, "grad_norm": 1.6278657913208008, "learning_rate": 1.9978845318502724e-05, "loss": 2.0155, "step": 5880 }, { "epoch": 0.2094036085681273, "grad_norm": 1.7754439115524292, "learning_rate": 1.9978589766386126e-05, "loss": 2.0214, "step": 5890 }, { "epoch": 0.20975913252155365, "grad_norm": 1.7136434316635132, "learning_rate": 1.997833268162071e-05, "loss": 2.0478, "step": 5900 }, { "epoch": 0.21011465647498, "grad_norm": 1.8517942428588867, "learning_rate": 1.9978074064245955e-05, "loss": 1.9675, "step": 5910 }, { "epoch": 0.21047018042840637, "grad_norm": 1.7563371658325195, "learning_rate": 1.997781391430159e-05, "loss": 1.9882, "step": 5920 }, { "epoch": 0.21082570438183273, "grad_norm": 1.78915274143219, "learning_rate": 1.9977552231827566e-05, "loss": 2.0131, "step": 5930 }, { "epoch": 0.2111812283352591, "grad_norm": 1.6726717948913574, "learning_rate": 1.997728901686408e-05, "loss": 2.0216, "step": 5940 }, { "epoch": 0.21153675228868546, "grad_norm": 1.6768577098846436, "learning_rate": 1.9977024269451563e-05, "loss": 2.013, "step": 5950 }, { "epoch": 0.21189227624211182, "grad_norm": 1.6979024410247803, "learning_rate": 1.9976757989630667e-05, "loss": 1.9895, "step": 5960 }, { "epoch": 0.21224780019553818, "grad_norm": 1.6715058088302612, "learning_rate": 1.9976490177442303e-05, "loss": 2.0148, "step": 5970 }, { "epoch": 0.21260332414896455, "grad_norm": 1.655503273010254, "learning_rate": 1.99762208329276e-05, "loss": 1.9948, "step": 5980 }, { "epoch": 0.2129588481023909, "grad_norm": 1.6729159355163574, "learning_rate": 1.9975949956127928e-05, "loss": 2.0333, "step": 5990 }, { "epoch": 0.21331437205581727, "grad_norm": 1.8006285429000854, "learning_rate": 1.9975677547084892e-05, "loss": 2.0658, "step": 6000 }, { "epoch": 0.21366989600924363, "grad_norm": 1.688770055770874, "learning_rate": 1.9975403605840336e-05, "loss": 2.0038, "step": 6010 }, { "epoch": 0.21402541996267, "grad_norm": 1.6850205659866333, "learning_rate": 1.997512813243633e-05, "loss": 2.0117, "step": 6020 }, { "epoch": 0.21438094391609636, "grad_norm": 1.7397541999816895, "learning_rate": 1.9974851126915185e-05, "loss": 1.9922, "step": 6030 }, { "epoch": 0.21473646786952272, "grad_norm": 1.6360909938812256, "learning_rate": 1.9974572589319456e-05, "loss": 2.0097, "step": 6040 }, { "epoch": 0.21509199182294908, "grad_norm": 1.600882649421692, "learning_rate": 1.9974292519691912e-05, "loss": 2.0519, "step": 6050 }, { "epoch": 0.21544751577637544, "grad_norm": 1.6638786792755127, "learning_rate": 1.9974010918075582e-05, "loss": 2.0346, "step": 6060 }, { "epoch": 0.2158030397298018, "grad_norm": 1.6250038146972656, "learning_rate": 1.997372778451371e-05, "loss": 2.0147, "step": 6070 }, { "epoch": 0.21615856368322817, "grad_norm": 1.7188122272491455, "learning_rate": 1.9973443119049785e-05, "loss": 2.0257, "step": 6080 }, { "epoch": 0.21651408763665453, "grad_norm": 1.798150897026062, "learning_rate": 1.997315692172753e-05, "loss": 1.9708, "step": 6090 }, { "epoch": 0.2168696115900809, "grad_norm": 1.6894288063049316, "learning_rate": 1.9972869192590906e-05, "loss": 1.9706, "step": 6100 }, { "epoch": 0.21722513554350725, "grad_norm": 1.6506953239440918, "learning_rate": 1.99725799316841e-05, "loss": 1.9368, "step": 6110 }, { "epoch": 0.21758065949693361, "grad_norm": 1.7530012130737305, "learning_rate": 1.9972289139051553e-05, "loss": 2.0346, "step": 6120 }, { "epoch": 0.21793618345035998, "grad_norm": 1.7064405679702759, "learning_rate": 1.9971996814737916e-05, "loss": 2.0102, "step": 6130 }, { "epoch": 0.21829170740378634, "grad_norm": 1.6821569204330444, "learning_rate": 1.9971702958788092e-05, "loss": 2.0345, "step": 6140 }, { "epoch": 0.2186472313572127, "grad_norm": 1.6987396478652954, "learning_rate": 1.997140757124722e-05, "loss": 2.0189, "step": 6150 }, { "epoch": 0.21900275531063906, "grad_norm": 1.6398649215698242, "learning_rate": 1.9971110652160665e-05, "loss": 2.0298, "step": 6160 }, { "epoch": 0.21935827926406543, "grad_norm": 1.791056752204895, "learning_rate": 1.9970812201574033e-05, "loss": 1.9631, "step": 6170 }, { "epoch": 0.2197138032174918, "grad_norm": 1.6377999782562256, "learning_rate": 1.9970512219533163e-05, "loss": 2.0032, "step": 6180 }, { "epoch": 0.22006932717091815, "grad_norm": 1.733130931854248, "learning_rate": 1.9970210706084135e-05, "loss": 2.0358, "step": 6190 }, { "epoch": 0.2204248511243445, "grad_norm": 1.6794812679290771, "learning_rate": 1.9969907661273254e-05, "loss": 1.966, "step": 6200 }, { "epoch": 0.22078037507777087, "grad_norm": 1.684327483177185, "learning_rate": 1.996960308514707e-05, "loss": 2.0665, "step": 6210 }, { "epoch": 0.22113589903119724, "grad_norm": 1.670153021812439, "learning_rate": 1.9969296977752358e-05, "loss": 1.9962, "step": 6220 }, { "epoch": 0.2214914229846236, "grad_norm": 1.7059471607208252, "learning_rate": 1.996898933913614e-05, "loss": 2.0477, "step": 6230 }, { "epoch": 0.22184694693804996, "grad_norm": 1.7042587995529175, "learning_rate": 1.996868016934567e-05, "loss": 2.0678, "step": 6240 }, { "epoch": 0.22220247089147632, "grad_norm": 1.6933459043502808, "learning_rate": 1.9968369468428422e-05, "loss": 1.9882, "step": 6250 }, { "epoch": 0.22255799484490268, "grad_norm": 1.7479889392852783, "learning_rate": 1.9968057236432132e-05, "loss": 1.985, "step": 6260 }, { "epoch": 0.22291351879832905, "grad_norm": 1.6197444200515747, "learning_rate": 1.996774347340475e-05, "loss": 1.9593, "step": 6270 }, { "epoch": 0.2232690427517554, "grad_norm": 1.6699209213256836, "learning_rate": 1.9967428179394464e-05, "loss": 2.0094, "step": 6280 }, { "epoch": 0.22362456670518177, "grad_norm": 1.7393150329589844, "learning_rate": 1.996711135444971e-05, "loss": 1.9976, "step": 6290 }, { "epoch": 0.22398009065860813, "grad_norm": 1.6761125326156616, "learning_rate": 1.9966792998619147e-05, "loss": 1.9891, "step": 6300 }, { "epoch": 0.2243356146120345, "grad_norm": 1.7271678447723389, "learning_rate": 1.996647311195167e-05, "loss": 2.0581, "step": 6310 }, { "epoch": 0.22469113856546086, "grad_norm": 1.7058919668197632, "learning_rate": 1.9966151694496413e-05, "loss": 2.0233, "step": 6320 }, { "epoch": 0.22504666251888722, "grad_norm": 1.668379306793213, "learning_rate": 1.9965828746302743e-05, "loss": 2.0138, "step": 6330 }, { "epoch": 0.22540218647231358, "grad_norm": 1.7820411920547485, "learning_rate": 1.9965504267420266e-05, "loss": 2.0, "step": 6340 }, { "epoch": 0.22575771042573994, "grad_norm": 1.8417869806289673, "learning_rate": 1.9965178257898818e-05, "loss": 1.9905, "step": 6350 }, { "epoch": 0.2261132343791663, "grad_norm": 1.6538697481155396, "learning_rate": 1.9964850717788468e-05, "loss": 1.978, "step": 6360 }, { "epoch": 0.22646875833259267, "grad_norm": 1.6233092546463013, "learning_rate": 1.9964521647139534e-05, "loss": 1.9075, "step": 6370 }, { "epoch": 0.22682428228601903, "grad_norm": 1.6171590089797974, "learning_rate": 1.996419104600255e-05, "loss": 1.9938, "step": 6380 }, { "epoch": 0.2271798062394454, "grad_norm": 1.6618120670318604, "learning_rate": 1.9963858914428295e-05, "loss": 2.0056, "step": 6390 }, { "epoch": 0.22753533019287175, "grad_norm": 1.6301599740982056, "learning_rate": 1.9963525252467787e-05, "loss": 2.0055, "step": 6400 }, { "epoch": 0.22789085414629812, "grad_norm": 1.8050549030303955, "learning_rate": 1.996319006017227e-05, "loss": 2.0112, "step": 6410 }, { "epoch": 0.22824637809972448, "grad_norm": 1.8067948818206787, "learning_rate": 1.9962853337593234e-05, "loss": 1.9908, "step": 6420 }, { "epoch": 0.22860190205315084, "grad_norm": 1.656425952911377, "learning_rate": 1.996251508478239e-05, "loss": 2.0401, "step": 6430 }, { "epoch": 0.2289574260065772, "grad_norm": 1.7688844203948975, "learning_rate": 1.9962175301791695e-05, "loss": 1.9961, "step": 6440 }, { "epoch": 0.22931294996000356, "grad_norm": 1.6689438819885254, "learning_rate": 1.9961833988673332e-05, "loss": 1.9763, "step": 6450 }, { "epoch": 0.22966847391342993, "grad_norm": 1.6061345338821411, "learning_rate": 1.9961491145479736e-05, "loss": 2.0064, "step": 6460 }, { "epoch": 0.2300239978668563, "grad_norm": 1.6575069427490234, "learning_rate": 1.9961146772263557e-05, "loss": 2.0424, "step": 6470 }, { "epoch": 0.23037952182028265, "grad_norm": 1.7679375410079956, "learning_rate": 1.996080086907769e-05, "loss": 2.0052, "step": 6480 }, { "epoch": 0.230735045773709, "grad_norm": 1.7035895586013794, "learning_rate": 1.9960453435975266e-05, "loss": 2.0053, "step": 6490 }, { "epoch": 0.23109056972713538, "grad_norm": 1.6635199785232544, "learning_rate": 1.9960104473009645e-05, "loss": 2.0175, "step": 6500 }, { "epoch": 0.23144609368056174, "grad_norm": 1.6349929571151733, "learning_rate": 1.9959753980234428e-05, "loss": 2.0434, "step": 6510 }, { "epoch": 0.2318016176339881, "grad_norm": 1.7579482793807983, "learning_rate": 1.9959401957703447e-05, "loss": 1.9801, "step": 6520 }, { "epoch": 0.23215714158741446, "grad_norm": 1.7130075693130493, "learning_rate": 1.995904840547077e-05, "loss": 1.9774, "step": 6530 }, { "epoch": 0.23251266554084082, "grad_norm": 1.6815969944000244, "learning_rate": 1.9958693323590706e-05, "loss": 1.9948, "step": 6540 }, { "epoch": 0.23286818949426719, "grad_norm": 1.673559308052063, "learning_rate": 1.9958336712117783e-05, "loss": 1.9878, "step": 6550 }, { "epoch": 0.23322371344769355, "grad_norm": 1.74596107006073, "learning_rate": 1.9957978571106785e-05, "loss": 2.0221, "step": 6560 }, { "epoch": 0.2335792374011199, "grad_norm": 1.663725733757019, "learning_rate": 1.9957618900612714e-05, "loss": 1.9824, "step": 6570 }, { "epoch": 0.23393476135454627, "grad_norm": 1.6240813732147217, "learning_rate": 1.9957257700690816e-05, "loss": 2.0462, "step": 6580 }, { "epoch": 0.23429028530797263, "grad_norm": 1.791911005973816, "learning_rate": 1.9956894971396566e-05, "loss": 1.9706, "step": 6590 }, { "epoch": 0.234645809261399, "grad_norm": 1.6861087083816528, "learning_rate": 1.995653071278568e-05, "loss": 2.0081, "step": 6600 }, { "epoch": 0.23500133321482536, "grad_norm": 1.6701043844223022, "learning_rate": 1.9956164924914102e-05, "loss": 2.0197, "step": 6610 }, { "epoch": 0.23535685716825172, "grad_norm": 1.657975673675537, "learning_rate": 1.995579760783802e-05, "loss": 2.0568, "step": 6620 }, { "epoch": 0.23571238112167808, "grad_norm": 1.7297064065933228, "learning_rate": 1.9955428761613847e-05, "loss": 1.9838, "step": 6630 }, { "epoch": 0.23606790507510444, "grad_norm": 1.5905978679656982, "learning_rate": 1.9955058386298235e-05, "loss": 1.9611, "step": 6640 }, { "epoch": 0.2364234290285308, "grad_norm": 1.620131492614746, "learning_rate": 1.9954686481948078e-05, "loss": 2.028, "step": 6650 }, { "epoch": 0.23677895298195717, "grad_norm": 1.6750433444976807, "learning_rate": 1.9954313048620493e-05, "loss": 2.0173, "step": 6660 }, { "epoch": 0.23713447693538353, "grad_norm": 1.7565199136734009, "learning_rate": 1.9953938086372833e-05, "loss": 2.0216, "step": 6670 }, { "epoch": 0.2374900008888099, "grad_norm": 1.6780565977096558, "learning_rate": 1.9953561595262702e-05, "loss": 1.9907, "step": 6680 }, { "epoch": 0.23784552484223626, "grad_norm": 1.7479665279388428, "learning_rate": 1.9953183575347913e-05, "loss": 2.011, "step": 6690 }, { "epoch": 0.23820104879566262, "grad_norm": 1.497279405593872, "learning_rate": 1.995280402668654e-05, "loss": 1.9641, "step": 6700 }, { "epoch": 0.23855657274908898, "grad_norm": 1.6642096042633057, "learning_rate": 1.9952422949336867e-05, "loss": 2.0186, "step": 6710 }, { "epoch": 0.23891209670251534, "grad_norm": 1.632526159286499, "learning_rate": 1.9952040343357434e-05, "loss": 1.9739, "step": 6720 }, { "epoch": 0.2392676206559417, "grad_norm": 1.690468192100525, "learning_rate": 1.9951656208807005e-05, "loss": 2.0481, "step": 6730 }, { "epoch": 0.23962314460936807, "grad_norm": 1.7386364936828613, "learning_rate": 1.9951270545744576e-05, "loss": 2.0415, "step": 6740 }, { "epoch": 0.23997866856279443, "grad_norm": 1.662619709968567, "learning_rate": 1.9950883354229388e-05, "loss": 2.0204, "step": 6750 }, { "epoch": 0.2403341925162208, "grad_norm": 1.5891426801681519, "learning_rate": 1.9950494634320912e-05, "loss": 1.9883, "step": 6760 }, { "epoch": 0.24068971646964715, "grad_norm": 1.4959187507629395, "learning_rate": 1.995010438607885e-05, "loss": 1.9959, "step": 6770 }, { "epoch": 0.24104524042307351, "grad_norm": 1.819345235824585, "learning_rate": 1.9949712609563136e-05, "loss": 1.9796, "step": 6780 }, { "epoch": 0.24140076437649988, "grad_norm": 1.6453137397766113, "learning_rate": 1.9949319304833955e-05, "loss": 1.9728, "step": 6790 }, { "epoch": 0.24175628832992624, "grad_norm": 1.695168137550354, "learning_rate": 1.9948924471951712e-05, "loss": 1.9577, "step": 6800 }, { "epoch": 0.2421118122833526, "grad_norm": 1.682152509689331, "learning_rate": 1.994852811097705e-05, "loss": 2.0488, "step": 6810 }, { "epoch": 0.24246733623677896, "grad_norm": 1.5780915021896362, "learning_rate": 1.9948130221970844e-05, "loss": 1.9847, "step": 6820 }, { "epoch": 0.24282286019020533, "grad_norm": 1.7200599908828735, "learning_rate": 1.9947730804994215e-05, "loss": 2.0024, "step": 6830 }, { "epoch": 0.2431783841436317, "grad_norm": 1.6747233867645264, "learning_rate": 1.9947329860108507e-05, "loss": 2.0127, "step": 6840 }, { "epoch": 0.24353390809705805, "grad_norm": 1.659424066543579, "learning_rate": 1.99469273873753e-05, "loss": 1.9576, "step": 6850 }, { "epoch": 0.2438894320504844, "grad_norm": 1.7689529657363892, "learning_rate": 1.994652338685642e-05, "loss": 2.0254, "step": 6860 }, { "epoch": 0.24424495600391077, "grad_norm": 1.6394208669662476, "learning_rate": 1.9946117858613905e-05, "loss": 1.9943, "step": 6870 }, { "epoch": 0.24460047995733714, "grad_norm": 1.563705325126648, "learning_rate": 1.9945710802710056e-05, "loss": 1.9709, "step": 6880 }, { "epoch": 0.2449560039107635, "grad_norm": 1.7137606143951416, "learning_rate": 1.9945302219207386e-05, "loss": 1.9595, "step": 6890 }, { "epoch": 0.24531152786418986, "grad_norm": 1.663386344909668, "learning_rate": 1.9944892108168653e-05, "loss": 2.0311, "step": 6900 }, { "epoch": 0.24566705181761622, "grad_norm": 1.6972986459732056, "learning_rate": 1.9944480469656846e-05, "loss": 1.9545, "step": 6910 }, { "epoch": 0.24602257577104258, "grad_norm": 1.664995789527893, "learning_rate": 1.994406730373519e-05, "loss": 1.9906, "step": 6920 }, { "epoch": 0.24637809972446895, "grad_norm": 1.7193104028701782, "learning_rate": 1.9943652610467147e-05, "loss": 2.0138, "step": 6930 }, { "epoch": 0.2467336236778953, "grad_norm": 1.629887342453003, "learning_rate": 1.9943236389916412e-05, "loss": 2.0099, "step": 6940 }, { "epoch": 0.24708914763132167, "grad_norm": 1.5323138236999512, "learning_rate": 1.994281864214691e-05, "loss": 2.0121, "step": 6950 }, { "epoch": 0.24744467158474803, "grad_norm": 1.743226170539856, "learning_rate": 1.9942399367222808e-05, "loss": 1.938, "step": 6960 }, { "epoch": 0.2478001955381744, "grad_norm": 1.6816143989562988, "learning_rate": 1.99419785652085e-05, "loss": 2.0286, "step": 6970 }, { "epoch": 0.24815571949160076, "grad_norm": 1.6175199747085571, "learning_rate": 1.9941556236168624e-05, "loss": 2.0356, "step": 6980 }, { "epoch": 0.24851124344502712, "grad_norm": 1.6289336681365967, "learning_rate": 1.9941132380168043e-05, "loss": 2.0156, "step": 6990 }, { "epoch": 0.24886676739845348, "grad_norm": 1.7291028499603271, "learning_rate": 1.994070699727186e-05, "loss": 1.9689, "step": 7000 }, { "epoch": 0.24922229135187984, "grad_norm": 1.6669102907180786, "learning_rate": 1.994028008754541e-05, "loss": 2.0108, "step": 7010 }, { "epoch": 0.2495778153053062, "grad_norm": 1.6643351316452026, "learning_rate": 1.9939851651054265e-05, "loss": 2.0142, "step": 7020 }, { "epoch": 0.24993333925873257, "grad_norm": 1.6312966346740723, "learning_rate": 1.993942168786423e-05, "loss": 1.9666, "step": 7030 }, { "epoch": 0.25028886321215893, "grad_norm": 1.6095354557037354, "learning_rate": 1.9938990198041344e-05, "loss": 2.0038, "step": 7040 }, { "epoch": 0.2506443871655853, "grad_norm": 1.5703375339508057, "learning_rate": 1.9938557181651882e-05, "loss": 1.9245, "step": 7050 }, { "epoch": 0.25099991111901165, "grad_norm": 1.5972967147827148, "learning_rate": 1.9938122638762353e-05, "loss": 1.9794, "step": 7060 }, { "epoch": 0.251355435072438, "grad_norm": 1.6072685718536377, "learning_rate": 1.9937686569439497e-05, "loss": 1.972, "step": 7070 }, { "epoch": 0.2517109590258644, "grad_norm": 1.6625516414642334, "learning_rate": 1.9937248973750297e-05, "loss": 2.0329, "step": 7080 }, { "epoch": 0.25206648297929074, "grad_norm": 1.6364060640335083, "learning_rate": 1.993680985176196e-05, "loss": 1.9573, "step": 7090 }, { "epoch": 0.2524220069327171, "grad_norm": 1.6397068500518799, "learning_rate": 1.9936369203541932e-05, "loss": 1.9711, "step": 7100 }, { "epoch": 0.25277753088614346, "grad_norm": 1.6169209480285645, "learning_rate": 1.9935927029157897e-05, "loss": 1.9561, "step": 7110 }, { "epoch": 0.2531330548395698, "grad_norm": 1.6281391382217407, "learning_rate": 1.993548332867777e-05, "loss": 1.9774, "step": 7120 }, { "epoch": 0.2534885787929962, "grad_norm": 1.7074105739593506, "learning_rate": 1.99350381021697e-05, "loss": 2.0044, "step": 7130 }, { "epoch": 0.25384410274642255, "grad_norm": 1.6432868242263794, "learning_rate": 1.9934591349702068e-05, "loss": 2.0351, "step": 7140 }, { "epoch": 0.2541996266998489, "grad_norm": 1.6380431652069092, "learning_rate": 1.99341430713435e-05, "loss": 1.945, "step": 7150 }, { "epoch": 0.2545551506532753, "grad_norm": 1.728100299835205, "learning_rate": 1.9933693267162838e-05, "loss": 1.9846, "step": 7160 }, { "epoch": 0.25491067460670164, "grad_norm": 1.668286919593811, "learning_rate": 1.9933241937229176e-05, "loss": 1.9898, "step": 7170 }, { "epoch": 0.255266198560128, "grad_norm": 1.5646265745162964, "learning_rate": 1.9932789081611838e-05, "loss": 2.0117, "step": 7180 }, { "epoch": 0.25562172251355436, "grad_norm": 1.7030260562896729, "learning_rate": 1.9932334700380375e-05, "loss": 2.0039, "step": 7190 }, { "epoch": 0.2559772464669807, "grad_norm": 1.6385523080825806, "learning_rate": 1.9931878793604577e-05, "loss": 1.9967, "step": 7200 }, { "epoch": 0.2563327704204071, "grad_norm": 1.6005942821502686, "learning_rate": 1.993142136135447e-05, "loss": 1.9734, "step": 7210 }, { "epoch": 0.25668829437383345, "grad_norm": 1.6573022603988647, "learning_rate": 1.9930962403700313e-05, "loss": 2.0014, "step": 7220 }, { "epoch": 0.2570438183272598, "grad_norm": 1.624893069267273, "learning_rate": 1.9930501920712597e-05, "loss": 1.9705, "step": 7230 }, { "epoch": 0.25739934228068617, "grad_norm": 1.571858525276184, "learning_rate": 1.9930039912462052e-05, "loss": 2.033, "step": 7240 }, { "epoch": 0.25775486623411253, "grad_norm": 1.672079086303711, "learning_rate": 1.9929576379019638e-05, "loss": 1.9694, "step": 7250 }, { "epoch": 0.2581103901875389, "grad_norm": 1.7197091579437256, "learning_rate": 1.992911132045655e-05, "loss": 2.0, "step": 7260 }, { "epoch": 0.25846591414096526, "grad_norm": 1.6931955814361572, "learning_rate": 1.992864473684422e-05, "loss": 1.9984, "step": 7270 }, { "epoch": 0.2588214380943916, "grad_norm": 1.7102130651474, "learning_rate": 1.9928176628254313e-05, "loss": 1.9194, "step": 7280 }, { "epoch": 0.259176962047818, "grad_norm": 1.6537212133407593, "learning_rate": 1.9927706994758728e-05, "loss": 2.0039, "step": 7290 }, { "epoch": 0.25953248600124434, "grad_norm": 1.5721324682235718, "learning_rate": 1.992723583642959e-05, "loss": 1.9303, "step": 7300 }, { "epoch": 0.2598880099546707, "grad_norm": 1.559722661972046, "learning_rate": 1.9926763153339275e-05, "loss": 1.9264, "step": 7310 }, { "epoch": 0.26024353390809707, "grad_norm": 1.630336880683899, "learning_rate": 1.992628894556038e-05, "loss": 1.9928, "step": 7320 }, { "epoch": 0.26059905786152343, "grad_norm": 1.6448107957839966, "learning_rate": 1.9925813213165742e-05, "loss": 1.9774, "step": 7330 }, { "epoch": 0.2609545818149498, "grad_norm": 1.5473449230194092, "learning_rate": 1.9925335956228426e-05, "loss": 1.9798, "step": 7340 }, { "epoch": 0.26131010576837616, "grad_norm": 1.6154736280441284, "learning_rate": 1.9924857174821744e-05, "loss": 1.9911, "step": 7350 }, { "epoch": 0.2616656297218025, "grad_norm": 1.7111319303512573, "learning_rate": 1.992437686901923e-05, "loss": 1.9627, "step": 7360 }, { "epoch": 0.2620211536752289, "grad_norm": 1.7432341575622559, "learning_rate": 1.9923895038894652e-05, "loss": 2.0037, "step": 7370 }, { "epoch": 0.26237667762865524, "grad_norm": 1.6998772621154785, "learning_rate": 1.992341168452202e-05, "loss": 1.9835, "step": 7380 }, { "epoch": 0.2627322015820816, "grad_norm": 1.6501591205596924, "learning_rate": 1.9922926805975576e-05, "loss": 2.0224, "step": 7390 }, { "epoch": 0.26308772553550797, "grad_norm": 1.7000082731246948, "learning_rate": 1.9922440403329788e-05, "loss": 1.9953, "step": 7400 }, { "epoch": 0.26344324948893433, "grad_norm": 1.7175724506378174, "learning_rate": 1.992195247665937e-05, "loss": 1.9985, "step": 7410 }, { "epoch": 0.2637987734423607, "grad_norm": 1.661278486251831, "learning_rate": 1.9921463026039264e-05, "loss": 1.9272, "step": 7420 }, { "epoch": 0.26415429739578705, "grad_norm": 1.6486077308654785, "learning_rate": 1.9920972051544646e-05, "loss": 1.9973, "step": 7430 }, { "epoch": 0.2645098213492134, "grad_norm": 1.7884916067123413, "learning_rate": 1.9920479553250922e-05, "loss": 1.9849, "step": 7440 }, { "epoch": 0.2648653453026398, "grad_norm": 1.6757444143295288, "learning_rate": 1.9919985531233743e-05, "loss": 1.9792, "step": 7450 }, { "epoch": 0.26522086925606614, "grad_norm": 1.592671513557434, "learning_rate": 1.9919489985568985e-05, "loss": 1.9994, "step": 7460 }, { "epoch": 0.2655763932094925, "grad_norm": 1.6154669523239136, "learning_rate": 1.991899291633276e-05, "loss": 1.9741, "step": 7470 }, { "epoch": 0.26593191716291886, "grad_norm": 1.6644245386123657, "learning_rate": 1.9918494323601415e-05, "loss": 1.9716, "step": 7480 }, { "epoch": 0.2662874411163452, "grad_norm": 1.726770281791687, "learning_rate": 1.9917994207451533e-05, "loss": 2.0, "step": 7490 }, { "epoch": 0.2666429650697716, "grad_norm": 1.72080397605896, "learning_rate": 1.9917492567959926e-05, "loss": 1.9882, "step": 7500 }, { "epoch": 0.26699848902319795, "grad_norm": 1.7543189525604248, "learning_rate": 1.9916989405203648e-05, "loss": 1.9709, "step": 7510 }, { "epoch": 0.2673540129766243, "grad_norm": 1.7151052951812744, "learning_rate": 1.9916484719259973e-05, "loss": 1.9461, "step": 7520 }, { "epoch": 0.2677095369300507, "grad_norm": 1.6305655241012573, "learning_rate": 1.9915978510206423e-05, "loss": 1.984, "step": 7530 }, { "epoch": 0.26806506088347704, "grad_norm": 1.655402660369873, "learning_rate": 1.9915470778120746e-05, "loss": 1.9903, "step": 7540 }, { "epoch": 0.2684205848369034, "grad_norm": 1.6377874612808228, "learning_rate": 1.991496152308093e-05, "loss": 2.0417, "step": 7550 }, { "epoch": 0.26877610879032976, "grad_norm": 1.6017521619796753, "learning_rate": 1.991445074516519e-05, "loss": 1.9541, "step": 7560 }, { "epoch": 0.2691316327437561, "grad_norm": 1.6277523040771484, "learning_rate": 1.991393844445198e-05, "loss": 1.9803, "step": 7570 }, { "epoch": 0.2694871566971825, "grad_norm": 1.6906298398971558, "learning_rate": 1.9913424621019987e-05, "loss": 2.0192, "step": 7580 }, { "epoch": 0.26984268065060885, "grad_norm": 1.611890435218811, "learning_rate": 1.9912909274948126e-05, "loss": 1.9576, "step": 7590 }, { "epoch": 0.2701982046040352, "grad_norm": 1.5919207334518433, "learning_rate": 1.991239240631556e-05, "loss": 2.0154, "step": 7600 }, { "epoch": 0.27055372855746157, "grad_norm": 1.6700984239578247, "learning_rate": 1.9911874015201667e-05, "loss": 1.9696, "step": 7610 }, { "epoch": 0.27090925251088793, "grad_norm": 1.6968141794204712, "learning_rate": 1.9911354101686076e-05, "loss": 1.9666, "step": 7620 }, { "epoch": 0.2712647764643143, "grad_norm": 1.6996207237243652, "learning_rate": 1.991083266584864e-05, "loss": 2.0248, "step": 7630 }, { "epoch": 0.27162030041774066, "grad_norm": 1.5757447481155396, "learning_rate": 1.9910309707769448e-05, "loss": 2.0071, "step": 7640 }, { "epoch": 0.271975824371167, "grad_norm": 1.7433087825775146, "learning_rate": 1.990978522752882e-05, "loss": 2.0058, "step": 7650 }, { "epoch": 0.2723313483245934, "grad_norm": 1.6996994018554688, "learning_rate": 1.9909259225207318e-05, "loss": 1.9921, "step": 7660 }, { "epoch": 0.27268687227801974, "grad_norm": 1.8098211288452148, "learning_rate": 1.990873170088573e-05, "loss": 1.959, "step": 7670 }, { "epoch": 0.2730423962314461, "grad_norm": 1.6081329584121704, "learning_rate": 1.9908202654645082e-05, "loss": 1.9493, "step": 7680 }, { "epoch": 0.27339792018487247, "grad_norm": 1.668576717376709, "learning_rate": 1.990767208656663e-05, "loss": 1.9601, "step": 7690 }, { "epoch": 0.27375344413829883, "grad_norm": 1.720616102218628, "learning_rate": 1.9907139996731866e-05, "loss": 1.9961, "step": 7700 }, { "epoch": 0.2741089680917252, "grad_norm": 1.6592384576797485, "learning_rate": 1.990660638522252e-05, "loss": 1.9922, "step": 7710 }, { "epoch": 0.27446449204515155, "grad_norm": 1.6151938438415527, "learning_rate": 1.9906071252120546e-05, "loss": 1.9841, "step": 7720 }, { "epoch": 0.2748200159985779, "grad_norm": 1.651526689529419, "learning_rate": 1.990553459750814e-05, "loss": 1.9546, "step": 7730 }, { "epoch": 0.2751755399520043, "grad_norm": 1.7546813488006592, "learning_rate": 1.9904996421467727e-05, "loss": 1.9433, "step": 7740 }, { "epoch": 0.27553106390543064, "grad_norm": 1.8067822456359863, "learning_rate": 1.990445672408197e-05, "loss": 1.9861, "step": 7750 }, { "epoch": 0.275886587858857, "grad_norm": 1.693223237991333, "learning_rate": 1.990391550543376e-05, "loss": 1.9519, "step": 7760 }, { "epoch": 0.27624211181228336, "grad_norm": 1.5922542810440063, "learning_rate": 1.9903372765606227e-05, "loss": 1.957, "step": 7770 }, { "epoch": 0.2765976357657097, "grad_norm": 1.715636134147644, "learning_rate": 1.9902828504682733e-05, "loss": 1.9941, "step": 7780 }, { "epoch": 0.2769531597191361, "grad_norm": 1.71389639377594, "learning_rate": 1.990228272274687e-05, "loss": 1.9925, "step": 7790 }, { "epoch": 0.27730868367256245, "grad_norm": 1.6671241521835327, "learning_rate": 1.9901735419882467e-05, "loss": 1.9709, "step": 7800 }, { "epoch": 0.2776642076259888, "grad_norm": 1.6671831607818604, "learning_rate": 1.9901186596173593e-05, "loss": 1.9648, "step": 7810 }, { "epoch": 0.2780197315794152, "grad_norm": 1.6454179286956787, "learning_rate": 1.9900636251704537e-05, "loss": 1.9815, "step": 7820 }, { "epoch": 0.27837525553284154, "grad_norm": 1.6738249063491821, "learning_rate": 1.990008438655983e-05, "loss": 1.9861, "step": 7830 }, { "epoch": 0.2787307794862679, "grad_norm": 1.6551613807678223, "learning_rate": 1.9899531000824234e-05, "loss": 1.916, "step": 7840 }, { "epoch": 0.27908630343969426, "grad_norm": 1.6680654287338257, "learning_rate": 1.9898976094582746e-05, "loss": 2.0078, "step": 7850 }, { "epoch": 0.2794418273931206, "grad_norm": 1.69269597530365, "learning_rate": 1.9898419667920598e-05, "loss": 1.9147, "step": 7860 }, { "epoch": 0.279797351346547, "grad_norm": 1.6375586986541748, "learning_rate": 1.9897861720923255e-05, "loss": 1.9521, "step": 7870 }, { "epoch": 0.28015287529997335, "grad_norm": 1.6596815586090088, "learning_rate": 1.989730225367641e-05, "loss": 1.9801, "step": 7880 }, { "epoch": 0.2805083992533997, "grad_norm": 1.649316668510437, "learning_rate": 1.9896741266265994e-05, "loss": 1.924, "step": 7890 }, { "epoch": 0.28086392320682607, "grad_norm": 1.636256217956543, "learning_rate": 1.9896178758778173e-05, "loss": 2.0577, "step": 7900 }, { "epoch": 0.28121944716025243, "grad_norm": 1.656315565109253, "learning_rate": 1.9895614731299344e-05, "loss": 1.9554, "step": 7910 }, { "epoch": 0.2815749711136788, "grad_norm": 1.6406137943267822, "learning_rate": 1.989504918391614e-05, "loss": 1.969, "step": 7920 }, { "epoch": 0.28193049506710516, "grad_norm": 1.7492715120315552, "learning_rate": 1.9894482116715422e-05, "loss": 2.0121, "step": 7930 }, { "epoch": 0.2822860190205315, "grad_norm": 1.7002264261245728, "learning_rate": 1.9893913529784294e-05, "loss": 1.9895, "step": 7940 }, { "epoch": 0.2826415429739579, "grad_norm": 1.7042263746261597, "learning_rate": 1.9893343423210077e-05, "loss": 1.9873, "step": 7950 }, { "epoch": 0.28299706692738424, "grad_norm": 1.6575679779052734, "learning_rate": 1.9892771797080342e-05, "loss": 2.017, "step": 7960 }, { "epoch": 0.2833525908808106, "grad_norm": 1.537338137626648, "learning_rate": 1.989219865148289e-05, "loss": 1.9702, "step": 7970 }, { "epoch": 0.28370811483423697, "grad_norm": 1.6482051610946655, "learning_rate": 1.9891623986505752e-05, "loss": 1.9639, "step": 7980 }, { "epoch": 0.28406363878766333, "grad_norm": 1.6478086709976196, "learning_rate": 1.9891047802237188e-05, "loss": 2.0201, "step": 7990 }, { "epoch": 0.2844191627410897, "grad_norm": 1.7550305128097534, "learning_rate": 1.98904700987657e-05, "loss": 1.9951, "step": 8000 }, { "epoch": 0.28477468669451605, "grad_norm": 1.7667663097381592, "learning_rate": 1.9889890876180015e-05, "loss": 1.9867, "step": 8010 }, { "epoch": 0.2851302106479424, "grad_norm": 1.6240298748016357, "learning_rate": 1.9889310134569104e-05, "loss": 1.9708, "step": 8020 }, { "epoch": 0.2854857346013688, "grad_norm": 1.6603147983551025, "learning_rate": 1.9888727874022163e-05, "loss": 1.9523, "step": 8030 }, { "epoch": 0.28584125855479514, "grad_norm": 1.6123783588409424, "learning_rate": 1.9888144094628624e-05, "loss": 1.9703, "step": 8040 }, { "epoch": 0.2861967825082215, "grad_norm": 1.8504589796066284, "learning_rate": 1.988755879647815e-05, "loss": 1.9828, "step": 8050 }, { "epoch": 0.28655230646164787, "grad_norm": 1.5599009990692139, "learning_rate": 1.988697197966064e-05, "loss": 1.9852, "step": 8060 }, { "epoch": 0.2869078304150742, "grad_norm": 1.671850562095642, "learning_rate": 1.9886383644266226e-05, "loss": 1.9513, "step": 8070 }, { "epoch": 0.2872633543685006, "grad_norm": 1.6670169830322266, "learning_rate": 1.9885793790385274e-05, "loss": 1.9426, "step": 8080 }, { "epoch": 0.28761887832192695, "grad_norm": 1.6796684265136719, "learning_rate": 1.988520241810838e-05, "loss": 1.9954, "step": 8090 }, { "epoch": 0.2879744022753533, "grad_norm": 1.6433721780776978, "learning_rate": 1.9884609527526374e-05, "loss": 2.0219, "step": 8100 }, { "epoch": 0.2883299262287797, "grad_norm": 1.6410399675369263, "learning_rate": 1.988401511873032e-05, "loss": 1.9721, "step": 8110 }, { "epoch": 0.28868545018220604, "grad_norm": 1.6350502967834473, "learning_rate": 1.988341919181152e-05, "loss": 1.9548, "step": 8120 }, { "epoch": 0.2890409741356324, "grad_norm": 1.6690397262573242, "learning_rate": 1.98828217468615e-05, "loss": 1.9407, "step": 8130 }, { "epoch": 0.28939649808905876, "grad_norm": 1.7214834690093994, "learning_rate": 1.9882222783972026e-05, "loss": 1.953, "step": 8140 }, { "epoch": 0.2897520220424851, "grad_norm": 1.700016975402832, "learning_rate": 1.9881622303235094e-05, "loss": 2.031, "step": 8150 }, { "epoch": 0.2901075459959115, "grad_norm": 1.659131407737732, "learning_rate": 1.9881020304742935e-05, "loss": 1.9669, "step": 8160 }, { "epoch": 0.29046306994933785, "grad_norm": 1.6631243228912354, "learning_rate": 1.9880416788588013e-05, "loss": 1.989, "step": 8170 }, { "epoch": 0.2908185939027642, "grad_norm": 1.6930702924728394, "learning_rate": 1.9879811754863022e-05, "loss": 1.9386, "step": 8180 }, { "epoch": 0.2911741178561906, "grad_norm": 1.6611827611923218, "learning_rate": 1.987920520366089e-05, "loss": 1.9319, "step": 8190 }, { "epoch": 0.29152964180961694, "grad_norm": 1.7308286428451538, "learning_rate": 1.9878597135074784e-05, "loss": 1.9844, "step": 8200 }, { "epoch": 0.2918851657630433, "grad_norm": 1.7405385971069336, "learning_rate": 1.9877987549198097e-05, "loss": 2.0176, "step": 8210 }, { "epoch": 0.29224068971646966, "grad_norm": 1.5844897031784058, "learning_rate": 1.987737644612446e-05, "loss": 1.9362, "step": 8220 }, { "epoch": 0.292596213669896, "grad_norm": 1.6568342447280884, "learning_rate": 1.987676382594773e-05, "loss": 1.9755, "step": 8230 }, { "epoch": 0.2929517376233224, "grad_norm": 1.615202784538269, "learning_rate": 1.9876149688762e-05, "loss": 1.9943, "step": 8240 }, { "epoch": 0.29330726157674875, "grad_norm": 1.6444897651672363, "learning_rate": 1.9875534034661607e-05, "loss": 2.0033, "step": 8250 }, { "epoch": 0.2936627855301751, "grad_norm": 1.6145836114883423, "learning_rate": 1.9874916863741108e-05, "loss": 1.9547, "step": 8260 }, { "epoch": 0.29401830948360147, "grad_norm": 1.6955664157867432, "learning_rate": 1.9874298176095292e-05, "loss": 1.9754, "step": 8270 }, { "epoch": 0.29437383343702783, "grad_norm": 1.7114002704620361, "learning_rate": 1.987367797181919e-05, "loss": 1.9632, "step": 8280 }, { "epoch": 0.2947293573904542, "grad_norm": 1.7069765329360962, "learning_rate": 1.9873056251008057e-05, "loss": 1.945, "step": 8290 }, { "epoch": 0.29508488134388056, "grad_norm": 1.728232979774475, "learning_rate": 1.987243301375739e-05, "loss": 1.9799, "step": 8300 }, { "epoch": 0.2954404052973069, "grad_norm": 1.684863567352295, "learning_rate": 1.9871808260162914e-05, "loss": 1.984, "step": 8310 }, { "epoch": 0.2957959292507333, "grad_norm": 1.7075393199920654, "learning_rate": 1.9871181990320586e-05, "loss": 1.9602, "step": 8320 }, { "epoch": 0.29615145320415964, "grad_norm": 1.620227575302124, "learning_rate": 1.9870554204326597e-05, "loss": 1.9698, "step": 8330 }, { "epoch": 0.296506977157586, "grad_norm": 1.690664529800415, "learning_rate": 1.9869924902277368e-05, "loss": 1.996, "step": 8340 }, { "epoch": 0.29686250111101237, "grad_norm": 1.6752780675888062, "learning_rate": 1.9869294084269563e-05, "loss": 1.9374, "step": 8350 }, { "epoch": 0.29721802506443873, "grad_norm": 1.6255035400390625, "learning_rate": 1.9868661750400066e-05, "loss": 1.9483, "step": 8360 }, { "epoch": 0.2975735490178651, "grad_norm": 1.593761920928955, "learning_rate": 1.9868027900765997e-05, "loss": 1.9777, "step": 8370 }, { "epoch": 0.29792907297129145, "grad_norm": 1.7208243608474731, "learning_rate": 1.986739253546472e-05, "loss": 1.9921, "step": 8380 }, { "epoch": 0.2982845969247178, "grad_norm": 1.6406222581863403, "learning_rate": 1.986675565459382e-05, "loss": 1.9242, "step": 8390 }, { "epoch": 0.2986401208781442, "grad_norm": 1.610335111618042, "learning_rate": 1.9866117258251112e-05, "loss": 1.9952, "step": 8400 }, { "epoch": 0.29899564483157054, "grad_norm": 1.560603141784668, "learning_rate": 1.986547734653466e-05, "loss": 1.9441, "step": 8410 }, { "epoch": 0.2993511687849969, "grad_norm": 1.7074131965637207, "learning_rate": 1.986483591954274e-05, "loss": 1.9832, "step": 8420 }, { "epoch": 0.29970669273842326, "grad_norm": 1.6053872108459473, "learning_rate": 1.9864192977373876e-05, "loss": 1.9541, "step": 8430 }, { "epoch": 0.3000622166918496, "grad_norm": 1.589003324508667, "learning_rate": 1.986354852012682e-05, "loss": 1.9578, "step": 8440 }, { "epoch": 0.300417740645276, "grad_norm": 1.634819746017456, "learning_rate": 1.9862902547900556e-05, "loss": 1.9853, "step": 8450 }, { "epoch": 0.30077326459870235, "grad_norm": 1.6535834074020386, "learning_rate": 1.98622550607943e-05, "loss": 1.9676, "step": 8460 }, { "epoch": 0.3011287885521287, "grad_norm": 1.7705475091934204, "learning_rate": 1.986160605890751e-05, "loss": 1.9961, "step": 8470 }, { "epoch": 0.3014843125055551, "grad_norm": 1.552779197692871, "learning_rate": 1.9860955542339857e-05, "loss": 1.967, "step": 8480 }, { "epoch": 0.30183983645898144, "grad_norm": 1.7160229682922363, "learning_rate": 1.9860303511191263e-05, "loss": 1.9741, "step": 8490 }, { "epoch": 0.3021953604124078, "grad_norm": 1.616591453552246, "learning_rate": 1.9859649965561872e-05, "loss": 1.9501, "step": 8500 }, { "epoch": 0.30255088436583416, "grad_norm": 1.6419483423233032, "learning_rate": 1.985899490555207e-05, "loss": 1.9324, "step": 8510 }, { "epoch": 0.3029064083192605, "grad_norm": 1.7029931545257568, "learning_rate": 1.9858338331262463e-05, "loss": 1.9497, "step": 8520 }, { "epoch": 0.3032619322726869, "grad_norm": 1.6598072052001953, "learning_rate": 1.9857680242793903e-05, "loss": 1.9854, "step": 8530 }, { "epoch": 0.30361745622611325, "grad_norm": 1.6088287830352783, "learning_rate": 1.9857020640247466e-05, "loss": 1.9934, "step": 8540 }, { "epoch": 0.3039729801795396, "grad_norm": 1.561793565750122, "learning_rate": 1.9856359523724462e-05, "loss": 1.9441, "step": 8550 }, { "epoch": 0.30432850413296597, "grad_norm": 1.61014986038208, "learning_rate": 1.9855696893326435e-05, "loss": 1.9602, "step": 8560 }, { "epoch": 0.30468402808639233, "grad_norm": 1.710195541381836, "learning_rate": 1.9855032749155163e-05, "loss": 1.9865, "step": 8570 }, { "epoch": 0.3050395520398187, "grad_norm": 1.6399401426315308, "learning_rate": 1.985436709131265e-05, "loss": 1.968, "step": 8580 }, { "epoch": 0.30539507599324506, "grad_norm": 1.6504136323928833, "learning_rate": 1.985369991990114e-05, "loss": 1.9412, "step": 8590 }, { "epoch": 0.3057505999466714, "grad_norm": 1.6371665000915527, "learning_rate": 1.9853031235023104e-05, "loss": 2.036, "step": 8600 }, { "epoch": 0.3061061239000978, "grad_norm": 1.5760153532028198, "learning_rate": 1.985236103678125e-05, "loss": 1.9256, "step": 8610 }, { "epoch": 0.30646164785352414, "grad_norm": 1.683082103729248, "learning_rate": 1.9851689325278516e-05, "loss": 1.959, "step": 8620 }, { "epoch": 0.3068171718069505, "grad_norm": 1.6927025318145752, "learning_rate": 1.9851016100618072e-05, "loss": 1.9749, "step": 8630 }, { "epoch": 0.30717269576037687, "grad_norm": 1.7396620512008667, "learning_rate": 1.9850341362903322e-05, "loss": 1.9904, "step": 8640 }, { "epoch": 0.30752821971380323, "grad_norm": 1.6537268161773682, "learning_rate": 1.9849665112237898e-05, "loss": 1.9988, "step": 8650 }, { "epoch": 0.3078837436672296, "grad_norm": 1.6655324697494507, "learning_rate": 1.9848987348725674e-05, "loss": 2.0425, "step": 8660 }, { "epoch": 0.30823926762065595, "grad_norm": 1.8228856325149536, "learning_rate": 1.9848308072470746e-05, "loss": 1.9687, "step": 8670 }, { "epoch": 0.3085947915740823, "grad_norm": 1.6344400644302368, "learning_rate": 1.9847627283577446e-05, "loss": 1.9623, "step": 8680 }, { "epoch": 0.3089503155275087, "grad_norm": 1.5347328186035156, "learning_rate": 1.984694498215034e-05, "loss": 1.9687, "step": 8690 }, { "epoch": 0.30930583948093504, "grad_norm": 1.6126179695129395, "learning_rate": 1.984626116829423e-05, "loss": 1.9642, "step": 8700 }, { "epoch": 0.3096613634343614, "grad_norm": 1.7136037349700928, "learning_rate": 1.984557584211414e-05, "loss": 1.9823, "step": 8710 }, { "epoch": 0.31001688738778777, "grad_norm": 1.6425186395645142, "learning_rate": 1.9844889003715327e-05, "loss": 1.9147, "step": 8720 }, { "epoch": 0.3103724113412141, "grad_norm": 1.6839289665222168, "learning_rate": 1.9844200653203293e-05, "loss": 1.9372, "step": 8730 }, { "epoch": 0.3107279352946405, "grad_norm": 1.6468852758407593, "learning_rate": 1.9843510790683767e-05, "loss": 1.9748, "step": 8740 }, { "epoch": 0.31108345924806685, "grad_norm": 1.6727285385131836, "learning_rate": 1.9842819416262698e-05, "loss": 1.9462, "step": 8750 }, { "epoch": 0.3114389832014932, "grad_norm": 1.7718051671981812, "learning_rate": 1.9842126530046285e-05, "loss": 1.8897, "step": 8760 }, { "epoch": 0.3117945071549196, "grad_norm": 1.708163857460022, "learning_rate": 1.9841432132140945e-05, "loss": 1.9546, "step": 8770 }, { "epoch": 0.31215003110834594, "grad_norm": 1.61192786693573, "learning_rate": 1.9840736222653337e-05, "loss": 1.9602, "step": 8780 }, { "epoch": 0.3125055550617723, "grad_norm": 1.5967910289764404, "learning_rate": 1.9840038801690346e-05, "loss": 1.995, "step": 8790 }, { "epoch": 0.31286107901519866, "grad_norm": 1.7376841306686401, "learning_rate": 1.9839339869359094e-05, "loss": 1.929, "step": 8800 }, { "epoch": 0.313216602968625, "grad_norm": 1.6677132844924927, "learning_rate": 1.9838639425766932e-05, "loss": 1.9763, "step": 8810 }, { "epoch": 0.3135721269220514, "grad_norm": 1.5603753328323364, "learning_rate": 1.9837937471021442e-05, "loss": 1.9691, "step": 8820 }, { "epoch": 0.31392765087547775, "grad_norm": 1.7010904550552368, "learning_rate": 1.9837234005230442e-05, "loss": 1.9976, "step": 8830 }, { "epoch": 0.3142831748289041, "grad_norm": 1.6840176582336426, "learning_rate": 1.9836529028501976e-05, "loss": 1.9502, "step": 8840 }, { "epoch": 0.3146386987823305, "grad_norm": 1.6860710382461548, "learning_rate": 1.9835822540944328e-05, "loss": 1.951, "step": 8850 }, { "epoch": 0.31499422273575683, "grad_norm": 1.7188667058944702, "learning_rate": 1.983511454266601e-05, "loss": 1.9385, "step": 8860 }, { "epoch": 0.3153497466891832, "grad_norm": 1.562816858291626, "learning_rate": 1.9834405033775762e-05, "loss": 1.9437, "step": 8870 }, { "epoch": 0.31570527064260956, "grad_norm": 1.6143077611923218, "learning_rate": 1.9833694014382565e-05, "loss": 1.9291, "step": 8880 }, { "epoch": 0.3160607945960359, "grad_norm": 1.7434130907058716, "learning_rate": 1.9832981484595626e-05, "loss": 1.9641, "step": 8890 }, { "epoch": 0.3164163185494623, "grad_norm": 1.656266450881958, "learning_rate": 1.983226744452438e-05, "loss": 1.9055, "step": 8900 }, { "epoch": 0.31677184250288865, "grad_norm": 1.6229407787322998, "learning_rate": 1.9831551894278508e-05, "loss": 2.0194, "step": 8910 }, { "epoch": 0.317127366456315, "grad_norm": 1.568166732788086, "learning_rate": 1.9830834833967907e-05, "loss": 1.944, "step": 8920 }, { "epoch": 0.31748289040974137, "grad_norm": 1.6000345945358276, "learning_rate": 1.9830116263702717e-05, "loss": 1.9643, "step": 8930 }, { "epoch": 0.31783841436316773, "grad_norm": 1.6643948554992676, "learning_rate": 1.9829396183593303e-05, "loss": 1.9434, "step": 8940 }, { "epoch": 0.3181939383165941, "grad_norm": 1.6422382593154907, "learning_rate": 1.9828674593750266e-05, "loss": 1.9447, "step": 8950 }, { "epoch": 0.31854946227002046, "grad_norm": 1.6364524364471436, "learning_rate": 1.982795149428444e-05, "loss": 1.9909, "step": 8960 }, { "epoch": 0.3189049862234468, "grad_norm": 1.5950331687927246, "learning_rate": 1.9827226885306883e-05, "loss": 1.9634, "step": 8970 }, { "epoch": 0.3192605101768732, "grad_norm": 1.6469675302505493, "learning_rate": 1.9826500766928896e-05, "loss": 1.9507, "step": 8980 }, { "epoch": 0.31961603413029954, "grad_norm": 1.7085630893707275, "learning_rate": 1.9825773139262e-05, "loss": 1.9463, "step": 8990 }, { "epoch": 0.3199715580837259, "grad_norm": 1.7127751111984253, "learning_rate": 1.9825044002417962e-05, "loss": 1.9602, "step": 9000 }, { "epoch": 0.32032708203715227, "grad_norm": 1.6652568578720093, "learning_rate": 1.9824313356508766e-05, "loss": 1.9677, "step": 9010 }, { "epoch": 0.32068260599057863, "grad_norm": 1.617638349533081, "learning_rate": 1.9823581201646638e-05, "loss": 1.9952, "step": 9020 }, { "epoch": 0.321038129944005, "grad_norm": 1.6622306108474731, "learning_rate": 1.9822847537944033e-05, "loss": 1.917, "step": 9030 }, { "epoch": 0.32139365389743135, "grad_norm": 3.469229221343994, "learning_rate": 1.9822112365513633e-05, "loss": 1.9613, "step": 9040 }, { "epoch": 0.3217491778508577, "grad_norm": 1.6523138284683228, "learning_rate": 1.9821375684468363e-05, "loss": 1.9621, "step": 9050 }, { "epoch": 0.3221047018042841, "grad_norm": 1.6972028017044067, "learning_rate": 1.9820637494921367e-05, "loss": 1.9565, "step": 9060 }, { "epoch": 0.32246022575771044, "grad_norm": 1.6572128534317017, "learning_rate": 1.9819897796986027e-05, "loss": 1.9616, "step": 9070 }, { "epoch": 0.3228157497111368, "grad_norm": 1.641320824623108, "learning_rate": 1.9819156590775958e-05, "loss": 1.9568, "step": 9080 }, { "epoch": 0.32317127366456316, "grad_norm": 1.7216322422027588, "learning_rate": 1.9818413876405e-05, "loss": 1.9556, "step": 9090 }, { "epoch": 0.3235267976179895, "grad_norm": 1.5993764400482178, "learning_rate": 1.9817669653987234e-05, "loss": 1.9693, "step": 9100 }, { "epoch": 0.3238823215714159, "grad_norm": 1.6592811346054077, "learning_rate": 1.9816923923636967e-05, "loss": 1.9442, "step": 9110 }, { "epoch": 0.32423784552484225, "grad_norm": 1.6523818969726562, "learning_rate": 1.981617668546874e-05, "loss": 1.9406, "step": 9120 }, { "epoch": 0.3245933694782686, "grad_norm": 1.6113784313201904, "learning_rate": 1.9815427939597318e-05, "loss": 1.9877, "step": 9130 }, { "epoch": 0.324948893431695, "grad_norm": 1.7487437725067139, "learning_rate": 1.981467768613771e-05, "loss": 1.9455, "step": 9140 }, { "epoch": 0.32530441738512134, "grad_norm": 1.6923714876174927, "learning_rate": 1.9813925925205146e-05, "loss": 1.9662, "step": 9150 }, { "epoch": 0.3256599413385477, "grad_norm": 1.700963020324707, "learning_rate": 1.9813172656915096e-05, "loss": 1.911, "step": 9160 }, { "epoch": 0.32601546529197406, "grad_norm": 1.6489852666854858, "learning_rate": 1.9812417881383254e-05, "loss": 1.9495, "step": 9170 }, { "epoch": 0.3263709892454004, "grad_norm": 1.7082645893096924, "learning_rate": 1.981166159872555e-05, "loss": 1.9882, "step": 9180 }, { "epoch": 0.3267265131988268, "grad_norm": 1.6140416860580444, "learning_rate": 1.981090380905814e-05, "loss": 1.9462, "step": 9190 }, { "epoch": 0.32708203715225315, "grad_norm": 1.6398372650146484, "learning_rate": 1.9810144512497426e-05, "loss": 1.9147, "step": 9200 }, { "epoch": 0.3274375611056795, "grad_norm": 1.6099936962127686, "learning_rate": 1.9809383709160023e-05, "loss": 1.978, "step": 9210 }, { "epoch": 0.32779308505910587, "grad_norm": 1.6432914733886719, "learning_rate": 1.980862139916279e-05, "loss": 1.9342, "step": 9220 }, { "epoch": 0.32814860901253223, "grad_norm": 1.7044553756713867, "learning_rate": 1.9807857582622803e-05, "loss": 1.9318, "step": 9230 }, { "epoch": 0.3285041329659586, "grad_norm": 1.5483976602554321, "learning_rate": 1.9807092259657395e-05, "loss": 1.9708, "step": 9240 }, { "epoch": 0.32885965691938496, "grad_norm": 1.7499970197677612, "learning_rate": 1.9806325430384104e-05, "loss": 1.9595, "step": 9250 }, { "epoch": 0.3292151808728113, "grad_norm": 1.6411001682281494, "learning_rate": 1.9805557094920712e-05, "loss": 1.926, "step": 9260 }, { "epoch": 0.3295707048262377, "grad_norm": 1.6664100885391235, "learning_rate": 1.9804787253385232e-05, "loss": 1.9795, "step": 9270 }, { "epoch": 0.32992622877966404, "grad_norm": 1.6018718481063843, "learning_rate": 1.9804015905895906e-05, "loss": 1.9423, "step": 9280 }, { "epoch": 0.3302817527330904, "grad_norm": 1.580801010131836, "learning_rate": 1.980324305257121e-05, "loss": 1.9469, "step": 9290 }, { "epoch": 0.33063727668651677, "grad_norm": 1.6953452825546265, "learning_rate": 1.9802468693529847e-05, "loss": 1.9624, "step": 9300 }, { "epoch": 0.33099280063994313, "grad_norm": 1.7434360980987549, "learning_rate": 1.9801692828890754e-05, "loss": 1.9296, "step": 9310 }, { "epoch": 0.3313483245933695, "grad_norm": 1.6928406953811646, "learning_rate": 1.98009154587731e-05, "loss": 1.9109, "step": 9320 }, { "epoch": 0.33170384854679585, "grad_norm": 1.6208775043487549, "learning_rate": 1.9800136583296282e-05, "loss": 1.9515, "step": 9330 }, { "epoch": 0.3320593725002222, "grad_norm": 1.5621469020843506, "learning_rate": 1.9799356202579935e-05, "loss": 1.974, "step": 9340 }, { "epoch": 0.3324148964536486, "grad_norm": 1.7981388568878174, "learning_rate": 1.9798574316743918e-05, "loss": 1.9863, "step": 9350 }, { "epoch": 0.33277042040707494, "grad_norm": 1.6251649856567383, "learning_rate": 1.979779092590832e-05, "loss": 1.9512, "step": 9360 }, { "epoch": 0.3331259443605013, "grad_norm": 1.5817049741744995, "learning_rate": 1.9797006030193472e-05, "loss": 1.9169, "step": 9370 }, { "epoch": 0.33348146831392766, "grad_norm": 1.6933389902114868, "learning_rate": 1.9796219629719923e-05, "loss": 1.9411, "step": 9380 }, { "epoch": 0.333836992267354, "grad_norm": 1.6590707302093506, "learning_rate": 1.9795431724608465e-05, "loss": 1.9705, "step": 9390 }, { "epoch": 0.3341925162207804, "grad_norm": 1.6322592496871948, "learning_rate": 1.9794642314980108e-05, "loss": 1.9743, "step": 9400 }, { "epoch": 0.33454804017420675, "grad_norm": 1.6067534685134888, "learning_rate": 1.9793851400956106e-05, "loss": 1.9444, "step": 9410 }, { "epoch": 0.3349035641276331, "grad_norm": 1.6392176151275635, "learning_rate": 1.979305898265794e-05, "loss": 1.9448, "step": 9420 }, { "epoch": 0.3352590880810595, "grad_norm": 1.462730884552002, "learning_rate": 1.9792265060207313e-05, "loss": 1.9329, "step": 9430 }, { "epoch": 0.33561461203448584, "grad_norm": 1.507010817527771, "learning_rate": 1.979146963372617e-05, "loss": 1.9374, "step": 9440 }, { "epoch": 0.3359701359879122, "grad_norm": 1.7325148582458496, "learning_rate": 1.9790672703336688e-05, "loss": 1.9483, "step": 9450 }, { "epoch": 0.33632565994133856, "grad_norm": 1.5408869981765747, "learning_rate": 1.9789874269161264e-05, "loss": 1.9503, "step": 9460 }, { "epoch": 0.3366811838947649, "grad_norm": 1.561712622642517, "learning_rate": 1.9789074331322537e-05, "loss": 1.9847, "step": 9470 }, { "epoch": 0.3370367078481913, "grad_norm": 1.731758713722229, "learning_rate": 1.978827288994337e-05, "loss": 1.9415, "step": 9480 }, { "epoch": 0.33739223180161765, "grad_norm": 1.676243543624878, "learning_rate": 1.978746994514686e-05, "loss": 1.9787, "step": 9490 }, { "epoch": 0.337747755755044, "grad_norm": 1.5879807472229004, "learning_rate": 1.978666549705633e-05, "loss": 1.9606, "step": 9500 }, { "epoch": 0.3381032797084704, "grad_norm": 1.629159927368164, "learning_rate": 1.978585954579535e-05, "loss": 1.9389, "step": 9510 }, { "epoch": 0.33845880366189673, "grad_norm": 1.5797783136367798, "learning_rate": 1.9785052091487697e-05, "loss": 1.9321, "step": 9520 }, { "epoch": 0.3388143276153231, "grad_norm": 1.6011348962783813, "learning_rate": 1.9784243134257396e-05, "loss": 1.9485, "step": 9530 }, { "epoch": 0.33916985156874946, "grad_norm": 1.6593711376190186, "learning_rate": 1.9783432674228697e-05, "loss": 1.9338, "step": 9540 }, { "epoch": 0.3395253755221758, "grad_norm": 1.489780306816101, "learning_rate": 1.9782620711526084e-05, "loss": 1.9475, "step": 9550 }, { "epoch": 0.3398808994756022, "grad_norm": 1.6868865489959717, "learning_rate": 1.9781807246274264e-05, "loss": 1.8677, "step": 9560 }, { "epoch": 0.34023642342902854, "grad_norm": 1.5744456052780151, "learning_rate": 1.9780992278598187e-05, "loss": 1.9541, "step": 9570 }, { "epoch": 0.3405919473824549, "grad_norm": 1.7402019500732422, "learning_rate": 1.978017580862302e-05, "loss": 1.916, "step": 9580 }, { "epoch": 0.34094747133588127, "grad_norm": 1.706446647644043, "learning_rate": 1.9779357836474175e-05, "loss": 1.9769, "step": 9590 }, { "epoch": 0.34130299528930763, "grad_norm": 1.6508241891860962, "learning_rate": 1.977853836227728e-05, "loss": 1.936, "step": 9600 }, { "epoch": 0.341658519242734, "grad_norm": 1.656411051750183, "learning_rate": 1.9777717386158203e-05, "loss": 1.932, "step": 9610 }, { "epoch": 0.34201404319616036, "grad_norm": 1.6659698486328125, "learning_rate": 1.977689490824305e-05, "loss": 1.9417, "step": 9620 }, { "epoch": 0.3423695671495867, "grad_norm": 1.6094539165496826, "learning_rate": 1.9776070928658134e-05, "loss": 1.9385, "step": 9630 }, { "epoch": 0.3427250911030131, "grad_norm": 1.5917366743087769, "learning_rate": 1.9775245447530017e-05, "loss": 1.9526, "step": 9640 }, { "epoch": 0.34308061505643944, "grad_norm": 1.7756634950637817, "learning_rate": 1.97744184649855e-05, "loss": 1.9502, "step": 9650 }, { "epoch": 0.3434361390098658, "grad_norm": 1.629005789756775, "learning_rate": 1.9773589981151585e-05, "loss": 1.9, "step": 9660 }, { "epoch": 0.34379166296329217, "grad_norm": 1.6309583187103271, "learning_rate": 1.9772759996155533e-05, "loss": 1.9751, "step": 9670 }, { "epoch": 0.34414718691671853, "grad_norm": 1.6217846870422363, "learning_rate": 1.977192851012482e-05, "loss": 1.9265, "step": 9680 }, { "epoch": 0.3445027108701449, "grad_norm": 1.7747803926467896, "learning_rate": 1.977109552318716e-05, "loss": 1.9351, "step": 9690 }, { "epoch": 0.34485823482357125, "grad_norm": 1.629852294921875, "learning_rate": 1.977026103547049e-05, "loss": 1.9203, "step": 9700 }, { "epoch": 0.3452137587769976, "grad_norm": 1.723342776298523, "learning_rate": 1.9769425047102986e-05, "loss": 1.9823, "step": 9710 }, { "epoch": 0.345569282730424, "grad_norm": 1.6114434003829956, "learning_rate": 1.9768587558213052e-05, "loss": 1.9757, "step": 9720 }, { "epoch": 0.34592480668385034, "grad_norm": 1.5842297077178955, "learning_rate": 1.976774856892932e-05, "loss": 1.9348, "step": 9730 }, { "epoch": 0.3462803306372767, "grad_norm": 1.6690183877944946, "learning_rate": 1.9766908079380645e-05, "loss": 1.9714, "step": 9740 }, { "epoch": 0.34663585459070306, "grad_norm": 1.626365303993225, "learning_rate": 1.976606608969613e-05, "loss": 1.9595, "step": 9750 }, { "epoch": 0.3469913785441294, "grad_norm": 1.6194849014282227, "learning_rate": 1.9765222600005104e-05, "loss": 1.918, "step": 9760 }, { "epoch": 0.3473469024975558, "grad_norm": 1.6636567115783691, "learning_rate": 1.9764377610437106e-05, "loss": 1.9131, "step": 9770 }, { "epoch": 0.34770242645098215, "grad_norm": 1.5718607902526855, "learning_rate": 1.9763531121121937e-05, "loss": 1.9218, "step": 9780 }, { "epoch": 0.3480579504044085, "grad_norm": 1.650025486946106, "learning_rate": 1.97626831321896e-05, "loss": 1.9203, "step": 9790 }, { "epoch": 0.3484134743578349, "grad_norm": 1.69324791431427, "learning_rate": 1.976183364377035e-05, "loss": 1.9502, "step": 9800 }, { "epoch": 0.34876899831126124, "grad_norm": 1.6470222473144531, "learning_rate": 1.976098265599466e-05, "loss": 1.9373, "step": 9810 }, { "epoch": 0.3491245222646876, "grad_norm": 1.5626472234725952, "learning_rate": 1.9760130168993233e-05, "loss": 1.9187, "step": 9820 }, { "epoch": 0.34948004621811396, "grad_norm": 1.632422685623169, "learning_rate": 1.9759276182897012e-05, "loss": 1.945, "step": 9830 }, { "epoch": 0.3498355701715403, "grad_norm": 1.637856125831604, "learning_rate": 1.975842069783716e-05, "loss": 1.9157, "step": 9840 }, { "epoch": 0.3501910941249667, "grad_norm": 1.5937124490737915, "learning_rate": 1.9757563713945075e-05, "loss": 1.9355, "step": 9850 }, { "epoch": 0.35054661807839305, "grad_norm": 1.7395747900009155, "learning_rate": 1.975670523135238e-05, "loss": 1.9387, "step": 9860 }, { "epoch": 0.3509021420318194, "grad_norm": 1.5780001878738403, "learning_rate": 1.975584525019094e-05, "loss": 1.9666, "step": 9870 }, { "epoch": 0.35125766598524577, "grad_norm": 1.6015253067016602, "learning_rate": 1.975498377059284e-05, "loss": 1.9587, "step": 9880 }, { "epoch": 0.35161318993867213, "grad_norm": 1.6278748512268066, "learning_rate": 1.9754120792690392e-05, "loss": 1.963, "step": 9890 }, { "epoch": 0.3519687138920985, "grad_norm": 1.7236615419387817, "learning_rate": 1.975325631661615e-05, "loss": 1.9142, "step": 9900 }, { "epoch": 0.35232423784552486, "grad_norm": 1.7599778175354004, "learning_rate": 1.9752390342502895e-05, "loss": 1.9628, "step": 9910 }, { "epoch": 0.3526797617989512, "grad_norm": 1.6511987447738647, "learning_rate": 1.975152287048363e-05, "loss": 1.9447, "step": 9920 }, { "epoch": 0.3530352857523776, "grad_norm": 1.7247734069824219, "learning_rate": 1.9750653900691595e-05, "loss": 1.9288, "step": 9930 }, { "epoch": 0.35339080970580394, "grad_norm": 1.667855143547058, "learning_rate": 1.9749783433260258e-05, "loss": 1.9486, "step": 9940 }, { "epoch": 0.3537463336592303, "grad_norm": 1.671859622001648, "learning_rate": 1.9748911468323314e-05, "loss": 1.9339, "step": 9950 }, { "epoch": 0.35410185761265667, "grad_norm": 1.6676883697509766, "learning_rate": 1.9748038006014698e-05, "loss": 1.9307, "step": 9960 }, { "epoch": 0.35445738156608303, "grad_norm": 1.5199689865112305, "learning_rate": 1.974716304646856e-05, "loss": 1.9542, "step": 9970 }, { "epoch": 0.3548129055195094, "grad_norm": 1.6532329320907593, "learning_rate": 1.97462865898193e-05, "loss": 1.9508, "step": 9980 }, { "epoch": 0.35516842947293575, "grad_norm": 1.6774016618728638, "learning_rate": 1.9745408636201525e-05, "loss": 1.9041, "step": 9990 }, { "epoch": 0.3555239534263621, "grad_norm": 1.5950804948806763, "learning_rate": 1.9744529185750093e-05, "loss": 1.9588, "step": 10000 }, { "epoch": 0.3558794773797885, "grad_norm": 1.639394998550415, "learning_rate": 1.9743648238600074e-05, "loss": 1.9836, "step": 10010 }, { "epoch": 0.35623500133321484, "grad_norm": 1.6646896600723267, "learning_rate": 1.9742765794886774e-05, "loss": 1.9421, "step": 10020 }, { "epoch": 0.3565905252866412, "grad_norm": 1.6006731986999512, "learning_rate": 1.974188185474574e-05, "loss": 1.9689, "step": 10030 }, { "epoch": 0.35694604924006756, "grad_norm": 1.85233736038208, "learning_rate": 1.9740996418312735e-05, "loss": 1.9364, "step": 10040 }, { "epoch": 0.3573015731934939, "grad_norm": 1.7664031982421875, "learning_rate": 1.9740109485723758e-05, "loss": 1.9545, "step": 10050 }, { "epoch": 0.3576570971469203, "grad_norm": 1.6243541240692139, "learning_rate": 1.973922105711503e-05, "loss": 1.9743, "step": 10060 }, { "epoch": 0.35801262110034665, "grad_norm": 1.7577147483825684, "learning_rate": 1.973833113262302e-05, "loss": 1.9407, "step": 10070 }, { "epoch": 0.358368145053773, "grad_norm": 1.598021388053894, "learning_rate": 1.9737439712384404e-05, "loss": 1.9488, "step": 10080 }, { "epoch": 0.3587236690071994, "grad_norm": 1.663016676902771, "learning_rate": 1.97365467965361e-05, "loss": 1.9123, "step": 10090 }, { "epoch": 0.35907919296062574, "grad_norm": 1.629992127418518, "learning_rate": 1.973565238521526e-05, "loss": 1.9211, "step": 10100 }, { "epoch": 0.3594347169140521, "grad_norm": 1.6966192722320557, "learning_rate": 1.9734756478559255e-05, "loss": 1.9352, "step": 10110 }, { "epoch": 0.35979024086747846, "grad_norm": 1.6816771030426025, "learning_rate": 1.9733859076705696e-05, "loss": 1.9132, "step": 10120 }, { "epoch": 0.3601457648209048, "grad_norm": 1.5039012432098389, "learning_rate": 1.973296017979241e-05, "loss": 1.9761, "step": 10130 }, { "epoch": 0.3605012887743312, "grad_norm": 1.685896635055542, "learning_rate": 1.9732059787957466e-05, "loss": 1.9875, "step": 10140 }, { "epoch": 0.36085681272775755, "grad_norm": 1.6027092933654785, "learning_rate": 1.973115790133916e-05, "loss": 1.9406, "step": 10150 }, { "epoch": 0.3612123366811839, "grad_norm": 1.6120712757110596, "learning_rate": 1.973025452007602e-05, "loss": 1.9095, "step": 10160 }, { "epoch": 0.36156786063461027, "grad_norm": 1.705488920211792, "learning_rate": 1.972934964430679e-05, "loss": 1.942, "step": 10170 }, { "epoch": 0.36192338458803663, "grad_norm": 1.63666832447052, "learning_rate": 1.972844327417046e-05, "loss": 1.9205, "step": 10180 }, { "epoch": 0.362278908541463, "grad_norm": 1.6071900129318237, "learning_rate": 1.972753540980624e-05, "loss": 1.9248, "step": 10190 }, { "epoch": 0.36263443249488936, "grad_norm": 1.6618894338607788, "learning_rate": 1.972662605135357e-05, "loss": 1.9252, "step": 10200 }, { "epoch": 0.3629899564483157, "grad_norm": 1.6412007808685303, "learning_rate": 1.972571519895213e-05, "loss": 1.8986, "step": 10210 }, { "epoch": 0.3633454804017421, "grad_norm": 1.6706079244613647, "learning_rate": 1.9724802852741817e-05, "loss": 1.9449, "step": 10220 }, { "epoch": 0.36370100435516844, "grad_norm": 1.550260305404663, "learning_rate": 1.9723889012862757e-05, "loss": 1.9436, "step": 10230 }, { "epoch": 0.3640565283085948, "grad_norm": 1.6799967288970947, "learning_rate": 1.9722973679455316e-05, "loss": 1.9889, "step": 10240 }, { "epoch": 0.36441205226202117, "grad_norm": 1.598900556564331, "learning_rate": 1.972205685266008e-05, "loss": 1.9648, "step": 10250 }, { "epoch": 0.36476757621544753, "grad_norm": 1.63681960105896, "learning_rate": 1.972113853261787e-05, "loss": 1.8904, "step": 10260 }, { "epoch": 0.3651231001688739, "grad_norm": 1.6806844472885132, "learning_rate": 1.9720218719469732e-05, "loss": 1.9616, "step": 10270 }, { "epoch": 0.36547862412230026, "grad_norm": 1.6543742418289185, "learning_rate": 1.9719297413356945e-05, "loss": 1.8776, "step": 10280 }, { "epoch": 0.3658341480757266, "grad_norm": 1.6117966175079346, "learning_rate": 1.971837461442102e-05, "loss": 1.9284, "step": 10290 }, { "epoch": 0.366189672029153, "grad_norm": 1.5849404335021973, "learning_rate": 1.9717450322803682e-05, "loss": 1.9351, "step": 10300 }, { "epoch": 0.36654519598257934, "grad_norm": 1.7554417848587036, "learning_rate": 1.9716524538646912e-05, "loss": 1.9462, "step": 10310 }, { "epoch": 0.3669007199360057, "grad_norm": 1.5392756462097168, "learning_rate": 1.971559726209289e-05, "loss": 1.9657, "step": 10320 }, { "epoch": 0.36725624388943207, "grad_norm": 1.6382545232772827, "learning_rate": 1.9714668493284045e-05, "loss": 1.9089, "step": 10330 }, { "epoch": 0.36761176784285843, "grad_norm": 1.6288036108016968, "learning_rate": 1.9713738232363033e-05, "loss": 1.9634, "step": 10340 }, { "epoch": 0.3679672917962848, "grad_norm": 1.622418761253357, "learning_rate": 1.9712806479472736e-05, "loss": 1.926, "step": 10350 }, { "epoch": 0.36832281574971115, "grad_norm": 1.5704150199890137, "learning_rate": 1.9711873234756262e-05, "loss": 1.9506, "step": 10360 }, { "epoch": 0.3686783397031375, "grad_norm": 1.7928626537322998, "learning_rate": 1.971093849835695e-05, "loss": 1.9115, "step": 10370 }, { "epoch": 0.3690338636565639, "grad_norm": 1.653241515159607, "learning_rate": 1.9710002270418377e-05, "loss": 1.9287, "step": 10380 }, { "epoch": 0.36938938760999024, "grad_norm": 1.6538161039352417, "learning_rate": 1.970906455108433e-05, "loss": 1.9478, "step": 10390 }, { "epoch": 0.3697449115634166, "grad_norm": 1.6418386697769165, "learning_rate": 1.9708125340498853e-05, "loss": 1.9312, "step": 10400 }, { "epoch": 0.37010043551684296, "grad_norm": 1.542913556098938, "learning_rate": 1.970718463880619e-05, "loss": 1.9444, "step": 10410 }, { "epoch": 0.3704559594702693, "grad_norm": 1.6365742683410645, "learning_rate": 1.9706242446150833e-05, "loss": 1.9704, "step": 10420 }, { "epoch": 0.3708114834236957, "grad_norm": 1.6890367269515991, "learning_rate": 1.9705298762677492e-05, "loss": 1.9118, "step": 10430 }, { "epoch": 0.37116700737712205, "grad_norm": 1.7141989469528198, "learning_rate": 1.9704353588531115e-05, "loss": 1.9375, "step": 10440 }, { "epoch": 0.3715225313305484, "grad_norm": 1.6225630044937134, "learning_rate": 1.9703406923856875e-05, "loss": 1.969, "step": 10450 }, { "epoch": 0.3718780552839748, "grad_norm": 1.5948486328125, "learning_rate": 1.9702458768800173e-05, "loss": 1.9047, "step": 10460 }, { "epoch": 0.37223357923740114, "grad_norm": 1.6593469381332397, "learning_rate": 1.9701509123506635e-05, "loss": 1.9562, "step": 10470 }, { "epoch": 0.3725891031908275, "grad_norm": 1.6699244976043701, "learning_rate": 1.970055798812213e-05, "loss": 1.9842, "step": 10480 }, { "epoch": 0.37294462714425386, "grad_norm": 1.6418598890304565, "learning_rate": 1.9699605362792736e-05, "loss": 1.9448, "step": 10490 }, { "epoch": 0.3733001510976802, "grad_norm": 1.7763127088546753, "learning_rate": 1.9698651247664778e-05, "loss": 1.9569, "step": 10500 }, { "epoch": 0.3736556750511066, "grad_norm": 1.5967490673065186, "learning_rate": 1.96976956428848e-05, "loss": 1.9609, "step": 10510 }, { "epoch": 0.37401119900453295, "grad_norm": 1.666662573814392, "learning_rate": 1.9696738548599575e-05, "loss": 1.8398, "step": 10520 }, { "epoch": 0.3743667229579593, "grad_norm": 1.682659387588501, "learning_rate": 1.9695779964956106e-05, "loss": 1.9632, "step": 10530 }, { "epoch": 0.37472224691138567, "grad_norm": 1.6394110918045044, "learning_rate": 1.9694819892101627e-05, "loss": 1.9686, "step": 10540 }, { "epoch": 0.37507777086481203, "grad_norm": 1.7876205444335938, "learning_rate": 1.9693858330183604e-05, "loss": 1.9342, "step": 10550 }, { "epoch": 0.3754332948182384, "grad_norm": 1.6362394094467163, "learning_rate": 1.9692895279349723e-05, "loss": 1.9146, "step": 10560 }, { "epoch": 0.37578881877166476, "grad_norm": 1.6904796361923218, "learning_rate": 1.96919307397479e-05, "loss": 1.9252, "step": 10570 }, { "epoch": 0.3761443427250911, "grad_norm": 1.64126455783844, "learning_rate": 1.969096471152628e-05, "loss": 1.9646, "step": 10580 }, { "epoch": 0.3764998666785175, "grad_norm": 1.6415106058120728, "learning_rate": 1.968999719483325e-05, "loss": 1.9016, "step": 10590 }, { "epoch": 0.37685539063194384, "grad_norm": 1.6585341691970825, "learning_rate": 1.96890281898174e-05, "loss": 1.9525, "step": 10600 }, { "epoch": 0.3772109145853702, "grad_norm": 1.6419931650161743, "learning_rate": 1.9688057696627575e-05, "loss": 1.9364, "step": 10610 }, { "epoch": 0.37756643853879657, "grad_norm": 1.625403642654419, "learning_rate": 1.9687085715412832e-05, "loss": 1.9232, "step": 10620 }, { "epoch": 0.37792196249222293, "grad_norm": 1.5984399318695068, "learning_rate": 1.9686112246322464e-05, "loss": 1.9623, "step": 10630 }, { "epoch": 0.3782774864456493, "grad_norm": 1.6441439390182495, "learning_rate": 1.9685137289505985e-05, "loss": 1.9348, "step": 10640 }, { "epoch": 0.37863301039907565, "grad_norm": 1.6237053871154785, "learning_rate": 1.9684160845113145e-05, "loss": 1.9182, "step": 10650 }, { "epoch": 0.378988534352502, "grad_norm": 1.6530194282531738, "learning_rate": 1.9683182913293918e-05, "loss": 1.9084, "step": 10660 }, { "epoch": 0.3793440583059284, "grad_norm": 1.6410064697265625, "learning_rate": 1.968220349419851e-05, "loss": 1.9048, "step": 10670 }, { "epoch": 0.37969958225935474, "grad_norm": 1.5841423273086548, "learning_rate": 1.9681222587977357e-05, "loss": 1.9042, "step": 10680 }, { "epoch": 0.3800551062127811, "grad_norm": 1.5987772941589355, "learning_rate": 1.9680240194781113e-05, "loss": 1.9088, "step": 10690 }, { "epoch": 0.38041063016620746, "grad_norm": 1.746238350868225, "learning_rate": 1.967925631476067e-05, "loss": 1.948, "step": 10700 }, { "epoch": 0.3807661541196338, "grad_norm": 1.6123676300048828, "learning_rate": 1.9678270948067148e-05, "loss": 1.9685, "step": 10710 }, { "epoch": 0.3811216780730602, "grad_norm": 1.5973690748214722, "learning_rate": 1.9677284094851893e-05, "loss": 1.9073, "step": 10720 }, { "epoch": 0.38147720202648655, "grad_norm": 1.5998799800872803, "learning_rate": 1.9676295755266475e-05, "loss": 1.9561, "step": 10730 }, { "epoch": 0.3818327259799129, "grad_norm": 1.565384030342102, "learning_rate": 1.96753059294627e-05, "loss": 1.9553, "step": 10740 }, { "epoch": 0.3821882499333393, "grad_norm": 1.621324896812439, "learning_rate": 1.9674314617592597e-05, "loss": 1.9116, "step": 10750 }, { "epoch": 0.38254377388676564, "grad_norm": 1.5708541870117188, "learning_rate": 1.9673321819808432e-05, "loss": 1.9764, "step": 10760 }, { "epoch": 0.382899297840192, "grad_norm": 1.6714891195297241, "learning_rate": 1.9672327536262687e-05, "loss": 1.9354, "step": 10770 }, { "epoch": 0.38325482179361836, "grad_norm": 1.6341030597686768, "learning_rate": 1.9671331767108074e-05, "loss": 1.9139, "step": 10780 }, { "epoch": 0.3836103457470447, "grad_norm": 1.5689213275909424, "learning_rate": 1.9670334512497546e-05, "loss": 1.9208, "step": 10790 }, { "epoch": 0.3839658697004711, "grad_norm": 1.6299171447753906, "learning_rate": 1.9669335772584268e-05, "loss": 1.875, "step": 10800 }, { "epoch": 0.38432139365389745, "grad_norm": 1.6344177722930908, "learning_rate": 1.9668335547521645e-05, "loss": 1.9297, "step": 10810 }, { "epoch": 0.3846769176073238, "grad_norm": 1.5587433576583862, "learning_rate": 1.9667333837463302e-05, "loss": 1.9084, "step": 10820 }, { "epoch": 0.38503244156075017, "grad_norm": 1.574712872505188, "learning_rate": 1.966633064256309e-05, "loss": 1.9071, "step": 10830 }, { "epoch": 0.38538796551417653, "grad_norm": 1.5423403978347778, "learning_rate": 1.966532596297511e-05, "loss": 1.9419, "step": 10840 }, { "epoch": 0.3857434894676029, "grad_norm": 1.6940547227859497, "learning_rate": 1.966431979885366e-05, "loss": 1.9032, "step": 10850 }, { "epoch": 0.38609901342102926, "grad_norm": 1.6661192178726196, "learning_rate": 1.9663312150353283e-05, "loss": 1.971, "step": 10860 }, { "epoch": 0.3864545373744556, "grad_norm": 1.6065622568130493, "learning_rate": 1.966230301762875e-05, "loss": 1.9547, "step": 10870 }, { "epoch": 0.386810061327882, "grad_norm": 1.5964338779449463, "learning_rate": 1.966129240083505e-05, "loss": 1.9231, "step": 10880 }, { "epoch": 0.38716558528130834, "grad_norm": 1.7134864330291748, "learning_rate": 1.9660280300127423e-05, "loss": 1.9143, "step": 10890 }, { "epoch": 0.3875211092347347, "grad_norm": 1.6787341833114624, "learning_rate": 1.965926671566131e-05, "loss": 1.9589, "step": 10900 }, { "epoch": 0.38787663318816107, "grad_norm": 1.7104814052581787, "learning_rate": 1.9658251647592396e-05, "loss": 1.9147, "step": 10910 }, { "epoch": 0.38823215714158743, "grad_norm": 1.6374329328536987, "learning_rate": 1.965723509607658e-05, "loss": 1.9499, "step": 10920 }, { "epoch": 0.3885876810950138, "grad_norm": 1.6665334701538086, "learning_rate": 1.965621706127001e-05, "loss": 1.8982, "step": 10930 }, { "epoch": 0.38894320504844015, "grad_norm": 1.658685564994812, "learning_rate": 1.9655197543329043e-05, "loss": 1.9017, "step": 10940 }, { "epoch": 0.3892987290018665, "grad_norm": 1.6136287450790405, "learning_rate": 1.9654176542410274e-05, "loss": 1.9129, "step": 10950 }, { "epoch": 0.3896542529552929, "grad_norm": 1.6693179607391357, "learning_rate": 1.9653154058670517e-05, "loss": 1.9192, "step": 10960 }, { "epoch": 0.39000977690871924, "grad_norm": 1.6613389253616333, "learning_rate": 1.965213009226682e-05, "loss": 1.911, "step": 10970 }, { "epoch": 0.3903653008621456, "grad_norm": 1.5833563804626465, "learning_rate": 1.9651104643356465e-05, "loss": 1.9032, "step": 10980 }, { "epoch": 0.39072082481557197, "grad_norm": 1.7355057001113892, "learning_rate": 1.965007771209695e-05, "loss": 1.9614, "step": 10990 }, { "epoch": 0.3910763487689983, "grad_norm": 1.660593032836914, "learning_rate": 1.9649049298646004e-05, "loss": 1.9462, "step": 11000 }, { "epoch": 0.3914318727224247, "grad_norm": 1.7234996557235718, "learning_rate": 1.964801940316158e-05, "loss": 1.964, "step": 11010 }, { "epoch": 0.39178739667585105, "grad_norm": 1.5677638053894043, "learning_rate": 1.9646988025801878e-05, "loss": 1.9563, "step": 11020 }, { "epoch": 0.3921429206292774, "grad_norm": 1.5681052207946777, "learning_rate": 1.9645955166725298e-05, "loss": 1.9512, "step": 11030 }, { "epoch": 0.3924984445827038, "grad_norm": 1.7496832609176636, "learning_rate": 1.9644920826090485e-05, "loss": 1.9241, "step": 11040 }, { "epoch": 0.39285396853613014, "grad_norm": 1.7510963678359985, "learning_rate": 1.9643885004056308e-05, "loss": 1.9386, "step": 11050 }, { "epoch": 0.3932094924895565, "grad_norm": 1.56932532787323, "learning_rate": 1.964284770078186e-05, "loss": 1.9036, "step": 11060 }, { "epoch": 0.39356501644298286, "grad_norm": 1.6665147542953491, "learning_rate": 1.9641808916426468e-05, "loss": 1.9061, "step": 11070 }, { "epoch": 0.3939205403964092, "grad_norm": 1.6765482425689697, "learning_rate": 1.9640768651149683e-05, "loss": 1.9217, "step": 11080 }, { "epoch": 0.3942760643498356, "grad_norm": 1.7809001207351685, "learning_rate": 1.9639726905111275e-05, "loss": 1.9605, "step": 11090 }, { "epoch": 0.39463158830326195, "grad_norm": 1.6571179628372192, "learning_rate": 1.9638683678471262e-05, "loss": 1.9131, "step": 11100 }, { "epoch": 0.3949871122566883, "grad_norm": 1.6656454801559448, "learning_rate": 1.963763897138987e-05, "loss": 1.9423, "step": 11110 }, { "epoch": 0.3953426362101147, "grad_norm": 1.6632436513900757, "learning_rate": 1.963659278402756e-05, "loss": 1.8889, "step": 11120 }, { "epoch": 0.39569816016354104, "grad_norm": 1.6332790851593018, "learning_rate": 1.963554511654502e-05, "loss": 1.9505, "step": 11130 }, { "epoch": 0.3960536841169674, "grad_norm": 1.6795090436935425, "learning_rate": 1.963449596910316e-05, "loss": 1.9273, "step": 11140 }, { "epoch": 0.39640920807039376, "grad_norm": 1.7208044528961182, "learning_rate": 1.963344534186314e-05, "loss": 1.9377, "step": 11150 }, { "epoch": 0.3967647320238201, "grad_norm": 1.6807401180267334, "learning_rate": 1.963239323498631e-05, "loss": 1.9144, "step": 11160 }, { "epoch": 0.3971202559772465, "grad_norm": 1.535725474357605, "learning_rate": 1.9631339648634273e-05, "loss": 1.905, "step": 11170 }, { "epoch": 0.39747577993067285, "grad_norm": 1.7075817584991455, "learning_rate": 1.9630284582968858e-05, "loss": 1.9189, "step": 11180 }, { "epoch": 0.3978313038840992, "grad_norm": 1.557862639427185, "learning_rate": 1.9629228038152114e-05, "loss": 1.9081, "step": 11190 }, { "epoch": 0.39818682783752557, "grad_norm": 1.5928765535354614, "learning_rate": 1.9628170014346316e-05, "loss": 1.9666, "step": 11200 }, { "epoch": 0.39854235179095193, "grad_norm": 1.5738533735275269, "learning_rate": 1.9627110511713977e-05, "loss": 1.917, "step": 11210 }, { "epoch": 0.3988978757443783, "grad_norm": 1.663844108581543, "learning_rate": 1.962604953041782e-05, "loss": 1.9212, "step": 11220 }, { "epoch": 0.39925339969780466, "grad_norm": 1.6645182371139526, "learning_rate": 1.9624987070620817e-05, "loss": 1.9029, "step": 11230 }, { "epoch": 0.399608923651231, "grad_norm": 1.6867510080337524, "learning_rate": 1.9623923132486148e-05, "loss": 1.95, "step": 11240 }, { "epoch": 0.3999644476046574, "grad_norm": 1.7048712968826294, "learning_rate": 1.9622857716177223e-05, "loss": 1.8893, "step": 11250 }, { "epoch": 0.40031997155808374, "grad_norm": 1.668708324432373, "learning_rate": 1.9621790821857693e-05, "loss": 1.9566, "step": 11260 }, { "epoch": 0.4006754955115101, "grad_norm": 1.6398382186889648, "learning_rate": 1.962072244969142e-05, "loss": 1.894, "step": 11270 }, { "epoch": 0.40103101946493647, "grad_norm": 1.7319424152374268, "learning_rate": 1.9619652599842506e-05, "loss": 1.9149, "step": 11280 }, { "epoch": 0.40138654341836283, "grad_norm": 1.6810721158981323, "learning_rate": 1.9618581272475263e-05, "loss": 1.9177, "step": 11290 }, { "epoch": 0.4017420673717892, "grad_norm": 1.6722646951675415, "learning_rate": 1.9617508467754248e-05, "loss": 1.9404, "step": 11300 }, { "epoch": 0.40209759132521555, "grad_norm": 1.566178798675537, "learning_rate": 1.9616434185844233e-05, "loss": 1.913, "step": 11310 }, { "epoch": 0.4024531152786419, "grad_norm": 1.5735375881195068, "learning_rate": 1.9615358426910223e-05, "loss": 1.9028, "step": 11320 }, { "epoch": 0.4028086392320683, "grad_norm": 1.5467545986175537, "learning_rate": 1.9614281191117448e-05, "loss": 1.9518, "step": 11330 }, { "epoch": 0.40316416318549464, "grad_norm": 1.557876706123352, "learning_rate": 1.9613202478631365e-05, "loss": 1.9298, "step": 11340 }, { "epoch": 0.403519687138921, "grad_norm": 1.5856714248657227, "learning_rate": 1.9612122289617656e-05, "loss": 1.8999, "step": 11350 }, { "epoch": 0.40387521109234736, "grad_norm": 1.6419317722320557, "learning_rate": 1.961104062424223e-05, "loss": 1.9112, "step": 11360 }, { "epoch": 0.4042307350457737, "grad_norm": 1.5576741695404053, "learning_rate": 1.9609957482671228e-05, "loss": 1.9552, "step": 11370 }, { "epoch": 0.4045862589992001, "grad_norm": 1.6433225870132446, "learning_rate": 1.960887286507101e-05, "loss": 1.9176, "step": 11380 }, { "epoch": 0.40494178295262645, "grad_norm": 1.6947438716888428, "learning_rate": 1.9607786771608167e-05, "loss": 1.9159, "step": 11390 }, { "epoch": 0.4052973069060528, "grad_norm": 1.6012718677520752, "learning_rate": 1.960669920244952e-05, "loss": 1.9614, "step": 11400 }, { "epoch": 0.4056528308594792, "grad_norm": 1.623199224472046, "learning_rate": 1.960561015776211e-05, "loss": 2.0027, "step": 11410 }, { "epoch": 0.40600835481290554, "grad_norm": 1.6276559829711914, "learning_rate": 1.9604519637713207e-05, "loss": 1.9317, "step": 11420 }, { "epoch": 0.4063638787663319, "grad_norm": 1.6999454498291016, "learning_rate": 1.9603427642470306e-05, "loss": 1.9109, "step": 11430 }, { "epoch": 0.40671940271975826, "grad_norm": 1.6149712800979614, "learning_rate": 1.9602334172201138e-05, "loss": 1.9643, "step": 11440 }, { "epoch": 0.4070749266731846, "grad_norm": 1.6748502254486084, "learning_rate": 1.9601239227073644e-05, "loss": 1.9647, "step": 11450 }, { "epoch": 0.407430450626611, "grad_norm": 1.6914095878601074, "learning_rate": 1.9600142807256003e-05, "loss": 1.9464, "step": 11460 }, { "epoch": 0.40778597458003735, "grad_norm": 1.6857210397720337, "learning_rate": 1.9599044912916624e-05, "loss": 1.9394, "step": 11470 }, { "epoch": 0.4081414985334637, "grad_norm": 1.6877509355545044, "learning_rate": 1.9597945544224134e-05, "loss": 1.9421, "step": 11480 }, { "epoch": 0.40849702248689007, "grad_norm": 1.6180742979049683, "learning_rate": 1.9596844701347386e-05, "loss": 1.9018, "step": 11490 }, { "epoch": 0.40885254644031643, "grad_norm": 2.0876150131225586, "learning_rate": 1.9595742384455466e-05, "loss": 1.8849, "step": 11500 }, { "epoch": 0.4092080703937428, "grad_norm": 1.581478476524353, "learning_rate": 1.9594638593717676e-05, "loss": 1.9041, "step": 11510 }, { "epoch": 0.40956359434716916, "grad_norm": 1.655171275138855, "learning_rate": 1.9593533329303562e-05, "loss": 1.9051, "step": 11520 }, { "epoch": 0.4099191183005955, "grad_norm": 1.6481330394744873, "learning_rate": 1.959242659138288e-05, "loss": 1.9061, "step": 11530 }, { "epoch": 0.4102746422540219, "grad_norm": 1.7449766397476196, "learning_rate": 1.9591318380125618e-05, "loss": 1.9395, "step": 11540 }, { "epoch": 0.41063016620744824, "grad_norm": 1.6915010213851929, "learning_rate": 1.959020869570199e-05, "loss": 1.932, "step": 11550 }, { "epoch": 0.4109856901608746, "grad_norm": 1.5623327493667603, "learning_rate": 1.958909753828244e-05, "loss": 1.8734, "step": 11560 }, { "epoch": 0.41134121411430097, "grad_norm": 1.646648645401001, "learning_rate": 1.9587984908037628e-05, "loss": 1.8816, "step": 11570 }, { "epoch": 0.41169673806772733, "grad_norm": 1.6405104398727417, "learning_rate": 1.9586870805138452e-05, "loss": 1.9458, "step": 11580 }, { "epoch": 0.4120522620211537, "grad_norm": 1.5775431394577026, "learning_rate": 1.9585755229756035e-05, "loss": 1.8958, "step": 11590 }, { "epoch": 0.41240778597458005, "grad_norm": 1.6822415590286255, "learning_rate": 1.9584638182061715e-05, "loss": 1.9371, "step": 11600 }, { "epoch": 0.4127633099280064, "grad_norm": 1.6032915115356445, "learning_rate": 1.9583519662227067e-05, "loss": 1.9238, "step": 11610 }, { "epoch": 0.4131188338814328, "grad_norm": 1.6131377220153809, "learning_rate": 1.958239967042389e-05, "loss": 1.9565, "step": 11620 }, { "epoch": 0.41347435783485914, "grad_norm": 1.6385976076126099, "learning_rate": 1.95812782068242e-05, "loss": 1.8512, "step": 11630 }, { "epoch": 0.4138298817882855, "grad_norm": 1.616721272468567, "learning_rate": 1.9580155271600257e-05, "loss": 1.928, "step": 11640 }, { "epoch": 0.41418540574171187, "grad_norm": 1.5912649631500244, "learning_rate": 1.9579030864924528e-05, "loss": 1.9304, "step": 11650 }, { "epoch": 0.4145409296951382, "grad_norm": 1.5707584619522095, "learning_rate": 1.9577904986969724e-05, "loss": 1.8915, "step": 11660 }, { "epoch": 0.4148964536485646, "grad_norm": 1.617496132850647, "learning_rate": 1.9576777637908765e-05, "loss": 1.9334, "step": 11670 }, { "epoch": 0.41525197760199095, "grad_norm": 1.6306051015853882, "learning_rate": 1.9575648817914808e-05, "loss": 1.9461, "step": 11680 }, { "epoch": 0.4156075015554173, "grad_norm": 1.6395132541656494, "learning_rate": 1.9574518527161234e-05, "loss": 1.902, "step": 11690 }, { "epoch": 0.4159630255088437, "grad_norm": 1.6522890329360962, "learning_rate": 1.9573386765821647e-05, "loss": 1.9307, "step": 11700 }, { "epoch": 0.41631854946227004, "grad_norm": 1.6019282341003418, "learning_rate": 1.9572253534069876e-05, "loss": 1.9779, "step": 11710 }, { "epoch": 0.4166740734156964, "grad_norm": 1.6080857515335083, "learning_rate": 1.9571118832079982e-05, "loss": 1.974, "step": 11720 }, { "epoch": 0.41702959736912276, "grad_norm": 1.581402063369751, "learning_rate": 1.956998266002625e-05, "loss": 1.9045, "step": 11730 }, { "epoch": 0.4173851213225491, "grad_norm": 1.6517729759216309, "learning_rate": 1.9568845018083182e-05, "loss": 1.9641, "step": 11740 }, { "epoch": 0.4177406452759755, "grad_norm": 1.6685960292816162, "learning_rate": 1.9567705906425515e-05, "loss": 1.9623, "step": 11750 }, { "epoch": 0.41809616922940185, "grad_norm": 1.6158983707427979, "learning_rate": 1.956656532522821e-05, "loss": 1.9484, "step": 11760 }, { "epoch": 0.4184516931828282, "grad_norm": 1.55313241481781, "learning_rate": 1.956542327466646e-05, "loss": 1.8969, "step": 11770 }, { "epoch": 0.4188072171362546, "grad_norm": 1.6633541584014893, "learning_rate": 1.9564279754915666e-05, "loss": 1.9344, "step": 11780 }, { "epoch": 0.41916274108968093, "grad_norm": 1.5694234371185303, "learning_rate": 1.9563134766151474e-05, "loss": 1.9093, "step": 11790 }, { "epoch": 0.4195182650431073, "grad_norm": 1.6353678703308105, "learning_rate": 1.956198830854974e-05, "loss": 1.8947, "step": 11800 }, { "epoch": 0.41987378899653366, "grad_norm": 1.6353477239608765, "learning_rate": 1.9560840382286556e-05, "loss": 1.8985, "step": 11810 }, { "epoch": 0.42022931294996, "grad_norm": 1.6388938426971436, "learning_rate": 1.9559690987538235e-05, "loss": 1.9071, "step": 11820 }, { "epoch": 0.4205848369033864, "grad_norm": 1.7158305644989014, "learning_rate": 1.955854012448132e-05, "loss": 1.9155, "step": 11830 }, { "epoch": 0.42094036085681275, "grad_norm": 1.6991742849349976, "learning_rate": 1.9557387793292574e-05, "loss": 1.9092, "step": 11840 }, { "epoch": 0.4212958848102391, "grad_norm": 1.7193807363510132, "learning_rate": 1.955623399414899e-05, "loss": 1.9046, "step": 11850 }, { "epoch": 0.42165140876366547, "grad_norm": 1.6610537767410278, "learning_rate": 1.9555078727227782e-05, "loss": 1.9967, "step": 11860 }, { "epoch": 0.42200693271709183, "grad_norm": 1.6989997625350952, "learning_rate": 1.9553921992706392e-05, "loss": 1.9065, "step": 11870 }, { "epoch": 0.4223624566705182, "grad_norm": 1.6551401615142822, "learning_rate": 1.9552763790762484e-05, "loss": 1.9347, "step": 11880 }, { "epoch": 0.42271798062394456, "grad_norm": 1.628297209739685, "learning_rate": 1.9551604121573956e-05, "loss": 1.9132, "step": 11890 }, { "epoch": 0.4230735045773709, "grad_norm": 1.6718446016311646, "learning_rate": 1.9550442985318926e-05, "loss": 1.931, "step": 11900 }, { "epoch": 0.4234290285307973, "grad_norm": 1.5980277061462402, "learning_rate": 1.9549280382175734e-05, "loss": 1.9359, "step": 11910 }, { "epoch": 0.42378455248422364, "grad_norm": 1.787895917892456, "learning_rate": 1.954811631232295e-05, "loss": 1.8912, "step": 11920 }, { "epoch": 0.42414007643765, "grad_norm": 1.5402417182922363, "learning_rate": 1.9546950775939366e-05, "loss": 1.9499, "step": 11930 }, { "epoch": 0.42449560039107637, "grad_norm": 1.5597251653671265, "learning_rate": 1.9545783773204006e-05, "loss": 1.9, "step": 11940 }, { "epoch": 0.42485112434450273, "grad_norm": 1.6577147245407104, "learning_rate": 1.954461530429611e-05, "loss": 1.9103, "step": 11950 }, { "epoch": 0.4252066482979291, "grad_norm": 1.6680374145507812, "learning_rate": 1.9543445369395144e-05, "loss": 1.8914, "step": 11960 }, { "epoch": 0.42556217225135545, "grad_norm": 1.5822381973266602, "learning_rate": 1.954227396868081e-05, "loss": 1.8932, "step": 11970 }, { "epoch": 0.4259176962047818, "grad_norm": 1.7338037490844727, "learning_rate": 1.9541101102333026e-05, "loss": 1.9341, "step": 11980 }, { "epoch": 0.4262732201582082, "grad_norm": 1.7024821043014526, "learning_rate": 1.9539926770531937e-05, "loss": 1.9385, "step": 11990 }, { "epoch": 0.42662874411163454, "grad_norm": 1.6833677291870117, "learning_rate": 1.9538750973457907e-05, "loss": 1.9207, "step": 12000 }, { "epoch": 0.4269842680650609, "grad_norm": 1.6972459554672241, "learning_rate": 1.953757371129154e-05, "loss": 1.8644, "step": 12010 }, { "epoch": 0.42733979201848726, "grad_norm": 1.5408259630203247, "learning_rate": 1.953639498421365e-05, "loss": 1.9359, "step": 12020 }, { "epoch": 0.4276953159719136, "grad_norm": 1.6107205152511597, "learning_rate": 1.9535214792405286e-05, "loss": 1.9678, "step": 12030 }, { "epoch": 0.42805083992534, "grad_norm": 1.7111223936080933, "learning_rate": 1.9534033136047715e-05, "loss": 1.9065, "step": 12040 }, { "epoch": 0.42840636387876635, "grad_norm": 1.6180115938186646, "learning_rate": 1.9532850015322434e-05, "loss": 1.9034, "step": 12050 }, { "epoch": 0.4287618878321927, "grad_norm": 1.5723388195037842, "learning_rate": 1.953166543041116e-05, "loss": 1.926, "step": 12060 }, { "epoch": 0.4291174117856191, "grad_norm": 1.6159480810165405, "learning_rate": 1.953047938149584e-05, "loss": 1.9281, "step": 12070 }, { "epoch": 0.42947293573904544, "grad_norm": 1.650526523590088, "learning_rate": 1.9529291868758646e-05, "loss": 1.9341, "step": 12080 }, { "epoch": 0.4298284596924718, "grad_norm": 1.643018126487732, "learning_rate": 1.952810289238197e-05, "loss": 1.9349, "step": 12090 }, { "epoch": 0.43018398364589816, "grad_norm": 1.7136913537979126, "learning_rate": 1.952691245254843e-05, "loss": 1.9012, "step": 12100 }, { "epoch": 0.4305395075993245, "grad_norm": 1.5042798519134521, "learning_rate": 1.952572054944087e-05, "loss": 1.9318, "step": 12110 }, { "epoch": 0.4308950315527509, "grad_norm": 1.6267616748809814, "learning_rate": 1.9524527183242364e-05, "loss": 1.9018, "step": 12120 }, { "epoch": 0.43125055550617725, "grad_norm": 1.628717064857483, "learning_rate": 1.95233323541362e-05, "loss": 1.9261, "step": 12130 }, { "epoch": 0.4316060794596036, "grad_norm": 1.5327138900756836, "learning_rate": 1.95221360623059e-05, "loss": 1.9033, "step": 12140 }, { "epoch": 0.43196160341302997, "grad_norm": 1.6792123317718506, "learning_rate": 1.95209383079352e-05, "loss": 1.8913, "step": 12150 }, { "epoch": 0.43231712736645633, "grad_norm": 1.5949348211288452, "learning_rate": 1.951973909120808e-05, "loss": 1.9061, "step": 12160 }, { "epoch": 0.4326726513198827, "grad_norm": 1.5922819375991821, "learning_rate": 1.9518538412308717e-05, "loss": 1.979, "step": 12170 }, { "epoch": 0.43302817527330906, "grad_norm": 1.6448159217834473, "learning_rate": 1.9517336271421542e-05, "loss": 1.9077, "step": 12180 }, { "epoch": 0.4333836992267354, "grad_norm": 1.6367768049240112, "learning_rate": 1.9516132668731186e-05, "loss": 1.9506, "step": 12190 }, { "epoch": 0.4337392231801618, "grad_norm": 1.6600077152252197, "learning_rate": 1.951492760442252e-05, "loss": 1.9433, "step": 12200 }, { "epoch": 0.43409474713358814, "grad_norm": 1.5921251773834229, "learning_rate": 1.9513721078680633e-05, "loss": 1.9233, "step": 12210 }, { "epoch": 0.4344502710870145, "grad_norm": 1.6690195798873901, "learning_rate": 1.9512513091690838e-05, "loss": 1.9013, "step": 12220 }, { "epoch": 0.43480579504044087, "grad_norm": 1.6232579946517944, "learning_rate": 1.951130364363868e-05, "loss": 1.9308, "step": 12230 }, { "epoch": 0.43516131899386723, "grad_norm": 1.5536707639694214, "learning_rate": 1.9510092734709917e-05, "loss": 1.8874, "step": 12240 }, { "epoch": 0.4355168429472936, "grad_norm": 1.5925525426864624, "learning_rate": 1.9508880365090537e-05, "loss": 1.9133, "step": 12250 }, { "epoch": 0.43587236690071995, "grad_norm": 1.5788756608963013, "learning_rate": 1.9507666534966755e-05, "loss": 1.9278, "step": 12260 }, { "epoch": 0.4362278908541463, "grad_norm": 1.5934933423995972, "learning_rate": 1.9506451244525008e-05, "loss": 1.8873, "step": 12270 }, { "epoch": 0.4365834148075727, "grad_norm": 1.6129816770553589, "learning_rate": 1.9505234493951953e-05, "loss": 1.8911, "step": 12280 }, { "epoch": 0.43693893876099904, "grad_norm": 1.5828301906585693, "learning_rate": 1.950401628343448e-05, "loss": 1.8813, "step": 12290 }, { "epoch": 0.4372944627144254, "grad_norm": 1.6710927486419678, "learning_rate": 1.9502796613159698e-05, "loss": 1.9387, "step": 12300 }, { "epoch": 0.43764998666785176, "grad_norm": 1.695595622062683, "learning_rate": 1.9501575483314938e-05, "loss": 1.9406, "step": 12310 }, { "epoch": 0.4380055106212781, "grad_norm": 1.6410837173461914, "learning_rate": 1.9500352894087754e-05, "loss": 1.9012, "step": 12320 }, { "epoch": 0.4383610345747045, "grad_norm": 1.558901309967041, "learning_rate": 1.949912884566594e-05, "loss": 1.9015, "step": 12330 }, { "epoch": 0.43871655852813085, "grad_norm": 1.6479629278182983, "learning_rate": 1.9497903338237495e-05, "loss": 1.8987, "step": 12340 }, { "epoch": 0.4390720824815572, "grad_norm": 1.517884612083435, "learning_rate": 1.9496676371990647e-05, "loss": 1.8754, "step": 12350 }, { "epoch": 0.4394276064349836, "grad_norm": 1.6467676162719727, "learning_rate": 1.9495447947113852e-05, "loss": 1.9431, "step": 12360 }, { "epoch": 0.43978313038840994, "grad_norm": 1.6293061971664429, "learning_rate": 1.949421806379579e-05, "loss": 1.9369, "step": 12370 }, { "epoch": 0.4401386543418363, "grad_norm": 1.5900800228118896, "learning_rate": 1.9492986722225363e-05, "loss": 1.9401, "step": 12380 }, { "epoch": 0.44049417829526266, "grad_norm": 1.676795482635498, "learning_rate": 1.9491753922591695e-05, "loss": 1.8996, "step": 12390 }, { "epoch": 0.440849702248689, "grad_norm": 1.604500412940979, "learning_rate": 1.9490519665084142e-05, "loss": 1.9152, "step": 12400 }, { "epoch": 0.4412052262021154, "grad_norm": 1.7166574001312256, "learning_rate": 1.9489283949892275e-05, "loss": 1.9418, "step": 12410 }, { "epoch": 0.44156075015554175, "grad_norm": 1.6014140844345093, "learning_rate": 1.948804677720589e-05, "loss": 1.8891, "step": 12420 }, { "epoch": 0.4419162741089681, "grad_norm": 1.6134687662124634, "learning_rate": 1.9486808147215007e-05, "loss": 1.9174, "step": 12430 }, { "epoch": 0.44227179806239447, "grad_norm": 1.6366617679595947, "learning_rate": 1.948556806010988e-05, "loss": 1.9196, "step": 12440 }, { "epoch": 0.44262732201582083, "grad_norm": 1.7176318168640137, "learning_rate": 1.9484326516080973e-05, "loss": 1.9369, "step": 12450 }, { "epoch": 0.4429828459692472, "grad_norm": 1.704402208328247, "learning_rate": 1.948308351531898e-05, "loss": 1.9034, "step": 12460 }, { "epoch": 0.44333836992267356, "grad_norm": 1.6780890226364136, "learning_rate": 1.948183905801482e-05, "loss": 1.921, "step": 12470 }, { "epoch": 0.4436938938760999, "grad_norm": 1.6434670686721802, "learning_rate": 1.9480593144359627e-05, "loss": 1.921, "step": 12480 }, { "epoch": 0.4440494178295263, "grad_norm": 1.7120178937911987, "learning_rate": 1.9479345774544774e-05, "loss": 1.9433, "step": 12490 }, { "epoch": 0.44440494178295264, "grad_norm": 1.7237123250961304, "learning_rate": 1.9478096948761846e-05, "loss": 1.9048, "step": 12500 }, { "epoch": 0.444760465736379, "grad_norm": 1.5160402059555054, "learning_rate": 1.9476846667202657e-05, "loss": 1.8998, "step": 12510 }, { "epoch": 0.44511598968980537, "grad_norm": 1.6251343488693237, "learning_rate": 1.947559493005924e-05, "loss": 1.9001, "step": 12520 }, { "epoch": 0.44547151364323173, "grad_norm": 1.6302895545959473, "learning_rate": 1.947434173752385e-05, "loss": 1.894, "step": 12530 }, { "epoch": 0.4458270375966581, "grad_norm": 1.5880894660949707, "learning_rate": 1.9473087089788975e-05, "loss": 1.9003, "step": 12540 }, { "epoch": 0.44618256155008446, "grad_norm": 1.6394000053405762, "learning_rate": 1.947183098704732e-05, "loss": 1.9044, "step": 12550 }, { "epoch": 0.4465380855035108, "grad_norm": 1.6077085733413696, "learning_rate": 1.9470573429491816e-05, "loss": 1.9345, "step": 12560 }, { "epoch": 0.4468936094569372, "grad_norm": 1.6135870218276978, "learning_rate": 1.9469314417315615e-05, "loss": 1.9363, "step": 12570 }, { "epoch": 0.44724913341036354, "grad_norm": 1.593467354774475, "learning_rate": 1.9468053950712086e-05, "loss": 1.8974, "step": 12580 }, { "epoch": 0.4476046573637899, "grad_norm": 5.049159049987793, "learning_rate": 1.9466792029874845e-05, "loss": 1.9817, "step": 12590 }, { "epoch": 0.44796018131721627, "grad_norm": 1.66642165184021, "learning_rate": 1.9465528654997698e-05, "loss": 1.9273, "step": 12600 }, { "epoch": 0.44831570527064263, "grad_norm": 1.6853978633880615, "learning_rate": 1.9464263826274702e-05, "loss": 1.9217, "step": 12610 }, { "epoch": 0.448671229224069, "grad_norm": 1.632203459739685, "learning_rate": 1.946299754390012e-05, "loss": 1.8761, "step": 12620 }, { "epoch": 0.44902675317749535, "grad_norm": 1.5917268991470337, "learning_rate": 1.9461729808068456e-05, "loss": 1.9205, "step": 12630 }, { "epoch": 0.4493822771309217, "grad_norm": 1.6298881769180298, "learning_rate": 1.9460460618974414e-05, "loss": 1.9213, "step": 12640 }, { "epoch": 0.4497378010843481, "grad_norm": 1.6424980163574219, "learning_rate": 1.945918997681294e-05, "loss": 1.9061, "step": 12650 }, { "epoch": 0.45009332503777444, "grad_norm": 1.6484566926956177, "learning_rate": 1.9457917881779193e-05, "loss": 1.8696, "step": 12660 }, { "epoch": 0.4504488489912008, "grad_norm": 1.640828013420105, "learning_rate": 1.945664433406856e-05, "loss": 1.9214, "step": 12670 }, { "epoch": 0.45080437294462716, "grad_norm": 1.7374014854431152, "learning_rate": 1.9455369333876656e-05, "loss": 1.8919, "step": 12680 }, { "epoch": 0.4511598968980535, "grad_norm": 1.6202126741409302, "learning_rate": 1.9454092881399305e-05, "loss": 1.9765, "step": 12690 }, { "epoch": 0.4515154208514799, "grad_norm": 1.59443199634552, "learning_rate": 1.9452814976832567e-05, "loss": 1.9083, "step": 12700 }, { "epoch": 0.45187094480490625, "grad_norm": 1.7748507261276245, "learning_rate": 1.9451535620372715e-05, "loss": 1.8549, "step": 12710 }, { "epoch": 0.4522264687583326, "grad_norm": 1.6348975896835327, "learning_rate": 1.9450254812216254e-05, "loss": 1.9299, "step": 12720 }, { "epoch": 0.452581992711759, "grad_norm": 1.525699496269226, "learning_rate": 1.9448972552559907e-05, "loss": 1.9804, "step": 12730 }, { "epoch": 0.45293751666518534, "grad_norm": 1.582080602645874, "learning_rate": 1.9447688841600624e-05, "loss": 1.8692, "step": 12740 }, { "epoch": 0.4532930406186117, "grad_norm": 1.6867910623550415, "learning_rate": 1.944640367953557e-05, "loss": 1.9214, "step": 12750 }, { "epoch": 0.45364856457203806, "grad_norm": 1.6968588829040527, "learning_rate": 1.944511706656214e-05, "loss": 1.8956, "step": 12760 }, { "epoch": 0.4540040885254644, "grad_norm": 1.5950309038162231, "learning_rate": 1.944382900287795e-05, "loss": 1.8831, "step": 12770 }, { "epoch": 0.4543596124788908, "grad_norm": 1.6183698177337646, "learning_rate": 1.944253948868084e-05, "loss": 1.9049, "step": 12780 }, { "epoch": 0.45471513643231715, "grad_norm": 1.6075899600982666, "learning_rate": 1.944124852416887e-05, "loss": 1.904, "step": 12790 }, { "epoch": 0.4550706603857435, "grad_norm": 1.6205227375030518, "learning_rate": 1.9439956109540327e-05, "loss": 1.9254, "step": 12800 }, { "epoch": 0.45542618433916987, "grad_norm": 1.6769038438796997, "learning_rate": 1.9438662244993706e-05, "loss": 1.9147, "step": 12810 }, { "epoch": 0.45578170829259623, "grad_norm": 1.553166151046753, "learning_rate": 1.9437366930727753e-05, "loss": 1.8794, "step": 12820 }, { "epoch": 0.4561372322460226, "grad_norm": 1.585443377494812, "learning_rate": 1.9436070166941408e-05, "loss": 1.8902, "step": 12830 }, { "epoch": 0.45649275619944896, "grad_norm": 1.6273727416992188, "learning_rate": 1.943477195383385e-05, "loss": 1.9012, "step": 12840 }, { "epoch": 0.4568482801528753, "grad_norm": 1.5927307605743408, "learning_rate": 1.9433472291604478e-05, "loss": 1.9499, "step": 12850 }, { "epoch": 0.4572038041063017, "grad_norm": 1.636775016784668, "learning_rate": 1.943217118045291e-05, "loss": 1.9058, "step": 12860 }, { "epoch": 0.45755932805972804, "grad_norm": 1.5159486532211304, "learning_rate": 1.9430868620578987e-05, "loss": 1.8783, "step": 12870 }, { "epoch": 0.4579148520131544, "grad_norm": 1.6984831094741821, "learning_rate": 1.9429564612182776e-05, "loss": 1.9713, "step": 12880 }, { "epoch": 0.45827037596658077, "grad_norm": 1.663204550743103, "learning_rate": 1.9428259155464566e-05, "loss": 1.9375, "step": 12890 }, { "epoch": 0.45862589992000713, "grad_norm": 1.7423876523971558, "learning_rate": 1.9426952250624866e-05, "loss": 1.9169, "step": 12900 }, { "epoch": 0.4589814238734335, "grad_norm": 1.6697475910186768, "learning_rate": 1.9425643897864404e-05, "loss": 1.8952, "step": 12910 }, { "epoch": 0.45933694782685985, "grad_norm": 1.5229991674423218, "learning_rate": 1.9424334097384143e-05, "loss": 1.8981, "step": 12920 }, { "epoch": 0.4596924717802862, "grad_norm": 1.6769003868103027, "learning_rate": 1.9423022849385256e-05, "loss": 1.9478, "step": 12930 }, { "epoch": 0.4600479957337126, "grad_norm": 1.5650204420089722, "learning_rate": 1.942171015406914e-05, "loss": 1.8654, "step": 12940 }, { "epoch": 0.46040351968713894, "grad_norm": 1.6443099975585938, "learning_rate": 1.942039601163742e-05, "loss": 1.8507, "step": 12950 }, { "epoch": 0.4607590436405653, "grad_norm": 1.5867111682891846, "learning_rate": 1.9419080422291935e-05, "loss": 1.923, "step": 12960 }, { "epoch": 0.46111456759399166, "grad_norm": 1.7638205289840698, "learning_rate": 1.941776338623476e-05, "loss": 1.924, "step": 12970 }, { "epoch": 0.461470091547418, "grad_norm": 1.6498541831970215, "learning_rate": 1.9416444903668175e-05, "loss": 1.9073, "step": 12980 }, { "epoch": 0.4618256155008444, "grad_norm": 1.7238328456878662, "learning_rate": 1.9415124974794696e-05, "loss": 1.9016, "step": 12990 }, { "epoch": 0.46218113945427075, "grad_norm": 1.6477906703948975, "learning_rate": 1.9413803599817056e-05, "loss": 1.9108, "step": 13000 }, { "epoch": 0.4625366634076971, "grad_norm": 1.6353874206542969, "learning_rate": 1.9412480778938206e-05, "loss": 1.8839, "step": 13010 }, { "epoch": 0.4628921873611235, "grad_norm": 1.6428017616271973, "learning_rate": 1.9411156512361327e-05, "loss": 1.8822, "step": 13020 }, { "epoch": 0.46324771131454984, "grad_norm": 1.6262905597686768, "learning_rate": 1.9409830800289814e-05, "loss": 1.9024, "step": 13030 }, { "epoch": 0.4636032352679762, "grad_norm": 1.6137045621871948, "learning_rate": 1.940850364292729e-05, "loss": 1.9447, "step": 13040 }, { "epoch": 0.46395875922140256, "grad_norm": 1.689227819442749, "learning_rate": 1.9407175040477598e-05, "loss": 1.9493, "step": 13050 }, { "epoch": 0.4643142831748289, "grad_norm": 1.5779759883880615, "learning_rate": 1.9405844993144806e-05, "loss": 1.8913, "step": 13060 }, { "epoch": 0.4646698071282553, "grad_norm": 1.5751711130142212, "learning_rate": 1.94045135011332e-05, "loss": 1.8812, "step": 13070 }, { "epoch": 0.46502533108168165, "grad_norm": 1.5909771919250488, "learning_rate": 1.940318056464728e-05, "loss": 1.9119, "step": 13080 }, { "epoch": 0.465380855035108, "grad_norm": 1.6060901880264282, "learning_rate": 1.9401846183891785e-05, "loss": 1.8829, "step": 13090 }, { "epoch": 0.46573637898853437, "grad_norm": 1.659328579902649, "learning_rate": 1.9400510359071668e-05, "loss": 1.9048, "step": 13100 }, { "epoch": 0.46609190294196073, "grad_norm": 1.6586227416992188, "learning_rate": 1.9399173090392102e-05, "loss": 1.8598, "step": 13110 }, { "epoch": 0.4664474268953871, "grad_norm": 1.7154213190078735, "learning_rate": 1.9397834378058484e-05, "loss": 1.9386, "step": 13120 }, { "epoch": 0.46680295084881346, "grad_norm": 1.599129319190979, "learning_rate": 1.939649422227643e-05, "loss": 1.8908, "step": 13130 }, { "epoch": 0.4671584748022398, "grad_norm": 1.5811879634857178, "learning_rate": 1.9395152623251778e-05, "loss": 1.8768, "step": 13140 }, { "epoch": 0.4675139987556662, "grad_norm": 1.5494011640548706, "learning_rate": 1.939380958119059e-05, "loss": 1.8714, "step": 13150 }, { "epoch": 0.46786952270909254, "grad_norm": 1.5415995121002197, "learning_rate": 1.9392465096299154e-05, "loss": 1.8892, "step": 13160 }, { "epoch": 0.4682250466625189, "grad_norm": 1.589040756225586, "learning_rate": 1.9391119168783966e-05, "loss": 1.9234, "step": 13170 }, { "epoch": 0.46858057061594527, "grad_norm": 1.597524881362915, "learning_rate": 1.938977179885176e-05, "loss": 1.885, "step": 13180 }, { "epoch": 0.46893609456937163, "grad_norm": 1.7451609373092651, "learning_rate": 1.938842298670948e-05, "loss": 1.9116, "step": 13190 }, { "epoch": 0.469291618522798, "grad_norm": 1.6840392351150513, "learning_rate": 1.938707273256429e-05, "loss": 1.9634, "step": 13200 }, { "epoch": 0.46964714247622436, "grad_norm": 1.607974648475647, "learning_rate": 1.938572103662359e-05, "loss": 1.9127, "step": 13210 }, { "epoch": 0.4700026664296507, "grad_norm": 1.5624415874481201, "learning_rate": 1.938436789909499e-05, "loss": 1.9265, "step": 13220 }, { "epoch": 0.4703581903830771, "grad_norm": 1.5929343700408936, "learning_rate": 1.9383013320186317e-05, "loss": 1.9057, "step": 13230 }, { "epoch": 0.47071371433650344, "grad_norm": 1.5993164777755737, "learning_rate": 1.9381657300105633e-05, "loss": 1.9265, "step": 13240 }, { "epoch": 0.4710692382899298, "grad_norm": 1.6968023777008057, "learning_rate": 1.9380299839061207e-05, "loss": 1.9215, "step": 13250 }, { "epoch": 0.47142476224335617, "grad_norm": 1.6598241329193115, "learning_rate": 1.9378940937261544e-05, "loss": 1.9089, "step": 13260 }, { "epoch": 0.47178028619678253, "grad_norm": 1.6763114929199219, "learning_rate": 1.937758059491536e-05, "loss": 1.882, "step": 13270 }, { "epoch": 0.4721358101502089, "grad_norm": 1.6208711862564087, "learning_rate": 1.937621881223159e-05, "loss": 1.949, "step": 13280 }, { "epoch": 0.47249133410363525, "grad_norm": 1.656736969947815, "learning_rate": 1.9374855589419406e-05, "loss": 1.9028, "step": 13290 }, { "epoch": 0.4728468580570616, "grad_norm": 1.6773045063018799, "learning_rate": 1.937349092668818e-05, "loss": 1.8978, "step": 13300 }, { "epoch": 0.473202382010488, "grad_norm": 1.639001727104187, "learning_rate": 1.937212482424752e-05, "loss": 1.9054, "step": 13310 }, { "epoch": 0.47355790596391434, "grad_norm": 1.6301771402359009, "learning_rate": 1.9370757282307252e-05, "loss": 1.8839, "step": 13320 }, { "epoch": 0.4739134299173407, "grad_norm": 1.6172529458999634, "learning_rate": 1.9369388301077422e-05, "loss": 1.8938, "step": 13330 }, { "epoch": 0.47426895387076706, "grad_norm": 1.5646872520446777, "learning_rate": 1.9368017880768292e-05, "loss": 1.9032, "step": 13340 }, { "epoch": 0.4746244778241934, "grad_norm": 1.6018928289413452, "learning_rate": 1.9366646021590356e-05, "loss": 1.909, "step": 13350 }, { "epoch": 0.4749800017776198, "grad_norm": 1.692284345626831, "learning_rate": 1.9365272723754318e-05, "loss": 1.9055, "step": 13360 }, { "epoch": 0.47533552573104615, "grad_norm": 1.6436405181884766, "learning_rate": 1.9363897987471113e-05, "loss": 1.9448, "step": 13370 }, { "epoch": 0.4756910496844725, "grad_norm": 1.5884928703308105, "learning_rate": 1.936252181295189e-05, "loss": 1.8558, "step": 13380 }, { "epoch": 0.4760465736378989, "grad_norm": 1.5993527173995972, "learning_rate": 1.9361144200408016e-05, "loss": 1.8373, "step": 13390 }, { "epoch": 0.47640209759132524, "grad_norm": 1.6061522960662842, "learning_rate": 1.9359765150051092e-05, "loss": 1.8592, "step": 13400 }, { "epoch": 0.4767576215447516, "grad_norm": 1.6307588815689087, "learning_rate": 1.9358384662092923e-05, "loss": 1.9192, "step": 13410 }, { "epoch": 0.47711314549817796, "grad_norm": 1.5935544967651367, "learning_rate": 1.935700273674555e-05, "loss": 1.8745, "step": 13420 }, { "epoch": 0.4774686694516043, "grad_norm": 1.5504134893417358, "learning_rate": 1.9355619374221223e-05, "loss": 1.9084, "step": 13430 }, { "epoch": 0.4778241934050307, "grad_norm": 1.635148286819458, "learning_rate": 1.9354234574732422e-05, "loss": 1.915, "step": 13440 }, { "epoch": 0.47817971735845705, "grad_norm": 1.7150083780288696, "learning_rate": 1.9352848338491842e-05, "loss": 1.867, "step": 13450 }, { "epoch": 0.4785352413118834, "grad_norm": 1.595038890838623, "learning_rate": 1.93514606657124e-05, "loss": 1.885, "step": 13460 }, { "epoch": 0.47889076526530977, "grad_norm": 1.6162172555923462, "learning_rate": 1.9350071556607234e-05, "loss": 1.8561, "step": 13470 }, { "epoch": 0.47924628921873613, "grad_norm": 1.6387050151824951, "learning_rate": 1.93486810113897e-05, "loss": 1.9129, "step": 13480 }, { "epoch": 0.4796018131721625, "grad_norm": 1.7246519327163696, "learning_rate": 1.9347289030273385e-05, "loss": 1.9086, "step": 13490 }, { "epoch": 0.47995733712558886, "grad_norm": 1.7230844497680664, "learning_rate": 1.934589561347208e-05, "loss": 1.9352, "step": 13500 }, { "epoch": 0.4803128610790152, "grad_norm": 1.6448251008987427, "learning_rate": 1.9344500761199806e-05, "loss": 1.9607, "step": 13510 }, { "epoch": 0.4806683850324416, "grad_norm": 1.631333589553833, "learning_rate": 1.9343104473670808e-05, "loss": 1.851, "step": 13520 }, { "epoch": 0.48102390898586794, "grad_norm": 1.6933995485305786, "learning_rate": 1.9341706751099542e-05, "loss": 1.9284, "step": 13530 }, { "epoch": 0.4813794329392943, "grad_norm": 1.556166172027588, "learning_rate": 1.9340307593700695e-05, "loss": 1.8744, "step": 13540 }, { "epoch": 0.48173495689272067, "grad_norm": 1.5917972326278687, "learning_rate": 1.933890700168916e-05, "loss": 1.9204, "step": 13550 }, { "epoch": 0.48209048084614703, "grad_norm": 1.685929298400879, "learning_rate": 1.933750497528007e-05, "loss": 1.9084, "step": 13560 }, { "epoch": 0.4824460047995734, "grad_norm": 1.6132503747940063, "learning_rate": 1.9336101514688764e-05, "loss": 1.913, "step": 13570 }, { "epoch": 0.48280152875299975, "grad_norm": 1.6278367042541504, "learning_rate": 1.93346966201308e-05, "loss": 1.9202, "step": 13580 }, { "epoch": 0.4831570527064261, "grad_norm": 1.604725956916809, "learning_rate": 1.9333290291821966e-05, "loss": 1.943, "step": 13590 }, { "epoch": 0.4835125766598525, "grad_norm": 1.6484644412994385, "learning_rate": 1.933188252997826e-05, "loss": 1.8807, "step": 13600 }, { "epoch": 0.48386810061327884, "grad_norm": 1.629534363746643, "learning_rate": 1.9330473334815912e-05, "loss": 1.9084, "step": 13610 }, { "epoch": 0.4842236245667052, "grad_norm": 1.6911847591400146, "learning_rate": 1.932906270655136e-05, "loss": 1.9041, "step": 13620 }, { "epoch": 0.48457914852013156, "grad_norm": 1.6820714473724365, "learning_rate": 1.9327650645401272e-05, "loss": 1.9035, "step": 13630 }, { "epoch": 0.4849346724735579, "grad_norm": 1.5599110126495361, "learning_rate": 1.932623715158253e-05, "loss": 1.8682, "step": 13640 }, { "epoch": 0.4852901964269843, "grad_norm": 1.7825508117675781, "learning_rate": 1.9324822225312236e-05, "loss": 1.8875, "step": 13650 }, { "epoch": 0.48564572038041065, "grad_norm": 1.5723313093185425, "learning_rate": 1.9323405866807716e-05, "loss": 1.8622, "step": 13660 }, { "epoch": 0.486001244333837, "grad_norm": 1.7442368268966675, "learning_rate": 1.9321988076286514e-05, "loss": 1.8773, "step": 13670 }, { "epoch": 0.4863567682872634, "grad_norm": 1.6873736381530762, "learning_rate": 1.932056885396639e-05, "loss": 1.9515, "step": 13680 }, { "epoch": 0.48671229224068974, "grad_norm": 1.6768368482589722, "learning_rate": 1.931914820006533e-05, "loss": 1.9093, "step": 13690 }, { "epoch": 0.4870678161941161, "grad_norm": 1.6003371477127075, "learning_rate": 1.9317726114801544e-05, "loss": 1.915, "step": 13700 }, { "epoch": 0.48742334014754246, "grad_norm": 1.6443897485733032, "learning_rate": 1.931630259839344e-05, "loss": 1.9073, "step": 13710 }, { "epoch": 0.4877788641009688, "grad_norm": 1.5972694158554077, "learning_rate": 1.931487765105968e-05, "loss": 1.8917, "step": 13720 }, { "epoch": 0.4881343880543952, "grad_norm": 1.5997296571731567, "learning_rate": 1.9313451273019112e-05, "loss": 1.9197, "step": 13730 }, { "epoch": 0.48848991200782155, "grad_norm": 1.5601133108139038, "learning_rate": 1.9312023464490825e-05, "loss": 1.9408, "step": 13740 }, { "epoch": 0.4888454359612479, "grad_norm": 1.601619839668274, "learning_rate": 1.9310594225694122e-05, "loss": 1.9261, "step": 13750 }, { "epoch": 0.48920095991467427, "grad_norm": 1.6181591749191284, "learning_rate": 1.9309163556848523e-05, "loss": 1.9181, "step": 13760 }, { "epoch": 0.48955648386810063, "grad_norm": 1.5995105504989624, "learning_rate": 1.930773145817377e-05, "loss": 1.8906, "step": 13770 }, { "epoch": 0.489912007821527, "grad_norm": 1.6839659214019775, "learning_rate": 1.930629792988983e-05, "loss": 1.9285, "step": 13780 }, { "epoch": 0.49026753177495336, "grad_norm": 1.7488518953323364, "learning_rate": 1.930486297221687e-05, "loss": 1.9038, "step": 13790 }, { "epoch": 0.4906230557283797, "grad_norm": 1.7097923755645752, "learning_rate": 1.9303426585375305e-05, "loss": 1.8895, "step": 13800 }, { "epoch": 0.4909785796818061, "grad_norm": 1.5708833932876587, "learning_rate": 1.930198876958575e-05, "loss": 1.8745, "step": 13810 }, { "epoch": 0.49133410363523244, "grad_norm": 1.5927371978759766, "learning_rate": 1.9300549525069043e-05, "loss": 1.9429, "step": 13820 }, { "epoch": 0.4916896275886588, "grad_norm": 1.603779673576355, "learning_rate": 1.929910885204624e-05, "loss": 1.8854, "step": 13830 }, { "epoch": 0.49204515154208517, "grad_norm": 1.6558383703231812, "learning_rate": 1.9297666750738627e-05, "loss": 1.9246, "step": 13840 }, { "epoch": 0.49240067549551153, "grad_norm": 1.598236083984375, "learning_rate": 1.9296223221367696e-05, "loss": 1.9234, "step": 13850 }, { "epoch": 0.4927561994489379, "grad_norm": 1.67493736743927, "learning_rate": 1.929477826415517e-05, "loss": 1.8706, "step": 13860 }, { "epoch": 0.49311172340236425, "grad_norm": 1.7419062852859497, "learning_rate": 1.929333187932298e-05, "loss": 1.8659, "step": 13870 }, { "epoch": 0.4934672473557906, "grad_norm": 1.6347029209136963, "learning_rate": 1.929188406709328e-05, "loss": 1.9214, "step": 13880 }, { "epoch": 0.493822771309217, "grad_norm": 1.6613197326660156, "learning_rate": 1.929043482768845e-05, "loss": 1.8609, "step": 13890 }, { "epoch": 0.49417829526264334, "grad_norm": 1.6763050556182861, "learning_rate": 1.928898416133108e-05, "loss": 1.8925, "step": 13900 }, { "epoch": 0.4945338192160697, "grad_norm": 1.6605231761932373, "learning_rate": 1.9287532068243986e-05, "loss": 1.9065, "step": 13910 }, { "epoch": 0.49488934316949607, "grad_norm": 1.751645565032959, "learning_rate": 1.92860785486502e-05, "loss": 1.9027, "step": 13920 }, { "epoch": 0.4952448671229224, "grad_norm": 1.6546722650527954, "learning_rate": 1.9284623602772973e-05, "loss": 1.8895, "step": 13930 }, { "epoch": 0.4956003910763488, "grad_norm": 1.5988556146621704, "learning_rate": 1.928316723083578e-05, "loss": 1.8798, "step": 13940 }, { "epoch": 0.49595591502977515, "grad_norm": 1.657577633857727, "learning_rate": 1.9281709433062298e-05, "loss": 1.9087, "step": 13950 }, { "epoch": 0.4963114389832015, "grad_norm": 1.639682412147522, "learning_rate": 1.928025020967645e-05, "loss": 1.8825, "step": 13960 }, { "epoch": 0.4966669629366279, "grad_norm": 1.5884422063827515, "learning_rate": 1.9278789560902354e-05, "loss": 1.8911, "step": 13970 }, { "epoch": 0.49702248689005424, "grad_norm": 1.6883779764175415, "learning_rate": 1.9277327486964364e-05, "loss": 1.9463, "step": 13980 }, { "epoch": 0.4973780108434806, "grad_norm": 1.6581050157546997, "learning_rate": 1.927586398808704e-05, "loss": 1.9201, "step": 13990 }, { "epoch": 0.49773353479690696, "grad_norm": 1.616876482963562, "learning_rate": 1.9274399064495162e-05, "loss": 1.8996, "step": 14000 }, { "epoch": 0.4980890587503333, "grad_norm": 1.7162045240402222, "learning_rate": 1.9272932716413742e-05, "loss": 1.9051, "step": 14010 }, { "epoch": 0.4984445827037597, "grad_norm": 1.655335545539856, "learning_rate": 1.9271464944068e-05, "loss": 1.9265, "step": 14020 }, { "epoch": 0.49880010665718605, "grad_norm": 1.575579285621643, "learning_rate": 1.9269995747683375e-05, "loss": 1.8908, "step": 14030 }, { "epoch": 0.4991556306106124, "grad_norm": 1.6541529893875122, "learning_rate": 1.9268525127485528e-05, "loss": 1.8684, "step": 14040 }, { "epoch": 0.4995111545640388, "grad_norm": 1.6520228385925293, "learning_rate": 1.9267053083700332e-05, "loss": 1.9144, "step": 14050 }, { "epoch": 0.49986667851746514, "grad_norm": 1.5795373916625977, "learning_rate": 1.9265579616553886e-05, "loss": 1.8651, "step": 14060 }, { "epoch": 0.5002222024708914, "grad_norm": 1.6121716499328613, "learning_rate": 1.926410472627251e-05, "loss": 1.8891, "step": 14070 }, { "epoch": 0.5005777264243179, "grad_norm": 1.676979899406433, "learning_rate": 1.9262628413082733e-05, "loss": 1.8769, "step": 14080 }, { "epoch": 0.5009332503777442, "grad_norm": 1.6514569520950317, "learning_rate": 1.9261150677211313e-05, "loss": 1.892, "step": 14090 }, { "epoch": 0.5012887743311706, "grad_norm": 1.5872944593429565, "learning_rate": 1.925967151888521e-05, "loss": 1.9033, "step": 14100 }, { "epoch": 0.5016442982845969, "grad_norm": 1.5890864133834839, "learning_rate": 1.9258190938331624e-05, "loss": 1.9534, "step": 14110 }, { "epoch": 0.5019998222380233, "grad_norm": 1.624387502670288, "learning_rate": 1.925670893577796e-05, "loss": 1.894, "step": 14120 }, { "epoch": 0.5023553461914496, "grad_norm": 1.62134850025177, "learning_rate": 1.9255225511451843e-05, "loss": 1.8916, "step": 14130 }, { "epoch": 0.502710870144876, "grad_norm": 1.5931661128997803, "learning_rate": 1.9253740665581117e-05, "loss": 1.8384, "step": 14140 }, { "epoch": 0.5030663940983023, "grad_norm": 1.6112724542617798, "learning_rate": 1.925225439839385e-05, "loss": 1.8913, "step": 14150 }, { "epoch": 0.5034219180517288, "grad_norm": 1.6279163360595703, "learning_rate": 1.9250766710118314e-05, "loss": 1.9066, "step": 14160 }, { "epoch": 0.5037774420051551, "grad_norm": 1.6252950429916382, "learning_rate": 1.9249277600983018e-05, "loss": 1.8862, "step": 14170 }, { "epoch": 0.5041329659585815, "grad_norm": 1.787638783454895, "learning_rate": 1.924778707121667e-05, "loss": 1.8761, "step": 14180 }, { "epoch": 0.5044884899120078, "grad_norm": 1.5983929634094238, "learning_rate": 1.9246295121048217e-05, "loss": 1.9112, "step": 14190 }, { "epoch": 0.5048440138654342, "grad_norm": 1.659288763999939, "learning_rate": 1.9244801750706807e-05, "loss": 1.8965, "step": 14200 }, { "epoch": 0.5051995378188605, "grad_norm": 1.6085327863693237, "learning_rate": 1.924330696042181e-05, "loss": 1.891, "step": 14210 }, { "epoch": 0.5055550617722869, "grad_norm": 1.627176284790039, "learning_rate": 1.9241810750422826e-05, "loss": 1.8777, "step": 14220 }, { "epoch": 0.5059105857257132, "grad_norm": 1.5823941230773926, "learning_rate": 1.9240313120939654e-05, "loss": 1.9021, "step": 14230 }, { "epoch": 0.5062661096791397, "grad_norm": 1.8027126789093018, "learning_rate": 1.9238814072202326e-05, "loss": 1.8911, "step": 14240 }, { "epoch": 0.506621633632566, "grad_norm": 1.660543441772461, "learning_rate": 1.9237313604441083e-05, "loss": 1.89, "step": 14250 }, { "epoch": 0.5069771575859924, "grad_norm": 1.633901834487915, "learning_rate": 1.923581171788639e-05, "loss": 1.8983, "step": 14260 }, { "epoch": 0.5073326815394187, "grad_norm": 1.521661400794983, "learning_rate": 1.9234308412768925e-05, "loss": 1.8813, "step": 14270 }, { "epoch": 0.5076882054928451, "grad_norm": 1.5929253101348877, "learning_rate": 1.9232803689319585e-05, "loss": 1.9159, "step": 14280 }, { "epoch": 0.5080437294462714, "grad_norm": 1.6317856311798096, "learning_rate": 1.9231297547769494e-05, "loss": 1.9374, "step": 14290 }, { "epoch": 0.5083992533996978, "grad_norm": 1.6200380325317383, "learning_rate": 1.9229789988349973e-05, "loss": 1.9038, "step": 14300 }, { "epoch": 0.5087547773531241, "grad_norm": 1.5889546871185303, "learning_rate": 1.9228281011292587e-05, "loss": 1.8463, "step": 14310 }, { "epoch": 0.5091103013065506, "grad_norm": 1.5688313245773315, "learning_rate": 1.92267706168291e-05, "loss": 1.8565, "step": 14320 }, { "epoch": 0.5094658252599769, "grad_norm": 1.6941825151443481, "learning_rate": 1.9225258805191494e-05, "loss": 1.8345, "step": 14330 }, { "epoch": 0.5098213492134033, "grad_norm": 1.541550874710083, "learning_rate": 1.922374557661198e-05, "loss": 1.9111, "step": 14340 }, { "epoch": 0.5101768731668296, "grad_norm": 1.5329803228378296, "learning_rate": 1.9222230931322977e-05, "loss": 1.9328, "step": 14350 }, { "epoch": 0.510532397120256, "grad_norm": 1.5915532112121582, "learning_rate": 1.922071486955713e-05, "loss": 1.8905, "step": 14360 }, { "epoch": 0.5108879210736823, "grad_norm": 1.7071433067321777, "learning_rate": 1.9219197391547294e-05, "loss": 1.8765, "step": 14370 }, { "epoch": 0.5112434450271087, "grad_norm": 1.560180425643921, "learning_rate": 1.921767849752654e-05, "loss": 1.9211, "step": 14380 }, { "epoch": 0.511598968980535, "grad_norm": 1.679560661315918, "learning_rate": 1.9216158187728165e-05, "loss": 1.8989, "step": 14390 }, { "epoch": 0.5119544929339614, "grad_norm": 1.572483777999878, "learning_rate": 1.921463646238568e-05, "loss": 1.9313, "step": 14400 }, { "epoch": 0.5123100168873878, "grad_norm": 1.6964569091796875, "learning_rate": 1.9213113321732807e-05, "loss": 1.938, "step": 14410 }, { "epoch": 0.5126655408408142, "grad_norm": 1.680965781211853, "learning_rate": 1.92115887660035e-05, "loss": 1.8979, "step": 14420 }, { "epoch": 0.5130210647942405, "grad_norm": 1.6551510095596313, "learning_rate": 1.921006279543191e-05, "loss": 1.8742, "step": 14430 }, { "epoch": 0.5133765887476669, "grad_norm": 1.6164863109588623, "learning_rate": 1.9208535410252425e-05, "loss": 1.9107, "step": 14440 }, { "epoch": 0.5137321127010932, "grad_norm": 1.6352697610855103, "learning_rate": 1.920700661069964e-05, "loss": 1.9052, "step": 14450 }, { "epoch": 0.5140876366545196, "grad_norm": 1.5313262939453125, "learning_rate": 1.9205476397008366e-05, "loss": 1.8627, "step": 14460 }, { "epoch": 0.5144431606079459, "grad_norm": 1.6885111331939697, "learning_rate": 1.9203944769413638e-05, "loss": 1.8998, "step": 14470 }, { "epoch": 0.5147986845613723, "grad_norm": 1.6766098737716675, "learning_rate": 1.9202411728150702e-05, "loss": 1.8591, "step": 14480 }, { "epoch": 0.5151542085147987, "grad_norm": 1.556369662284851, "learning_rate": 1.9200877273455024e-05, "loss": 1.872, "step": 14490 }, { "epoch": 0.5155097324682251, "grad_norm": 1.6718697547912598, "learning_rate": 1.9199341405562285e-05, "loss": 1.8891, "step": 14500 }, { "epoch": 0.5158652564216514, "grad_norm": 1.5532348155975342, "learning_rate": 1.919780412470839e-05, "loss": 1.8722, "step": 14510 }, { "epoch": 0.5162207803750778, "grad_norm": 1.6569695472717285, "learning_rate": 1.919626543112945e-05, "loss": 1.896, "step": 14520 }, { "epoch": 0.5165763043285041, "grad_norm": 1.6707987785339355, "learning_rate": 1.91947253250618e-05, "loss": 1.8953, "step": 14530 }, { "epoch": 0.5169318282819305, "grad_norm": 1.5621235370635986, "learning_rate": 1.919318380674199e-05, "loss": 1.8604, "step": 14540 }, { "epoch": 0.5172873522353568, "grad_norm": 1.6478444337844849, "learning_rate": 1.9191640876406793e-05, "loss": 1.8787, "step": 14550 }, { "epoch": 0.5176428761887832, "grad_norm": 1.6549452543258667, "learning_rate": 1.9190096534293188e-05, "loss": 1.8726, "step": 14560 }, { "epoch": 0.5179984001422095, "grad_norm": 1.6203386783599854, "learning_rate": 1.9188550780638376e-05, "loss": 1.9032, "step": 14570 }, { "epoch": 0.518353924095636, "grad_norm": 1.7803328037261963, "learning_rate": 1.9187003615679778e-05, "loss": 1.8958, "step": 14580 }, { "epoch": 0.5187094480490623, "grad_norm": 1.5585798025131226, "learning_rate": 1.9185455039655028e-05, "loss": 1.8964, "step": 14590 }, { "epoch": 0.5190649720024887, "grad_norm": 1.5389212369918823, "learning_rate": 1.9183905052801975e-05, "loss": 1.8958, "step": 14600 }, { "epoch": 0.519420495955915, "grad_norm": 1.609155535697937, "learning_rate": 1.918235365535869e-05, "loss": 1.8648, "step": 14610 }, { "epoch": 0.5197760199093414, "grad_norm": 1.6422481536865234, "learning_rate": 1.9180800847563462e-05, "loss": 1.8656, "step": 14620 }, { "epoch": 0.5201315438627677, "grad_norm": 1.6211472749710083, "learning_rate": 1.9179246629654782e-05, "loss": 1.8754, "step": 14630 }, { "epoch": 0.5204870678161941, "grad_norm": 1.5963804721832275, "learning_rate": 1.9177691001871377e-05, "loss": 1.8616, "step": 14640 }, { "epoch": 0.5208425917696204, "grad_norm": 1.593908667564392, "learning_rate": 1.917613396445218e-05, "loss": 1.9166, "step": 14650 }, { "epoch": 0.5211981157230469, "grad_norm": 1.6430562734603882, "learning_rate": 1.9174575517636338e-05, "loss": 1.9046, "step": 14660 }, { "epoch": 0.5215536396764732, "grad_norm": 1.6117980480194092, "learning_rate": 1.917301566166322e-05, "loss": 1.8737, "step": 14670 }, { "epoch": 0.5219091636298996, "grad_norm": 1.7191271781921387, "learning_rate": 1.9171454396772416e-05, "loss": 1.8638, "step": 14680 }, { "epoch": 0.5222646875833259, "grad_norm": 1.818167805671692, "learning_rate": 1.916989172320372e-05, "loss": 1.8808, "step": 14690 }, { "epoch": 0.5226202115367523, "grad_norm": 1.7359200716018677, "learning_rate": 1.916832764119715e-05, "loss": 1.9067, "step": 14700 }, { "epoch": 0.5229757354901786, "grad_norm": 1.5372364521026611, "learning_rate": 1.9166762150992944e-05, "loss": 1.9367, "step": 14710 }, { "epoch": 0.523331259443605, "grad_norm": 1.6493061780929565, "learning_rate": 1.9165195252831542e-05, "loss": 1.8905, "step": 14720 }, { "epoch": 0.5236867833970313, "grad_norm": 1.6419951915740967, "learning_rate": 1.916362694695362e-05, "loss": 1.9246, "step": 14730 }, { "epoch": 0.5240423073504578, "grad_norm": 1.6568384170532227, "learning_rate": 1.9162057233600052e-05, "loss": 1.891, "step": 14740 }, { "epoch": 0.5243978313038841, "grad_norm": 1.7263054847717285, "learning_rate": 1.916048611301194e-05, "loss": 1.8896, "step": 14750 }, { "epoch": 0.5247533552573105, "grad_norm": 1.6149612665176392, "learning_rate": 1.9158913585430602e-05, "loss": 1.8639, "step": 14760 }, { "epoch": 0.5251088792107368, "grad_norm": 1.6069010496139526, "learning_rate": 1.915733965109756e-05, "loss": 1.8915, "step": 14770 }, { "epoch": 0.5254644031641632, "grad_norm": 1.7398128509521484, "learning_rate": 1.9155764310254564e-05, "loss": 1.9007, "step": 14780 }, { "epoch": 0.5258199271175895, "grad_norm": 1.647915005683899, "learning_rate": 1.915418756314358e-05, "loss": 1.9293, "step": 14790 }, { "epoch": 0.5261754510710159, "grad_norm": 1.6997140645980835, "learning_rate": 1.9152609410006784e-05, "loss": 1.8964, "step": 14800 }, { "epoch": 0.5265309750244422, "grad_norm": 1.6560719013214111, "learning_rate": 1.9151029851086565e-05, "loss": 1.9442, "step": 14810 }, { "epoch": 0.5268864989778687, "grad_norm": 1.6268523931503296, "learning_rate": 1.9149448886625542e-05, "loss": 1.8794, "step": 14820 }, { "epoch": 0.527242022931295, "grad_norm": 1.5146976709365845, "learning_rate": 1.914786651686654e-05, "loss": 1.916, "step": 14830 }, { "epoch": 0.5275975468847214, "grad_norm": 1.564766764640808, "learning_rate": 1.9146282742052593e-05, "loss": 1.9018, "step": 14840 }, { "epoch": 0.5279530708381477, "grad_norm": 1.6078290939331055, "learning_rate": 1.9144697562426965e-05, "loss": 1.896, "step": 14850 }, { "epoch": 0.5283085947915741, "grad_norm": 1.6417776346206665, "learning_rate": 1.9143110978233133e-05, "loss": 1.8669, "step": 14860 }, { "epoch": 0.5286641187450004, "grad_norm": 1.5287001132965088, "learning_rate": 1.914152298971478e-05, "loss": 1.9229, "step": 14870 }, { "epoch": 0.5290196426984268, "grad_norm": 1.5520888566970825, "learning_rate": 1.9139933597115815e-05, "loss": 1.8709, "step": 14880 }, { "epoch": 0.5293751666518531, "grad_norm": 1.6008402109146118, "learning_rate": 1.9138342800680358e-05, "loss": 1.8714, "step": 14890 }, { "epoch": 0.5297306906052796, "grad_norm": 1.589961290359497, "learning_rate": 1.9136750600652743e-05, "loss": 1.9262, "step": 14900 }, { "epoch": 0.5300862145587059, "grad_norm": 1.7259788513183594, "learning_rate": 1.9135156997277523e-05, "loss": 1.8574, "step": 14910 }, { "epoch": 0.5304417385121323, "grad_norm": 1.539460301399231, "learning_rate": 1.9133561990799466e-05, "loss": 1.8557, "step": 14920 }, { "epoch": 0.5307972624655586, "grad_norm": 1.6305017471313477, "learning_rate": 1.9131965581463558e-05, "loss": 1.8761, "step": 14930 }, { "epoch": 0.531152786418985, "grad_norm": 1.6037888526916504, "learning_rate": 1.9130367769514988e-05, "loss": 1.913, "step": 14940 }, { "epoch": 0.5315083103724113, "grad_norm": 1.6008785963058472, "learning_rate": 1.912876855519918e-05, "loss": 1.8913, "step": 14950 }, { "epoch": 0.5318638343258377, "grad_norm": 1.6513231992721558, "learning_rate": 1.9127167938761762e-05, "loss": 1.8988, "step": 14960 }, { "epoch": 0.532219358279264, "grad_norm": 1.6702121496200562, "learning_rate": 1.9125565920448575e-05, "loss": 1.9275, "step": 14970 }, { "epoch": 0.5325748822326904, "grad_norm": 1.7012349367141724, "learning_rate": 1.912396250050568e-05, "loss": 1.8969, "step": 14980 }, { "epoch": 0.5329304061861168, "grad_norm": 1.5561339855194092, "learning_rate": 1.9122357679179356e-05, "loss": 1.8335, "step": 14990 }, { "epoch": 0.5332859301395432, "grad_norm": 1.6674284934997559, "learning_rate": 1.9120751456716083e-05, "loss": 1.8655, "step": 15000 }, { "epoch": 0.5336414540929695, "grad_norm": 1.689611792564392, "learning_rate": 1.911914383336258e-05, "loss": 1.8365, "step": 15010 }, { "epoch": 0.5339969780463959, "grad_norm": 1.5600208044052124, "learning_rate": 1.911753480936576e-05, "loss": 1.9306, "step": 15020 }, { "epoch": 0.5343525019998222, "grad_norm": 1.5981788635253906, "learning_rate": 1.9115924384972758e-05, "loss": 1.9228, "step": 15030 }, { "epoch": 0.5347080259532486, "grad_norm": 1.5904815196990967, "learning_rate": 1.911431256043093e-05, "loss": 1.8945, "step": 15040 }, { "epoch": 0.5350635499066749, "grad_norm": 1.650617003440857, "learning_rate": 1.911269933598784e-05, "loss": 1.8472, "step": 15050 }, { "epoch": 0.5354190738601013, "grad_norm": 1.7228983640670776, "learning_rate": 1.911108471189127e-05, "loss": 1.8869, "step": 15060 }, { "epoch": 0.5357745978135277, "grad_norm": 1.6754595041275024, "learning_rate": 1.9109468688389216e-05, "loss": 1.8485, "step": 15070 }, { "epoch": 0.5361301217669541, "grad_norm": 1.5948920249938965, "learning_rate": 1.9107851265729885e-05, "loss": 1.9075, "step": 15080 }, { "epoch": 0.5364856457203804, "grad_norm": 1.6747260093688965, "learning_rate": 1.9106232444161707e-05, "loss": 1.9437, "step": 15090 }, { "epoch": 0.5368411696738068, "grad_norm": 1.511064052581787, "learning_rate": 1.9104612223933322e-05, "loss": 1.8816, "step": 15100 }, { "epoch": 0.5371966936272331, "grad_norm": 1.492945909500122, "learning_rate": 1.910299060529359e-05, "loss": 1.8629, "step": 15110 }, { "epoch": 0.5375522175806595, "grad_norm": 1.6290943622589111, "learning_rate": 1.910136758849157e-05, "loss": 1.8982, "step": 15120 }, { "epoch": 0.5379077415340858, "grad_norm": 1.7283633947372437, "learning_rate": 1.9099743173776558e-05, "loss": 1.8189, "step": 15130 }, { "epoch": 0.5382632654875122, "grad_norm": 1.55548095703125, "learning_rate": 1.909811736139805e-05, "loss": 1.8938, "step": 15140 }, { "epoch": 0.5386187894409386, "grad_norm": 1.6615575551986694, "learning_rate": 1.9096490151605764e-05, "loss": 1.8888, "step": 15150 }, { "epoch": 0.538974313394365, "grad_norm": 1.5879693031311035, "learning_rate": 1.909486154464962e-05, "loss": 1.9041, "step": 15160 }, { "epoch": 0.5393298373477913, "grad_norm": 1.6335123777389526, "learning_rate": 1.9093231540779773e-05, "loss": 1.8635, "step": 15170 }, { "epoch": 0.5396853613012177, "grad_norm": 1.5720808506011963, "learning_rate": 1.9091600140246574e-05, "loss": 1.8662, "step": 15180 }, { "epoch": 0.540040885254644, "grad_norm": 1.6797782182693481, "learning_rate": 1.9089967343300602e-05, "loss": 1.9517, "step": 15190 }, { "epoch": 0.5403964092080704, "grad_norm": 1.7217504978179932, "learning_rate": 1.9088333150192638e-05, "loss": 1.9007, "step": 15200 }, { "epoch": 0.5407519331614967, "grad_norm": 1.624611258506775, "learning_rate": 1.9086697561173683e-05, "loss": 1.8755, "step": 15210 }, { "epoch": 0.5411074571149231, "grad_norm": 1.6011959314346313, "learning_rate": 1.908506057649496e-05, "loss": 1.8762, "step": 15220 }, { "epoch": 0.5414629810683494, "grad_norm": 1.6140680313110352, "learning_rate": 1.9083422196407897e-05, "loss": 1.8803, "step": 15230 }, { "epoch": 0.5418185050217759, "grad_norm": 1.6139609813690186, "learning_rate": 1.9081782421164136e-05, "loss": 1.9264, "step": 15240 }, { "epoch": 0.5421740289752022, "grad_norm": 1.6700936555862427, "learning_rate": 1.908014125101554e-05, "loss": 1.8885, "step": 15250 }, { "epoch": 0.5425295529286286, "grad_norm": 1.7281888723373413, "learning_rate": 1.907849868621418e-05, "loss": 1.8685, "step": 15260 }, { "epoch": 0.5428850768820549, "grad_norm": 1.6173104047775269, "learning_rate": 1.9076854727012344e-05, "loss": 1.8415, "step": 15270 }, { "epoch": 0.5432406008354813, "grad_norm": 1.607061743736267, "learning_rate": 1.9075209373662535e-05, "loss": 1.9398, "step": 15280 }, { "epoch": 0.5435961247889076, "grad_norm": 1.6720659732818604, "learning_rate": 1.907356262641747e-05, "loss": 1.888, "step": 15290 }, { "epoch": 0.543951648742334, "grad_norm": 1.563422679901123, "learning_rate": 1.9071914485530074e-05, "loss": 1.8557, "step": 15300 }, { "epoch": 0.5443071726957603, "grad_norm": 1.5465205907821655, "learning_rate": 1.9070264951253495e-05, "loss": 1.8867, "step": 15310 }, { "epoch": 0.5446626966491868, "grad_norm": 1.6641415357589722, "learning_rate": 1.906861402384109e-05, "loss": 1.8501, "step": 15320 }, { "epoch": 0.5450182206026131, "grad_norm": 1.7145063877105713, "learning_rate": 1.9066961703546432e-05, "loss": 1.893, "step": 15330 }, { "epoch": 0.5453737445560395, "grad_norm": 1.5639148950576782, "learning_rate": 1.906530799062331e-05, "loss": 1.8897, "step": 15340 }, { "epoch": 0.5457292685094658, "grad_norm": 1.6158241033554077, "learning_rate": 1.9063652885325718e-05, "loss": 1.885, "step": 15350 }, { "epoch": 0.5460847924628922, "grad_norm": 1.5545011758804321, "learning_rate": 1.906199638790787e-05, "loss": 1.8207, "step": 15360 }, { "epoch": 0.5464403164163185, "grad_norm": 1.7516465187072754, "learning_rate": 1.90603384986242e-05, "loss": 1.8562, "step": 15370 }, { "epoch": 0.5467958403697449, "grad_norm": 1.626286506652832, "learning_rate": 1.9058679217729345e-05, "loss": 1.8643, "step": 15380 }, { "epoch": 0.5471513643231712, "grad_norm": 1.6006121635437012, "learning_rate": 1.905701854547816e-05, "loss": 1.8397, "step": 15390 }, { "epoch": 0.5475068882765977, "grad_norm": 1.7442126274108887, "learning_rate": 1.9055356482125712e-05, "loss": 1.8768, "step": 15400 }, { "epoch": 0.547862412230024, "grad_norm": 1.5240834951400757, "learning_rate": 1.9053693027927287e-05, "loss": 1.881, "step": 15410 }, { "epoch": 0.5482179361834504, "grad_norm": 1.5544195175170898, "learning_rate": 1.905202818313838e-05, "loss": 1.9523, "step": 15420 }, { "epoch": 0.5485734601368767, "grad_norm": 1.69491708278656, "learning_rate": 1.90503619480147e-05, "loss": 1.88, "step": 15430 }, { "epoch": 0.5489289840903031, "grad_norm": 1.677173376083374, "learning_rate": 1.9048694322812174e-05, "loss": 1.8832, "step": 15440 }, { "epoch": 0.5492845080437294, "grad_norm": 1.7110683917999268, "learning_rate": 1.9047025307786932e-05, "loss": 1.8634, "step": 15450 }, { "epoch": 0.5496400319971558, "grad_norm": 1.6487089395523071, "learning_rate": 1.904535490319533e-05, "loss": 1.8999, "step": 15460 }, { "epoch": 0.5499955559505821, "grad_norm": 1.5610681772232056, "learning_rate": 1.904368310929393e-05, "loss": 1.8442, "step": 15470 }, { "epoch": 0.5503510799040086, "grad_norm": 1.6055057048797607, "learning_rate": 1.904200992633951e-05, "loss": 1.9059, "step": 15480 }, { "epoch": 0.5507066038574349, "grad_norm": 1.6054152250289917, "learning_rate": 1.9040335354589056e-05, "loss": 1.8852, "step": 15490 }, { "epoch": 0.5510621278108613, "grad_norm": 1.562628149986267, "learning_rate": 1.9038659394299775e-05, "loss": 1.8882, "step": 15500 }, { "epoch": 0.5514176517642876, "grad_norm": 1.6260756254196167, "learning_rate": 1.9036982045729088e-05, "loss": 1.8988, "step": 15510 }, { "epoch": 0.551773175717714, "grad_norm": 1.6427818536758423, "learning_rate": 1.9035303309134617e-05, "loss": 1.8722, "step": 15520 }, { "epoch": 0.5521286996711403, "grad_norm": 1.6877665519714355, "learning_rate": 1.903362318477421e-05, "loss": 1.8747, "step": 15530 }, { "epoch": 0.5524842236245667, "grad_norm": 1.5642024278640747, "learning_rate": 1.9031941672905923e-05, "loss": 1.8376, "step": 15540 }, { "epoch": 0.552839747577993, "grad_norm": 1.6180360317230225, "learning_rate": 1.9030258773788028e-05, "loss": 1.9095, "step": 15550 }, { "epoch": 0.5531952715314195, "grad_norm": 1.6126481294631958, "learning_rate": 1.9028574487679004e-05, "loss": 1.9073, "step": 15560 }, { "epoch": 0.5535507954848458, "grad_norm": 1.5349082946777344, "learning_rate": 1.902688881483755e-05, "loss": 1.8635, "step": 15570 }, { "epoch": 0.5539063194382722, "grad_norm": 1.5188136100769043, "learning_rate": 1.902520175552257e-05, "loss": 1.8821, "step": 15580 }, { "epoch": 0.5542618433916985, "grad_norm": 1.6134661436080933, "learning_rate": 1.9023513309993192e-05, "loss": 1.8955, "step": 15590 }, { "epoch": 0.5546173673451249, "grad_norm": 1.5765665769577026, "learning_rate": 1.9021823478508744e-05, "loss": 1.8862, "step": 15600 }, { "epoch": 0.5549728912985512, "grad_norm": 1.6248475313186646, "learning_rate": 1.902013226132878e-05, "loss": 1.8517, "step": 15610 }, { "epoch": 0.5553284152519776, "grad_norm": 1.5957690477371216, "learning_rate": 1.9018439658713055e-05, "loss": 1.8592, "step": 15620 }, { "epoch": 0.5556839392054039, "grad_norm": 1.6602823734283447, "learning_rate": 1.9016745670921547e-05, "loss": 1.8864, "step": 15630 }, { "epoch": 0.5560394631588303, "grad_norm": 1.805245041847229, "learning_rate": 1.9015050298214436e-05, "loss": 1.8493, "step": 15640 }, { "epoch": 0.5563949871122567, "grad_norm": 1.6458933353424072, "learning_rate": 1.9013353540852124e-05, "loss": 1.8952, "step": 15650 }, { "epoch": 0.5567505110656831, "grad_norm": 1.6207075119018555, "learning_rate": 1.9011655399095226e-05, "loss": 1.8594, "step": 15660 }, { "epoch": 0.5571060350191094, "grad_norm": 1.592824935913086, "learning_rate": 1.900995587320456e-05, "loss": 1.8967, "step": 15670 }, { "epoch": 0.5574615589725358, "grad_norm": 1.684127926826477, "learning_rate": 1.9008254963441163e-05, "loss": 1.8629, "step": 15680 }, { "epoch": 0.5578170829259621, "grad_norm": 1.7365632057189941, "learning_rate": 1.9006552670066288e-05, "loss": 1.8812, "step": 15690 }, { "epoch": 0.5581726068793885, "grad_norm": 1.6320279836654663, "learning_rate": 1.9004848993341398e-05, "loss": 1.8456, "step": 15700 }, { "epoch": 0.5585281308328148, "grad_norm": 1.6249363422393799, "learning_rate": 1.900314393352816e-05, "loss": 1.8636, "step": 15710 }, { "epoch": 0.5588836547862412, "grad_norm": 1.6237016916275024, "learning_rate": 1.900143749088846e-05, "loss": 1.8946, "step": 15720 }, { "epoch": 0.5592391787396676, "grad_norm": 1.5896066427230835, "learning_rate": 1.8999729665684406e-05, "loss": 1.8769, "step": 15730 }, { "epoch": 0.559594702693094, "grad_norm": 1.6980187892913818, "learning_rate": 1.8998020458178303e-05, "loss": 1.8685, "step": 15740 }, { "epoch": 0.5599502266465203, "grad_norm": 1.6312180757522583, "learning_rate": 1.8996309868632675e-05, "loss": 1.8814, "step": 15750 }, { "epoch": 0.5603057505999467, "grad_norm": 1.6168968677520752, "learning_rate": 1.899459789731026e-05, "loss": 1.9114, "step": 15760 }, { "epoch": 0.560661274553373, "grad_norm": 1.6933541297912598, "learning_rate": 1.8992884544474e-05, "loss": 1.8725, "step": 15770 }, { "epoch": 0.5610167985067994, "grad_norm": 1.6688987016677856, "learning_rate": 1.8991169810387067e-05, "loss": 1.8497, "step": 15780 }, { "epoch": 0.5613723224602257, "grad_norm": 1.6841078996658325, "learning_rate": 1.898945369531282e-05, "loss": 1.8917, "step": 15790 }, { "epoch": 0.5617278464136521, "grad_norm": 1.6986109018325806, "learning_rate": 1.8987736199514853e-05, "loss": 1.8933, "step": 15800 }, { "epoch": 0.5620833703670784, "grad_norm": 1.6620608568191528, "learning_rate": 1.898601732325696e-05, "loss": 1.8725, "step": 15810 }, { "epoch": 0.5624388943205049, "grad_norm": 1.6265780925750732, "learning_rate": 1.8984297066803146e-05, "loss": 1.8615, "step": 15820 }, { "epoch": 0.5627944182739312, "grad_norm": 1.47080397605896, "learning_rate": 1.8982575430417636e-05, "loss": 1.9122, "step": 15830 }, { "epoch": 0.5631499422273576, "grad_norm": 1.6252546310424805, "learning_rate": 1.898085241436486e-05, "loss": 1.8958, "step": 15840 }, { "epoch": 0.5635054661807839, "grad_norm": 1.6627707481384277, "learning_rate": 1.8979128018909464e-05, "loss": 1.875, "step": 15850 }, { "epoch": 0.5638609901342103, "grad_norm": 1.7096972465515137, "learning_rate": 1.8977402244316304e-05, "loss": 1.8635, "step": 15860 }, { "epoch": 0.5642165140876366, "grad_norm": 1.6762901544570923, "learning_rate": 1.897567509085045e-05, "loss": 1.8821, "step": 15870 }, { "epoch": 0.564572038041063, "grad_norm": 1.6387391090393066, "learning_rate": 1.897394655877718e-05, "loss": 1.877, "step": 15880 }, { "epoch": 0.5649275619944893, "grad_norm": 1.643298625946045, "learning_rate": 1.8972216648361984e-05, "loss": 1.851, "step": 15890 }, { "epoch": 0.5652830859479158, "grad_norm": 1.658422589302063, "learning_rate": 1.8970485359870567e-05, "loss": 1.8817, "step": 15900 }, { "epoch": 0.5656386099013421, "grad_norm": 1.6579492092132568, "learning_rate": 1.8968752693568842e-05, "loss": 1.8954, "step": 15910 }, { "epoch": 0.5659941338547685, "grad_norm": 1.616687536239624, "learning_rate": 1.896701864972294e-05, "loss": 1.8478, "step": 15920 }, { "epoch": 0.5663496578081948, "grad_norm": 1.60679292678833, "learning_rate": 1.8965283228599196e-05, "loss": 1.9111, "step": 15930 }, { "epoch": 0.5667051817616212, "grad_norm": 1.5435000658035278, "learning_rate": 1.8963546430464165e-05, "loss": 1.8981, "step": 15940 }, { "epoch": 0.5670607057150475, "grad_norm": 1.6048617362976074, "learning_rate": 1.8961808255584596e-05, "loss": 1.8765, "step": 15950 }, { "epoch": 0.5674162296684739, "grad_norm": 1.6783527135849, "learning_rate": 1.8960068704227476e-05, "loss": 1.8805, "step": 15960 }, { "epoch": 0.5677717536219002, "grad_norm": 1.6484862565994263, "learning_rate": 1.8958327776659985e-05, "loss": 1.8929, "step": 15970 }, { "epoch": 0.5681272775753267, "grad_norm": 1.6223924160003662, "learning_rate": 1.895658547314951e-05, "loss": 1.8706, "step": 15980 }, { "epoch": 0.568482801528753, "grad_norm": 1.5521831512451172, "learning_rate": 1.895484179396367e-05, "loss": 1.9197, "step": 15990 }, { "epoch": 0.5688383254821794, "grad_norm": 1.5772056579589844, "learning_rate": 1.8953096739370275e-05, "loss": 1.8611, "step": 16000 }, { "epoch": 0.5691938494356057, "grad_norm": 1.6748151779174805, "learning_rate": 1.895135030963736e-05, "loss": 1.8791, "step": 16010 }, { "epoch": 0.5695493733890321, "grad_norm": 1.585634708404541, "learning_rate": 1.8949602505033157e-05, "loss": 1.9258, "step": 16020 }, { "epoch": 0.5699048973424584, "grad_norm": 1.6667970418930054, "learning_rate": 1.8947853325826128e-05, "loss": 1.848, "step": 16030 }, { "epoch": 0.5702604212958848, "grad_norm": 1.658412218093872, "learning_rate": 1.8946102772284933e-05, "loss": 1.8529, "step": 16040 }, { "epoch": 0.5706159452493111, "grad_norm": 1.6104261875152588, "learning_rate": 1.894435084467844e-05, "loss": 1.8531, "step": 16050 }, { "epoch": 0.5709714692027376, "grad_norm": 1.6428375244140625, "learning_rate": 1.8942597543275744e-05, "loss": 1.8516, "step": 16060 }, { "epoch": 0.5713269931561639, "grad_norm": 1.7161728143692017, "learning_rate": 1.8940842868346134e-05, "loss": 1.8683, "step": 16070 }, { "epoch": 0.5716825171095903, "grad_norm": 1.6344237327575684, "learning_rate": 1.8939086820159117e-05, "loss": 1.8701, "step": 16080 }, { "epoch": 0.5720380410630166, "grad_norm": 1.6507536172866821, "learning_rate": 1.8937329398984416e-05, "loss": 1.8342, "step": 16090 }, { "epoch": 0.572393565016443, "grad_norm": 1.627859115600586, "learning_rate": 1.8935570605091955e-05, "loss": 1.8757, "step": 16100 }, { "epoch": 0.5727490889698693, "grad_norm": 1.6992720365524292, "learning_rate": 1.8933810438751875e-05, "loss": 1.889, "step": 16110 }, { "epoch": 0.5731046129232957, "grad_norm": 1.5473157167434692, "learning_rate": 1.8932048900234527e-05, "loss": 1.8439, "step": 16120 }, { "epoch": 0.573460136876722, "grad_norm": 1.617222547531128, "learning_rate": 1.8930285989810474e-05, "loss": 1.8851, "step": 16130 }, { "epoch": 0.5738156608301485, "grad_norm": 1.5728873014450073, "learning_rate": 1.8928521707750486e-05, "loss": 1.874, "step": 16140 }, { "epoch": 0.5741711847835748, "grad_norm": 1.623225212097168, "learning_rate": 1.8926756054325545e-05, "loss": 1.8365, "step": 16150 }, { "epoch": 0.5745267087370012, "grad_norm": 1.7246637344360352, "learning_rate": 1.8924989029806845e-05, "loss": 1.876, "step": 16160 }, { "epoch": 0.5748822326904275, "grad_norm": 1.6464879512786865, "learning_rate": 1.8923220634465787e-05, "loss": 1.8393, "step": 16170 }, { "epoch": 0.5752377566438539, "grad_norm": 1.6673486232757568, "learning_rate": 1.8921450868573993e-05, "loss": 1.8756, "step": 16180 }, { "epoch": 0.5755932805972802, "grad_norm": 1.6323940753936768, "learning_rate": 1.8919679732403284e-05, "loss": 1.8998, "step": 16190 }, { "epoch": 0.5759488045507066, "grad_norm": 1.5756714344024658, "learning_rate": 1.8917907226225695e-05, "loss": 1.9193, "step": 16200 }, { "epoch": 0.5763043285041329, "grad_norm": 1.5643407106399536, "learning_rate": 1.891613335031347e-05, "loss": 1.8532, "step": 16210 }, { "epoch": 0.5766598524575594, "grad_norm": 1.7220146656036377, "learning_rate": 1.891435810493907e-05, "loss": 1.8412, "step": 16220 }, { "epoch": 0.5770153764109857, "grad_norm": 1.6633564233779907, "learning_rate": 1.8912581490375153e-05, "loss": 1.8517, "step": 16230 }, { "epoch": 0.5773709003644121, "grad_norm": 1.605879545211792, "learning_rate": 1.8910803506894602e-05, "loss": 1.9017, "step": 16240 }, { "epoch": 0.5777264243178384, "grad_norm": 1.656543254852295, "learning_rate": 1.8909024154770508e-05, "loss": 1.9126, "step": 16250 }, { "epoch": 0.5780819482712648, "grad_norm": 1.6617867946624756, "learning_rate": 1.8907243434276162e-05, "loss": 1.8808, "step": 16260 }, { "epoch": 0.5784374722246911, "grad_norm": 1.6808924674987793, "learning_rate": 1.8905461345685073e-05, "loss": 1.8935, "step": 16270 }, { "epoch": 0.5787929961781175, "grad_norm": 1.7385969161987305, "learning_rate": 1.8903677889270957e-05, "loss": 1.8294, "step": 16280 }, { "epoch": 0.5791485201315438, "grad_norm": 1.5762659311294556, "learning_rate": 1.8901893065307745e-05, "loss": 1.8564, "step": 16290 }, { "epoch": 0.5795040440849702, "grad_norm": 1.6158792972564697, "learning_rate": 1.8900106874069577e-05, "loss": 1.8664, "step": 16300 }, { "epoch": 0.5798595680383966, "grad_norm": 1.6304837465286255, "learning_rate": 1.8898319315830793e-05, "loss": 1.8929, "step": 16310 }, { "epoch": 0.580215091991823, "grad_norm": 1.7296535968780518, "learning_rate": 1.8896530390865955e-05, "loss": 1.856, "step": 16320 }, { "epoch": 0.5805706159452493, "grad_norm": 1.59303879737854, "learning_rate": 1.8894740099449837e-05, "loss": 1.907, "step": 16330 }, { "epoch": 0.5809261398986757, "grad_norm": 1.5917261838912964, "learning_rate": 1.88929484418574e-05, "loss": 1.8705, "step": 16340 }, { "epoch": 0.581281663852102, "grad_norm": 1.5613733530044556, "learning_rate": 1.889115541836385e-05, "loss": 1.8972, "step": 16350 }, { "epoch": 0.5816371878055284, "grad_norm": 1.5381407737731934, "learning_rate": 1.8889361029244574e-05, "loss": 1.8423, "step": 16360 }, { "epoch": 0.5819927117589547, "grad_norm": 1.6103670597076416, "learning_rate": 1.8887565274775177e-05, "loss": 1.8679, "step": 16370 }, { "epoch": 0.5823482357123811, "grad_norm": 1.6302522420883179, "learning_rate": 1.888576815523148e-05, "loss": 1.8749, "step": 16380 }, { "epoch": 0.5827037596658075, "grad_norm": 1.6894242763519287, "learning_rate": 1.888396967088951e-05, "loss": 1.8551, "step": 16390 }, { "epoch": 0.5830592836192339, "grad_norm": 1.6959004402160645, "learning_rate": 1.88821698220255e-05, "loss": 1.8604, "step": 16400 }, { "epoch": 0.5834148075726602, "grad_norm": 1.767952561378479, "learning_rate": 1.88803686089159e-05, "loss": 1.9134, "step": 16410 }, { "epoch": 0.5837703315260866, "grad_norm": 1.6960941553115845, "learning_rate": 1.8878566031837362e-05, "loss": 1.8982, "step": 16420 }, { "epoch": 0.5841258554795129, "grad_norm": 1.559275507926941, "learning_rate": 1.8876762091066746e-05, "loss": 1.8515, "step": 16430 }, { "epoch": 0.5844813794329393, "grad_norm": 1.6764837503433228, "learning_rate": 1.8874956786881137e-05, "loss": 1.8995, "step": 16440 }, { "epoch": 0.5848369033863656, "grad_norm": 1.6259223222732544, "learning_rate": 1.8873150119557807e-05, "loss": 1.8746, "step": 16450 }, { "epoch": 0.585192427339792, "grad_norm": 1.640162706375122, "learning_rate": 1.8871342089374253e-05, "loss": 1.8844, "step": 16460 }, { "epoch": 0.5855479512932183, "grad_norm": 1.6161834001541138, "learning_rate": 1.886953269660818e-05, "loss": 1.8626, "step": 16470 }, { "epoch": 0.5859034752466448, "grad_norm": 1.7492423057556152, "learning_rate": 1.8867721941537497e-05, "loss": 1.9021, "step": 16480 }, { "epoch": 0.5862589992000711, "grad_norm": 1.652024269104004, "learning_rate": 1.886590982444033e-05, "loss": 1.8882, "step": 16490 }, { "epoch": 0.5866145231534975, "grad_norm": 1.52196204662323, "learning_rate": 1.8864096345594996e-05, "loss": 1.8257, "step": 16500 }, { "epoch": 0.5869700471069238, "grad_norm": 1.7476404905319214, "learning_rate": 1.8862281505280044e-05, "loss": 1.8592, "step": 16510 }, { "epoch": 0.5873255710603502, "grad_norm": 1.5485260486602783, "learning_rate": 1.886046530377422e-05, "loss": 1.8941, "step": 16520 }, { "epoch": 0.5876810950137765, "grad_norm": 1.5296543836593628, "learning_rate": 1.885864774135648e-05, "loss": 1.9159, "step": 16530 }, { "epoch": 0.5880366189672029, "grad_norm": 1.6564058065414429, "learning_rate": 1.8856828818305993e-05, "loss": 1.8667, "step": 16540 }, { "epoch": 0.5883921429206292, "grad_norm": 1.655967354774475, "learning_rate": 1.885500853490213e-05, "loss": 1.8859, "step": 16550 }, { "epoch": 0.5887476668740557, "grad_norm": 1.608729362487793, "learning_rate": 1.885318689142448e-05, "loss": 1.8904, "step": 16560 }, { "epoch": 0.589103190827482, "grad_norm": 1.6501888036727905, "learning_rate": 1.8851363888152832e-05, "loss": 1.8914, "step": 16570 }, { "epoch": 0.5894587147809084, "grad_norm": 1.6261394023895264, "learning_rate": 1.8849539525367188e-05, "loss": 1.9402, "step": 16580 }, { "epoch": 0.5898142387343347, "grad_norm": 1.6277108192443848, "learning_rate": 1.884771380334776e-05, "loss": 1.91, "step": 16590 }, { "epoch": 0.5901697626877611, "grad_norm": 1.6134692430496216, "learning_rate": 1.884588672237497e-05, "loss": 1.9328, "step": 16600 }, { "epoch": 0.5905252866411874, "grad_norm": 1.6460951566696167, "learning_rate": 1.8844058282729443e-05, "loss": 1.8587, "step": 16610 }, { "epoch": 0.5908808105946138, "grad_norm": 1.6859729290008545, "learning_rate": 1.8842228484692016e-05, "loss": 1.8527, "step": 16620 }, { "epoch": 0.5912363345480401, "grad_norm": 1.6426470279693604, "learning_rate": 1.8840397328543733e-05, "loss": 1.8648, "step": 16630 }, { "epoch": 0.5915918585014666, "grad_norm": 1.6782881021499634, "learning_rate": 1.883856481456585e-05, "loss": 1.864, "step": 16640 }, { "epoch": 0.5919473824548929, "grad_norm": 1.62833833694458, "learning_rate": 1.8836730943039833e-05, "loss": 1.8915, "step": 16650 }, { "epoch": 0.5923029064083193, "grad_norm": 1.571418285369873, "learning_rate": 1.8834895714247347e-05, "loss": 1.8538, "step": 16660 }, { "epoch": 0.5926584303617456, "grad_norm": 1.594092845916748, "learning_rate": 1.8833059128470275e-05, "loss": 1.854, "step": 16670 }, { "epoch": 0.593013954315172, "grad_norm": 1.636124610900879, "learning_rate": 1.8831221185990706e-05, "loss": 1.8864, "step": 16680 }, { "epoch": 0.5933694782685983, "grad_norm": 1.6358932256698608, "learning_rate": 1.8829381887090935e-05, "loss": 1.8813, "step": 16690 }, { "epoch": 0.5937250022220247, "grad_norm": 1.5682092905044556, "learning_rate": 1.8827541232053466e-05, "loss": 1.9199, "step": 16700 }, { "epoch": 0.594080526175451, "grad_norm": 1.6156322956085205, "learning_rate": 1.8825699221161013e-05, "loss": 1.8817, "step": 16710 }, { "epoch": 0.5944360501288775, "grad_norm": 1.5667834281921387, "learning_rate": 1.8823855854696496e-05, "loss": 1.8507, "step": 16720 }, { "epoch": 0.5947915740823038, "grad_norm": 1.6435546875, "learning_rate": 1.8822011132943048e-05, "loss": 1.8548, "step": 16730 }, { "epoch": 0.5951470980357302, "grad_norm": 1.58632493019104, "learning_rate": 1.8820165056184004e-05, "loss": 1.8876, "step": 16740 }, { "epoch": 0.5955026219891565, "grad_norm": 1.6850043535232544, "learning_rate": 1.8818317624702908e-05, "loss": 1.8642, "step": 16750 }, { "epoch": 0.5958581459425829, "grad_norm": 1.6237214803695679, "learning_rate": 1.8816468838783518e-05, "loss": 1.8476, "step": 16760 }, { "epoch": 0.5962136698960092, "grad_norm": 1.6198740005493164, "learning_rate": 1.8814618698709794e-05, "loss": 1.8812, "step": 16770 }, { "epoch": 0.5965691938494356, "grad_norm": 1.6180577278137207, "learning_rate": 1.8812767204765906e-05, "loss": 1.8844, "step": 16780 }, { "epoch": 0.5969247178028619, "grad_norm": 1.6989701986312866, "learning_rate": 1.881091435723623e-05, "loss": 1.8733, "step": 16790 }, { "epoch": 0.5972802417562884, "grad_norm": 1.6039592027664185, "learning_rate": 1.8809060156405355e-05, "loss": 1.9306, "step": 16800 }, { "epoch": 0.5976357657097147, "grad_norm": 1.6900005340576172, "learning_rate": 1.8807204602558076e-05, "loss": 1.9098, "step": 16810 }, { "epoch": 0.5979912896631411, "grad_norm": 1.7421122789382935, "learning_rate": 1.880534769597939e-05, "loss": 1.8799, "step": 16820 }, { "epoch": 0.5983468136165674, "grad_norm": 1.5648573637008667, "learning_rate": 1.8803489436954506e-05, "loss": 1.908, "step": 16830 }, { "epoch": 0.5987023375699938, "grad_norm": 1.618874430656433, "learning_rate": 1.8801629825768845e-05, "loss": 1.8763, "step": 16840 }, { "epoch": 0.5990578615234201, "grad_norm": 1.5616846084594727, "learning_rate": 1.8799768862708025e-05, "loss": 1.9007, "step": 16850 }, { "epoch": 0.5994133854768465, "grad_norm": 1.6301249265670776, "learning_rate": 1.8797906548057888e-05, "loss": 1.8384, "step": 16860 }, { "epoch": 0.5997689094302728, "grad_norm": 1.6246309280395508, "learning_rate": 1.8796042882104464e-05, "loss": 1.8478, "step": 16870 }, { "epoch": 0.6001244333836993, "grad_norm": 1.73928964138031, "learning_rate": 1.8794177865134008e-05, "loss": 1.8595, "step": 16880 }, { "epoch": 0.6004799573371256, "grad_norm": 1.5528075695037842, "learning_rate": 1.879231149743297e-05, "loss": 1.8669, "step": 16890 }, { "epoch": 0.600835481290552, "grad_norm": 1.6546438932418823, "learning_rate": 1.8790443779288014e-05, "loss": 1.9158, "step": 16900 }, { "epoch": 0.6011910052439783, "grad_norm": 1.5551692247390747, "learning_rate": 1.8788574710986012e-05, "loss": 1.8575, "step": 16910 }, { "epoch": 0.6015465291974047, "grad_norm": 1.635860800743103, "learning_rate": 1.8786704292814037e-05, "loss": 1.8457, "step": 16920 }, { "epoch": 0.601902053150831, "grad_norm": 1.5776456594467163, "learning_rate": 1.8784832525059377e-05, "loss": 1.8652, "step": 16930 }, { "epoch": 0.6022575771042574, "grad_norm": 1.719202995300293, "learning_rate": 1.8782959408009524e-05, "loss": 1.8412, "step": 16940 }, { "epoch": 0.6026131010576837, "grad_norm": 1.6435538530349731, "learning_rate": 1.8781084941952176e-05, "loss": 1.8631, "step": 16950 }, { "epoch": 0.6029686250111101, "grad_norm": 1.5469392538070679, "learning_rate": 1.8779209127175234e-05, "loss": 1.8409, "step": 16960 }, { "epoch": 0.6033241489645365, "grad_norm": 1.5901070833206177, "learning_rate": 1.8777331963966817e-05, "loss": 1.8437, "step": 16970 }, { "epoch": 0.6036796729179629, "grad_norm": 1.606291651725769, "learning_rate": 1.877545345261525e-05, "loss": 1.8883, "step": 16980 }, { "epoch": 0.6040351968713892, "grad_norm": 1.5249083042144775, "learning_rate": 1.8773573593409052e-05, "loss": 1.8804, "step": 16990 }, { "epoch": 0.6043907208248156, "grad_norm": 1.6308866739273071, "learning_rate": 1.877169238663696e-05, "loss": 1.846, "step": 17000 }, { "epoch": 0.6047462447782419, "grad_norm": 1.6662529706954956, "learning_rate": 1.8769809832587914e-05, "loss": 1.874, "step": 17010 }, { "epoch": 0.6051017687316683, "grad_norm": 1.7909958362579346, "learning_rate": 1.876792593155107e-05, "loss": 1.8738, "step": 17020 }, { "epoch": 0.6054572926850946, "grad_norm": 1.6001943349838257, "learning_rate": 1.8766040683815772e-05, "loss": 1.8614, "step": 17030 }, { "epoch": 0.605812816638521, "grad_norm": 1.5842840671539307, "learning_rate": 1.8764154089671597e-05, "loss": 1.8435, "step": 17040 }, { "epoch": 0.6061683405919474, "grad_norm": 1.627424955368042, "learning_rate": 1.87622661494083e-05, "loss": 1.8811, "step": 17050 }, { "epoch": 0.6065238645453738, "grad_norm": 1.738578200340271, "learning_rate": 1.876037686331586e-05, "loss": 1.8389, "step": 17060 }, { "epoch": 0.6068793884988001, "grad_norm": 1.6790657043457031, "learning_rate": 1.875848623168447e-05, "loss": 1.847, "step": 17070 }, { "epoch": 0.6072349124522265, "grad_norm": 1.7412775754928589, "learning_rate": 1.87565942548045e-05, "loss": 1.9113, "step": 17080 }, { "epoch": 0.6075904364056528, "grad_norm": 1.5749247074127197, "learning_rate": 1.8754700932966566e-05, "loss": 1.8789, "step": 17090 }, { "epoch": 0.6079459603590792, "grad_norm": 1.6527512073516846, "learning_rate": 1.875280626646146e-05, "loss": 1.877, "step": 17100 }, { "epoch": 0.6083014843125055, "grad_norm": 1.606726050376892, "learning_rate": 1.875091025558019e-05, "loss": 1.8464, "step": 17110 }, { "epoch": 0.6086570082659319, "grad_norm": 1.6692525148391724, "learning_rate": 1.8749012900613974e-05, "loss": 1.8497, "step": 17120 }, { "epoch": 0.6090125322193582, "grad_norm": 1.6277382373809814, "learning_rate": 1.874711420185423e-05, "loss": 1.8919, "step": 17130 }, { "epoch": 0.6093680561727847, "grad_norm": 1.5908669233322144, "learning_rate": 1.874521415959259e-05, "loss": 1.8778, "step": 17140 }, { "epoch": 0.609723580126211, "grad_norm": 1.6495097875595093, "learning_rate": 1.874331277412089e-05, "loss": 1.8786, "step": 17150 }, { "epoch": 0.6100791040796374, "grad_norm": 1.6455113887786865, "learning_rate": 1.874141004573117e-05, "loss": 1.8591, "step": 17160 }, { "epoch": 0.6104346280330637, "grad_norm": 1.6391041278839111, "learning_rate": 1.8739505974715672e-05, "loss": 1.8667, "step": 17170 }, { "epoch": 0.6107901519864901, "grad_norm": 1.6577038764953613, "learning_rate": 1.873760056136686e-05, "loss": 1.8706, "step": 17180 }, { "epoch": 0.6111456759399164, "grad_norm": 1.6082369089126587, "learning_rate": 1.873569380597738e-05, "loss": 1.7906, "step": 17190 }, { "epoch": 0.6115011998933428, "grad_norm": 1.674109697341919, "learning_rate": 1.8733785708840107e-05, "loss": 1.9073, "step": 17200 }, { "epoch": 0.6118567238467691, "grad_norm": 1.5402380228042603, "learning_rate": 1.8731876270248112e-05, "loss": 1.8687, "step": 17210 }, { "epoch": 0.6122122478001956, "grad_norm": 1.6595739126205444, "learning_rate": 1.872996549049467e-05, "loss": 1.8709, "step": 17220 }, { "epoch": 0.6125677717536219, "grad_norm": 1.6010525226593018, "learning_rate": 1.8728053369873266e-05, "loss": 1.8679, "step": 17230 }, { "epoch": 0.6129232957070483, "grad_norm": 1.5951142311096191, "learning_rate": 1.872613990867759e-05, "loss": 1.9297, "step": 17240 }, { "epoch": 0.6132788196604746, "grad_norm": 1.5142821073532104, "learning_rate": 1.8724225107201538e-05, "loss": 1.855, "step": 17250 }, { "epoch": 0.613634343613901, "grad_norm": 1.6318472623825073, "learning_rate": 1.872230896573921e-05, "loss": 1.8907, "step": 17260 }, { "epoch": 0.6139898675673273, "grad_norm": 1.62212073802948, "learning_rate": 1.8720391484584913e-05, "loss": 1.86, "step": 17270 }, { "epoch": 0.6143453915207537, "grad_norm": 1.6132879257202148, "learning_rate": 1.8718472664033163e-05, "loss": 1.8601, "step": 17280 }, { "epoch": 0.61470091547418, "grad_norm": 1.6132336854934692, "learning_rate": 1.8716552504378676e-05, "loss": 1.8229, "step": 17290 }, { "epoch": 0.6150564394276065, "grad_norm": 1.6995302438735962, "learning_rate": 1.871463100591638e-05, "loss": 1.8826, "step": 17300 }, { "epoch": 0.6154119633810328, "grad_norm": 1.594828724861145, "learning_rate": 1.87127081689414e-05, "loss": 1.8921, "step": 17310 }, { "epoch": 0.6157674873344592, "grad_norm": 1.5735836029052734, "learning_rate": 1.8710783993749073e-05, "loss": 1.8571, "step": 17320 }, { "epoch": 0.6161230112878855, "grad_norm": 1.6112326383590698, "learning_rate": 1.8708858480634946e-05, "loss": 1.8564, "step": 17330 }, { "epoch": 0.6164785352413119, "grad_norm": 1.7308849096298218, "learning_rate": 1.8706931629894757e-05, "loss": 1.8361, "step": 17340 }, { "epoch": 0.6168340591947382, "grad_norm": 1.5627094507217407, "learning_rate": 1.8705003441824467e-05, "loss": 1.8924, "step": 17350 }, { "epoch": 0.6171895831481646, "grad_norm": 1.608675479888916, "learning_rate": 1.8703073916720225e-05, "loss": 1.8852, "step": 17360 }, { "epoch": 0.6175451071015909, "grad_norm": 1.6587059497833252, "learning_rate": 1.87011430548784e-05, "loss": 1.8536, "step": 17370 }, { "epoch": 0.6179006310550174, "grad_norm": 1.6514772176742554, "learning_rate": 1.869921085659556e-05, "loss": 1.8676, "step": 17380 }, { "epoch": 0.6182561550084437, "grad_norm": 1.6302834749221802, "learning_rate": 1.8697277322168475e-05, "loss": 1.8937, "step": 17390 }, { "epoch": 0.6186116789618701, "grad_norm": 1.6737154722213745, "learning_rate": 1.8695342451894122e-05, "loss": 1.8453, "step": 17400 }, { "epoch": 0.6189672029152964, "grad_norm": 1.6103057861328125, "learning_rate": 1.8693406246069694e-05, "loss": 1.877, "step": 17410 }, { "epoch": 0.6193227268687228, "grad_norm": 1.6058531999588013, "learning_rate": 1.8691468704992574e-05, "loss": 1.8532, "step": 17420 }, { "epoch": 0.6196782508221491, "grad_norm": 1.6504783630371094, "learning_rate": 1.8689529828960355e-05, "loss": 1.8712, "step": 17430 }, { "epoch": 0.6200337747755755, "grad_norm": 1.5864588022232056, "learning_rate": 1.868758961827084e-05, "loss": 1.8637, "step": 17440 }, { "epoch": 0.6203892987290018, "grad_norm": 1.63741135597229, "learning_rate": 1.8685648073222028e-05, "loss": 1.8425, "step": 17450 }, { "epoch": 0.6207448226824283, "grad_norm": 1.5490930080413818, "learning_rate": 1.8683705194112134e-05, "loss": 1.8588, "step": 17460 }, { "epoch": 0.6211003466358546, "grad_norm": 1.6922720670700073, "learning_rate": 1.868176098123957e-05, "loss": 1.8639, "step": 17470 }, { "epoch": 0.621455870589281, "grad_norm": 1.6855324506759644, "learning_rate": 1.867981543490295e-05, "loss": 1.8502, "step": 17480 }, { "epoch": 0.6218113945427073, "grad_norm": 1.539638638496399, "learning_rate": 1.8677868555401108e-05, "loss": 1.8658, "step": 17490 }, { "epoch": 0.6221669184961337, "grad_norm": 1.695586919784546, "learning_rate": 1.8675920343033063e-05, "loss": 1.828, "step": 17500 }, { "epoch": 0.62252244244956, "grad_norm": 1.626246452331543, "learning_rate": 1.8673970798098054e-05, "loss": 1.8621, "step": 17510 }, { "epoch": 0.6228779664029864, "grad_norm": 1.833249807357788, "learning_rate": 1.8672019920895513e-05, "loss": 1.8883, "step": 17520 }, { "epoch": 0.6232334903564127, "grad_norm": 1.746085524559021, "learning_rate": 1.867006771172509e-05, "loss": 1.8685, "step": 17530 }, { "epoch": 0.6235890143098392, "grad_norm": 1.7327795028686523, "learning_rate": 1.8668114170886627e-05, "loss": 1.8666, "step": 17540 }, { "epoch": 0.6239445382632655, "grad_norm": 1.605921745300293, "learning_rate": 1.8666159298680177e-05, "loss": 1.8678, "step": 17550 }, { "epoch": 0.6243000622166919, "grad_norm": 1.5179563760757446, "learning_rate": 1.866420309540599e-05, "loss": 1.8845, "step": 17560 }, { "epoch": 0.6246555861701182, "grad_norm": 1.6529120206832886, "learning_rate": 1.8662245561364542e-05, "loss": 1.8428, "step": 17570 }, { "epoch": 0.6250111101235446, "grad_norm": 1.6774760484695435, "learning_rate": 1.8660286696856486e-05, "loss": 1.8814, "step": 17580 }, { "epoch": 0.6253666340769709, "grad_norm": 1.6613315343856812, "learning_rate": 1.8658326502182692e-05, "loss": 1.8949, "step": 17590 }, { "epoch": 0.6257221580303973, "grad_norm": 1.678603172302246, "learning_rate": 1.8656364977644233e-05, "loss": 1.8903, "step": 17600 }, { "epoch": 0.6260776819838236, "grad_norm": 1.6080899238586426, "learning_rate": 1.8654402123542392e-05, "loss": 1.8357, "step": 17610 }, { "epoch": 0.62643320593725, "grad_norm": 1.58425772190094, "learning_rate": 1.865243794017865e-05, "loss": 1.8993, "step": 17620 }, { "epoch": 0.6267887298906764, "grad_norm": 1.6958409547805786, "learning_rate": 1.865047242785469e-05, "loss": 1.8097, "step": 17630 }, { "epoch": 0.6271442538441028, "grad_norm": 1.5857570171356201, "learning_rate": 1.86485055868724e-05, "loss": 1.8372, "step": 17640 }, { "epoch": 0.6274997777975291, "grad_norm": 1.654848337173462, "learning_rate": 1.8646537417533886e-05, "loss": 1.8653, "step": 17650 }, { "epoch": 0.6278553017509555, "grad_norm": 1.7152830362319946, "learning_rate": 1.8644567920141436e-05, "loss": 1.8827, "step": 17660 }, { "epoch": 0.6282108257043818, "grad_norm": 1.5285979509353638, "learning_rate": 1.8642597094997552e-05, "loss": 1.8487, "step": 17670 }, { "epoch": 0.6285663496578082, "grad_norm": 1.608949899673462, "learning_rate": 1.8640624942404945e-05, "loss": 1.8678, "step": 17680 }, { "epoch": 0.6289218736112345, "grad_norm": 1.5404757261276245, "learning_rate": 1.8638651462666527e-05, "loss": 1.8938, "step": 17690 }, { "epoch": 0.629277397564661, "grad_norm": 1.5254802703857422, "learning_rate": 1.8636676656085407e-05, "loss": 1.8407, "step": 17700 }, { "epoch": 0.6296329215180873, "grad_norm": 1.770894169807434, "learning_rate": 1.8634700522964904e-05, "loss": 1.8741, "step": 17710 }, { "epoch": 0.6299884454715137, "grad_norm": 1.549384355545044, "learning_rate": 1.863272306360854e-05, "loss": 1.9207, "step": 17720 }, { "epoch": 0.63034396942494, "grad_norm": 1.648349642753601, "learning_rate": 1.8630744278320046e-05, "loss": 1.8157, "step": 17730 }, { "epoch": 0.6306994933783664, "grad_norm": 1.6413389444351196, "learning_rate": 1.8628764167403345e-05, "loss": 1.8864, "step": 17740 }, { "epoch": 0.6310550173317927, "grad_norm": 1.6191598176956177, "learning_rate": 1.8626782731162567e-05, "loss": 1.8505, "step": 17750 }, { "epoch": 0.6314105412852191, "grad_norm": 1.691037654876709, "learning_rate": 1.862479996990205e-05, "loss": 1.8194, "step": 17760 }, { "epoch": 0.6317660652386454, "grad_norm": 1.6275644302368164, "learning_rate": 1.8622815883926343e-05, "loss": 1.8535, "step": 17770 }, { "epoch": 0.6321215891920718, "grad_norm": 1.5613367557525635, "learning_rate": 1.8620830473540174e-05, "loss": 1.8215, "step": 17780 }, { "epoch": 0.6324771131454981, "grad_norm": 1.5440773963928223, "learning_rate": 1.86188437390485e-05, "loss": 1.8206, "step": 17790 }, { "epoch": 0.6328326370989246, "grad_norm": 1.7234688997268677, "learning_rate": 1.861685568075647e-05, "loss": 1.8477, "step": 17800 }, { "epoch": 0.6331881610523509, "grad_norm": 1.5920257568359375, "learning_rate": 1.8614866298969437e-05, "loss": 1.8264, "step": 17810 }, { "epoch": 0.6335436850057773, "grad_norm": 1.582025408744812, "learning_rate": 1.8612875593992955e-05, "loss": 1.8617, "step": 17820 }, { "epoch": 0.6338992089592036, "grad_norm": 1.632765293121338, "learning_rate": 1.861088356613278e-05, "loss": 1.8548, "step": 17830 }, { "epoch": 0.63425473291263, "grad_norm": 1.7121986150741577, "learning_rate": 1.8608890215694883e-05, "loss": 1.8589, "step": 17840 }, { "epoch": 0.6346102568660563, "grad_norm": 1.6721488237380981, "learning_rate": 1.8606895542985432e-05, "loss": 1.8442, "step": 17850 }, { "epoch": 0.6349657808194827, "grad_norm": 1.7103196382522583, "learning_rate": 1.8604899548310786e-05, "loss": 1.7886, "step": 17860 }, { "epoch": 0.635321304772909, "grad_norm": 1.597957968711853, "learning_rate": 1.8602902231977523e-05, "loss": 1.8576, "step": 17870 }, { "epoch": 0.6356768287263355, "grad_norm": 1.6161609888076782, "learning_rate": 1.8600903594292415e-05, "loss": 1.8836, "step": 17880 }, { "epoch": 0.6360323526797618, "grad_norm": 1.5441991090774536, "learning_rate": 1.8598903635562448e-05, "loss": 1.889, "step": 17890 }, { "epoch": 0.6363878766331882, "grad_norm": 1.5979788303375244, "learning_rate": 1.8596902356094796e-05, "loss": 1.8414, "step": 17900 }, { "epoch": 0.6367434005866145, "grad_norm": 1.5707616806030273, "learning_rate": 1.8594899756196847e-05, "loss": 1.8864, "step": 17910 }, { "epoch": 0.6370989245400409, "grad_norm": 1.68429434299469, "learning_rate": 1.8592895836176183e-05, "loss": 1.8358, "step": 17920 }, { "epoch": 0.6374544484934672, "grad_norm": 1.5664743185043335, "learning_rate": 1.8590890596340598e-05, "loss": 1.871, "step": 17930 }, { "epoch": 0.6378099724468936, "grad_norm": 1.7070468664169312, "learning_rate": 1.858888403699808e-05, "loss": 1.8993, "step": 17940 }, { "epoch": 0.6381654964003199, "grad_norm": 1.5247119665145874, "learning_rate": 1.8586876158456833e-05, "loss": 1.8519, "step": 17950 }, { "epoch": 0.6385210203537464, "grad_norm": 1.7177155017852783, "learning_rate": 1.8584866961025245e-05, "loss": 1.8297, "step": 17960 }, { "epoch": 0.6388765443071727, "grad_norm": 1.6578903198242188, "learning_rate": 1.8582856445011918e-05, "loss": 1.8982, "step": 17970 }, { "epoch": 0.6392320682605991, "grad_norm": 1.6425923109054565, "learning_rate": 1.8580844610725653e-05, "loss": 1.8607, "step": 17980 }, { "epoch": 0.6395875922140254, "grad_norm": 1.5677855014801025, "learning_rate": 1.8578831458475465e-05, "loss": 1.817, "step": 17990 }, { "epoch": 0.6399431161674518, "grad_norm": 1.5550886392593384, "learning_rate": 1.857681698857055e-05, "loss": 1.8454, "step": 18000 }, { "epoch": 0.6402986401208781, "grad_norm": 1.5829353332519531, "learning_rate": 1.8574801201320324e-05, "loss": 1.887, "step": 18010 }, { "epoch": 0.6406541640743045, "grad_norm": 1.652955412864685, "learning_rate": 1.8572784097034396e-05, "loss": 1.8449, "step": 18020 }, { "epoch": 0.6410096880277308, "grad_norm": 1.62899649143219, "learning_rate": 1.8570765676022585e-05, "loss": 1.8209, "step": 18030 }, { "epoch": 0.6413652119811573, "grad_norm": 1.6158806085586548, "learning_rate": 1.8568745938594905e-05, "loss": 1.8784, "step": 18040 }, { "epoch": 0.6417207359345836, "grad_norm": 1.5244545936584473, "learning_rate": 1.8566724885061574e-05, "loss": 1.8333, "step": 18050 }, { "epoch": 0.64207625988801, "grad_norm": 1.621248722076416, "learning_rate": 1.8564702515733016e-05, "loss": 1.8925, "step": 18060 }, { "epoch": 0.6424317838414363, "grad_norm": 1.6901414394378662, "learning_rate": 1.8562678830919854e-05, "loss": 1.8802, "step": 18070 }, { "epoch": 0.6427873077948627, "grad_norm": 1.7427568435668945, "learning_rate": 1.856065383093291e-05, "loss": 1.871, "step": 18080 }, { "epoch": 0.643142831748289, "grad_norm": 1.60033118724823, "learning_rate": 1.855862751608321e-05, "loss": 1.8079, "step": 18090 }, { "epoch": 0.6434983557017154, "grad_norm": 1.7531661987304688, "learning_rate": 1.8556599886681992e-05, "loss": 1.8374, "step": 18100 }, { "epoch": 0.6438538796551417, "grad_norm": 1.6297991275787354, "learning_rate": 1.855457094304068e-05, "loss": 1.8442, "step": 18110 }, { "epoch": 0.6442094036085682, "grad_norm": 1.7636593580245972, "learning_rate": 1.8552540685470908e-05, "loss": 1.8322, "step": 18120 }, { "epoch": 0.6445649275619945, "grad_norm": 1.6729167699813843, "learning_rate": 1.855050911428451e-05, "loss": 1.8663, "step": 18130 }, { "epoch": 0.6449204515154209, "grad_norm": 1.8134124279022217, "learning_rate": 1.8548476229793525e-05, "loss": 1.8381, "step": 18140 }, { "epoch": 0.6452759754688472, "grad_norm": 1.7415553331375122, "learning_rate": 1.854644203231019e-05, "loss": 1.884, "step": 18150 }, { "epoch": 0.6456314994222736, "grad_norm": 1.5733280181884766, "learning_rate": 1.854440652214695e-05, "loss": 1.8278, "step": 18160 }, { "epoch": 0.6459870233756999, "grad_norm": 1.6073261499404907, "learning_rate": 1.8542369699616437e-05, "loss": 1.8908, "step": 18170 }, { "epoch": 0.6463425473291263, "grad_norm": 1.7024801969528198, "learning_rate": 1.8540331565031506e-05, "loss": 1.8522, "step": 18180 }, { "epoch": 0.6466980712825526, "grad_norm": 1.533766508102417, "learning_rate": 1.853829211870519e-05, "loss": 1.8356, "step": 18190 }, { "epoch": 0.647053595235979, "grad_norm": 1.5265889167785645, "learning_rate": 1.8536251360950737e-05, "loss": 1.8897, "step": 18200 }, { "epoch": 0.6474091191894054, "grad_norm": 1.5848630666732788, "learning_rate": 1.8534209292081603e-05, "loss": 1.8548, "step": 18210 }, { "epoch": 0.6477646431428318, "grad_norm": 1.622403621673584, "learning_rate": 1.8532165912411425e-05, "loss": 1.829, "step": 18220 }, { "epoch": 0.6481201670962581, "grad_norm": 1.649625301361084, "learning_rate": 1.8530121222254064e-05, "loss": 1.8602, "step": 18230 }, { "epoch": 0.6484756910496845, "grad_norm": 1.5093032121658325, "learning_rate": 1.852807522192357e-05, "loss": 1.7971, "step": 18240 }, { "epoch": 0.6488312150031108, "grad_norm": 1.608844518661499, "learning_rate": 1.852602791173419e-05, "loss": 1.8989, "step": 18250 }, { "epoch": 0.6491867389565372, "grad_norm": 1.6054035425186157, "learning_rate": 1.8523979292000385e-05, "loss": 1.8686, "step": 18260 }, { "epoch": 0.6495422629099635, "grad_norm": 1.569411039352417, "learning_rate": 1.8521929363036802e-05, "loss": 1.8637, "step": 18270 }, { "epoch": 0.64989778686339, "grad_norm": 1.8090263605117798, "learning_rate": 1.8519878125158305e-05, "loss": 1.8861, "step": 18280 }, { "epoch": 0.6502533108168163, "grad_norm": 1.5960959196090698, "learning_rate": 1.8517825578679946e-05, "loss": 1.8546, "step": 18290 }, { "epoch": 0.6506088347702427, "grad_norm": 1.5911707878112793, "learning_rate": 1.8515771723916987e-05, "loss": 1.8012, "step": 18300 }, { "epoch": 0.650964358723669, "grad_norm": 1.6279571056365967, "learning_rate": 1.8513716561184883e-05, "loss": 1.8322, "step": 18310 }, { "epoch": 0.6513198826770954, "grad_norm": 1.5484329462051392, "learning_rate": 1.85116600907993e-05, "loss": 1.8195, "step": 18320 }, { "epoch": 0.6516754066305217, "grad_norm": 1.628717303276062, "learning_rate": 1.8509602313076095e-05, "loss": 1.8482, "step": 18330 }, { "epoch": 0.6520309305839481, "grad_norm": 1.7035515308380127, "learning_rate": 1.8507543228331327e-05, "loss": 1.8507, "step": 18340 }, { "epoch": 0.6523864545373744, "grad_norm": 1.6159610748291016, "learning_rate": 1.8505482836881262e-05, "loss": 1.9007, "step": 18350 }, { "epoch": 0.6527419784908008, "grad_norm": 1.68350088596344, "learning_rate": 1.8503421139042366e-05, "loss": 1.8793, "step": 18360 }, { "epoch": 0.6530975024442272, "grad_norm": 1.5864074230194092, "learning_rate": 1.85013581351313e-05, "loss": 1.8496, "step": 18370 }, { "epoch": 0.6534530263976536, "grad_norm": 1.6897317171096802, "learning_rate": 1.849929382546493e-05, "loss": 1.841, "step": 18380 }, { "epoch": 0.6538085503510799, "grad_norm": 1.6994426250457764, "learning_rate": 1.8497228210360317e-05, "loss": 1.8071, "step": 18390 }, { "epoch": 0.6541640743045063, "grad_norm": 1.728031873703003, "learning_rate": 1.8495161290134726e-05, "loss": 1.8652, "step": 18400 }, { "epoch": 0.6545195982579326, "grad_norm": 1.640601634979248, "learning_rate": 1.8493093065105627e-05, "loss": 1.8008, "step": 18410 }, { "epoch": 0.654875122211359, "grad_norm": 1.699002742767334, "learning_rate": 1.849102353559069e-05, "loss": 1.8631, "step": 18420 }, { "epoch": 0.6552306461647853, "grad_norm": 1.7025487422943115, "learning_rate": 1.848895270190777e-05, "loss": 1.8935, "step": 18430 }, { "epoch": 0.6555861701182117, "grad_norm": 1.599440574645996, "learning_rate": 1.848688056437495e-05, "loss": 1.8361, "step": 18440 }, { "epoch": 0.655941694071638, "grad_norm": 1.6044576168060303, "learning_rate": 1.848480712331048e-05, "loss": 1.8326, "step": 18450 }, { "epoch": 0.6562972180250645, "grad_norm": 1.664693832397461, "learning_rate": 1.848273237903284e-05, "loss": 1.8676, "step": 18460 }, { "epoch": 0.6566527419784908, "grad_norm": 1.6578155755996704, "learning_rate": 1.8480656331860692e-05, "loss": 1.8822, "step": 18470 }, { "epoch": 0.6570082659319172, "grad_norm": 1.5991641283035278, "learning_rate": 1.847857898211291e-05, "loss": 1.8952, "step": 18480 }, { "epoch": 0.6573637898853435, "grad_norm": 1.6421630382537842, "learning_rate": 1.847650033010855e-05, "loss": 1.8613, "step": 18490 }, { "epoch": 0.6577193138387699, "grad_norm": 1.5893738269805908, "learning_rate": 1.8474420376166893e-05, "loss": 1.8507, "step": 18500 }, { "epoch": 0.6580748377921962, "grad_norm": 1.6135833263397217, "learning_rate": 1.84723391206074e-05, "loss": 1.8942, "step": 18510 }, { "epoch": 0.6584303617456226, "grad_norm": 1.5302294492721558, "learning_rate": 1.847025656374974e-05, "loss": 1.8624, "step": 18520 }, { "epoch": 0.658785885699049, "grad_norm": 1.6853063106536865, "learning_rate": 1.846817270591378e-05, "loss": 1.8471, "step": 18530 }, { "epoch": 0.6591414096524754, "grad_norm": 1.6253217458724976, "learning_rate": 1.846608754741959e-05, "loss": 1.8594, "step": 18540 }, { "epoch": 0.6594969336059017, "grad_norm": 1.6094721555709839, "learning_rate": 1.8464001088587436e-05, "loss": 1.8688, "step": 18550 }, { "epoch": 0.6598524575593281, "grad_norm": 1.6854521036148071, "learning_rate": 1.8461913329737782e-05, "loss": 1.8704, "step": 18560 }, { "epoch": 0.6602079815127544, "grad_norm": 1.702346682548523, "learning_rate": 1.8459824271191298e-05, "loss": 1.8795, "step": 18570 }, { "epoch": 0.6605635054661808, "grad_norm": 1.6592522859573364, "learning_rate": 1.8457733913268848e-05, "loss": 1.8835, "step": 18580 }, { "epoch": 0.6609190294196071, "grad_norm": 1.5579978227615356, "learning_rate": 1.84556422562915e-05, "loss": 1.8281, "step": 18590 }, { "epoch": 0.6612745533730335, "grad_norm": 1.6369131803512573, "learning_rate": 1.8453549300580523e-05, "loss": 1.8923, "step": 18600 }, { "epoch": 0.6616300773264598, "grad_norm": 1.640381097793579, "learning_rate": 1.8451455046457373e-05, "loss": 1.8469, "step": 18610 }, { "epoch": 0.6619856012798863, "grad_norm": 1.5950998067855835, "learning_rate": 1.8449359494243722e-05, "loss": 1.8305, "step": 18620 }, { "epoch": 0.6623411252333126, "grad_norm": 1.5842294692993164, "learning_rate": 1.8447262644261427e-05, "loss": 1.8725, "step": 18630 }, { "epoch": 0.662696649186739, "grad_norm": 1.5862181186676025, "learning_rate": 1.8445164496832558e-05, "loss": 1.8628, "step": 18640 }, { "epoch": 0.6630521731401653, "grad_norm": 1.6465989351272583, "learning_rate": 1.844306505227937e-05, "loss": 1.879, "step": 18650 }, { "epoch": 0.6634076970935917, "grad_norm": 1.61064612865448, "learning_rate": 1.844096431092433e-05, "loss": 1.8565, "step": 18660 }, { "epoch": 0.663763221047018, "grad_norm": 1.6815714836120605, "learning_rate": 1.84388622730901e-05, "loss": 1.8892, "step": 18670 }, { "epoch": 0.6641187450004444, "grad_norm": 1.5897061824798584, "learning_rate": 1.843675893909953e-05, "loss": 1.8605, "step": 18680 }, { "epoch": 0.6644742689538707, "grad_norm": 1.6312036514282227, "learning_rate": 1.8434654309275692e-05, "loss": 1.8798, "step": 18690 }, { "epoch": 0.6648297929072972, "grad_norm": 1.5694767236709595, "learning_rate": 1.843254838394184e-05, "loss": 1.8647, "step": 18700 }, { "epoch": 0.6651853168607235, "grad_norm": 1.6571435928344727, "learning_rate": 1.8430441163421422e-05, "loss": 1.898, "step": 18710 }, { "epoch": 0.6655408408141499, "grad_norm": 1.5843617916107178, "learning_rate": 1.8428332648038103e-05, "loss": 1.8884, "step": 18720 }, { "epoch": 0.6658963647675762, "grad_norm": 1.666013240814209, "learning_rate": 1.8426222838115733e-05, "loss": 1.8537, "step": 18730 }, { "epoch": 0.6662518887210026, "grad_norm": 1.6343095302581787, "learning_rate": 1.8424111733978374e-05, "loss": 1.8917, "step": 18740 }, { "epoch": 0.6666074126744289, "grad_norm": 1.6593196392059326, "learning_rate": 1.8421999335950267e-05, "loss": 1.8811, "step": 18750 }, { "epoch": 0.6669629366278553, "grad_norm": 1.6329734325408936, "learning_rate": 1.841988564435587e-05, "loss": 1.858, "step": 18760 }, { "epoch": 0.6673184605812816, "grad_norm": 1.6318451166152954, "learning_rate": 1.8417770659519832e-05, "loss": 1.8396, "step": 18770 }, { "epoch": 0.667673984534708, "grad_norm": 1.611892580986023, "learning_rate": 1.8415654381767e-05, "loss": 1.8838, "step": 18780 }, { "epoch": 0.6680295084881344, "grad_norm": 1.6420646905899048, "learning_rate": 1.8413536811422423e-05, "loss": 1.8507, "step": 18790 }, { "epoch": 0.6683850324415608, "grad_norm": 1.7339340448379517, "learning_rate": 1.8411417948811343e-05, "loss": 1.8491, "step": 18800 }, { "epoch": 0.6687405563949871, "grad_norm": 1.7298238277435303, "learning_rate": 1.8409297794259205e-05, "loss": 1.8828, "step": 18810 }, { "epoch": 0.6690960803484135, "grad_norm": 1.5757405757904053, "learning_rate": 1.8407176348091656e-05, "loss": 1.8271, "step": 18820 }, { "epoch": 0.6694516043018398, "grad_norm": 1.5821876525878906, "learning_rate": 1.8405053610634528e-05, "loss": 1.8613, "step": 18830 }, { "epoch": 0.6698071282552662, "grad_norm": 1.5011639595031738, "learning_rate": 1.8402929582213872e-05, "loss": 1.9149, "step": 18840 }, { "epoch": 0.6701626522086925, "grad_norm": 1.745612621307373, "learning_rate": 1.8400804263155914e-05, "loss": 1.8757, "step": 18850 }, { "epoch": 0.670518176162119, "grad_norm": 1.7035101652145386, "learning_rate": 1.8398677653787098e-05, "loss": 1.8415, "step": 18860 }, { "epoch": 0.6708737001155453, "grad_norm": 1.5889948606491089, "learning_rate": 1.839654975443405e-05, "loss": 1.8464, "step": 18870 }, { "epoch": 0.6712292240689717, "grad_norm": 1.5949231386184692, "learning_rate": 1.8394420565423613e-05, "loss": 1.8797, "step": 18880 }, { "epoch": 0.671584748022398, "grad_norm": 1.6531540155410767, "learning_rate": 1.8392290087082806e-05, "loss": 1.8343, "step": 18890 }, { "epoch": 0.6719402719758244, "grad_norm": 1.5893442630767822, "learning_rate": 1.839015831973886e-05, "loss": 1.8457, "step": 18900 }, { "epoch": 0.6722957959292507, "grad_norm": 1.6916165351867676, "learning_rate": 1.8388025263719208e-05, "loss": 1.8506, "step": 18910 }, { "epoch": 0.6726513198826771, "grad_norm": 1.5590561628341675, "learning_rate": 1.8385890919351467e-05, "loss": 1.8665, "step": 18920 }, { "epoch": 0.6730068438361034, "grad_norm": 1.6616570949554443, "learning_rate": 1.8383755286963455e-05, "loss": 1.8355, "step": 18930 }, { "epoch": 0.6733623677895298, "grad_norm": 1.6090588569641113, "learning_rate": 1.8381618366883207e-05, "loss": 1.8628, "step": 18940 }, { "epoch": 0.6737178917429562, "grad_norm": 1.7417858839035034, "learning_rate": 1.8379480159438924e-05, "loss": 1.8802, "step": 18950 }, { "epoch": 0.6740734156963826, "grad_norm": 1.5920038223266602, "learning_rate": 1.837734066495903e-05, "loss": 1.8936, "step": 18960 }, { "epoch": 0.6744289396498089, "grad_norm": 1.5697931051254272, "learning_rate": 1.8375199883772138e-05, "loss": 1.8345, "step": 18970 }, { "epoch": 0.6747844636032353, "grad_norm": 1.6053522825241089, "learning_rate": 1.8373057816207054e-05, "loss": 1.8272, "step": 18980 }, { "epoch": 0.6751399875566616, "grad_norm": 1.5402426719665527, "learning_rate": 1.8370914462592793e-05, "loss": 1.8213, "step": 18990 }, { "epoch": 0.675495511510088, "grad_norm": 1.6520720720291138, "learning_rate": 1.8368769823258553e-05, "loss": 1.8394, "step": 19000 }, { "epoch": 0.6758510354635143, "grad_norm": 1.6406983137130737, "learning_rate": 1.836662389853374e-05, "loss": 1.8974, "step": 19010 }, { "epoch": 0.6762065594169407, "grad_norm": 1.491894006729126, "learning_rate": 1.836447668874796e-05, "loss": 1.8567, "step": 19020 }, { "epoch": 0.676562083370367, "grad_norm": 1.6023941040039062, "learning_rate": 1.8362328194231003e-05, "loss": 1.8623, "step": 19030 }, { "epoch": 0.6769176073237935, "grad_norm": 1.6651532649993896, "learning_rate": 1.836017841531287e-05, "loss": 1.8445, "step": 19040 }, { "epoch": 0.6772731312772198, "grad_norm": 1.6000490188598633, "learning_rate": 1.8358027352323747e-05, "loss": 1.8502, "step": 19050 }, { "epoch": 0.6776286552306462, "grad_norm": 1.562110424041748, "learning_rate": 1.835587500559403e-05, "loss": 1.8502, "step": 19060 }, { "epoch": 0.6779841791840725, "grad_norm": 1.6804255247116089, "learning_rate": 1.8353721375454304e-05, "loss": 1.8289, "step": 19070 }, { "epoch": 0.6783397031374989, "grad_norm": 1.663631796836853, "learning_rate": 1.8351566462235352e-05, "loss": 1.8722, "step": 19080 }, { "epoch": 0.6786952270909252, "grad_norm": 1.6991126537322998, "learning_rate": 1.8349410266268156e-05, "loss": 1.8386, "step": 19090 }, { "epoch": 0.6790507510443516, "grad_norm": 1.6776806116104126, "learning_rate": 1.83472527878839e-05, "loss": 1.8723, "step": 19100 }, { "epoch": 0.679406274997778, "grad_norm": 1.6884881258010864, "learning_rate": 1.834509402741395e-05, "loss": 1.8302, "step": 19110 }, { "epoch": 0.6797617989512044, "grad_norm": 1.5213853120803833, "learning_rate": 1.8342933985189876e-05, "loss": 1.8069, "step": 19120 }, { "epoch": 0.6801173229046307, "grad_norm": 1.5394185781478882, "learning_rate": 1.8340772661543458e-05, "loss": 1.8559, "step": 19130 }, { "epoch": 0.6804728468580571, "grad_norm": 1.665880560874939, "learning_rate": 1.8338610056806656e-05, "loss": 1.7783, "step": 19140 }, { "epoch": 0.6808283708114834, "grad_norm": 1.7000386714935303, "learning_rate": 1.8336446171311633e-05, "loss": 1.9049, "step": 19150 }, { "epoch": 0.6811838947649098, "grad_norm": 1.6025673151016235, "learning_rate": 1.8334281005390748e-05, "loss": 1.8844, "step": 19160 }, { "epoch": 0.6815394187183361, "grad_norm": 1.6624587774276733, "learning_rate": 1.8332114559376552e-05, "loss": 1.8551, "step": 19170 }, { "epoch": 0.6818949426717625, "grad_norm": 1.759018063545227, "learning_rate": 1.8329946833601808e-05, "loss": 1.8738, "step": 19180 }, { "epoch": 0.6822504666251888, "grad_norm": 1.6430401802062988, "learning_rate": 1.8327777828399457e-05, "loss": 1.862, "step": 19190 }, { "epoch": 0.6826059905786153, "grad_norm": 1.6533403396606445, "learning_rate": 1.8325607544102647e-05, "loss": 1.9021, "step": 19200 }, { "epoch": 0.6829615145320416, "grad_norm": 1.5370020866394043, "learning_rate": 1.832343598104472e-05, "loss": 1.8838, "step": 19210 }, { "epoch": 0.683317038485468, "grad_norm": 1.6419110298156738, "learning_rate": 1.8321263139559218e-05, "loss": 1.8398, "step": 19220 }, { "epoch": 0.6836725624388943, "grad_norm": 1.6041102409362793, "learning_rate": 1.831908901997987e-05, "loss": 1.839, "step": 19230 }, { "epoch": 0.6840280863923207, "grad_norm": 1.629111886024475, "learning_rate": 1.8316913622640607e-05, "loss": 1.8722, "step": 19240 }, { "epoch": 0.684383610345747, "grad_norm": 1.5613747835159302, "learning_rate": 1.8314736947875565e-05, "loss": 1.8715, "step": 19250 }, { "epoch": 0.6847391342991734, "grad_norm": 1.6306045055389404, "learning_rate": 1.831255899601906e-05, "loss": 1.835, "step": 19260 }, { "epoch": 0.6850946582525997, "grad_norm": 1.7598761320114136, "learning_rate": 1.831037976740561e-05, "loss": 1.8467, "step": 19270 }, { "epoch": 0.6854501822060262, "grad_norm": 1.5451831817626953, "learning_rate": 1.8308199262369935e-05, "loss": 1.8815, "step": 19280 }, { "epoch": 0.6858057061594525, "grad_norm": 1.6422587633132935, "learning_rate": 1.830601748124695e-05, "loss": 1.8161, "step": 19290 }, { "epoch": 0.6861612301128789, "grad_norm": 1.535333275794983, "learning_rate": 1.8303834424371752e-05, "loss": 1.8079, "step": 19300 }, { "epoch": 0.6865167540663052, "grad_norm": 1.7117635011672974, "learning_rate": 1.8301650092079655e-05, "loss": 1.8575, "step": 19310 }, { "epoch": 0.6868722780197316, "grad_norm": 1.553102970123291, "learning_rate": 1.829946448470616e-05, "loss": 1.8938, "step": 19320 }, { "epoch": 0.6872278019731579, "grad_norm": 1.6548651456832886, "learning_rate": 1.8297277602586952e-05, "loss": 1.8293, "step": 19330 }, { "epoch": 0.6875833259265843, "grad_norm": 1.6911052465438843, "learning_rate": 1.8295089446057935e-05, "loss": 1.8591, "step": 19340 }, { "epoch": 0.6879388498800106, "grad_norm": 1.631972074508667, "learning_rate": 1.8292900015455182e-05, "loss": 1.8898, "step": 19350 }, { "epoch": 0.6882943738334371, "grad_norm": 1.6100616455078125, "learning_rate": 1.829070931111499e-05, "loss": 1.8708, "step": 19360 }, { "epoch": 0.6886498977868634, "grad_norm": 1.6359871625900269, "learning_rate": 1.828851733337383e-05, "loss": 1.896, "step": 19370 }, { "epoch": 0.6890054217402898, "grad_norm": 1.5799747705459595, "learning_rate": 1.8286324082568376e-05, "loss": 1.83, "step": 19380 }, { "epoch": 0.6893609456937161, "grad_norm": 1.637993335723877, "learning_rate": 1.8284129559035497e-05, "loss": 1.8788, "step": 19390 }, { "epoch": 0.6897164696471425, "grad_norm": 1.6041759252548218, "learning_rate": 1.828193376311226e-05, "loss": 1.8239, "step": 19400 }, { "epoch": 0.6900719936005688, "grad_norm": 1.6677876710891724, "learning_rate": 1.8279736695135927e-05, "loss": 1.8772, "step": 19410 }, { "epoch": 0.6904275175539952, "grad_norm": 1.6391844749450684, "learning_rate": 1.8277538355443952e-05, "loss": 1.9006, "step": 19420 }, { "epoch": 0.6907830415074215, "grad_norm": 1.6198102235794067, "learning_rate": 1.8275338744373985e-05, "loss": 1.8581, "step": 19430 }, { "epoch": 0.691138565460848, "grad_norm": 1.7414060831069946, "learning_rate": 1.827313786226387e-05, "loss": 1.879, "step": 19440 }, { "epoch": 0.6914940894142743, "grad_norm": 1.5909712314605713, "learning_rate": 1.827093570945166e-05, "loss": 1.8889, "step": 19450 }, { "epoch": 0.6918496133677007, "grad_norm": 1.6056509017944336, "learning_rate": 1.826873228627558e-05, "loss": 1.8276, "step": 19460 }, { "epoch": 0.692205137321127, "grad_norm": 1.7446337938308716, "learning_rate": 1.8266527593074065e-05, "loss": 1.8435, "step": 19470 }, { "epoch": 0.6925606612745534, "grad_norm": 1.7024445533752441, "learning_rate": 1.8264321630185745e-05, "loss": 1.8977, "step": 19480 }, { "epoch": 0.6929161852279797, "grad_norm": 1.7875020503997803, "learning_rate": 1.826211439794944e-05, "loss": 1.8498, "step": 19490 }, { "epoch": 0.6932717091814061, "grad_norm": 1.6902720928192139, "learning_rate": 1.8259905896704167e-05, "loss": 1.8503, "step": 19500 }, { "epoch": 0.6936272331348324, "grad_norm": 1.670092225074768, "learning_rate": 1.825769612678914e-05, "loss": 1.8279, "step": 19510 }, { "epoch": 0.6939827570882589, "grad_norm": 1.6163181066513062, "learning_rate": 1.825548508854376e-05, "loss": 1.8397, "step": 19520 }, { "epoch": 0.6943382810416852, "grad_norm": 1.6588308811187744, "learning_rate": 1.8253272782307636e-05, "loss": 1.8506, "step": 19530 }, { "epoch": 0.6946938049951116, "grad_norm": 1.6108286380767822, "learning_rate": 1.825105920842056e-05, "loss": 1.8466, "step": 19540 }, { "epoch": 0.6950493289485379, "grad_norm": 1.7074838876724243, "learning_rate": 1.8248844367222526e-05, "loss": 1.848, "step": 19550 }, { "epoch": 0.6954048529019643, "grad_norm": 1.5951340198516846, "learning_rate": 1.8246628259053716e-05, "loss": 1.8351, "step": 19560 }, { "epoch": 0.6957603768553906, "grad_norm": 1.5945497751235962, "learning_rate": 1.8244410884254514e-05, "loss": 1.7857, "step": 19570 }, { "epoch": 0.696115900808817, "grad_norm": 1.590248942375183, "learning_rate": 1.8242192243165488e-05, "loss": 1.8183, "step": 19580 }, { "epoch": 0.6964714247622433, "grad_norm": 1.6188017129898071, "learning_rate": 1.8239972336127415e-05, "loss": 1.8749, "step": 19590 }, { "epoch": 0.6968269487156697, "grad_norm": 1.7116796970367432, "learning_rate": 1.8237751163481258e-05, "loss": 1.8329, "step": 19600 }, { "epoch": 0.697182472669096, "grad_norm": 1.6828880310058594, "learning_rate": 1.8235528725568174e-05, "loss": 1.8843, "step": 19610 }, { "epoch": 0.6975379966225225, "grad_norm": 1.5773065090179443, "learning_rate": 1.8233305022729513e-05, "loss": 1.7972, "step": 19620 }, { "epoch": 0.6978935205759488, "grad_norm": 1.7473276853561401, "learning_rate": 1.8231080055306824e-05, "loss": 1.8221, "step": 19630 }, { "epoch": 0.6982490445293752, "grad_norm": 1.6167118549346924, "learning_rate": 1.8228853823641847e-05, "loss": 1.8207, "step": 19640 }, { "epoch": 0.6986045684828015, "grad_norm": 1.5284688472747803, "learning_rate": 1.822662632807652e-05, "loss": 1.868, "step": 19650 }, { "epoch": 0.6989600924362279, "grad_norm": 1.6059260368347168, "learning_rate": 1.822439756895297e-05, "loss": 1.8879, "step": 19660 }, { "epoch": 0.6993156163896542, "grad_norm": 1.6518758535385132, "learning_rate": 1.8222167546613518e-05, "loss": 1.8398, "step": 19670 }, { "epoch": 0.6996711403430806, "grad_norm": 1.7268545627593994, "learning_rate": 1.8219936261400686e-05, "loss": 1.8351, "step": 19680 }, { "epoch": 0.700026664296507, "grad_norm": 1.6293846368789673, "learning_rate": 1.8217703713657186e-05, "loss": 1.8332, "step": 19690 }, { "epoch": 0.7003821882499334, "grad_norm": 1.6023805141448975, "learning_rate": 1.8215469903725917e-05, "loss": 1.8326, "step": 19700 }, { "epoch": 0.7007377122033597, "grad_norm": 1.5513273477554321, "learning_rate": 1.8213234831949984e-05, "loss": 1.8217, "step": 19710 }, { "epoch": 0.7010932361567861, "grad_norm": 1.6523417234420776, "learning_rate": 1.821099849867268e-05, "loss": 1.8465, "step": 19720 }, { "epoch": 0.7014487601102124, "grad_norm": 1.6089566946029663, "learning_rate": 1.8208760904237488e-05, "loss": 1.8304, "step": 19730 }, { "epoch": 0.7018042840636388, "grad_norm": 1.6778578758239746, "learning_rate": 1.8206522048988092e-05, "loss": 1.8375, "step": 19740 }, { "epoch": 0.7021598080170651, "grad_norm": 1.645105242729187, "learning_rate": 1.820428193326836e-05, "loss": 1.8401, "step": 19750 }, { "epoch": 0.7025153319704915, "grad_norm": 1.6396735906600952, "learning_rate": 1.8202040557422373e-05, "loss": 1.8505, "step": 19760 }, { "epoch": 0.7028708559239178, "grad_norm": 1.620423674583435, "learning_rate": 1.8199797921794372e-05, "loss": 1.8303, "step": 19770 }, { "epoch": 0.7032263798773443, "grad_norm": 1.6041761636734009, "learning_rate": 1.8197554026728834e-05, "loss": 1.8677, "step": 19780 }, { "epoch": 0.7035819038307706, "grad_norm": 1.585405707359314, "learning_rate": 1.819530887257039e-05, "loss": 1.7923, "step": 19790 }, { "epoch": 0.703937427784197, "grad_norm": 1.6111605167388916, "learning_rate": 1.819306245966389e-05, "loss": 1.84, "step": 19800 }, { "epoch": 0.7042929517376233, "grad_norm": 1.5929813385009766, "learning_rate": 1.819081478835437e-05, "loss": 1.8554, "step": 19810 }, { "epoch": 0.7046484756910497, "grad_norm": 1.593061923980713, "learning_rate": 1.818856585898705e-05, "loss": 1.8259, "step": 19820 }, { "epoch": 0.705003999644476, "grad_norm": 1.6575653553009033, "learning_rate": 1.818631567190736e-05, "loss": 1.8342, "step": 19830 }, { "epoch": 0.7053595235979024, "grad_norm": 1.563677430152893, "learning_rate": 1.8184064227460912e-05, "loss": 1.8102, "step": 19840 }, { "epoch": 0.7057150475513287, "grad_norm": 1.7218552827835083, "learning_rate": 1.818181152599351e-05, "loss": 1.801, "step": 19850 }, { "epoch": 0.7060705715047552, "grad_norm": 1.6597864627838135, "learning_rate": 1.817955756785116e-05, "loss": 1.8578, "step": 19860 }, { "epoch": 0.7064260954581815, "grad_norm": 1.6149235963821411, "learning_rate": 1.8177302353380053e-05, "loss": 1.8695, "step": 19870 }, { "epoch": 0.7067816194116079, "grad_norm": 1.667304277420044, "learning_rate": 1.817504588292658e-05, "loss": 1.9101, "step": 19880 }, { "epoch": 0.7071371433650342, "grad_norm": 1.4457412958145142, "learning_rate": 1.8172788156837312e-05, "loss": 1.8208, "step": 19890 }, { "epoch": 0.7074926673184606, "grad_norm": 1.7635536193847656, "learning_rate": 1.817052917545903e-05, "loss": 1.8476, "step": 19900 }, { "epoch": 0.7078481912718869, "grad_norm": 1.6602329015731812, "learning_rate": 1.8168268939138696e-05, "loss": 1.8366, "step": 19910 }, { "epoch": 0.7082037152253133, "grad_norm": 1.601374864578247, "learning_rate": 1.8166007448223467e-05, "loss": 1.8494, "step": 19920 }, { "epoch": 0.7085592391787396, "grad_norm": 1.667270302772522, "learning_rate": 1.8163744703060698e-05, "loss": 1.8107, "step": 19930 }, { "epoch": 0.7089147631321661, "grad_norm": 1.6129560470581055, "learning_rate": 1.816148070399793e-05, "loss": 1.8337, "step": 19940 }, { "epoch": 0.7092702870855924, "grad_norm": 1.6151974201202393, "learning_rate": 1.8159215451382894e-05, "loss": 1.8616, "step": 19950 }, { "epoch": 0.7096258110390188, "grad_norm": 1.6923866271972656, "learning_rate": 1.815694894556353e-05, "loss": 1.8846, "step": 19960 }, { "epoch": 0.7099813349924451, "grad_norm": 1.7572191953659058, "learning_rate": 1.8154681186887946e-05, "loss": 1.9025, "step": 19970 }, { "epoch": 0.7103368589458715, "grad_norm": 1.613426685333252, "learning_rate": 1.8152412175704464e-05, "loss": 1.808, "step": 19980 }, { "epoch": 0.7106923828992978, "grad_norm": 1.656322956085205, "learning_rate": 1.8150141912361586e-05, "loss": 1.8517, "step": 19990 }, { "epoch": 0.7110479068527242, "grad_norm": 1.6725047826766968, "learning_rate": 1.8147870397208017e-05, "loss": 1.8071, "step": 20000 }, { "epoch": 0.7114034308061505, "grad_norm": 1.5440058708190918, "learning_rate": 1.814559763059264e-05, "loss": 1.9088, "step": 20010 }, { "epoch": 0.711758954759577, "grad_norm": 1.6006666421890259, "learning_rate": 1.8143323612864542e-05, "loss": 1.8601, "step": 20020 }, { "epoch": 0.7121144787130033, "grad_norm": 1.6369351148605347, "learning_rate": 1.8141048344372994e-05, "loss": 1.823, "step": 20030 }, { "epoch": 0.7124700026664297, "grad_norm": 1.6196844577789307, "learning_rate": 1.8138771825467462e-05, "loss": 1.8227, "step": 20040 }, { "epoch": 0.712825526619856, "grad_norm": 1.5950103998184204, "learning_rate": 1.8136494056497614e-05, "loss": 1.8067, "step": 20050 }, { "epoch": 0.7131810505732824, "grad_norm": 1.6997015476226807, "learning_rate": 1.8134215037813293e-05, "loss": 1.7878, "step": 20060 }, { "epoch": 0.7135365745267087, "grad_norm": 1.6241003274917603, "learning_rate": 1.813193476976454e-05, "loss": 1.8432, "step": 20070 }, { "epoch": 0.7138920984801351, "grad_norm": 1.6806402206420898, "learning_rate": 1.81296532527016e-05, "loss": 1.8331, "step": 20080 }, { "epoch": 0.7142476224335614, "grad_norm": 1.7081027030944824, "learning_rate": 1.812737048697489e-05, "loss": 1.8474, "step": 20090 }, { "epoch": 0.7146031463869879, "grad_norm": 1.6192286014556885, "learning_rate": 1.8125086472935034e-05, "loss": 1.8737, "step": 20100 }, { "epoch": 0.7149586703404142, "grad_norm": 1.6434106826782227, "learning_rate": 1.812280121093284e-05, "loss": 1.8605, "step": 20110 }, { "epoch": 0.7153141942938406, "grad_norm": 1.6780080795288086, "learning_rate": 1.812051470131931e-05, "loss": 1.8252, "step": 20120 }, { "epoch": 0.7156697182472669, "grad_norm": 1.6471208333969116, "learning_rate": 1.811822694444564e-05, "loss": 1.8707, "step": 20130 }, { "epoch": 0.7160252422006933, "grad_norm": 1.6043684482574463, "learning_rate": 1.8115937940663212e-05, "loss": 1.8365, "step": 20140 }, { "epoch": 0.7163807661541196, "grad_norm": 1.75435209274292, "learning_rate": 1.8113647690323603e-05, "loss": 1.8368, "step": 20150 }, { "epoch": 0.716736290107546, "grad_norm": 1.616033911705017, "learning_rate": 1.8111356193778577e-05, "loss": 1.8305, "step": 20160 }, { "epoch": 0.7170918140609723, "grad_norm": 1.5769684314727783, "learning_rate": 1.8109063451380105e-05, "loss": 1.8513, "step": 20170 }, { "epoch": 0.7174473380143987, "grad_norm": 1.749438762664795, "learning_rate": 1.8106769463480328e-05, "loss": 1.8363, "step": 20180 }, { "epoch": 0.7178028619678251, "grad_norm": 1.6878730058670044, "learning_rate": 1.810447423043159e-05, "loss": 1.7915, "step": 20190 }, { "epoch": 0.7181583859212515, "grad_norm": 1.5220178365707397, "learning_rate": 1.810217775258643e-05, "loss": 1.8436, "step": 20200 }, { "epoch": 0.7185139098746778, "grad_norm": 1.58015775680542, "learning_rate": 1.8099880030297567e-05, "loss": 1.8548, "step": 20210 }, { "epoch": 0.7188694338281042, "grad_norm": 1.6119468212127686, "learning_rate": 1.809758106391792e-05, "loss": 1.8813, "step": 20220 }, { "epoch": 0.7192249577815305, "grad_norm": 1.719566822052002, "learning_rate": 1.809528085380059e-05, "loss": 1.855, "step": 20230 }, { "epoch": 0.7195804817349569, "grad_norm": 1.649501085281372, "learning_rate": 1.8092979400298877e-05, "loss": 1.8391, "step": 20240 }, { "epoch": 0.7199360056883832, "grad_norm": 1.703221321105957, "learning_rate": 1.8090676703766276e-05, "loss": 1.9056, "step": 20250 }, { "epoch": 0.7202915296418096, "grad_norm": 1.6187703609466553, "learning_rate": 1.808837276455646e-05, "loss": 1.8322, "step": 20260 }, { "epoch": 0.720647053595236, "grad_norm": 1.7238303422927856, "learning_rate": 1.80860675830233e-05, "loss": 1.8094, "step": 20270 }, { "epoch": 0.7210025775486624, "grad_norm": 1.573097586631775, "learning_rate": 1.8083761159520862e-05, "loss": 1.8923, "step": 20280 }, { "epoch": 0.7213581015020887, "grad_norm": 1.6466491222381592, "learning_rate": 1.8081453494403395e-05, "loss": 1.8035, "step": 20290 }, { "epoch": 0.7217136254555151, "grad_norm": 1.5461498498916626, "learning_rate": 1.807914458802534e-05, "loss": 1.8699, "step": 20300 }, { "epoch": 0.7220691494089414, "grad_norm": 1.7276533842086792, "learning_rate": 1.807683444074134e-05, "loss": 1.8877, "step": 20310 }, { "epoch": 0.7224246733623678, "grad_norm": 1.6024223566055298, "learning_rate": 1.8074523052906204e-05, "loss": 1.8637, "step": 20320 }, { "epoch": 0.7227801973157941, "grad_norm": 1.538771390914917, "learning_rate": 1.807221042487496e-05, "loss": 1.8827, "step": 20330 }, { "epoch": 0.7231357212692205, "grad_norm": 1.727202296257019, "learning_rate": 1.8069896557002805e-05, "loss": 1.8885, "step": 20340 }, { "epoch": 0.7234912452226469, "grad_norm": 1.6250510215759277, "learning_rate": 1.8067581449645137e-05, "loss": 1.8255, "step": 20350 }, { "epoch": 0.7238467691760733, "grad_norm": 1.5995020866394043, "learning_rate": 1.8065265103157546e-05, "loss": 1.834, "step": 20360 }, { "epoch": 0.7242022931294996, "grad_norm": 1.6498067378997803, "learning_rate": 1.80629475178958e-05, "loss": 1.8596, "step": 20370 }, { "epoch": 0.724557817082926, "grad_norm": 1.680664300918579, "learning_rate": 1.8060628694215875e-05, "loss": 1.8338, "step": 20380 }, { "epoch": 0.7249133410363523, "grad_norm": 1.6563605070114136, "learning_rate": 1.805830863247392e-05, "loss": 1.8466, "step": 20390 }, { "epoch": 0.7252688649897787, "grad_norm": 1.557987928390503, "learning_rate": 1.8055987333026286e-05, "loss": 1.8276, "step": 20400 }, { "epoch": 0.725624388943205, "grad_norm": 1.6893463134765625, "learning_rate": 1.8053664796229508e-05, "loss": 1.7999, "step": 20410 }, { "epoch": 0.7259799128966314, "grad_norm": 1.7025909423828125, "learning_rate": 1.8051341022440315e-05, "loss": 1.8007, "step": 20420 }, { "epoch": 0.7263354368500577, "grad_norm": 1.5952210426330566, "learning_rate": 1.8049016012015626e-05, "loss": 1.8007, "step": 20430 }, { "epoch": 0.7266909608034842, "grad_norm": 1.640624761581421, "learning_rate": 1.804668976531254e-05, "loss": 1.8254, "step": 20440 }, { "epoch": 0.7270464847569105, "grad_norm": 1.5979695320129395, "learning_rate": 1.8044362282688365e-05, "loss": 1.8436, "step": 20450 }, { "epoch": 0.7274020087103369, "grad_norm": 1.5913257598876953, "learning_rate": 1.804203356450058e-05, "loss": 1.8661, "step": 20460 }, { "epoch": 0.7277575326637632, "grad_norm": 1.637255072593689, "learning_rate": 1.803970361110686e-05, "loss": 1.8876, "step": 20470 }, { "epoch": 0.7281130566171896, "grad_norm": 1.7362253665924072, "learning_rate": 1.8037372422865076e-05, "loss": 1.8607, "step": 20480 }, { "epoch": 0.7284685805706159, "grad_norm": 1.586946964263916, "learning_rate": 1.8035040000133284e-05, "loss": 1.8653, "step": 20490 }, { "epoch": 0.7288241045240423, "grad_norm": 1.6418026685714722, "learning_rate": 1.803270634326973e-05, "loss": 1.8445, "step": 20500 }, { "epoch": 0.7291796284774686, "grad_norm": 1.6286134719848633, "learning_rate": 1.803037145263284e-05, "loss": 1.8348, "step": 20510 }, { "epoch": 0.7295351524308951, "grad_norm": 1.7448618412017822, "learning_rate": 1.802803532858125e-05, "loss": 1.8338, "step": 20520 }, { "epoch": 0.7298906763843214, "grad_norm": 1.7649121284484863, "learning_rate": 1.8025697971473774e-05, "loss": 1.8344, "step": 20530 }, { "epoch": 0.7302462003377478, "grad_norm": 1.579267144203186, "learning_rate": 1.8023359381669406e-05, "loss": 1.8344, "step": 20540 }, { "epoch": 0.7306017242911741, "grad_norm": 1.6077048778533936, "learning_rate": 1.8021019559527343e-05, "loss": 1.8123, "step": 20550 }, { "epoch": 0.7309572482446005, "grad_norm": 1.6492174863815308, "learning_rate": 1.8018678505406972e-05, "loss": 1.8624, "step": 20560 }, { "epoch": 0.7313127721980268, "grad_norm": 1.5793914794921875, "learning_rate": 1.801633621966786e-05, "loss": 1.817, "step": 20570 }, { "epoch": 0.7316682961514532, "grad_norm": 1.6046448945999146, "learning_rate": 1.8013992702669763e-05, "loss": 1.8189, "step": 20580 }, { "epoch": 0.7320238201048795, "grad_norm": 1.5873152017593384, "learning_rate": 1.801164795477264e-05, "loss": 1.9034, "step": 20590 }, { "epoch": 0.732379344058306, "grad_norm": 1.6548078060150146, "learning_rate": 1.800930197633662e-05, "loss": 1.8272, "step": 20600 }, { "epoch": 0.7327348680117323, "grad_norm": 1.6251716613769531, "learning_rate": 1.8006954767722037e-05, "loss": 1.8533, "step": 20610 }, { "epoch": 0.7330903919651587, "grad_norm": 1.6041985750198364, "learning_rate": 1.8004606329289408e-05, "loss": 1.8543, "step": 20620 }, { "epoch": 0.733445915918585, "grad_norm": 1.686060905456543, "learning_rate": 1.800225666139943e-05, "loss": 1.8671, "step": 20630 }, { "epoch": 0.7338014398720114, "grad_norm": 1.6034380197525024, "learning_rate": 1.799990576441301e-05, "loss": 1.8507, "step": 20640 }, { "epoch": 0.7341569638254377, "grad_norm": 1.6069995164871216, "learning_rate": 1.799755363869122e-05, "loss": 1.8578, "step": 20650 }, { "epoch": 0.7345124877788641, "grad_norm": 1.68937087059021, "learning_rate": 1.799520028459534e-05, "loss": 1.8286, "step": 20660 }, { "epoch": 0.7348680117322904, "grad_norm": 1.651024580001831, "learning_rate": 1.7992845702486824e-05, "loss": 1.8287, "step": 20670 }, { "epoch": 0.7352235356857169, "grad_norm": 1.6019994020462036, "learning_rate": 1.7990489892727322e-05, "loss": 1.8236, "step": 20680 }, { "epoch": 0.7355790596391432, "grad_norm": 1.6435049772262573, "learning_rate": 1.7988132855678676e-05, "loss": 1.8916, "step": 20690 }, { "epoch": 0.7359345835925696, "grad_norm": 1.6221009492874146, "learning_rate": 1.7985774591702907e-05, "loss": 1.8688, "step": 20700 }, { "epoch": 0.7362901075459959, "grad_norm": 1.6398005485534668, "learning_rate": 1.7983415101162235e-05, "loss": 1.8534, "step": 20710 }, { "epoch": 0.7366456314994223, "grad_norm": 1.5574488639831543, "learning_rate": 1.798105438441906e-05, "loss": 1.8703, "step": 20720 }, { "epoch": 0.7370011554528486, "grad_norm": 1.7029882669448853, "learning_rate": 1.797869244183597e-05, "loss": 1.8173, "step": 20730 }, { "epoch": 0.737356679406275, "grad_norm": 1.6728116273880005, "learning_rate": 1.797632927377575e-05, "loss": 1.8568, "step": 20740 }, { "epoch": 0.7377122033597013, "grad_norm": 1.5825793743133545, "learning_rate": 1.7973964880601364e-05, "loss": 1.825, "step": 20750 }, { "epoch": 0.7380677273131278, "grad_norm": 1.7137134075164795, "learning_rate": 1.797159926267597e-05, "loss": 1.8368, "step": 20760 }, { "epoch": 0.7384232512665541, "grad_norm": 1.5658918619155884, "learning_rate": 1.796923242036291e-05, "loss": 1.8178, "step": 20770 }, { "epoch": 0.7387787752199805, "grad_norm": 1.6084784269332886, "learning_rate": 1.7966864354025722e-05, "loss": 1.8116, "step": 20780 }, { "epoch": 0.7391342991734068, "grad_norm": 1.6091594696044922, "learning_rate": 1.796449506402812e-05, "loss": 1.835, "step": 20790 }, { "epoch": 0.7394898231268332, "grad_norm": 1.8020347356796265, "learning_rate": 1.7962124550734013e-05, "loss": 1.819, "step": 20800 }, { "epoch": 0.7398453470802595, "grad_norm": 1.6283994913101196, "learning_rate": 1.7959752814507498e-05, "loss": 1.8651, "step": 20810 }, { "epoch": 0.7402008710336859, "grad_norm": 1.6090404987335205, "learning_rate": 1.7957379855712858e-05, "loss": 1.8372, "step": 20820 }, { "epoch": 0.7405563949871122, "grad_norm": 1.6721382141113281, "learning_rate": 1.7955005674714567e-05, "loss": 1.8023, "step": 20830 }, { "epoch": 0.7409119189405386, "grad_norm": 1.683385968208313, "learning_rate": 1.795263027187728e-05, "loss": 1.8386, "step": 20840 }, { "epoch": 0.741267442893965, "grad_norm": 1.7011699676513672, "learning_rate": 1.795025364756585e-05, "loss": 1.8509, "step": 20850 }, { "epoch": 0.7416229668473914, "grad_norm": 1.6731226444244385, "learning_rate": 1.7947875802145307e-05, "loss": 1.84, "step": 20860 }, { "epoch": 0.7419784908008177, "grad_norm": 1.5665476322174072, "learning_rate": 1.7945496735980872e-05, "loss": 1.842, "step": 20870 }, { "epoch": 0.7423340147542441, "grad_norm": 1.6955523490905762, "learning_rate": 1.794311644943796e-05, "loss": 1.862, "step": 20880 }, { "epoch": 0.7426895387076704, "grad_norm": 1.7335302829742432, "learning_rate": 1.7940734942882164e-05, "loss": 1.8447, "step": 20890 }, { "epoch": 0.7430450626610968, "grad_norm": 1.6597357988357544, "learning_rate": 1.7938352216679267e-05, "loss": 1.8865, "step": 20900 }, { "epoch": 0.7434005866145231, "grad_norm": 1.6029428243637085, "learning_rate": 1.793596827119525e-05, "loss": 1.8061, "step": 20910 }, { "epoch": 0.7437561105679495, "grad_norm": 1.6280450820922852, "learning_rate": 1.7933583106796263e-05, "loss": 1.8168, "step": 20920 }, { "epoch": 0.7441116345213759, "grad_norm": 1.4758005142211914, "learning_rate": 1.7931196723848652e-05, "loss": 1.8337, "step": 20930 }, { "epoch": 0.7444671584748023, "grad_norm": 1.6232197284698486, "learning_rate": 1.7928809122718955e-05, "loss": 1.817, "step": 20940 }, { "epoch": 0.7448226824282286, "grad_norm": 1.726462960243225, "learning_rate": 1.792642030377389e-05, "loss": 1.8219, "step": 20950 }, { "epoch": 0.745178206381655, "grad_norm": 1.620409369468689, "learning_rate": 1.7924030267380365e-05, "loss": 1.8492, "step": 20960 }, { "epoch": 0.7455337303350813, "grad_norm": 1.5886775255203247, "learning_rate": 1.7921639013905477e-05, "loss": 1.815, "step": 20970 }, { "epoch": 0.7458892542885077, "grad_norm": 1.500231146812439, "learning_rate": 1.7919246543716502e-05, "loss": 1.8441, "step": 20980 }, { "epoch": 0.746244778241934, "grad_norm": 1.6466845273971558, "learning_rate": 1.7916852857180913e-05, "loss": 1.8523, "step": 20990 }, { "epoch": 0.7466003021953604, "grad_norm": 1.5844566822052002, "learning_rate": 1.7914457954666368e-05, "loss": 1.861, "step": 21000 }, { "epoch": 0.7469558261487868, "grad_norm": 1.6205304861068726, "learning_rate": 1.79120618365407e-05, "loss": 1.865, "step": 21010 }, { "epoch": 0.7473113501022132, "grad_norm": 1.576826810836792, "learning_rate": 1.7909664503171947e-05, "loss": 1.8186, "step": 21020 }, { "epoch": 0.7476668740556395, "grad_norm": 1.6844556331634521, "learning_rate": 1.790726595492832e-05, "loss": 1.8328, "step": 21030 }, { "epoch": 0.7480223980090659, "grad_norm": 1.679314136505127, "learning_rate": 1.7904866192178215e-05, "loss": 1.8652, "step": 21040 }, { "epoch": 0.7483779219624922, "grad_norm": 1.7030935287475586, "learning_rate": 1.7902465215290233e-05, "loss": 1.8043, "step": 21050 }, { "epoch": 0.7487334459159186, "grad_norm": 1.6377439498901367, "learning_rate": 1.790006302463314e-05, "loss": 1.8494, "step": 21060 }, { "epoch": 0.7490889698693449, "grad_norm": 1.6536033153533936, "learning_rate": 1.7897659620575905e-05, "loss": 1.8108, "step": 21070 }, { "epoch": 0.7494444938227713, "grad_norm": 1.8303552865982056, "learning_rate": 1.789525500348767e-05, "loss": 1.8273, "step": 21080 }, { "epoch": 0.7498000177761976, "grad_norm": 1.5810844898223877, "learning_rate": 1.7892849173737764e-05, "loss": 1.8176, "step": 21090 }, { "epoch": 0.7501555417296241, "grad_norm": 1.5926487445831299, "learning_rate": 1.789044213169572e-05, "loss": 1.8238, "step": 21100 }, { "epoch": 0.7505110656830504, "grad_norm": 1.6886205673217773, "learning_rate": 1.7888033877731233e-05, "loss": 1.8556, "step": 21110 }, { "epoch": 0.7508665896364768, "grad_norm": 1.6423076391220093, "learning_rate": 1.7885624412214204e-05, "loss": 1.8218, "step": 21120 }, { "epoch": 0.7512221135899031, "grad_norm": 1.5861601829528809, "learning_rate": 1.7883213735514708e-05, "loss": 1.8104, "step": 21130 }, { "epoch": 0.7515776375433295, "grad_norm": 1.657173752784729, "learning_rate": 1.788080184800301e-05, "loss": 1.8889, "step": 21140 }, { "epoch": 0.7519331614967558, "grad_norm": 1.7222282886505127, "learning_rate": 1.787838875004956e-05, "loss": 1.8262, "step": 21150 }, { "epoch": 0.7522886854501822, "grad_norm": 1.6098462343215942, "learning_rate": 1.7875974442024996e-05, "loss": 1.7838, "step": 21160 }, { "epoch": 0.7526442094036085, "grad_norm": 1.7243529558181763, "learning_rate": 1.7873558924300143e-05, "loss": 1.8078, "step": 21170 }, { "epoch": 0.752999733357035, "grad_norm": 1.546335220336914, "learning_rate": 1.7871142197246e-05, "loss": 1.8422, "step": 21180 }, { "epoch": 0.7533552573104613, "grad_norm": 1.7641113996505737, "learning_rate": 1.786872426123378e-05, "loss": 1.8102, "step": 21190 }, { "epoch": 0.7537107812638877, "grad_norm": 1.620474100112915, "learning_rate": 1.7866305116634843e-05, "loss": 1.8326, "step": 21200 }, { "epoch": 0.754066305217314, "grad_norm": 1.7397280931472778, "learning_rate": 1.7863884763820762e-05, "loss": 1.8162, "step": 21210 }, { "epoch": 0.7544218291707404, "grad_norm": 1.6662113666534424, "learning_rate": 1.786146320316329e-05, "loss": 1.7993, "step": 21220 }, { "epoch": 0.7547773531241667, "grad_norm": 1.6224888563156128, "learning_rate": 1.785904043503436e-05, "loss": 1.8431, "step": 21230 }, { "epoch": 0.7551328770775931, "grad_norm": 1.585585117340088, "learning_rate": 1.7856616459806097e-05, "loss": 1.8341, "step": 21240 }, { "epoch": 0.7554884010310194, "grad_norm": 1.7950774431228638, "learning_rate": 1.7854191277850806e-05, "loss": 1.832, "step": 21250 }, { "epoch": 0.7558439249844459, "grad_norm": 1.6702255010604858, "learning_rate": 1.785176488954098e-05, "loss": 1.8655, "step": 21260 }, { "epoch": 0.7561994489378722, "grad_norm": 1.6955862045288086, "learning_rate": 1.7849337295249302e-05, "loss": 1.8188, "step": 21270 }, { "epoch": 0.7565549728912986, "grad_norm": 1.7578234672546387, "learning_rate": 1.784690849534863e-05, "loss": 1.8362, "step": 21280 }, { "epoch": 0.7569104968447249, "grad_norm": 1.7137067317962646, "learning_rate": 1.7844478490212012e-05, "loss": 1.8034, "step": 21290 }, { "epoch": 0.7572660207981513, "grad_norm": 1.7158740758895874, "learning_rate": 1.7842047280212683e-05, "loss": 1.8079, "step": 21300 }, { "epoch": 0.7576215447515776, "grad_norm": 1.6505048274993896, "learning_rate": 1.7839614865724064e-05, "loss": 1.8146, "step": 21310 }, { "epoch": 0.757977068705004, "grad_norm": 1.6587018966674805, "learning_rate": 1.7837181247119756e-05, "loss": 1.8379, "step": 21320 }, { "epoch": 0.7583325926584303, "grad_norm": 1.5471725463867188, "learning_rate": 1.7834746424773545e-05, "loss": 1.866, "step": 21330 }, { "epoch": 0.7586881166118568, "grad_norm": 1.5741422176361084, "learning_rate": 1.7832310399059406e-05, "loss": 1.8118, "step": 21340 }, { "epoch": 0.7590436405652831, "grad_norm": 1.6571557521820068, "learning_rate": 1.7829873170351505e-05, "loss": 1.8439, "step": 21350 }, { "epoch": 0.7593991645187095, "grad_norm": 1.5959396362304688, "learning_rate": 1.7827434739024176e-05, "loss": 1.8135, "step": 21360 }, { "epoch": 0.7597546884721358, "grad_norm": 1.6723634004592896, "learning_rate": 1.782499510545195e-05, "loss": 1.8359, "step": 21370 }, { "epoch": 0.7601102124255622, "grad_norm": 1.68485426902771, "learning_rate": 1.7822554270009535e-05, "loss": 1.8569, "step": 21380 }, { "epoch": 0.7604657363789885, "grad_norm": 1.6855770349502563, "learning_rate": 1.7820112233071837e-05, "loss": 1.8868, "step": 21390 }, { "epoch": 0.7608212603324149, "grad_norm": 1.7310736179351807, "learning_rate": 1.781766899501393e-05, "loss": 1.8345, "step": 21400 }, { "epoch": 0.7611767842858412, "grad_norm": 1.6306757926940918, "learning_rate": 1.781522455621108e-05, "loss": 1.7802, "step": 21410 }, { "epoch": 0.7615323082392677, "grad_norm": 1.6657934188842773, "learning_rate": 1.7812778917038746e-05, "loss": 1.7914, "step": 21420 }, { "epoch": 0.761887832192694, "grad_norm": 1.5563294887542725, "learning_rate": 1.7810332077872555e-05, "loss": 1.8376, "step": 21430 }, { "epoch": 0.7622433561461204, "grad_norm": 1.5677244663238525, "learning_rate": 1.7807884039088326e-05, "loss": 1.8033, "step": 21440 }, { "epoch": 0.7625988800995467, "grad_norm": 1.5867130756378174, "learning_rate": 1.7805434801062064e-05, "loss": 1.8307, "step": 21450 }, { "epoch": 0.7629544040529731, "grad_norm": 1.6075420379638672, "learning_rate": 1.780298436416996e-05, "loss": 1.8702, "step": 21460 }, { "epoch": 0.7633099280063994, "grad_norm": 1.6167482137680054, "learning_rate": 1.780053272878838e-05, "loss": 1.8321, "step": 21470 }, { "epoch": 0.7636654519598258, "grad_norm": 1.7103121280670166, "learning_rate": 1.7798079895293884e-05, "loss": 1.8619, "step": 21480 }, { "epoch": 0.7640209759132521, "grad_norm": 1.6230714321136475, "learning_rate": 1.779562586406321e-05, "loss": 1.834, "step": 21490 }, { "epoch": 0.7643764998666785, "grad_norm": 1.6490651369094849, "learning_rate": 1.779317063547328e-05, "loss": 1.8218, "step": 21500 }, { "epoch": 0.7647320238201049, "grad_norm": 1.604256510734558, "learning_rate": 1.779071420990121e-05, "loss": 1.8535, "step": 21510 }, { "epoch": 0.7650875477735313, "grad_norm": 1.7728480100631714, "learning_rate": 1.7788256587724276e-05, "loss": 1.8961, "step": 21520 }, { "epoch": 0.7654430717269576, "grad_norm": 1.6369065046310425, "learning_rate": 1.778579776931997e-05, "loss": 1.7931, "step": 21530 }, { "epoch": 0.765798595680384, "grad_norm": 1.5738849639892578, "learning_rate": 1.7783337755065943e-05, "loss": 1.83, "step": 21540 }, { "epoch": 0.7661541196338103, "grad_norm": 1.7136260271072388, "learning_rate": 1.7780876545340037e-05, "loss": 1.8003, "step": 21550 }, { "epoch": 0.7665096435872367, "grad_norm": 1.6939854621887207, "learning_rate": 1.7778414140520283e-05, "loss": 1.7789, "step": 21560 }, { "epoch": 0.766865167540663, "grad_norm": 1.7023991346359253, "learning_rate": 1.7775950540984887e-05, "loss": 1.8382, "step": 21570 }, { "epoch": 0.7672206914940894, "grad_norm": 1.6543914079666138, "learning_rate": 1.777348574711224e-05, "loss": 1.8753, "step": 21580 }, { "epoch": 0.7675762154475158, "grad_norm": 1.6325597763061523, "learning_rate": 1.777101975928093e-05, "loss": 1.8415, "step": 21590 }, { "epoch": 0.7679317394009422, "grad_norm": 1.754928708076477, "learning_rate": 1.7768552577869702e-05, "loss": 1.8399, "step": 21600 }, { "epoch": 0.7682872633543685, "grad_norm": 1.6532490253448486, "learning_rate": 1.776608420325751e-05, "loss": 1.8202, "step": 21610 }, { "epoch": 0.7686427873077949, "grad_norm": 1.6243687868118286, "learning_rate": 1.7763614635823478e-05, "loss": 1.8437, "step": 21620 }, { "epoch": 0.7689983112612212, "grad_norm": 1.6320301294326782, "learning_rate": 1.7761143875946918e-05, "loss": 1.8326, "step": 21630 }, { "epoch": 0.7693538352146476, "grad_norm": 1.5413025617599487, "learning_rate": 1.7758671924007318e-05, "loss": 1.8824, "step": 21640 }, { "epoch": 0.7697093591680739, "grad_norm": 1.5559817552566528, "learning_rate": 1.775619878038436e-05, "loss": 1.8206, "step": 21650 }, { "epoch": 0.7700648831215003, "grad_norm": 1.6865465641021729, "learning_rate": 1.77537244454579e-05, "loss": 1.7951, "step": 21660 }, { "epoch": 0.7704204070749266, "grad_norm": 1.5911492109298706, "learning_rate": 1.7751248919607982e-05, "loss": 1.8744, "step": 21670 }, { "epoch": 0.7707759310283531, "grad_norm": 1.618155837059021, "learning_rate": 1.774877220321483e-05, "loss": 1.8521, "step": 21680 }, { "epoch": 0.7711314549817794, "grad_norm": 1.6658058166503906, "learning_rate": 1.7746294296658853e-05, "loss": 1.8676, "step": 21690 }, { "epoch": 0.7714869789352058, "grad_norm": 1.526122808456421, "learning_rate": 1.774381520032064e-05, "loss": 1.8191, "step": 21700 }, { "epoch": 0.7718425028886321, "grad_norm": 1.6593496799468994, "learning_rate": 1.774133491458097e-05, "loss": 1.8658, "step": 21710 }, { "epoch": 0.7721980268420585, "grad_norm": 1.6845260858535767, "learning_rate": 1.7738853439820796e-05, "loss": 1.8233, "step": 21720 }, { "epoch": 0.7725535507954848, "grad_norm": 1.6050273180007935, "learning_rate": 1.7736370776421255e-05, "loss": 1.8582, "step": 21730 }, { "epoch": 0.7729090747489112, "grad_norm": 1.564800500869751, "learning_rate": 1.7733886924763668e-05, "loss": 1.8745, "step": 21740 }, { "epoch": 0.7732645987023375, "grad_norm": 1.6082314252853394, "learning_rate": 1.7731401885229546e-05, "loss": 1.8409, "step": 21750 }, { "epoch": 0.773620122655764, "grad_norm": 1.6496139764785767, "learning_rate": 1.772891565820057e-05, "loss": 1.8375, "step": 21760 }, { "epoch": 0.7739756466091903, "grad_norm": 1.5334914922714233, "learning_rate": 1.7726428244058605e-05, "loss": 1.8176, "step": 21770 }, { "epoch": 0.7743311705626167, "grad_norm": 1.6704349517822266, "learning_rate": 1.7723939643185705e-05, "loss": 1.8231, "step": 21780 }, { "epoch": 0.774686694516043, "grad_norm": 1.5811091661453247, "learning_rate": 1.7721449855964114e-05, "loss": 1.8218, "step": 21790 }, { "epoch": 0.7750422184694694, "grad_norm": 1.62055242061615, "learning_rate": 1.7718958882776233e-05, "loss": 1.8821, "step": 21800 }, { "epoch": 0.7753977424228957, "grad_norm": 1.6094154119491577, "learning_rate": 1.7716466724004667e-05, "loss": 1.8133, "step": 21810 }, { "epoch": 0.7757532663763221, "grad_norm": 1.6229323148727417, "learning_rate": 1.7713973380032194e-05, "loss": 1.812, "step": 21820 }, { "epoch": 0.7761087903297484, "grad_norm": 1.5259974002838135, "learning_rate": 1.771147885124178e-05, "loss": 1.8637, "step": 21830 }, { "epoch": 0.7764643142831749, "grad_norm": 1.6902457475662231, "learning_rate": 1.770898313801656e-05, "loss": 1.8161, "step": 21840 }, { "epoch": 0.7768198382366012, "grad_norm": 1.6666581630706787, "learning_rate": 1.7706486240739875e-05, "loss": 1.8374, "step": 21850 }, { "epoch": 0.7771753621900276, "grad_norm": 1.5865390300750732, "learning_rate": 1.7703988159795214e-05, "loss": 1.843, "step": 21860 }, { "epoch": 0.7775308861434539, "grad_norm": 1.5332224369049072, "learning_rate": 1.770148889556628e-05, "loss": 1.7983, "step": 21870 }, { "epoch": 0.7778864100968803, "grad_norm": 1.8453980684280396, "learning_rate": 1.769898844843694e-05, "loss": 1.8229, "step": 21880 }, { "epoch": 0.7782419340503066, "grad_norm": 1.5702221393585205, "learning_rate": 1.7696486818791247e-05, "loss": 1.8288, "step": 21890 }, { "epoch": 0.778597458003733, "grad_norm": 1.593741774559021, "learning_rate": 1.7693984007013436e-05, "loss": 1.8044, "step": 21900 }, { "epoch": 0.7789529819571593, "grad_norm": 1.6769473552703857, "learning_rate": 1.7691480013487926e-05, "loss": 1.8076, "step": 21910 }, { "epoch": 0.7793085059105858, "grad_norm": 1.734527349472046, "learning_rate": 1.768897483859931e-05, "loss": 1.8317, "step": 21920 }, { "epoch": 0.7796640298640121, "grad_norm": 1.6322834491729736, "learning_rate": 1.7686468482732367e-05, "loss": 1.771, "step": 21930 }, { "epoch": 0.7800195538174385, "grad_norm": 1.663217306137085, "learning_rate": 1.7683960946272062e-05, "loss": 1.8372, "step": 21940 }, { "epoch": 0.7803750777708648, "grad_norm": 1.5416820049285889, "learning_rate": 1.7681452229603532e-05, "loss": 1.8194, "step": 21950 }, { "epoch": 0.7807306017242912, "grad_norm": 1.6156864166259766, "learning_rate": 1.7678942333112104e-05, "loss": 1.8473, "step": 21960 }, { "epoch": 0.7810861256777175, "grad_norm": 1.7344961166381836, "learning_rate": 1.7676431257183283e-05, "loss": 1.8233, "step": 21970 }, { "epoch": 0.7814416496311439, "grad_norm": 1.6741567850112915, "learning_rate": 1.7673919002202752e-05, "loss": 1.8435, "step": 21980 }, { "epoch": 0.7817971735845702, "grad_norm": 1.6223064661026, "learning_rate": 1.7671405568556377e-05, "loss": 1.855, "step": 21990 }, { "epoch": 0.7821526975379967, "grad_norm": 1.665303349494934, "learning_rate": 1.7668890956630204e-05, "loss": 1.8395, "step": 22000 }, { "epoch": 0.782508221491423, "grad_norm": 1.5375319719314575, "learning_rate": 1.7666375166810466e-05, "loss": 1.8605, "step": 22010 }, { "epoch": 0.7828637454448494, "grad_norm": 1.5674186944961548, "learning_rate": 1.7663858199483575e-05, "loss": 1.8481, "step": 22020 }, { "epoch": 0.7832192693982757, "grad_norm": 1.6025656461715698, "learning_rate": 1.766134005503611e-05, "loss": 1.8756, "step": 22030 }, { "epoch": 0.7835747933517021, "grad_norm": 1.5113604068756104, "learning_rate": 1.7658820733854857e-05, "loss": 1.8223, "step": 22040 }, { "epoch": 0.7839303173051284, "grad_norm": 1.7100368738174438, "learning_rate": 1.765630023632676e-05, "loss": 1.829, "step": 22050 }, { "epoch": 0.7842858412585548, "grad_norm": 1.693702220916748, "learning_rate": 1.7653778562838947e-05, "loss": 1.8651, "step": 22060 }, { "epoch": 0.7846413652119811, "grad_norm": 1.573425054550171, "learning_rate": 1.765125571377874e-05, "loss": 1.8451, "step": 22070 }, { "epoch": 0.7849968891654076, "grad_norm": 1.6213774681091309, "learning_rate": 1.7648731689533626e-05, "loss": 1.8439, "step": 22080 }, { "epoch": 0.7853524131188339, "grad_norm": 1.5298799276351929, "learning_rate": 1.764620649049128e-05, "loss": 1.7725, "step": 22090 }, { "epoch": 0.7857079370722603, "grad_norm": 1.7361146211624146, "learning_rate": 1.7643680117039567e-05, "loss": 1.8386, "step": 22100 }, { "epoch": 0.7860634610256866, "grad_norm": 1.5186938047409058, "learning_rate": 1.764115256956651e-05, "loss": 1.8684, "step": 22110 }, { "epoch": 0.786418984979113, "grad_norm": 1.6889774799346924, "learning_rate": 1.763862384846033e-05, "loss": 1.8854, "step": 22120 }, { "epoch": 0.7867745089325393, "grad_norm": 1.6956360340118408, "learning_rate": 1.763609395410942e-05, "loss": 1.8322, "step": 22130 }, { "epoch": 0.7871300328859657, "grad_norm": 1.5466184616088867, "learning_rate": 1.7633562886902357e-05, "loss": 1.8281, "step": 22140 }, { "epoch": 0.787485556839392, "grad_norm": 1.5690945386886597, "learning_rate": 1.76310306472279e-05, "loss": 1.8312, "step": 22150 }, { "epoch": 0.7878410807928184, "grad_norm": 1.5939441919326782, "learning_rate": 1.762849723547498e-05, "loss": 1.8282, "step": 22160 }, { "epoch": 0.7881966047462448, "grad_norm": 1.6708072423934937, "learning_rate": 1.7625962652032718e-05, "loss": 1.8702, "step": 22170 }, { "epoch": 0.7885521286996712, "grad_norm": 1.666355013847351, "learning_rate": 1.7623426897290406e-05, "loss": 1.8174, "step": 22180 }, { "epoch": 0.7889076526530975, "grad_norm": 1.6208865642547607, "learning_rate": 1.7620889971637524e-05, "loss": 1.8469, "step": 22190 }, { "epoch": 0.7892631766065239, "grad_norm": 1.5655211210250854, "learning_rate": 1.7618351875463723e-05, "loss": 1.874, "step": 22200 }, { "epoch": 0.7896187005599502, "grad_norm": 1.7172608375549316, "learning_rate": 1.761581260915884e-05, "loss": 1.8563, "step": 22210 }, { "epoch": 0.7899742245133766, "grad_norm": 1.7233494520187378, "learning_rate": 1.7613272173112894e-05, "loss": 1.803, "step": 22220 }, { "epoch": 0.7903297484668029, "grad_norm": 1.6641350984573364, "learning_rate": 1.761073056771608e-05, "loss": 1.8351, "step": 22230 }, { "epoch": 0.7906852724202293, "grad_norm": 1.4610012769699097, "learning_rate": 1.7608187793358766e-05, "loss": 1.8061, "step": 22240 }, { "epoch": 0.7910407963736557, "grad_norm": 1.6382813453674316, "learning_rate": 1.7605643850431512e-05, "loss": 1.7839, "step": 22250 }, { "epoch": 0.7913963203270821, "grad_norm": 1.718408465385437, "learning_rate": 1.7603098739325053e-05, "loss": 1.8069, "step": 22260 }, { "epoch": 0.7917518442805084, "grad_norm": 1.616042137145996, "learning_rate": 1.76005524604303e-05, "loss": 1.8654, "step": 22270 }, { "epoch": 0.7921073682339348, "grad_norm": 1.6004387140274048, "learning_rate": 1.759800501413834e-05, "loss": 1.8279, "step": 22280 }, { "epoch": 0.7924628921873611, "grad_norm": 1.590248942375183, "learning_rate": 1.759545640084045e-05, "loss": 1.8654, "step": 22290 }, { "epoch": 0.7928184161407875, "grad_norm": 1.6553932428359985, "learning_rate": 1.7592906620928085e-05, "loss": 1.8634, "step": 22300 }, { "epoch": 0.7931739400942138, "grad_norm": 1.6197720766067505, "learning_rate": 1.759035567479287e-05, "loss": 1.8303, "step": 22310 }, { "epoch": 0.7935294640476402, "grad_norm": 1.6615225076675415, "learning_rate": 1.7587803562826613e-05, "loss": 1.794, "step": 22320 }, { "epoch": 0.7938849880010665, "grad_norm": 1.5763661861419678, "learning_rate": 1.7585250285421307e-05, "loss": 1.7818, "step": 22330 }, { "epoch": 0.794240511954493, "grad_norm": 1.6498136520385742, "learning_rate": 1.7582695842969117e-05, "loss": 1.8302, "step": 22340 }, { "epoch": 0.7945960359079193, "grad_norm": 1.6580342054367065, "learning_rate": 1.7580140235862386e-05, "loss": 1.8225, "step": 22350 }, { "epoch": 0.7949515598613457, "grad_norm": 1.6113598346710205, "learning_rate": 1.7577583464493643e-05, "loss": 1.8366, "step": 22360 }, { "epoch": 0.795307083814772, "grad_norm": 1.7083193063735962, "learning_rate": 1.7575025529255593e-05, "loss": 1.8212, "step": 22370 }, { "epoch": 0.7956626077681984, "grad_norm": 1.5660282373428345, "learning_rate": 1.7572466430541123e-05, "loss": 1.8118, "step": 22380 }, { "epoch": 0.7960181317216247, "grad_norm": 1.6036208868026733, "learning_rate": 1.7569906168743284e-05, "loss": 1.8508, "step": 22390 }, { "epoch": 0.7963736556750511, "grad_norm": 1.7010608911514282, "learning_rate": 1.7567344744255315e-05, "loss": 1.8082, "step": 22400 }, { "epoch": 0.7967291796284774, "grad_norm": 1.5989950895309448, "learning_rate": 1.756478215747065e-05, "loss": 1.8377, "step": 22410 }, { "epoch": 0.7970847035819039, "grad_norm": 1.6668442487716675, "learning_rate": 1.7562218408782876e-05, "loss": 1.8462, "step": 22420 }, { "epoch": 0.7974402275353302, "grad_norm": 1.6710630655288696, "learning_rate": 1.7559653498585767e-05, "loss": 1.8333, "step": 22430 }, { "epoch": 0.7977957514887566, "grad_norm": 1.6532939672470093, "learning_rate": 1.755708742727328e-05, "loss": 1.8146, "step": 22440 }, { "epoch": 0.7981512754421829, "grad_norm": 1.5563749074935913, "learning_rate": 1.755452019523955e-05, "loss": 1.7988, "step": 22450 }, { "epoch": 0.7985067993956093, "grad_norm": 1.6442334651947021, "learning_rate": 1.7551951802878885e-05, "loss": 1.8098, "step": 22460 }, { "epoch": 0.7988623233490356, "grad_norm": 1.5837472677230835, "learning_rate": 1.7549382250585772e-05, "loss": 1.8456, "step": 22470 }, { "epoch": 0.799217847302462, "grad_norm": 1.6369633674621582, "learning_rate": 1.7546811538754882e-05, "loss": 1.8455, "step": 22480 }, { "epoch": 0.7995733712558883, "grad_norm": 1.636970043182373, "learning_rate": 1.7544239667781057e-05, "loss": 1.8105, "step": 22490 }, { "epoch": 0.7999288952093148, "grad_norm": 1.5929034948349, "learning_rate": 1.7541666638059323e-05, "loss": 1.8375, "step": 22500 }, { "epoch": 0.8002844191627411, "grad_norm": 1.5676238536834717, "learning_rate": 1.753909244998488e-05, "loss": 1.8694, "step": 22510 }, { "epoch": 0.8006399431161675, "grad_norm": 1.7536896467208862, "learning_rate": 1.7536517103953105e-05, "loss": 1.8125, "step": 22520 }, { "epoch": 0.8009954670695938, "grad_norm": 1.5093269348144531, "learning_rate": 1.753394060035956e-05, "loss": 1.7803, "step": 22530 }, { "epoch": 0.8013509910230202, "grad_norm": 1.655174970626831, "learning_rate": 1.7531362939599973e-05, "loss": 1.7836, "step": 22540 }, { "epoch": 0.8017065149764465, "grad_norm": 1.609383463859558, "learning_rate": 1.7528784122070265e-05, "loss": 1.8658, "step": 22550 }, { "epoch": 0.8020620389298729, "grad_norm": 1.585519790649414, "learning_rate": 1.7526204148166523e-05, "loss": 1.8446, "step": 22560 }, { "epoch": 0.8024175628832992, "grad_norm": 1.6932191848754883, "learning_rate": 1.7523623018285008e-05, "loss": 1.8236, "step": 22570 }, { "epoch": 0.8027730868367257, "grad_norm": 1.6669684648513794, "learning_rate": 1.7521040732822175e-05, "loss": 1.7894, "step": 22580 }, { "epoch": 0.803128610790152, "grad_norm": 1.7060233354568481, "learning_rate": 1.751845729217464e-05, "loss": 1.8038, "step": 22590 }, { "epoch": 0.8034841347435784, "grad_norm": 1.5615458488464355, "learning_rate": 1.751587269673921e-05, "loss": 1.8553, "step": 22600 }, { "epoch": 0.8038396586970047, "grad_norm": 1.607182502746582, "learning_rate": 1.7513286946912852e-05, "loss": 1.8425, "step": 22610 }, { "epoch": 0.8041951826504311, "grad_norm": 1.6426219940185547, "learning_rate": 1.7510700043092735e-05, "loss": 1.8096, "step": 22620 }, { "epoch": 0.8045507066038574, "grad_norm": 1.7619822025299072, "learning_rate": 1.7508111985676177e-05, "loss": 1.8166, "step": 22630 }, { "epoch": 0.8049062305572838, "grad_norm": 1.593801498413086, "learning_rate": 1.7505522775060697e-05, "loss": 1.8535, "step": 22640 }, { "epoch": 0.8052617545107101, "grad_norm": 1.6533886194229126, "learning_rate": 1.750293241164398e-05, "loss": 1.8856, "step": 22650 }, { "epoch": 0.8056172784641366, "grad_norm": 1.7397966384887695, "learning_rate": 1.750034089582389e-05, "loss": 1.8097, "step": 22660 }, { "epoch": 0.8059728024175629, "grad_norm": 1.6742297410964966, "learning_rate": 1.7497748227998462e-05, "loss": 1.8489, "step": 22670 }, { "epoch": 0.8063283263709893, "grad_norm": 1.584993839263916, "learning_rate": 1.749515440856592e-05, "loss": 1.893, "step": 22680 }, { "epoch": 0.8066838503244156, "grad_norm": 1.5936106443405151, "learning_rate": 1.7492559437924654e-05, "loss": 1.8456, "step": 22690 }, { "epoch": 0.807039374277842, "grad_norm": 1.5889955759048462, "learning_rate": 1.7489963316473236e-05, "loss": 1.8055, "step": 22700 }, { "epoch": 0.8073948982312683, "grad_norm": 1.6362390518188477, "learning_rate": 1.7487366044610418e-05, "loss": 1.8642, "step": 22710 }, { "epoch": 0.8077504221846947, "grad_norm": 1.6781909465789795, "learning_rate": 1.748476762273512e-05, "loss": 1.8462, "step": 22720 }, { "epoch": 0.808105946138121, "grad_norm": 1.6672321557998657, "learning_rate": 1.7482168051246448e-05, "loss": 1.8106, "step": 22730 }, { "epoch": 0.8084614700915475, "grad_norm": 1.6293582916259766, "learning_rate": 1.747956733054367e-05, "loss": 1.7957, "step": 22740 }, { "epoch": 0.8088169940449738, "grad_norm": 1.6819753646850586, "learning_rate": 1.7476965461026253e-05, "loss": 1.8337, "step": 22750 }, { "epoch": 0.8091725179984002, "grad_norm": 1.6322966814041138, "learning_rate": 1.7474362443093823e-05, "loss": 1.856, "step": 22760 }, { "epoch": 0.8095280419518265, "grad_norm": 1.5908470153808594, "learning_rate": 1.747175827714618e-05, "loss": 1.8528, "step": 22770 }, { "epoch": 0.8098835659052529, "grad_norm": 1.7300654649734497, "learning_rate": 1.7469152963583323e-05, "loss": 1.7907, "step": 22780 }, { "epoch": 0.8102390898586792, "grad_norm": 1.5681836605072021, "learning_rate": 1.7466546502805397e-05, "loss": 1.7838, "step": 22790 }, { "epoch": 0.8105946138121056, "grad_norm": 1.7767341136932373, "learning_rate": 1.7463938895212745e-05, "loss": 1.8224, "step": 22800 }, { "epoch": 0.8109501377655319, "grad_norm": 1.7716939449310303, "learning_rate": 1.7461330141205878e-05, "loss": 1.8844, "step": 22810 }, { "epoch": 0.8113056617189583, "grad_norm": 1.6044155359268188, "learning_rate": 1.745872024118548e-05, "loss": 1.8127, "step": 22820 }, { "epoch": 0.8116611856723847, "grad_norm": 1.6654382944107056, "learning_rate": 1.7456109195552425e-05, "loss": 1.8143, "step": 22830 }, { "epoch": 0.8120167096258111, "grad_norm": 1.6145553588867188, "learning_rate": 1.7453497004707748e-05, "loss": 1.8573, "step": 22840 }, { "epoch": 0.8123722335792374, "grad_norm": 1.6546883583068848, "learning_rate": 1.745088366905266e-05, "loss": 1.8458, "step": 22850 }, { "epoch": 0.8127277575326638, "grad_norm": 1.6224075555801392, "learning_rate": 1.744826918898856e-05, "loss": 1.8157, "step": 22860 }, { "epoch": 0.8130832814860901, "grad_norm": 1.6021678447723389, "learning_rate": 1.7445653564917016e-05, "loss": 1.8435, "step": 22870 }, { "epoch": 0.8134388054395165, "grad_norm": 1.6641144752502441, "learning_rate": 1.7443036797239767e-05, "loss": 1.8098, "step": 22880 }, { "epoch": 0.8137943293929428, "grad_norm": 1.5793033838272095, "learning_rate": 1.7440418886358735e-05, "loss": 1.8159, "step": 22890 }, { "epoch": 0.8141498533463692, "grad_norm": 1.817555546760559, "learning_rate": 1.743779983267601e-05, "loss": 1.8254, "step": 22900 }, { "epoch": 0.8145053772997956, "grad_norm": 1.6587536334991455, "learning_rate": 1.7435179636593874e-05, "loss": 1.8243, "step": 22910 }, { "epoch": 0.814860901253222, "grad_norm": 1.5765866041183472, "learning_rate": 1.7432558298514758e-05, "loss": 1.7785, "step": 22920 }, { "epoch": 0.8152164252066483, "grad_norm": 1.6833018064498901, "learning_rate": 1.742993581884129e-05, "loss": 1.8263, "step": 22930 }, { "epoch": 0.8155719491600747, "grad_norm": 1.6549988985061646, "learning_rate": 1.742731219797627e-05, "loss": 1.8692, "step": 22940 }, { "epoch": 0.815927473113501, "grad_norm": 1.6467914581298828, "learning_rate": 1.7424687436322664e-05, "loss": 1.8037, "step": 22950 }, { "epoch": 0.8162829970669274, "grad_norm": 1.562791347503662, "learning_rate": 1.742206153428362e-05, "loss": 1.8544, "step": 22960 }, { "epoch": 0.8166385210203537, "grad_norm": 1.605326771736145, "learning_rate": 1.7419434492262465e-05, "loss": 1.8014, "step": 22970 }, { "epoch": 0.8169940449737801, "grad_norm": 1.6755915880203247, "learning_rate": 1.7416806310662688e-05, "loss": 1.7874, "step": 22980 }, { "epoch": 0.8173495689272064, "grad_norm": 1.6122695207595825, "learning_rate": 1.741417698988797e-05, "loss": 1.8217, "step": 22990 }, { "epoch": 0.8177050928806329, "grad_norm": 1.6413447856903076, "learning_rate": 1.7411546530342148e-05, "loss": 1.8296, "step": 23000 }, { "epoch": 0.8180606168340592, "grad_norm": 1.6276954412460327, "learning_rate": 1.7408914932429254e-05, "loss": 1.8272, "step": 23010 }, { "epoch": 0.8184161407874856, "grad_norm": 1.5439822673797607, "learning_rate": 1.7406282196553477e-05, "loss": 1.8266, "step": 23020 }, { "epoch": 0.8187716647409119, "grad_norm": 1.7403044700622559, "learning_rate": 1.7403648323119195e-05, "loss": 1.8242, "step": 23030 }, { "epoch": 0.8191271886943383, "grad_norm": 1.7068008184432983, "learning_rate": 1.740101331253095e-05, "loss": 1.8083, "step": 23040 }, { "epoch": 0.8194827126477646, "grad_norm": 1.5520436763763428, "learning_rate": 1.7398377165193464e-05, "loss": 1.8036, "step": 23050 }, { "epoch": 0.819838236601191, "grad_norm": 1.6793220043182373, "learning_rate": 1.7395739881511637e-05, "loss": 1.7867, "step": 23060 }, { "epoch": 0.8201937605546173, "grad_norm": 1.6804542541503906, "learning_rate": 1.7393101461890536e-05, "loss": 1.8301, "step": 23070 }, { "epoch": 0.8205492845080438, "grad_norm": 1.6650656461715698, "learning_rate": 1.7390461906735403e-05, "loss": 1.8543, "step": 23080 }, { "epoch": 0.8209048084614701, "grad_norm": 1.6909743547439575, "learning_rate": 1.738782121645166e-05, "loss": 1.8101, "step": 23090 }, { "epoch": 0.8212603324148965, "grad_norm": 1.7118736505508423, "learning_rate": 1.7385179391444903e-05, "loss": 1.7889, "step": 23100 }, { "epoch": 0.8216158563683228, "grad_norm": 1.5747915506362915, "learning_rate": 1.7382536432120892e-05, "loss": 1.818, "step": 23110 }, { "epoch": 0.8219713803217492, "grad_norm": 1.6179157495498657, "learning_rate": 1.7379892338885577e-05, "loss": 1.8661, "step": 23120 }, { "epoch": 0.8223269042751755, "grad_norm": 1.6345182657241821, "learning_rate": 1.737724711214507e-05, "loss": 1.8063, "step": 23130 }, { "epoch": 0.8226824282286019, "grad_norm": 1.6581584215164185, "learning_rate": 1.7374600752305663e-05, "loss": 1.801, "step": 23140 }, { "epoch": 0.8230379521820282, "grad_norm": 1.647096872329712, "learning_rate": 1.7371953259773818e-05, "loss": 1.8225, "step": 23150 }, { "epoch": 0.8233934761354547, "grad_norm": 1.7149620056152344, "learning_rate": 1.7369304634956176e-05, "loss": 1.8333, "step": 23160 }, { "epoch": 0.823749000088881, "grad_norm": 1.6080249547958374, "learning_rate": 1.7366654878259547e-05, "loss": 1.7963, "step": 23170 }, { "epoch": 0.8241045240423074, "grad_norm": 1.6695677042007446, "learning_rate": 1.7364003990090923e-05, "loss": 1.862, "step": 23180 }, { "epoch": 0.8244600479957337, "grad_norm": 1.6274735927581787, "learning_rate": 1.7361351970857454e-05, "loss": 1.8066, "step": 23190 }, { "epoch": 0.8248155719491601, "grad_norm": 1.7230697870254517, "learning_rate": 1.735869882096648e-05, "loss": 1.7928, "step": 23200 }, { "epoch": 0.8251710959025864, "grad_norm": 1.6167101860046387, "learning_rate": 1.7356044540825504e-05, "loss": 1.8395, "step": 23210 }, { "epoch": 0.8255266198560128, "grad_norm": 1.6141185760498047, "learning_rate": 1.735338913084221e-05, "loss": 1.8165, "step": 23220 }, { "epoch": 0.8258821438094391, "grad_norm": 1.613447904586792, "learning_rate": 1.7350732591424452e-05, "loss": 1.8036, "step": 23230 }, { "epoch": 0.8262376677628656, "grad_norm": 1.720850944519043, "learning_rate": 1.734807492298026e-05, "loss": 1.831, "step": 23240 }, { "epoch": 0.8265931917162919, "grad_norm": 1.5681285858154297, "learning_rate": 1.734541612591783e-05, "loss": 1.805, "step": 23250 }, { "epoch": 0.8269487156697183, "grad_norm": 1.7178831100463867, "learning_rate": 1.734275620064554e-05, "loss": 1.8113, "step": 23260 }, { "epoch": 0.8273042396231446, "grad_norm": 1.5972732305526733, "learning_rate": 1.7340095147571937e-05, "loss": 1.8014, "step": 23270 }, { "epoch": 0.827659763576571, "grad_norm": 1.5485990047454834, "learning_rate": 1.733743296710574e-05, "loss": 1.8566, "step": 23280 }, { "epoch": 0.8280152875299973, "grad_norm": 1.5806162357330322, "learning_rate": 1.7334769659655846e-05, "loss": 1.8122, "step": 23290 }, { "epoch": 0.8283708114834237, "grad_norm": 1.6596871614456177, "learning_rate": 1.7332105225631325e-05, "loss": 1.8234, "step": 23300 }, { "epoch": 0.82872633543685, "grad_norm": 1.6023002862930298, "learning_rate": 1.7329439665441413e-05, "loss": 1.8423, "step": 23310 }, { "epoch": 0.8290818593902765, "grad_norm": 1.663435697555542, "learning_rate": 1.7326772979495522e-05, "loss": 1.8248, "step": 23320 }, { "epoch": 0.8294373833437028, "grad_norm": 1.6272053718566895, "learning_rate": 1.7324105168203243e-05, "loss": 1.827, "step": 23330 }, { "epoch": 0.8297929072971292, "grad_norm": 1.6218414306640625, "learning_rate": 1.732143623197433e-05, "loss": 1.7947, "step": 23340 }, { "epoch": 0.8301484312505555, "grad_norm": 1.7325440645217896, "learning_rate": 1.731876617121872e-05, "loss": 1.8324, "step": 23350 }, { "epoch": 0.8305039552039819, "grad_norm": 1.6133369207382202, "learning_rate": 1.731609498634651e-05, "loss": 1.8411, "step": 23360 }, { "epoch": 0.8308594791574082, "grad_norm": 1.5473828315734863, "learning_rate": 1.731342267776799e-05, "loss": 1.8226, "step": 23370 }, { "epoch": 0.8312150031108346, "grad_norm": 1.601293683052063, "learning_rate": 1.7310749245893598e-05, "loss": 1.8239, "step": 23380 }, { "epoch": 0.8315705270642609, "grad_norm": 1.6454339027404785, "learning_rate": 1.7308074691133962e-05, "loss": 1.7772, "step": 23390 }, { "epoch": 0.8319260510176874, "grad_norm": 1.5938347578048706, "learning_rate": 1.7305399013899874e-05, "loss": 1.8488, "step": 23400 }, { "epoch": 0.8322815749711137, "grad_norm": 1.5726723670959473, "learning_rate": 1.7302722214602302e-05, "loss": 1.7704, "step": 23410 }, { "epoch": 0.8326370989245401, "grad_norm": 1.7508751153945923, "learning_rate": 1.7300044293652388e-05, "loss": 1.8709, "step": 23420 }, { "epoch": 0.8329926228779664, "grad_norm": 1.6652165651321411, "learning_rate": 1.7297365251461445e-05, "loss": 1.8546, "step": 23430 }, { "epoch": 0.8333481468313928, "grad_norm": 1.731452226638794, "learning_rate": 1.7294685088440947e-05, "loss": 1.8703, "step": 23440 }, { "epoch": 0.8337036707848191, "grad_norm": 1.6252065896987915, "learning_rate": 1.729200380500256e-05, "loss": 1.7988, "step": 23450 }, { "epoch": 0.8340591947382455, "grad_norm": 1.6331079006195068, "learning_rate": 1.7289321401558115e-05, "loss": 1.8475, "step": 23460 }, { "epoch": 0.8344147186916718, "grad_norm": 1.627914547920227, "learning_rate": 1.7286637878519604e-05, "loss": 1.8565, "step": 23470 }, { "epoch": 0.8347702426450982, "grad_norm": 1.6057466268539429, "learning_rate": 1.7283953236299198e-05, "loss": 1.8619, "step": 23480 }, { "epoch": 0.8351257665985246, "grad_norm": 1.6166599988937378, "learning_rate": 1.728126747530925e-05, "loss": 1.8413, "step": 23490 }, { "epoch": 0.835481290551951, "grad_norm": 1.684545636177063, "learning_rate": 1.727858059596227e-05, "loss": 1.8224, "step": 23500 }, { "epoch": 0.8358368145053773, "grad_norm": 1.6563124656677246, "learning_rate": 1.727589259867095e-05, "loss": 1.8531, "step": 23510 }, { "epoch": 0.8361923384588037, "grad_norm": 1.5739567279815674, "learning_rate": 1.7273203483848148e-05, "loss": 1.7771, "step": 23520 }, { "epoch": 0.83654786241223, "grad_norm": 1.6655970811843872, "learning_rate": 1.727051325190689e-05, "loss": 1.8566, "step": 23530 }, { "epoch": 0.8369033863656564, "grad_norm": 1.5226960182189941, "learning_rate": 1.7267821903260382e-05, "loss": 1.8377, "step": 23540 }, { "epoch": 0.8372589103190827, "grad_norm": 1.6616383790969849, "learning_rate": 1.7265129438322004e-05, "loss": 1.8156, "step": 23550 }, { "epoch": 0.8376144342725091, "grad_norm": 1.642339825630188, "learning_rate": 1.7262435857505295e-05, "loss": 1.7684, "step": 23560 }, { "epoch": 0.8379699582259355, "grad_norm": 1.724085807800293, "learning_rate": 1.7259741161223976e-05, "loss": 1.818, "step": 23570 }, { "epoch": 0.8383254821793619, "grad_norm": 1.6169931888580322, "learning_rate": 1.725704534989193e-05, "loss": 1.8103, "step": 23580 }, { "epoch": 0.8386810061327882, "grad_norm": 1.6618402004241943, "learning_rate": 1.7254348423923222e-05, "loss": 1.7984, "step": 23590 }, { "epoch": 0.8390365300862146, "grad_norm": 1.6586053371429443, "learning_rate": 1.725165038373208e-05, "loss": 1.7949, "step": 23600 }, { "epoch": 0.8393920540396409, "grad_norm": 1.5934162139892578, "learning_rate": 1.724895122973291e-05, "loss": 1.8535, "step": 23610 }, { "epoch": 0.8397475779930673, "grad_norm": 1.6691250801086426, "learning_rate": 1.7246250962340282e-05, "loss": 1.832, "step": 23620 }, { "epoch": 0.8401031019464936, "grad_norm": 1.7262389659881592, "learning_rate": 1.724354958196894e-05, "loss": 1.7997, "step": 23630 }, { "epoch": 0.84045862589992, "grad_norm": 1.5825564861297607, "learning_rate": 1.7240847089033796e-05, "loss": 1.8178, "step": 23640 }, { "epoch": 0.8408141498533463, "grad_norm": 1.7624269723892212, "learning_rate": 1.7238143483949945e-05, "loss": 1.8354, "step": 23650 }, { "epoch": 0.8411696738067728, "grad_norm": 1.574318528175354, "learning_rate": 1.7235438767132633e-05, "loss": 1.8421, "step": 23660 }, { "epoch": 0.8415251977601991, "grad_norm": 1.6515188217163086, "learning_rate": 1.7232732938997296e-05, "loss": 1.8819, "step": 23670 }, { "epoch": 0.8418807217136255, "grad_norm": 1.6864103078842163, "learning_rate": 1.723002599995953e-05, "loss": 1.796, "step": 23680 }, { "epoch": 0.8422362456670518, "grad_norm": 1.6396405696868896, "learning_rate": 1.72273179504351e-05, "loss": 1.7978, "step": 23690 }, { "epoch": 0.8425917696204782, "grad_norm": 1.7666069269180298, "learning_rate": 1.722460879083995e-05, "loss": 1.8018, "step": 23700 }, { "epoch": 0.8429472935739045, "grad_norm": 1.7268292903900146, "learning_rate": 1.722189852159019e-05, "loss": 1.8005, "step": 23710 }, { "epoch": 0.8433028175273309, "grad_norm": 1.5880026817321777, "learning_rate": 1.7219187143102097e-05, "loss": 1.7837, "step": 23720 }, { "epoch": 0.8436583414807572, "grad_norm": 1.7330087423324585, "learning_rate": 1.7216474655792124e-05, "loss": 1.8226, "step": 23730 }, { "epoch": 0.8440138654341837, "grad_norm": 1.6118395328521729, "learning_rate": 1.7213761060076894e-05, "loss": 1.8469, "step": 23740 }, { "epoch": 0.84436938938761, "grad_norm": 1.6583901643753052, "learning_rate": 1.7211046356373193e-05, "loss": 1.8016, "step": 23750 }, { "epoch": 0.8447249133410364, "grad_norm": 1.6012097597122192, "learning_rate": 1.7208330545097985e-05, "loss": 1.8326, "step": 23760 }, { "epoch": 0.8450804372944627, "grad_norm": 1.5444685220718384, "learning_rate": 1.7205613626668404e-05, "loss": 1.8092, "step": 23770 }, { "epoch": 0.8454359612478891, "grad_norm": 1.6934510469436646, "learning_rate": 1.7202895601501746e-05, "loss": 1.7749, "step": 23780 }, { "epoch": 0.8457914852013154, "grad_norm": 1.6778422594070435, "learning_rate": 1.720017647001549e-05, "loss": 1.8355, "step": 23790 }, { "epoch": 0.8461470091547418, "grad_norm": 1.6825305223464966, "learning_rate": 1.719745623262727e-05, "loss": 1.8183, "step": 23800 }, { "epoch": 0.8465025331081681, "grad_norm": 1.6143616437911987, "learning_rate": 1.7194734889754903e-05, "loss": 1.832, "step": 23810 }, { "epoch": 0.8468580570615946, "grad_norm": 1.676255226135254, "learning_rate": 1.7192012441816367e-05, "loss": 1.815, "step": 23820 }, { "epoch": 0.8472135810150209, "grad_norm": 1.6026546955108643, "learning_rate": 1.7189288889229817e-05, "loss": 1.8267, "step": 23830 }, { "epoch": 0.8475691049684473, "grad_norm": 1.616668462753296, "learning_rate": 1.718656423241357e-05, "loss": 1.8386, "step": 23840 }, { "epoch": 0.8479246289218736, "grad_norm": 1.6833947896957397, "learning_rate": 1.7183838471786114e-05, "loss": 1.8074, "step": 23850 }, { "epoch": 0.8482801528753, "grad_norm": 1.5751172304153442, "learning_rate": 1.7181111607766113e-05, "loss": 1.8259, "step": 23860 }, { "epoch": 0.8486356768287263, "grad_norm": 1.5970380306243896, "learning_rate": 1.7178383640772396e-05, "loss": 1.8195, "step": 23870 }, { "epoch": 0.8489912007821527, "grad_norm": 1.6919461488723755, "learning_rate": 1.7175654571223962e-05, "loss": 1.82, "step": 23880 }, { "epoch": 0.849346724735579, "grad_norm": 1.6423909664154053, "learning_rate": 1.7172924399539975e-05, "loss": 1.8026, "step": 23890 }, { "epoch": 0.8497022486890055, "grad_norm": 1.7619222402572632, "learning_rate": 1.7170193126139775e-05, "loss": 1.8738, "step": 23900 }, { "epoch": 0.8500577726424318, "grad_norm": 1.6697863340377808, "learning_rate": 1.7167460751442872e-05, "loss": 1.7832, "step": 23910 }, { "epoch": 0.8504132965958582, "grad_norm": 1.7078362703323364, "learning_rate": 1.716472727586893e-05, "loss": 1.7962, "step": 23920 }, { "epoch": 0.8507688205492845, "grad_norm": 1.5840452909469604, "learning_rate": 1.716199269983781e-05, "loss": 1.8442, "step": 23930 }, { "epoch": 0.8511243445027109, "grad_norm": 1.636093258857727, "learning_rate": 1.7159257023769512e-05, "loss": 1.8077, "step": 23940 }, { "epoch": 0.8514798684561372, "grad_norm": 1.6575133800506592, "learning_rate": 1.7156520248084226e-05, "loss": 1.8022, "step": 23950 }, { "epoch": 0.8518353924095636, "grad_norm": 1.7173900604248047, "learning_rate": 1.7153782373202302e-05, "loss": 1.7997, "step": 23960 }, { "epoch": 0.8521909163629899, "grad_norm": 1.5716763734817505, "learning_rate": 1.7151043399544262e-05, "loss": 1.8387, "step": 23970 }, { "epoch": 0.8525464403164164, "grad_norm": 1.622989296913147, "learning_rate": 1.714830332753079e-05, "loss": 1.8472, "step": 23980 }, { "epoch": 0.8529019642698427, "grad_norm": 1.7212612628936768, "learning_rate": 1.7145562157582748e-05, "loss": 1.849, "step": 23990 }, { "epoch": 0.8532574882232691, "grad_norm": 1.6640286445617676, "learning_rate": 1.714281989012116e-05, "loss": 1.8478, "step": 24000 }, { "epoch": 0.8536130121766954, "grad_norm": 1.6266303062438965, "learning_rate": 1.7140076525567223e-05, "loss": 1.88, "step": 24010 }, { "epoch": 0.8539685361301218, "grad_norm": 1.750396966934204, "learning_rate": 1.7137332064342303e-05, "loss": 1.8144, "step": 24020 }, { "epoch": 0.8543240600835481, "grad_norm": 1.6980276107788086, "learning_rate": 1.7134586506867926e-05, "loss": 1.8469, "step": 24030 }, { "epoch": 0.8546795840369745, "grad_norm": 1.5892568826675415, "learning_rate": 1.7131839853565798e-05, "loss": 1.8194, "step": 24040 }, { "epoch": 0.8550351079904008, "grad_norm": 1.6542547941207886, "learning_rate": 1.7129092104857786e-05, "loss": 1.8202, "step": 24050 }, { "epoch": 0.8553906319438273, "grad_norm": 1.5758212804794312, "learning_rate": 1.7126343261165926e-05, "loss": 1.7851, "step": 24060 }, { "epoch": 0.8557461558972536, "grad_norm": 1.7901427745819092, "learning_rate": 1.7123593322912423e-05, "loss": 1.8496, "step": 24070 }, { "epoch": 0.85610167985068, "grad_norm": 1.7235887050628662, "learning_rate": 1.712084229051965e-05, "loss": 1.8359, "step": 24080 }, { "epoch": 0.8564572038041063, "grad_norm": 1.5175025463104248, "learning_rate": 1.711809016441015e-05, "loss": 1.8642, "step": 24090 }, { "epoch": 0.8568127277575327, "grad_norm": 1.7177553176879883, "learning_rate": 1.7115336945006633e-05, "loss": 1.8374, "step": 24100 }, { "epoch": 0.857168251710959, "grad_norm": 1.6444177627563477, "learning_rate": 1.7112582632731972e-05, "loss": 1.8006, "step": 24110 }, { "epoch": 0.8575237756643854, "grad_norm": 1.6575236320495605, "learning_rate": 1.710982722800922e-05, "loss": 1.8396, "step": 24120 }, { "epoch": 0.8578792996178117, "grad_norm": 1.6627295017242432, "learning_rate": 1.710707073126158e-05, "loss": 1.7414, "step": 24130 }, { "epoch": 0.8582348235712381, "grad_norm": 1.7354328632354736, "learning_rate": 1.7104313142912436e-05, "loss": 1.8258, "step": 24140 }, { "epoch": 0.8585903475246645, "grad_norm": 1.717443823814392, "learning_rate": 1.7101554463385342e-05, "loss": 1.8485, "step": 24150 }, { "epoch": 0.8589458714780909, "grad_norm": 1.6752740144729614, "learning_rate": 1.7098794693104008e-05, "loss": 1.8267, "step": 24160 }, { "epoch": 0.8593013954315172, "grad_norm": 1.7124459743499756, "learning_rate": 1.709603383249232e-05, "loss": 1.8576, "step": 24170 }, { "epoch": 0.8596569193849436, "grad_norm": 1.6862879991531372, "learning_rate": 1.7093271881974325e-05, "loss": 1.8241, "step": 24180 }, { "epoch": 0.8600124433383699, "grad_norm": 1.7252548933029175, "learning_rate": 1.7090508841974243e-05, "loss": 1.7997, "step": 24190 }, { "epoch": 0.8603679672917963, "grad_norm": 1.6613682508468628, "learning_rate": 1.7087744712916464e-05, "loss": 1.8332, "step": 24200 }, { "epoch": 0.8607234912452226, "grad_norm": 1.6545765399932861, "learning_rate": 1.7084979495225537e-05, "loss": 1.8372, "step": 24210 }, { "epoch": 0.861079015198649, "grad_norm": 1.613324522972107, "learning_rate": 1.708221318932618e-05, "loss": 1.7797, "step": 24220 }, { "epoch": 0.8614345391520754, "grad_norm": 1.7013195753097534, "learning_rate": 1.7079445795643286e-05, "loss": 1.7678, "step": 24230 }, { "epoch": 0.8617900631055018, "grad_norm": 1.6265993118286133, "learning_rate": 1.7076677314601907e-05, "loss": 1.751, "step": 24240 }, { "epoch": 0.8621455870589281, "grad_norm": 1.7178155183792114, "learning_rate": 1.7073907746627263e-05, "loss": 1.8304, "step": 24250 }, { "epoch": 0.8625011110123545, "grad_norm": 1.7472935914993286, "learning_rate": 1.707113709214474e-05, "loss": 1.788, "step": 24260 }, { "epoch": 0.8628566349657808, "grad_norm": 1.7131307125091553, "learning_rate": 1.7068365351579902e-05, "loss": 1.8344, "step": 24270 }, { "epoch": 0.8632121589192072, "grad_norm": 1.7252979278564453, "learning_rate": 1.706559252535846e-05, "loss": 1.8178, "step": 24280 }, { "epoch": 0.8635676828726335, "grad_norm": 1.554989218711853, "learning_rate": 1.7062818613906307e-05, "loss": 1.8321, "step": 24290 }, { "epoch": 0.8639232068260599, "grad_norm": 1.7460143566131592, "learning_rate": 1.7060043617649503e-05, "loss": 1.7565, "step": 24300 }, { "epoch": 0.8642787307794862, "grad_norm": 1.7684497833251953, "learning_rate": 1.705726753701426e-05, "loss": 1.8584, "step": 24310 }, { "epoch": 0.8646342547329127, "grad_norm": 1.634946346282959, "learning_rate": 1.705449037242698e-05, "loss": 1.7973, "step": 24320 }, { "epoch": 0.864989778686339, "grad_norm": 1.6846821308135986, "learning_rate": 1.7051712124314205e-05, "loss": 1.8134, "step": 24330 }, { "epoch": 0.8653453026397654, "grad_norm": 1.6562668085098267, "learning_rate": 1.7048932793102667e-05, "loss": 1.8097, "step": 24340 }, { "epoch": 0.8657008265931917, "grad_norm": 1.5983461141586304, "learning_rate": 1.7046152379219247e-05, "loss": 1.8386, "step": 24350 }, { "epoch": 0.8660563505466181, "grad_norm": 1.7765803337097168, "learning_rate": 1.7043370883091002e-05, "loss": 1.8211, "step": 24360 }, { "epoch": 0.8664118745000444, "grad_norm": 1.6079620122909546, "learning_rate": 1.704058830514515e-05, "loss": 1.8219, "step": 24370 }, { "epoch": 0.8667673984534708, "grad_norm": 1.606056809425354, "learning_rate": 1.703780464580908e-05, "loss": 1.8003, "step": 24380 }, { "epoch": 0.8671229224068971, "grad_norm": 1.673724889755249, "learning_rate": 1.7035019905510344e-05, "loss": 1.8186, "step": 24390 }, { "epoch": 0.8674784463603236, "grad_norm": 1.5252655744552612, "learning_rate": 1.703223408467666e-05, "loss": 1.8745, "step": 24400 }, { "epoch": 0.8678339703137499, "grad_norm": 1.60201895236969, "learning_rate": 1.7029447183735915e-05, "loss": 1.8072, "step": 24410 }, { "epoch": 0.8681894942671763, "grad_norm": 1.7763891220092773, "learning_rate": 1.7026659203116155e-05, "loss": 1.7658, "step": 24420 }, { "epoch": 0.8685450182206026, "grad_norm": 1.7289584875106812, "learning_rate": 1.70238701432456e-05, "loss": 1.8364, "step": 24430 }, { "epoch": 0.868900542174029, "grad_norm": 1.692323088645935, "learning_rate": 1.702108000455263e-05, "loss": 1.81, "step": 24440 }, { "epoch": 0.8692560661274553, "grad_norm": 1.7640386819839478, "learning_rate": 1.7018288787465796e-05, "loss": 1.8089, "step": 24450 }, { "epoch": 0.8696115900808817, "grad_norm": 1.5894044637680054, "learning_rate": 1.7015496492413807e-05, "loss": 1.8043, "step": 24460 }, { "epoch": 0.869967114034308, "grad_norm": 1.6370770931243896, "learning_rate": 1.7012703119825542e-05, "loss": 1.8437, "step": 24470 }, { "epoch": 0.8703226379877345, "grad_norm": 1.8560885190963745, "learning_rate": 1.700990867013005e-05, "loss": 1.7915, "step": 24480 }, { "epoch": 0.8706781619411608, "grad_norm": 1.6874840259552002, "learning_rate": 1.7007113143756542e-05, "loss": 1.7954, "step": 24490 }, { "epoch": 0.8710336858945872, "grad_norm": 1.6154260635375977, "learning_rate": 1.7004316541134387e-05, "loss": 1.8019, "step": 24500 }, { "epoch": 0.8713892098480135, "grad_norm": 1.7375465631484985, "learning_rate": 1.7001518862693132e-05, "loss": 1.7495, "step": 24510 }, { "epoch": 0.8717447338014399, "grad_norm": 1.743693232536316, "learning_rate": 1.6998720108862475e-05, "loss": 1.8341, "step": 24520 }, { "epoch": 0.8721002577548662, "grad_norm": 1.643489956855774, "learning_rate": 1.6995920280072297e-05, "loss": 1.859, "step": 24530 }, { "epoch": 0.8724557817082926, "grad_norm": 1.6341086626052856, "learning_rate": 1.6993119376752622e-05, "loss": 1.7679, "step": 24540 }, { "epoch": 0.8728113056617189, "grad_norm": 1.5878918170928955, "learning_rate": 1.699031739933366e-05, "loss": 1.825, "step": 24550 }, { "epoch": 0.8731668296151454, "grad_norm": 1.651410460472107, "learning_rate": 1.6987514348245776e-05, "loss": 1.8001, "step": 24560 }, { "epoch": 0.8735223535685717, "grad_norm": 1.656724214553833, "learning_rate": 1.6984710223919503e-05, "loss": 1.8573, "step": 24570 }, { "epoch": 0.8738778775219981, "grad_norm": 1.650752067565918, "learning_rate": 1.698190502678553e-05, "loss": 1.8491, "step": 24580 }, { "epoch": 0.8742334014754244, "grad_norm": 1.6090630292892456, "learning_rate": 1.6979098757274725e-05, "loss": 1.8489, "step": 24590 }, { "epoch": 0.8745889254288508, "grad_norm": 1.8214991092681885, "learning_rate": 1.6976291415818107e-05, "loss": 1.8129, "step": 24600 }, { "epoch": 0.8749444493822771, "grad_norm": 1.6846964359283447, "learning_rate": 1.697348300284687e-05, "loss": 1.8216, "step": 24610 }, { "epoch": 0.8752999733357035, "grad_norm": 1.6882396936416626, "learning_rate": 1.697067351879237e-05, "loss": 1.8095, "step": 24620 }, { "epoch": 0.8756554972891298, "grad_norm": 1.6624641418457031, "learning_rate": 1.6967862964086124e-05, "loss": 1.8097, "step": 24630 }, { "epoch": 0.8760110212425563, "grad_norm": 1.591064453125, "learning_rate": 1.6965051339159812e-05, "loss": 1.7942, "step": 24640 }, { "epoch": 0.8763665451959826, "grad_norm": 1.6376370191574097, "learning_rate": 1.6962238644445288e-05, "loss": 1.7863, "step": 24650 }, { "epoch": 0.876722069149409, "grad_norm": 1.6289722919464111, "learning_rate": 1.695942488037456e-05, "loss": 1.7999, "step": 24660 }, { "epoch": 0.8770775931028353, "grad_norm": 1.7085307836532593, "learning_rate": 1.6956610047379808e-05, "loss": 1.8486, "step": 24670 }, { "epoch": 0.8774331170562617, "grad_norm": 1.6344826221466064, "learning_rate": 1.6953794145893372e-05, "loss": 1.8198, "step": 24680 }, { "epoch": 0.877788641009688, "grad_norm": 1.6788198947906494, "learning_rate": 1.6950977176347755e-05, "loss": 1.7615, "step": 24690 }, { "epoch": 0.8781441649631144, "grad_norm": 1.5258077383041382, "learning_rate": 1.6948159139175624e-05, "loss": 1.8519, "step": 24700 }, { "epoch": 0.8784996889165407, "grad_norm": 1.676614761352539, "learning_rate": 1.6945340034809816e-05, "loss": 1.8018, "step": 24710 }, { "epoch": 0.8788552128699672, "grad_norm": 1.624168872833252, "learning_rate": 1.694251986368333e-05, "loss": 1.8087, "step": 24720 }, { "epoch": 0.8792107368233935, "grad_norm": 1.7765724658966064, "learning_rate": 1.6939698626229318e-05, "loss": 1.8441, "step": 24730 }, { "epoch": 0.8795662607768199, "grad_norm": 1.6141785383224487, "learning_rate": 1.693687632288111e-05, "loss": 1.8544, "step": 24740 }, { "epoch": 0.8799217847302462, "grad_norm": 1.7429922819137573, "learning_rate": 1.6934052954072196e-05, "loss": 1.8214, "step": 24750 }, { "epoch": 0.8802773086836726, "grad_norm": 1.6700776815414429, "learning_rate": 1.6931228520236223e-05, "loss": 1.8609, "step": 24760 }, { "epoch": 0.8806328326370989, "grad_norm": 1.5559288263320923, "learning_rate": 1.692840302180701e-05, "loss": 1.8422, "step": 24770 }, { "epoch": 0.8809883565905253, "grad_norm": 1.7079417705535889, "learning_rate": 1.692557645921853e-05, "loss": 1.8145, "step": 24780 }, { "epoch": 0.8813438805439516, "grad_norm": 1.636964201927185, "learning_rate": 1.6922748832904937e-05, "loss": 1.7924, "step": 24790 }, { "epoch": 0.881699404497378, "grad_norm": 1.730451226234436, "learning_rate": 1.6919920143300524e-05, "loss": 1.7988, "step": 24800 }, { "epoch": 0.8820549284508044, "grad_norm": 1.6825193166732788, "learning_rate": 1.6917090390839766e-05, "loss": 1.8124, "step": 24810 }, { "epoch": 0.8824104524042308, "grad_norm": 1.5790181159973145, "learning_rate": 1.6914259575957294e-05, "loss": 1.7826, "step": 24820 }, { "epoch": 0.8827659763576571, "grad_norm": 1.5225175619125366, "learning_rate": 1.6911427699087902e-05, "loss": 1.8169, "step": 24830 }, { "epoch": 0.8831215003110835, "grad_norm": 1.7250087261199951, "learning_rate": 1.6908594760666557e-05, "loss": 1.8296, "step": 24840 }, { "epoch": 0.8834770242645098, "grad_norm": 1.6308724880218506, "learning_rate": 1.6905760761128367e-05, "loss": 1.8544, "step": 24850 }, { "epoch": 0.8838325482179362, "grad_norm": 1.5742348432540894, "learning_rate": 1.690292570090863e-05, "loss": 1.834, "step": 24860 }, { "epoch": 0.8841880721713625, "grad_norm": 1.6509791612625122, "learning_rate": 1.690008958044278e-05, "loss": 1.8607, "step": 24870 }, { "epoch": 0.8845435961247889, "grad_norm": 1.661698341369629, "learning_rate": 1.689725240016644e-05, "loss": 1.7723, "step": 24880 }, { "epoch": 0.8848991200782153, "grad_norm": 1.709389090538025, "learning_rate": 1.6894414160515373e-05, "loss": 1.7786, "step": 24890 }, { "epoch": 0.8852546440316417, "grad_norm": 1.6019915342330933, "learning_rate": 1.6891574861925523e-05, "loss": 1.819, "step": 24900 }, { "epoch": 0.885610167985068, "grad_norm": 1.6010609865188599, "learning_rate": 1.6888734504832984e-05, "loss": 1.8405, "step": 24910 }, { "epoch": 0.8859656919384944, "grad_norm": 1.6091557741165161, "learning_rate": 1.6885893089674017e-05, "loss": 1.8148, "step": 24920 }, { "epoch": 0.8863212158919207, "grad_norm": 1.6396043300628662, "learning_rate": 1.6883050616885043e-05, "loss": 1.8317, "step": 24930 }, { "epoch": 0.8866767398453471, "grad_norm": 1.6629292964935303, "learning_rate": 1.6880207086902657e-05, "loss": 1.8529, "step": 24940 }, { "epoch": 0.8870322637987734, "grad_norm": 1.6890887022018433, "learning_rate": 1.68773625001636e-05, "loss": 1.7573, "step": 24950 }, { "epoch": 0.8873877877521998, "grad_norm": 1.6500388383865356, "learning_rate": 1.6874516857104782e-05, "loss": 1.7858, "step": 24960 }, { "epoch": 0.8877433117056261, "grad_norm": 1.7768943309783936, "learning_rate": 1.6871670158163282e-05, "loss": 1.8354, "step": 24970 }, { "epoch": 0.8880988356590526, "grad_norm": 1.720981478691101, "learning_rate": 1.6868822403776327e-05, "loss": 1.7871, "step": 24980 }, { "epoch": 0.8884543596124789, "grad_norm": 1.5580673217773438, "learning_rate": 1.6865973594381322e-05, "loss": 1.8176, "step": 24990 }, { "epoch": 0.8888098835659053, "grad_norm": 1.6760094165802002, "learning_rate": 1.6863123730415824e-05, "loss": 1.846, "step": 25000 }, { "epoch": 0.8891654075193316, "grad_norm": 1.5730007886886597, "learning_rate": 1.686027281231755e-05, "loss": 1.8024, "step": 25010 }, { "epoch": 0.889520931472758, "grad_norm": 1.6264647245407104, "learning_rate": 1.6857420840524387e-05, "loss": 1.7785, "step": 25020 }, { "epoch": 0.8898764554261843, "grad_norm": 1.6349519491195679, "learning_rate": 1.685456781547438e-05, "loss": 1.838, "step": 25030 }, { "epoch": 0.8902319793796107, "grad_norm": 1.589114785194397, "learning_rate": 1.6851713737605732e-05, "loss": 1.8203, "step": 25040 }, { "epoch": 0.890587503333037, "grad_norm": 1.6858025789260864, "learning_rate": 1.684885860735682e-05, "loss": 1.8338, "step": 25050 }, { "epoch": 0.8909430272864635, "grad_norm": 1.7134677171707153, "learning_rate": 1.6846002425166165e-05, "loss": 1.8172, "step": 25060 }, { "epoch": 0.8912985512398898, "grad_norm": 1.6317869424819946, "learning_rate": 1.6843145191472463e-05, "loss": 1.7995, "step": 25070 }, { "epoch": 0.8916540751933162, "grad_norm": 1.6795090436935425, "learning_rate": 1.6840286906714567e-05, "loss": 1.8136, "step": 25080 }, { "epoch": 0.8920095991467425, "grad_norm": 1.686813473701477, "learning_rate": 1.6837427571331488e-05, "loss": 1.7711, "step": 25090 }, { "epoch": 0.8923651231001689, "grad_norm": 1.6473668813705444, "learning_rate": 1.683456718576241e-05, "loss": 1.807, "step": 25100 }, { "epoch": 0.8927206470535952, "grad_norm": 1.6169072389602661, "learning_rate": 1.683170575044666e-05, "loss": 1.8179, "step": 25110 }, { "epoch": 0.8930761710070216, "grad_norm": 1.5349276065826416, "learning_rate": 1.6828843265823748e-05, "loss": 1.8566, "step": 25120 }, { "epoch": 0.8934316949604479, "grad_norm": 1.6595957279205322, "learning_rate": 1.6825979732333323e-05, "loss": 1.8345, "step": 25130 }, { "epoch": 0.8937872189138744, "grad_norm": 1.6424028873443604, "learning_rate": 1.6823115150415212e-05, "loss": 1.8239, "step": 25140 }, { "epoch": 0.8941427428673007, "grad_norm": 1.6583970785140991, "learning_rate": 1.6820249520509392e-05, "loss": 1.7771, "step": 25150 }, { "epoch": 0.8944982668207271, "grad_norm": 1.641650676727295, "learning_rate": 1.6817382843056012e-05, "loss": 1.8091, "step": 25160 }, { "epoch": 0.8948537907741534, "grad_norm": 1.6662938594818115, "learning_rate": 1.6814515118495373e-05, "loss": 1.84, "step": 25170 }, { "epoch": 0.8952093147275798, "grad_norm": 1.6732330322265625, "learning_rate": 1.681164634726794e-05, "loss": 1.8464, "step": 25180 }, { "epoch": 0.8955648386810061, "grad_norm": 1.6067739725112915, "learning_rate": 1.6808776529814336e-05, "loss": 1.8137, "step": 25190 }, { "epoch": 0.8959203626344325, "grad_norm": 1.6159056425094604, "learning_rate": 1.6805905666575347e-05, "loss": 1.8179, "step": 25200 }, { "epoch": 0.8962758865878588, "grad_norm": 1.5482304096221924, "learning_rate": 1.680303375799192e-05, "loss": 1.7954, "step": 25210 }, { "epoch": 0.8966314105412853, "grad_norm": 1.6053767204284668, "learning_rate": 1.6800160804505167e-05, "loss": 1.8061, "step": 25220 }, { "epoch": 0.8969869344947116, "grad_norm": 1.7497892379760742, "learning_rate": 1.679728680655635e-05, "loss": 1.8272, "step": 25230 }, { "epoch": 0.897342458448138, "grad_norm": 1.6293673515319824, "learning_rate": 1.67944117645869e-05, "loss": 1.7566, "step": 25240 }, { "epoch": 0.8976979824015643, "grad_norm": 1.6386243104934692, "learning_rate": 1.6791535679038405e-05, "loss": 1.8065, "step": 25250 }, { "epoch": 0.8980535063549907, "grad_norm": 1.7207205295562744, "learning_rate": 1.678865855035261e-05, "loss": 1.7955, "step": 25260 }, { "epoch": 0.898409030308417, "grad_norm": 1.705877661705017, "learning_rate": 1.6785780378971427e-05, "loss": 1.817, "step": 25270 }, { "epoch": 0.8987645542618434, "grad_norm": 1.626386046409607, "learning_rate": 1.6782901165336926e-05, "loss": 1.8166, "step": 25280 }, { "epoch": 0.8991200782152697, "grad_norm": 1.633610486984253, "learning_rate": 1.6780020909891333e-05, "loss": 1.8136, "step": 25290 }, { "epoch": 0.8994756021686962, "grad_norm": 1.6760858297348022, "learning_rate": 1.6777139613077046e-05, "loss": 1.8296, "step": 25300 }, { "epoch": 0.8998311261221225, "grad_norm": 1.6591938734054565, "learning_rate": 1.67742572753366e-05, "loss": 1.8122, "step": 25310 }, { "epoch": 0.9001866500755489, "grad_norm": 1.5998114347457886, "learning_rate": 1.6771373897112716e-05, "loss": 1.8481, "step": 25320 }, { "epoch": 0.9005421740289752, "grad_norm": 1.7435482740402222, "learning_rate": 1.6768489478848254e-05, "loss": 1.8044, "step": 25330 }, { "epoch": 0.9008976979824016, "grad_norm": 1.637698769569397, "learning_rate": 1.676560402098625e-05, "loss": 1.816, "step": 25340 }, { "epoch": 0.9012532219358279, "grad_norm": 1.6445133686065674, "learning_rate": 1.676271752396989e-05, "loss": 1.8083, "step": 25350 }, { "epoch": 0.9016087458892543, "grad_norm": 1.590224027633667, "learning_rate": 1.675982998824252e-05, "loss": 1.8029, "step": 25360 }, { "epoch": 0.9019642698426806, "grad_norm": 1.6143308877944946, "learning_rate": 1.6756941414247644e-05, "loss": 1.8448, "step": 25370 }, { "epoch": 0.902319793796107, "grad_norm": 1.5782301425933838, "learning_rate": 1.6754051802428936e-05, "loss": 1.8168, "step": 25380 }, { "epoch": 0.9026753177495334, "grad_norm": 1.6096141338348389, "learning_rate": 1.6751161153230225e-05, "loss": 1.8351, "step": 25390 }, { "epoch": 0.9030308417029598, "grad_norm": 1.7121573686599731, "learning_rate": 1.6748269467095484e-05, "loss": 1.7669, "step": 25400 }, { "epoch": 0.9033863656563861, "grad_norm": 1.7440303564071655, "learning_rate": 1.6745376744468867e-05, "loss": 1.8404, "step": 25410 }, { "epoch": 0.9037418896098125, "grad_norm": 1.6387438774108887, "learning_rate": 1.6742482985794676e-05, "loss": 1.8323, "step": 25420 }, { "epoch": 0.9040974135632388, "grad_norm": 1.6786017417907715, "learning_rate": 1.673958819151737e-05, "loss": 1.805, "step": 25430 }, { "epoch": 0.9044529375166652, "grad_norm": 1.655165672302246, "learning_rate": 1.673669236208158e-05, "loss": 1.7973, "step": 25440 }, { "epoch": 0.9048084614700915, "grad_norm": 1.6670957803726196, "learning_rate": 1.673379549793208e-05, "loss": 1.8664, "step": 25450 }, { "epoch": 0.905163985423518, "grad_norm": 1.590448260307312, "learning_rate": 1.6730897599513817e-05, "loss": 1.8278, "step": 25460 }, { "epoch": 0.9055195093769443, "grad_norm": 1.7210569381713867, "learning_rate": 1.6727998667271882e-05, "loss": 1.8282, "step": 25470 }, { "epoch": 0.9058750333303707, "grad_norm": 1.5972177982330322, "learning_rate": 1.672509870165154e-05, "loss": 1.8379, "step": 25480 }, { "epoch": 0.906230557283797, "grad_norm": 1.665274977684021, "learning_rate": 1.6722197703098203e-05, "loss": 1.7867, "step": 25490 }, { "epoch": 0.9065860812372234, "grad_norm": 1.6389977931976318, "learning_rate": 1.6719295672057445e-05, "loss": 1.8531, "step": 25500 }, { "epoch": 0.9069416051906497, "grad_norm": 1.698076605796814, "learning_rate": 1.6716392608975004e-05, "loss": 1.7753, "step": 25510 }, { "epoch": 0.9072971291440761, "grad_norm": 1.7209333181381226, "learning_rate": 1.6713488514296768e-05, "loss": 1.8097, "step": 25520 }, { "epoch": 0.9076526530975024, "grad_norm": 1.6083821058273315, "learning_rate": 1.671058338846879e-05, "loss": 1.8193, "step": 25530 }, { "epoch": 0.9080081770509288, "grad_norm": 1.6375142335891724, "learning_rate": 1.6707677231937282e-05, "loss": 1.7958, "step": 25540 }, { "epoch": 0.9083637010043552, "grad_norm": 1.6612564325332642, "learning_rate": 1.6704770045148612e-05, "loss": 1.8496, "step": 25550 }, { "epoch": 0.9087192249577816, "grad_norm": 1.6492503881454468, "learning_rate": 1.6701861828549296e-05, "loss": 1.8113, "step": 25560 }, { "epoch": 0.9090747489112079, "grad_norm": 1.566922903060913, "learning_rate": 1.6698952582586025e-05, "loss": 1.8328, "step": 25570 }, { "epoch": 0.9094302728646343, "grad_norm": 1.5768183469772339, "learning_rate": 1.6696042307705642e-05, "loss": 1.7921, "step": 25580 }, { "epoch": 0.9097857968180606, "grad_norm": 1.674768328666687, "learning_rate": 1.6693131004355145e-05, "loss": 1.8432, "step": 25590 }, { "epoch": 0.910141320771487, "grad_norm": 1.531554937362671, "learning_rate": 1.6690218672981687e-05, "loss": 1.7815, "step": 25600 }, { "epoch": 0.9104968447249133, "grad_norm": 1.5599174499511719, "learning_rate": 1.6687305314032592e-05, "loss": 1.7748, "step": 25610 }, { "epoch": 0.9108523686783397, "grad_norm": 1.6537657976150513, "learning_rate": 1.6684390927955333e-05, "loss": 1.851, "step": 25620 }, { "epoch": 0.911207892631766, "grad_norm": 1.6403638124465942, "learning_rate": 1.668147551519754e-05, "loss": 1.8258, "step": 25630 }, { "epoch": 0.9115634165851925, "grad_norm": 1.6577059030532837, "learning_rate": 1.6678559076206996e-05, "loss": 1.7702, "step": 25640 }, { "epoch": 0.9119189405386188, "grad_norm": 1.6512460708618164, "learning_rate": 1.6675641611431657e-05, "loss": 1.8846, "step": 25650 }, { "epoch": 0.9122744644920452, "grad_norm": 1.6656643152236938, "learning_rate": 1.667272312131962e-05, "loss": 1.8001, "step": 25660 }, { "epoch": 0.9126299884454715, "grad_norm": 1.7069002389907837, "learning_rate": 1.6669803606319152e-05, "loss": 1.8675, "step": 25670 }, { "epoch": 0.9129855123988979, "grad_norm": 1.5266073942184448, "learning_rate": 1.666688306687867e-05, "loss": 1.7878, "step": 25680 }, { "epoch": 0.9133410363523242, "grad_norm": 1.6533663272857666, "learning_rate": 1.666396150344675e-05, "loss": 1.8186, "step": 25690 }, { "epoch": 0.9136965603057506, "grad_norm": 1.644303321838379, "learning_rate": 1.6661038916472125e-05, "loss": 1.7981, "step": 25700 }, { "epoch": 0.914052084259177, "grad_norm": 1.7668614387512207, "learning_rate": 1.665811530640369e-05, "loss": 1.8579, "step": 25710 }, { "epoch": 0.9144076082126034, "grad_norm": 1.591511845588684, "learning_rate": 1.665519067369049e-05, "loss": 1.8095, "step": 25720 }, { "epoch": 0.9147631321660297, "grad_norm": 1.7172024250030518, "learning_rate": 1.6652265018781726e-05, "loss": 1.8013, "step": 25730 }, { "epoch": 0.9151186561194561, "grad_norm": 1.6277397871017456, "learning_rate": 1.6649338342126772e-05, "loss": 1.7721, "step": 25740 }, { "epoch": 0.9154741800728824, "grad_norm": 1.5798052549362183, "learning_rate": 1.6646410644175137e-05, "loss": 1.7835, "step": 25750 }, { "epoch": 0.9158297040263088, "grad_norm": 1.698182225227356, "learning_rate": 1.66434819253765e-05, "loss": 1.8238, "step": 25760 }, { "epoch": 0.9161852279797351, "grad_norm": 1.635740041732788, "learning_rate": 1.6640552186180698e-05, "loss": 1.801, "step": 25770 }, { "epoch": 0.9165407519331615, "grad_norm": 1.6899878978729248, "learning_rate": 1.6637621427037714e-05, "loss": 1.8247, "step": 25780 }, { "epoch": 0.9168962758865878, "grad_norm": 1.4938238859176636, "learning_rate": 1.6634689648397695e-05, "loss": 1.8334, "step": 25790 }, { "epoch": 0.9172517998400143, "grad_norm": 1.6516492366790771, "learning_rate": 1.663175685071095e-05, "loss": 1.834, "step": 25800 }, { "epoch": 0.9176073237934406, "grad_norm": 1.6564146280288696, "learning_rate": 1.662882303442793e-05, "loss": 1.786, "step": 25810 }, { "epoch": 0.917962847746867, "grad_norm": 1.6790539026260376, "learning_rate": 1.6625888199999258e-05, "loss": 1.8361, "step": 25820 }, { "epoch": 0.9183183717002933, "grad_norm": 1.7647924423217773, "learning_rate": 1.66229523478757e-05, "loss": 1.8182, "step": 25830 }, { "epoch": 0.9186738956537197, "grad_norm": 1.6306095123291016, "learning_rate": 1.662001547850819e-05, "loss": 1.8168, "step": 25840 }, { "epoch": 0.919029419607146, "grad_norm": 1.6261768341064453, "learning_rate": 1.6617077592347813e-05, "loss": 1.7882, "step": 25850 }, { "epoch": 0.9193849435605724, "grad_norm": 1.7016527652740479, "learning_rate": 1.6614138689845806e-05, "loss": 1.7979, "step": 25860 }, { "epoch": 0.9197404675139987, "grad_norm": 1.562110185623169, "learning_rate": 1.6611198771453562e-05, "loss": 1.8436, "step": 25870 }, { "epoch": 0.9200959914674252, "grad_norm": 1.5601515769958496, "learning_rate": 1.6608257837622646e-05, "loss": 1.8073, "step": 25880 }, { "epoch": 0.9204515154208515, "grad_norm": 1.5988566875457764, "learning_rate": 1.6605315888804753e-05, "loss": 1.7985, "step": 25890 }, { "epoch": 0.9208070393742779, "grad_norm": 1.6320873498916626, "learning_rate": 1.660237292545176e-05, "loss": 1.8177, "step": 25900 }, { "epoch": 0.9211625633277042, "grad_norm": 1.6602503061294556, "learning_rate": 1.6599428948015682e-05, "loss": 1.8008, "step": 25910 }, { "epoch": 0.9215180872811306, "grad_norm": 1.6478127241134644, "learning_rate": 1.6596483956948696e-05, "loss": 1.8151, "step": 25920 }, { "epoch": 0.9218736112345569, "grad_norm": 1.8068106174468994, "learning_rate": 1.6593537952703137e-05, "loss": 1.8107, "step": 25930 }, { "epoch": 0.9222291351879833, "grad_norm": 1.6383720636367798, "learning_rate": 1.659059093573149e-05, "loss": 1.8039, "step": 25940 }, { "epoch": 0.9225846591414096, "grad_norm": 1.669328212738037, "learning_rate": 1.6587642906486395e-05, "loss": 1.8336, "step": 25950 }, { "epoch": 0.922940183094836, "grad_norm": 1.7093185186386108, "learning_rate": 1.6584693865420655e-05, "loss": 1.7985, "step": 25960 }, { "epoch": 0.9232957070482624, "grad_norm": 1.5628633499145508, "learning_rate": 1.6581743812987222e-05, "loss": 1.7783, "step": 25970 }, { "epoch": 0.9236512310016888, "grad_norm": 1.7095357179641724, "learning_rate": 1.657879274963921e-05, "loss": 1.8462, "step": 25980 }, { "epoch": 0.9240067549551151, "grad_norm": 1.6446905136108398, "learning_rate": 1.6575840675829883e-05, "loss": 1.8119, "step": 25990 }, { "epoch": 0.9243622789085415, "grad_norm": 1.7160478830337524, "learning_rate": 1.6572887592012655e-05, "loss": 1.7828, "step": 26000 }, { "epoch": 0.9247178028619678, "grad_norm": 1.7009133100509644, "learning_rate": 1.6569933498641105e-05, "loss": 1.8093, "step": 26010 }, { "epoch": 0.9250733268153942, "grad_norm": 1.7161458730697632, "learning_rate": 1.656697839616897e-05, "loss": 1.7835, "step": 26020 }, { "epoch": 0.9254288507688205, "grad_norm": 1.5854977369308472, "learning_rate": 1.6564022285050124e-05, "loss": 1.8083, "step": 26030 }, { "epoch": 0.925784374722247, "grad_norm": 1.7766863107681274, "learning_rate": 1.656106516573861e-05, "loss": 1.7877, "step": 26040 }, { "epoch": 0.9261398986756733, "grad_norm": 1.7302604913711548, "learning_rate": 1.6558107038688625e-05, "loss": 1.8076, "step": 26050 }, { "epoch": 0.9264954226290997, "grad_norm": 1.652533769607544, "learning_rate": 1.655514790435452e-05, "loss": 1.7882, "step": 26060 }, { "epoch": 0.926850946582526, "grad_norm": 1.6904962062835693, "learning_rate": 1.65521877631908e-05, "loss": 1.7563, "step": 26070 }, { "epoch": 0.9272064705359524, "grad_norm": 1.7011816501617432, "learning_rate": 1.654922661565212e-05, "loss": 1.8147, "step": 26080 }, { "epoch": 0.9275619944893787, "grad_norm": 1.6210919618606567, "learning_rate": 1.6546264462193295e-05, "loss": 1.8108, "step": 26090 }, { "epoch": 0.9279175184428051, "grad_norm": 1.7529455423355103, "learning_rate": 1.6543301303269295e-05, "loss": 1.8035, "step": 26100 }, { "epoch": 0.9282730423962314, "grad_norm": 1.620786190032959, "learning_rate": 1.6540337139335245e-05, "loss": 1.8038, "step": 26110 }, { "epoch": 0.9286285663496578, "grad_norm": 1.72477388381958, "learning_rate": 1.6537371970846412e-05, "loss": 1.8131, "step": 26120 }, { "epoch": 0.9289840903030842, "grad_norm": 1.6291157007217407, "learning_rate": 1.6534405798258238e-05, "loss": 1.8056, "step": 26130 }, { "epoch": 0.9293396142565106, "grad_norm": 1.6260063648223877, "learning_rate": 1.6531438622026305e-05, "loss": 1.8343, "step": 26140 }, { "epoch": 0.9296951382099369, "grad_norm": 1.6301681995391846, "learning_rate": 1.652847044260635e-05, "loss": 1.769, "step": 26150 }, { "epoch": 0.9300506621633633, "grad_norm": 1.6698664426803589, "learning_rate": 1.652550126045427e-05, "loss": 1.7997, "step": 26160 }, { "epoch": 0.9304061861167896, "grad_norm": 1.7774896621704102, "learning_rate": 1.652253107602611e-05, "loss": 1.8359, "step": 26170 }, { "epoch": 0.930761710070216, "grad_norm": 1.7517472505569458, "learning_rate": 1.6519559889778077e-05, "loss": 1.829, "step": 26180 }, { "epoch": 0.9311172340236423, "grad_norm": 1.5541648864746094, "learning_rate": 1.651658770216652e-05, "loss": 1.7971, "step": 26190 }, { "epoch": 0.9314727579770687, "grad_norm": 1.5613819360733032, "learning_rate": 1.651361451364795e-05, "loss": 1.8104, "step": 26200 }, { "epoch": 0.931828281930495, "grad_norm": 1.6808326244354248, "learning_rate": 1.651064032467903e-05, "loss": 1.7883, "step": 26210 }, { "epoch": 0.9321838058839215, "grad_norm": 1.6574971675872803, "learning_rate": 1.6507665135716585e-05, "loss": 1.7869, "step": 26220 }, { "epoch": 0.9325393298373478, "grad_norm": 1.7454781532287598, "learning_rate": 1.6504688947217573e-05, "loss": 1.8373, "step": 26230 }, { "epoch": 0.9328948537907742, "grad_norm": 1.7351772785186768, "learning_rate": 1.650171175963913e-05, "loss": 1.8216, "step": 26240 }, { "epoch": 0.9332503777442005, "grad_norm": 1.5835143327713013, "learning_rate": 1.649873357343852e-05, "loss": 1.7755, "step": 26250 }, { "epoch": 0.9336059016976269, "grad_norm": 1.6227622032165527, "learning_rate": 1.6495754389073183e-05, "loss": 1.788, "step": 26260 }, { "epoch": 0.9339614256510532, "grad_norm": 1.6082923412322998, "learning_rate": 1.6492774207000698e-05, "loss": 1.7966, "step": 26270 }, { "epoch": 0.9343169496044796, "grad_norm": 1.5585588216781616, "learning_rate": 1.6489793027678807e-05, "loss": 1.8497, "step": 26280 }, { "epoch": 0.934672473557906, "grad_norm": 1.6597864627838135, "learning_rate": 1.6486810851565397e-05, "loss": 1.826, "step": 26290 }, { "epoch": 0.9350279975113324, "grad_norm": 1.668641209602356, "learning_rate": 1.6483827679118515e-05, "loss": 1.825, "step": 26300 }, { "epoch": 0.9353835214647587, "grad_norm": 1.6874868869781494, "learning_rate": 1.6480843510796352e-05, "loss": 1.8088, "step": 26310 }, { "epoch": 0.9357390454181851, "grad_norm": 1.579192876815796, "learning_rate": 1.6477858347057265e-05, "loss": 1.8205, "step": 26320 }, { "epoch": 0.9360945693716114, "grad_norm": 1.6124264001846313, "learning_rate": 1.647487218835975e-05, "loss": 1.8093, "step": 26330 }, { "epoch": 0.9364500933250378, "grad_norm": 1.6060585975646973, "learning_rate": 1.6471885035162465e-05, "loss": 1.8507, "step": 26340 }, { "epoch": 0.9368056172784641, "grad_norm": 1.5641084909439087, "learning_rate": 1.6468896887924218e-05, "loss": 1.8334, "step": 26350 }, { "epoch": 0.9371611412318905, "grad_norm": 1.6883052587509155, "learning_rate": 1.6465907747103968e-05, "loss": 1.8284, "step": 26360 }, { "epoch": 0.9375166651853168, "grad_norm": 1.6133944988250732, "learning_rate": 1.6462917613160833e-05, "loss": 1.7882, "step": 26370 }, { "epoch": 0.9378721891387433, "grad_norm": 1.6845167875289917, "learning_rate": 1.645992648655407e-05, "loss": 1.8002, "step": 26380 }, { "epoch": 0.9382277130921696, "grad_norm": 1.63066828250885, "learning_rate": 1.6456934367743106e-05, "loss": 1.8107, "step": 26390 }, { "epoch": 0.938583237045596, "grad_norm": 1.61935555934906, "learning_rate": 1.6453941257187508e-05, "loss": 1.7801, "step": 26400 }, { "epoch": 0.9389387609990223, "grad_norm": 1.6454601287841797, "learning_rate": 1.6450947155347002e-05, "loss": 1.7875, "step": 26410 }, { "epoch": 0.9392942849524487, "grad_norm": 1.5497645139694214, "learning_rate": 1.6447952062681456e-05, "loss": 1.798, "step": 26420 }, { "epoch": 0.939649808905875, "grad_norm": 1.6547778844833374, "learning_rate": 1.6444955979650906e-05, "loss": 1.8283, "step": 26430 }, { "epoch": 0.9400053328593014, "grad_norm": 1.652701497077942, "learning_rate": 1.6441958906715527e-05, "loss": 1.7966, "step": 26440 }, { "epoch": 0.9403608568127277, "grad_norm": 1.7806364297866821, "learning_rate": 1.643896084433565e-05, "loss": 1.801, "step": 26450 }, { "epoch": 0.9407163807661542, "grad_norm": 1.5824679136276245, "learning_rate": 1.643596179297176e-05, "loss": 1.8239, "step": 26460 }, { "epoch": 0.9410719047195805, "grad_norm": 1.7209099531173706, "learning_rate": 1.6432961753084495e-05, "loss": 1.8259, "step": 26470 }, { "epoch": 0.9414274286730069, "grad_norm": 1.7159688472747803, "learning_rate": 1.6429960725134634e-05, "loss": 1.7624, "step": 26480 }, { "epoch": 0.9417829526264332, "grad_norm": 1.6060218811035156, "learning_rate": 1.6426958709583128e-05, "loss": 1.8166, "step": 26490 }, { "epoch": 0.9421384765798596, "grad_norm": 1.6274778842926025, "learning_rate": 1.6423955706891056e-05, "loss": 1.8193, "step": 26500 }, { "epoch": 0.9424940005332859, "grad_norm": 1.705024003982544, "learning_rate": 1.6420951717519672e-05, "loss": 1.8024, "step": 26510 }, { "epoch": 0.9428495244867123, "grad_norm": 1.687160611152649, "learning_rate": 1.6417946741930358e-05, "loss": 1.7968, "step": 26520 }, { "epoch": 0.9432050484401386, "grad_norm": 1.6380635499954224, "learning_rate": 1.641494078058467e-05, "loss": 1.8179, "step": 26530 }, { "epoch": 0.9435605723935651, "grad_norm": 1.748347282409668, "learning_rate": 1.6411933833944294e-05, "loss": 1.8204, "step": 26540 }, { "epoch": 0.9439160963469914, "grad_norm": 1.6630667448043823, "learning_rate": 1.640892590247109e-05, "loss": 1.7902, "step": 26550 }, { "epoch": 0.9442716203004178, "grad_norm": 1.6317050457000732, "learning_rate": 1.6405916986627052e-05, "loss": 1.8064, "step": 26560 }, { "epoch": 0.9446271442538441, "grad_norm": 1.7291167974472046, "learning_rate": 1.6402907086874326e-05, "loss": 1.8267, "step": 26570 }, { "epoch": 0.9449826682072705, "grad_norm": 1.6491247415542603, "learning_rate": 1.6399896203675223e-05, "loss": 1.8358, "step": 26580 }, { "epoch": 0.9453381921606968, "grad_norm": 1.8621935844421387, "learning_rate": 1.639688433749219e-05, "loss": 1.8127, "step": 26590 }, { "epoch": 0.9456937161141232, "grad_norm": 1.6204941272735596, "learning_rate": 1.6393871488787826e-05, "loss": 1.7839, "step": 26600 }, { "epoch": 0.9460492400675495, "grad_norm": 1.6154526472091675, "learning_rate": 1.6390857658024896e-05, "loss": 1.8298, "step": 26610 }, { "epoch": 0.946404764020976, "grad_norm": 1.6763432025909424, "learning_rate": 1.6387842845666298e-05, "loss": 1.8138, "step": 26620 }, { "epoch": 0.9467602879744023, "grad_norm": 1.7188301086425781, "learning_rate": 1.638482705217509e-05, "loss": 1.8138, "step": 26630 }, { "epoch": 0.9471158119278287, "grad_norm": 1.6560871601104736, "learning_rate": 1.6381810278014486e-05, "loss": 1.8101, "step": 26640 }, { "epoch": 0.947471335881255, "grad_norm": 1.6502710580825806, "learning_rate": 1.6378792523647834e-05, "loss": 1.8108, "step": 26650 }, { "epoch": 0.9478268598346814, "grad_norm": 1.8194656372070312, "learning_rate": 1.6375773789538644e-05, "loss": 1.773, "step": 26660 }, { "epoch": 0.9481823837881077, "grad_norm": 1.7040694952011108, "learning_rate": 1.637275407615058e-05, "loss": 1.8403, "step": 26670 }, { "epoch": 0.9485379077415341, "grad_norm": 1.646286964416504, "learning_rate": 1.6369733383947445e-05, "loss": 1.8213, "step": 26680 }, { "epoch": 0.9488934316949604, "grad_norm": 1.5874391794204712, "learning_rate": 1.63667117133932e-05, "loss": 1.8257, "step": 26690 }, { "epoch": 0.9492489556483868, "grad_norm": 1.6585197448730469, "learning_rate": 1.6363689064951954e-05, "loss": 1.7766, "step": 26700 }, { "epoch": 0.9496044796018132, "grad_norm": 1.7515182495117188, "learning_rate": 1.6360665439087973e-05, "loss": 1.8237, "step": 26710 }, { "epoch": 0.9499600035552396, "grad_norm": 1.6164888143539429, "learning_rate": 1.635764083626566e-05, "loss": 1.8089, "step": 26720 }, { "epoch": 0.9503155275086659, "grad_norm": 1.596706748008728, "learning_rate": 1.6354615256949578e-05, "loss": 1.7892, "step": 26730 }, { "epoch": 0.9506710514620923, "grad_norm": 2.4562809467315674, "learning_rate": 1.6351588701604436e-05, "loss": 1.7705, "step": 26740 }, { "epoch": 0.9510265754155186, "grad_norm": 1.5344680547714233, "learning_rate": 1.6348561170695094e-05, "loss": 1.8005, "step": 26750 }, { "epoch": 0.951382099368945, "grad_norm": 1.6693150997161865, "learning_rate": 1.634553266468656e-05, "loss": 1.8471, "step": 26760 }, { "epoch": 0.9517376233223713, "grad_norm": 1.7177890539169312, "learning_rate": 1.6342503184044e-05, "loss": 1.7796, "step": 26770 }, { "epoch": 0.9520931472757977, "grad_norm": 1.7595194578170776, "learning_rate": 1.6339472729232716e-05, "loss": 1.7738, "step": 26780 }, { "epoch": 0.952448671229224, "grad_norm": 1.7894115447998047, "learning_rate": 1.6336441300718167e-05, "loss": 1.8033, "step": 26790 }, { "epoch": 0.9528041951826505, "grad_norm": 1.5425448417663574, "learning_rate": 1.6333408898965967e-05, "loss": 1.8276, "step": 26800 }, { "epoch": 0.9531597191360768, "grad_norm": 1.5785398483276367, "learning_rate": 1.633037552444187e-05, "loss": 1.778, "step": 26810 }, { "epoch": 0.9535152430895032, "grad_norm": 1.6010870933532715, "learning_rate": 1.6327341177611785e-05, "loss": 1.7984, "step": 26820 }, { "epoch": 0.9538707670429295, "grad_norm": 1.4971858263015747, "learning_rate": 1.632430585894177e-05, "loss": 1.8057, "step": 26830 }, { "epoch": 0.9542262909963559, "grad_norm": 1.6898208856582642, "learning_rate": 1.6321269568898025e-05, "loss": 1.8113, "step": 26840 }, { "epoch": 0.9545818149497822, "grad_norm": 1.6145386695861816, "learning_rate": 1.6318232307946912e-05, "loss": 1.8073, "step": 26850 }, { "epoch": 0.9549373389032086, "grad_norm": 1.5379389524459839, "learning_rate": 1.631519407655493e-05, "loss": 1.8026, "step": 26860 }, { "epoch": 0.955292862856635, "grad_norm": 1.575872540473938, "learning_rate": 1.6312154875188733e-05, "loss": 1.7668, "step": 26870 }, { "epoch": 0.9556483868100614, "grad_norm": 1.650305151939392, "learning_rate": 1.6309114704315127e-05, "loss": 1.8223, "step": 26880 }, { "epoch": 0.9560039107634877, "grad_norm": 1.562211275100708, "learning_rate": 1.630607356440106e-05, "loss": 1.8324, "step": 26890 }, { "epoch": 0.9563594347169141, "grad_norm": 1.560794472694397, "learning_rate": 1.630303145591363e-05, "loss": 1.7921, "step": 26900 }, { "epoch": 0.9567149586703404, "grad_norm": 1.6527752876281738, "learning_rate": 1.6299988379320094e-05, "loss": 1.755, "step": 26910 }, { "epoch": 0.9570704826237668, "grad_norm": 1.6441879272460938, "learning_rate": 1.6296944335087843e-05, "loss": 1.8096, "step": 26920 }, { "epoch": 0.9574260065771931, "grad_norm": 1.5838773250579834, "learning_rate": 1.6293899323684422e-05, "loss": 1.7996, "step": 26930 }, { "epoch": 0.9577815305306195, "grad_norm": 1.5352705717086792, "learning_rate": 1.629085334557753e-05, "loss": 1.8115, "step": 26940 }, { "epoch": 0.9581370544840458, "grad_norm": 1.7199256420135498, "learning_rate": 1.6287806401235008e-05, "loss": 1.7838, "step": 26950 }, { "epoch": 0.9584925784374723, "grad_norm": 1.829298734664917, "learning_rate": 1.6284758491124847e-05, "loss": 1.7503, "step": 26960 }, { "epoch": 0.9588481023908986, "grad_norm": 1.6695120334625244, "learning_rate": 1.6281709615715186e-05, "loss": 1.7866, "step": 26970 }, { "epoch": 0.959203626344325, "grad_norm": 1.6769123077392578, "learning_rate": 1.6278659775474318e-05, "loss": 1.7842, "step": 26980 }, { "epoch": 0.9595591502977513, "grad_norm": 1.6990450620651245, "learning_rate": 1.6275608970870674e-05, "loss": 1.7987, "step": 26990 }, { "epoch": 0.9599146742511777, "grad_norm": 1.6821962594985962, "learning_rate": 1.627255720237284e-05, "loss": 1.8118, "step": 27000 }, { "epoch": 0.960270198204604, "grad_norm": 1.6602524518966675, "learning_rate": 1.6269504470449548e-05, "loss": 1.8319, "step": 27010 }, { "epoch": 0.9606257221580304, "grad_norm": 1.5797219276428223, "learning_rate": 1.6266450775569683e-05, "loss": 1.8308, "step": 27020 }, { "epoch": 0.9609812461114567, "grad_norm": 1.637904405593872, "learning_rate": 1.626339611820227e-05, "loss": 1.7711, "step": 27030 }, { "epoch": 0.9613367700648832, "grad_norm": 1.626842975616455, "learning_rate": 1.626034049881648e-05, "loss": 1.8084, "step": 27040 }, { "epoch": 0.9616922940183095, "grad_norm": 1.7272893190383911, "learning_rate": 1.6257283917881644e-05, "loss": 1.7708, "step": 27050 }, { "epoch": 0.9620478179717359, "grad_norm": 1.6516095399856567, "learning_rate": 1.6254226375867234e-05, "loss": 1.8228, "step": 27060 }, { "epoch": 0.9624033419251622, "grad_norm": 1.5165112018585205, "learning_rate": 1.6251167873242865e-05, "loss": 1.7821, "step": 27070 }, { "epoch": 0.9627588658785886, "grad_norm": 1.5773776769638062, "learning_rate": 1.624810841047831e-05, "loss": 1.8263, "step": 27080 }, { "epoch": 0.9631143898320149, "grad_norm": 1.8211758136749268, "learning_rate": 1.6245047988043472e-05, "loss": 1.8452, "step": 27090 }, { "epoch": 0.9634699137854413, "grad_norm": 1.6081113815307617, "learning_rate": 1.6241986606408424e-05, "loss": 1.8437, "step": 27100 }, { "epoch": 0.9638254377388676, "grad_norm": 1.6721961498260498, "learning_rate": 1.623892426604337e-05, "loss": 1.7794, "step": 27110 }, { "epoch": 0.9641809616922941, "grad_norm": 1.6835254430770874, "learning_rate": 1.6235860967418666e-05, "loss": 1.8349, "step": 27120 }, { "epoch": 0.9645364856457204, "grad_norm": 1.625592589378357, "learning_rate": 1.6232796711004817e-05, "loss": 1.7948, "step": 27130 }, { "epoch": 0.9648920095991468, "grad_norm": 1.7034497261047363, "learning_rate": 1.6229731497272474e-05, "loss": 1.7869, "step": 27140 }, { "epoch": 0.9652475335525731, "grad_norm": 1.7472915649414062, "learning_rate": 1.6226665326692435e-05, "loss": 1.8121, "step": 27150 }, { "epoch": 0.9656030575059995, "grad_norm": 1.6585270166397095, "learning_rate": 1.622359819973564e-05, "loss": 1.7968, "step": 27160 }, { "epoch": 0.9659585814594258, "grad_norm": 1.6940916776657104, "learning_rate": 1.6220530116873186e-05, "loss": 1.8043, "step": 27170 }, { "epoch": 0.9663141054128522, "grad_norm": 1.6721597909927368, "learning_rate": 1.6217461078576307e-05, "loss": 1.8089, "step": 27180 }, { "epoch": 0.9666696293662785, "grad_norm": 1.5319979190826416, "learning_rate": 1.6214391085316395e-05, "loss": 1.8323, "step": 27190 }, { "epoch": 0.967025153319705, "grad_norm": 1.6966089010238647, "learning_rate": 1.621132013756497e-05, "loss": 1.8028, "step": 27200 }, { "epoch": 0.9673806772731313, "grad_norm": 1.677552580833435, "learning_rate": 1.620824823579372e-05, "loss": 1.7426, "step": 27210 }, { "epoch": 0.9677362012265577, "grad_norm": 1.5825179815292358, "learning_rate": 1.620517538047447e-05, "loss": 1.794, "step": 27220 }, { "epoch": 0.968091725179984, "grad_norm": 1.6787668466567993, "learning_rate": 1.6202101572079186e-05, "loss": 1.838, "step": 27230 }, { "epoch": 0.9684472491334104, "grad_norm": 1.7288752794265747, "learning_rate": 1.619902681107999e-05, "loss": 1.8007, "step": 27240 }, { "epoch": 0.9688027730868367, "grad_norm": 1.6401065587997437, "learning_rate": 1.619595109794914e-05, "loss": 1.811, "step": 27250 }, { "epoch": 0.9691582970402631, "grad_norm": 1.6266028881072998, "learning_rate": 1.6192874433159054e-05, "loss": 1.7568, "step": 27260 }, { "epoch": 0.9695138209936894, "grad_norm": 1.7548699378967285, "learning_rate": 1.618979681718228e-05, "loss": 1.872, "step": 27270 }, { "epoch": 0.9698693449471159, "grad_norm": 1.5368551015853882, "learning_rate": 1.618671825049153e-05, "loss": 1.79, "step": 27280 }, { "epoch": 0.9702248689005422, "grad_norm": 1.711429238319397, "learning_rate": 1.6183638733559646e-05, "loss": 1.8258, "step": 27290 }, { "epoch": 0.9705803928539686, "grad_norm": 1.6672272682189941, "learning_rate": 1.6180558266859625e-05, "loss": 1.7776, "step": 27300 }, { "epoch": 0.9709359168073949, "grad_norm": 1.6353920698165894, "learning_rate": 1.6177476850864606e-05, "loss": 1.8005, "step": 27310 }, { "epoch": 0.9712914407608213, "grad_norm": 1.6440942287445068, "learning_rate": 1.6174394486047874e-05, "loss": 1.8168, "step": 27320 }, { "epoch": 0.9716469647142476, "grad_norm": 1.7292405366897583, "learning_rate": 1.6171311172882866e-05, "loss": 1.806, "step": 27330 }, { "epoch": 0.972002488667674, "grad_norm": 1.6018248796463013, "learning_rate": 1.6168226911843155e-05, "loss": 1.8135, "step": 27340 }, { "epoch": 0.9723580126211003, "grad_norm": 1.6486098766326904, "learning_rate": 1.6165141703402466e-05, "loss": 1.8157, "step": 27350 }, { "epoch": 0.9727135365745267, "grad_norm": 1.7306561470031738, "learning_rate": 1.6162055548034663e-05, "loss": 1.8098, "step": 27360 }, { "epoch": 0.9730690605279531, "grad_norm": 1.6542526483535767, "learning_rate": 1.6158968446213766e-05, "loss": 1.7903, "step": 27370 }, { "epoch": 0.9734245844813795, "grad_norm": 1.7179967164993286, "learning_rate": 1.6155880398413938e-05, "loss": 1.7832, "step": 27380 }, { "epoch": 0.9737801084348058, "grad_norm": 1.8103654384613037, "learning_rate": 1.6152791405109473e-05, "loss": 1.8035, "step": 27390 }, { "epoch": 0.9741356323882322, "grad_norm": 1.5433931350708008, "learning_rate": 1.6149701466774827e-05, "loss": 1.7961, "step": 27400 }, { "epoch": 0.9744911563416585, "grad_norm": 1.764198899269104, "learning_rate": 1.6146610583884598e-05, "loss": 1.8266, "step": 27410 }, { "epoch": 0.9748466802950849, "grad_norm": 1.7470903396606445, "learning_rate": 1.614351875691352e-05, "loss": 1.8265, "step": 27420 }, { "epoch": 0.9752022042485112, "grad_norm": 1.6823208332061768, "learning_rate": 1.614042598633648e-05, "loss": 1.8151, "step": 27430 }, { "epoch": 0.9755577282019376, "grad_norm": 1.5959479808807373, "learning_rate": 1.613733227262851e-05, "loss": 1.8363, "step": 27440 }, { "epoch": 0.975913252155364, "grad_norm": 1.6768295764923096, "learning_rate": 1.6134237616264784e-05, "loss": 1.8201, "step": 27450 }, { "epoch": 0.9762687761087904, "grad_norm": 1.6030958890914917, "learning_rate": 1.6131142017720624e-05, "loss": 1.7891, "step": 27460 }, { "epoch": 0.9766243000622167, "grad_norm": 1.7325211763381958, "learning_rate": 1.612804547747149e-05, "loss": 1.8158, "step": 27470 }, { "epoch": 0.9769798240156431, "grad_norm": 1.6103816032409668, "learning_rate": 1.6124947995993e-05, "loss": 1.7845, "step": 27480 }, { "epoch": 0.9773353479690694, "grad_norm": 1.6940481662750244, "learning_rate": 1.6121849573760897e-05, "loss": 1.7905, "step": 27490 }, { "epoch": 0.9776908719224958, "grad_norm": 1.5602481365203857, "learning_rate": 1.6118750211251083e-05, "loss": 1.8666, "step": 27500 }, { "epoch": 0.9780463958759221, "grad_norm": 1.5811543464660645, "learning_rate": 1.6115649908939603e-05, "loss": 1.7462, "step": 27510 }, { "epoch": 0.9784019198293485, "grad_norm": 1.6332805156707764, "learning_rate": 1.6112548667302642e-05, "loss": 1.8658, "step": 27520 }, { "epoch": 0.9787574437827748, "grad_norm": 1.6110872030258179, "learning_rate": 1.6109446486816528e-05, "loss": 1.8511, "step": 27530 }, { "epoch": 0.9791129677362013, "grad_norm": 1.5546345710754395, "learning_rate": 1.6106343367957746e-05, "loss": 1.8003, "step": 27540 }, { "epoch": 0.9794684916896276, "grad_norm": 1.5880579948425293, "learning_rate": 1.610323931120291e-05, "loss": 1.7943, "step": 27550 }, { "epoch": 0.979824015643054, "grad_norm": 1.6306613683700562, "learning_rate": 1.610013431702878e-05, "loss": 1.7862, "step": 27560 }, { "epoch": 0.9801795395964803, "grad_norm": 1.6541390419006348, "learning_rate": 1.6097028385912268e-05, "loss": 1.7985, "step": 27570 }, { "epoch": 0.9805350635499067, "grad_norm": 1.6070232391357422, "learning_rate": 1.6093921518330424e-05, "loss": 1.7954, "step": 27580 }, { "epoch": 0.980890587503333, "grad_norm": 1.5823501348495483, "learning_rate": 1.6090813714760442e-05, "loss": 1.7746, "step": 27590 }, { "epoch": 0.9812461114567594, "grad_norm": 1.5954720973968506, "learning_rate": 1.6087704975679667e-05, "loss": 1.8562, "step": 27600 }, { "epoch": 0.9816016354101857, "grad_norm": 1.5298773050308228, "learning_rate": 1.6084595301565574e-05, "loss": 1.8005, "step": 27610 }, { "epoch": 0.9819571593636122, "grad_norm": 1.7437852621078491, "learning_rate": 1.60814846928958e-05, "loss": 1.7952, "step": 27620 }, { "epoch": 0.9823126833170385, "grad_norm": 1.7300840616226196, "learning_rate": 1.6078373150148104e-05, "loss": 1.844, "step": 27630 }, { "epoch": 0.9826682072704649, "grad_norm": 1.554736852645874, "learning_rate": 1.6075260673800404e-05, "loss": 1.8119, "step": 27640 }, { "epoch": 0.9830237312238912, "grad_norm": 1.6470636129379272, "learning_rate": 1.6072147264330756e-05, "loss": 1.8205, "step": 27650 }, { "epoch": 0.9833792551773176, "grad_norm": 1.5953969955444336, "learning_rate": 1.606903292221736e-05, "loss": 1.7882, "step": 27660 }, { "epoch": 0.9837347791307439, "grad_norm": 1.630172610282898, "learning_rate": 1.6065917647938562e-05, "loss": 1.8225, "step": 27670 }, { "epoch": 0.9840903030841703, "grad_norm": 1.59041428565979, "learning_rate": 1.6062801441972845e-05, "loss": 1.7515, "step": 27680 }, { "epoch": 0.9844458270375966, "grad_norm": 1.7602511644363403, "learning_rate": 1.605968430479884e-05, "loss": 1.7824, "step": 27690 }, { "epoch": 0.9848013509910231, "grad_norm": 1.7128077745437622, "learning_rate": 1.6056566236895327e-05, "loss": 1.8333, "step": 27700 }, { "epoch": 0.9851568749444494, "grad_norm": 1.7968146800994873, "learning_rate": 1.605344723874121e-05, "loss": 1.8075, "step": 27710 }, { "epoch": 0.9855123988978758, "grad_norm": 1.613105058670044, "learning_rate": 1.6050327310815553e-05, "loss": 1.7623, "step": 27720 }, { "epoch": 0.9858679228513021, "grad_norm": 1.74599289894104, "learning_rate": 1.604720645359756e-05, "loss": 1.7916, "step": 27730 }, { "epoch": 0.9862234468047285, "grad_norm": 1.6392744779586792, "learning_rate": 1.6044084667566565e-05, "loss": 1.7953, "step": 27740 }, { "epoch": 0.9865789707581548, "grad_norm": 1.6749186515808105, "learning_rate": 1.6040961953202067e-05, "loss": 1.7712, "step": 27750 }, { "epoch": 0.9869344947115812, "grad_norm": 1.640090823173523, "learning_rate": 1.603783831098369e-05, "loss": 1.7474, "step": 27760 }, { "epoch": 0.9872900186650075, "grad_norm": 1.628670334815979, "learning_rate": 1.60347137413912e-05, "loss": 1.7763, "step": 27770 }, { "epoch": 0.987645542618434, "grad_norm": 1.6321513652801514, "learning_rate": 1.6031588244904525e-05, "loss": 1.8094, "step": 27780 }, { "epoch": 0.9880010665718603, "grad_norm": 1.6589654684066772, "learning_rate": 1.602846182200371e-05, "loss": 1.8097, "step": 27790 }, { "epoch": 0.9883565905252867, "grad_norm": 1.6615729331970215, "learning_rate": 1.6025334473168962e-05, "loss": 1.8256, "step": 27800 }, { "epoch": 0.988712114478713, "grad_norm": 1.6079349517822266, "learning_rate": 1.6022206198880616e-05, "loss": 1.8137, "step": 27810 }, { "epoch": 0.9890676384321394, "grad_norm": 1.6241466999053955, "learning_rate": 1.6019076999619155e-05, "loss": 1.7848, "step": 27820 }, { "epoch": 0.9894231623855657, "grad_norm": 1.6167103052139282, "learning_rate": 1.6015946875865206e-05, "loss": 1.7953, "step": 27830 }, { "epoch": 0.9897786863389921, "grad_norm": 1.5251038074493408, "learning_rate": 1.601281582809954e-05, "loss": 1.7324, "step": 27840 }, { "epoch": 0.9901342102924184, "grad_norm": 1.5767289400100708, "learning_rate": 1.6009683856803063e-05, "loss": 1.7968, "step": 27850 }, { "epoch": 0.9904897342458449, "grad_norm": 1.695135235786438, "learning_rate": 1.6006550962456826e-05, "loss": 1.8523, "step": 27860 }, { "epoch": 0.9908452581992712, "grad_norm": 1.6537102460861206, "learning_rate": 1.6003417145542025e-05, "loss": 1.7757, "step": 27870 }, { "epoch": 0.9912007821526976, "grad_norm": 1.7178086042404175, "learning_rate": 1.600028240653999e-05, "loss": 1.7893, "step": 27880 }, { "epoch": 0.9915563061061239, "grad_norm": 1.6889991760253906, "learning_rate": 1.5997146745932198e-05, "loss": 1.7875, "step": 27890 }, { "epoch": 0.9919118300595503, "grad_norm": 1.6263501644134521, "learning_rate": 1.5994010164200268e-05, "loss": 1.7295, "step": 27900 }, { "epoch": 0.9922673540129766, "grad_norm": 1.5779887437820435, "learning_rate": 1.599087266182596e-05, "loss": 1.7868, "step": 27910 }, { "epoch": 0.992622877966403, "grad_norm": 1.6806020736694336, "learning_rate": 1.5987734239291177e-05, "loss": 1.7987, "step": 27920 }, { "epoch": 0.9929784019198293, "grad_norm": 1.634827971458435, "learning_rate": 1.5984594897077957e-05, "loss": 1.801, "step": 27930 }, { "epoch": 0.9933339258732558, "grad_norm": 1.7131608724594116, "learning_rate": 1.5981454635668483e-05, "loss": 1.8069, "step": 27940 }, { "epoch": 0.9936894498266821, "grad_norm": 1.5791804790496826, "learning_rate": 1.5978313455545085e-05, "loss": 1.7863, "step": 27950 }, { "epoch": 0.9940449737801085, "grad_norm": 1.7399296760559082, "learning_rate": 1.5975171357190223e-05, "loss": 1.7426, "step": 27960 }, { "epoch": 0.9944004977335348, "grad_norm": 1.627443552017212, "learning_rate": 1.5972028341086502e-05, "loss": 1.7914, "step": 27970 }, { "epoch": 0.9947560216869612, "grad_norm": 1.7265781164169312, "learning_rate": 1.5968884407716675e-05, "loss": 1.7873, "step": 27980 }, { "epoch": 0.9951115456403875, "grad_norm": 1.624820351600647, "learning_rate": 1.5965739557563627e-05, "loss": 1.7712, "step": 27990 }, { "epoch": 0.9954670695938139, "grad_norm": 1.6075184345245361, "learning_rate": 1.5962593791110388e-05, "loss": 1.7635, "step": 28000 }, { "epoch": 0.9958225935472402, "grad_norm": 1.6258301734924316, "learning_rate": 1.595944710884013e-05, "loss": 1.761, "step": 28010 }, { "epoch": 0.9961781175006666, "grad_norm": 1.6871589422225952, "learning_rate": 1.5956299511236163e-05, "loss": 1.7997, "step": 28020 }, { "epoch": 0.996533641454093, "grad_norm": 1.6195063591003418, "learning_rate": 1.5953150998781937e-05, "loss": 1.7717, "step": 28030 }, { "epoch": 0.9968891654075194, "grad_norm": 1.7741018533706665, "learning_rate": 1.595000157196104e-05, "loss": 1.7863, "step": 28040 }, { "epoch": 0.9972446893609457, "grad_norm": 1.6703013181686401, "learning_rate": 1.5946851231257214e-05, "loss": 1.7701, "step": 28050 }, { "epoch": 0.9976002133143721, "grad_norm": 1.6615813970565796, "learning_rate": 1.594369997715432e-05, "loss": 1.8063, "step": 28060 }, { "epoch": 0.9979557372677984, "grad_norm": 1.6776505708694458, "learning_rate": 1.594054781013638e-05, "loss": 1.7542, "step": 28070 }, { "epoch": 0.9983112612212248, "grad_norm": 1.6818045377731323, "learning_rate": 1.5937394730687545e-05, "loss": 1.8273, "step": 28080 }, { "epoch": 0.9986667851746511, "grad_norm": 1.69220769405365, "learning_rate": 1.5934240739292105e-05, "loss": 1.8174, "step": 28090 }, { "epoch": 0.9990223091280775, "grad_norm": 1.7431423664093018, "learning_rate": 1.5931085836434498e-05, "loss": 1.8147, "step": 28100 }, { "epoch": 0.9993778330815039, "grad_norm": 1.4685384035110474, "learning_rate": 1.5927930022599296e-05, "loss": 1.7891, "step": 28110 }, { "epoch": 0.9997333570349303, "grad_norm": 1.696838617324829, "learning_rate": 1.5924773298271207e-05, "loss": 1.7995, "step": 28120 }, { "epoch": 0.9999822238023287, "eval_loss": 1.8200198411941528, "eval_runtime": 9.6453, "eval_samples_per_second": 106.166, "eval_steps_per_second": 1.659, "step": 28127 }, { "epoch": 1.0000888809883566, "grad_norm": 1.7338460683822632, "learning_rate": 1.5921615663935088e-05, "loss": 1.7504, "step": 28130 }, { "epoch": 1.0004444049417829, "grad_norm": 1.7272236347198486, "learning_rate": 1.5918457120075935e-05, "loss": 1.7509, "step": 28140 }, { "epoch": 1.0007999288952094, "grad_norm": 1.6638456583023071, "learning_rate": 1.5915297667178876e-05, "loss": 1.7167, "step": 28150 }, { "epoch": 1.0011554528486357, "grad_norm": 1.5815426111221313, "learning_rate": 1.5912137305729184e-05, "loss": 1.6781, "step": 28160 }, { "epoch": 1.001510976802062, "grad_norm": 1.709189534187317, "learning_rate": 1.590897603621227e-05, "loss": 1.724, "step": 28170 }, { "epoch": 1.0018665007554883, "grad_norm": 1.631792664527893, "learning_rate": 1.5905813859113685e-05, "loss": 1.7253, "step": 28180 }, { "epoch": 1.0022220247089149, "grad_norm": 1.7928650379180908, "learning_rate": 1.5902650774919126e-05, "loss": 1.7281, "step": 28190 }, { "epoch": 1.0025775486623412, "grad_norm": 1.7226221561431885, "learning_rate": 1.5899486784114416e-05, "loss": 1.7169, "step": 28200 }, { "epoch": 1.0029330726157675, "grad_norm": 1.6352717876434326, "learning_rate": 1.5896321887185524e-05, "loss": 1.6984, "step": 28210 }, { "epoch": 1.0032885965691938, "grad_norm": 1.6581904888153076, "learning_rate": 1.5893156084618563e-05, "loss": 1.6843, "step": 28220 }, { "epoch": 1.0036441205226203, "grad_norm": 1.7965270280838013, "learning_rate": 1.5889989376899777e-05, "loss": 1.7198, "step": 28230 }, { "epoch": 1.0039996444760466, "grad_norm": 1.7627277374267578, "learning_rate": 1.5886821764515552e-05, "loss": 1.7669, "step": 28240 }, { "epoch": 1.004355168429473, "grad_norm": 1.6812225580215454, "learning_rate": 1.5883653247952415e-05, "loss": 1.6532, "step": 28250 }, { "epoch": 1.0047106923828992, "grad_norm": 1.7469241619110107, "learning_rate": 1.588048382769703e-05, "loss": 1.7292, "step": 28260 }, { "epoch": 1.0050662163363258, "grad_norm": 1.6404558420181274, "learning_rate": 1.5877313504236203e-05, "loss": 1.7052, "step": 28270 }, { "epoch": 1.005421740289752, "grad_norm": 1.7563822269439697, "learning_rate": 1.5874142278056867e-05, "loss": 1.7044, "step": 28280 }, { "epoch": 1.0057772642431784, "grad_norm": 1.6199296712875366, "learning_rate": 1.5870970149646113e-05, "loss": 1.7766, "step": 28290 }, { "epoch": 1.0061327881966047, "grad_norm": 1.7421317100524902, "learning_rate": 1.5867797119491154e-05, "loss": 1.7037, "step": 28300 }, { "epoch": 1.0064883121500312, "grad_norm": 1.7849583625793457, "learning_rate": 1.586462318807935e-05, "loss": 1.6933, "step": 28310 }, { "epoch": 1.0068438361034575, "grad_norm": 1.7753171920776367, "learning_rate": 1.586144835589819e-05, "loss": 1.7077, "step": 28320 }, { "epoch": 1.0071993600568838, "grad_norm": 1.9485249519348145, "learning_rate": 1.585827262343532e-05, "loss": 1.6975, "step": 28330 }, { "epoch": 1.0075548840103101, "grad_norm": 1.6119439601898193, "learning_rate": 1.5855095991178507e-05, "loss": 1.691, "step": 28340 }, { "epoch": 1.0079104079637367, "grad_norm": 1.7305185794830322, "learning_rate": 1.5851918459615658e-05, "loss": 1.7579, "step": 28350 }, { "epoch": 1.008265931917163, "grad_norm": 1.776729702949524, "learning_rate": 1.584874002923483e-05, "loss": 1.6817, "step": 28360 }, { "epoch": 1.0086214558705893, "grad_norm": 1.773091197013855, "learning_rate": 1.58455607005242e-05, "loss": 1.7401, "step": 28370 }, { "epoch": 1.0089769798240156, "grad_norm": 1.6762112379074097, "learning_rate": 1.5842380473972103e-05, "loss": 1.6955, "step": 28380 }, { "epoch": 1.009332503777442, "grad_norm": 1.6705354452133179, "learning_rate": 1.5839199350066994e-05, "loss": 1.7084, "step": 28390 }, { "epoch": 1.0096880277308684, "grad_norm": 1.9015192985534668, "learning_rate": 1.5836017329297477e-05, "loss": 1.6882, "step": 28400 }, { "epoch": 1.0100435516842947, "grad_norm": 1.7727024555206299, "learning_rate": 1.583283441215229e-05, "loss": 1.7069, "step": 28410 }, { "epoch": 1.010399075637721, "grad_norm": 1.7414228916168213, "learning_rate": 1.582965059912031e-05, "loss": 1.6679, "step": 28420 }, { "epoch": 1.0107545995911476, "grad_norm": 1.621151089668274, "learning_rate": 1.5826465890690556e-05, "loss": 1.6997, "step": 28430 }, { "epoch": 1.0111101235445739, "grad_norm": 1.7509541511535645, "learning_rate": 1.5823280287352167e-05, "loss": 1.7534, "step": 28440 }, { "epoch": 1.0114656474980002, "grad_norm": 1.6893500089645386, "learning_rate": 1.5820093789594436e-05, "loss": 1.7204, "step": 28450 }, { "epoch": 1.0118211714514265, "grad_norm": 1.8166451454162598, "learning_rate": 1.5816906397906796e-05, "loss": 1.7273, "step": 28460 }, { "epoch": 1.012176695404853, "grad_norm": 1.7543953657150269, "learning_rate": 1.5813718112778805e-05, "loss": 1.7063, "step": 28470 }, { "epoch": 1.0125322193582793, "grad_norm": 1.7828388214111328, "learning_rate": 1.5810528934700163e-05, "loss": 1.6872, "step": 28480 }, { "epoch": 1.0128877433117056, "grad_norm": 1.7891048192977905, "learning_rate": 1.580733886416071e-05, "loss": 1.6516, "step": 28490 }, { "epoch": 1.013243267265132, "grad_norm": 1.7601685523986816, "learning_rate": 1.5804147901650416e-05, "loss": 1.7025, "step": 28500 }, { "epoch": 1.0135987912185584, "grad_norm": 1.7422977685928345, "learning_rate": 1.5800956047659403e-05, "loss": 1.7617, "step": 28510 }, { "epoch": 1.0139543151719848, "grad_norm": 1.6051067113876343, "learning_rate": 1.5797763302677908e-05, "loss": 1.7054, "step": 28520 }, { "epoch": 1.014309839125411, "grad_norm": 1.8280378580093384, "learning_rate": 1.5794569667196324e-05, "loss": 1.7477, "step": 28530 }, { "epoch": 1.0146653630788374, "grad_norm": 1.639920949935913, "learning_rate": 1.579137514170517e-05, "loss": 1.7057, "step": 28540 }, { "epoch": 1.015020887032264, "grad_norm": 1.7240723371505737, "learning_rate": 1.5788179726695107e-05, "loss": 1.6872, "step": 28550 }, { "epoch": 1.0153764109856902, "grad_norm": 1.7161904573440552, "learning_rate": 1.578498342265693e-05, "loss": 1.6863, "step": 28560 }, { "epoch": 1.0157319349391165, "grad_norm": 1.6381793022155762, "learning_rate": 1.5781786230081576e-05, "loss": 1.7147, "step": 28570 }, { "epoch": 1.0160874588925428, "grad_norm": 1.7279282808303833, "learning_rate": 1.5778588149460104e-05, "loss": 1.7304, "step": 28580 }, { "epoch": 1.0164429828459693, "grad_norm": 1.7217353582382202, "learning_rate": 1.5775389181283727e-05, "loss": 1.7024, "step": 28590 }, { "epoch": 1.0167985067993957, "grad_norm": 1.6571637392044067, "learning_rate": 1.5772189326043782e-05, "loss": 1.7066, "step": 28600 }, { "epoch": 1.017154030752822, "grad_norm": 1.8319371938705444, "learning_rate": 1.5768988584231748e-05, "loss": 1.6951, "step": 28610 }, { "epoch": 1.0175095547062483, "grad_norm": 1.726785659790039, "learning_rate": 1.5765786956339238e-05, "loss": 1.7702, "step": 28620 }, { "epoch": 1.0178650786596748, "grad_norm": 1.6269569396972656, "learning_rate": 1.576258444285801e-05, "loss": 1.6715, "step": 28630 }, { "epoch": 1.018220602613101, "grad_norm": 1.6807979345321655, "learning_rate": 1.5759381044279936e-05, "loss": 1.7301, "step": 28640 }, { "epoch": 1.0185761265665274, "grad_norm": 1.6536914110183716, "learning_rate": 1.5756176761097048e-05, "loss": 1.7087, "step": 28650 }, { "epoch": 1.0189316505199537, "grad_norm": 1.7635669708251953, "learning_rate": 1.57529715938015e-05, "loss": 1.6841, "step": 28660 }, { "epoch": 1.0192871744733802, "grad_norm": 1.760072946548462, "learning_rate": 1.574976554288559e-05, "loss": 1.7422, "step": 28670 }, { "epoch": 1.0196426984268065, "grad_norm": 1.6653392314910889, "learning_rate": 1.574655860884174e-05, "loss": 1.6776, "step": 28680 }, { "epoch": 1.0199982223802329, "grad_norm": 1.7768166065216064, "learning_rate": 1.574335079216252e-05, "loss": 1.7074, "step": 28690 }, { "epoch": 1.0203537463336592, "grad_norm": 1.742034912109375, "learning_rate": 1.5740142093340632e-05, "loss": 1.6839, "step": 28700 }, { "epoch": 1.0207092702870857, "grad_norm": 1.6778193712234497, "learning_rate": 1.5736932512868904e-05, "loss": 1.6873, "step": 28710 }, { "epoch": 1.021064794240512, "grad_norm": 1.7619653940200806, "learning_rate": 1.5733722051240318e-05, "loss": 1.7114, "step": 28720 }, { "epoch": 1.0214203181939383, "grad_norm": 1.6607322692871094, "learning_rate": 1.573051070894797e-05, "loss": 1.6773, "step": 28730 }, { "epoch": 1.0217758421473646, "grad_norm": 1.7014106512069702, "learning_rate": 1.5727298486485112e-05, "loss": 1.7189, "step": 28740 }, { "epoch": 1.0221313661007911, "grad_norm": 1.695156216621399, "learning_rate": 1.572408538434512e-05, "loss": 1.7163, "step": 28750 }, { "epoch": 1.0224868900542174, "grad_norm": 1.7573307752609253, "learning_rate": 1.57208714030215e-05, "loss": 1.7234, "step": 28760 }, { "epoch": 1.0228424140076438, "grad_norm": 1.7744965553283691, "learning_rate": 1.5717656543007896e-05, "loss": 1.704, "step": 28770 }, { "epoch": 1.02319793796107, "grad_norm": 1.8379814624786377, "learning_rate": 1.5714440804798105e-05, "loss": 1.7446, "step": 28780 }, { "epoch": 1.0235534619144966, "grad_norm": 1.6961009502410889, "learning_rate": 1.5711224188886035e-05, "loss": 1.6822, "step": 28790 }, { "epoch": 1.023908985867923, "grad_norm": 1.6482083797454834, "learning_rate": 1.5708006695765737e-05, "loss": 1.6785, "step": 28800 }, { "epoch": 1.0242645098213492, "grad_norm": 1.6825973987579346, "learning_rate": 1.5704788325931403e-05, "loss": 1.7216, "step": 28810 }, { "epoch": 1.0246200337747755, "grad_norm": 1.7570611238479614, "learning_rate": 1.570156907987735e-05, "loss": 1.6912, "step": 28820 }, { "epoch": 1.024975557728202, "grad_norm": 1.7243115901947021, "learning_rate": 1.5698348958098035e-05, "loss": 1.7154, "step": 28830 }, { "epoch": 1.0253310816816283, "grad_norm": 1.759553074836731, "learning_rate": 1.569512796108805e-05, "loss": 1.733, "step": 28840 }, { "epoch": 1.0256866056350546, "grad_norm": 1.828395962715149, "learning_rate": 1.569190608934212e-05, "loss": 1.7296, "step": 28850 }, { "epoch": 1.026042129588481, "grad_norm": 1.7379239797592163, "learning_rate": 1.56886833433551e-05, "loss": 1.6896, "step": 28860 }, { "epoch": 1.0263976535419075, "grad_norm": 1.7324833869934082, "learning_rate": 1.5685459723621987e-05, "loss": 1.7296, "step": 28870 }, { "epoch": 1.0267531774953338, "grad_norm": 1.8051363229751587, "learning_rate": 1.5682235230637913e-05, "loss": 1.6911, "step": 28880 }, { "epoch": 1.02710870144876, "grad_norm": 1.824020266532898, "learning_rate": 1.567900986489813e-05, "loss": 1.6975, "step": 28890 }, { "epoch": 1.0274642254021864, "grad_norm": 1.8758701086044312, "learning_rate": 1.5675783626898043e-05, "loss": 1.7233, "step": 28900 }, { "epoch": 1.027819749355613, "grad_norm": 1.7881940603256226, "learning_rate": 1.5672556517133177e-05, "loss": 1.6468, "step": 28910 }, { "epoch": 1.0281752733090392, "grad_norm": 1.7071408033370972, "learning_rate": 1.5669328536099196e-05, "loss": 1.7275, "step": 28920 }, { "epoch": 1.0285307972624655, "grad_norm": 1.6805064678192139, "learning_rate": 1.56660996842919e-05, "loss": 1.7205, "step": 28930 }, { "epoch": 1.0288863212158919, "grad_norm": 1.7511591911315918, "learning_rate": 1.566286996220722e-05, "loss": 1.7114, "step": 28940 }, { "epoch": 1.0292418451693184, "grad_norm": 1.841711163520813, "learning_rate": 1.565963937034122e-05, "loss": 1.688, "step": 28950 }, { "epoch": 1.0295973691227447, "grad_norm": 1.788758635520935, "learning_rate": 1.5656407909190096e-05, "loss": 1.701, "step": 28960 }, { "epoch": 1.029952893076171, "grad_norm": 1.80824613571167, "learning_rate": 1.5653175579250186e-05, "loss": 1.7522, "step": 28970 }, { "epoch": 1.0303084170295973, "grad_norm": 1.8207590579986572, "learning_rate": 1.5649942381017953e-05, "loss": 1.7089, "step": 28980 }, { "epoch": 1.0306639409830238, "grad_norm": 1.8223505020141602, "learning_rate": 1.5646708314989997e-05, "loss": 1.7478, "step": 28990 }, { "epoch": 1.0310194649364501, "grad_norm": 1.8172624111175537, "learning_rate": 1.5643473381663047e-05, "loss": 1.7021, "step": 29000 }, { "epoch": 1.0313749888898764, "grad_norm": 1.7984023094177246, "learning_rate": 1.5640237581533967e-05, "loss": 1.7288, "step": 29010 }, { "epoch": 1.0317305128433027, "grad_norm": 1.8102020025253296, "learning_rate": 1.5637000915099766e-05, "loss": 1.733, "step": 29020 }, { "epoch": 1.0320860367967293, "grad_norm": 1.7115474939346313, "learning_rate": 1.5633763382857562e-05, "loss": 1.7229, "step": 29030 }, { "epoch": 1.0324415607501556, "grad_norm": 1.827752709388733, "learning_rate": 1.563052498530463e-05, "loss": 1.6787, "step": 29040 }, { "epoch": 1.032797084703582, "grad_norm": 1.7382855415344238, "learning_rate": 1.5627285722938363e-05, "loss": 1.7076, "step": 29050 }, { "epoch": 1.0331526086570082, "grad_norm": 1.73077392578125, "learning_rate": 1.562404559625629e-05, "loss": 1.7124, "step": 29060 }, { "epoch": 1.0335081326104347, "grad_norm": 1.7690300941467285, "learning_rate": 1.5620804605756082e-05, "loss": 1.7559, "step": 29070 }, { "epoch": 1.033863656563861, "grad_norm": 1.8357259035110474, "learning_rate": 1.5617562751935525e-05, "loss": 1.6902, "step": 29080 }, { "epoch": 1.0342191805172873, "grad_norm": 1.6743861436843872, "learning_rate": 1.5614320035292555e-05, "loss": 1.7236, "step": 29090 }, { "epoch": 1.0345747044707136, "grad_norm": 1.7483441829681396, "learning_rate": 1.5611076456325226e-05, "loss": 1.711, "step": 29100 }, { "epoch": 1.0349302284241402, "grad_norm": 1.6104241609573364, "learning_rate": 1.5607832015531736e-05, "loss": 1.7007, "step": 29110 }, { "epoch": 1.0352857523775665, "grad_norm": 1.6402735710144043, "learning_rate": 1.560458671341041e-05, "loss": 1.7371, "step": 29120 }, { "epoch": 1.0356412763309928, "grad_norm": 1.6589477062225342, "learning_rate": 1.5601340550459708e-05, "loss": 1.6802, "step": 29130 }, { "epoch": 1.035996800284419, "grad_norm": 1.6863501071929932, "learning_rate": 1.559809352717822e-05, "loss": 1.7383, "step": 29140 }, { "epoch": 1.0363523242378456, "grad_norm": 1.7066422700881958, "learning_rate": 1.559484564406466e-05, "loss": 1.7307, "step": 29150 }, { "epoch": 1.036707848191272, "grad_norm": 1.7184172868728638, "learning_rate": 1.5591596901617892e-05, "loss": 1.6952, "step": 29160 }, { "epoch": 1.0370633721446982, "grad_norm": 1.7480359077453613, "learning_rate": 1.55883473003369e-05, "loss": 1.7233, "step": 29170 }, { "epoch": 1.0374188960981245, "grad_norm": 1.810819387435913, "learning_rate": 1.55850968407208e-05, "loss": 1.7218, "step": 29180 }, { "epoch": 1.037774420051551, "grad_norm": 1.704298734664917, "learning_rate": 1.5581845523268847e-05, "loss": 1.6839, "step": 29190 }, { "epoch": 1.0381299440049774, "grad_norm": 1.755265474319458, "learning_rate": 1.557859334848042e-05, "loss": 1.7393, "step": 29200 }, { "epoch": 1.0384854679584037, "grad_norm": 1.7880268096923828, "learning_rate": 1.557534031685503e-05, "loss": 1.7197, "step": 29210 }, { "epoch": 1.03884099191183, "grad_norm": 1.7886680364608765, "learning_rate": 1.5572086428892325e-05, "loss": 1.7318, "step": 29220 }, { "epoch": 1.0391965158652565, "grad_norm": 1.8346315622329712, "learning_rate": 1.5568831685092083e-05, "loss": 1.7039, "step": 29230 }, { "epoch": 1.0395520398186828, "grad_norm": 1.7319073677062988, "learning_rate": 1.5565576085954213e-05, "loss": 1.7023, "step": 29240 }, { "epoch": 1.0399075637721091, "grad_norm": 1.759717583656311, "learning_rate": 1.556231963197875e-05, "loss": 1.6992, "step": 29250 }, { "epoch": 1.0402630877255354, "grad_norm": 1.7966276407241821, "learning_rate": 1.555906232366587e-05, "loss": 1.7402, "step": 29260 }, { "epoch": 1.040618611678962, "grad_norm": 1.7950040102005005, "learning_rate": 1.555580416151587e-05, "loss": 1.6918, "step": 29270 }, { "epoch": 1.0409741356323883, "grad_norm": 1.7796049118041992, "learning_rate": 1.555254514602919e-05, "loss": 1.7275, "step": 29280 }, { "epoch": 1.0413296595858146, "grad_norm": 1.8265540599822998, "learning_rate": 1.554928527770638e-05, "loss": 1.679, "step": 29290 }, { "epoch": 1.0416851835392409, "grad_norm": 1.799001693725586, "learning_rate": 1.5546024557048157e-05, "loss": 1.6946, "step": 29300 }, { "epoch": 1.0420407074926674, "grad_norm": 1.9293655157089233, "learning_rate": 1.5542762984555332e-05, "loss": 1.7243, "step": 29310 }, { "epoch": 1.0423962314460937, "grad_norm": 1.7115957736968994, "learning_rate": 1.5539500560728865e-05, "loss": 1.6875, "step": 29320 }, { "epoch": 1.04275175539952, "grad_norm": 1.7396397590637207, "learning_rate": 1.5536237286069847e-05, "loss": 1.7303, "step": 29330 }, { "epoch": 1.0431072793529463, "grad_norm": 1.6194676160812378, "learning_rate": 1.553297316107949e-05, "loss": 1.6769, "step": 29340 }, { "epoch": 1.0434628033063729, "grad_norm": 1.8141989707946777, "learning_rate": 1.552970818625915e-05, "loss": 1.7001, "step": 29350 }, { "epoch": 1.0438183272597992, "grad_norm": 1.7746583223342896, "learning_rate": 1.5526442362110304e-05, "loss": 1.7168, "step": 29360 }, { "epoch": 1.0441738512132255, "grad_norm": 1.7249982357025146, "learning_rate": 1.5523175689134563e-05, "loss": 1.7132, "step": 29370 }, { "epoch": 1.0445293751666518, "grad_norm": 1.8156765699386597, "learning_rate": 1.551990816783367e-05, "loss": 1.681, "step": 29380 }, { "epoch": 1.0448848991200783, "grad_norm": 1.779907464981079, "learning_rate": 1.5516639798709484e-05, "loss": 1.7307, "step": 29390 }, { "epoch": 1.0452404230735046, "grad_norm": 1.8506804704666138, "learning_rate": 1.551337058226402e-05, "loss": 1.714, "step": 29400 }, { "epoch": 1.045595947026931, "grad_norm": 1.7650202512741089, "learning_rate": 1.5510100518999407e-05, "loss": 1.6814, "step": 29410 }, { "epoch": 1.0459514709803572, "grad_norm": 1.693860411643982, "learning_rate": 1.5506829609417896e-05, "loss": 1.6948, "step": 29420 }, { "epoch": 1.0463069949337838, "grad_norm": 1.6548007726669312, "learning_rate": 1.5503557854021888e-05, "loss": 1.6587, "step": 29430 }, { "epoch": 1.04666251888721, "grad_norm": 1.7331669330596924, "learning_rate": 1.5500285253313904e-05, "loss": 1.7542, "step": 29440 }, { "epoch": 1.0470180428406364, "grad_norm": 1.8393986225128174, "learning_rate": 1.5497011807796586e-05, "loss": 1.7134, "step": 29450 }, { "epoch": 1.0473735667940627, "grad_norm": 1.802408218383789, "learning_rate": 1.5493737517972728e-05, "loss": 1.6933, "step": 29460 }, { "epoch": 1.0477290907474892, "grad_norm": 1.9663327932357788, "learning_rate": 1.5490462384345228e-05, "loss": 1.6986, "step": 29470 }, { "epoch": 1.0480846147009155, "grad_norm": 1.8177143335342407, "learning_rate": 1.5487186407417133e-05, "loss": 1.7366, "step": 29480 }, { "epoch": 1.0484401386543418, "grad_norm": 1.6974382400512695, "learning_rate": 1.548390958769161e-05, "loss": 1.7108, "step": 29490 }, { "epoch": 1.0487956626077681, "grad_norm": 1.7218098640441895, "learning_rate": 1.548063192567196e-05, "loss": 1.6861, "step": 29500 }, { "epoch": 1.0491511865611947, "grad_norm": 1.8506237268447876, "learning_rate": 1.547735342186161e-05, "loss": 1.7004, "step": 29510 }, { "epoch": 1.049506710514621, "grad_norm": 1.6885756254196167, "learning_rate": 1.5474074076764116e-05, "loss": 1.6575, "step": 29520 }, { "epoch": 1.0498622344680473, "grad_norm": 1.838579535484314, "learning_rate": 1.5470793890883167e-05, "loss": 1.6883, "step": 29530 }, { "epoch": 1.0502177584214736, "grad_norm": 1.7549519538879395, "learning_rate": 1.5467512864722576e-05, "loss": 1.6986, "step": 29540 }, { "epoch": 1.0505732823749, "grad_norm": 1.7127375602722168, "learning_rate": 1.5464230998786295e-05, "loss": 1.676, "step": 29550 }, { "epoch": 1.0509288063283264, "grad_norm": 1.8736270666122437, "learning_rate": 1.5460948293578395e-05, "loss": 1.7349, "step": 29560 }, { "epoch": 1.0512843302817527, "grad_norm": 1.725990653038025, "learning_rate": 1.545766474960307e-05, "loss": 1.6781, "step": 29570 }, { "epoch": 1.051639854235179, "grad_norm": 1.744989275932312, "learning_rate": 1.5454380367364668e-05, "loss": 1.7184, "step": 29580 }, { "epoch": 1.0519953781886056, "grad_norm": 1.7831934690475464, "learning_rate": 1.5451095147367637e-05, "loss": 1.7057, "step": 29590 }, { "epoch": 1.0523509021420319, "grad_norm": 1.7298821210861206, "learning_rate": 1.5447809090116566e-05, "loss": 1.7336, "step": 29600 }, { "epoch": 1.0527064260954582, "grad_norm": 1.6789156198501587, "learning_rate": 1.5444522196116182e-05, "loss": 1.7201, "step": 29610 }, { "epoch": 1.0530619500488845, "grad_norm": 1.647962212562561, "learning_rate": 1.5441234465871323e-05, "loss": 1.7174, "step": 29620 }, { "epoch": 1.053417474002311, "grad_norm": 1.9207396507263184, "learning_rate": 1.543794589988697e-05, "loss": 1.7256, "step": 29630 }, { "epoch": 1.0537729979557373, "grad_norm": 1.7186084985733032, "learning_rate": 1.543465649866822e-05, "loss": 1.6727, "step": 29640 }, { "epoch": 1.0541285219091636, "grad_norm": 1.6825263500213623, "learning_rate": 1.5431366262720313e-05, "loss": 1.7493, "step": 29650 }, { "epoch": 1.05448404586259, "grad_norm": 1.9607890844345093, "learning_rate": 1.5428075192548594e-05, "loss": 1.6711, "step": 29660 }, { "epoch": 1.0548395698160165, "grad_norm": 1.878871202468872, "learning_rate": 1.5424783288658564e-05, "loss": 1.7349, "step": 29670 }, { "epoch": 1.0551950937694428, "grad_norm": 1.799309492111206, "learning_rate": 1.5421490551555838e-05, "loss": 1.6914, "step": 29680 }, { "epoch": 1.055550617722869, "grad_norm": 1.80063796043396, "learning_rate": 1.541819698174615e-05, "loss": 1.7078, "step": 29690 }, { "epoch": 1.0559061416762954, "grad_norm": 1.7400059700012207, "learning_rate": 1.5414902579735383e-05, "loss": 1.6727, "step": 29700 }, { "epoch": 1.056261665629722, "grad_norm": 1.831801414489746, "learning_rate": 1.541160734602953e-05, "loss": 1.6939, "step": 29710 }, { "epoch": 1.0566171895831482, "grad_norm": 1.7798649072647095, "learning_rate": 1.540831128113472e-05, "loss": 1.7225, "step": 29720 }, { "epoch": 1.0569727135365745, "grad_norm": 1.744949221611023, "learning_rate": 1.5405014385557208e-05, "loss": 1.7369, "step": 29730 }, { "epoch": 1.0573282374900008, "grad_norm": 1.7667673826217651, "learning_rate": 1.540171665980337e-05, "loss": 1.6965, "step": 29740 }, { "epoch": 1.0576837614434274, "grad_norm": 1.7514196634292603, "learning_rate": 1.539841810437973e-05, "loss": 1.702, "step": 29750 }, { "epoch": 1.0580392853968537, "grad_norm": 1.6818276643753052, "learning_rate": 1.5395118719792915e-05, "loss": 1.7067, "step": 29760 }, { "epoch": 1.05839480935028, "grad_norm": 1.7409169673919678, "learning_rate": 1.539181850654969e-05, "loss": 1.667, "step": 29770 }, { "epoch": 1.0587503333037063, "grad_norm": 1.6888810396194458, "learning_rate": 1.5388517465156952e-05, "loss": 1.6842, "step": 29780 }, { "epoch": 1.0591058572571328, "grad_norm": 1.7124814987182617, "learning_rate": 1.5385215596121718e-05, "loss": 1.6986, "step": 29790 }, { "epoch": 1.059461381210559, "grad_norm": 1.6616439819335938, "learning_rate": 1.5381912899951133e-05, "loss": 1.6701, "step": 29800 }, { "epoch": 1.0598169051639854, "grad_norm": 1.7271385192871094, "learning_rate": 1.5378609377152472e-05, "loss": 1.7024, "step": 29810 }, { "epoch": 1.0601724291174117, "grad_norm": 1.8895982503890991, "learning_rate": 1.5375305028233135e-05, "loss": 1.7085, "step": 29820 }, { "epoch": 1.0605279530708382, "grad_norm": 1.7226389646530151, "learning_rate": 1.5371999853700647e-05, "loss": 1.7112, "step": 29830 }, { "epoch": 1.0608834770242646, "grad_norm": 1.779766321182251, "learning_rate": 1.5368693854062665e-05, "loss": 1.6956, "step": 29840 }, { "epoch": 1.0612390009776909, "grad_norm": 1.7279249429702759, "learning_rate": 1.536538702982697e-05, "loss": 1.7079, "step": 29850 }, { "epoch": 1.0615945249311172, "grad_norm": 1.8688111305236816, "learning_rate": 1.5362079381501467e-05, "loss": 1.6813, "step": 29860 }, { "epoch": 1.0619500488845437, "grad_norm": 1.7290921211242676, "learning_rate": 1.5358770909594188e-05, "loss": 1.7075, "step": 29870 }, { "epoch": 1.06230557283797, "grad_norm": 1.8950884342193604, "learning_rate": 1.5355461614613306e-05, "loss": 1.6859, "step": 29880 }, { "epoch": 1.0626610967913963, "grad_norm": 1.83281672000885, "learning_rate": 1.5352151497067093e-05, "loss": 1.7043, "step": 29890 }, { "epoch": 1.0630166207448226, "grad_norm": 1.6436896324157715, "learning_rate": 1.534884055746397e-05, "loss": 1.7091, "step": 29900 }, { "epoch": 1.0633721446982491, "grad_norm": 1.8379966020584106, "learning_rate": 1.5345528796312473e-05, "loss": 1.703, "step": 29910 }, { "epoch": 1.0637276686516755, "grad_norm": 1.8118281364440918, "learning_rate": 1.5342216214121273e-05, "loss": 1.7074, "step": 29920 }, { "epoch": 1.0640831926051018, "grad_norm": 1.7613012790679932, "learning_rate": 1.5338902811399154e-05, "loss": 1.6633, "step": 29930 }, { "epoch": 1.064438716558528, "grad_norm": 1.8082212209701538, "learning_rate": 1.5335588588655043e-05, "loss": 1.7131, "step": 29940 }, { "epoch": 1.0647942405119546, "grad_norm": 1.8668187856674194, "learning_rate": 1.5332273546397978e-05, "loss": 1.6984, "step": 29950 }, { "epoch": 1.065149764465381, "grad_norm": 1.8108575344085693, "learning_rate": 1.532895768513713e-05, "loss": 1.7179, "step": 29960 }, { "epoch": 1.0655052884188072, "grad_norm": 1.815421462059021, "learning_rate": 1.5325641005381793e-05, "loss": 1.697, "step": 29970 }, { "epoch": 1.0658608123722335, "grad_norm": 1.6751563549041748, "learning_rate": 1.5322323507641387e-05, "loss": 1.6947, "step": 29980 }, { "epoch": 1.06621633632566, "grad_norm": 1.7803194522857666, "learning_rate": 1.5319005192425466e-05, "loss": 1.6813, "step": 29990 }, { "epoch": 1.0665718602790863, "grad_norm": 1.685044527053833, "learning_rate": 1.5315686060243695e-05, "loss": 1.6922, "step": 30000 }, { "epoch": 1.0669273842325127, "grad_norm": 1.7172998189926147, "learning_rate": 1.5312366111605877e-05, "loss": 1.6736, "step": 30010 }, { "epoch": 1.067282908185939, "grad_norm": 1.677639126777649, "learning_rate": 1.5309045347021933e-05, "loss": 1.6962, "step": 30020 }, { "epoch": 1.0676384321393655, "grad_norm": 1.7140579223632812, "learning_rate": 1.530572376700191e-05, "loss": 1.6954, "step": 30030 }, { "epoch": 1.0679939560927918, "grad_norm": 1.7826284170150757, "learning_rate": 1.5302401372055987e-05, "loss": 1.7092, "step": 30040 }, { "epoch": 1.068349480046218, "grad_norm": 1.8108514547348022, "learning_rate": 1.5299078162694453e-05, "loss": 1.7132, "step": 30050 }, { "epoch": 1.0687050039996444, "grad_norm": 1.9069411754608154, "learning_rate": 1.5295754139427743e-05, "loss": 1.6956, "step": 30060 }, { "epoch": 1.069060527953071, "grad_norm": 1.8205689191818237, "learning_rate": 1.5292429302766403e-05, "loss": 1.6801, "step": 30070 }, { "epoch": 1.0694160519064972, "grad_norm": 1.7045793533325195, "learning_rate": 1.5289103653221103e-05, "loss": 1.6697, "step": 30080 }, { "epoch": 1.0697715758599236, "grad_norm": 1.7002933025360107, "learning_rate": 1.5285777191302648e-05, "loss": 1.7206, "step": 30090 }, { "epoch": 1.0701270998133499, "grad_norm": 1.7778072357177734, "learning_rate": 1.5282449917521957e-05, "loss": 1.7148, "step": 30100 }, { "epoch": 1.0704826237667764, "grad_norm": 1.7735117673873901, "learning_rate": 1.5279121832390077e-05, "loss": 1.6865, "step": 30110 }, { "epoch": 1.0708381477202027, "grad_norm": 1.7426199913024902, "learning_rate": 1.5275792936418188e-05, "loss": 1.6838, "step": 30120 }, { "epoch": 1.071193671673629, "grad_norm": 1.8336448669433594, "learning_rate": 1.5272463230117583e-05, "loss": 1.7238, "step": 30130 }, { "epoch": 1.0715491956270553, "grad_norm": 1.7430644035339355, "learning_rate": 1.526913271399968e-05, "loss": 1.7086, "step": 30140 }, { "epoch": 1.0719047195804818, "grad_norm": 1.8755711317062378, "learning_rate": 1.5265801388576034e-05, "loss": 1.713, "step": 30150 }, { "epoch": 1.0722602435339081, "grad_norm": 1.8830538988113403, "learning_rate": 1.526246925435831e-05, "loss": 1.6825, "step": 30160 }, { "epoch": 1.0726157674873344, "grad_norm": 1.708674669265747, "learning_rate": 1.5259136311858306e-05, "loss": 1.6879, "step": 30170 }, { "epoch": 1.0729712914407608, "grad_norm": 1.7258754968643188, "learning_rate": 1.5255802561587936e-05, "loss": 1.6937, "step": 30180 }, { "epoch": 1.0733268153941873, "grad_norm": 1.6590732336044312, "learning_rate": 1.525246800405925e-05, "loss": 1.6877, "step": 30190 }, { "epoch": 1.0736823393476136, "grad_norm": 1.7467076778411865, "learning_rate": 1.5249132639784414e-05, "loss": 1.7035, "step": 30200 }, { "epoch": 1.07403786330104, "grad_norm": 1.836143136024475, "learning_rate": 1.5245796469275714e-05, "loss": 1.6815, "step": 30210 }, { "epoch": 1.0743933872544662, "grad_norm": 1.9502182006835938, "learning_rate": 1.5242459493045564e-05, "loss": 1.7107, "step": 30220 }, { "epoch": 1.0747489112078927, "grad_norm": 1.838701844215393, "learning_rate": 1.5239121711606513e-05, "loss": 1.6712, "step": 30230 }, { "epoch": 1.075104435161319, "grad_norm": 1.7862120866775513, "learning_rate": 1.5235783125471213e-05, "loss": 1.7075, "step": 30240 }, { "epoch": 1.0754599591147453, "grad_norm": 1.7898902893066406, "learning_rate": 1.5232443735152456e-05, "loss": 1.7522, "step": 30250 }, { "epoch": 1.0758154830681717, "grad_norm": 1.9410507678985596, "learning_rate": 1.5229103541163146e-05, "loss": 1.6943, "step": 30260 }, { "epoch": 1.0761710070215982, "grad_norm": 1.6454249620437622, "learning_rate": 1.5225762544016318e-05, "loss": 1.7182, "step": 30270 }, { "epoch": 1.0765265309750245, "grad_norm": 1.7281266450881958, "learning_rate": 1.5222420744225133e-05, "loss": 1.7008, "step": 30280 }, { "epoch": 1.0768820549284508, "grad_norm": 1.7373607158660889, "learning_rate": 1.5219078142302863e-05, "loss": 1.685, "step": 30290 }, { "epoch": 1.077237578881877, "grad_norm": 1.750491738319397, "learning_rate": 1.5215734738762918e-05, "loss": 1.6883, "step": 30300 }, { "epoch": 1.0775931028353036, "grad_norm": 1.8068252801895142, "learning_rate": 1.5212390534118815e-05, "loss": 1.7167, "step": 30310 }, { "epoch": 1.07794862678873, "grad_norm": 1.6495577096939087, "learning_rate": 1.5209045528884212e-05, "loss": 1.6949, "step": 30320 }, { "epoch": 1.0783041507421562, "grad_norm": 1.734872817993164, "learning_rate": 1.5205699723572874e-05, "loss": 1.7253, "step": 30330 }, { "epoch": 1.0786596746955825, "grad_norm": 1.672032117843628, "learning_rate": 1.5202353118698701e-05, "loss": 1.7135, "step": 30340 }, { "epoch": 1.079015198649009, "grad_norm": 1.6830453872680664, "learning_rate": 1.5199005714775705e-05, "loss": 1.6763, "step": 30350 }, { "epoch": 1.0793707226024354, "grad_norm": 1.9315227270126343, "learning_rate": 1.5195657512318032e-05, "loss": 1.6966, "step": 30360 }, { "epoch": 1.0797262465558617, "grad_norm": 1.7042914628982544, "learning_rate": 1.5192308511839942e-05, "loss": 1.7142, "step": 30370 }, { "epoch": 1.080081770509288, "grad_norm": 1.8929035663604736, "learning_rate": 1.5188958713855822e-05, "loss": 1.7066, "step": 30380 }, { "epoch": 1.0804372944627145, "grad_norm": 1.83518385887146, "learning_rate": 1.5185608118880172e-05, "loss": 1.7335, "step": 30390 }, { "epoch": 1.0807928184161408, "grad_norm": 1.8305349349975586, "learning_rate": 1.5182256727427636e-05, "loss": 1.6995, "step": 30400 }, { "epoch": 1.0811483423695671, "grad_norm": 1.8416990041732788, "learning_rate": 1.5178904540012956e-05, "loss": 1.6695, "step": 30410 }, { "epoch": 1.0815038663229934, "grad_norm": 1.7948909997940063, "learning_rate": 1.5175551557151012e-05, "loss": 1.6918, "step": 30420 }, { "epoch": 1.08185939027642, "grad_norm": 1.766680121421814, "learning_rate": 1.5172197779356799e-05, "loss": 1.7204, "step": 30430 }, { "epoch": 1.0822149142298463, "grad_norm": 1.8292341232299805, "learning_rate": 1.5168843207145436e-05, "loss": 1.7263, "step": 30440 }, { "epoch": 1.0825704381832726, "grad_norm": 1.79512619972229, "learning_rate": 1.516548784103217e-05, "loss": 1.7272, "step": 30450 }, { "epoch": 1.082925962136699, "grad_norm": 1.7170366048812866, "learning_rate": 1.5162131681532355e-05, "loss": 1.7457, "step": 30460 }, { "epoch": 1.0832814860901254, "grad_norm": 1.8364603519439697, "learning_rate": 1.515877472916148e-05, "loss": 1.6868, "step": 30470 }, { "epoch": 1.0836370100435517, "grad_norm": 1.7465304136276245, "learning_rate": 1.5155416984435153e-05, "loss": 1.6837, "step": 30480 }, { "epoch": 1.083992533996978, "grad_norm": 1.655500888824463, "learning_rate": 1.5152058447869103e-05, "loss": 1.6866, "step": 30490 }, { "epoch": 1.0843480579504043, "grad_norm": 1.9954131841659546, "learning_rate": 1.5148699119979183e-05, "loss": 1.6422, "step": 30500 }, { "epoch": 1.0847035819038309, "grad_norm": 1.7527703046798706, "learning_rate": 1.5145339001281355e-05, "loss": 1.6732, "step": 30510 }, { "epoch": 1.0850591058572572, "grad_norm": 1.789578914642334, "learning_rate": 1.514197809229172e-05, "loss": 1.6732, "step": 30520 }, { "epoch": 1.0854146298106835, "grad_norm": 1.6960538625717163, "learning_rate": 1.5138616393526491e-05, "loss": 1.6768, "step": 30530 }, { "epoch": 1.0857701537641098, "grad_norm": 1.7201929092407227, "learning_rate": 1.5135253905502e-05, "loss": 1.7124, "step": 30540 }, { "epoch": 1.0861256777175363, "grad_norm": 1.8330485820770264, "learning_rate": 1.513189062873471e-05, "loss": 1.6952, "step": 30550 }, { "epoch": 1.0864812016709626, "grad_norm": 1.7012553215026855, "learning_rate": 1.5128526563741198e-05, "loss": 1.7371, "step": 30560 }, { "epoch": 1.086836725624389, "grad_norm": 1.8821933269500732, "learning_rate": 1.5125161711038159e-05, "loss": 1.7069, "step": 30570 }, { "epoch": 1.0871922495778152, "grad_norm": 1.7384915351867676, "learning_rate": 1.5121796071142418e-05, "loss": 1.7436, "step": 30580 }, { "epoch": 1.0875477735312418, "grad_norm": 1.8113093376159668, "learning_rate": 1.5118429644570914e-05, "loss": 1.6728, "step": 30590 }, { "epoch": 1.087903297484668, "grad_norm": 1.7170952558517456, "learning_rate": 1.511506243184071e-05, "loss": 1.7562, "step": 30600 }, { "epoch": 1.0882588214380944, "grad_norm": 1.7559735774993896, "learning_rate": 1.5111694433468987e-05, "loss": 1.7138, "step": 30610 }, { "epoch": 1.0886143453915207, "grad_norm": 1.8377747535705566, "learning_rate": 1.510832564997305e-05, "loss": 1.745, "step": 30620 }, { "epoch": 1.0889698693449472, "grad_norm": 1.7283035516738892, "learning_rate": 1.5104956081870325e-05, "loss": 1.6916, "step": 30630 }, { "epoch": 1.0893253932983735, "grad_norm": 1.6927980184555054, "learning_rate": 1.5101585729678352e-05, "loss": 1.6908, "step": 30640 }, { "epoch": 1.0896809172517998, "grad_norm": 1.7789459228515625, "learning_rate": 1.5098214593914797e-05, "loss": 1.6767, "step": 30650 }, { "epoch": 1.0900364412052261, "grad_norm": 1.7156596183776855, "learning_rate": 1.5094842675097448e-05, "loss": 1.7332, "step": 30660 }, { "epoch": 1.0903919651586527, "grad_norm": 1.7196613550186157, "learning_rate": 1.5091469973744205e-05, "loss": 1.7253, "step": 30670 }, { "epoch": 1.090747489112079, "grad_norm": 1.7847352027893066, "learning_rate": 1.5088096490373106e-05, "loss": 1.73, "step": 30680 }, { "epoch": 1.0911030130655053, "grad_norm": 1.7828787565231323, "learning_rate": 1.5084722225502285e-05, "loss": 1.6746, "step": 30690 }, { "epoch": 1.0914585370189316, "grad_norm": 1.8798949718475342, "learning_rate": 1.508134717965001e-05, "loss": 1.6783, "step": 30700 }, { "epoch": 1.0918140609723581, "grad_norm": 1.7675645351409912, "learning_rate": 1.5077971353334669e-05, "loss": 1.6952, "step": 30710 }, { "epoch": 1.0921695849257844, "grad_norm": 1.8203387260437012, "learning_rate": 1.5074594747074765e-05, "loss": 1.7111, "step": 30720 }, { "epoch": 1.0925251088792107, "grad_norm": 1.7350901365280151, "learning_rate": 1.5071217361388928e-05, "loss": 1.7125, "step": 30730 }, { "epoch": 1.092880632832637, "grad_norm": 1.7595888376235962, "learning_rate": 1.50678391967959e-05, "loss": 1.7157, "step": 30740 }, { "epoch": 1.0932361567860636, "grad_norm": 1.805652141571045, "learning_rate": 1.506446025381455e-05, "loss": 1.7066, "step": 30750 }, { "epoch": 1.0935916807394899, "grad_norm": 1.7728995084762573, "learning_rate": 1.5061080532963858e-05, "loss": 1.7122, "step": 30760 }, { "epoch": 1.0939472046929162, "grad_norm": 1.8523882627487183, "learning_rate": 1.505770003476293e-05, "loss": 1.704, "step": 30770 }, { "epoch": 1.0943027286463425, "grad_norm": 1.8022390604019165, "learning_rate": 1.5054318759730988e-05, "loss": 1.6893, "step": 30780 }, { "epoch": 1.094658252599769, "grad_norm": 1.7755857706069946, "learning_rate": 1.5050936708387371e-05, "loss": 1.7057, "step": 30790 }, { "epoch": 1.0950137765531953, "grad_norm": 1.7422014474868774, "learning_rate": 1.5047553881251551e-05, "loss": 1.6995, "step": 30800 }, { "epoch": 1.0953693005066216, "grad_norm": 1.6298151016235352, "learning_rate": 1.5044170278843103e-05, "loss": 1.7035, "step": 30810 }, { "epoch": 1.095724824460048, "grad_norm": 1.7250490188598633, "learning_rate": 1.5040785901681725e-05, "loss": 1.6674, "step": 30820 }, { "epoch": 1.0960803484134745, "grad_norm": 1.759215235710144, "learning_rate": 1.5037400750287239e-05, "loss": 1.6645, "step": 30830 }, { "epoch": 1.0964358723669008, "grad_norm": 1.6420128345489502, "learning_rate": 1.5034014825179584e-05, "loss": 1.673, "step": 30840 }, { "epoch": 1.096791396320327, "grad_norm": 1.8458141088485718, "learning_rate": 1.5030628126878815e-05, "loss": 1.7032, "step": 30850 }, { "epoch": 1.0971469202737534, "grad_norm": 1.7907358407974243, "learning_rate": 1.5027240655905106e-05, "loss": 1.7256, "step": 30860 }, { "epoch": 1.09750244422718, "grad_norm": 1.746254563331604, "learning_rate": 1.5023852412778754e-05, "loss": 1.7219, "step": 30870 }, { "epoch": 1.0978579681806062, "grad_norm": 1.6866568326950073, "learning_rate": 1.5020463398020174e-05, "loss": 1.6771, "step": 30880 }, { "epoch": 1.0982134921340325, "grad_norm": 1.8206251859664917, "learning_rate": 1.5017073612149888e-05, "loss": 1.6914, "step": 30890 }, { "epoch": 1.0985690160874588, "grad_norm": 1.6670016050338745, "learning_rate": 1.5013683055688559e-05, "loss": 1.7025, "step": 30900 }, { "epoch": 1.0989245400408854, "grad_norm": 1.6651729345321655, "learning_rate": 1.5010291729156945e-05, "loss": 1.7279, "step": 30910 }, { "epoch": 1.0992800639943117, "grad_norm": 1.7703865766525269, "learning_rate": 1.5006899633075937e-05, "loss": 1.7285, "step": 30920 }, { "epoch": 1.099635587947738, "grad_norm": 1.6907395124435425, "learning_rate": 1.5003506767966541e-05, "loss": 1.73, "step": 30930 }, { "epoch": 1.0999911119011643, "grad_norm": 1.7951709032058716, "learning_rate": 1.5000113134349876e-05, "loss": 1.6835, "step": 30940 }, { "epoch": 1.1003466358545908, "grad_norm": 1.7016987800598145, "learning_rate": 1.4996718732747187e-05, "loss": 1.7348, "step": 30950 }, { "epoch": 1.100702159808017, "grad_norm": 1.8535315990447998, "learning_rate": 1.4993323563679827e-05, "loss": 1.6841, "step": 30960 }, { "epoch": 1.1010576837614434, "grad_norm": 1.7085849046707153, "learning_rate": 1.4989927627669274e-05, "loss": 1.7157, "step": 30970 }, { "epoch": 1.1014132077148697, "grad_norm": 1.7176827192306519, "learning_rate": 1.4986530925237128e-05, "loss": 1.6792, "step": 30980 }, { "epoch": 1.1017687316682963, "grad_norm": 1.768995761871338, "learning_rate": 1.4983133456905099e-05, "loss": 1.7004, "step": 30990 }, { "epoch": 1.1021242556217226, "grad_norm": 1.8381508588790894, "learning_rate": 1.4979735223195015e-05, "loss": 1.718, "step": 31000 }, { "epoch": 1.1024797795751489, "grad_norm": 1.8250529766082764, "learning_rate": 1.4976336224628822e-05, "loss": 1.6902, "step": 31010 }, { "epoch": 1.1028353035285752, "grad_norm": 1.8430213928222656, "learning_rate": 1.4972936461728587e-05, "loss": 1.7276, "step": 31020 }, { "epoch": 1.1031908274820017, "grad_norm": 1.8098722696304321, "learning_rate": 1.4969535935016491e-05, "loss": 1.7129, "step": 31030 }, { "epoch": 1.103546351435428, "grad_norm": 1.816544771194458, "learning_rate": 1.4966134645014836e-05, "loss": 1.7628, "step": 31040 }, { "epoch": 1.1039018753888543, "grad_norm": 1.8728526830673218, "learning_rate": 1.4962732592246037e-05, "loss": 1.7386, "step": 31050 }, { "epoch": 1.1042573993422806, "grad_norm": 1.7653038501739502, "learning_rate": 1.495932977723263e-05, "loss": 1.7164, "step": 31060 }, { "epoch": 1.1046129232957071, "grad_norm": 1.8237130641937256, "learning_rate": 1.4955926200497262e-05, "loss": 1.6975, "step": 31070 }, { "epoch": 1.1049684472491335, "grad_norm": 1.7338460683822632, "learning_rate": 1.4952521862562705e-05, "loss": 1.675, "step": 31080 }, { "epoch": 1.1053239712025598, "grad_norm": 1.8000022172927856, "learning_rate": 1.4949116763951844e-05, "loss": 1.6761, "step": 31090 }, { "epoch": 1.105679495155986, "grad_norm": 1.7959693670272827, "learning_rate": 1.4945710905187675e-05, "loss": 1.7275, "step": 31100 }, { "epoch": 1.1060350191094126, "grad_norm": 1.7559113502502441, "learning_rate": 1.4942304286793323e-05, "loss": 1.7033, "step": 31110 }, { "epoch": 1.106390543062839, "grad_norm": 1.808515191078186, "learning_rate": 1.4938896909292023e-05, "loss": 1.7032, "step": 31120 }, { "epoch": 1.1067460670162652, "grad_norm": 1.6466903686523438, "learning_rate": 1.4935488773207123e-05, "loss": 1.6773, "step": 31130 }, { "epoch": 1.1071015909696915, "grad_norm": 1.7822074890136719, "learning_rate": 1.4932079879062094e-05, "loss": 1.7081, "step": 31140 }, { "epoch": 1.107457114923118, "grad_norm": 1.7436120510101318, "learning_rate": 1.4928670227380517e-05, "loss": 1.6902, "step": 31150 }, { "epoch": 1.1078126388765444, "grad_norm": 1.8608921766281128, "learning_rate": 1.4925259818686099e-05, "loss": 1.654, "step": 31160 }, { "epoch": 1.1081681628299707, "grad_norm": 1.692667841911316, "learning_rate": 1.4921848653502652e-05, "loss": 1.6963, "step": 31170 }, { "epoch": 1.108523686783397, "grad_norm": 1.9093800783157349, "learning_rate": 1.4918436732354117e-05, "loss": 1.6653, "step": 31180 }, { "epoch": 1.1088792107368235, "grad_norm": 1.6940561532974243, "learning_rate": 1.4915024055764535e-05, "loss": 1.7274, "step": 31190 }, { "epoch": 1.1092347346902498, "grad_norm": 1.655745267868042, "learning_rate": 1.4911610624258077e-05, "loss": 1.6956, "step": 31200 }, { "epoch": 1.109590258643676, "grad_norm": 1.78951096534729, "learning_rate": 1.4908196438359022e-05, "loss": 1.7363, "step": 31210 }, { "epoch": 1.1099457825971024, "grad_norm": 1.7750624418258667, "learning_rate": 1.4904781498591766e-05, "loss": 1.6992, "step": 31220 }, { "epoch": 1.110301306550529, "grad_norm": 1.730631709098816, "learning_rate": 1.4901365805480828e-05, "loss": 1.6887, "step": 31230 }, { "epoch": 1.1106568305039553, "grad_norm": 1.865522027015686, "learning_rate": 1.4897949359550837e-05, "loss": 1.7311, "step": 31240 }, { "epoch": 1.1110123544573816, "grad_norm": 1.821334719657898, "learning_rate": 1.489453216132653e-05, "loss": 1.6791, "step": 31250 }, { "epoch": 1.1113678784108079, "grad_norm": 1.8130940198898315, "learning_rate": 1.4891114211332776e-05, "loss": 1.7056, "step": 31260 }, { "epoch": 1.1117234023642344, "grad_norm": 1.850980281829834, "learning_rate": 1.4887695510094545e-05, "loss": 1.7552, "step": 31270 }, { "epoch": 1.1120789263176607, "grad_norm": 1.7095885276794434, "learning_rate": 1.4884276058136928e-05, "loss": 1.6868, "step": 31280 }, { "epoch": 1.112434450271087, "grad_norm": 1.7346407175064087, "learning_rate": 1.4880855855985132e-05, "loss": 1.6652, "step": 31290 }, { "epoch": 1.1127899742245133, "grad_norm": 1.8015046119689941, "learning_rate": 1.4877434904164485e-05, "loss": 1.6931, "step": 31300 }, { "epoch": 1.1131454981779398, "grad_norm": 1.7510502338409424, "learning_rate": 1.4874013203200415e-05, "loss": 1.6761, "step": 31310 }, { "epoch": 1.1135010221313661, "grad_norm": 1.7510874271392822, "learning_rate": 1.4870590753618478e-05, "loss": 1.7106, "step": 31320 }, { "epoch": 1.1138565460847925, "grad_norm": 1.8281718492507935, "learning_rate": 1.4867167555944339e-05, "loss": 1.6621, "step": 31330 }, { "epoch": 1.1142120700382188, "grad_norm": 1.7633572816848755, "learning_rate": 1.4863743610703783e-05, "loss": 1.6881, "step": 31340 }, { "epoch": 1.1145675939916453, "grad_norm": 1.7464405298233032, "learning_rate": 1.48603189184227e-05, "loss": 1.7036, "step": 31350 }, { "epoch": 1.1149231179450716, "grad_norm": 1.7094883918762207, "learning_rate": 1.485689347962711e-05, "loss": 1.7104, "step": 31360 }, { "epoch": 1.115278641898498, "grad_norm": 1.8825170993804932, "learning_rate": 1.4853467294843134e-05, "loss": 1.6911, "step": 31370 }, { "epoch": 1.1156341658519242, "grad_norm": 1.8182456493377686, "learning_rate": 1.4850040364597012e-05, "loss": 1.7122, "step": 31380 }, { "epoch": 1.1159896898053507, "grad_norm": 1.7755396366119385, "learning_rate": 1.4846612689415099e-05, "loss": 1.7314, "step": 31390 }, { "epoch": 1.116345213758777, "grad_norm": 1.7010819911956787, "learning_rate": 1.4843184269823867e-05, "loss": 1.7456, "step": 31400 }, { "epoch": 1.1167007377122034, "grad_norm": 1.8768155574798584, "learning_rate": 1.4839755106349898e-05, "loss": 1.7313, "step": 31410 }, { "epoch": 1.1170562616656297, "grad_norm": 1.764375925064087, "learning_rate": 1.4836325199519887e-05, "loss": 1.6891, "step": 31420 }, { "epoch": 1.1174117856190562, "grad_norm": 1.729925274848938, "learning_rate": 1.4832894549860655e-05, "loss": 1.723, "step": 31430 }, { "epoch": 1.1177673095724825, "grad_norm": 1.8742669820785522, "learning_rate": 1.4829463157899118e-05, "loss": 1.7135, "step": 31440 }, { "epoch": 1.1181228335259088, "grad_norm": 1.7695032358169556, "learning_rate": 1.4826031024162321e-05, "loss": 1.6789, "step": 31450 }, { "epoch": 1.118478357479335, "grad_norm": 1.727602243423462, "learning_rate": 1.482259814917742e-05, "loss": 1.6971, "step": 31460 }, { "epoch": 1.1188338814327616, "grad_norm": 1.7881238460540771, "learning_rate": 1.481916453347168e-05, "loss": 1.7014, "step": 31470 }, { "epoch": 1.119189405386188, "grad_norm": 1.6665465831756592, "learning_rate": 1.4815730177572487e-05, "loss": 1.6626, "step": 31480 }, { "epoch": 1.1195449293396142, "grad_norm": 1.7676050662994385, "learning_rate": 1.4812295082007331e-05, "loss": 1.6791, "step": 31490 }, { "epoch": 1.1199004532930406, "grad_norm": 1.8006497621536255, "learning_rate": 1.4808859247303826e-05, "loss": 1.7094, "step": 31500 }, { "epoch": 1.120255977246467, "grad_norm": 1.8111299276351929, "learning_rate": 1.480542267398969e-05, "loss": 1.7503, "step": 31510 }, { "epoch": 1.1206115011998934, "grad_norm": 1.8131684064865112, "learning_rate": 1.4801985362592764e-05, "loss": 1.6797, "step": 31520 }, { "epoch": 1.1209670251533197, "grad_norm": 1.7535158395767212, "learning_rate": 1.4798547313640992e-05, "loss": 1.6686, "step": 31530 }, { "epoch": 1.121322549106746, "grad_norm": 1.8932629823684692, "learning_rate": 1.479510852766244e-05, "loss": 1.7397, "step": 31540 }, { "epoch": 1.1216780730601725, "grad_norm": 1.7336300611495972, "learning_rate": 1.4791669005185285e-05, "loss": 1.7013, "step": 31550 }, { "epoch": 1.1220335970135988, "grad_norm": 1.7552558183670044, "learning_rate": 1.4788228746737816e-05, "loss": 1.7705, "step": 31560 }, { "epoch": 1.1223891209670251, "grad_norm": 1.7884132862091064, "learning_rate": 1.4784787752848432e-05, "loss": 1.684, "step": 31570 }, { "epoch": 1.1227446449204515, "grad_norm": 1.9435439109802246, "learning_rate": 1.478134602404565e-05, "loss": 1.7101, "step": 31580 }, { "epoch": 1.123100168873878, "grad_norm": 1.905207633972168, "learning_rate": 1.4777903560858098e-05, "loss": 1.7137, "step": 31590 }, { "epoch": 1.1234556928273043, "grad_norm": 1.7854448556900024, "learning_rate": 1.4774460363814518e-05, "loss": 1.6594, "step": 31600 }, { "epoch": 1.1238112167807306, "grad_norm": 1.77256178855896, "learning_rate": 1.4771016433443761e-05, "loss": 1.6195, "step": 31610 }, { "epoch": 1.124166740734157, "grad_norm": 1.6947712898254395, "learning_rate": 1.4767571770274796e-05, "loss": 1.659, "step": 31620 }, { "epoch": 1.1245222646875834, "grad_norm": 1.7803505659103394, "learning_rate": 1.4764126374836698e-05, "loss": 1.6957, "step": 31630 }, { "epoch": 1.1248777886410097, "grad_norm": 1.828350305557251, "learning_rate": 1.476068024765866e-05, "loss": 1.7002, "step": 31640 }, { "epoch": 1.125233312594436, "grad_norm": 1.7589242458343506, "learning_rate": 1.4757233389269986e-05, "loss": 1.6817, "step": 31650 }, { "epoch": 1.1255888365478623, "grad_norm": 1.739592432975769, "learning_rate": 1.475378580020009e-05, "loss": 1.6981, "step": 31660 }, { "epoch": 1.1259443605012889, "grad_norm": 1.8724303245544434, "learning_rate": 1.4750337480978506e-05, "loss": 1.6967, "step": 31670 }, { "epoch": 1.1262998844547152, "grad_norm": 1.8594884872436523, "learning_rate": 1.4746888432134868e-05, "loss": 1.6761, "step": 31680 }, { "epoch": 1.1266554084081415, "grad_norm": 1.7723699808120728, "learning_rate": 1.474343865419893e-05, "loss": 1.6856, "step": 31690 }, { "epoch": 1.1270109323615678, "grad_norm": 1.7817522287368774, "learning_rate": 1.4739988147700555e-05, "loss": 1.7061, "step": 31700 }, { "epoch": 1.127366456314994, "grad_norm": 1.6874125003814697, "learning_rate": 1.4736536913169719e-05, "loss": 1.7159, "step": 31710 }, { "epoch": 1.1277219802684206, "grad_norm": 1.8645119667053223, "learning_rate": 1.4733084951136516e-05, "loss": 1.6798, "step": 31720 }, { "epoch": 1.128077504221847, "grad_norm": 1.8583474159240723, "learning_rate": 1.4729632262131137e-05, "loss": 1.6438, "step": 31730 }, { "epoch": 1.1284330281752732, "grad_norm": 1.8013262748718262, "learning_rate": 1.4726178846683901e-05, "loss": 1.7132, "step": 31740 }, { "epoch": 1.1287885521286998, "grad_norm": 1.6753315925598145, "learning_rate": 1.4722724705325226e-05, "loss": 1.7321, "step": 31750 }, { "epoch": 1.129144076082126, "grad_norm": 1.7268919944763184, "learning_rate": 1.4719269838585645e-05, "loss": 1.7509, "step": 31760 }, { "epoch": 1.1294996000355524, "grad_norm": 1.669714093208313, "learning_rate": 1.471581424699581e-05, "loss": 1.7306, "step": 31770 }, { "epoch": 1.1298551239889787, "grad_norm": 1.8650387525558472, "learning_rate": 1.4712357931086474e-05, "loss": 1.7027, "step": 31780 }, { "epoch": 1.130210647942405, "grad_norm": 1.7554460763931274, "learning_rate": 1.4708900891388506e-05, "loss": 1.7183, "step": 31790 }, { "epoch": 1.1305661718958315, "grad_norm": 1.8117382526397705, "learning_rate": 1.4705443128432891e-05, "loss": 1.6967, "step": 31800 }, { "epoch": 1.1309216958492578, "grad_norm": 1.660362720489502, "learning_rate": 1.470198464275071e-05, "loss": 1.6875, "step": 31810 }, { "epoch": 1.1312772198026841, "grad_norm": 1.798330545425415, "learning_rate": 1.4698525434873173e-05, "loss": 1.6976, "step": 31820 }, { "epoch": 1.1316327437561107, "grad_norm": 1.8113417625427246, "learning_rate": 1.4695065505331584e-05, "loss": 1.6929, "step": 31830 }, { "epoch": 1.131988267709537, "grad_norm": 1.7701830863952637, "learning_rate": 1.4691604854657375e-05, "loss": 1.6945, "step": 31840 }, { "epoch": 1.1323437916629633, "grad_norm": 1.686472773551941, "learning_rate": 1.4688143483382076e-05, "loss": 1.7623, "step": 31850 }, { "epoch": 1.1326993156163896, "grad_norm": 1.8108429908752441, "learning_rate": 1.4684681392037334e-05, "loss": 1.7211, "step": 31860 }, { "epoch": 1.133054839569816, "grad_norm": 1.7331719398498535, "learning_rate": 1.4681218581154904e-05, "loss": 1.6902, "step": 31870 }, { "epoch": 1.1334103635232424, "grad_norm": 1.9255976676940918, "learning_rate": 1.4677755051266651e-05, "loss": 1.7045, "step": 31880 }, { "epoch": 1.1337658874766687, "grad_norm": 1.7227295637130737, "learning_rate": 1.4674290802904549e-05, "loss": 1.6928, "step": 31890 }, { "epoch": 1.134121411430095, "grad_norm": 1.915678858757019, "learning_rate": 1.4670825836600688e-05, "loss": 1.6951, "step": 31900 }, { "epoch": 1.1344769353835216, "grad_norm": 1.7096298933029175, "learning_rate": 1.4667360152887267e-05, "loss": 1.6935, "step": 31910 }, { "epoch": 1.1348324593369479, "grad_norm": 1.732093334197998, "learning_rate": 1.4663893752296589e-05, "loss": 1.7173, "step": 31920 }, { "epoch": 1.1351879832903742, "grad_norm": 1.7549011707305908, "learning_rate": 1.4660426635361078e-05, "loss": 1.6538, "step": 31930 }, { "epoch": 1.1355435072438005, "grad_norm": 1.7300089597702026, "learning_rate": 1.4656958802613253e-05, "loss": 1.6919, "step": 31940 }, { "epoch": 1.1358990311972268, "grad_norm": 1.8303147554397583, "learning_rate": 1.4653490254585756e-05, "loss": 1.6438, "step": 31950 }, { "epoch": 1.1362545551506533, "grad_norm": 1.86250638961792, "learning_rate": 1.4650020991811334e-05, "loss": 1.6658, "step": 31960 }, { "epoch": 1.1366100791040796, "grad_norm": 1.7613438367843628, "learning_rate": 1.4646551014822843e-05, "loss": 1.7075, "step": 31970 }, { "epoch": 1.136965603057506, "grad_norm": 2.0326385498046875, "learning_rate": 1.464308032415325e-05, "loss": 1.6903, "step": 31980 }, { "epoch": 1.1373211270109325, "grad_norm": 1.7846877574920654, "learning_rate": 1.4639608920335632e-05, "loss": 1.6944, "step": 31990 }, { "epoch": 1.1376766509643588, "grad_norm": 1.8086317777633667, "learning_rate": 1.4636136803903175e-05, "loss": 1.6527, "step": 32000 }, { "epoch": 1.138032174917785, "grad_norm": 1.8257627487182617, "learning_rate": 1.4632663975389173e-05, "loss": 1.7188, "step": 32010 }, { "epoch": 1.1383876988712114, "grad_norm": 1.6944304704666138, "learning_rate": 1.4629190435327032e-05, "loss": 1.6477, "step": 32020 }, { "epoch": 1.1387432228246377, "grad_norm": 1.7246496677398682, "learning_rate": 1.4625716184250262e-05, "loss": 1.6891, "step": 32030 }, { "epoch": 1.1390987467780642, "grad_norm": 1.7539235353469849, "learning_rate": 1.4622241222692495e-05, "loss": 1.6655, "step": 32040 }, { "epoch": 1.1394542707314905, "grad_norm": 1.825313687324524, "learning_rate": 1.4618765551187457e-05, "loss": 1.6999, "step": 32050 }, { "epoch": 1.1398097946849168, "grad_norm": 1.8041131496429443, "learning_rate": 1.4615289170268986e-05, "loss": 1.7108, "step": 32060 }, { "epoch": 1.1401653186383434, "grad_norm": 1.7969400882720947, "learning_rate": 1.461181208047104e-05, "loss": 1.6538, "step": 32070 }, { "epoch": 1.1405208425917697, "grad_norm": 1.6379525661468506, "learning_rate": 1.4608334282327672e-05, "loss": 1.7106, "step": 32080 }, { "epoch": 1.140876366545196, "grad_norm": 1.7600675821304321, "learning_rate": 1.4604855776373056e-05, "loss": 1.6834, "step": 32090 }, { "epoch": 1.1412318904986223, "grad_norm": 1.7814898490905762, "learning_rate": 1.4601376563141462e-05, "loss": 1.7124, "step": 32100 }, { "epoch": 1.1415874144520486, "grad_norm": 1.7427417039871216, "learning_rate": 1.4597896643167282e-05, "loss": 1.6646, "step": 32110 }, { "epoch": 1.1419429384054751, "grad_norm": 1.666539192199707, "learning_rate": 1.4594416016985005e-05, "loss": 1.7046, "step": 32120 }, { "epoch": 1.1422984623589014, "grad_norm": 1.7348567247390747, "learning_rate": 1.4590934685129236e-05, "loss": 1.6734, "step": 32130 }, { "epoch": 1.1426539863123277, "grad_norm": 1.6932833194732666, "learning_rate": 1.4587452648134686e-05, "loss": 1.7141, "step": 32140 }, { "epoch": 1.1430095102657543, "grad_norm": 1.7642148733139038, "learning_rate": 1.4583969906536168e-05, "loss": 1.7083, "step": 32150 }, { "epoch": 1.1433650342191806, "grad_norm": 1.7768909931182861, "learning_rate": 1.4580486460868616e-05, "loss": 1.7396, "step": 32160 }, { "epoch": 1.1437205581726069, "grad_norm": 1.8518837690353394, "learning_rate": 1.4577002311667067e-05, "loss": 1.7454, "step": 32170 }, { "epoch": 1.1440760821260332, "grad_norm": 1.8241009712219238, "learning_rate": 1.457351745946666e-05, "loss": 1.7187, "step": 32180 }, { "epoch": 1.1444316060794595, "grad_norm": 1.8031975030899048, "learning_rate": 1.4570031904802643e-05, "loss": 1.6595, "step": 32190 }, { "epoch": 1.144787130032886, "grad_norm": 1.8305586576461792, "learning_rate": 1.4566545648210382e-05, "loss": 1.7109, "step": 32200 }, { "epoch": 1.1451426539863123, "grad_norm": 1.798540711402893, "learning_rate": 1.4563058690225344e-05, "loss": 1.6938, "step": 32210 }, { "epoch": 1.1454981779397386, "grad_norm": 1.7197037935256958, "learning_rate": 1.45595710313831e-05, "loss": 1.7193, "step": 32220 }, { "epoch": 1.1458537018931652, "grad_norm": 1.7387158870697021, "learning_rate": 1.4556082672219333e-05, "loss": 1.6565, "step": 32230 }, { "epoch": 1.1462092258465915, "grad_norm": 1.7779680490493774, "learning_rate": 1.4552593613269839e-05, "loss": 1.6162, "step": 32240 }, { "epoch": 1.1465647498000178, "grad_norm": 1.738901138305664, "learning_rate": 1.4549103855070508e-05, "loss": 1.7132, "step": 32250 }, { "epoch": 1.146920273753444, "grad_norm": 1.907164454460144, "learning_rate": 1.4545613398157346e-05, "loss": 1.7251, "step": 32260 }, { "epoch": 1.1472757977068704, "grad_norm": 1.7884010076522827, "learning_rate": 1.4542122243066468e-05, "loss": 1.6939, "step": 32270 }, { "epoch": 1.147631321660297, "grad_norm": 1.812320590019226, "learning_rate": 1.4538630390334094e-05, "loss": 1.6683, "step": 32280 }, { "epoch": 1.1479868456137232, "grad_norm": 1.7077202796936035, "learning_rate": 1.4535137840496552e-05, "loss": 1.6924, "step": 32290 }, { "epoch": 1.1483423695671495, "grad_norm": 1.8116544485092163, "learning_rate": 1.4531644594090271e-05, "loss": 1.7156, "step": 32300 }, { "epoch": 1.148697893520576, "grad_norm": 1.7970168590545654, "learning_rate": 1.4528150651651793e-05, "loss": 1.6802, "step": 32310 }, { "epoch": 1.1490534174740024, "grad_norm": 1.7727383375167847, "learning_rate": 1.4524656013717766e-05, "loss": 1.7092, "step": 32320 }, { "epoch": 1.1494089414274287, "grad_norm": 1.8042786121368408, "learning_rate": 1.4521160680824945e-05, "loss": 1.6947, "step": 32330 }, { "epoch": 1.149764465380855, "grad_norm": 1.8238158226013184, "learning_rate": 1.4517664653510193e-05, "loss": 1.6564, "step": 32340 }, { "epoch": 1.1501199893342813, "grad_norm": 1.8085988759994507, "learning_rate": 1.4514167932310477e-05, "loss": 1.7017, "step": 32350 }, { "epoch": 1.1504755132877078, "grad_norm": 1.7852269411087036, "learning_rate": 1.451067051776287e-05, "loss": 1.7279, "step": 32360 }, { "epoch": 1.1508310372411341, "grad_norm": 1.7860541343688965, "learning_rate": 1.4507172410404553e-05, "loss": 1.7226, "step": 32370 }, { "epoch": 1.1511865611945604, "grad_norm": 1.9089226722717285, "learning_rate": 1.4503673610772815e-05, "loss": 1.6818, "step": 32380 }, { "epoch": 1.151542085147987, "grad_norm": 1.7474654912948608, "learning_rate": 1.4500174119405046e-05, "loss": 1.6729, "step": 32390 }, { "epoch": 1.1518976091014133, "grad_norm": 1.7584805488586426, "learning_rate": 1.449667393683875e-05, "loss": 1.6835, "step": 32400 }, { "epoch": 1.1522531330548396, "grad_norm": 1.6684837341308594, "learning_rate": 1.4493173063611532e-05, "loss": 1.6795, "step": 32410 }, { "epoch": 1.1526086570082659, "grad_norm": 1.8473206758499146, "learning_rate": 1.4489671500261105e-05, "loss": 1.6893, "step": 32420 }, { "epoch": 1.1529641809616922, "grad_norm": 1.8049877882003784, "learning_rate": 1.4486169247325283e-05, "loss": 1.6762, "step": 32430 }, { "epoch": 1.1533197049151187, "grad_norm": 1.6902273893356323, "learning_rate": 1.4482666305341994e-05, "loss": 1.6834, "step": 32440 }, { "epoch": 1.153675228868545, "grad_norm": 1.811184287071228, "learning_rate": 1.447916267484927e-05, "loss": 1.6907, "step": 32450 }, { "epoch": 1.1540307528219713, "grad_norm": 1.7988183498382568, "learning_rate": 1.4475658356385243e-05, "loss": 1.6991, "step": 32460 }, { "epoch": 1.1543862767753978, "grad_norm": 1.783347487449646, "learning_rate": 1.4472153350488152e-05, "loss": 1.6674, "step": 32470 }, { "epoch": 1.1547418007288242, "grad_norm": 1.907630205154419, "learning_rate": 1.4468647657696351e-05, "loss": 1.6922, "step": 32480 }, { "epoch": 1.1550973246822505, "grad_norm": 1.874877691268921, "learning_rate": 1.4465141278548284e-05, "loss": 1.7199, "step": 32490 }, { "epoch": 1.1554528486356768, "grad_norm": 1.783103108406067, "learning_rate": 1.4461634213582516e-05, "loss": 1.6528, "step": 32500 }, { "epoch": 1.155808372589103, "grad_norm": 1.8129419088363647, "learning_rate": 1.4458126463337707e-05, "loss": 1.7292, "step": 32510 }, { "epoch": 1.1561638965425296, "grad_norm": 1.7467507123947144, "learning_rate": 1.4454618028352623e-05, "loss": 1.6573, "step": 32520 }, { "epoch": 1.156519420495956, "grad_norm": 1.7516041994094849, "learning_rate": 1.4451108909166144e-05, "loss": 1.6471, "step": 32530 }, { "epoch": 1.1568749444493822, "grad_norm": 1.7575526237487793, "learning_rate": 1.4447599106317245e-05, "loss": 1.7071, "step": 32540 }, { "epoch": 1.1572304684028087, "grad_norm": 1.7154433727264404, "learning_rate": 1.4444088620345011e-05, "loss": 1.6896, "step": 32550 }, { "epoch": 1.157585992356235, "grad_norm": 1.7868378162384033, "learning_rate": 1.4440577451788627e-05, "loss": 1.6911, "step": 32560 }, { "epoch": 1.1579415163096614, "grad_norm": 1.8467434644699097, "learning_rate": 1.443706560118739e-05, "loss": 1.6923, "step": 32570 }, { "epoch": 1.1582970402630877, "grad_norm": 1.8598535060882568, "learning_rate": 1.4433553069080697e-05, "loss": 1.6951, "step": 32580 }, { "epoch": 1.158652564216514, "grad_norm": 1.8078055381774902, "learning_rate": 1.4430039856008052e-05, "loss": 1.7245, "step": 32590 }, { "epoch": 1.1590080881699405, "grad_norm": 1.7683792114257812, "learning_rate": 1.442652596250906e-05, "loss": 1.7172, "step": 32600 }, { "epoch": 1.1593636121233668, "grad_norm": 1.7138365507125854, "learning_rate": 1.4423011389123438e-05, "loss": 1.6751, "step": 32610 }, { "epoch": 1.1597191360767931, "grad_norm": 1.7290624380111694, "learning_rate": 1.4419496136390997e-05, "loss": 1.696, "step": 32620 }, { "epoch": 1.1600746600302196, "grad_norm": 1.7704051733016968, "learning_rate": 1.4415980204851661e-05, "loss": 1.6506, "step": 32630 }, { "epoch": 1.160430183983646, "grad_norm": 1.7916181087493896, "learning_rate": 1.441246359504545e-05, "loss": 1.6791, "step": 32640 }, { "epoch": 1.1607857079370723, "grad_norm": 1.8721511363983154, "learning_rate": 1.4408946307512502e-05, "loss": 1.7063, "step": 32650 }, { "epoch": 1.1611412318904986, "grad_norm": 1.9711841344833374, "learning_rate": 1.4405428342793042e-05, "loss": 1.7405, "step": 32660 }, { "epoch": 1.1614967558439249, "grad_norm": 1.9350999593734741, "learning_rate": 1.4401909701427412e-05, "loss": 1.7047, "step": 32670 }, { "epoch": 1.1618522797973514, "grad_norm": 1.8050715923309326, "learning_rate": 1.439839038395605e-05, "loss": 1.7093, "step": 32680 }, { "epoch": 1.1622078037507777, "grad_norm": 1.8852133750915527, "learning_rate": 1.4394870390919508e-05, "loss": 1.642, "step": 32690 }, { "epoch": 1.162563327704204, "grad_norm": 1.8427294492721558, "learning_rate": 1.4391349722858428e-05, "loss": 1.6879, "step": 32700 }, { "epoch": 1.1629188516576305, "grad_norm": 1.7559659481048584, "learning_rate": 1.4387828380313565e-05, "loss": 1.6581, "step": 32710 }, { "epoch": 1.1632743756110568, "grad_norm": 1.7213876247406006, "learning_rate": 1.4384306363825772e-05, "loss": 1.6987, "step": 32720 }, { "epoch": 1.1636298995644831, "grad_norm": 1.7299596071243286, "learning_rate": 1.4380783673936015e-05, "loss": 1.6892, "step": 32730 }, { "epoch": 1.1639854235179095, "grad_norm": 1.7985115051269531, "learning_rate": 1.437726031118535e-05, "loss": 1.7113, "step": 32740 }, { "epoch": 1.1643409474713358, "grad_norm": 1.757331371307373, "learning_rate": 1.4373736276114947e-05, "loss": 1.7089, "step": 32750 }, { "epoch": 1.1646964714247623, "grad_norm": 1.818609356880188, "learning_rate": 1.4370211569266077e-05, "loss": 1.6683, "step": 32760 }, { "epoch": 1.1650519953781886, "grad_norm": 1.9024662971496582, "learning_rate": 1.4366686191180113e-05, "loss": 1.7072, "step": 32770 }, { "epoch": 1.165407519331615, "grad_norm": 1.7936043739318848, "learning_rate": 1.4363160142398526e-05, "loss": 1.7066, "step": 32780 }, { "epoch": 1.1657630432850414, "grad_norm": 1.6970324516296387, "learning_rate": 1.4359633423462901e-05, "loss": 1.6845, "step": 32790 }, { "epoch": 1.1661185672384677, "grad_norm": 1.8118795156478882, "learning_rate": 1.4356106034914916e-05, "loss": 1.7008, "step": 32800 }, { "epoch": 1.166474091191894, "grad_norm": 1.7801399230957031, "learning_rate": 1.4352577977296358e-05, "loss": 1.6738, "step": 32810 }, { "epoch": 1.1668296151453204, "grad_norm": 1.745764136314392, "learning_rate": 1.434904925114911e-05, "loss": 1.6884, "step": 32820 }, { "epoch": 1.1671851390987467, "grad_norm": 1.8358365297317505, "learning_rate": 1.4345519857015168e-05, "loss": 1.707, "step": 32830 }, { "epoch": 1.1675406630521732, "grad_norm": 1.7131662368774414, "learning_rate": 1.4341989795436624e-05, "loss": 1.6752, "step": 32840 }, { "epoch": 1.1678961870055995, "grad_norm": 1.749521017074585, "learning_rate": 1.4338459066955672e-05, "loss": 1.6865, "step": 32850 }, { "epoch": 1.1682517109590258, "grad_norm": 1.747633934020996, "learning_rate": 1.4334927672114609e-05, "loss": 1.6788, "step": 32860 }, { "epoch": 1.1686072349124523, "grad_norm": 1.7349754571914673, "learning_rate": 1.4331395611455837e-05, "loss": 1.7481, "step": 32870 }, { "epoch": 1.1689627588658786, "grad_norm": 1.705256700515747, "learning_rate": 1.4327862885521855e-05, "loss": 1.6782, "step": 32880 }, { "epoch": 1.169318282819305, "grad_norm": 1.8096204996109009, "learning_rate": 1.432432949485527e-05, "loss": 1.6504, "step": 32890 }, { "epoch": 1.1696738067727313, "grad_norm": 1.8315744400024414, "learning_rate": 1.4320795439998788e-05, "loss": 1.7399, "step": 32900 }, { "epoch": 1.1700293307261576, "grad_norm": 1.6125462055206299, "learning_rate": 1.4317260721495219e-05, "loss": 1.687, "step": 32910 }, { "epoch": 1.170384854679584, "grad_norm": 1.7470366954803467, "learning_rate": 1.4313725339887472e-05, "loss": 1.6939, "step": 32920 }, { "epoch": 1.1707403786330104, "grad_norm": 1.6082324981689453, "learning_rate": 1.4310189295718562e-05, "loss": 1.6882, "step": 32930 }, { "epoch": 1.1710959025864367, "grad_norm": 1.7428748607635498, "learning_rate": 1.4306652589531597e-05, "loss": 1.6608, "step": 32940 }, { "epoch": 1.1714514265398632, "grad_norm": 1.682776927947998, "learning_rate": 1.43031152218698e-05, "loss": 1.7281, "step": 32950 }, { "epoch": 1.1718069504932895, "grad_norm": 1.8763965368270874, "learning_rate": 1.4299577193276486e-05, "loss": 1.7494, "step": 32960 }, { "epoch": 1.1721624744467158, "grad_norm": 1.809517741203308, "learning_rate": 1.429603850429507e-05, "loss": 1.7082, "step": 32970 }, { "epoch": 1.1725179984001421, "grad_norm": 1.7316521406173706, "learning_rate": 1.4292499155469082e-05, "loss": 1.6768, "step": 32980 }, { "epoch": 1.1728735223535685, "grad_norm": 1.684985637664795, "learning_rate": 1.4288959147342136e-05, "loss": 1.7036, "step": 32990 }, { "epoch": 1.173229046306995, "grad_norm": 1.8130797147750854, "learning_rate": 1.4285418480457955e-05, "loss": 1.6871, "step": 33000 }, { "epoch": 1.1735845702604213, "grad_norm": 1.7257000207901, "learning_rate": 1.4281877155360366e-05, "loss": 1.6493, "step": 33010 }, { "epoch": 1.1739400942138476, "grad_norm": 1.8485682010650635, "learning_rate": 1.4278335172593294e-05, "loss": 1.7094, "step": 33020 }, { "epoch": 1.1742956181672741, "grad_norm": 1.8036755323410034, "learning_rate": 1.4274792532700764e-05, "loss": 1.6827, "step": 33030 }, { "epoch": 1.1746511421207004, "grad_norm": 1.7438093423843384, "learning_rate": 1.4271249236226907e-05, "loss": 1.7338, "step": 33040 }, { "epoch": 1.1750066660741267, "grad_norm": 1.8052996397018433, "learning_rate": 1.4267705283715945e-05, "loss": 1.66, "step": 33050 }, { "epoch": 1.175362190027553, "grad_norm": 1.700236439704895, "learning_rate": 1.4264160675712211e-05, "loss": 1.6961, "step": 33060 }, { "epoch": 1.1757177139809794, "grad_norm": 1.7625881433486938, "learning_rate": 1.4260615412760132e-05, "loss": 1.6751, "step": 33070 }, { "epoch": 1.1760732379344059, "grad_norm": 1.8093489408493042, "learning_rate": 1.425706949540424e-05, "loss": 1.6533, "step": 33080 }, { "epoch": 1.1764287618878322, "grad_norm": 1.8260650634765625, "learning_rate": 1.4253522924189172e-05, "loss": 1.6723, "step": 33090 }, { "epoch": 1.1767842858412585, "grad_norm": 1.8158904314041138, "learning_rate": 1.4249975699659646e-05, "loss": 1.7197, "step": 33100 }, { "epoch": 1.177139809794685, "grad_norm": 1.719862937927246, "learning_rate": 1.4246427822360502e-05, "loss": 1.7096, "step": 33110 }, { "epoch": 1.1774953337481113, "grad_norm": 1.8489018678665161, "learning_rate": 1.424287929283667e-05, "loss": 1.6783, "step": 33120 }, { "epoch": 1.1778508577015376, "grad_norm": 1.748596429824829, "learning_rate": 1.4239330111633182e-05, "loss": 1.7238, "step": 33130 }, { "epoch": 1.178206381654964, "grad_norm": 1.8104336261749268, "learning_rate": 1.4235780279295168e-05, "loss": 1.6868, "step": 33140 }, { "epoch": 1.1785619056083902, "grad_norm": 1.733096957206726, "learning_rate": 1.4232229796367863e-05, "loss": 1.6899, "step": 33150 }, { "epoch": 1.1789174295618168, "grad_norm": 1.818442940711975, "learning_rate": 1.4228678663396599e-05, "loss": 1.741, "step": 33160 }, { "epoch": 1.179272953515243, "grad_norm": 1.8043625354766846, "learning_rate": 1.4225126880926804e-05, "loss": 1.6767, "step": 33170 }, { "epoch": 1.1796284774686694, "grad_norm": 1.9023876190185547, "learning_rate": 1.4221574449504014e-05, "loss": 1.6426, "step": 33180 }, { "epoch": 1.179984001422096, "grad_norm": 1.7926504611968994, "learning_rate": 1.4218021369673856e-05, "loss": 1.7023, "step": 33190 }, { "epoch": 1.1803395253755222, "grad_norm": 1.7633355855941772, "learning_rate": 1.4214467641982062e-05, "loss": 1.7391, "step": 33200 }, { "epoch": 1.1806950493289485, "grad_norm": 1.8657419681549072, "learning_rate": 1.4210913266974465e-05, "loss": 1.7346, "step": 33210 }, { "epoch": 1.1810505732823748, "grad_norm": 1.7032383680343628, "learning_rate": 1.420735824519699e-05, "loss": 1.6766, "step": 33220 }, { "epoch": 1.1814060972358011, "grad_norm": 1.809301733970642, "learning_rate": 1.4203802577195674e-05, "loss": 1.7146, "step": 33230 }, { "epoch": 1.1817616211892277, "grad_norm": 1.860851764678955, "learning_rate": 1.4200246263516635e-05, "loss": 1.7256, "step": 33240 }, { "epoch": 1.182117145142654, "grad_norm": 1.9773718118667603, "learning_rate": 1.419668930470611e-05, "loss": 1.7246, "step": 33250 }, { "epoch": 1.1824726690960803, "grad_norm": 1.7748452425003052, "learning_rate": 1.4193131701310418e-05, "loss": 1.682, "step": 33260 }, { "epoch": 1.1828281930495068, "grad_norm": 1.7930796146392822, "learning_rate": 1.418957345387599e-05, "loss": 1.7387, "step": 33270 }, { "epoch": 1.1831837170029331, "grad_norm": 1.67182195186615, "learning_rate": 1.4186014562949346e-05, "loss": 1.6883, "step": 33280 }, { "epoch": 1.1835392409563594, "grad_norm": 1.7434818744659424, "learning_rate": 1.4182455029077113e-05, "loss": 1.6746, "step": 33290 }, { "epoch": 1.1838947649097857, "grad_norm": 1.7789760828018188, "learning_rate": 1.4178894852806013e-05, "loss": 1.6776, "step": 33300 }, { "epoch": 1.184250288863212, "grad_norm": 1.6772903203964233, "learning_rate": 1.4175334034682864e-05, "loss": 1.7219, "step": 33310 }, { "epoch": 1.1846058128166386, "grad_norm": 1.7272416353225708, "learning_rate": 1.4171772575254585e-05, "loss": 1.665, "step": 33320 }, { "epoch": 1.1849613367700649, "grad_norm": 1.8471630811691284, "learning_rate": 1.41682104750682e-05, "loss": 1.7101, "step": 33330 }, { "epoch": 1.1853168607234912, "grad_norm": 1.7045025825500488, "learning_rate": 1.4164647734670818e-05, "loss": 1.7075, "step": 33340 }, { "epoch": 1.1856723846769177, "grad_norm": 1.8781580924987793, "learning_rate": 1.4161084354609657e-05, "loss": 1.6975, "step": 33350 }, { "epoch": 1.186027908630344, "grad_norm": 1.8737696409225464, "learning_rate": 1.415752033543203e-05, "loss": 1.7216, "step": 33360 }, { "epoch": 1.1863834325837703, "grad_norm": 1.8052611351013184, "learning_rate": 1.4153955677685347e-05, "loss": 1.6939, "step": 33370 }, { "epoch": 1.1867389565371966, "grad_norm": 1.688539981842041, "learning_rate": 1.4150390381917115e-05, "loss": 1.677, "step": 33380 }, { "epoch": 1.187094480490623, "grad_norm": 1.8261302709579468, "learning_rate": 1.4146824448674945e-05, "loss": 1.6752, "step": 33390 }, { "epoch": 1.1874500044440495, "grad_norm": 1.8232522010803223, "learning_rate": 1.4143257878506541e-05, "loss": 1.7094, "step": 33400 }, { "epoch": 1.1878055283974758, "grad_norm": 1.7392261028289795, "learning_rate": 1.4139690671959708e-05, "loss": 1.6903, "step": 33410 }, { "epoch": 1.188161052350902, "grad_norm": 1.805082082748413, "learning_rate": 1.413612282958234e-05, "loss": 1.6825, "step": 33420 }, { "epoch": 1.1885165763043286, "grad_norm": 1.7442923784255981, "learning_rate": 1.4132554351922444e-05, "loss": 1.6796, "step": 33430 }, { "epoch": 1.188872100257755, "grad_norm": 1.7606703042984009, "learning_rate": 1.4128985239528104e-05, "loss": 1.7187, "step": 33440 }, { "epoch": 1.1892276242111812, "grad_norm": 1.6859506368637085, "learning_rate": 1.4125415492947523e-05, "loss": 1.6878, "step": 33450 }, { "epoch": 1.1895831481646075, "grad_norm": 1.7987312078475952, "learning_rate": 1.412184511272899e-05, "loss": 1.6963, "step": 33460 }, { "epoch": 1.1899386721180338, "grad_norm": 1.8057392835617065, "learning_rate": 1.4118274099420893e-05, "loss": 1.7036, "step": 33470 }, { "epoch": 1.1902941960714604, "grad_norm": 1.7178395986557007, "learning_rate": 1.4114702453571711e-05, "loss": 1.6923, "step": 33480 }, { "epoch": 1.1906497200248867, "grad_norm": 1.8137010335922241, "learning_rate": 1.4111130175730038e-05, "loss": 1.7145, "step": 33490 }, { "epoch": 1.191005243978313, "grad_norm": 1.8016866445541382, "learning_rate": 1.4107557266444543e-05, "loss": 1.656, "step": 33500 }, { "epoch": 1.1913607679317395, "grad_norm": 1.936647891998291, "learning_rate": 1.4103983726264005e-05, "loss": 1.6887, "step": 33510 }, { "epoch": 1.1917162918851658, "grad_norm": 1.8246115446090698, "learning_rate": 1.41004095557373e-05, "loss": 1.6944, "step": 33520 }, { "epoch": 1.1920718158385921, "grad_norm": 1.708095908164978, "learning_rate": 1.40968347554134e-05, "loss": 1.6923, "step": 33530 }, { "epoch": 1.1924273397920184, "grad_norm": 1.852388620376587, "learning_rate": 1.4093259325841366e-05, "loss": 1.7221, "step": 33540 }, { "epoch": 1.1927828637454447, "grad_norm": 1.8310407400131226, "learning_rate": 1.4089683267570366e-05, "loss": 1.6939, "step": 33550 }, { "epoch": 1.1931383876988713, "grad_norm": 1.8411918878555298, "learning_rate": 1.4086106581149656e-05, "loss": 1.6536, "step": 33560 }, { "epoch": 1.1934939116522976, "grad_norm": 1.9055877923965454, "learning_rate": 1.40825292671286e-05, "loss": 1.7192, "step": 33570 }, { "epoch": 1.1938494356057239, "grad_norm": 1.8024219274520874, "learning_rate": 1.4078951326056642e-05, "loss": 1.7258, "step": 33580 }, { "epoch": 1.1942049595591504, "grad_norm": 1.8253586292266846, "learning_rate": 1.4075372758483336e-05, "loss": 1.7017, "step": 33590 }, { "epoch": 1.1945604835125767, "grad_norm": 1.7866944074630737, "learning_rate": 1.4071793564958332e-05, "loss": 1.7364, "step": 33600 }, { "epoch": 1.194916007466003, "grad_norm": 1.7497050762176514, "learning_rate": 1.406821374603136e-05, "loss": 1.7286, "step": 33610 }, { "epoch": 1.1952715314194293, "grad_norm": 1.8349730968475342, "learning_rate": 1.4064633302252268e-05, "loss": 1.7375, "step": 33620 }, { "epoch": 1.1956270553728556, "grad_norm": 1.8275095224380493, "learning_rate": 1.4061052234170985e-05, "loss": 1.7206, "step": 33630 }, { "epoch": 1.1959825793262822, "grad_norm": 1.7886320352554321, "learning_rate": 1.405747054233754e-05, "loss": 1.6903, "step": 33640 }, { "epoch": 1.1963381032797085, "grad_norm": 1.6897066831588745, "learning_rate": 1.4053888227302065e-05, "loss": 1.684, "step": 33650 }, { "epoch": 1.1966936272331348, "grad_norm": 1.7241648435592651, "learning_rate": 1.4050305289614777e-05, "loss": 1.7033, "step": 33660 }, { "epoch": 1.1970491511865613, "grad_norm": 1.8050079345703125, "learning_rate": 1.4046721729825988e-05, "loss": 1.7016, "step": 33670 }, { "epoch": 1.1974046751399876, "grad_norm": 1.7987489700317383, "learning_rate": 1.4043137548486114e-05, "loss": 1.7137, "step": 33680 }, { "epoch": 1.197760199093414, "grad_norm": 1.7785160541534424, "learning_rate": 1.4039552746145664e-05, "loss": 1.7021, "step": 33690 }, { "epoch": 1.1981157230468402, "grad_norm": 1.8410561084747314, "learning_rate": 1.4035967323355241e-05, "loss": 1.6731, "step": 33700 }, { "epoch": 1.1984712470002665, "grad_norm": 1.703798770904541, "learning_rate": 1.4032381280665544e-05, "loss": 1.7263, "step": 33710 }, { "epoch": 1.198826770953693, "grad_norm": 1.783011555671692, "learning_rate": 1.4028794618627364e-05, "loss": 1.7026, "step": 33720 }, { "epoch": 1.1991822949071194, "grad_norm": 1.7746280431747437, "learning_rate": 1.4025207337791593e-05, "loss": 1.6962, "step": 33730 }, { "epoch": 1.1995378188605457, "grad_norm": 1.820473313331604, "learning_rate": 1.402161943870921e-05, "loss": 1.7047, "step": 33740 }, { "epoch": 1.1998933428139722, "grad_norm": 1.762587308883667, "learning_rate": 1.4018030921931301e-05, "loss": 1.6823, "step": 33750 }, { "epoch": 1.2002488667673985, "grad_norm": 1.772749423980713, "learning_rate": 1.4014441788009031e-05, "loss": 1.698, "step": 33760 }, { "epoch": 1.2006043907208248, "grad_norm": 1.96071457862854, "learning_rate": 1.4010852037493677e-05, "loss": 1.6903, "step": 33770 }, { "epoch": 1.2009599146742511, "grad_norm": 1.803092122077942, "learning_rate": 1.4007261670936599e-05, "loss": 1.709, "step": 33780 }, { "epoch": 1.2013154386276774, "grad_norm": 1.7847096920013428, "learning_rate": 1.400367068888925e-05, "loss": 1.6616, "step": 33790 }, { "epoch": 1.201670962581104, "grad_norm": 1.7999202013015747, "learning_rate": 1.4000079091903187e-05, "loss": 1.7288, "step": 33800 }, { "epoch": 1.2020264865345303, "grad_norm": 1.7760398387908936, "learning_rate": 1.399648688053006e-05, "loss": 1.7137, "step": 33810 }, { "epoch": 1.2023820104879566, "grad_norm": 1.9159443378448486, "learning_rate": 1.3992894055321604e-05, "loss": 1.7019, "step": 33820 }, { "epoch": 1.202737534441383, "grad_norm": 1.7627336978912354, "learning_rate": 1.3989300616829655e-05, "loss": 1.7146, "step": 33830 }, { "epoch": 1.2030930583948094, "grad_norm": 1.7011263370513916, "learning_rate": 1.3985706565606146e-05, "loss": 1.6801, "step": 33840 }, { "epoch": 1.2034485823482357, "grad_norm": 1.8217169046401978, "learning_rate": 1.3982111902203101e-05, "loss": 1.6935, "step": 33850 }, { "epoch": 1.203804106301662, "grad_norm": 1.8142902851104736, "learning_rate": 1.397851662717263e-05, "loss": 1.7484, "step": 33860 }, { "epoch": 1.2041596302550883, "grad_norm": 1.8981884717941284, "learning_rate": 1.3974920741066958e-05, "loss": 1.7154, "step": 33870 }, { "epoch": 1.2045151542085148, "grad_norm": 1.6906952857971191, "learning_rate": 1.3971324244438377e-05, "loss": 1.6937, "step": 33880 }, { "epoch": 1.2048706781619412, "grad_norm": 1.6254799365997314, "learning_rate": 1.3967727137839297e-05, "loss": 1.7114, "step": 33890 }, { "epoch": 1.2052262021153675, "grad_norm": 1.7021291255950928, "learning_rate": 1.3964129421822203e-05, "loss": 1.6794, "step": 33900 }, { "epoch": 1.205581726068794, "grad_norm": 1.829317569732666, "learning_rate": 1.396053109693969e-05, "loss": 1.691, "step": 33910 }, { "epoch": 1.2059372500222203, "grad_norm": 1.8096377849578857, "learning_rate": 1.395693216374443e-05, "loss": 1.6996, "step": 33920 }, { "epoch": 1.2062927739756466, "grad_norm": 1.5986207723617554, "learning_rate": 1.3953332622789197e-05, "loss": 1.714, "step": 33930 }, { "epoch": 1.206648297929073, "grad_norm": 1.7374706268310547, "learning_rate": 1.3949732474626862e-05, "loss": 1.6701, "step": 33940 }, { "epoch": 1.2070038218824992, "grad_norm": 1.7917215824127197, "learning_rate": 1.3946131719810386e-05, "loss": 1.7326, "step": 33950 }, { "epoch": 1.2073593458359257, "grad_norm": 1.7431756258010864, "learning_rate": 1.3942530358892821e-05, "loss": 1.7173, "step": 33960 }, { "epoch": 1.207714869789352, "grad_norm": 1.858600378036499, "learning_rate": 1.3938928392427313e-05, "loss": 1.7309, "step": 33970 }, { "epoch": 1.2080703937427784, "grad_norm": 1.8365451097488403, "learning_rate": 1.3935325820967098e-05, "loss": 1.6759, "step": 33980 }, { "epoch": 1.2084259176962049, "grad_norm": 1.8952609300613403, "learning_rate": 1.3931722645065513e-05, "loss": 1.6591, "step": 33990 }, { "epoch": 1.2087814416496312, "grad_norm": 1.8191903829574585, "learning_rate": 1.3928118865275981e-05, "loss": 1.7209, "step": 34000 }, { "epoch": 1.2091369656030575, "grad_norm": 1.7062873840332031, "learning_rate": 1.3924514482152023e-05, "loss": 1.751, "step": 34010 }, { "epoch": 1.2094924895564838, "grad_norm": 2.049180030822754, "learning_rate": 1.3920909496247243e-05, "loss": 1.6886, "step": 34020 }, { "epoch": 1.2098480135099101, "grad_norm": 1.9235094785690308, "learning_rate": 1.3917303908115356e-05, "loss": 1.6867, "step": 34030 }, { "epoch": 1.2102035374633366, "grad_norm": 1.7062004804611206, "learning_rate": 1.3913697718310144e-05, "loss": 1.674, "step": 34040 }, { "epoch": 1.210559061416763, "grad_norm": 1.7808175086975098, "learning_rate": 1.3910090927385507e-05, "loss": 1.7013, "step": 34050 }, { "epoch": 1.2109145853701893, "grad_norm": 1.811781644821167, "learning_rate": 1.3906483535895414e-05, "loss": 1.6935, "step": 34060 }, { "epoch": 1.2112701093236158, "grad_norm": 1.755676507949829, "learning_rate": 1.3902875544393947e-05, "loss": 1.6571, "step": 34070 }, { "epoch": 1.211625633277042, "grad_norm": 1.848569631576538, "learning_rate": 1.3899266953435266e-05, "loss": 1.7062, "step": 34080 }, { "epoch": 1.2119811572304684, "grad_norm": 1.7534435987472534, "learning_rate": 1.3895657763573631e-05, "loss": 1.7107, "step": 34090 }, { "epoch": 1.2123366811838947, "grad_norm": 1.728899359703064, "learning_rate": 1.389204797536339e-05, "loss": 1.6623, "step": 34100 }, { "epoch": 1.212692205137321, "grad_norm": 1.7825510501861572, "learning_rate": 1.3888437589358982e-05, "loss": 1.6603, "step": 34110 }, { "epoch": 1.2130477290907475, "grad_norm": 1.7410731315612793, "learning_rate": 1.3884826606114941e-05, "loss": 1.6778, "step": 34120 }, { "epoch": 1.2134032530441738, "grad_norm": 1.7989734411239624, "learning_rate": 1.3881215026185893e-05, "loss": 1.6652, "step": 34130 }, { "epoch": 1.2137587769976002, "grad_norm": 1.8484128713607788, "learning_rate": 1.387760285012655e-05, "loss": 1.6831, "step": 34140 }, { "epoch": 1.2141143009510267, "grad_norm": 1.8029778003692627, "learning_rate": 1.3873990078491723e-05, "loss": 1.6536, "step": 34150 }, { "epoch": 1.214469824904453, "grad_norm": 1.713118553161621, "learning_rate": 1.387037671183631e-05, "loss": 1.68, "step": 34160 }, { "epoch": 1.2148253488578793, "grad_norm": 1.7993403673171997, "learning_rate": 1.3866762750715303e-05, "loss": 1.7248, "step": 34170 }, { "epoch": 1.2151808728113056, "grad_norm": 1.9654228687286377, "learning_rate": 1.386314819568378e-05, "loss": 1.7362, "step": 34180 }, { "epoch": 1.215536396764732, "grad_norm": 1.859349012374878, "learning_rate": 1.3859533047296916e-05, "loss": 1.6999, "step": 34190 }, { "epoch": 1.2158919207181584, "grad_norm": 1.712769627571106, "learning_rate": 1.3855917306109976e-05, "loss": 1.689, "step": 34200 }, { "epoch": 1.2162474446715847, "grad_norm": 1.7936985492706299, "learning_rate": 1.3852300972678316e-05, "loss": 1.6774, "step": 34210 }, { "epoch": 1.216602968625011, "grad_norm": 1.7569528818130493, "learning_rate": 1.3848684047557384e-05, "loss": 1.6438, "step": 34220 }, { "epoch": 1.2169584925784376, "grad_norm": 1.7265123128890991, "learning_rate": 1.3845066531302708e-05, "loss": 1.7195, "step": 34230 }, { "epoch": 1.2173140165318639, "grad_norm": 1.661879062652588, "learning_rate": 1.3841448424469923e-05, "loss": 1.7101, "step": 34240 }, { "epoch": 1.2176695404852902, "grad_norm": 1.9059056043624878, "learning_rate": 1.3837829727614745e-05, "loss": 1.6865, "step": 34250 }, { "epoch": 1.2180250644387165, "grad_norm": 1.8204132318496704, "learning_rate": 1.3834210441292986e-05, "loss": 1.6746, "step": 34260 }, { "epoch": 1.2183805883921428, "grad_norm": 1.8100894689559937, "learning_rate": 1.3830590566060545e-05, "loss": 1.6981, "step": 34270 }, { "epoch": 1.2187361123455693, "grad_norm": 1.8906034231185913, "learning_rate": 1.3826970102473407e-05, "loss": 1.6893, "step": 34280 }, { "epoch": 1.2190916362989956, "grad_norm": 1.7277697324752808, "learning_rate": 1.382334905108766e-05, "loss": 1.6698, "step": 34290 }, { "epoch": 1.219447160252422, "grad_norm": 1.8846702575683594, "learning_rate": 1.3819727412459471e-05, "loss": 1.715, "step": 34300 }, { "epoch": 1.2198026842058485, "grad_norm": 1.9642268419265747, "learning_rate": 1.38161051871451e-05, "loss": 1.6501, "step": 34310 }, { "epoch": 1.2201582081592748, "grad_norm": 1.7906246185302734, "learning_rate": 1.3812482375700899e-05, "loss": 1.7008, "step": 34320 }, { "epoch": 1.220513732112701, "grad_norm": 1.8596678972244263, "learning_rate": 1.380885897868331e-05, "loss": 1.711, "step": 34330 }, { "epoch": 1.2208692560661274, "grad_norm": 1.7478065490722656, "learning_rate": 1.3805234996648867e-05, "loss": 1.7013, "step": 34340 }, { "epoch": 1.2212247800195537, "grad_norm": 1.812665343284607, "learning_rate": 1.3801610430154182e-05, "loss": 1.6807, "step": 34350 }, { "epoch": 1.2215803039729802, "grad_norm": 1.7723488807678223, "learning_rate": 1.3797985279755975e-05, "loss": 1.6957, "step": 34360 }, { "epoch": 1.2219358279264065, "grad_norm": 1.8480596542358398, "learning_rate": 1.3794359546011042e-05, "loss": 1.7231, "step": 34370 }, { "epoch": 1.2222913518798328, "grad_norm": 1.8381192684173584, "learning_rate": 1.3790733229476272e-05, "loss": 1.6965, "step": 34380 }, { "epoch": 1.2226468758332594, "grad_norm": 1.8478515148162842, "learning_rate": 1.3787106330708646e-05, "loss": 1.6672, "step": 34390 }, { "epoch": 1.2230023997866857, "grad_norm": 1.7459863424301147, "learning_rate": 1.3783478850265238e-05, "loss": 1.6625, "step": 34400 }, { "epoch": 1.223357923740112, "grad_norm": 1.8091344833374023, "learning_rate": 1.3779850788703196e-05, "loss": 1.6789, "step": 34410 }, { "epoch": 1.2237134476935383, "grad_norm": 1.9412217140197754, "learning_rate": 1.3776222146579772e-05, "loss": 1.6679, "step": 34420 }, { "epoch": 1.2240689716469646, "grad_norm": 1.6879805326461792, "learning_rate": 1.3772592924452307e-05, "loss": 1.7144, "step": 34430 }, { "epoch": 1.2244244956003911, "grad_norm": 1.8009696006774902, "learning_rate": 1.3768963122878218e-05, "loss": 1.7023, "step": 34440 }, { "epoch": 1.2247800195538174, "grad_norm": 1.8283684253692627, "learning_rate": 1.3765332742415031e-05, "loss": 1.6903, "step": 34450 }, { "epoch": 1.2251355435072437, "grad_norm": 1.8491322994232178, "learning_rate": 1.376170178362034e-05, "loss": 1.7263, "step": 34460 }, { "epoch": 1.2254910674606703, "grad_norm": 1.8187154531478882, "learning_rate": 1.3758070247051844e-05, "loss": 1.677, "step": 34470 }, { "epoch": 1.2258465914140966, "grad_norm": 1.9667563438415527, "learning_rate": 1.3754438133267318e-05, "loss": 1.7341, "step": 34480 }, { "epoch": 1.2262021153675229, "grad_norm": 1.81467866897583, "learning_rate": 1.3750805442824638e-05, "loss": 1.697, "step": 34490 }, { "epoch": 1.2265576393209492, "grad_norm": 1.808092713356018, "learning_rate": 1.3747172176281755e-05, "loss": 1.6521, "step": 34500 }, { "epoch": 1.2269131632743755, "grad_norm": 1.8158385753631592, "learning_rate": 1.3743538334196724e-05, "loss": 1.6757, "step": 34510 }, { "epoch": 1.227268687227802, "grad_norm": 1.8510080575942993, "learning_rate": 1.373990391712768e-05, "loss": 1.6837, "step": 34520 }, { "epoch": 1.2276242111812283, "grad_norm": 1.7220662832260132, "learning_rate": 1.3736268925632841e-05, "loss": 1.6921, "step": 34530 }, { "epoch": 1.2279797351346546, "grad_norm": 1.6918028593063354, "learning_rate": 1.3732633360270522e-05, "loss": 1.7004, "step": 34540 }, { "epoch": 1.2283352590880812, "grad_norm": 1.8706375360488892, "learning_rate": 1.372899722159912e-05, "loss": 1.6898, "step": 34550 }, { "epoch": 1.2286907830415075, "grad_norm": 1.8237711191177368, "learning_rate": 1.3725360510177127e-05, "loss": 1.7009, "step": 34560 }, { "epoch": 1.2290463069949338, "grad_norm": 1.7138383388519287, "learning_rate": 1.372172322656312e-05, "loss": 1.6898, "step": 34570 }, { "epoch": 1.22940183094836, "grad_norm": 1.8330506086349487, "learning_rate": 1.3718085371315756e-05, "loss": 1.6476, "step": 34580 }, { "epoch": 1.2297573549017864, "grad_norm": 2.02473783493042, "learning_rate": 1.3714446944993798e-05, "loss": 1.6952, "step": 34590 }, { "epoch": 1.230112878855213, "grad_norm": 1.7767274379730225, "learning_rate": 1.3710807948156076e-05, "loss": 1.6857, "step": 34600 }, { "epoch": 1.2304684028086392, "grad_norm": 1.7729326486587524, "learning_rate": 1.370716838136152e-05, "loss": 1.7109, "step": 34610 }, { "epoch": 1.2308239267620655, "grad_norm": 1.718795657157898, "learning_rate": 1.3703528245169145e-05, "loss": 1.6839, "step": 34620 }, { "epoch": 1.231179450715492, "grad_norm": 1.7591612339019775, "learning_rate": 1.3699887540138052e-05, "loss": 1.686, "step": 34630 }, { "epoch": 1.2315349746689184, "grad_norm": 1.8209606409072876, "learning_rate": 1.369624626682743e-05, "loss": 1.6415, "step": 34640 }, { "epoch": 1.2318904986223447, "grad_norm": 1.776091456413269, "learning_rate": 1.3692604425796564e-05, "loss": 1.7014, "step": 34650 }, { "epoch": 1.232246022575771, "grad_norm": 1.828403115272522, "learning_rate": 1.3688962017604804e-05, "loss": 1.6566, "step": 34660 }, { "epoch": 1.2326015465291973, "grad_norm": 1.7923558950424194, "learning_rate": 1.368531904281161e-05, "loss": 1.6052, "step": 34670 }, { "epoch": 1.2329570704826238, "grad_norm": 1.8252242803573608, "learning_rate": 1.3681675501976517e-05, "loss": 1.7341, "step": 34680 }, { "epoch": 1.2333125944360501, "grad_norm": 1.7574840784072876, "learning_rate": 1.3678031395659152e-05, "loss": 1.6914, "step": 34690 }, { "epoch": 1.2336681183894764, "grad_norm": 1.817844271659851, "learning_rate": 1.3674386724419227e-05, "loss": 1.7124, "step": 34700 }, { "epoch": 1.234023642342903, "grad_norm": 1.7804555892944336, "learning_rate": 1.367074148881654e-05, "loss": 1.6931, "step": 34710 }, { "epoch": 1.2343791662963293, "grad_norm": 1.8270343542099, "learning_rate": 1.3667095689410976e-05, "loss": 1.7101, "step": 34720 }, { "epoch": 1.2347346902497556, "grad_norm": 1.6519355773925781, "learning_rate": 1.3663449326762505e-05, "loss": 1.7021, "step": 34730 }, { "epoch": 1.2350902142031819, "grad_norm": 1.8363620042800903, "learning_rate": 1.3659802401431189e-05, "loss": 1.7059, "step": 34740 }, { "epoch": 1.2354457381566082, "grad_norm": 1.9127928018569946, "learning_rate": 1.3656154913977169e-05, "loss": 1.6899, "step": 34750 }, { "epoch": 1.2358012621100347, "grad_norm": 1.982515573501587, "learning_rate": 1.3652506864960679e-05, "loss": 1.7332, "step": 34760 }, { "epoch": 1.236156786063461, "grad_norm": 1.9231630563735962, "learning_rate": 1.3648858254942039e-05, "loss": 1.6745, "step": 34770 }, { "epoch": 1.2365123100168873, "grad_norm": 1.6801555156707764, "learning_rate": 1.364520908448165e-05, "loss": 1.6811, "step": 34780 }, { "epoch": 1.2368678339703139, "grad_norm": 1.9552178382873535, "learning_rate": 1.3641559354139999e-05, "loss": 1.7243, "step": 34790 }, { "epoch": 1.2372233579237402, "grad_norm": 1.7672715187072754, "learning_rate": 1.3637909064477664e-05, "loss": 1.7373, "step": 34800 }, { "epoch": 1.2375788818771665, "grad_norm": 1.823472261428833, "learning_rate": 1.3634258216055305e-05, "loss": 1.701, "step": 34810 }, { "epoch": 1.2379344058305928, "grad_norm": 1.8090918064117432, "learning_rate": 1.3630606809433672e-05, "loss": 1.6791, "step": 34820 }, { "epoch": 1.238289929784019, "grad_norm": 1.8534045219421387, "learning_rate": 1.3626954845173599e-05, "loss": 1.6525, "step": 34830 }, { "epoch": 1.2386454537374456, "grad_norm": 1.9117337465286255, "learning_rate": 1.3623302323836001e-05, "loss": 1.6886, "step": 34840 }, { "epoch": 1.239000977690872, "grad_norm": 1.8775508403778076, "learning_rate": 1.3619649245981885e-05, "loss": 1.7097, "step": 34850 }, { "epoch": 1.2393565016442982, "grad_norm": 1.8384355306625366, "learning_rate": 1.3615995612172342e-05, "loss": 1.6513, "step": 34860 }, { "epoch": 1.2397120255977248, "grad_norm": 1.7076852321624756, "learning_rate": 1.3612341422968542e-05, "loss": 1.6979, "step": 34870 }, { "epoch": 1.240067549551151, "grad_norm": 1.8162572383880615, "learning_rate": 1.3608686678931751e-05, "loss": 1.6407, "step": 34880 }, { "epoch": 1.2404230735045774, "grad_norm": 1.76498544216156, "learning_rate": 1.3605031380623312e-05, "loss": 1.6917, "step": 34890 }, { "epoch": 1.2407785974580037, "grad_norm": 1.8899500370025635, "learning_rate": 1.360137552860466e-05, "loss": 1.6707, "step": 34900 }, { "epoch": 1.24113412141143, "grad_norm": 1.8753656148910522, "learning_rate": 1.3597719123437302e-05, "loss": 1.7146, "step": 34910 }, { "epoch": 1.2414896453648565, "grad_norm": 1.6240572929382324, "learning_rate": 1.3594062165682846e-05, "loss": 1.7083, "step": 34920 }, { "epoch": 1.2418451693182828, "grad_norm": 1.7258415222167969, "learning_rate": 1.3590404655902979e-05, "loss": 1.6886, "step": 34930 }, { "epoch": 1.2422006932717091, "grad_norm": 1.8822959661483765, "learning_rate": 1.3586746594659468e-05, "loss": 1.7159, "step": 34940 }, { "epoch": 1.2425562172251357, "grad_norm": 1.784230351448059, "learning_rate": 1.3583087982514168e-05, "loss": 1.6507, "step": 34950 }, { "epoch": 1.242911741178562, "grad_norm": 1.8265002965927124, "learning_rate": 1.3579428820029021e-05, "loss": 1.7289, "step": 34960 }, { "epoch": 1.2432672651319883, "grad_norm": 1.8411922454833984, "learning_rate": 1.357576910776605e-05, "loss": 1.6828, "step": 34970 }, { "epoch": 1.2436227890854146, "grad_norm": 1.806093692779541, "learning_rate": 1.3572108846287364e-05, "loss": 1.6783, "step": 34980 }, { "epoch": 1.2439783130388409, "grad_norm": 1.904137372970581, "learning_rate": 1.3568448036155158e-05, "loss": 1.6938, "step": 34990 }, { "epoch": 1.2443338369922674, "grad_norm": 1.7954858541488647, "learning_rate": 1.3564786677931706e-05, "loss": 1.7159, "step": 35000 }, { "epoch": 1.2446893609456937, "grad_norm": 1.7145042419433594, "learning_rate": 1.3561124772179372e-05, "loss": 1.6792, "step": 35010 }, { "epoch": 1.24504488489912, "grad_norm": 1.7948307991027832, "learning_rate": 1.3557462319460602e-05, "loss": 1.6726, "step": 35020 }, { "epoch": 1.2454004088525465, "grad_norm": 1.7869338989257812, "learning_rate": 1.3553799320337926e-05, "loss": 1.7123, "step": 35030 }, { "epoch": 1.2457559328059729, "grad_norm": 1.7811120748519897, "learning_rate": 1.3550135775373957e-05, "loss": 1.7266, "step": 35040 }, { "epoch": 1.2461114567593992, "grad_norm": 1.7845333814620972, "learning_rate": 1.354647168513139e-05, "loss": 1.6855, "step": 35050 }, { "epoch": 1.2464669807128255, "grad_norm": 1.8383392095565796, "learning_rate": 1.3542807050173008e-05, "loss": 1.6979, "step": 35060 }, { "epoch": 1.2468225046662518, "grad_norm": 1.8375091552734375, "learning_rate": 1.3539141871061679e-05, "loss": 1.7184, "step": 35070 }, { "epoch": 1.2471780286196783, "grad_norm": 1.8529398441314697, "learning_rate": 1.3535476148360349e-05, "loss": 1.6638, "step": 35080 }, { "epoch": 1.2475335525731046, "grad_norm": 1.7339930534362793, "learning_rate": 1.3531809882632052e-05, "loss": 1.6769, "step": 35090 }, { "epoch": 1.247889076526531, "grad_norm": 1.7067604064941406, "learning_rate": 1.35281430744399e-05, "loss": 1.6856, "step": 35100 }, { "epoch": 1.2482446004799574, "grad_norm": 1.8274784088134766, "learning_rate": 1.3524475724347093e-05, "loss": 1.6603, "step": 35110 }, { "epoch": 1.2486001244333838, "grad_norm": 1.8580459356307983, "learning_rate": 1.3520807832916913e-05, "loss": 1.7015, "step": 35120 }, { "epoch": 1.24895564838681, "grad_norm": 1.9023088216781616, "learning_rate": 1.3517139400712727e-05, "loss": 1.7105, "step": 35130 }, { "epoch": 1.2493111723402364, "grad_norm": 1.7594802379608154, "learning_rate": 1.3513470428297981e-05, "loss": 1.6994, "step": 35140 }, { "epoch": 1.2496666962936627, "grad_norm": 1.7522236108779907, "learning_rate": 1.3509800916236207e-05, "loss": 1.6827, "step": 35150 }, { "epoch": 1.2500222202470892, "grad_norm": 1.883756160736084, "learning_rate": 1.3506130865091017e-05, "loss": 1.6812, "step": 35160 }, { "epoch": 1.2503777442005155, "grad_norm": 1.8085405826568604, "learning_rate": 1.350246027542611e-05, "loss": 1.6747, "step": 35170 }, { "epoch": 1.2507332681539418, "grad_norm": 1.7783538103103638, "learning_rate": 1.3498789147805269e-05, "loss": 1.7187, "step": 35180 }, { "epoch": 1.2510887921073683, "grad_norm": 1.8453665971755981, "learning_rate": 1.3495117482792348e-05, "loss": 1.7129, "step": 35190 }, { "epoch": 1.2514443160607946, "grad_norm": 1.7402855157852173, "learning_rate": 1.3491445280951299e-05, "loss": 1.7051, "step": 35200 }, { "epoch": 1.251799840014221, "grad_norm": 1.868834137916565, "learning_rate": 1.3487772542846145e-05, "loss": 1.7227, "step": 35210 }, { "epoch": 1.2521553639676473, "grad_norm": 1.842235803604126, "learning_rate": 1.3484099269040997e-05, "loss": 1.7129, "step": 35220 }, { "epoch": 1.2525108879210736, "grad_norm": 1.8452571630477905, "learning_rate": 1.348042546010005e-05, "loss": 1.7115, "step": 35230 }, { "epoch": 1.2528664118745, "grad_norm": 1.778145670890808, "learning_rate": 1.3476751116587567e-05, "loss": 1.7321, "step": 35240 }, { "epoch": 1.2532219358279264, "grad_norm": 1.7019305229187012, "learning_rate": 1.347307623906792e-05, "loss": 1.6857, "step": 35250 }, { "epoch": 1.2535774597813527, "grad_norm": 1.7667639255523682, "learning_rate": 1.3469400828105537e-05, "loss": 1.6976, "step": 35260 }, { "epoch": 1.2539329837347792, "grad_norm": 1.8197112083435059, "learning_rate": 1.3465724884264939e-05, "loss": 1.6532, "step": 35270 }, { "epoch": 1.2542885076882055, "grad_norm": 1.7577474117279053, "learning_rate": 1.3462048408110729e-05, "loss": 1.7048, "step": 35280 }, { "epoch": 1.2546440316416319, "grad_norm": 1.8621435165405273, "learning_rate": 1.3458371400207591e-05, "loss": 1.7134, "step": 35290 }, { "epoch": 1.2549995555950582, "grad_norm": 1.8118704557418823, "learning_rate": 1.3454693861120287e-05, "loss": 1.7063, "step": 35300 }, { "epoch": 1.2553550795484845, "grad_norm": 1.9403353929519653, "learning_rate": 1.345101579141367e-05, "loss": 1.695, "step": 35310 }, { "epoch": 1.255710603501911, "grad_norm": 2.0728812217712402, "learning_rate": 1.3447337191652665e-05, "loss": 1.656, "step": 35320 }, { "epoch": 1.2560661274553373, "grad_norm": 1.7686488628387451, "learning_rate": 1.3443658062402284e-05, "loss": 1.709, "step": 35330 }, { "epoch": 1.2564216514087636, "grad_norm": 1.8105318546295166, "learning_rate": 1.3439978404227616e-05, "loss": 1.6674, "step": 35340 }, { "epoch": 1.2567771753621901, "grad_norm": 1.846477746963501, "learning_rate": 1.3436298217693832e-05, "loss": 1.6916, "step": 35350 }, { "epoch": 1.2571326993156164, "grad_norm": 1.7145262956619263, "learning_rate": 1.343261750336619e-05, "loss": 1.6748, "step": 35360 }, { "epoch": 1.2574882232690427, "grad_norm": 1.8705803155899048, "learning_rate": 1.342893626181002e-05, "loss": 1.6939, "step": 35370 }, { "epoch": 1.257843747222469, "grad_norm": 1.735656976699829, "learning_rate": 1.3425254493590741e-05, "loss": 1.6982, "step": 35380 }, { "epoch": 1.2581992711758954, "grad_norm": 1.774243712425232, "learning_rate": 1.3421572199273849e-05, "loss": 1.6894, "step": 35390 }, { "epoch": 1.258554795129322, "grad_norm": 1.7297285795211792, "learning_rate": 1.3417889379424918e-05, "loss": 1.6978, "step": 35400 }, { "epoch": 1.2589103190827482, "grad_norm": 1.833452582359314, "learning_rate": 1.341420603460961e-05, "loss": 1.7104, "step": 35410 }, { "epoch": 1.2592658430361745, "grad_norm": 1.8359464406967163, "learning_rate": 1.3410522165393664e-05, "loss": 1.7002, "step": 35420 }, { "epoch": 1.259621366989601, "grad_norm": 1.799144983291626, "learning_rate": 1.3406837772342896e-05, "loss": 1.6812, "step": 35430 }, { "epoch": 1.2599768909430273, "grad_norm": 1.920923113822937, "learning_rate": 1.3403152856023205e-05, "loss": 1.6805, "step": 35440 }, { "epoch": 1.2603324148964536, "grad_norm": 1.8498185873031616, "learning_rate": 1.3399467417000579e-05, "loss": 1.6733, "step": 35450 }, { "epoch": 1.26068793884988, "grad_norm": 1.8264330625534058, "learning_rate": 1.3395781455841068e-05, "loss": 1.6729, "step": 35460 }, { "epoch": 1.2610434628033063, "grad_norm": 1.7874951362609863, "learning_rate": 1.3392094973110817e-05, "loss": 1.7034, "step": 35470 }, { "epoch": 1.2613989867567328, "grad_norm": 1.9353516101837158, "learning_rate": 1.3388407969376048e-05, "loss": 1.7026, "step": 35480 }, { "epoch": 1.261754510710159, "grad_norm": 2.0174925327301025, "learning_rate": 1.3384720445203059e-05, "loss": 1.7073, "step": 35490 }, { "epoch": 1.2621100346635854, "grad_norm": 1.7028112411499023, "learning_rate": 1.3381032401158236e-05, "loss": 1.6538, "step": 35500 }, { "epoch": 1.262465558617012, "grad_norm": 1.7406957149505615, "learning_rate": 1.337734383780803e-05, "loss": 1.6865, "step": 35510 }, { "epoch": 1.2628210825704382, "grad_norm": 1.819901466369629, "learning_rate": 1.3373654755718992e-05, "loss": 1.7195, "step": 35520 }, { "epoch": 1.2631766065238645, "grad_norm": 1.7240650653839111, "learning_rate": 1.3369965155457734e-05, "loss": 1.7193, "step": 35530 }, { "epoch": 1.2635321304772908, "grad_norm": 1.927585244178772, "learning_rate": 1.3366275037590957e-05, "loss": 1.7325, "step": 35540 }, { "epoch": 1.2638876544307172, "grad_norm": 1.6727463006973267, "learning_rate": 1.336258440268544e-05, "loss": 1.712, "step": 35550 }, { "epoch": 1.2642431783841437, "grad_norm": 1.7826107740402222, "learning_rate": 1.3358893251308044e-05, "loss": 1.6523, "step": 35560 }, { "epoch": 1.26459870233757, "grad_norm": 1.875220537185669, "learning_rate": 1.3355201584025705e-05, "loss": 1.7161, "step": 35570 }, { "epoch": 1.2649542262909963, "grad_norm": 1.8240917921066284, "learning_rate": 1.3351509401405443e-05, "loss": 1.7365, "step": 35580 }, { "epoch": 1.2653097502444228, "grad_norm": 1.892587661743164, "learning_rate": 1.3347816704014346e-05, "loss": 1.6922, "step": 35590 }, { "epoch": 1.2656652741978491, "grad_norm": 1.767132043838501, "learning_rate": 1.3344123492419598e-05, "loss": 1.6761, "step": 35600 }, { "epoch": 1.2660207981512754, "grad_norm": 1.8336524963378906, "learning_rate": 1.3340429767188448e-05, "loss": 1.6573, "step": 35610 }, { "epoch": 1.2663763221047017, "grad_norm": 1.8649629354476929, "learning_rate": 1.3336735528888227e-05, "loss": 1.6805, "step": 35620 }, { "epoch": 1.266731846058128, "grad_norm": 1.763104796409607, "learning_rate": 1.3333040778086353e-05, "loss": 1.701, "step": 35630 }, { "epoch": 1.2670873700115546, "grad_norm": 1.7915949821472168, "learning_rate": 1.3329345515350316e-05, "loss": 1.6907, "step": 35640 }, { "epoch": 1.2674428939649809, "grad_norm": 1.8075286149978638, "learning_rate": 1.332564974124768e-05, "loss": 1.6914, "step": 35650 }, { "epoch": 1.2677984179184072, "grad_norm": 1.7828816175460815, "learning_rate": 1.3321953456346099e-05, "loss": 1.711, "step": 35660 }, { "epoch": 1.2681539418718337, "grad_norm": 1.7524465322494507, "learning_rate": 1.3318256661213294e-05, "loss": 1.6562, "step": 35670 }, { "epoch": 1.26850946582526, "grad_norm": 1.8603500127792358, "learning_rate": 1.3314559356417069e-05, "loss": 1.6989, "step": 35680 }, { "epoch": 1.2688649897786863, "grad_norm": 1.7611099481582642, "learning_rate": 1.3310861542525312e-05, "loss": 1.6909, "step": 35690 }, { "epoch": 1.2692205137321126, "grad_norm": 1.8640097379684448, "learning_rate": 1.3307163220105983e-05, "loss": 1.6944, "step": 35700 }, { "epoch": 1.269576037685539, "grad_norm": 1.925833821296692, "learning_rate": 1.3303464389727117e-05, "loss": 1.705, "step": 35710 }, { "epoch": 1.2699315616389655, "grad_norm": 1.918041706085205, "learning_rate": 1.3299765051956835e-05, "loss": 1.7184, "step": 35720 }, { "epoch": 1.2702870855923918, "grad_norm": 1.8870186805725098, "learning_rate": 1.3296065207363327e-05, "loss": 1.6881, "step": 35730 }, { "epoch": 1.270642609545818, "grad_norm": 1.8254750967025757, "learning_rate": 1.3292364856514874e-05, "loss": 1.6941, "step": 35740 }, { "epoch": 1.2709981334992446, "grad_norm": 1.7230134010314941, "learning_rate": 1.328866399997982e-05, "loss": 1.711, "step": 35750 }, { "epoch": 1.271353657452671, "grad_norm": 1.8757699728012085, "learning_rate": 1.3284962638326597e-05, "loss": 1.6883, "step": 35760 }, { "epoch": 1.2717091814060972, "grad_norm": 1.719381332397461, "learning_rate": 1.328126077212371e-05, "loss": 1.7017, "step": 35770 }, { "epoch": 1.2720647053595235, "grad_norm": 1.8136777877807617, "learning_rate": 1.327755840193974e-05, "loss": 1.7015, "step": 35780 }, { "epoch": 1.2724202293129498, "grad_norm": 1.7327030897140503, "learning_rate": 1.3273855528343349e-05, "loss": 1.7203, "step": 35790 }, { "epoch": 1.2727757532663764, "grad_norm": 1.8280681371688843, "learning_rate": 1.327015215190328e-05, "loss": 1.6835, "step": 35800 }, { "epoch": 1.2731312772198027, "grad_norm": 1.7831275463104248, "learning_rate": 1.3266448273188341e-05, "loss": 1.6701, "step": 35810 }, { "epoch": 1.273486801173229, "grad_norm": 1.9062355756759644, "learning_rate": 1.3262743892767431e-05, "loss": 1.6997, "step": 35820 }, { "epoch": 1.2738423251266555, "grad_norm": 1.8788557052612305, "learning_rate": 1.3259039011209515e-05, "loss": 1.6656, "step": 35830 }, { "epoch": 1.2741978490800818, "grad_norm": 1.877271294593811, "learning_rate": 1.3255333629083642e-05, "loss": 1.6937, "step": 35840 }, { "epoch": 1.2745533730335081, "grad_norm": 1.6842414140701294, "learning_rate": 1.3251627746958934e-05, "loss": 1.6561, "step": 35850 }, { "epoch": 1.2749088969869344, "grad_norm": 1.8604660034179688, "learning_rate": 1.324792136540459e-05, "loss": 1.6548, "step": 35860 }, { "epoch": 1.2752644209403607, "grad_norm": 1.8948099613189697, "learning_rate": 1.3244214484989892e-05, "loss": 1.6864, "step": 35870 }, { "epoch": 1.2756199448937873, "grad_norm": 1.9044815301895142, "learning_rate": 1.324050710628419e-05, "loss": 1.7124, "step": 35880 }, { "epoch": 1.2759754688472136, "grad_norm": 1.8357096910476685, "learning_rate": 1.3236799229856914e-05, "loss": 1.6783, "step": 35890 }, { "epoch": 1.2763309928006399, "grad_norm": 1.8171970844268799, "learning_rate": 1.3233090856277573e-05, "loss": 1.6754, "step": 35900 }, { "epoch": 1.2766865167540664, "grad_norm": 1.913097858428955, "learning_rate": 1.3229381986115746e-05, "loss": 1.6948, "step": 35910 }, { "epoch": 1.2770420407074927, "grad_norm": 1.8764780759811401, "learning_rate": 1.3225672619941094e-05, "loss": 1.6527, "step": 35920 }, { "epoch": 1.277397564660919, "grad_norm": 1.7733298540115356, "learning_rate": 1.3221962758323352e-05, "loss": 1.703, "step": 35930 }, { "epoch": 1.2777530886143453, "grad_norm": 1.6520118713378906, "learning_rate": 1.3218252401832334e-05, "loss": 1.6956, "step": 35940 }, { "epoch": 1.2781086125677716, "grad_norm": 1.903979778289795, "learning_rate": 1.3214541551037927e-05, "loss": 1.6827, "step": 35950 }, { "epoch": 1.2784641365211982, "grad_norm": 1.8394795656204224, "learning_rate": 1.321083020651009e-05, "loss": 1.6883, "step": 35960 }, { "epoch": 1.2788196604746245, "grad_norm": 1.7833898067474365, "learning_rate": 1.3207118368818866e-05, "loss": 1.6519, "step": 35970 }, { "epoch": 1.2791751844280508, "grad_norm": 1.7083699703216553, "learning_rate": 1.3203406038534369e-05, "loss": 1.6612, "step": 35980 }, { "epoch": 1.2795307083814773, "grad_norm": 1.6796025037765503, "learning_rate": 1.3199693216226792e-05, "loss": 1.7073, "step": 35990 }, { "epoch": 1.2798862323349036, "grad_norm": 1.7842929363250732, "learning_rate": 1.3195979902466398e-05, "loss": 1.6563, "step": 36000 }, { "epoch": 1.28024175628833, "grad_norm": 1.8019449710845947, "learning_rate": 1.3192266097823531e-05, "loss": 1.6759, "step": 36010 }, { "epoch": 1.2805972802417562, "grad_norm": 1.7412304878234863, "learning_rate": 1.3188551802868606e-05, "loss": 1.6781, "step": 36020 }, { "epoch": 1.2809528041951825, "grad_norm": 1.7398102283477783, "learning_rate": 1.3184837018172117e-05, "loss": 1.6621, "step": 36030 }, { "epoch": 1.281308328148609, "grad_norm": 1.9147517681121826, "learning_rate": 1.3181121744304628e-05, "loss": 1.7276, "step": 36040 }, { "epoch": 1.2816638521020354, "grad_norm": 1.8121700286865234, "learning_rate": 1.3177405981836788e-05, "loss": 1.6727, "step": 36050 }, { "epoch": 1.2820193760554617, "grad_norm": 1.7942123413085938, "learning_rate": 1.3173689731339315e-05, "loss": 1.689, "step": 36060 }, { "epoch": 1.2823749000088882, "grad_norm": 2.0419764518737793, "learning_rate": 1.3169972993382991e-05, "loss": 1.7237, "step": 36070 }, { "epoch": 1.2827304239623145, "grad_norm": 1.829637050628662, "learning_rate": 1.3166255768538699e-05, "loss": 1.7182, "step": 36080 }, { "epoch": 1.2830859479157408, "grad_norm": 1.7687910795211792, "learning_rate": 1.3162538057377367e-05, "loss": 1.7163, "step": 36090 }, { "epoch": 1.2834414718691671, "grad_norm": 1.6497434377670288, "learning_rate": 1.3158819860470021e-05, "loss": 1.7149, "step": 36100 }, { "epoch": 1.2837969958225934, "grad_norm": 1.7790758609771729, "learning_rate": 1.315510117838775e-05, "loss": 1.6979, "step": 36110 }, { "epoch": 1.28415251977602, "grad_norm": 1.9528772830963135, "learning_rate": 1.315138201170172e-05, "loss": 1.7035, "step": 36120 }, { "epoch": 1.2845080437294463, "grad_norm": 1.720088243484497, "learning_rate": 1.3147662360983176e-05, "loss": 1.6743, "step": 36130 }, { "epoch": 1.2848635676828726, "grad_norm": 1.839159607887268, "learning_rate": 1.3143942226803427e-05, "loss": 1.6764, "step": 36140 }, { "epoch": 1.285219091636299, "grad_norm": 1.770328164100647, "learning_rate": 1.3140221609733862e-05, "loss": 1.695, "step": 36150 }, { "epoch": 1.2855746155897254, "grad_norm": 1.7510783672332764, "learning_rate": 1.3136500510345948e-05, "loss": 1.6912, "step": 36160 }, { "epoch": 1.2859301395431517, "grad_norm": 1.9082367420196533, "learning_rate": 1.3132778929211225e-05, "loss": 1.6526, "step": 36170 }, { "epoch": 1.286285663496578, "grad_norm": 2.1014037132263184, "learning_rate": 1.3129056866901297e-05, "loss": 1.6782, "step": 36180 }, { "epoch": 1.2866411874500043, "grad_norm": 1.6842938661575317, "learning_rate": 1.312533432398786e-05, "loss": 1.6733, "step": 36190 }, { "epoch": 1.2869967114034309, "grad_norm": 1.7734049558639526, "learning_rate": 1.312161130104266e-05, "loss": 1.6739, "step": 36200 }, { "epoch": 1.2873522353568572, "grad_norm": 1.7987641096115112, "learning_rate": 1.3117887798637538e-05, "loss": 1.7179, "step": 36210 }, { "epoch": 1.2877077593102835, "grad_norm": 1.8217778205871582, "learning_rate": 1.3114163817344403e-05, "loss": 1.6731, "step": 36220 }, { "epoch": 1.28806328326371, "grad_norm": 1.8839751482009888, "learning_rate": 1.311043935773523e-05, "loss": 1.7021, "step": 36230 }, { "epoch": 1.2884188072171363, "grad_norm": 1.7346824407577515, "learning_rate": 1.3106714420382072e-05, "loss": 1.697, "step": 36240 }, { "epoch": 1.2887743311705626, "grad_norm": 1.930270791053772, "learning_rate": 1.3102989005857061e-05, "loss": 1.7169, "step": 36250 }, { "epoch": 1.289129855123989, "grad_norm": 1.8689700365066528, "learning_rate": 1.3099263114732392e-05, "loss": 1.7053, "step": 36260 }, { "epoch": 1.2894853790774152, "grad_norm": 1.8215107917785645, "learning_rate": 1.3095536747580344e-05, "loss": 1.6984, "step": 36270 }, { "epoch": 1.2898409030308418, "grad_norm": 1.6780635118484497, "learning_rate": 1.3091809904973259e-05, "loss": 1.6695, "step": 36280 }, { "epoch": 1.290196426984268, "grad_norm": 1.7548359632492065, "learning_rate": 1.3088082587483556e-05, "loss": 1.6656, "step": 36290 }, { "epoch": 1.2905519509376944, "grad_norm": 1.8181124925613403, "learning_rate": 1.3084354795683735e-05, "loss": 1.6819, "step": 36300 }, { "epoch": 1.290907474891121, "grad_norm": 1.7393866777420044, "learning_rate": 1.3080626530146354e-05, "loss": 1.7029, "step": 36310 }, { "epoch": 1.2912629988445472, "grad_norm": 1.8912163972854614, "learning_rate": 1.3076897791444057e-05, "loss": 1.6755, "step": 36320 }, { "epoch": 1.2916185227979735, "grad_norm": 1.8694416284561157, "learning_rate": 1.3073168580149547e-05, "loss": 1.6558, "step": 36330 }, { "epoch": 1.2919740467513998, "grad_norm": 1.9025851488113403, "learning_rate": 1.3069438896835611e-05, "loss": 1.7164, "step": 36340 }, { "epoch": 1.2923295707048261, "grad_norm": 1.8557045459747314, "learning_rate": 1.3065708742075109e-05, "loss": 1.7191, "step": 36350 }, { "epoch": 1.2926850946582527, "grad_norm": 1.769911527633667, "learning_rate": 1.3061978116440965e-05, "loss": 1.6958, "step": 36360 }, { "epoch": 1.293040618611679, "grad_norm": 1.8770461082458496, "learning_rate": 1.3058247020506181e-05, "loss": 1.6676, "step": 36370 }, { "epoch": 1.2933961425651053, "grad_norm": 1.7757688760757446, "learning_rate": 1.3054515454843832e-05, "loss": 1.7228, "step": 36380 }, { "epoch": 1.2937516665185318, "grad_norm": 1.818790316581726, "learning_rate": 1.3050783420027063e-05, "loss": 1.6467, "step": 36390 }, { "epoch": 1.294107190471958, "grad_norm": 2.066945791244507, "learning_rate": 1.3047050916629085e-05, "loss": 1.6781, "step": 36400 }, { "epoch": 1.2944627144253844, "grad_norm": 1.7122598886489868, "learning_rate": 1.3043317945223191e-05, "loss": 1.748, "step": 36410 }, { "epoch": 1.2948182383788107, "grad_norm": 1.7318012714385986, "learning_rate": 1.3039584506382745e-05, "loss": 1.674, "step": 36420 }, { "epoch": 1.295173762332237, "grad_norm": 1.8598337173461914, "learning_rate": 1.3035850600681175e-05, "loss": 1.6954, "step": 36430 }, { "epoch": 1.2955292862856636, "grad_norm": 1.7719144821166992, "learning_rate": 1.3032116228691991e-05, "loss": 1.7006, "step": 36440 }, { "epoch": 1.2958848102390899, "grad_norm": 1.800174593925476, "learning_rate": 1.3028381390988762e-05, "loss": 1.6485, "step": 36450 }, { "epoch": 1.2962403341925162, "grad_norm": 1.820265531539917, "learning_rate": 1.302464608814514e-05, "loss": 1.6604, "step": 36460 }, { "epoch": 1.2965958581459427, "grad_norm": 1.8295187950134277, "learning_rate": 1.3020910320734845e-05, "loss": 1.7148, "step": 36470 }, { "epoch": 1.296951382099369, "grad_norm": 1.8874891996383667, "learning_rate": 1.3017174089331666e-05, "loss": 1.7115, "step": 36480 }, { "epoch": 1.2973069060527953, "grad_norm": 1.7128124237060547, "learning_rate": 1.3013437394509462e-05, "loss": 1.6749, "step": 36490 }, { "epoch": 1.2976624300062216, "grad_norm": 1.8880102634429932, "learning_rate": 1.300970023684217e-05, "loss": 1.6478, "step": 36500 }, { "epoch": 1.298017953959648, "grad_norm": 1.7475587129592896, "learning_rate": 1.3005962616903797e-05, "loss": 1.6977, "step": 36510 }, { "epoch": 1.2983734779130744, "grad_norm": 1.8399840593338013, "learning_rate": 1.3002224535268408e-05, "loss": 1.687, "step": 36520 }, { "epoch": 1.2987290018665008, "grad_norm": 1.9142301082611084, "learning_rate": 1.2998485992510156e-05, "loss": 1.6904, "step": 36530 }, { "epoch": 1.299084525819927, "grad_norm": 1.7761683464050293, "learning_rate": 1.299474698920326e-05, "loss": 1.678, "step": 36540 }, { "epoch": 1.2994400497733536, "grad_norm": 1.825366735458374, "learning_rate": 1.2991007525921999e-05, "loss": 1.6866, "step": 36550 }, { "epoch": 1.29979557372678, "grad_norm": 1.8218415975570679, "learning_rate": 1.2987267603240736e-05, "loss": 1.6699, "step": 36560 }, { "epoch": 1.3001510976802062, "grad_norm": 1.7932249307632446, "learning_rate": 1.2983527221733902e-05, "loss": 1.6618, "step": 36570 }, { "epoch": 1.3005066216336325, "grad_norm": 1.7458404302597046, "learning_rate": 1.2979786381975991e-05, "loss": 1.7192, "step": 36580 }, { "epoch": 1.3008621455870588, "grad_norm": 1.9243357181549072, "learning_rate": 1.2976045084541578e-05, "loss": 1.6671, "step": 36590 }, { "epoch": 1.3012176695404853, "grad_norm": 1.8128772974014282, "learning_rate": 1.2972303330005296e-05, "loss": 1.7004, "step": 36600 }, { "epoch": 1.3015731934939117, "grad_norm": 1.8563944101333618, "learning_rate": 1.296856111894186e-05, "loss": 1.6885, "step": 36610 }, { "epoch": 1.301928717447338, "grad_norm": 1.721156120300293, "learning_rate": 1.2964818451926053e-05, "loss": 1.6827, "step": 36620 }, { "epoch": 1.3022842414007645, "grad_norm": 1.9434478282928467, "learning_rate": 1.296107532953272e-05, "loss": 1.6935, "step": 36630 }, { "epoch": 1.3026397653541908, "grad_norm": 1.8720649480819702, "learning_rate": 1.2957331752336782e-05, "loss": 1.6935, "step": 36640 }, { "epoch": 1.302995289307617, "grad_norm": 1.8462556600570679, "learning_rate": 1.2953587720913225e-05, "loss": 1.6932, "step": 36650 }, { "epoch": 1.3033508132610434, "grad_norm": 1.834159016609192, "learning_rate": 1.2949843235837119e-05, "loss": 1.6637, "step": 36660 }, { "epoch": 1.3037063372144697, "grad_norm": 1.7397176027297974, "learning_rate": 1.2946098297683582e-05, "loss": 1.6449, "step": 36670 }, { "epoch": 1.3040618611678962, "grad_norm": 1.9851504564285278, "learning_rate": 1.2942352907027822e-05, "loss": 1.6977, "step": 36680 }, { "epoch": 1.3044173851213225, "grad_norm": 1.8635345697402954, "learning_rate": 1.2938607064445105e-05, "loss": 1.6898, "step": 36690 }, { "epoch": 1.3047729090747489, "grad_norm": 1.7845070362091064, "learning_rate": 1.293486077051077e-05, "loss": 1.6941, "step": 36700 }, { "epoch": 1.3051284330281754, "grad_norm": 1.809970498085022, "learning_rate": 1.293111402580022e-05, "loss": 1.6713, "step": 36710 }, { "epoch": 1.3054839569816017, "grad_norm": 1.8623954057693481, "learning_rate": 1.2927366830888933e-05, "loss": 1.676, "step": 36720 }, { "epoch": 1.305839480935028, "grad_norm": 1.7936292886734009, "learning_rate": 1.2923619186352454e-05, "loss": 1.7016, "step": 36730 }, { "epoch": 1.3061950048884543, "grad_norm": 1.50461745262146, "learning_rate": 1.2919871092766403e-05, "loss": 1.6145, "step": 36740 }, { "epoch": 1.3065505288418806, "grad_norm": 1.7584197521209717, "learning_rate": 1.2916122550706458e-05, "loss": 1.6588, "step": 36750 }, { "epoch": 1.3069060527953071, "grad_norm": 1.8236706256866455, "learning_rate": 1.2912373560748374e-05, "loss": 1.7182, "step": 36760 }, { "epoch": 1.3072615767487334, "grad_norm": 1.6947407722473145, "learning_rate": 1.290862412346797e-05, "loss": 1.6811, "step": 36770 }, { "epoch": 1.3076171007021598, "grad_norm": 1.9147242307662964, "learning_rate": 1.2904874239441143e-05, "loss": 1.6334, "step": 36780 }, { "epoch": 1.3079726246555863, "grad_norm": 1.8855173587799072, "learning_rate": 1.2901123909243842e-05, "loss": 1.7351, "step": 36790 }, { "epoch": 1.3083281486090126, "grad_norm": 1.8250107765197754, "learning_rate": 1.2897373133452098e-05, "loss": 1.713, "step": 36800 }, { "epoch": 1.308683672562439, "grad_norm": 1.9745123386383057, "learning_rate": 1.2893621912642007e-05, "loss": 1.7065, "step": 36810 }, { "epoch": 1.3090391965158652, "grad_norm": 1.8133615255355835, "learning_rate": 1.2889870247389738e-05, "loss": 1.6815, "step": 36820 }, { "epoch": 1.3093947204692915, "grad_norm": 1.8287822008132935, "learning_rate": 1.2886118138271514e-05, "loss": 1.7009, "step": 36830 }, { "epoch": 1.309750244422718, "grad_norm": 1.7905579805374146, "learning_rate": 1.2882365585863643e-05, "loss": 1.6893, "step": 36840 }, { "epoch": 1.3101057683761443, "grad_norm": 1.9743231534957886, "learning_rate": 1.287861259074249e-05, "loss": 1.6891, "step": 36850 }, { "epoch": 1.3104612923295706, "grad_norm": 1.8222795724868774, "learning_rate": 1.2874859153484492e-05, "loss": 1.6787, "step": 36860 }, { "epoch": 1.3108168162829972, "grad_norm": 1.750848412513733, "learning_rate": 1.2871105274666154e-05, "loss": 1.6888, "step": 36870 }, { "epoch": 1.3111723402364235, "grad_norm": 1.8487437963485718, "learning_rate": 1.2867350954864048e-05, "loss": 1.6363, "step": 36880 }, { "epoch": 1.3115278641898498, "grad_norm": 1.7813891172409058, "learning_rate": 1.2863596194654813e-05, "loss": 1.6858, "step": 36890 }, { "epoch": 1.311883388143276, "grad_norm": 1.7855768203735352, "learning_rate": 1.2859840994615156e-05, "loss": 1.642, "step": 36900 }, { "epoch": 1.3122389120967024, "grad_norm": 1.7619067430496216, "learning_rate": 1.2856085355321852e-05, "loss": 1.7181, "step": 36910 }, { "epoch": 1.312594436050129, "grad_norm": 1.8910986185073853, "learning_rate": 1.2852329277351746e-05, "loss": 1.7122, "step": 36920 }, { "epoch": 1.3129499600035552, "grad_norm": 1.8386540412902832, "learning_rate": 1.2848572761281752e-05, "loss": 1.747, "step": 36930 }, { "epoch": 1.3133054839569815, "grad_norm": 1.8845070600509644, "learning_rate": 1.284481580768884e-05, "loss": 1.7065, "step": 36940 }, { "epoch": 1.313661007910408, "grad_norm": 1.8629308938980103, "learning_rate": 1.2841058417150059e-05, "loss": 1.7201, "step": 36950 }, { "epoch": 1.3140165318638344, "grad_norm": 1.7198325395584106, "learning_rate": 1.2837300590242517e-05, "loss": 1.6766, "step": 36960 }, { "epoch": 1.3143720558172607, "grad_norm": 1.796847939491272, "learning_rate": 1.2833542327543392e-05, "loss": 1.6875, "step": 36970 }, { "epoch": 1.314727579770687, "grad_norm": 1.7964692115783691, "learning_rate": 1.2829783629629933e-05, "loss": 1.6483, "step": 36980 }, { "epoch": 1.3150831037241133, "grad_norm": 1.7154792547225952, "learning_rate": 1.2826024497079452e-05, "loss": 1.7301, "step": 36990 }, { "epoch": 1.3154386276775398, "grad_norm": 1.9361765384674072, "learning_rate": 1.2822264930469329e-05, "loss": 1.7092, "step": 37000 }, { "epoch": 1.3157941516309661, "grad_norm": 1.7642241716384888, "learning_rate": 1.2818504930377007e-05, "loss": 1.6975, "step": 37010 }, { "epoch": 1.3161496755843924, "grad_norm": 1.816183090209961, "learning_rate": 1.2814744497380001e-05, "loss": 1.6853, "step": 37020 }, { "epoch": 1.316505199537819, "grad_norm": 1.8195050954818726, "learning_rate": 1.2810983632055887e-05, "loss": 1.6636, "step": 37030 }, { "epoch": 1.3168607234912453, "grad_norm": 1.9385960102081299, "learning_rate": 1.2807222334982312e-05, "loss": 1.6698, "step": 37040 }, { "epoch": 1.3172162474446716, "grad_norm": 1.8485578298568726, "learning_rate": 1.2803460606736989e-05, "loss": 1.7425, "step": 37050 }, { "epoch": 1.317571771398098, "grad_norm": 1.8752837181091309, "learning_rate": 1.2799698447897695e-05, "loss": 1.6967, "step": 37060 }, { "epoch": 1.3179272953515242, "grad_norm": 1.817765712738037, "learning_rate": 1.2795935859042272e-05, "loss": 1.6453, "step": 37070 }, { "epoch": 1.3182828193049507, "grad_norm": 1.7797666788101196, "learning_rate": 1.2792172840748633e-05, "loss": 1.6916, "step": 37080 }, { "epoch": 1.318638343258377, "grad_norm": 1.8178123235702515, "learning_rate": 1.2788409393594753e-05, "loss": 1.646, "step": 37090 }, { "epoch": 1.3189938672118033, "grad_norm": 1.9050863981246948, "learning_rate": 1.2784645518158674e-05, "loss": 1.6862, "step": 37100 }, { "epoch": 1.3193493911652299, "grad_norm": 1.8194165229797363, "learning_rate": 1.2780881215018502e-05, "loss": 1.7165, "step": 37110 }, { "epoch": 1.3197049151186562, "grad_norm": 1.7735542058944702, "learning_rate": 1.277711648475241e-05, "loss": 1.734, "step": 37120 }, { "epoch": 1.3200604390720825, "grad_norm": 1.9834961891174316, "learning_rate": 1.2773351327938643e-05, "loss": 1.6942, "step": 37130 }, { "epoch": 1.3204159630255088, "grad_norm": 1.795721411705017, "learning_rate": 1.2769585745155497e-05, "loss": 1.7135, "step": 37140 }, { "epoch": 1.320771486978935, "grad_norm": 1.7426166534423828, "learning_rate": 1.2765819736981346e-05, "loss": 1.6587, "step": 37150 }, { "epoch": 1.3211270109323616, "grad_norm": 1.8144384622573853, "learning_rate": 1.2762053303994627e-05, "loss": 1.6538, "step": 37160 }, { "epoch": 1.321482534885788, "grad_norm": 1.7716857194900513, "learning_rate": 1.2758286446773838e-05, "loss": 1.6843, "step": 37170 }, { "epoch": 1.3218380588392142, "grad_norm": 1.7559036016464233, "learning_rate": 1.2754519165897547e-05, "loss": 1.6962, "step": 37180 }, { "epoch": 1.3221935827926408, "grad_norm": 1.9076648950576782, "learning_rate": 1.2750751461944384e-05, "loss": 1.6504, "step": 37190 }, { "epoch": 1.322549106746067, "grad_norm": 1.8252453804016113, "learning_rate": 1.2746983335493042e-05, "loss": 1.6783, "step": 37200 }, { "epoch": 1.3229046306994934, "grad_norm": 1.7769012451171875, "learning_rate": 1.2743214787122282e-05, "loss": 1.6997, "step": 37210 }, { "epoch": 1.3232601546529197, "grad_norm": 1.8328056335449219, "learning_rate": 1.2739445817410931e-05, "loss": 1.6457, "step": 37220 }, { "epoch": 1.323615678606346, "grad_norm": 1.9582966566085815, "learning_rate": 1.273567642693788e-05, "loss": 1.679, "step": 37230 }, { "epoch": 1.3239712025597725, "grad_norm": 2.056260585784912, "learning_rate": 1.2731906616282081e-05, "loss": 1.6784, "step": 37240 }, { "epoch": 1.3243267265131988, "grad_norm": 1.9473516941070557, "learning_rate": 1.2728136386022558e-05, "loss": 1.6985, "step": 37250 }, { "epoch": 1.3246822504666251, "grad_norm": 1.698970913887024, "learning_rate": 1.272436573673839e-05, "loss": 1.6747, "step": 37260 }, { "epoch": 1.3250377744200517, "grad_norm": 1.7800887823104858, "learning_rate": 1.2720594669008728e-05, "loss": 1.703, "step": 37270 }, { "epoch": 1.325393298373478, "grad_norm": 1.6938693523406982, "learning_rate": 1.271682318341278e-05, "loss": 1.7243, "step": 37280 }, { "epoch": 1.3257488223269043, "grad_norm": 1.6857800483703613, "learning_rate": 1.2713051280529829e-05, "loss": 1.7032, "step": 37290 }, { "epoch": 1.3261043462803306, "grad_norm": 1.7712005376815796, "learning_rate": 1.2709278960939209e-05, "loss": 1.6491, "step": 37300 }, { "epoch": 1.3264598702337569, "grad_norm": 1.8196500539779663, "learning_rate": 1.2705506225220332e-05, "loss": 1.7185, "step": 37310 }, { "epoch": 1.3268153941871834, "grad_norm": 1.7919986248016357, "learning_rate": 1.270173307395266e-05, "loss": 1.6856, "step": 37320 }, { "epoch": 1.3271709181406097, "grad_norm": 1.8808327913284302, "learning_rate": 1.2697959507715727e-05, "loss": 1.7187, "step": 37330 }, { "epoch": 1.327526442094036, "grad_norm": 1.83591890335083, "learning_rate": 1.2694185527089132e-05, "loss": 1.6613, "step": 37340 }, { "epoch": 1.3278819660474626, "grad_norm": 1.803751826286316, "learning_rate": 1.2690411132652532e-05, "loss": 1.7158, "step": 37350 }, { "epoch": 1.3282374900008889, "grad_norm": 1.7949421405792236, "learning_rate": 1.2686636324985649e-05, "loss": 1.6868, "step": 37360 }, { "epoch": 1.3285930139543152, "grad_norm": 1.8414742946624756, "learning_rate": 1.2682861104668276e-05, "loss": 1.6864, "step": 37370 }, { "epoch": 1.3289485379077415, "grad_norm": 1.882709264755249, "learning_rate": 1.2679085472280255e-05, "loss": 1.6579, "step": 37380 }, { "epoch": 1.3293040618611678, "grad_norm": 1.8424874544143677, "learning_rate": 1.2675309428401502e-05, "loss": 1.6771, "step": 37390 }, { "epoch": 1.3296595858145943, "grad_norm": 1.8590044975280762, "learning_rate": 1.2671532973611999e-05, "loss": 1.7331, "step": 37400 }, { "epoch": 1.3300151097680206, "grad_norm": 1.7579020261764526, "learning_rate": 1.266775610849178e-05, "loss": 1.7365, "step": 37410 }, { "epoch": 1.330370633721447, "grad_norm": 1.7553985118865967, "learning_rate": 1.2663978833620954e-05, "loss": 1.7171, "step": 37420 }, { "epoch": 1.3307261576748735, "grad_norm": 1.8007732629776, "learning_rate": 1.2660201149579678e-05, "loss": 1.6695, "step": 37430 }, { "epoch": 1.3310816816282998, "grad_norm": 1.9665582180023193, "learning_rate": 1.2656423056948188e-05, "loss": 1.6685, "step": 37440 }, { "epoch": 1.331437205581726, "grad_norm": 1.8420867919921875, "learning_rate": 1.265264455630677e-05, "loss": 1.7187, "step": 37450 }, { "epoch": 1.3317927295351524, "grad_norm": 1.8061645030975342, "learning_rate": 1.264886564823578e-05, "loss": 1.6622, "step": 37460 }, { "epoch": 1.3321482534885787, "grad_norm": 1.8627655506134033, "learning_rate": 1.2645086333315636e-05, "loss": 1.7042, "step": 37470 }, { "epoch": 1.3325037774420052, "grad_norm": 1.8341363668441772, "learning_rate": 1.2641306612126813e-05, "loss": 1.6541, "step": 37480 }, { "epoch": 1.3328593013954315, "grad_norm": 1.8347748517990112, "learning_rate": 1.263752648524986e-05, "loss": 1.6792, "step": 37490 }, { "epoch": 1.3332148253488578, "grad_norm": 1.8106322288513184, "learning_rate": 1.2633745953265377e-05, "loss": 1.6903, "step": 37500 }, { "epoch": 1.3335703493022844, "grad_norm": 1.8910024166107178, "learning_rate": 1.2629965016754027e-05, "loss": 1.6358, "step": 37510 }, { "epoch": 1.3339258732557107, "grad_norm": 1.847715973854065, "learning_rate": 1.262618367629654e-05, "loss": 1.6791, "step": 37520 }, { "epoch": 1.334281397209137, "grad_norm": 1.8155648708343506, "learning_rate": 1.2622401932473705e-05, "loss": 1.6605, "step": 37530 }, { "epoch": 1.3346369211625633, "grad_norm": 1.8898736238479614, "learning_rate": 1.2618619785866377e-05, "loss": 1.7061, "step": 37540 }, { "epoch": 1.3349924451159896, "grad_norm": 1.9694591760635376, "learning_rate": 1.2614837237055468e-05, "loss": 1.6914, "step": 37550 }, { "epoch": 1.335347969069416, "grad_norm": 2.3319625854492188, "learning_rate": 1.261105428662196e-05, "loss": 1.6767, "step": 37560 }, { "epoch": 1.3357034930228424, "grad_norm": 1.8441003561019897, "learning_rate": 1.260727093514688e-05, "loss": 1.6525, "step": 37570 }, { "epoch": 1.3360590169762687, "grad_norm": 1.8745046854019165, "learning_rate": 1.260348718321133e-05, "loss": 1.6947, "step": 37580 }, { "epoch": 1.3364145409296952, "grad_norm": 1.7331849336624146, "learning_rate": 1.259970303139648e-05, "loss": 1.7215, "step": 37590 }, { "epoch": 1.3367700648831216, "grad_norm": 1.8349084854125977, "learning_rate": 1.2595918480283538e-05, "loss": 1.6903, "step": 37600 }, { "epoch": 1.3371255888365479, "grad_norm": 1.7928951978683472, "learning_rate": 1.2592133530453797e-05, "loss": 1.676, "step": 37610 }, { "epoch": 1.3374811127899742, "grad_norm": 2.0136008262634277, "learning_rate": 1.2588348182488599e-05, "loss": 1.6742, "step": 37620 }, { "epoch": 1.3378366367434005, "grad_norm": 1.8972746133804321, "learning_rate": 1.2584562436969348e-05, "loss": 1.7219, "step": 37630 }, { "epoch": 1.338192160696827, "grad_norm": 1.7846343517303467, "learning_rate": 1.258077629447751e-05, "loss": 1.6871, "step": 37640 }, { "epoch": 1.3385476846502533, "grad_norm": 1.8348819017410278, "learning_rate": 1.2576989755594617e-05, "loss": 1.697, "step": 37650 }, { "epoch": 1.3389032086036796, "grad_norm": 1.8878655433654785, "learning_rate": 1.2573202820902254e-05, "loss": 1.7241, "step": 37660 }, { "epoch": 1.3392587325571061, "grad_norm": 1.9071545600891113, "learning_rate": 1.2569415490982075e-05, "loss": 1.7057, "step": 37670 }, { "epoch": 1.3396142565105325, "grad_norm": 1.938977837562561, "learning_rate": 1.2565627766415784e-05, "loss": 1.7211, "step": 37680 }, { "epoch": 1.3399697804639588, "grad_norm": 1.8646774291992188, "learning_rate": 1.2561839647785159e-05, "loss": 1.7011, "step": 37690 }, { "epoch": 1.340325304417385, "grad_norm": 2.070342779159546, "learning_rate": 1.2558051135672022e-05, "loss": 1.6739, "step": 37700 }, { "epoch": 1.3406808283708114, "grad_norm": 1.8355119228363037, "learning_rate": 1.2554262230658271e-05, "loss": 1.6937, "step": 37710 }, { "epoch": 1.341036352324238, "grad_norm": 1.8518496751785278, "learning_rate": 1.2550472933325856e-05, "loss": 1.6695, "step": 37720 }, { "epoch": 1.3413918762776642, "grad_norm": 1.8776099681854248, "learning_rate": 1.2546683244256792e-05, "loss": 1.7149, "step": 37730 }, { "epoch": 1.3417474002310905, "grad_norm": 1.8292155265808105, "learning_rate": 1.254289316403315e-05, "loss": 1.6847, "step": 37740 }, { "epoch": 1.342102924184517, "grad_norm": 1.7810564041137695, "learning_rate": 1.2539102693237062e-05, "loss": 1.7141, "step": 37750 }, { "epoch": 1.3424584481379433, "grad_norm": 1.7767292261123657, "learning_rate": 1.2535311832450718e-05, "loss": 1.6408, "step": 37760 }, { "epoch": 1.3428139720913697, "grad_norm": 1.768619179725647, "learning_rate": 1.2531520582256374e-05, "loss": 1.6612, "step": 37770 }, { "epoch": 1.343169496044796, "grad_norm": 1.9853242635726929, "learning_rate": 1.252772894323634e-05, "loss": 1.7205, "step": 37780 }, { "epoch": 1.3435250199982223, "grad_norm": 1.761792778968811, "learning_rate": 1.2523936915972992e-05, "loss": 1.7178, "step": 37790 }, { "epoch": 1.3438805439516488, "grad_norm": 1.7345435619354248, "learning_rate": 1.252014450104876e-05, "loss": 1.7095, "step": 37800 }, { "epoch": 1.344236067905075, "grad_norm": 1.7151758670806885, "learning_rate": 1.251635169904613e-05, "loss": 1.6825, "step": 37810 }, { "epoch": 1.3445915918585014, "grad_norm": 1.7500386238098145, "learning_rate": 1.2512558510547658e-05, "loss": 1.6734, "step": 37820 }, { "epoch": 1.344947115811928, "grad_norm": 1.9393870830535889, "learning_rate": 1.2508764936135956e-05, "loss": 1.7165, "step": 37830 }, { "epoch": 1.3453026397653542, "grad_norm": 1.8957933187484741, "learning_rate": 1.2504970976393687e-05, "loss": 1.6754, "step": 37840 }, { "epoch": 1.3456581637187806, "grad_norm": 1.8289235830307007, "learning_rate": 1.2501176631903583e-05, "loss": 1.6886, "step": 37850 }, { "epoch": 1.3460136876722069, "grad_norm": 1.956375241279602, "learning_rate": 1.2497381903248426e-05, "loss": 1.68, "step": 37860 }, { "epoch": 1.3463692116256332, "grad_norm": 1.7941055297851562, "learning_rate": 1.2493586791011074e-05, "loss": 1.6469, "step": 37870 }, { "epoch": 1.3467247355790597, "grad_norm": 1.8033486604690552, "learning_rate": 1.2489791295774422e-05, "loss": 1.6262, "step": 37880 }, { "epoch": 1.347080259532486, "grad_norm": 1.9014389514923096, "learning_rate": 1.2485995418121441e-05, "loss": 1.683, "step": 37890 }, { "epoch": 1.3474357834859123, "grad_norm": 1.7786731719970703, "learning_rate": 1.2482199158635149e-05, "loss": 1.6627, "step": 37900 }, { "epoch": 1.3477913074393388, "grad_norm": 1.813820481300354, "learning_rate": 1.2478402517898632e-05, "loss": 1.741, "step": 37910 }, { "epoch": 1.3481468313927651, "grad_norm": 1.797961711883545, "learning_rate": 1.2474605496495024e-05, "loss": 1.7126, "step": 37920 }, { "epoch": 1.3485023553461915, "grad_norm": 1.7511776685714722, "learning_rate": 1.2470808095007535e-05, "loss": 1.6567, "step": 37930 }, { "epoch": 1.3488578792996178, "grad_norm": 1.8130065202713013, "learning_rate": 1.2467010314019408e-05, "loss": 1.6329, "step": 37940 }, { "epoch": 1.349213403253044, "grad_norm": 1.7048790454864502, "learning_rate": 1.2463212154113966e-05, "loss": 1.6743, "step": 37950 }, { "epoch": 1.3495689272064706, "grad_norm": 1.7872480154037476, "learning_rate": 1.245941361587458e-05, "loss": 1.6686, "step": 37960 }, { "epoch": 1.349924451159897, "grad_norm": 1.8660238981246948, "learning_rate": 1.2455614699884686e-05, "loss": 1.6878, "step": 37970 }, { "epoch": 1.3502799751133232, "grad_norm": 1.946341633796692, "learning_rate": 1.245181540672777e-05, "loss": 1.6678, "step": 37980 }, { "epoch": 1.3506354990667497, "grad_norm": 1.8169410228729248, "learning_rate": 1.2448015736987382e-05, "loss": 1.6439, "step": 37990 }, { "epoch": 1.350991023020176, "grad_norm": 1.7743910551071167, "learning_rate": 1.2444215691247128e-05, "loss": 1.7119, "step": 38000 }, { "epoch": 1.3513465469736023, "grad_norm": 1.8813501596450806, "learning_rate": 1.2440415270090665e-05, "loss": 1.7038, "step": 38010 }, { "epoch": 1.3517020709270287, "grad_norm": 1.8178991079330444, "learning_rate": 1.2436614474101719e-05, "loss": 1.6467, "step": 38020 }, { "epoch": 1.352057594880455, "grad_norm": 1.7223140001296997, "learning_rate": 1.2432813303864067e-05, "loss": 1.6908, "step": 38030 }, { "epoch": 1.3524131188338815, "grad_norm": 1.8696693181991577, "learning_rate": 1.2429011759961544e-05, "loss": 1.7457, "step": 38040 }, { "epoch": 1.3527686427873078, "grad_norm": 1.8467459678649902, "learning_rate": 1.242520984297805e-05, "loss": 1.6485, "step": 38050 }, { "epoch": 1.353124166740734, "grad_norm": 1.8918532133102417, "learning_rate": 1.2421407553497527e-05, "loss": 1.6325, "step": 38060 }, { "epoch": 1.3534796906941606, "grad_norm": 1.857740044593811, "learning_rate": 1.2417604892103988e-05, "loss": 1.6461, "step": 38070 }, { "epoch": 1.353835214647587, "grad_norm": 1.7795847654342651, "learning_rate": 1.241380185938149e-05, "loss": 1.6728, "step": 38080 }, { "epoch": 1.3541907386010132, "grad_norm": 1.821782112121582, "learning_rate": 1.2409998455914167e-05, "loss": 1.6939, "step": 38090 }, { "epoch": 1.3545462625544396, "grad_norm": 1.709168553352356, "learning_rate": 1.2406194682286188e-05, "loss": 1.6689, "step": 38100 }, { "epoch": 1.3549017865078659, "grad_norm": 1.741292953491211, "learning_rate": 1.2402390539081796e-05, "loss": 1.698, "step": 38110 }, { "epoch": 1.3552573104612924, "grad_norm": 1.8442113399505615, "learning_rate": 1.239858602688528e-05, "loss": 1.7049, "step": 38120 }, { "epoch": 1.3556128344147187, "grad_norm": 1.8573895692825317, "learning_rate": 1.2394781146280987e-05, "loss": 1.6743, "step": 38130 }, { "epoch": 1.355968358368145, "grad_norm": 1.7257417440414429, "learning_rate": 1.2390975897853329e-05, "loss": 1.7107, "step": 38140 }, { "epoch": 1.3563238823215715, "grad_norm": 1.7973076105117798, "learning_rate": 1.2387170282186762e-05, "loss": 1.6723, "step": 38150 }, { "epoch": 1.3566794062749978, "grad_norm": 1.8350764513015747, "learning_rate": 1.238336429986581e-05, "loss": 1.6761, "step": 38160 }, { "epoch": 1.3570349302284241, "grad_norm": 1.8104625940322876, "learning_rate": 1.2379557951475044e-05, "loss": 1.7014, "step": 38170 }, { "epoch": 1.3573904541818504, "grad_norm": 1.7754307985305786, "learning_rate": 1.2375751237599096e-05, "loss": 1.7155, "step": 38180 }, { "epoch": 1.3577459781352768, "grad_norm": 1.7645673751831055, "learning_rate": 1.2371944158822653e-05, "loss": 1.6503, "step": 38190 }, { "epoch": 1.3581015020887033, "grad_norm": 1.8259831666946411, "learning_rate": 1.2368136715730458e-05, "loss": 1.7034, "step": 38200 }, { "epoch": 1.3584570260421296, "grad_norm": 1.8655147552490234, "learning_rate": 1.2364328908907314e-05, "loss": 1.6771, "step": 38210 }, { "epoch": 1.358812549995556, "grad_norm": 1.8037327527999878, "learning_rate": 1.2360520738938075e-05, "loss": 1.6634, "step": 38220 }, { "epoch": 1.3591680739489824, "grad_norm": 1.8114920854568481, "learning_rate": 1.2356712206407653e-05, "loss": 1.6914, "step": 38230 }, { "epoch": 1.3595235979024087, "grad_norm": 1.8625586032867432, "learning_rate": 1.2352903311901012e-05, "loss": 1.6398, "step": 38240 }, { "epoch": 1.359879121855835, "grad_norm": 1.8780370950698853, "learning_rate": 1.2349094056003173e-05, "loss": 1.6635, "step": 38250 }, { "epoch": 1.3602346458092613, "grad_norm": 1.8319125175476074, "learning_rate": 1.2345284439299215e-05, "loss": 1.6816, "step": 38260 }, { "epoch": 1.3605901697626877, "grad_norm": 1.8563363552093506, "learning_rate": 1.2341474462374272e-05, "loss": 1.6866, "step": 38270 }, { "epoch": 1.3609456937161142, "grad_norm": 1.749904751777649, "learning_rate": 1.2337664125813533e-05, "loss": 1.6335, "step": 38280 }, { "epoch": 1.3613012176695405, "grad_norm": 1.7130019664764404, "learning_rate": 1.2333853430202242e-05, "loss": 1.7024, "step": 38290 }, { "epoch": 1.3616567416229668, "grad_norm": 1.8118706941604614, "learning_rate": 1.2330042376125699e-05, "loss": 1.7038, "step": 38300 }, { "epoch": 1.3620122655763933, "grad_norm": 1.9396083354949951, "learning_rate": 1.2326230964169258e-05, "loss": 1.6388, "step": 38310 }, { "epoch": 1.3623677895298196, "grad_norm": 1.8331371545791626, "learning_rate": 1.2322419194918325e-05, "loss": 1.6703, "step": 38320 }, { "epoch": 1.362723313483246, "grad_norm": 1.7760456800460815, "learning_rate": 1.2318607068958363e-05, "loss": 1.7169, "step": 38330 }, { "epoch": 1.3630788374366722, "grad_norm": 1.9254004955291748, "learning_rate": 1.2314794586874893e-05, "loss": 1.6992, "step": 38340 }, { "epoch": 1.3634343613900985, "grad_norm": 1.8271830081939697, "learning_rate": 1.2310981749253489e-05, "loss": 1.6943, "step": 38350 }, { "epoch": 1.363789885343525, "grad_norm": 1.9219883680343628, "learning_rate": 1.2307168556679782e-05, "loss": 1.6783, "step": 38360 }, { "epoch": 1.3641454092969514, "grad_norm": 1.7999000549316406, "learning_rate": 1.2303355009739447e-05, "loss": 1.7166, "step": 38370 }, { "epoch": 1.3645009332503777, "grad_norm": 1.9395989179611206, "learning_rate": 1.2299541109018224e-05, "loss": 1.6232, "step": 38380 }, { "epoch": 1.3648564572038042, "grad_norm": 1.7778639793395996, "learning_rate": 1.2295726855101911e-05, "loss": 1.6502, "step": 38390 }, { "epoch": 1.3652119811572305, "grad_norm": 1.763012170791626, "learning_rate": 1.2291912248576341e-05, "loss": 1.6677, "step": 38400 }, { "epoch": 1.3655675051106568, "grad_norm": 1.800807237625122, "learning_rate": 1.228809729002742e-05, "loss": 1.6758, "step": 38410 }, { "epoch": 1.3659230290640831, "grad_norm": 1.9781922101974487, "learning_rate": 1.2284281980041105e-05, "loss": 1.6432, "step": 38420 }, { "epoch": 1.3662785530175094, "grad_norm": 1.842842698097229, "learning_rate": 1.22804663192034e-05, "loss": 1.6869, "step": 38430 }, { "epoch": 1.366634076970936, "grad_norm": 1.7902802228927612, "learning_rate": 1.2276650308100364e-05, "loss": 1.6863, "step": 38440 }, { "epoch": 1.3669896009243623, "grad_norm": 1.973473072052002, "learning_rate": 1.2272833947318117e-05, "loss": 1.6542, "step": 38450 }, { "epoch": 1.3673451248777886, "grad_norm": 1.8320331573486328, "learning_rate": 1.2269017237442826e-05, "loss": 1.696, "step": 38460 }, { "epoch": 1.3677006488312151, "grad_norm": 1.817253589630127, "learning_rate": 1.2265200179060716e-05, "loss": 1.6416, "step": 38470 }, { "epoch": 1.3680561727846414, "grad_norm": 1.9107543230056763, "learning_rate": 1.2261382772758061e-05, "loss": 1.6853, "step": 38480 }, { "epoch": 1.3684116967380677, "grad_norm": 1.8446093797683716, "learning_rate": 1.2257565019121191e-05, "loss": 1.6747, "step": 38490 }, { "epoch": 1.368767220691494, "grad_norm": 1.741042971611023, "learning_rate": 1.2253746918736489e-05, "loss": 1.6507, "step": 38500 }, { "epoch": 1.3691227446449203, "grad_norm": 1.7928935289382935, "learning_rate": 1.2249928472190391e-05, "loss": 1.6964, "step": 38510 }, { "epoch": 1.3694782685983469, "grad_norm": 1.8920574188232422, "learning_rate": 1.2246109680069385e-05, "loss": 1.6542, "step": 38520 }, { "epoch": 1.3698337925517732, "grad_norm": 1.88433039188385, "learning_rate": 1.2242290542960017e-05, "loss": 1.6774, "step": 38530 }, { "epoch": 1.3701893165051995, "grad_norm": 1.817080020904541, "learning_rate": 1.2238471061448881e-05, "loss": 1.6833, "step": 38540 }, { "epoch": 1.370544840458626, "grad_norm": 1.862455129623413, "learning_rate": 1.2234651236122627e-05, "loss": 1.6817, "step": 38550 }, { "epoch": 1.3709003644120523, "grad_norm": 1.7352641820907593, "learning_rate": 1.2230831067567955e-05, "loss": 1.7034, "step": 38560 }, { "epoch": 1.3712558883654786, "grad_norm": 1.7810717821121216, "learning_rate": 1.2227010556371615e-05, "loss": 1.6744, "step": 38570 }, { "epoch": 1.371611412318905, "grad_norm": 1.6984426975250244, "learning_rate": 1.2223189703120416e-05, "loss": 1.6574, "step": 38580 }, { "epoch": 1.3719669362723312, "grad_norm": 1.8884998559951782, "learning_rate": 1.221936850840122e-05, "loss": 1.6797, "step": 38590 }, { "epoch": 1.3723224602257578, "grad_norm": 1.8598767518997192, "learning_rate": 1.2215546972800937e-05, "loss": 1.6687, "step": 38600 }, { "epoch": 1.372677984179184, "grad_norm": 1.7620718479156494, "learning_rate": 1.2211725096906533e-05, "loss": 1.6711, "step": 38610 }, { "epoch": 1.3730335081326104, "grad_norm": 1.8488720655441284, "learning_rate": 1.2207902881305018e-05, "loss": 1.6975, "step": 38620 }, { "epoch": 1.373389032086037, "grad_norm": 1.7783499956130981, "learning_rate": 1.2204080326583467e-05, "loss": 1.6539, "step": 38630 }, { "epoch": 1.3737445560394632, "grad_norm": 1.9365428686141968, "learning_rate": 1.2200257433328994e-05, "loss": 1.674, "step": 38640 }, { "epoch": 1.3741000799928895, "grad_norm": 1.7568024396896362, "learning_rate": 1.2196434202128777e-05, "loss": 1.6626, "step": 38650 }, { "epoch": 1.3744556039463158, "grad_norm": 1.9768433570861816, "learning_rate": 1.219261063357004e-05, "loss": 1.6946, "step": 38660 }, { "epoch": 1.3748111278997421, "grad_norm": 1.7951604127883911, "learning_rate": 1.2188786728240057e-05, "loss": 1.6704, "step": 38670 }, { "epoch": 1.3751666518531687, "grad_norm": 1.8011988401412964, "learning_rate": 1.2184962486726154e-05, "loss": 1.6936, "step": 38680 }, { "epoch": 1.375522175806595, "grad_norm": 1.8213082551956177, "learning_rate": 1.2181137909615713e-05, "loss": 1.6814, "step": 38690 }, { "epoch": 1.3758776997600213, "grad_norm": 1.7896612882614136, "learning_rate": 1.2177312997496164e-05, "loss": 1.6568, "step": 38700 }, { "epoch": 1.3762332237134478, "grad_norm": 1.6915911436080933, "learning_rate": 1.2173487750954993e-05, "loss": 1.6616, "step": 38710 }, { "epoch": 1.3765887476668741, "grad_norm": 1.857466459274292, "learning_rate": 1.2169662170579733e-05, "loss": 1.6526, "step": 38720 }, { "epoch": 1.3769442716203004, "grad_norm": 1.732410192489624, "learning_rate": 1.2165836256957963e-05, "loss": 1.6895, "step": 38730 }, { "epoch": 1.3772997955737267, "grad_norm": 1.8987367153167725, "learning_rate": 1.2162010010677327e-05, "loss": 1.6618, "step": 38740 }, { "epoch": 1.377655319527153, "grad_norm": 1.9594670534133911, "learning_rate": 1.2158183432325508e-05, "loss": 1.6631, "step": 38750 }, { "epoch": 1.3780108434805796, "grad_norm": 1.8008707761764526, "learning_rate": 1.2154356522490245e-05, "loss": 1.6483, "step": 38760 }, { "epoch": 1.3783663674340059, "grad_norm": 1.852552890777588, "learning_rate": 1.215052928175933e-05, "loss": 1.6651, "step": 38770 }, { "epoch": 1.3787218913874322, "grad_norm": 1.6587566137313843, "learning_rate": 1.2146701710720599e-05, "loss": 1.6777, "step": 38780 }, { "epoch": 1.3790774153408587, "grad_norm": 1.7821872234344482, "learning_rate": 1.2142873809961945e-05, "loss": 1.7093, "step": 38790 }, { "epoch": 1.379432939294285, "grad_norm": 1.9138959646224976, "learning_rate": 1.2139045580071313e-05, "loss": 1.6829, "step": 38800 }, { "epoch": 1.3797884632477113, "grad_norm": 2.0115551948547363, "learning_rate": 1.2135217021636691e-05, "loss": 1.7011, "step": 38810 }, { "epoch": 1.3801439872011376, "grad_norm": 1.8681602478027344, "learning_rate": 1.2131388135246121e-05, "loss": 1.6386, "step": 38820 }, { "epoch": 1.380499511154564, "grad_norm": 1.9055702686309814, "learning_rate": 1.2127558921487696e-05, "loss": 1.6843, "step": 38830 }, { "epoch": 1.3808550351079905, "grad_norm": 1.8282355070114136, "learning_rate": 1.2123729380949563e-05, "loss": 1.6799, "step": 38840 }, { "epoch": 1.3812105590614168, "grad_norm": 1.9232532978057861, "learning_rate": 1.2119899514219912e-05, "loss": 1.6534, "step": 38850 }, { "epoch": 1.381566083014843, "grad_norm": 1.777154564857483, "learning_rate": 1.2116069321886987e-05, "loss": 1.6606, "step": 38860 }, { "epoch": 1.3819216069682696, "grad_norm": 1.9864416122436523, "learning_rate": 1.2112238804539084e-05, "loss": 1.6887, "step": 38870 }, { "epoch": 1.382277130921696, "grad_norm": 1.8537176847457886, "learning_rate": 1.2108407962764543e-05, "loss": 1.6775, "step": 38880 }, { "epoch": 1.3826326548751222, "grad_norm": 1.8541337251663208, "learning_rate": 1.2104576797151758e-05, "loss": 1.6959, "step": 38890 }, { "epoch": 1.3829881788285485, "grad_norm": 1.7702661752700806, "learning_rate": 1.2100745308289175e-05, "loss": 1.6894, "step": 38900 }, { "epoch": 1.3833437027819748, "grad_norm": 1.857193112373352, "learning_rate": 1.209691349676528e-05, "loss": 1.6761, "step": 38910 }, { "epoch": 1.3836992267354014, "grad_norm": 1.9033740758895874, "learning_rate": 1.2093081363168625e-05, "loss": 1.6652, "step": 38920 }, { "epoch": 1.3840547506888277, "grad_norm": 1.838472843170166, "learning_rate": 1.2089248908087794e-05, "loss": 1.6514, "step": 38930 }, { "epoch": 1.384410274642254, "grad_norm": 1.7907769680023193, "learning_rate": 1.2085416132111429e-05, "loss": 1.6835, "step": 38940 }, { "epoch": 1.3847657985956805, "grad_norm": 1.8862502574920654, "learning_rate": 1.2081583035828226e-05, "loss": 1.6456, "step": 38950 }, { "epoch": 1.3851213225491068, "grad_norm": 1.8391231298446655, "learning_rate": 1.2077749619826915e-05, "loss": 1.6661, "step": 38960 }, { "epoch": 1.385476846502533, "grad_norm": 1.8332085609436035, "learning_rate": 1.2073915884696292e-05, "loss": 1.6714, "step": 38970 }, { "epoch": 1.3858323704559594, "grad_norm": 1.946373701095581, "learning_rate": 1.2070081831025195e-05, "loss": 1.6939, "step": 38980 }, { "epoch": 1.3861878944093857, "grad_norm": 1.7250920534133911, "learning_rate": 1.2066247459402507e-05, "loss": 1.6818, "step": 38990 }, { "epoch": 1.3865434183628123, "grad_norm": 1.8862600326538086, "learning_rate": 1.2062412770417161e-05, "loss": 1.7218, "step": 39000 }, { "epoch": 1.3868989423162386, "grad_norm": 1.7957911491394043, "learning_rate": 1.2058577764658148e-05, "loss": 1.6923, "step": 39010 }, { "epoch": 1.3872544662696649, "grad_norm": 2.0033061504364014, "learning_rate": 1.2054742442714497e-05, "loss": 1.699, "step": 39020 }, { "epoch": 1.3876099902230914, "grad_norm": 1.6367038488388062, "learning_rate": 1.2050906805175293e-05, "loss": 1.7128, "step": 39030 }, { "epoch": 1.3879655141765177, "grad_norm": 1.8533066511154175, "learning_rate": 1.2047070852629661e-05, "loss": 1.7222, "step": 39040 }, { "epoch": 1.388321038129944, "grad_norm": 1.8927814960479736, "learning_rate": 1.2043234585666782e-05, "loss": 1.6477, "step": 39050 }, { "epoch": 1.3886765620833703, "grad_norm": 1.680945634841919, "learning_rate": 1.2039398004875882e-05, "loss": 1.7053, "step": 39060 }, { "epoch": 1.3890320860367966, "grad_norm": 1.9230496883392334, "learning_rate": 1.2035561110846232e-05, "loss": 1.6782, "step": 39070 }, { "epoch": 1.3893876099902231, "grad_norm": 1.8632361888885498, "learning_rate": 1.2031723904167161e-05, "loss": 1.6711, "step": 39080 }, { "epoch": 1.3897431339436495, "grad_norm": 1.8879597187042236, "learning_rate": 1.2027886385428035e-05, "loss": 1.6603, "step": 39090 }, { "epoch": 1.3900986578970758, "grad_norm": 1.8897348642349243, "learning_rate": 1.2024048555218283e-05, "loss": 1.6896, "step": 39100 }, { "epoch": 1.3904541818505023, "grad_norm": 1.9044712781906128, "learning_rate": 1.2020210414127359e-05, "loss": 1.7011, "step": 39110 }, { "epoch": 1.3908097058039286, "grad_norm": 1.814377784729004, "learning_rate": 1.201637196274478e-05, "loss": 1.6547, "step": 39120 }, { "epoch": 1.391165229757355, "grad_norm": 1.9063407182693481, "learning_rate": 1.201253320166011e-05, "loss": 1.6876, "step": 39130 }, { "epoch": 1.3915207537107812, "grad_norm": 2.003798007965088, "learning_rate": 1.2008694131462962e-05, "loss": 1.686, "step": 39140 }, { "epoch": 1.3918762776642075, "grad_norm": 1.863899827003479, "learning_rate": 1.2004854752742988e-05, "loss": 1.6531, "step": 39150 }, { "epoch": 1.392231801617634, "grad_norm": 1.9100146293640137, "learning_rate": 1.2001015066089893e-05, "loss": 1.6482, "step": 39160 }, { "epoch": 1.3925873255710604, "grad_norm": 1.8719366788864136, "learning_rate": 1.1997175072093435e-05, "loss": 1.6845, "step": 39170 }, { "epoch": 1.3929428495244867, "grad_norm": 1.7857741117477417, "learning_rate": 1.1993334771343405e-05, "loss": 1.6882, "step": 39180 }, { "epoch": 1.3932983734779132, "grad_norm": 1.866883635520935, "learning_rate": 1.1989494164429654e-05, "loss": 1.6766, "step": 39190 }, { "epoch": 1.3936538974313395, "grad_norm": 1.9655028581619263, "learning_rate": 1.1985653251942074e-05, "loss": 1.6653, "step": 39200 }, { "epoch": 1.3940094213847658, "grad_norm": 1.7872508764266968, "learning_rate": 1.1981812034470601e-05, "loss": 1.6846, "step": 39210 }, { "epoch": 1.394364945338192, "grad_norm": 1.8483127355575562, "learning_rate": 1.1977970512605228e-05, "loss": 1.6923, "step": 39220 }, { "epoch": 1.3947204692916184, "grad_norm": 1.9430288076400757, "learning_rate": 1.1974128686935988e-05, "loss": 1.6806, "step": 39230 }, { "epoch": 1.395075993245045, "grad_norm": 1.7674256563186646, "learning_rate": 1.1970286558052957e-05, "loss": 1.6693, "step": 39240 }, { "epoch": 1.3954315171984712, "grad_norm": 1.8529326915740967, "learning_rate": 1.1966444126546263e-05, "loss": 1.6688, "step": 39250 }, { "epoch": 1.3957870411518976, "grad_norm": 1.9361050128936768, "learning_rate": 1.1962601393006083e-05, "loss": 1.6815, "step": 39260 }, { "epoch": 1.396142565105324, "grad_norm": 1.8823657035827637, "learning_rate": 1.1958758358022637e-05, "loss": 1.6776, "step": 39270 }, { "epoch": 1.3964980890587504, "grad_norm": 1.6542255878448486, "learning_rate": 1.1954915022186187e-05, "loss": 1.6878, "step": 39280 }, { "epoch": 1.3968536130121767, "grad_norm": 1.7041078805923462, "learning_rate": 1.1951071386087047e-05, "loss": 1.6859, "step": 39290 }, { "epoch": 1.397209136965603, "grad_norm": 1.8247658014297485, "learning_rate": 1.1947227450315575e-05, "loss": 1.6819, "step": 39300 }, { "epoch": 1.3975646609190293, "grad_norm": 1.8667571544647217, "learning_rate": 1.1943383215462175e-05, "loss": 1.6375, "step": 39310 }, { "epoch": 1.3979201848724558, "grad_norm": 1.8258676528930664, "learning_rate": 1.1939538682117298e-05, "loss": 1.6931, "step": 39320 }, { "epoch": 1.3982757088258821, "grad_norm": 1.7988314628601074, "learning_rate": 1.1935693850871442e-05, "loss": 1.6444, "step": 39330 }, { "epoch": 1.3986312327793085, "grad_norm": 1.7440369129180908, "learning_rate": 1.1931848722315145e-05, "loss": 1.7064, "step": 39340 }, { "epoch": 1.398986756732735, "grad_norm": 1.827201247215271, "learning_rate": 1.1928003297039001e-05, "loss": 1.7128, "step": 39350 }, { "epoch": 1.3993422806861613, "grad_norm": 1.7500665187835693, "learning_rate": 1.1924157575633639e-05, "loss": 1.6726, "step": 39360 }, { "epoch": 1.3996978046395876, "grad_norm": 1.8692196607589722, "learning_rate": 1.1920311558689734e-05, "loss": 1.6694, "step": 39370 }, { "epoch": 1.400053328593014, "grad_norm": 1.973334789276123, "learning_rate": 1.1916465246798017e-05, "loss": 1.6643, "step": 39380 }, { "epoch": 1.4004088525464402, "grad_norm": 1.8776823282241821, "learning_rate": 1.1912618640549252e-05, "loss": 1.6996, "step": 39390 }, { "epoch": 1.4007643764998667, "grad_norm": 1.7455120086669922, "learning_rate": 1.1908771740534257e-05, "loss": 1.6534, "step": 39400 }, { "epoch": 1.401119900453293, "grad_norm": 1.9548767805099487, "learning_rate": 1.1904924547343892e-05, "loss": 1.6756, "step": 39410 }, { "epoch": 1.4014754244067194, "grad_norm": 1.876504898071289, "learning_rate": 1.190107706156906e-05, "loss": 1.6896, "step": 39420 }, { "epoch": 1.4018309483601459, "grad_norm": 1.7416571378707886, "learning_rate": 1.1897229283800713e-05, "loss": 1.6689, "step": 39430 }, { "epoch": 1.4021864723135722, "grad_norm": 1.8590924739837646, "learning_rate": 1.189338121462984e-05, "loss": 1.7051, "step": 39440 }, { "epoch": 1.4025419962669985, "grad_norm": 1.8786284923553467, "learning_rate": 1.1889532854647485e-05, "loss": 1.6731, "step": 39450 }, { "epoch": 1.4028975202204248, "grad_norm": 1.8990617990493774, "learning_rate": 1.1885684204444732e-05, "loss": 1.7025, "step": 39460 }, { "epoch": 1.403253044173851, "grad_norm": 1.7606281042099, "learning_rate": 1.1881835264612706e-05, "loss": 1.6887, "step": 39470 }, { "epoch": 1.4036085681272776, "grad_norm": 1.8033561706542969, "learning_rate": 1.1877986035742589e-05, "loss": 1.6736, "step": 39480 }, { "epoch": 1.403964092080704, "grad_norm": 1.7199445962905884, "learning_rate": 1.1874136518425586e-05, "loss": 1.6996, "step": 39490 }, { "epoch": 1.4043196160341302, "grad_norm": 1.8428937196731567, "learning_rate": 1.1870286713252966e-05, "loss": 1.6787, "step": 39500 }, { "epoch": 1.4046751399875568, "grad_norm": 1.9001675844192505, "learning_rate": 1.1866436620816035e-05, "loss": 1.6595, "step": 39510 }, { "epoch": 1.405030663940983, "grad_norm": 1.7418138980865479, "learning_rate": 1.186258624170614e-05, "loss": 1.6787, "step": 39520 }, { "epoch": 1.4053861878944094, "grad_norm": 1.915069580078125, "learning_rate": 1.1858735576514677e-05, "loss": 1.6626, "step": 39530 }, { "epoch": 1.4057417118478357, "grad_norm": 1.9052213430404663, "learning_rate": 1.1854884625833085e-05, "loss": 1.6565, "step": 39540 }, { "epoch": 1.406097235801262, "grad_norm": 1.9232211112976074, "learning_rate": 1.1851033390252843e-05, "loss": 1.7102, "step": 39550 }, { "epoch": 1.4064527597546885, "grad_norm": 1.8242404460906982, "learning_rate": 1.184718187036548e-05, "loss": 1.6517, "step": 39560 }, { "epoch": 1.4068082837081148, "grad_norm": 1.757689356803894, "learning_rate": 1.1843330066762562e-05, "loss": 1.6502, "step": 39570 }, { "epoch": 1.4071638076615411, "grad_norm": 1.8252004384994507, "learning_rate": 1.1839477980035705e-05, "loss": 1.7388, "step": 39580 }, { "epoch": 1.4075193316149677, "grad_norm": 1.9856147766113281, "learning_rate": 1.1835625610776565e-05, "loss": 1.7049, "step": 39590 }, { "epoch": 1.407874855568394, "grad_norm": 1.8483895063400269, "learning_rate": 1.1831772959576839e-05, "loss": 1.6785, "step": 39600 }, { "epoch": 1.4082303795218203, "grad_norm": 1.8824540376663208, "learning_rate": 1.1827920027028273e-05, "loss": 1.6775, "step": 39610 }, { "epoch": 1.4085859034752466, "grad_norm": 1.814045786857605, "learning_rate": 1.182406681372265e-05, "loss": 1.6556, "step": 39620 }, { "epoch": 1.408941427428673, "grad_norm": 1.9120724201202393, "learning_rate": 1.1820213320251802e-05, "loss": 1.6917, "step": 39630 }, { "epoch": 1.4092969513820994, "grad_norm": 1.7687956094741821, "learning_rate": 1.18163595472076e-05, "loss": 1.6225, "step": 39640 }, { "epoch": 1.4096524753355257, "grad_norm": 1.8206088542938232, "learning_rate": 1.181250549518196e-05, "loss": 1.69, "step": 39650 }, { "epoch": 1.410007999288952, "grad_norm": 1.9125192165374756, "learning_rate": 1.1808651164766843e-05, "loss": 1.6492, "step": 39660 }, { "epoch": 1.4103635232423786, "grad_norm": 1.8194414377212524, "learning_rate": 1.1804796556554248e-05, "loss": 1.6986, "step": 39670 }, { "epoch": 1.4107190471958049, "grad_norm": 2.033812999725342, "learning_rate": 1.1800941671136215e-05, "loss": 1.6855, "step": 39680 }, { "epoch": 1.4110745711492312, "grad_norm": 1.7757148742675781, "learning_rate": 1.1797086509104834e-05, "loss": 1.6576, "step": 39690 }, { "epoch": 1.4114300951026575, "grad_norm": 1.9182121753692627, "learning_rate": 1.1793231071052233e-05, "loss": 1.6182, "step": 39700 }, { "epoch": 1.4117856190560838, "grad_norm": 1.8637202978134155, "learning_rate": 1.1789375357570582e-05, "loss": 1.6454, "step": 39710 }, { "epoch": 1.4121411430095103, "grad_norm": 1.7202696800231934, "learning_rate": 1.17855193692521e-05, "loss": 1.7092, "step": 39720 }, { "epoch": 1.4124966669629366, "grad_norm": 1.7852152585983276, "learning_rate": 1.1781663106689034e-05, "loss": 1.6899, "step": 39730 }, { "epoch": 1.412852190916363, "grad_norm": 1.73618745803833, "learning_rate": 1.1777806570473687e-05, "loss": 1.6783, "step": 39740 }, { "epoch": 1.4132077148697895, "grad_norm": 1.7458254098892212, "learning_rate": 1.17739497611984e-05, "loss": 1.6808, "step": 39750 }, { "epoch": 1.4135632388232158, "grad_norm": 1.760912299156189, "learning_rate": 1.177009267945555e-05, "loss": 1.648, "step": 39760 }, { "epoch": 1.413918762776642, "grad_norm": 1.7297006845474243, "learning_rate": 1.1766235325837563e-05, "loss": 1.6661, "step": 39770 }, { "epoch": 1.4142742867300684, "grad_norm": 1.8834292888641357, "learning_rate": 1.1762377700936903e-05, "loss": 1.6917, "step": 39780 }, { "epoch": 1.4146298106834947, "grad_norm": 1.8323746919631958, "learning_rate": 1.1758519805346083e-05, "loss": 1.6754, "step": 39790 }, { "epoch": 1.4149853346369212, "grad_norm": 1.8268206119537354, "learning_rate": 1.1754661639657643e-05, "loss": 1.6941, "step": 39800 }, { "epoch": 1.4153408585903475, "grad_norm": 1.8546395301818848, "learning_rate": 1.1750803204464176e-05, "loss": 1.7018, "step": 39810 }, { "epoch": 1.4156963825437738, "grad_norm": 1.7926831245422363, "learning_rate": 1.1746944500358316e-05, "loss": 1.6539, "step": 39820 }, { "epoch": 1.4160519064972004, "grad_norm": 1.8957622051239014, "learning_rate": 1.1743085527932736e-05, "loss": 1.7029, "step": 39830 }, { "epoch": 1.4164074304506267, "grad_norm": 1.705998420715332, "learning_rate": 1.1739226287780146e-05, "loss": 1.6544, "step": 39840 }, { "epoch": 1.416762954404053, "grad_norm": 1.7661248445510864, "learning_rate": 1.1735366780493305e-05, "loss": 1.6653, "step": 39850 }, { "epoch": 1.4171184783574793, "grad_norm": 1.7905749082565308, "learning_rate": 1.1731507006665006e-05, "loss": 1.6604, "step": 39860 }, { "epoch": 1.4174740023109056, "grad_norm": 1.7992613315582275, "learning_rate": 1.1727646966888086e-05, "loss": 1.6612, "step": 39870 }, { "epoch": 1.4178295262643321, "grad_norm": 1.849700689315796, "learning_rate": 1.1723786661755428e-05, "loss": 1.6686, "step": 39880 }, { "epoch": 1.4181850502177584, "grad_norm": 1.9370216131210327, "learning_rate": 1.1719926091859943e-05, "loss": 1.6644, "step": 39890 }, { "epoch": 1.4185405741711847, "grad_norm": 1.8564960956573486, "learning_rate": 1.1716065257794595e-05, "loss": 1.6729, "step": 39900 }, { "epoch": 1.4188960981246113, "grad_norm": 1.7115602493286133, "learning_rate": 1.1712204160152387e-05, "loss": 1.6881, "step": 39910 }, { "epoch": 1.4192516220780376, "grad_norm": 1.9424142837524414, "learning_rate": 1.1708342799526355e-05, "loss": 1.6612, "step": 39920 }, { "epoch": 1.4196071460314639, "grad_norm": 1.8880399465560913, "learning_rate": 1.1704481176509577e-05, "loss": 1.69, "step": 39930 }, { "epoch": 1.4199626699848902, "grad_norm": 1.8421772718429565, "learning_rate": 1.1700619291695179e-05, "loss": 1.6468, "step": 39940 }, { "epoch": 1.4203181939383165, "grad_norm": 1.8074126243591309, "learning_rate": 1.1696757145676318e-05, "loss": 1.6916, "step": 39950 }, { "epoch": 1.420673717891743, "grad_norm": 1.7604447603225708, "learning_rate": 1.16928947390462e-05, "loss": 1.6848, "step": 39960 }, { "epoch": 1.4210292418451693, "grad_norm": 1.9991328716278076, "learning_rate": 1.1689032072398068e-05, "loss": 1.7153, "step": 39970 }, { "epoch": 1.4213847657985956, "grad_norm": 1.9345002174377441, "learning_rate": 1.1685169146325197e-05, "loss": 1.6902, "step": 39980 }, { "epoch": 1.4217402897520222, "grad_norm": 1.8295515775680542, "learning_rate": 1.1681305961420915e-05, "loss": 1.6904, "step": 39990 }, { "epoch": 1.4220958137054485, "grad_norm": 1.9185062646865845, "learning_rate": 1.1677442518278575e-05, "loss": 1.6831, "step": 40000 }, { "epoch": 1.4224513376588748, "grad_norm": 1.8064296245574951, "learning_rate": 1.1673578817491582e-05, "loss": 1.677, "step": 40010 }, { "epoch": 1.422806861612301, "grad_norm": 1.8515894412994385, "learning_rate": 1.1669714859653377e-05, "loss": 1.6941, "step": 40020 }, { "epoch": 1.4231623855657274, "grad_norm": 1.7757716178894043, "learning_rate": 1.166585064535744e-05, "loss": 1.6463, "step": 40030 }, { "epoch": 1.423517909519154, "grad_norm": 1.834594488143921, "learning_rate": 1.1661986175197284e-05, "loss": 1.7113, "step": 40040 }, { "epoch": 1.4238734334725802, "grad_norm": 1.984318733215332, "learning_rate": 1.1658121449766475e-05, "loss": 1.6948, "step": 40050 }, { "epoch": 1.4242289574260065, "grad_norm": 1.7884846925735474, "learning_rate": 1.165425646965861e-05, "loss": 1.6818, "step": 40060 }, { "epoch": 1.424584481379433, "grad_norm": 1.8413163423538208, "learning_rate": 1.1650391235467322e-05, "loss": 1.6688, "step": 40070 }, { "epoch": 1.4249400053328594, "grad_norm": 2.025177478790283, "learning_rate": 1.1646525747786288e-05, "loss": 1.6483, "step": 40080 }, { "epoch": 1.4252955292862857, "grad_norm": 1.9283874034881592, "learning_rate": 1.1642660007209221e-05, "loss": 1.6825, "step": 40090 }, { "epoch": 1.425651053239712, "grad_norm": 1.8084274530410767, "learning_rate": 1.1638794014329881e-05, "loss": 1.6637, "step": 40100 }, { "epoch": 1.4260065771931383, "grad_norm": 2.027099609375, "learning_rate": 1.1634927769742053e-05, "loss": 1.6782, "step": 40110 }, { "epoch": 1.4263621011465648, "grad_norm": 1.707337498664856, "learning_rate": 1.163106127403957e-05, "loss": 1.6334, "step": 40120 }, { "epoch": 1.4267176250999911, "grad_norm": 1.9192252159118652, "learning_rate": 1.1627194527816304e-05, "loss": 1.6684, "step": 40130 }, { "epoch": 1.4270731490534174, "grad_norm": 1.83455491065979, "learning_rate": 1.1623327531666157e-05, "loss": 1.6631, "step": 40140 }, { "epoch": 1.427428673006844, "grad_norm": 1.937157392501831, "learning_rate": 1.1619460286183087e-05, "loss": 1.6805, "step": 40150 }, { "epoch": 1.4277841969602703, "grad_norm": 1.799842357635498, "learning_rate": 1.1615592791961068e-05, "loss": 1.6486, "step": 40160 }, { "epoch": 1.4281397209136966, "grad_norm": 1.7772995233535767, "learning_rate": 1.1611725049594122e-05, "loss": 1.6559, "step": 40170 }, { "epoch": 1.4284952448671229, "grad_norm": 1.8416128158569336, "learning_rate": 1.1607857059676317e-05, "loss": 1.6281, "step": 40180 }, { "epoch": 1.4288507688205492, "grad_norm": 1.7117491960525513, "learning_rate": 1.1603988822801749e-05, "loss": 1.699, "step": 40190 }, { "epoch": 1.4292062927739757, "grad_norm": 1.7336164712905884, "learning_rate": 1.1600120339564554e-05, "loss": 1.6589, "step": 40200 }, { "epoch": 1.429561816727402, "grad_norm": 1.8139528036117554, "learning_rate": 1.1596251610558906e-05, "loss": 1.6789, "step": 40210 }, { "epoch": 1.4299173406808283, "grad_norm": 1.9444037675857544, "learning_rate": 1.1592382636379025e-05, "loss": 1.6659, "step": 40220 }, { "epoch": 1.4302728646342548, "grad_norm": 1.7962716817855835, "learning_rate": 1.1588513417619152e-05, "loss": 1.6685, "step": 40230 }, { "epoch": 1.4306283885876812, "grad_norm": 1.8451653718948364, "learning_rate": 1.1584643954873577e-05, "loss": 1.6623, "step": 40240 }, { "epoch": 1.4309839125411075, "grad_norm": 1.8435050249099731, "learning_rate": 1.1580774248736629e-05, "loss": 1.7194, "step": 40250 }, { "epoch": 1.4313394364945338, "grad_norm": 1.7766956090927124, "learning_rate": 1.1576904299802665e-05, "loss": 1.6877, "step": 40260 }, { "epoch": 1.43169496044796, "grad_norm": 1.8470335006713867, "learning_rate": 1.1573034108666088e-05, "loss": 1.7135, "step": 40270 }, { "epoch": 1.4320504844013866, "grad_norm": 2.0463173389434814, "learning_rate": 1.1569163675921338e-05, "loss": 1.6935, "step": 40280 }, { "epoch": 1.432406008354813, "grad_norm": 1.9131762981414795, "learning_rate": 1.1565293002162883e-05, "loss": 1.6545, "step": 40290 }, { "epoch": 1.4327615323082392, "grad_norm": 1.7757117748260498, "learning_rate": 1.1561422087985237e-05, "loss": 1.6937, "step": 40300 }, { "epoch": 1.4331170562616657, "grad_norm": 1.7911549806594849, "learning_rate": 1.155755093398295e-05, "loss": 1.6911, "step": 40310 }, { "epoch": 1.433472580215092, "grad_norm": 1.7745696306228638, "learning_rate": 1.1553679540750606e-05, "loss": 1.6788, "step": 40320 }, { "epoch": 1.4338281041685184, "grad_norm": 1.824103593826294, "learning_rate": 1.1549807908882827e-05, "loss": 1.6678, "step": 40330 }, { "epoch": 1.4341836281219447, "grad_norm": 1.8068585395812988, "learning_rate": 1.1545936038974269e-05, "loss": 1.6559, "step": 40340 }, { "epoch": 1.434539152075371, "grad_norm": 1.8663021326065063, "learning_rate": 1.1542063931619629e-05, "loss": 1.6163, "step": 40350 }, { "epoch": 1.4348946760287975, "grad_norm": 1.7645716667175293, "learning_rate": 1.1538191587413637e-05, "loss": 1.6904, "step": 40360 }, { "epoch": 1.4352501999822238, "grad_norm": 1.8233351707458496, "learning_rate": 1.153431900695106e-05, "loss": 1.6601, "step": 40370 }, { "epoch": 1.4356057239356501, "grad_norm": 1.7978568077087402, "learning_rate": 1.1530446190826706e-05, "loss": 1.6534, "step": 40380 }, { "epoch": 1.4359612478890766, "grad_norm": 1.9237686395645142, "learning_rate": 1.1526573139635413e-05, "loss": 1.6658, "step": 40390 }, { "epoch": 1.436316771842503, "grad_norm": 1.7938878536224365, "learning_rate": 1.152269985397206e-05, "loss": 1.7059, "step": 40400 }, { "epoch": 1.4366722957959293, "grad_norm": 1.8476046323776245, "learning_rate": 1.1518826334431554e-05, "loss": 1.6844, "step": 40410 }, { "epoch": 1.4370278197493556, "grad_norm": 1.8722749948501587, "learning_rate": 1.1514952581608847e-05, "loss": 1.6734, "step": 40420 }, { "epoch": 1.4373833437027819, "grad_norm": 1.9160614013671875, "learning_rate": 1.1511078596098922e-05, "loss": 1.6755, "step": 40430 }, { "epoch": 1.4377388676562084, "grad_norm": 1.8917723894119263, "learning_rate": 1.1507204378496798e-05, "loss": 1.6371, "step": 40440 }, { "epoch": 1.4380943916096347, "grad_norm": 1.833464503288269, "learning_rate": 1.1503329929397531e-05, "loss": 1.673, "step": 40450 }, { "epoch": 1.438449915563061, "grad_norm": 2.0275955200195312, "learning_rate": 1.1499455249396216e-05, "loss": 1.6789, "step": 40460 }, { "epoch": 1.4388054395164875, "grad_norm": 1.8364850282669067, "learning_rate": 1.1495580339087974e-05, "loss": 1.6801, "step": 40470 }, { "epoch": 1.4391609634699138, "grad_norm": 1.827332615852356, "learning_rate": 1.1491705199067973e-05, "loss": 1.6975, "step": 40480 }, { "epoch": 1.4395164874233402, "grad_norm": 1.7486658096313477, "learning_rate": 1.1487829829931403e-05, "loss": 1.6993, "step": 40490 }, { "epoch": 1.4398720113767665, "grad_norm": 1.9424848556518555, "learning_rate": 1.14839542322735e-05, "loss": 1.6983, "step": 40500 }, { "epoch": 1.4402275353301928, "grad_norm": 1.9159152507781982, "learning_rate": 1.1480078406689529e-05, "loss": 1.6657, "step": 40510 }, { "epoch": 1.4405830592836193, "grad_norm": 1.753502368927002, "learning_rate": 1.1476202353774799e-05, "loss": 1.6404, "step": 40520 }, { "epoch": 1.4409385832370456, "grad_norm": 1.8607120513916016, "learning_rate": 1.1472326074124642e-05, "loss": 1.703, "step": 40530 }, { "epoch": 1.441294107190472, "grad_norm": 1.7924970388412476, "learning_rate": 1.1468449568334433e-05, "loss": 1.7019, "step": 40540 }, { "epoch": 1.4416496311438984, "grad_norm": 1.8681445121765137, "learning_rate": 1.1464572836999575e-05, "loss": 1.6766, "step": 40550 }, { "epoch": 1.4420051550973247, "grad_norm": 1.7013012170791626, "learning_rate": 1.1460695880715516e-05, "loss": 1.664, "step": 40560 }, { "epoch": 1.442360679050751, "grad_norm": 1.912614345550537, "learning_rate": 1.1456818700077723e-05, "loss": 1.6694, "step": 40570 }, { "epoch": 1.4427162030041774, "grad_norm": 1.9411284923553467, "learning_rate": 1.1452941295681715e-05, "loss": 1.6325, "step": 40580 }, { "epoch": 1.4430717269576037, "grad_norm": 1.845502257347107, "learning_rate": 1.1449063668123035e-05, "loss": 1.652, "step": 40590 }, { "epoch": 1.4434272509110302, "grad_norm": 1.7138339281082153, "learning_rate": 1.144518581799726e-05, "loss": 1.649, "step": 40600 }, { "epoch": 1.4437827748644565, "grad_norm": 1.8865113258361816, "learning_rate": 1.1441307745900003e-05, "loss": 1.6822, "step": 40610 }, { "epoch": 1.4441382988178828, "grad_norm": 1.7740147113800049, "learning_rate": 1.1437429452426915e-05, "loss": 1.6733, "step": 40620 }, { "epoch": 1.4444938227713093, "grad_norm": 1.9099332094192505, "learning_rate": 1.1433550938173677e-05, "loss": 1.7122, "step": 40630 }, { "epoch": 1.4448493467247356, "grad_norm": 1.9059197902679443, "learning_rate": 1.1429672203736001e-05, "loss": 1.6678, "step": 40640 }, { "epoch": 1.445204870678162, "grad_norm": 1.73043954372406, "learning_rate": 1.142579324970964e-05, "loss": 1.6927, "step": 40650 }, { "epoch": 1.4455603946315883, "grad_norm": 1.875238299369812, "learning_rate": 1.1421914076690376e-05, "loss": 1.7018, "step": 40660 }, { "epoch": 1.4459159185850146, "grad_norm": 1.8329334259033203, "learning_rate": 1.1418034685274026e-05, "loss": 1.6683, "step": 40670 }, { "epoch": 1.446271442538441, "grad_norm": 1.7928236722946167, "learning_rate": 1.1414155076056437e-05, "loss": 1.6834, "step": 40680 }, { "epoch": 1.4466269664918674, "grad_norm": 1.9530136585235596, "learning_rate": 1.1410275249633496e-05, "loss": 1.678, "step": 40690 }, { "epoch": 1.4469824904452937, "grad_norm": 1.8798272609710693, "learning_rate": 1.140639520660112e-05, "loss": 1.6921, "step": 40700 }, { "epoch": 1.4473380143987202, "grad_norm": 1.7171239852905273, "learning_rate": 1.140251494755526e-05, "loss": 1.6791, "step": 40710 }, { "epoch": 1.4476935383521465, "grad_norm": 1.737978219985962, "learning_rate": 1.1398634473091897e-05, "loss": 1.6708, "step": 40720 }, { "epoch": 1.4480490623055728, "grad_norm": 1.8468761444091797, "learning_rate": 1.1394753783807047e-05, "loss": 1.6682, "step": 40730 }, { "epoch": 1.4484045862589991, "grad_norm": 1.9467835426330566, "learning_rate": 1.139087288029676e-05, "loss": 1.6929, "step": 40740 }, { "epoch": 1.4487601102124255, "grad_norm": 1.847671627998352, "learning_rate": 1.138699176315712e-05, "loss": 1.663, "step": 40750 }, { "epoch": 1.449115634165852, "grad_norm": 2.084027051925659, "learning_rate": 1.138311043298424e-05, "loss": 1.6463, "step": 40760 }, { "epoch": 1.4494711581192783, "grad_norm": 1.8675135374069214, "learning_rate": 1.1379228890374274e-05, "loss": 1.66, "step": 40770 }, { "epoch": 1.4498266820727046, "grad_norm": 1.7958040237426758, "learning_rate": 1.1375347135923395e-05, "loss": 1.6281, "step": 40780 }, { "epoch": 1.4501822060261311, "grad_norm": 1.8652892112731934, "learning_rate": 1.1371465170227822e-05, "loss": 1.6369, "step": 40790 }, { "epoch": 1.4505377299795574, "grad_norm": 1.7667006254196167, "learning_rate": 1.1367582993883798e-05, "loss": 1.6318, "step": 40800 }, { "epoch": 1.4508932539329837, "grad_norm": 1.8608719110488892, "learning_rate": 1.13637006074876e-05, "loss": 1.6648, "step": 40810 }, { "epoch": 1.45124877788641, "grad_norm": 1.79188871383667, "learning_rate": 1.1359818011635538e-05, "loss": 1.6788, "step": 40820 }, { "epoch": 1.4516043018398364, "grad_norm": 1.860592007637024, "learning_rate": 1.1355935206923955e-05, "loss": 1.6826, "step": 40830 }, { "epoch": 1.4519598257932629, "grad_norm": 1.888957142829895, "learning_rate": 1.135205219394923e-05, "loss": 1.7112, "step": 40840 }, { "epoch": 1.4523153497466892, "grad_norm": 1.904823899269104, "learning_rate": 1.1348168973307762e-05, "loss": 1.6255, "step": 40850 }, { "epoch": 1.4526708737001155, "grad_norm": 1.7432588338851929, "learning_rate": 1.1344285545595991e-05, "loss": 1.6706, "step": 40860 }, { "epoch": 1.453026397653542, "grad_norm": 1.725298285484314, "learning_rate": 1.1340401911410392e-05, "loss": 1.6436, "step": 40870 }, { "epoch": 1.4533819216069683, "grad_norm": 1.8614853620529175, "learning_rate": 1.1336518071347467e-05, "loss": 1.6568, "step": 40880 }, { "epoch": 1.4537374455603946, "grad_norm": 1.7306885719299316, "learning_rate": 1.1332634026003742e-05, "loss": 1.68, "step": 40890 }, { "epoch": 1.454092969513821, "grad_norm": 1.769089698791504, "learning_rate": 1.1328749775975786e-05, "loss": 1.7314, "step": 40900 }, { "epoch": 1.4544484934672472, "grad_norm": 1.7994245290756226, "learning_rate": 1.1324865321860197e-05, "loss": 1.7049, "step": 40910 }, { "epoch": 1.4548040174206738, "grad_norm": 1.8408583402633667, "learning_rate": 1.13209806642536e-05, "loss": 1.7092, "step": 40920 }, { "epoch": 1.4551595413741, "grad_norm": 2.042452096939087, "learning_rate": 1.1317095803752657e-05, "loss": 1.6875, "step": 40930 }, { "epoch": 1.4555150653275264, "grad_norm": 1.9339094161987305, "learning_rate": 1.1313210740954057e-05, "loss": 1.6558, "step": 40940 }, { "epoch": 1.455870589280953, "grad_norm": 1.9632941484451294, "learning_rate": 1.1309325476454519e-05, "loss": 1.7093, "step": 40950 }, { "epoch": 1.4562261132343792, "grad_norm": 1.8465474843978882, "learning_rate": 1.1305440010850802e-05, "loss": 1.6853, "step": 40960 }, { "epoch": 1.4565816371878055, "grad_norm": 1.747512936592102, "learning_rate": 1.1301554344739683e-05, "loss": 1.684, "step": 40970 }, { "epoch": 1.4569371611412318, "grad_norm": 1.9150688648223877, "learning_rate": 1.1297668478717974e-05, "loss": 1.7034, "step": 40980 }, { "epoch": 1.4572926850946581, "grad_norm": 1.750643014907837, "learning_rate": 1.1293782413382523e-05, "loss": 1.6631, "step": 40990 }, { "epoch": 1.4576482090480847, "grad_norm": 1.7395461797714233, "learning_rate": 1.1289896149330209e-05, "loss": 1.6791, "step": 41000 }, { "epoch": 1.458003733001511, "grad_norm": 1.8940759897232056, "learning_rate": 1.1286009687157931e-05, "loss": 1.6268, "step": 41010 }, { "epoch": 1.4583592569549373, "grad_norm": 1.8179832696914673, "learning_rate": 1.1282123027462632e-05, "loss": 1.6772, "step": 41020 }, { "epoch": 1.4587147809083638, "grad_norm": 1.7574387788772583, "learning_rate": 1.1278236170841272e-05, "loss": 1.6809, "step": 41030 }, { "epoch": 1.4590703048617901, "grad_norm": 1.7769755125045776, "learning_rate": 1.1274349117890852e-05, "loss": 1.6538, "step": 41040 }, { "epoch": 1.4594258288152164, "grad_norm": 1.8514573574066162, "learning_rate": 1.1270461869208398e-05, "loss": 1.6794, "step": 41050 }, { "epoch": 1.4597813527686427, "grad_norm": 1.8502614498138428, "learning_rate": 1.1266574425390966e-05, "loss": 1.6804, "step": 41060 }, { "epoch": 1.460136876722069, "grad_norm": 1.898262858390808, "learning_rate": 1.1262686787035643e-05, "loss": 1.648, "step": 41070 }, { "epoch": 1.4604924006754956, "grad_norm": 1.8465478420257568, "learning_rate": 1.1258798954739547e-05, "loss": 1.6714, "step": 41080 }, { "epoch": 1.4608479246289219, "grad_norm": 1.9473800659179688, "learning_rate": 1.1254910929099827e-05, "loss": 1.6868, "step": 41090 }, { "epoch": 1.4612034485823482, "grad_norm": 1.9544756412506104, "learning_rate": 1.1251022710713655e-05, "loss": 1.7166, "step": 41100 }, { "epoch": 1.4615589725357747, "grad_norm": 1.8831160068511963, "learning_rate": 1.1247134300178235e-05, "loss": 1.651, "step": 41110 }, { "epoch": 1.461914496489201, "grad_norm": 2.1862456798553467, "learning_rate": 1.1243245698090812e-05, "loss": 1.6506, "step": 41120 }, { "epoch": 1.4622700204426273, "grad_norm": 1.830612063407898, "learning_rate": 1.1239356905048642e-05, "loss": 1.6733, "step": 41130 }, { "epoch": 1.4626255443960536, "grad_norm": 1.9072941541671753, "learning_rate": 1.123546792164902e-05, "loss": 1.6304, "step": 41140 }, { "epoch": 1.46298106834948, "grad_norm": 1.740522861480713, "learning_rate": 1.1231578748489277e-05, "loss": 1.6535, "step": 41150 }, { "epoch": 1.4633365923029065, "grad_norm": 1.8051260709762573, "learning_rate": 1.1227689386166758e-05, "loss": 1.6591, "step": 41160 }, { "epoch": 1.4636921162563328, "grad_norm": 1.7794371843338013, "learning_rate": 1.1223799835278844e-05, "loss": 1.6826, "step": 41170 }, { "epoch": 1.464047640209759, "grad_norm": 1.8125745058059692, "learning_rate": 1.121991009642295e-05, "loss": 1.6881, "step": 41180 }, { "epoch": 1.4644031641631856, "grad_norm": 1.9413714408874512, "learning_rate": 1.1216020170196516e-05, "loss": 1.6356, "step": 41190 }, { "epoch": 1.464758688116612, "grad_norm": 1.967499852180481, "learning_rate": 1.1212130057197009e-05, "loss": 1.6819, "step": 41200 }, { "epoch": 1.4651142120700382, "grad_norm": 1.929537296295166, "learning_rate": 1.1208239758021923e-05, "loss": 1.6923, "step": 41210 }, { "epoch": 1.4654697360234645, "grad_norm": 1.7280325889587402, "learning_rate": 1.1204349273268786e-05, "loss": 1.6852, "step": 41220 }, { "epoch": 1.4658252599768908, "grad_norm": 2.1349501609802246, "learning_rate": 1.1200458603535153e-05, "loss": 1.6731, "step": 41230 }, { "epoch": 1.4661807839303174, "grad_norm": 1.897236943244934, "learning_rate": 1.1196567749418606e-05, "loss": 1.6999, "step": 41240 }, { "epoch": 1.4665363078837437, "grad_norm": 1.8861037492752075, "learning_rate": 1.1192676711516752e-05, "loss": 1.6768, "step": 41250 }, { "epoch": 1.46689183183717, "grad_norm": 2.259942054748535, "learning_rate": 1.1188785490427234e-05, "loss": 1.6913, "step": 41260 }, { "epoch": 1.4672473557905965, "grad_norm": 1.8978254795074463, "learning_rate": 1.1184894086747721e-05, "loss": 1.6547, "step": 41270 }, { "epoch": 1.4676028797440228, "grad_norm": 1.8073842525482178, "learning_rate": 1.1181002501075908e-05, "loss": 1.6646, "step": 41280 }, { "epoch": 1.4679584036974491, "grad_norm": 1.9123353958129883, "learning_rate": 1.1177110734009512e-05, "loss": 1.6652, "step": 41290 }, { "epoch": 1.4683139276508754, "grad_norm": 1.7248234748840332, "learning_rate": 1.1173218786146287e-05, "loss": 1.7009, "step": 41300 }, { "epoch": 1.4686694516043017, "grad_norm": 1.7227554321289062, "learning_rate": 1.1169326658084013e-05, "loss": 1.6481, "step": 41310 }, { "epoch": 1.4690249755577283, "grad_norm": 1.7111023664474487, "learning_rate": 1.1165434350420496e-05, "loss": 1.6573, "step": 41320 }, { "epoch": 1.4693804995111546, "grad_norm": 1.849845290184021, "learning_rate": 1.1161541863753571e-05, "loss": 1.6412, "step": 41330 }, { "epoch": 1.4697360234645809, "grad_norm": 1.8790321350097656, "learning_rate": 1.11576491986811e-05, "loss": 1.6816, "step": 41340 }, { "epoch": 1.4700915474180074, "grad_norm": 2.038875102996826, "learning_rate": 1.1153756355800966e-05, "loss": 1.6827, "step": 41350 }, { "epoch": 1.4704470713714337, "grad_norm": 1.9586118459701538, "learning_rate": 1.1149863335711095e-05, "loss": 1.6575, "step": 41360 }, { "epoch": 1.47080259532486, "grad_norm": 1.8184459209442139, "learning_rate": 1.1145970139009424e-05, "loss": 1.6877, "step": 41370 }, { "epoch": 1.4711581192782863, "grad_norm": 1.867667317390442, "learning_rate": 1.1142076766293923e-05, "loss": 1.7235, "step": 41380 }, { "epoch": 1.4715136432317126, "grad_norm": 1.7963002920150757, "learning_rate": 1.1138183218162593e-05, "loss": 1.6558, "step": 41390 }, { "epoch": 1.4718691671851392, "grad_norm": 1.8462467193603516, "learning_rate": 1.1134289495213457e-05, "loss": 1.6822, "step": 41400 }, { "epoch": 1.4722246911385655, "grad_norm": 1.8079371452331543, "learning_rate": 1.1130395598044565e-05, "loss": 1.6507, "step": 41410 }, { "epoch": 1.4725802150919918, "grad_norm": 1.8218004703521729, "learning_rate": 1.1126501527253998e-05, "loss": 1.7074, "step": 41420 }, { "epoch": 1.4729357390454183, "grad_norm": 1.8829344511032104, "learning_rate": 1.1122607283439862e-05, "loss": 1.655, "step": 41430 }, { "epoch": 1.4732912629988446, "grad_norm": 2.0060136318206787, "learning_rate": 1.1118712867200284e-05, "loss": 1.6516, "step": 41440 }, { "epoch": 1.473646786952271, "grad_norm": 1.8415420055389404, "learning_rate": 1.1114818279133424e-05, "loss": 1.6614, "step": 41450 }, { "epoch": 1.4740023109056972, "grad_norm": 1.7176750898361206, "learning_rate": 1.1110923519837466e-05, "loss": 1.6794, "step": 41460 }, { "epoch": 1.4743578348591235, "grad_norm": 1.945716142654419, "learning_rate": 1.1107028589910623e-05, "loss": 1.6776, "step": 41470 }, { "epoch": 1.47471335881255, "grad_norm": 1.7233506441116333, "learning_rate": 1.1103133489951125e-05, "loss": 1.6975, "step": 41480 }, { "epoch": 1.4750688827659764, "grad_norm": 1.8264062404632568, "learning_rate": 1.1099238220557243e-05, "loss": 1.6433, "step": 41490 }, { "epoch": 1.4754244067194027, "grad_norm": 1.77842378616333, "learning_rate": 1.109534278232726e-05, "loss": 1.6663, "step": 41500 }, { "epoch": 1.4757799306728292, "grad_norm": 2.1303818225860596, "learning_rate": 1.1091447175859497e-05, "loss": 1.6863, "step": 41510 }, { "epoch": 1.4761354546262555, "grad_norm": 1.9709882736206055, "learning_rate": 1.108755140175229e-05, "loss": 1.6655, "step": 41520 }, { "epoch": 1.4764909785796818, "grad_norm": 1.8122498989105225, "learning_rate": 1.1083655460604008e-05, "loss": 1.6817, "step": 41530 }, { "epoch": 1.4768465025331081, "grad_norm": 1.9306962490081787, "learning_rate": 1.107975935301304e-05, "loss": 1.6562, "step": 41540 }, { "epoch": 1.4772020264865344, "grad_norm": 1.8164222240447998, "learning_rate": 1.1075863079577804e-05, "loss": 1.7255, "step": 41550 }, { "epoch": 1.477557550439961, "grad_norm": 1.6643892526626587, "learning_rate": 1.1071966640896748e-05, "loss": 1.677, "step": 41560 }, { "epoch": 1.4779130743933873, "grad_norm": 1.7539207935333252, "learning_rate": 1.1068070037568335e-05, "loss": 1.7027, "step": 41570 }, { "epoch": 1.4782685983468136, "grad_norm": 1.9250538349151611, "learning_rate": 1.1064173270191063e-05, "loss": 1.6921, "step": 41580 }, { "epoch": 1.47862412230024, "grad_norm": 1.8705344200134277, "learning_rate": 1.1060276339363448e-05, "loss": 1.7, "step": 41590 }, { "epoch": 1.4789796462536664, "grad_norm": 1.8841394186019897, "learning_rate": 1.1056379245684036e-05, "loss": 1.6806, "step": 41600 }, { "epoch": 1.4793351702070927, "grad_norm": 1.8760476112365723, "learning_rate": 1.1052481989751393e-05, "loss": 1.6466, "step": 41610 }, { "epoch": 1.479690694160519, "grad_norm": 1.9157289266586304, "learning_rate": 1.1048584572164118e-05, "loss": 1.6591, "step": 41620 }, { "epoch": 1.4800462181139453, "grad_norm": 1.6999433040618896, "learning_rate": 1.1044686993520825e-05, "loss": 1.6746, "step": 41630 }, { "epoch": 1.4804017420673719, "grad_norm": 1.8318113088607788, "learning_rate": 1.1040789254420164e-05, "loss": 1.6636, "step": 41640 }, { "epoch": 1.4807572660207982, "grad_norm": 1.930933952331543, "learning_rate": 1.1036891355460795e-05, "loss": 1.7151, "step": 41650 }, { "epoch": 1.4811127899742245, "grad_norm": 1.818397045135498, "learning_rate": 1.1032993297241417e-05, "loss": 1.6597, "step": 41660 }, { "epoch": 1.481468313927651, "grad_norm": 1.746368408203125, "learning_rate": 1.1029095080360745e-05, "loss": 1.6479, "step": 41670 }, { "epoch": 1.4818238378810773, "grad_norm": 1.9062423706054688, "learning_rate": 1.1025196705417522e-05, "loss": 1.6398, "step": 41680 }, { "epoch": 1.4821793618345036, "grad_norm": 1.9456902742385864, "learning_rate": 1.1021298173010513e-05, "loss": 1.6641, "step": 41690 }, { "epoch": 1.48253488578793, "grad_norm": 1.8529020547866821, "learning_rate": 1.1017399483738507e-05, "loss": 1.6792, "step": 41700 }, { "epoch": 1.4828904097413562, "grad_norm": 1.7703274488449097, "learning_rate": 1.1013500638200322e-05, "loss": 1.6948, "step": 41710 }, { "epoch": 1.4832459336947827, "grad_norm": 1.8067706823349, "learning_rate": 1.1009601636994792e-05, "loss": 1.7028, "step": 41720 }, { "epoch": 1.483601457648209, "grad_norm": 1.773571491241455, "learning_rate": 1.1005702480720778e-05, "loss": 1.6318, "step": 41730 }, { "epoch": 1.4839569816016354, "grad_norm": 1.800899863243103, "learning_rate": 1.1001803169977171e-05, "loss": 1.6743, "step": 41740 }, { "epoch": 1.484312505555062, "grad_norm": 1.7902923822402954, "learning_rate": 1.099790370536288e-05, "loss": 1.6847, "step": 41750 }, { "epoch": 1.4846680295084882, "grad_norm": 1.8870327472686768, "learning_rate": 1.0994004087476837e-05, "loss": 1.7068, "step": 41760 }, { "epoch": 1.4850235534619145, "grad_norm": 1.7531715631484985, "learning_rate": 1.0990104316918e-05, "loss": 1.6547, "step": 41770 }, { "epoch": 1.4853790774153408, "grad_norm": 2.0109541416168213, "learning_rate": 1.0986204394285345e-05, "loss": 1.6797, "step": 41780 }, { "epoch": 1.4857346013687671, "grad_norm": 1.689207673072815, "learning_rate": 1.0982304320177877e-05, "loss": 1.6096, "step": 41790 }, { "epoch": 1.4860901253221936, "grad_norm": 1.8851569890975952, "learning_rate": 1.0978404095194625e-05, "loss": 1.7236, "step": 41800 }, { "epoch": 1.48644564927562, "grad_norm": 1.8390060663223267, "learning_rate": 1.0974503719934642e-05, "loss": 1.6689, "step": 41810 }, { "epoch": 1.4868011732290463, "grad_norm": 1.8239262104034424, "learning_rate": 1.0970603194996994e-05, "loss": 1.6515, "step": 41820 }, { "epoch": 1.4871566971824728, "grad_norm": 1.805426001548767, "learning_rate": 1.0966702520980786e-05, "loss": 1.6885, "step": 41830 }, { "epoch": 1.487512221135899, "grad_norm": 1.8900763988494873, "learning_rate": 1.0962801698485127e-05, "loss": 1.6806, "step": 41840 }, { "epoch": 1.4878677450893254, "grad_norm": 1.8122882843017578, "learning_rate": 1.0958900728109167e-05, "loss": 1.7162, "step": 41850 }, { "epoch": 1.4882232690427517, "grad_norm": 1.731871247291565, "learning_rate": 1.0954999610452066e-05, "loss": 1.6979, "step": 41860 }, { "epoch": 1.488578792996178, "grad_norm": 1.8255044221878052, "learning_rate": 1.0951098346113011e-05, "loss": 1.6548, "step": 41870 }, { "epoch": 1.4889343169496045, "grad_norm": 1.8068124055862427, "learning_rate": 1.0947196935691215e-05, "loss": 1.679, "step": 41880 }, { "epoch": 1.4892898409030308, "grad_norm": 1.8259496688842773, "learning_rate": 1.0943295379785911e-05, "loss": 1.6882, "step": 41890 }, { "epoch": 1.4896453648564572, "grad_norm": 1.8950399160385132, "learning_rate": 1.093939367899635e-05, "loss": 1.6814, "step": 41900 }, { "epoch": 1.4900008888098837, "grad_norm": 1.8045412302017212, "learning_rate": 1.093549183392181e-05, "loss": 1.6489, "step": 41910 }, { "epoch": 1.49035641276331, "grad_norm": 2.060487985610962, "learning_rate": 1.093158984516159e-05, "loss": 1.6177, "step": 41920 }, { "epoch": 1.4907119367167363, "grad_norm": 1.9152122735977173, "learning_rate": 1.092768771331501e-05, "loss": 1.7015, "step": 41930 }, { "epoch": 1.4910674606701626, "grad_norm": 1.7958887815475464, "learning_rate": 1.092378543898141e-05, "loss": 1.6822, "step": 41940 }, { "epoch": 1.491422984623589, "grad_norm": 1.8429104089736938, "learning_rate": 1.0919883022760167e-05, "loss": 1.6516, "step": 41950 }, { "epoch": 1.4917785085770154, "grad_norm": 1.7905949354171753, "learning_rate": 1.0915980465250653e-05, "loss": 1.6476, "step": 41960 }, { "epoch": 1.4921340325304417, "grad_norm": 1.840455412864685, "learning_rate": 1.0912077767052285e-05, "loss": 1.6886, "step": 41970 }, { "epoch": 1.492489556483868, "grad_norm": 1.8871763944625854, "learning_rate": 1.090817492876449e-05, "loss": 1.7141, "step": 41980 }, { "epoch": 1.4928450804372946, "grad_norm": 1.9186160564422607, "learning_rate": 1.090427195098672e-05, "loss": 1.6503, "step": 41990 }, { "epoch": 1.4932006043907209, "grad_norm": 1.8501421213150024, "learning_rate": 1.0900368834318451e-05, "loss": 1.6743, "step": 42000 }, { "epoch": 1.4935561283441472, "grad_norm": 1.8806571960449219, "learning_rate": 1.0896465579359173e-05, "loss": 1.6962, "step": 42010 }, { "epoch": 1.4939116522975735, "grad_norm": 2.0296733379364014, "learning_rate": 1.0892562186708404e-05, "loss": 1.6732, "step": 42020 }, { "epoch": 1.4942671762509998, "grad_norm": 2.0110843181610107, "learning_rate": 1.0888658656965676e-05, "loss": 1.6754, "step": 42030 }, { "epoch": 1.4946227002044263, "grad_norm": 1.8877172470092773, "learning_rate": 1.0884754990730552e-05, "loss": 1.687, "step": 42040 }, { "epoch": 1.4949782241578526, "grad_norm": 1.9644426107406616, "learning_rate": 1.0880851188602608e-05, "loss": 1.6976, "step": 42050 }, { "epoch": 1.495333748111279, "grad_norm": 1.8313747644424438, "learning_rate": 1.0876947251181445e-05, "loss": 1.6712, "step": 42060 }, { "epoch": 1.4956892720647055, "grad_norm": 1.7843058109283447, "learning_rate": 1.0873043179066685e-05, "loss": 1.6794, "step": 42070 }, { "epoch": 1.4960447960181318, "grad_norm": 1.8117133378982544, "learning_rate": 1.0869138972857967e-05, "loss": 1.6775, "step": 42080 }, { "epoch": 1.496400319971558, "grad_norm": 1.9456593990325928, "learning_rate": 1.0865234633154948e-05, "loss": 1.7082, "step": 42090 }, { "epoch": 1.4967558439249844, "grad_norm": 1.7631021738052368, "learning_rate": 1.0861330160557317e-05, "loss": 1.6886, "step": 42100 }, { "epoch": 1.4971113678784107, "grad_norm": 1.8768460750579834, "learning_rate": 1.0857425555664773e-05, "loss": 1.6646, "step": 42110 }, { "epoch": 1.4974668918318372, "grad_norm": 1.8182553052902222, "learning_rate": 1.085352081907704e-05, "loss": 1.6766, "step": 42120 }, { "epoch": 1.4978224157852635, "grad_norm": 2.0107078552246094, "learning_rate": 1.0849615951393859e-05, "loss": 1.656, "step": 42130 }, { "epoch": 1.4981779397386898, "grad_norm": 1.837105631828308, "learning_rate": 1.0845710953214998e-05, "loss": 1.6473, "step": 42140 }, { "epoch": 1.4985334636921164, "grad_norm": 1.784293532371521, "learning_rate": 1.0841805825140238e-05, "loss": 1.6912, "step": 42150 }, { "epoch": 1.4988889876455427, "grad_norm": 1.8410252332687378, "learning_rate": 1.083790056776938e-05, "loss": 1.688, "step": 42160 }, { "epoch": 1.499244511598969, "grad_norm": 1.7848275899887085, "learning_rate": 1.0833995181702248e-05, "loss": 1.6566, "step": 42170 }, { "epoch": 1.4996000355523953, "grad_norm": 1.8361421823501587, "learning_rate": 1.0830089667538683e-05, "loss": 1.6873, "step": 42180 }, { "epoch": 1.4999555595058216, "grad_norm": 1.978108525276184, "learning_rate": 1.0826184025878552e-05, "loss": 1.6962, "step": 42190 }, { "epoch": 1.500311083459248, "grad_norm": 1.945601224899292, "learning_rate": 1.082227825732174e-05, "loss": 1.6857, "step": 42200 }, { "epoch": 1.5006666074126744, "grad_norm": 1.898393988609314, "learning_rate": 1.0818372362468134e-05, "loss": 1.6354, "step": 42210 }, { "epoch": 1.5010221313661007, "grad_norm": 1.9694855213165283, "learning_rate": 1.0814466341917668e-05, "loss": 1.698, "step": 42220 }, { "epoch": 1.5013776553195273, "grad_norm": 1.743110179901123, "learning_rate": 1.0810560196270282e-05, "loss": 1.6665, "step": 42230 }, { "epoch": 1.5017331792729536, "grad_norm": 1.8782473802566528, "learning_rate": 1.080665392612593e-05, "loss": 1.7023, "step": 42240 }, { "epoch": 1.5020887032263799, "grad_norm": 1.9239259958267212, "learning_rate": 1.0802747532084592e-05, "loss": 1.681, "step": 42250 }, { "epoch": 1.5024442271798062, "grad_norm": 1.898139238357544, "learning_rate": 1.0798841014746264e-05, "loss": 1.6735, "step": 42260 }, { "epoch": 1.5027997511332325, "grad_norm": 1.8255397081375122, "learning_rate": 1.079493437471097e-05, "loss": 1.632, "step": 42270 }, { "epoch": 1.5031552750866588, "grad_norm": 1.860229730606079, "learning_rate": 1.0791027612578736e-05, "loss": 1.6334, "step": 42280 }, { "epoch": 1.5035107990400853, "grad_norm": 1.6982882022857666, "learning_rate": 1.0787120728949622e-05, "loss": 1.6891, "step": 42290 }, { "epoch": 1.5038663229935116, "grad_norm": 1.6922639608383179, "learning_rate": 1.0783213724423701e-05, "loss": 1.7187, "step": 42300 }, { "epoch": 1.5042218469469382, "grad_norm": 2.0529677867889404, "learning_rate": 1.077930659960106e-05, "loss": 1.6358, "step": 42310 }, { "epoch": 1.5045773709003645, "grad_norm": 1.9002379179000854, "learning_rate": 1.0775399355081815e-05, "loss": 1.6742, "step": 42320 }, { "epoch": 1.5049328948537908, "grad_norm": 1.9297168254852295, "learning_rate": 1.077149199146609e-05, "loss": 1.7008, "step": 42330 }, { "epoch": 1.505288418807217, "grad_norm": 1.7973363399505615, "learning_rate": 1.076758450935403e-05, "loss": 1.7121, "step": 42340 }, { "epoch": 1.5056439427606434, "grad_norm": 1.9050586223602295, "learning_rate": 1.0763676909345805e-05, "loss": 1.6551, "step": 42350 }, { "epoch": 1.5059994667140697, "grad_norm": 1.843286395072937, "learning_rate": 1.0759769192041592e-05, "loss": 1.6768, "step": 42360 }, { "epoch": 1.5063549906674962, "grad_norm": 1.7610872983932495, "learning_rate": 1.0755861358041596e-05, "loss": 1.631, "step": 42370 }, { "epoch": 1.5067105146209225, "grad_norm": 1.7675762176513672, "learning_rate": 1.0751953407946034e-05, "loss": 1.6711, "step": 42380 }, { "epoch": 1.507066038574349, "grad_norm": 1.920958161354065, "learning_rate": 1.0748045342355145e-05, "loss": 1.6583, "step": 42390 }, { "epoch": 1.5074215625277754, "grad_norm": 1.882028579711914, "learning_rate": 1.0744137161869181e-05, "loss": 1.6389, "step": 42400 }, { "epoch": 1.5077770864812017, "grad_norm": 1.73197340965271, "learning_rate": 1.0740228867088417e-05, "loss": 1.6905, "step": 42410 }, { "epoch": 1.508132610434628, "grad_norm": 1.9308568239212036, "learning_rate": 1.0736320458613137e-05, "loss": 1.6706, "step": 42420 }, { "epoch": 1.5084881343880543, "grad_norm": 1.8369220495224, "learning_rate": 1.073241193704365e-05, "loss": 1.7041, "step": 42430 }, { "epoch": 1.5088436583414806, "grad_norm": 1.9122000932693481, "learning_rate": 1.0728503302980284e-05, "loss": 1.6227, "step": 42440 }, { "epoch": 1.5091991822949071, "grad_norm": 1.8644506931304932, "learning_rate": 1.072459455702338e-05, "loss": 1.6756, "step": 42450 }, { "epoch": 1.5095547062483334, "grad_norm": 1.8589321374893188, "learning_rate": 1.0720685699773292e-05, "loss": 1.6582, "step": 42460 }, { "epoch": 1.50991023020176, "grad_norm": 2.146574020385742, "learning_rate": 1.07167767318304e-05, "loss": 1.715, "step": 42470 }, { "epoch": 1.5102657541551863, "grad_norm": 1.9308947324752808, "learning_rate": 1.0712867653795101e-05, "loss": 1.6966, "step": 42480 }, { "epoch": 1.5106212781086126, "grad_norm": 1.797224760055542, "learning_rate": 1.0708958466267794e-05, "loss": 1.6977, "step": 42490 }, { "epoch": 1.5109768020620389, "grad_norm": 1.8617327213287354, "learning_rate": 1.0705049169848914e-05, "loss": 1.6539, "step": 42500 }, { "epoch": 1.5113323260154652, "grad_norm": 1.9332149028778076, "learning_rate": 1.0701139765138903e-05, "loss": 1.659, "step": 42510 }, { "epoch": 1.5116878499688915, "grad_norm": 2.0152933597564697, "learning_rate": 1.069723025273822e-05, "loss": 1.6548, "step": 42520 }, { "epoch": 1.512043373922318, "grad_norm": 1.9726402759552002, "learning_rate": 1.0693320633247342e-05, "loss": 1.6655, "step": 42530 }, { "epoch": 1.5123988978757443, "grad_norm": 1.789324402809143, "learning_rate": 1.068941090726676e-05, "loss": 1.66, "step": 42540 }, { "epoch": 1.5127544218291709, "grad_norm": 1.8948030471801758, "learning_rate": 1.0685501075396985e-05, "loss": 1.6341, "step": 42550 }, { "epoch": 1.5131099457825972, "grad_norm": 1.9974631071090698, "learning_rate": 1.0681591138238545e-05, "loss": 1.6326, "step": 42560 }, { "epoch": 1.5134654697360235, "grad_norm": 2.1601462364196777, "learning_rate": 1.067768109639198e-05, "loss": 1.6761, "step": 42570 }, { "epoch": 1.5138209936894498, "grad_norm": 1.733400821685791, "learning_rate": 1.067377095045785e-05, "loss": 1.6505, "step": 42580 }, { "epoch": 1.514176517642876, "grad_norm": 1.9919558763504028, "learning_rate": 1.066986070103672e-05, "loss": 1.6616, "step": 42590 }, { "epoch": 1.5145320415963024, "grad_norm": 1.8978710174560547, "learning_rate": 1.0665950348729191e-05, "loss": 1.6554, "step": 42600 }, { "epoch": 1.514887565549729, "grad_norm": 1.9027674198150635, "learning_rate": 1.066203989413586e-05, "loss": 1.6757, "step": 42610 }, { "epoch": 1.5152430895031552, "grad_norm": 1.774072527885437, "learning_rate": 1.0658129337857356e-05, "loss": 1.6608, "step": 42620 }, { "epoch": 1.5155986134565818, "grad_norm": 1.911702275276184, "learning_rate": 1.0654218680494313e-05, "loss": 1.6923, "step": 42630 }, { "epoch": 1.515954137410008, "grad_norm": 1.811378002166748, "learning_rate": 1.0650307922647383e-05, "loss": 1.6491, "step": 42640 }, { "epoch": 1.5163096613634344, "grad_norm": 1.7372981309890747, "learning_rate": 1.064639706491723e-05, "loss": 1.6513, "step": 42650 }, { "epoch": 1.5166651853168607, "grad_norm": 1.860128402709961, "learning_rate": 1.0642486107904542e-05, "loss": 1.6816, "step": 42660 }, { "epoch": 1.517020709270287, "grad_norm": 1.7836940288543701, "learning_rate": 1.0638575052210017e-05, "loss": 1.6984, "step": 42670 }, { "epoch": 1.5173762332237133, "grad_norm": 1.8072319030761719, "learning_rate": 1.0634663898434365e-05, "loss": 1.7054, "step": 42680 }, { "epoch": 1.5177317571771398, "grad_norm": 1.8161203861236572, "learning_rate": 1.0630752647178322e-05, "loss": 1.6648, "step": 42690 }, { "epoch": 1.5180872811305661, "grad_norm": 1.8995000123977661, "learning_rate": 1.0626841299042626e-05, "loss": 1.6746, "step": 42700 }, { "epoch": 1.5184428050839927, "grad_norm": 1.8162468671798706, "learning_rate": 1.0622929854628035e-05, "loss": 1.6689, "step": 42710 }, { "epoch": 1.518798329037419, "grad_norm": 1.919501543045044, "learning_rate": 1.0619018314535328e-05, "loss": 1.6541, "step": 42720 }, { "epoch": 1.5191538529908453, "grad_norm": 1.8064863681793213, "learning_rate": 1.0615106679365283e-05, "loss": 1.7194, "step": 42730 }, { "epoch": 1.5195093769442716, "grad_norm": 1.8879224061965942, "learning_rate": 1.0611194949718712e-05, "loss": 1.6766, "step": 42740 }, { "epoch": 1.5198649008976979, "grad_norm": 1.9638595581054688, "learning_rate": 1.0607283126196431e-05, "loss": 1.6924, "step": 42750 }, { "epoch": 1.5202204248511242, "grad_norm": 1.7927757501602173, "learning_rate": 1.0603371209399267e-05, "loss": 1.6996, "step": 42760 }, { "epoch": 1.5205759488045507, "grad_norm": 1.8108702898025513, "learning_rate": 1.0599459199928068e-05, "loss": 1.6731, "step": 42770 }, { "epoch": 1.520931472757977, "grad_norm": 1.9261244535446167, "learning_rate": 1.0595547098383696e-05, "loss": 1.6443, "step": 42780 }, { "epoch": 1.5212869967114035, "grad_norm": 1.9858245849609375, "learning_rate": 1.0591634905367024e-05, "loss": 1.6617, "step": 42790 }, { "epoch": 1.5216425206648299, "grad_norm": 1.9586430788040161, "learning_rate": 1.058772262147894e-05, "loss": 1.6843, "step": 42800 }, { "epoch": 1.5219980446182562, "grad_norm": 1.9105232954025269, "learning_rate": 1.0583810247320345e-05, "loss": 1.6589, "step": 42810 }, { "epoch": 1.5223535685716825, "grad_norm": 1.801959753036499, "learning_rate": 1.057989778349216e-05, "loss": 1.6795, "step": 42820 }, { "epoch": 1.5227090925251088, "grad_norm": 1.908121109008789, "learning_rate": 1.0575985230595307e-05, "loss": 1.6764, "step": 42830 }, { "epoch": 1.523064616478535, "grad_norm": 1.947690486907959, "learning_rate": 1.0572072589230735e-05, "loss": 1.6737, "step": 42840 }, { "epoch": 1.5234201404319616, "grad_norm": 1.8410011529922485, "learning_rate": 1.05681598599994e-05, "loss": 1.6743, "step": 42850 }, { "epoch": 1.523775664385388, "grad_norm": 1.9364418983459473, "learning_rate": 1.0564247043502274e-05, "loss": 1.6989, "step": 42860 }, { "epoch": 1.5241311883388144, "grad_norm": 1.897105097770691, "learning_rate": 1.056033414034034e-05, "loss": 1.6967, "step": 42870 }, { "epoch": 1.5244867122922408, "grad_norm": 1.74845552444458, "learning_rate": 1.0556421151114598e-05, "loss": 1.6728, "step": 42880 }, { "epoch": 1.524842236245667, "grad_norm": 1.8528046607971191, "learning_rate": 1.0552508076426053e-05, "loss": 1.707, "step": 42890 }, { "epoch": 1.5251977601990934, "grad_norm": 1.815856695175171, "learning_rate": 1.0548594916875731e-05, "loss": 1.6525, "step": 42900 }, { "epoch": 1.5255532841525197, "grad_norm": 1.8629300594329834, "learning_rate": 1.054468167306467e-05, "loss": 1.6732, "step": 42910 }, { "epoch": 1.525908808105946, "grad_norm": 1.8080686330795288, "learning_rate": 1.054076834559392e-05, "loss": 1.654, "step": 42920 }, { "epoch": 1.5262643320593725, "grad_norm": 1.900989055633545, "learning_rate": 1.0536854935064543e-05, "loss": 1.6557, "step": 42930 }, { "epoch": 1.5266198560127988, "grad_norm": 1.904876947402954, "learning_rate": 1.0532941442077613e-05, "loss": 1.6762, "step": 42940 }, { "epoch": 1.5269753799662253, "grad_norm": 1.8932329416275024, "learning_rate": 1.052902786723422e-05, "loss": 1.6663, "step": 42950 }, { "epoch": 1.5273309039196517, "grad_norm": 1.8724309206008911, "learning_rate": 1.0525114211135466e-05, "loss": 1.7035, "step": 42960 }, { "epoch": 1.527686427873078, "grad_norm": 1.8949474096298218, "learning_rate": 1.0521200474382456e-05, "loss": 1.6709, "step": 42970 }, { "epoch": 1.5280419518265043, "grad_norm": 1.7738467454910278, "learning_rate": 1.0517286657576324e-05, "loss": 1.6792, "step": 42980 }, { "epoch": 1.5283974757799306, "grad_norm": 1.849966049194336, "learning_rate": 1.0513372761318204e-05, "loss": 1.7021, "step": 42990 }, { "epoch": 1.5287529997333569, "grad_norm": 1.932142972946167, "learning_rate": 1.0509458786209248e-05, "loss": 1.6444, "step": 43000 }, { "epoch": 1.5291085236867834, "grad_norm": 1.8554933071136475, "learning_rate": 1.0505544732850617e-05, "loss": 1.6926, "step": 43010 }, { "epoch": 1.5294640476402097, "grad_norm": 1.861149549484253, "learning_rate": 1.0501630601843484e-05, "loss": 1.7165, "step": 43020 }, { "epoch": 1.5298195715936362, "grad_norm": 2.019615888595581, "learning_rate": 1.0497716393789034e-05, "loss": 1.6617, "step": 43030 }, { "epoch": 1.5301750955470625, "grad_norm": 1.7444164752960205, "learning_rate": 1.0493802109288472e-05, "loss": 1.6463, "step": 43040 }, { "epoch": 1.5305306195004889, "grad_norm": 2.0510358810424805, "learning_rate": 1.0489887748942997e-05, "loss": 1.6721, "step": 43050 }, { "epoch": 1.5308861434539152, "grad_norm": 1.8747687339782715, "learning_rate": 1.0485973313353837e-05, "loss": 1.6777, "step": 43060 }, { "epoch": 1.5312416674073415, "grad_norm": 1.7638757228851318, "learning_rate": 1.0482058803122223e-05, "loss": 1.6326, "step": 43070 }, { "epoch": 1.5315971913607678, "grad_norm": 1.9330956935882568, "learning_rate": 1.04781442188494e-05, "loss": 1.6331, "step": 43080 }, { "epoch": 1.5319527153141943, "grad_norm": 1.7835172414779663, "learning_rate": 1.0474229561136622e-05, "loss": 1.6277, "step": 43090 }, { "epoch": 1.5323082392676206, "grad_norm": 1.929072380065918, "learning_rate": 1.0470314830585158e-05, "loss": 1.6596, "step": 43100 }, { "epoch": 1.5326637632210471, "grad_norm": 1.9375463724136353, "learning_rate": 1.0466400027796283e-05, "loss": 1.6795, "step": 43110 }, { "epoch": 1.5330192871744734, "grad_norm": 1.801561713218689, "learning_rate": 1.0462485153371291e-05, "loss": 1.675, "step": 43120 }, { "epoch": 1.5333748111278998, "grad_norm": 1.8926273584365845, "learning_rate": 1.0458570207911479e-05, "loss": 1.6618, "step": 43130 }, { "epoch": 1.533730335081326, "grad_norm": 1.8269914388656616, "learning_rate": 1.0454655192018158e-05, "loss": 1.7096, "step": 43140 }, { "epoch": 1.5340858590347524, "grad_norm": 1.7992956638336182, "learning_rate": 1.0450740106292648e-05, "loss": 1.6604, "step": 43150 }, { "epoch": 1.5344413829881787, "grad_norm": 1.9935426712036133, "learning_rate": 1.0446824951336283e-05, "loss": 1.6469, "step": 43160 }, { "epoch": 1.5347969069416052, "grad_norm": 1.8476448059082031, "learning_rate": 1.0442909727750407e-05, "loss": 1.6753, "step": 43170 }, { "epoch": 1.5351524308950315, "grad_norm": 2.0077481269836426, "learning_rate": 1.0438994436136378e-05, "loss": 1.7129, "step": 43180 }, { "epoch": 1.535507954848458, "grad_norm": 1.8412647247314453, "learning_rate": 1.0435079077095555e-05, "loss": 1.6599, "step": 43190 }, { "epoch": 1.5358634788018843, "grad_norm": 1.9217013120651245, "learning_rate": 1.0431163651229313e-05, "loss": 1.6532, "step": 43200 }, { "epoch": 1.5362190027553106, "grad_norm": 1.7764896154403687, "learning_rate": 1.0427248159139038e-05, "loss": 1.6287, "step": 43210 }, { "epoch": 1.536574526708737, "grad_norm": 1.7792390584945679, "learning_rate": 1.0423332601426123e-05, "loss": 1.6412, "step": 43220 }, { "epoch": 1.5369300506621633, "grad_norm": 1.8325653076171875, "learning_rate": 1.0419416978691977e-05, "loss": 1.6553, "step": 43230 }, { "epoch": 1.5372855746155896, "grad_norm": 1.853739619255066, "learning_rate": 1.041550129153801e-05, "loss": 1.6539, "step": 43240 }, { "epoch": 1.537641098569016, "grad_norm": 2.0269429683685303, "learning_rate": 1.0411585540565654e-05, "loss": 1.6467, "step": 43250 }, { "epoch": 1.5379966225224424, "grad_norm": 1.9269630908966064, "learning_rate": 1.0407669726376335e-05, "loss": 1.651, "step": 43260 }, { "epoch": 1.538352146475869, "grad_norm": 1.8084664344787598, "learning_rate": 1.0403753849571505e-05, "loss": 1.6735, "step": 43270 }, { "epoch": 1.5387076704292952, "grad_norm": 1.8354097604751587, "learning_rate": 1.0399837910752613e-05, "loss": 1.705, "step": 43280 }, { "epoch": 1.5390631943827215, "grad_norm": 1.8562625646591187, "learning_rate": 1.0395921910521127e-05, "loss": 1.6455, "step": 43290 }, { "epoch": 1.5394187183361479, "grad_norm": 1.8544988632202148, "learning_rate": 1.0392005849478516e-05, "loss": 1.6868, "step": 43300 }, { "epoch": 1.5397742422895742, "grad_norm": 1.7014425992965698, "learning_rate": 1.0388089728226268e-05, "loss": 1.7071, "step": 43310 }, { "epoch": 1.5401297662430005, "grad_norm": 1.7603251934051514, "learning_rate": 1.038417354736587e-05, "loss": 1.6432, "step": 43320 }, { "epoch": 1.540485290196427, "grad_norm": 1.8588751554489136, "learning_rate": 1.0380257307498822e-05, "loss": 1.6296, "step": 43330 }, { "epoch": 1.5408408141498533, "grad_norm": 1.8138442039489746, "learning_rate": 1.0376341009226636e-05, "loss": 1.6166, "step": 43340 }, { "epoch": 1.5411963381032798, "grad_norm": 1.9501382112503052, "learning_rate": 1.037242465315083e-05, "loss": 1.7226, "step": 43350 }, { "epoch": 1.5415518620567061, "grad_norm": 1.8408522605895996, "learning_rate": 1.0368508239872933e-05, "loss": 1.6767, "step": 43360 }, { "epoch": 1.5419073860101324, "grad_norm": 1.803480625152588, "learning_rate": 1.0364591769994484e-05, "loss": 1.6946, "step": 43370 }, { "epoch": 1.5422629099635587, "grad_norm": 1.8548345565795898, "learning_rate": 1.0360675244117024e-05, "loss": 1.6889, "step": 43380 }, { "epoch": 1.542618433916985, "grad_norm": 1.9906445741653442, "learning_rate": 1.0356758662842106e-05, "loss": 1.6794, "step": 43390 }, { "epoch": 1.5429739578704114, "grad_norm": 2.089935302734375, "learning_rate": 1.0352842026771295e-05, "loss": 1.6765, "step": 43400 }, { "epoch": 1.543329481823838, "grad_norm": 1.9084968566894531, "learning_rate": 1.034892533650616e-05, "loss": 1.718, "step": 43410 }, { "epoch": 1.5436850057772642, "grad_norm": 1.7907205820083618, "learning_rate": 1.0345008592648282e-05, "loss": 1.6484, "step": 43420 }, { "epoch": 1.5440405297306907, "grad_norm": 1.8574295043945312, "learning_rate": 1.0341091795799247e-05, "loss": 1.6807, "step": 43430 }, { "epoch": 1.544396053684117, "grad_norm": 1.7244060039520264, "learning_rate": 1.0337174946560652e-05, "loss": 1.6497, "step": 43440 }, { "epoch": 1.5447515776375433, "grad_norm": 1.7605082988739014, "learning_rate": 1.03332580455341e-05, "loss": 1.6575, "step": 43450 }, { "epoch": 1.5451071015909696, "grad_norm": 1.7906380891799927, "learning_rate": 1.03293410933212e-05, "loss": 1.6667, "step": 43460 }, { "epoch": 1.545462625544396, "grad_norm": 2.0502443313598633, "learning_rate": 1.0325424090523573e-05, "loss": 1.6421, "step": 43470 }, { "epoch": 1.5458181494978223, "grad_norm": 1.7937922477722168, "learning_rate": 1.0321507037742846e-05, "loss": 1.6061, "step": 43480 }, { "epoch": 1.5461736734512488, "grad_norm": 1.8461755514144897, "learning_rate": 1.0317589935580654e-05, "loss": 1.6319, "step": 43490 }, { "epoch": 1.546529197404675, "grad_norm": 1.803354263305664, "learning_rate": 1.031367278463864e-05, "loss": 1.7119, "step": 43500 }, { "epoch": 1.5468847213581016, "grad_norm": 1.878377079963684, "learning_rate": 1.0309755585518453e-05, "loss": 1.6353, "step": 43510 }, { "epoch": 1.547240245311528, "grad_norm": 1.8846303224563599, "learning_rate": 1.030583833882175e-05, "loss": 1.6527, "step": 43520 }, { "epoch": 1.5475957692649542, "grad_norm": 1.7359410524368286, "learning_rate": 1.0301921045150196e-05, "loss": 1.6628, "step": 43530 }, { "epoch": 1.5479512932183805, "grad_norm": 1.7824934720993042, "learning_rate": 1.0298003705105462e-05, "loss": 1.6601, "step": 43540 }, { "epoch": 1.5483068171718068, "grad_norm": 1.8383424282073975, "learning_rate": 1.0294086319289227e-05, "loss": 1.6773, "step": 43550 }, { "epoch": 1.5486623411252332, "grad_norm": 1.9958280324935913, "learning_rate": 1.029016888830318e-05, "loss": 1.651, "step": 43560 }, { "epoch": 1.5490178650786597, "grad_norm": 1.8739871978759766, "learning_rate": 1.0286251412749009e-05, "loss": 1.6578, "step": 43570 }, { "epoch": 1.549373389032086, "grad_norm": 1.660166621208191, "learning_rate": 1.0282333893228413e-05, "loss": 1.6737, "step": 43580 }, { "epoch": 1.5497289129855125, "grad_norm": 1.8416285514831543, "learning_rate": 1.0278416330343104e-05, "loss": 1.6798, "step": 43590 }, { "epoch": 1.5500844369389388, "grad_norm": 1.8943604230880737, "learning_rate": 1.0274498724694792e-05, "loss": 1.6736, "step": 43600 }, { "epoch": 1.5504399608923651, "grad_norm": 1.836141586303711, "learning_rate": 1.02705810768852e-05, "loss": 1.6376, "step": 43610 }, { "epoch": 1.5507954848457914, "grad_norm": 1.83307945728302, "learning_rate": 1.0266663387516047e-05, "loss": 1.6696, "step": 43620 }, { "epoch": 1.5511510087992177, "grad_norm": 1.7649224996566772, "learning_rate": 1.0262745657189073e-05, "loss": 1.6532, "step": 43630 }, { "epoch": 1.551506532752644, "grad_norm": 1.9184762239456177, "learning_rate": 1.025882788650601e-05, "loss": 1.624, "step": 43640 }, { "epoch": 1.5518620567060706, "grad_norm": 1.6937661170959473, "learning_rate": 1.025491007606861e-05, "loss": 1.6886, "step": 43650 }, { "epoch": 1.5522175806594969, "grad_norm": 1.9641646146774292, "learning_rate": 1.0250992226478618e-05, "loss": 1.6505, "step": 43660 }, { "epoch": 1.5525731046129234, "grad_norm": 1.8957557678222656, "learning_rate": 1.0247074338337793e-05, "loss": 1.6816, "step": 43670 }, { "epoch": 1.5529286285663497, "grad_norm": 1.9250448942184448, "learning_rate": 1.0243156412247901e-05, "loss": 1.6568, "step": 43680 }, { "epoch": 1.553284152519776, "grad_norm": 1.8737907409667969, "learning_rate": 1.0239238448810711e-05, "loss": 1.6421, "step": 43690 }, { "epoch": 1.5536396764732023, "grad_norm": 1.9514378309249878, "learning_rate": 1.023532044862799e-05, "loss": 1.6403, "step": 43700 }, { "epoch": 1.5539952004266286, "grad_norm": 1.8503978252410889, "learning_rate": 1.0231402412301526e-05, "loss": 1.698, "step": 43710 }, { "epoch": 1.554350724380055, "grad_norm": 1.8692011833190918, "learning_rate": 1.0227484340433102e-05, "loss": 1.6409, "step": 43720 }, { "epoch": 1.5547062483334815, "grad_norm": 1.9184273481369019, "learning_rate": 1.022356623362451e-05, "loss": 1.6459, "step": 43730 }, { "epoch": 1.5550617722869078, "grad_norm": 1.9160983562469482, "learning_rate": 1.0219648092477545e-05, "loss": 1.6866, "step": 43740 }, { "epoch": 1.5554172962403343, "grad_norm": 1.8987330198287964, "learning_rate": 1.0215729917594013e-05, "loss": 1.6661, "step": 43750 }, { "epoch": 1.5557728201937606, "grad_norm": 1.7964797019958496, "learning_rate": 1.0211811709575717e-05, "loss": 1.6423, "step": 43760 }, { "epoch": 1.556128344147187, "grad_norm": 1.8204917907714844, "learning_rate": 1.020789346902447e-05, "loss": 1.7091, "step": 43770 }, { "epoch": 1.5564838681006132, "grad_norm": 1.8866429328918457, "learning_rate": 1.020397519654209e-05, "loss": 1.6482, "step": 43780 }, { "epoch": 1.5568393920540395, "grad_norm": 1.9378949403762817, "learning_rate": 1.0200056892730399e-05, "loss": 1.6639, "step": 43790 }, { "epoch": 1.5571949160074658, "grad_norm": 1.861153483390808, "learning_rate": 1.0196138558191222e-05, "loss": 1.6352, "step": 43800 }, { "epoch": 1.5575504399608924, "grad_norm": 1.7982897758483887, "learning_rate": 1.0192220193526395e-05, "loss": 1.6641, "step": 43810 }, { "epoch": 1.5579059639143187, "grad_norm": 1.8335530757904053, "learning_rate": 1.018830179933775e-05, "loss": 1.6008, "step": 43820 }, { "epoch": 1.5582614878677452, "grad_norm": 1.9274227619171143, "learning_rate": 1.0184383376227128e-05, "loss": 1.6588, "step": 43830 }, { "epoch": 1.5586170118211715, "grad_norm": 1.942944884300232, "learning_rate": 1.0180464924796376e-05, "loss": 1.6425, "step": 43840 }, { "epoch": 1.5589725357745978, "grad_norm": 1.8403658866882324, "learning_rate": 1.0176546445647346e-05, "loss": 1.6101, "step": 43850 }, { "epoch": 1.5593280597280241, "grad_norm": 1.8369762897491455, "learning_rate": 1.0172627939381885e-05, "loss": 1.6762, "step": 43860 }, { "epoch": 1.5596835836814504, "grad_norm": 1.7631001472473145, "learning_rate": 1.016870940660186e-05, "loss": 1.6593, "step": 43870 }, { "epoch": 1.5600391076348767, "grad_norm": 2.2434041500091553, "learning_rate": 1.0164790847909122e-05, "loss": 1.6731, "step": 43880 }, { "epoch": 1.5603946315883033, "grad_norm": 1.7569379806518555, "learning_rate": 1.0160872263905545e-05, "loss": 1.6719, "step": 43890 }, { "epoch": 1.5607501555417296, "grad_norm": 1.9306408166885376, "learning_rate": 1.0156953655192998e-05, "loss": 1.7287, "step": 43900 }, { "epoch": 1.561105679495156, "grad_norm": 1.816402554512024, "learning_rate": 1.0153035022373352e-05, "loss": 1.6788, "step": 43910 }, { "epoch": 1.5614612034485824, "grad_norm": 1.8105896711349487, "learning_rate": 1.0149116366048488e-05, "loss": 1.6809, "step": 43920 }, { "epoch": 1.5618167274020087, "grad_norm": 1.7978081703186035, "learning_rate": 1.0145197686820285e-05, "loss": 1.6415, "step": 43930 }, { "epoch": 1.562172251355435, "grad_norm": 1.8552039861679077, "learning_rate": 1.014127898529063e-05, "loss": 1.6978, "step": 43940 }, { "epoch": 1.5625277753088613, "grad_norm": 2.05350923538208, "learning_rate": 1.0137360262061404e-05, "loss": 1.6772, "step": 43950 }, { "epoch": 1.5628832992622876, "grad_norm": 1.7288767099380493, "learning_rate": 1.0133441517734504e-05, "loss": 1.6627, "step": 43960 }, { "epoch": 1.5632388232157142, "grad_norm": 1.8969210386276245, "learning_rate": 1.0129522752911824e-05, "loss": 1.6602, "step": 43970 }, { "epoch": 1.5635943471691405, "grad_norm": 1.8368299007415771, "learning_rate": 1.012560396819526e-05, "loss": 1.7052, "step": 43980 }, { "epoch": 1.563949871122567, "grad_norm": 2.012700319290161, "learning_rate": 1.0121685164186719e-05, "loss": 1.6564, "step": 43990 }, { "epoch": 1.5643053950759933, "grad_norm": 2.0532262325286865, "learning_rate": 1.0117766341488096e-05, "loss": 1.674, "step": 44000 }, { "epoch": 1.5646609190294196, "grad_norm": 1.783205270767212, "learning_rate": 1.0113847500701304e-05, "loss": 1.6896, "step": 44010 }, { "epoch": 1.565016442982846, "grad_norm": 1.8086717128753662, "learning_rate": 1.0109928642428245e-05, "loss": 1.6704, "step": 44020 }, { "epoch": 1.5653719669362722, "grad_norm": 1.8439959287643433, "learning_rate": 1.0106009767270839e-05, "loss": 1.6411, "step": 44030 }, { "epoch": 1.5657274908896985, "grad_norm": 1.915297269821167, "learning_rate": 1.0102090875830997e-05, "loss": 1.6614, "step": 44040 }, { "epoch": 1.566083014843125, "grad_norm": 1.895838737487793, "learning_rate": 1.0098171968710634e-05, "loss": 1.7175, "step": 44050 }, { "epoch": 1.5664385387965514, "grad_norm": 1.8406438827514648, "learning_rate": 1.0094253046511678e-05, "loss": 1.6552, "step": 44060 }, { "epoch": 1.566794062749978, "grad_norm": 2.7574586868286133, "learning_rate": 1.009033410983604e-05, "loss": 1.672, "step": 44070 }, { "epoch": 1.5671495867034042, "grad_norm": 1.9110980033874512, "learning_rate": 1.008641515928565e-05, "loss": 1.6929, "step": 44080 }, { "epoch": 1.5675051106568305, "grad_norm": 2.3096587657928467, "learning_rate": 1.0082496195462434e-05, "loss": 1.6427, "step": 44090 }, { "epoch": 1.5678606346102568, "grad_norm": 1.798247218132019, "learning_rate": 1.007857721896832e-05, "loss": 1.6606, "step": 44100 }, { "epoch": 1.5682161585636831, "grad_norm": 1.7407604455947876, "learning_rate": 1.0074658230405237e-05, "loss": 1.6417, "step": 44110 }, { "epoch": 1.5685716825171094, "grad_norm": 1.8989253044128418, "learning_rate": 1.007073923037512e-05, "loss": 1.6634, "step": 44120 }, { "epoch": 1.568927206470536, "grad_norm": 1.8761156797409058, "learning_rate": 1.0066820219479899e-05, "loss": 1.6444, "step": 44130 }, { "epoch": 1.5692827304239623, "grad_norm": 1.8708429336547852, "learning_rate": 1.006290119832151e-05, "loss": 1.6565, "step": 44140 }, { "epoch": 1.5696382543773888, "grad_norm": 1.939834475517273, "learning_rate": 1.0058982167501892e-05, "loss": 1.6318, "step": 44150 }, { "epoch": 1.569993778330815, "grad_norm": 1.8468369245529175, "learning_rate": 1.0055063127622985e-05, "loss": 1.6896, "step": 44160 }, { "epoch": 1.5703493022842414, "grad_norm": 1.8790068626403809, "learning_rate": 1.0051144079286725e-05, "loss": 1.6607, "step": 44170 }, { "epoch": 1.5707048262376677, "grad_norm": 1.975175142288208, "learning_rate": 1.0047225023095056e-05, "loss": 1.6839, "step": 44180 }, { "epoch": 1.571060350191094, "grad_norm": 1.8378052711486816, "learning_rate": 1.0043305959649921e-05, "loss": 1.6619, "step": 44190 }, { "epoch": 1.5714158741445203, "grad_norm": 1.7882580757141113, "learning_rate": 1.003938688955326e-05, "loss": 1.7007, "step": 44200 }, { "epoch": 1.5717713980979469, "grad_norm": 1.7554378509521484, "learning_rate": 1.0035467813407019e-05, "loss": 1.6374, "step": 44210 }, { "epoch": 1.5721269220513732, "grad_norm": 1.930601716041565, "learning_rate": 1.0031548731813143e-05, "loss": 1.6368, "step": 44220 }, { "epoch": 1.5724824460047997, "grad_norm": 1.8473033905029297, "learning_rate": 1.0027629645373582e-05, "loss": 1.6585, "step": 44230 }, { "epoch": 1.572837969958226, "grad_norm": 1.7615290880203247, "learning_rate": 1.0023710554690282e-05, "loss": 1.6343, "step": 44240 }, { "epoch": 1.5731934939116523, "grad_norm": 1.8404291868209839, "learning_rate": 1.001979146036519e-05, "loss": 1.6345, "step": 44250 }, { "epoch": 1.5735490178650786, "grad_norm": 1.7335777282714844, "learning_rate": 1.0015872363000252e-05, "loss": 1.66, "step": 44260 }, { "epoch": 1.573904541818505, "grad_norm": 1.8358724117279053, "learning_rate": 1.0011953263197418e-05, "loss": 1.6428, "step": 44270 }, { "epoch": 1.5742600657719312, "grad_norm": 1.8799185752868652, "learning_rate": 1.0008034161558637e-05, "loss": 1.6114, "step": 44280 }, { "epoch": 1.5746155897253578, "grad_norm": 1.9736826419830322, "learning_rate": 1.0004115058685859e-05, "loss": 1.6545, "step": 44290 }, { "epoch": 1.574971113678784, "grad_norm": 1.757722020149231, "learning_rate": 1.0000195955181037e-05, "loss": 1.6981, "step": 44300 }, { "epoch": 1.5753266376322106, "grad_norm": 1.8227778673171997, "learning_rate": 9.996276851646118e-06, "loss": 1.6525, "step": 44310 }, { "epoch": 1.575682161585637, "grad_norm": 1.8586345911026, "learning_rate": 9.992357748683047e-06, "loss": 1.6699, "step": 44320 }, { "epoch": 1.5760376855390632, "grad_norm": 1.9969902038574219, "learning_rate": 9.98843864689378e-06, "loss": 1.6382, "step": 44330 }, { "epoch": 1.5763932094924895, "grad_norm": 1.8302338123321533, "learning_rate": 9.984519546880262e-06, "loss": 1.6258, "step": 44340 }, { "epoch": 1.5767487334459158, "grad_norm": 1.8657716512680054, "learning_rate": 9.980600449244446e-06, "loss": 1.6407, "step": 44350 }, { "epoch": 1.5771042573993421, "grad_norm": 1.9813754558563232, "learning_rate": 9.97668135458828e-06, "loss": 1.6302, "step": 44360 }, { "epoch": 1.5774597813527687, "grad_norm": 1.9710670709609985, "learning_rate": 9.972762263513713e-06, "loss": 1.6526, "step": 44370 }, { "epoch": 1.577815305306195, "grad_norm": 1.9619462490081787, "learning_rate": 9.96884317662269e-06, "loss": 1.6125, "step": 44380 }, { "epoch": 1.5781708292596215, "grad_norm": 1.7616794109344482, "learning_rate": 9.964924094517158e-06, "loss": 1.6731, "step": 44390 }, { "epoch": 1.5785263532130478, "grad_norm": 1.8521478176116943, "learning_rate": 9.961005017799068e-06, "loss": 1.6455, "step": 44400 }, { "epoch": 1.578881877166474, "grad_norm": 1.7463682889938354, "learning_rate": 9.957085947070361e-06, "loss": 1.6594, "step": 44410 }, { "epoch": 1.5792374011199004, "grad_norm": 1.8019137382507324, "learning_rate": 9.953166882932985e-06, "loss": 1.6461, "step": 44420 }, { "epoch": 1.5795929250733267, "grad_norm": 1.9217017889022827, "learning_rate": 9.949247825988881e-06, "loss": 1.6513, "step": 44430 }, { "epoch": 1.579948449026753, "grad_norm": 1.771942138671875, "learning_rate": 9.945328776839994e-06, "loss": 1.6564, "step": 44440 }, { "epoch": 1.5803039729801795, "grad_norm": 1.7770013809204102, "learning_rate": 9.941409736088266e-06, "loss": 1.6698, "step": 44450 }, { "epoch": 1.5806594969336059, "grad_norm": 1.9023828506469727, "learning_rate": 9.937490704335631e-06, "loss": 1.6992, "step": 44460 }, { "epoch": 1.5810150208870324, "grad_norm": 1.9144151210784912, "learning_rate": 9.933571682184034e-06, "loss": 1.6413, "step": 44470 }, { "epoch": 1.5813705448404587, "grad_norm": 1.8934605121612549, "learning_rate": 9.929652670235409e-06, "loss": 1.6479, "step": 44480 }, { "epoch": 1.581726068793885, "grad_norm": 1.8610419034957886, "learning_rate": 9.925733669091692e-06, "loss": 1.6547, "step": 44490 }, { "epoch": 1.5820815927473113, "grad_norm": 1.870526909828186, "learning_rate": 9.92181467935482e-06, "loss": 1.6422, "step": 44500 }, { "epoch": 1.5824371167007376, "grad_norm": 1.9301140308380127, "learning_rate": 9.917895701626721e-06, "loss": 1.635, "step": 44510 }, { "epoch": 1.582792640654164, "grad_norm": 1.8191717863082886, "learning_rate": 9.913976736509328e-06, "loss": 1.6568, "step": 44520 }, { "epoch": 1.5831481646075904, "grad_norm": 1.8834309577941895, "learning_rate": 9.910057784604568e-06, "loss": 1.666, "step": 44530 }, { "epoch": 1.5835036885610168, "grad_norm": 1.832620620727539, "learning_rate": 9.90613884651437e-06, "loss": 1.6647, "step": 44540 }, { "epoch": 1.5838592125144433, "grad_norm": 1.8681204319000244, "learning_rate": 9.902219922840658e-06, "loss": 1.6673, "step": 44550 }, { "epoch": 1.5842147364678696, "grad_norm": 1.897397756576538, "learning_rate": 9.898301014185351e-06, "loss": 1.6643, "step": 44560 }, { "epoch": 1.584570260421296, "grad_norm": 1.8625174760818481, "learning_rate": 9.89438212115037e-06, "loss": 1.6968, "step": 44570 }, { "epoch": 1.5849257843747222, "grad_norm": 1.749588131904602, "learning_rate": 9.890463244337633e-06, "loss": 1.6315, "step": 44580 }, { "epoch": 1.5852813083281485, "grad_norm": 1.8423128128051758, "learning_rate": 9.886544384349053e-06, "loss": 1.6436, "step": 44590 }, { "epoch": 1.5856368322815748, "grad_norm": 1.7982268333435059, "learning_rate": 9.882625541786548e-06, "loss": 1.6562, "step": 44600 }, { "epoch": 1.5859923562350013, "grad_norm": 1.850053310394287, "learning_rate": 9.878706717252025e-06, "loss": 1.6519, "step": 44610 }, { "epoch": 1.5863478801884277, "grad_norm": 1.954933524131775, "learning_rate": 9.874787911347388e-06, "loss": 1.6159, "step": 44620 }, { "epoch": 1.5867034041418542, "grad_norm": 2.061009645462036, "learning_rate": 9.870869124674543e-06, "loss": 1.6882, "step": 44630 }, { "epoch": 1.5870589280952805, "grad_norm": 1.9090464115142822, "learning_rate": 9.866950357835388e-06, "loss": 1.6539, "step": 44640 }, { "epoch": 1.5874144520487068, "grad_norm": 1.8968024253845215, "learning_rate": 9.863031611431826e-06, "loss": 1.6469, "step": 44650 }, { "epoch": 1.587769976002133, "grad_norm": 1.9923118352890015, "learning_rate": 9.859112886065748e-06, "loss": 1.6424, "step": 44660 }, { "epoch": 1.5881254999555594, "grad_norm": 1.6754106283187866, "learning_rate": 9.855194182339048e-06, "loss": 1.6609, "step": 44670 }, { "epoch": 1.5884810239089857, "grad_norm": 1.7623523473739624, "learning_rate": 9.851275500853616e-06, "loss": 1.647, "step": 44680 }, { "epoch": 1.5888365478624122, "grad_norm": 1.912752628326416, "learning_rate": 9.847356842211332e-06, "loss": 1.652, "step": 44690 }, { "epoch": 1.5891920718158385, "grad_norm": 2.1574203968048096, "learning_rate": 9.84343820701408e-06, "loss": 1.6735, "step": 44700 }, { "epoch": 1.589547595769265, "grad_norm": 1.8257741928100586, "learning_rate": 9.839519595863738e-06, "loss": 1.6568, "step": 44710 }, { "epoch": 1.5899031197226914, "grad_norm": 2.158803701400757, "learning_rate": 9.83560100936218e-06, "loss": 1.6592, "step": 44720 }, { "epoch": 1.5902586436761177, "grad_norm": 1.89064359664917, "learning_rate": 9.831682448111278e-06, "loss": 1.6566, "step": 44730 }, { "epoch": 1.590614167629544, "grad_norm": 1.7633954286575317, "learning_rate": 9.827763912712895e-06, "loss": 1.6794, "step": 44740 }, { "epoch": 1.5909696915829703, "grad_norm": 1.973594069480896, "learning_rate": 9.823845403768894e-06, "loss": 1.6717, "step": 44750 }, { "epoch": 1.5913252155363966, "grad_norm": 1.7964091300964355, "learning_rate": 9.819926921881136e-06, "loss": 1.6958, "step": 44760 }, { "epoch": 1.5916807394898231, "grad_norm": 1.911806344985962, "learning_rate": 9.816008467651477e-06, "loss": 1.6587, "step": 44770 }, { "epoch": 1.5920362634432494, "grad_norm": 1.8351384401321411, "learning_rate": 9.81209004168176e-06, "loss": 1.6602, "step": 44780 }, { "epoch": 1.592391787396676, "grad_norm": 1.9146497249603271, "learning_rate": 9.808171644573832e-06, "loss": 1.6896, "step": 44790 }, { "epoch": 1.5927473113501023, "grad_norm": 1.922183871269226, "learning_rate": 9.80425327692954e-06, "loss": 1.6549, "step": 44800 }, { "epoch": 1.5931028353035286, "grad_norm": 1.9478572607040405, "learning_rate": 9.800334939350717e-06, "loss": 1.6179, "step": 44810 }, { "epoch": 1.593458359256955, "grad_norm": 1.950107455253601, "learning_rate": 9.796416632439193e-06, "loss": 1.6645, "step": 44820 }, { "epoch": 1.5938138832103812, "grad_norm": 2.008389472961426, "learning_rate": 9.7924983567968e-06, "loss": 1.6387, "step": 44830 }, { "epoch": 1.5941694071638075, "grad_norm": 1.8459104299545288, "learning_rate": 9.788580113025358e-06, "loss": 1.6924, "step": 44840 }, { "epoch": 1.594524931117234, "grad_norm": 1.7717188596725464, "learning_rate": 9.784661901726684e-06, "loss": 1.6483, "step": 44850 }, { "epoch": 1.5948804550706603, "grad_norm": 1.813963532447815, "learning_rate": 9.780743723502594e-06, "loss": 1.6854, "step": 44860 }, { "epoch": 1.5952359790240869, "grad_norm": 1.8415963649749756, "learning_rate": 9.776825578954891e-06, "loss": 1.6045, "step": 44870 }, { "epoch": 1.5955915029775132, "grad_norm": 2.128248453140259, "learning_rate": 9.772907468685381e-06, "loss": 1.6182, "step": 44880 }, { "epoch": 1.5959470269309395, "grad_norm": 2.024554491043091, "learning_rate": 9.768989393295858e-06, "loss": 1.6417, "step": 44890 }, { "epoch": 1.5963025508843658, "grad_norm": 1.7415904998779297, "learning_rate": 9.765071353388119e-06, "loss": 1.6527, "step": 44900 }, { "epoch": 1.596658074837792, "grad_norm": 1.8566675186157227, "learning_rate": 9.761153349563947e-06, "loss": 1.6412, "step": 44910 }, { "epoch": 1.5970135987912184, "grad_norm": 1.8245688676834106, "learning_rate": 9.75723538242512e-06, "loss": 1.6264, "step": 44920 }, { "epoch": 1.597369122744645, "grad_norm": 1.8760472536087036, "learning_rate": 9.75331745257342e-06, "loss": 1.6614, "step": 44930 }, { "epoch": 1.5977246466980712, "grad_norm": 2.0607192516326904, "learning_rate": 9.749399560610612e-06, "loss": 1.6491, "step": 44940 }, { "epoch": 1.5980801706514978, "grad_norm": 2.002857208251953, "learning_rate": 9.745481707138458e-06, "loss": 1.6838, "step": 44950 }, { "epoch": 1.598435694604924, "grad_norm": 1.7652947902679443, "learning_rate": 9.741563892758717e-06, "loss": 1.6937, "step": 44960 }, { "epoch": 1.5987912185583504, "grad_norm": 1.86318838596344, "learning_rate": 9.737646118073143e-06, "loss": 1.6976, "step": 44970 }, { "epoch": 1.5991467425117767, "grad_norm": 1.9008982181549072, "learning_rate": 9.73372838368348e-06, "loss": 1.6898, "step": 44980 }, { "epoch": 1.599502266465203, "grad_norm": 1.8608051538467407, "learning_rate": 9.729810690191468e-06, "loss": 1.6505, "step": 44990 }, { "epoch": 1.5998577904186293, "grad_norm": 1.8600138425827026, "learning_rate": 9.72589303819884e-06, "loss": 1.6205, "step": 45000 }, { "epoch": 1.6002133143720558, "grad_norm": 1.96610689163208, "learning_rate": 9.721975428307322e-06, "loss": 1.6532, "step": 45010 }, { "epoch": 1.6005688383254821, "grad_norm": 1.90450119972229, "learning_rate": 9.718057861118634e-06, "loss": 1.6325, "step": 45020 }, { "epoch": 1.6009243622789087, "grad_norm": 1.9000378847122192, "learning_rate": 9.714140337234493e-06, "loss": 1.6688, "step": 45030 }, { "epoch": 1.601279886232335, "grad_norm": 1.8246891498565674, "learning_rate": 9.710222857256605e-06, "loss": 1.6716, "step": 45040 }, { "epoch": 1.6016354101857613, "grad_norm": 2.0795390605926514, "learning_rate": 9.706305421786666e-06, "loss": 1.6275, "step": 45050 }, { "epoch": 1.6019909341391876, "grad_norm": 1.9774959087371826, "learning_rate": 9.702388031426373e-06, "loss": 1.626, "step": 45060 }, { "epoch": 1.602346458092614, "grad_norm": 1.8395792245864868, "learning_rate": 9.698470686777414e-06, "loss": 1.6671, "step": 45070 }, { "epoch": 1.6027019820460402, "grad_norm": 1.8831536769866943, "learning_rate": 9.694553388441466e-06, "loss": 1.6759, "step": 45080 }, { "epoch": 1.6030575059994667, "grad_norm": 1.9469261169433594, "learning_rate": 9.690636137020207e-06, "loss": 1.6633, "step": 45090 }, { "epoch": 1.603413029952893, "grad_norm": 1.8184782266616821, "learning_rate": 9.68671893311529e-06, "loss": 1.6192, "step": 45100 }, { "epoch": 1.6037685539063196, "grad_norm": 1.8013615608215332, "learning_rate": 9.682801777328386e-06, "loss": 1.6433, "step": 45110 }, { "epoch": 1.6041240778597459, "grad_norm": 1.8400076627731323, "learning_rate": 9.678884670261138e-06, "loss": 1.6392, "step": 45120 }, { "epoch": 1.6044796018131722, "grad_norm": 1.9208611249923706, "learning_rate": 9.674967612515192e-06, "loss": 1.6094, "step": 45130 }, { "epoch": 1.6048351257665985, "grad_norm": 1.9665645360946655, "learning_rate": 9.67105060469218e-06, "loss": 1.6943, "step": 45140 }, { "epoch": 1.6051906497200248, "grad_norm": 1.8760857582092285, "learning_rate": 9.667133647393736e-06, "loss": 1.7079, "step": 45150 }, { "epoch": 1.605546173673451, "grad_norm": 1.9586379528045654, "learning_rate": 9.663216741221474e-06, "loss": 1.6676, "step": 45160 }, { "epoch": 1.6059016976268776, "grad_norm": 1.9238276481628418, "learning_rate": 9.659299886777012e-06, "loss": 1.6716, "step": 45170 }, { "epoch": 1.606257221580304, "grad_norm": 1.8094438314437866, "learning_rate": 9.655383084661949e-06, "loss": 1.6769, "step": 45180 }, { "epoch": 1.6066127455337305, "grad_norm": 1.9990025758743286, "learning_rate": 9.651466335477883e-06, "loss": 1.6519, "step": 45190 }, { "epoch": 1.6069682694871568, "grad_norm": 2.1307549476623535, "learning_rate": 9.647549639826402e-06, "loss": 1.644, "step": 45200 }, { "epoch": 1.607323793440583, "grad_norm": 1.9053353071212769, "learning_rate": 9.643632998309087e-06, "loss": 1.6658, "step": 45210 }, { "epoch": 1.6076793173940094, "grad_norm": 1.9088976383209229, "learning_rate": 9.63971641152751e-06, "loss": 1.6273, "step": 45220 }, { "epoch": 1.6080348413474357, "grad_norm": 1.8036609888076782, "learning_rate": 9.635799880083233e-06, "loss": 1.6782, "step": 45230 }, { "epoch": 1.608390365300862, "grad_norm": 2.0587072372436523, "learning_rate": 9.631883404577812e-06, "loss": 1.682, "step": 45240 }, { "epoch": 1.6087458892542885, "grad_norm": 1.741700530052185, "learning_rate": 9.627966985612794e-06, "loss": 1.6443, "step": 45250 }, { "epoch": 1.6091014132077148, "grad_norm": 1.8790136575698853, "learning_rate": 9.624050623789711e-06, "loss": 1.6095, "step": 45260 }, { "epoch": 1.6094569371611414, "grad_norm": 1.8529118299484253, "learning_rate": 9.620134319710095e-06, "loss": 1.6545, "step": 45270 }, { "epoch": 1.6098124611145677, "grad_norm": 1.9063934087753296, "learning_rate": 9.616218073975465e-06, "loss": 1.6814, "step": 45280 }, { "epoch": 1.610167985067994, "grad_norm": 1.8092721700668335, "learning_rate": 9.612301887187332e-06, "loss": 1.6598, "step": 45290 }, { "epoch": 1.6105235090214203, "grad_norm": 1.9182853698730469, "learning_rate": 9.608385759947201e-06, "loss": 1.6163, "step": 45300 }, { "epoch": 1.6108790329748466, "grad_norm": 1.8925962448120117, "learning_rate": 9.60446969285656e-06, "loss": 1.6523, "step": 45310 }, { "epoch": 1.6112345569282729, "grad_norm": 1.7887133359909058, "learning_rate": 9.600553686516894e-06, "loss": 1.6825, "step": 45320 }, { "epoch": 1.6115900808816994, "grad_norm": 1.8483270406723022, "learning_rate": 9.596637741529678e-06, "loss": 1.6532, "step": 45330 }, { "epoch": 1.6119456048351257, "grad_norm": 2.0222012996673584, "learning_rate": 9.592721858496376e-06, "loss": 1.6712, "step": 45340 }, { "epoch": 1.6123011287885523, "grad_norm": 1.7934224605560303, "learning_rate": 9.588806038018444e-06, "loss": 1.6788, "step": 45350 }, { "epoch": 1.6126566527419786, "grad_norm": 1.889206051826477, "learning_rate": 9.584890280697325e-06, "loss": 1.6501, "step": 45360 }, { "epoch": 1.6130121766954049, "grad_norm": 1.8354737758636475, "learning_rate": 9.580974587134454e-06, "loss": 1.6405, "step": 45370 }, { "epoch": 1.6133677006488312, "grad_norm": 1.8703527450561523, "learning_rate": 9.577058957931261e-06, "loss": 1.6734, "step": 45380 }, { "epoch": 1.6137232246022575, "grad_norm": 2.211743116378784, "learning_rate": 9.57314339368916e-06, "loss": 1.6122, "step": 45390 }, { "epoch": 1.6140787485556838, "grad_norm": 1.9559645652770996, "learning_rate": 9.569227895009556e-06, "loss": 1.6778, "step": 45400 }, { "epoch": 1.6144342725091103, "grad_norm": 1.844721794128418, "learning_rate": 9.565312462493853e-06, "loss": 1.6679, "step": 45410 }, { "epoch": 1.6147897964625366, "grad_norm": 1.7419184446334839, "learning_rate": 9.561397096743424e-06, "loss": 1.6205, "step": 45420 }, { "epoch": 1.6151453204159631, "grad_norm": 1.9916986227035522, "learning_rate": 9.557481798359653e-06, "loss": 1.6342, "step": 45430 }, { "epoch": 1.6155008443693895, "grad_norm": 1.785646915435791, "learning_rate": 9.553566567943902e-06, "loss": 1.6274, "step": 45440 }, { "epoch": 1.6158563683228158, "grad_norm": 1.9873098134994507, "learning_rate": 9.549651406097528e-06, "loss": 1.6946, "step": 45450 }, { "epoch": 1.616211892276242, "grad_norm": 1.9251453876495361, "learning_rate": 9.545736313421873e-06, "loss": 1.6585, "step": 45460 }, { "epoch": 1.6165674162296684, "grad_norm": 1.9129831790924072, "learning_rate": 9.541821290518272e-06, "loss": 1.6093, "step": 45470 }, { "epoch": 1.6169229401830947, "grad_norm": 1.938437581062317, "learning_rate": 9.53790633798805e-06, "loss": 1.6951, "step": 45480 }, { "epoch": 1.6172784641365212, "grad_norm": 1.880732774734497, "learning_rate": 9.533991456432514e-06, "loss": 1.6648, "step": 45490 }, { "epoch": 1.6176339880899475, "grad_norm": 1.8452805280685425, "learning_rate": 9.53007664645297e-06, "loss": 1.7125, "step": 45500 }, { "epoch": 1.617989512043374, "grad_norm": 1.8619343042373657, "learning_rate": 9.526161908650707e-06, "loss": 1.6481, "step": 45510 }, { "epoch": 1.6183450359968004, "grad_norm": 1.9093763828277588, "learning_rate": 9.522247243627003e-06, "loss": 1.6185, "step": 45520 }, { "epoch": 1.6187005599502267, "grad_norm": 1.8485217094421387, "learning_rate": 9.518332651983126e-06, "loss": 1.644, "step": 45530 }, { "epoch": 1.619056083903653, "grad_norm": 2.0003044605255127, "learning_rate": 9.514418134320338e-06, "loss": 1.7058, "step": 45540 }, { "epoch": 1.6194116078570793, "grad_norm": 1.8701895475387573, "learning_rate": 9.510503691239874e-06, "loss": 1.6464, "step": 45550 }, { "epoch": 1.6197671318105056, "grad_norm": 1.814052939414978, "learning_rate": 9.506589323342977e-06, "loss": 1.6613, "step": 45560 }, { "epoch": 1.620122655763932, "grad_norm": 1.9273310899734497, "learning_rate": 9.50267503123087e-06, "loss": 1.6323, "step": 45570 }, { "epoch": 1.6204781797173584, "grad_norm": 1.8216038942337036, "learning_rate": 9.498760815504757e-06, "loss": 1.6257, "step": 45580 }, { "epoch": 1.620833703670785, "grad_norm": 1.8107649087905884, "learning_rate": 9.494846676765837e-06, "loss": 1.6397, "step": 45590 }, { "epoch": 1.6211892276242112, "grad_norm": 1.8750101327896118, "learning_rate": 9.490932615615303e-06, "loss": 1.6932, "step": 45600 }, { "epoch": 1.6215447515776376, "grad_norm": 1.7151609659194946, "learning_rate": 9.487018632654326e-06, "loss": 1.6466, "step": 45610 }, { "epoch": 1.6219002755310639, "grad_norm": 2.0541727542877197, "learning_rate": 9.483104728484071e-06, "loss": 1.6535, "step": 45620 }, { "epoch": 1.6222557994844902, "grad_norm": 1.9208871126174927, "learning_rate": 9.479190903705689e-06, "loss": 1.6895, "step": 45630 }, { "epoch": 1.6226113234379165, "grad_norm": 1.9287028312683105, "learning_rate": 9.475277158920317e-06, "loss": 1.6471, "step": 45640 }, { "epoch": 1.622966847391343, "grad_norm": 1.8937067985534668, "learning_rate": 9.471363494729084e-06, "loss": 1.6675, "step": 45650 }, { "epoch": 1.6233223713447693, "grad_norm": 1.8728097677230835, "learning_rate": 9.467449911733106e-06, "loss": 1.6872, "step": 45660 }, { "epoch": 1.6236778952981958, "grad_norm": 1.8582308292388916, "learning_rate": 9.463536410533482e-06, "loss": 1.6465, "step": 45670 }, { "epoch": 1.6240334192516221, "grad_norm": 1.7875969409942627, "learning_rate": 9.4596229917313e-06, "loss": 1.6823, "step": 45680 }, { "epoch": 1.6243889432050485, "grad_norm": 2.1125075817108154, "learning_rate": 9.455709655927637e-06, "loss": 1.6869, "step": 45690 }, { "epoch": 1.6247444671584748, "grad_norm": 2.0550649166107178, "learning_rate": 9.45179640372356e-06, "loss": 1.676, "step": 45700 }, { "epoch": 1.625099991111901, "grad_norm": 1.9072517156600952, "learning_rate": 9.447883235720118e-06, "loss": 1.6536, "step": 45710 }, { "epoch": 1.6254555150653274, "grad_norm": 1.7906911373138428, "learning_rate": 9.443970152518351e-06, "loss": 1.6242, "step": 45720 }, { "epoch": 1.625811039018754, "grad_norm": 1.8642396926879883, "learning_rate": 9.440057154719282e-06, "loss": 1.6641, "step": 45730 }, { "epoch": 1.6261665629721802, "grad_norm": 1.772557258605957, "learning_rate": 9.436144242923924e-06, "loss": 1.6442, "step": 45740 }, { "epoch": 1.6265220869256067, "grad_norm": 1.9297966957092285, "learning_rate": 9.432231417733272e-06, "loss": 1.6824, "step": 45750 }, { "epoch": 1.626877610879033, "grad_norm": 1.942812204360962, "learning_rate": 9.428318679748314e-06, "loss": 1.6509, "step": 45760 }, { "epoch": 1.6272331348324593, "grad_norm": 1.9203059673309326, "learning_rate": 9.424406029570022e-06, "loss": 1.656, "step": 45770 }, { "epoch": 1.6275886587858857, "grad_norm": 1.7564131021499634, "learning_rate": 9.420493467799356e-06, "loss": 1.6264, "step": 45780 }, { "epoch": 1.627944182739312, "grad_norm": 1.772635817527771, "learning_rate": 9.416580995037261e-06, "loss": 1.6301, "step": 45790 }, { "epoch": 1.6282997066927383, "grad_norm": 1.7491180896759033, "learning_rate": 9.412668611884666e-06, "loss": 1.6372, "step": 45800 }, { "epoch": 1.6286552306461648, "grad_norm": 1.8844412565231323, "learning_rate": 9.408756318942489e-06, "loss": 1.7025, "step": 45810 }, { "epoch": 1.629010754599591, "grad_norm": 1.8353954553604126, "learning_rate": 9.404844116811634e-06, "loss": 1.6664, "step": 45820 }, { "epoch": 1.6293662785530176, "grad_norm": 1.952030897140503, "learning_rate": 9.400932006092992e-06, "loss": 1.6324, "step": 45830 }, { "epoch": 1.629721802506444, "grad_norm": 1.7468416690826416, "learning_rate": 9.397019987387437e-06, "loss": 1.6635, "step": 45840 }, { "epoch": 1.6300773264598702, "grad_norm": 1.8611869812011719, "learning_rate": 9.393108061295832e-06, "loss": 1.6152, "step": 45850 }, { "epoch": 1.6304328504132966, "grad_norm": 1.8374412059783936, "learning_rate": 9.389196228419025e-06, "loss": 1.6389, "step": 45860 }, { "epoch": 1.6307883743667229, "grad_norm": 1.7448116540908813, "learning_rate": 9.385284489357845e-06, "loss": 1.6487, "step": 45870 }, { "epoch": 1.6311438983201492, "grad_norm": 2.0443167686462402, "learning_rate": 9.381372844713112e-06, "loss": 1.6542, "step": 45880 }, { "epoch": 1.6314994222735757, "grad_norm": 1.8457872867584229, "learning_rate": 9.377461295085633e-06, "loss": 1.6289, "step": 45890 }, { "epoch": 1.631854946227002, "grad_norm": 1.85100257396698, "learning_rate": 9.3735498410762e-06, "loss": 1.6703, "step": 45900 }, { "epoch": 1.6322104701804285, "grad_norm": 1.9700928926467896, "learning_rate": 9.36963848328558e-06, "loss": 1.6933, "step": 45910 }, { "epoch": 1.6325659941338548, "grad_norm": 1.913554072380066, "learning_rate": 9.365727222314537e-06, "loss": 1.6647, "step": 45920 }, { "epoch": 1.6329215180872811, "grad_norm": 1.9028133153915405, "learning_rate": 9.361816058763815e-06, "loss": 1.69, "step": 45930 }, { "epoch": 1.6332770420407074, "grad_norm": 2.0402705669403076, "learning_rate": 9.357904993234144e-06, "loss": 1.666, "step": 45940 }, { "epoch": 1.6336325659941338, "grad_norm": 1.7308645248413086, "learning_rate": 9.353994026326239e-06, "loss": 1.6441, "step": 45950 }, { "epoch": 1.63398808994756, "grad_norm": 1.9061516523361206, "learning_rate": 9.350083158640803e-06, "loss": 1.6332, "step": 45960 }, { "epoch": 1.6343436139009866, "grad_norm": 1.780673861503601, "learning_rate": 9.346172390778519e-06, "loss": 1.6511, "step": 45970 }, { "epoch": 1.634699137854413, "grad_norm": 1.911218285560608, "learning_rate": 9.342261723340057e-06, "loss": 1.6229, "step": 45980 }, { "epoch": 1.6350546618078394, "grad_norm": 1.9093784093856812, "learning_rate": 9.338351156926068e-06, "loss": 1.6529, "step": 45990 }, { "epoch": 1.6354101857612657, "grad_norm": 1.9924782514572144, "learning_rate": 9.334440692137194e-06, "loss": 1.6688, "step": 46000 }, { "epoch": 1.635765709714692, "grad_norm": 1.9529900550842285, "learning_rate": 9.330530329574055e-06, "loss": 1.6208, "step": 46010 }, { "epoch": 1.6361212336681183, "grad_norm": 1.913313388824463, "learning_rate": 9.326620069837261e-06, "loss": 1.6739, "step": 46020 }, { "epoch": 1.6364767576215447, "grad_norm": 1.8248666524887085, "learning_rate": 9.322709913527405e-06, "loss": 1.6198, "step": 46030 }, { "epoch": 1.636832281574971, "grad_norm": 2.080410957336426, "learning_rate": 9.318799861245056e-06, "loss": 1.6236, "step": 46040 }, { "epoch": 1.6371878055283975, "grad_norm": 1.839089274406433, "learning_rate": 9.314889913590778e-06, "loss": 1.6578, "step": 46050 }, { "epoch": 1.6375433294818238, "grad_norm": 1.9410159587860107, "learning_rate": 9.310980071165118e-06, "loss": 1.6916, "step": 46060 }, { "epoch": 1.6378988534352503, "grad_norm": 1.8802642822265625, "learning_rate": 9.307070334568592e-06, "loss": 1.6286, "step": 46070 }, { "epoch": 1.6382543773886766, "grad_norm": 1.7182576656341553, "learning_rate": 9.303160704401721e-06, "loss": 1.6485, "step": 46080 }, { "epoch": 1.638609901342103, "grad_norm": 1.786268949508667, "learning_rate": 9.299251181264997e-06, "loss": 1.699, "step": 46090 }, { "epoch": 1.6389654252955292, "grad_norm": 1.8786602020263672, "learning_rate": 9.295341765758897e-06, "loss": 1.6475, "step": 46100 }, { "epoch": 1.6393209492489556, "grad_norm": 1.8300297260284424, "learning_rate": 9.291432458483884e-06, "loss": 1.6512, "step": 46110 }, { "epoch": 1.6396764732023819, "grad_norm": 1.9037065505981445, "learning_rate": 9.287523260040402e-06, "loss": 1.6868, "step": 46120 }, { "epoch": 1.6400319971558084, "grad_norm": 2.003335952758789, "learning_rate": 9.28361417102888e-06, "loss": 1.6196, "step": 46130 }, { "epoch": 1.6403875211092347, "grad_norm": 1.788839340209961, "learning_rate": 9.279705192049729e-06, "loss": 1.6135, "step": 46140 }, { "epoch": 1.6407430450626612, "grad_norm": 2.0094990730285645, "learning_rate": 9.275796323703344e-06, "loss": 1.673, "step": 46150 }, { "epoch": 1.6410985690160875, "grad_norm": 1.7968368530273438, "learning_rate": 9.271887566590106e-06, "loss": 1.6354, "step": 46160 }, { "epoch": 1.6414540929695138, "grad_norm": 1.926692247390747, "learning_rate": 9.267978921310369e-06, "loss": 1.65, "step": 46170 }, { "epoch": 1.6418096169229401, "grad_norm": 1.8809410333633423, "learning_rate": 9.264070388464481e-06, "loss": 1.6341, "step": 46180 }, { "epoch": 1.6421651408763664, "grad_norm": 1.714543104171753, "learning_rate": 9.260161968652767e-06, "loss": 1.6531, "step": 46190 }, { "epoch": 1.6425206648297928, "grad_norm": 1.8635157346725464, "learning_rate": 9.256253662475535e-06, "loss": 1.6606, "step": 46200 }, { "epoch": 1.6428761887832193, "grad_norm": 1.8542648553848267, "learning_rate": 9.25234547053308e-06, "loss": 1.6439, "step": 46210 }, { "epoch": 1.6432317127366456, "grad_norm": 1.8535089492797852, "learning_rate": 9.248437393425673e-06, "loss": 1.6184, "step": 46220 }, { "epoch": 1.6435872366900721, "grad_norm": 2.300245761871338, "learning_rate": 9.24452943175357e-06, "loss": 1.6301, "step": 46230 }, { "epoch": 1.6439427606434984, "grad_norm": 1.8947709798812866, "learning_rate": 9.240621586117006e-06, "loss": 1.6802, "step": 46240 }, { "epoch": 1.6442982845969247, "grad_norm": 1.6864510774612427, "learning_rate": 9.236713857116207e-06, "loss": 1.6322, "step": 46250 }, { "epoch": 1.644653808550351, "grad_norm": 1.9338759183883667, "learning_rate": 9.232806245351373e-06, "loss": 1.6979, "step": 46260 }, { "epoch": 1.6450093325037773, "grad_norm": 1.9570294618606567, "learning_rate": 9.228898751422687e-06, "loss": 1.6405, "step": 46270 }, { "epoch": 1.6453648564572037, "grad_norm": 1.834808349609375, "learning_rate": 9.224991375930321e-06, "loss": 1.6452, "step": 46280 }, { "epoch": 1.6457203804106302, "grad_norm": 1.7941116094589233, "learning_rate": 9.22108411947442e-06, "loss": 1.6734, "step": 46290 }, { "epoch": 1.6460759043640565, "grad_norm": 1.899541974067688, "learning_rate": 9.217176982655116e-06, "loss": 1.6549, "step": 46300 }, { "epoch": 1.646431428317483, "grad_norm": 1.819176435470581, "learning_rate": 9.213269966072515e-06, "loss": 1.681, "step": 46310 }, { "epoch": 1.6467869522709093, "grad_norm": 1.7770761251449585, "learning_rate": 9.209363070326717e-06, "loss": 1.6408, "step": 46320 }, { "epoch": 1.6471424762243356, "grad_norm": 2.044375419616699, "learning_rate": 9.205456296017795e-06, "loss": 1.7003, "step": 46330 }, { "epoch": 1.647498000177762, "grad_norm": 1.8414000272750854, "learning_rate": 9.201549643745803e-06, "loss": 1.6818, "step": 46340 }, { "epoch": 1.6478535241311882, "grad_norm": 1.8396013975143433, "learning_rate": 9.197643114110779e-06, "loss": 1.6984, "step": 46350 }, { "epoch": 1.6482090480846145, "grad_norm": 1.8234518766403198, "learning_rate": 9.193736707712741e-06, "loss": 1.6635, "step": 46360 }, { "epoch": 1.648564572038041, "grad_norm": 1.8317780494689941, "learning_rate": 9.189830425151691e-06, "loss": 1.656, "step": 46370 }, { "epoch": 1.6489200959914674, "grad_norm": 1.935102939605713, "learning_rate": 9.185924267027611e-06, "loss": 1.6474, "step": 46380 }, { "epoch": 1.649275619944894, "grad_norm": 1.9568835496902466, "learning_rate": 9.182018233940455e-06, "loss": 1.6568, "step": 46390 }, { "epoch": 1.6496311438983202, "grad_norm": 1.7801934480667114, "learning_rate": 9.17811232649017e-06, "loss": 1.6024, "step": 46400 }, { "epoch": 1.6499866678517465, "grad_norm": 1.8486586809158325, "learning_rate": 9.174206545276678e-06, "loss": 1.6467, "step": 46410 }, { "epoch": 1.6503421918051728, "grad_norm": 1.9290732145309448, "learning_rate": 9.170300890899885e-06, "loss": 1.6414, "step": 46420 }, { "epoch": 1.6506977157585991, "grad_norm": 1.742576241493225, "learning_rate": 9.166395363959672e-06, "loss": 1.6737, "step": 46430 }, { "epoch": 1.6510532397120254, "grad_norm": 1.829534888267517, "learning_rate": 9.162489965055901e-06, "loss": 1.6578, "step": 46440 }, { "epoch": 1.651408763665452, "grad_norm": 1.8022770881652832, "learning_rate": 9.158584694788425e-06, "loss": 1.6884, "step": 46450 }, { "epoch": 1.6517642876188783, "grad_norm": 1.9050766229629517, "learning_rate": 9.154679553757059e-06, "loss": 1.6478, "step": 46460 }, { "epoch": 1.6521198115723048, "grad_norm": 1.8912161588668823, "learning_rate": 9.15077454256162e-06, "loss": 1.696, "step": 46470 }, { "epoch": 1.6524753355257311, "grad_norm": 1.9854507446289062, "learning_rate": 9.146869661801882e-06, "loss": 1.6734, "step": 46480 }, { "epoch": 1.6528308594791574, "grad_norm": 2.0170986652374268, "learning_rate": 9.142964912077615e-06, "loss": 1.6255, "step": 46490 }, { "epoch": 1.6531863834325837, "grad_norm": 1.7834538221359253, "learning_rate": 9.139060293988564e-06, "loss": 1.6196, "step": 46500 }, { "epoch": 1.65354190738601, "grad_norm": 1.8555934429168701, "learning_rate": 9.135155808134454e-06, "loss": 1.6007, "step": 46510 }, { "epoch": 1.6538974313394363, "grad_norm": 1.8404572010040283, "learning_rate": 9.131251455114991e-06, "loss": 1.6705, "step": 46520 }, { "epoch": 1.6542529552928629, "grad_norm": 1.8445826768875122, "learning_rate": 9.127347235529856e-06, "loss": 1.634, "step": 46530 }, { "epoch": 1.6546084792462892, "grad_norm": 1.8148281574249268, "learning_rate": 9.123443149978714e-06, "loss": 1.6293, "step": 46540 }, { "epoch": 1.6549640031997157, "grad_norm": 2.113463878631592, "learning_rate": 9.119539199061211e-06, "loss": 1.6654, "step": 46550 }, { "epoch": 1.655319527153142, "grad_norm": 1.8898741006851196, "learning_rate": 9.115635383376963e-06, "loss": 1.6806, "step": 46560 }, { "epoch": 1.6556750511065683, "grad_norm": 1.9250837564468384, "learning_rate": 9.111731703525575e-06, "loss": 1.6601, "step": 46570 }, { "epoch": 1.6560305750599946, "grad_norm": 1.8493584394454956, "learning_rate": 9.10782816010663e-06, "loss": 1.6724, "step": 46580 }, { "epoch": 1.656386099013421, "grad_norm": 1.9017980098724365, "learning_rate": 9.103924753719681e-06, "loss": 1.6255, "step": 46590 }, { "epoch": 1.6567416229668472, "grad_norm": 1.790822982788086, "learning_rate": 9.100021484964277e-06, "loss": 1.624, "step": 46600 }, { "epoch": 1.6570971469202738, "grad_norm": 1.837369441986084, "learning_rate": 9.096118354439927e-06, "loss": 1.6454, "step": 46610 }, { "epoch": 1.6574526708737, "grad_norm": 1.9058386087417603, "learning_rate": 9.09221536274613e-06, "loss": 1.6457, "step": 46620 }, { "epoch": 1.6578081948271266, "grad_norm": 1.996495008468628, "learning_rate": 9.088312510482363e-06, "loss": 1.691, "step": 46630 }, { "epoch": 1.658163718780553, "grad_norm": 1.959643006324768, "learning_rate": 9.084409798248076e-06, "loss": 1.6305, "step": 46640 }, { "epoch": 1.6585192427339792, "grad_norm": 1.8285119533538818, "learning_rate": 9.080507226642708e-06, "loss": 1.6712, "step": 46650 }, { "epoch": 1.6588747666874055, "grad_norm": 2.047224521636963, "learning_rate": 9.07660479626566e-06, "loss": 1.5961, "step": 46660 }, { "epoch": 1.6592302906408318, "grad_norm": 1.8310611248016357, "learning_rate": 9.072702507716327e-06, "loss": 1.6286, "step": 46670 }, { "epoch": 1.6595858145942581, "grad_norm": 1.9075833559036255, "learning_rate": 9.068800361594073e-06, "loss": 1.6366, "step": 46680 }, { "epoch": 1.6599413385476847, "grad_norm": 1.903961420059204, "learning_rate": 9.064898358498246e-06, "loss": 1.696, "step": 46690 }, { "epoch": 1.660296862501111, "grad_norm": 2.237349033355713, "learning_rate": 9.060996499028173e-06, "loss": 1.6458, "step": 46700 }, { "epoch": 1.6606523864545375, "grad_norm": 1.8397843837738037, "learning_rate": 9.057094783783144e-06, "loss": 1.6819, "step": 46710 }, { "epoch": 1.6610079104079638, "grad_norm": 1.9820095300674438, "learning_rate": 9.053193213362445e-06, "loss": 1.658, "step": 46720 }, { "epoch": 1.6613634343613901, "grad_norm": 1.8050580024719238, "learning_rate": 9.049291788365335e-06, "loss": 1.6458, "step": 46730 }, { "epoch": 1.6617189583148164, "grad_norm": 1.89781653881073, "learning_rate": 9.045390509391043e-06, "loss": 1.7096, "step": 46740 }, { "epoch": 1.6620744822682427, "grad_norm": 1.8570796251296997, "learning_rate": 9.041489377038783e-06, "loss": 1.6281, "step": 46750 }, { "epoch": 1.662430006221669, "grad_norm": 1.7772490978240967, "learning_rate": 9.037588391907744e-06, "loss": 1.6876, "step": 46760 }, { "epoch": 1.6627855301750956, "grad_norm": 2.075768232345581, "learning_rate": 9.033687554597093e-06, "loss": 1.6348, "step": 46770 }, { "epoch": 1.6631410541285219, "grad_norm": 1.9685065746307373, "learning_rate": 9.029786865705978e-06, "loss": 1.6813, "step": 46780 }, { "epoch": 1.6634965780819484, "grad_norm": 1.8220576047897339, "learning_rate": 9.025886325833517e-06, "loss": 1.6675, "step": 46790 }, { "epoch": 1.6638521020353747, "grad_norm": 1.6773161888122559, "learning_rate": 9.021985935578805e-06, "loss": 1.6648, "step": 46800 }, { "epoch": 1.664207625988801, "grad_norm": 2.089994192123413, "learning_rate": 9.018085695540923e-06, "loss": 1.5902, "step": 46810 }, { "epoch": 1.6645631499422273, "grad_norm": 1.9748122692108154, "learning_rate": 9.014185606318921e-06, "loss": 1.6401, "step": 46820 }, { "epoch": 1.6649186738956536, "grad_norm": 1.8910719156265259, "learning_rate": 9.01028566851183e-06, "loss": 1.6716, "step": 46830 }, { "epoch": 1.66527419784908, "grad_norm": 1.948042392730713, "learning_rate": 9.006385882718655e-06, "loss": 1.6564, "step": 46840 }, { "epoch": 1.6656297218025065, "grad_norm": 1.7150905132293701, "learning_rate": 9.002486249538379e-06, "loss": 1.6166, "step": 46850 }, { "epoch": 1.6659852457559328, "grad_norm": 1.9290721416473389, "learning_rate": 8.998586769569962e-06, "loss": 1.6315, "step": 46860 }, { "epoch": 1.6663407697093593, "grad_norm": 1.9267395734786987, "learning_rate": 8.994687443412336e-06, "loss": 1.6725, "step": 46870 }, { "epoch": 1.6666962936627856, "grad_norm": 2.0027971267700195, "learning_rate": 8.990788271664414e-06, "loss": 1.6515, "step": 46880 }, { "epoch": 1.667051817616212, "grad_norm": 1.913419246673584, "learning_rate": 8.986889254925086e-06, "loss": 1.643, "step": 46890 }, { "epoch": 1.6674073415696382, "grad_norm": 1.940077304840088, "learning_rate": 8.982990393793216e-06, "loss": 1.6508, "step": 46900 }, { "epoch": 1.6677628655230645, "grad_norm": 1.7701328992843628, "learning_rate": 8.979091688867648e-06, "loss": 1.6323, "step": 46910 }, { "epoch": 1.6681183894764908, "grad_norm": 1.9629327058792114, "learning_rate": 8.975193140747192e-06, "loss": 1.6514, "step": 46920 }, { "epoch": 1.6684739134299174, "grad_norm": 1.7670210599899292, "learning_rate": 8.971294750030643e-06, "loss": 1.6442, "step": 46930 }, { "epoch": 1.6688294373833437, "grad_norm": 1.852724313735962, "learning_rate": 8.967396517316772e-06, "loss": 1.6637, "step": 46940 }, { "epoch": 1.6691849613367702, "grad_norm": 1.8711625337600708, "learning_rate": 8.963498443204322e-06, "loss": 1.6879, "step": 46950 }, { "epoch": 1.6695404852901965, "grad_norm": 1.9303196668624878, "learning_rate": 8.959600528292015e-06, "loss": 1.6889, "step": 46960 }, { "epoch": 1.6698960092436228, "grad_norm": 2.0949487686157227, "learning_rate": 8.955702773178539e-06, "loss": 1.6167, "step": 46970 }, { "epoch": 1.670251533197049, "grad_norm": 1.7735263109207153, "learning_rate": 8.951805178462571e-06, "loss": 1.6337, "step": 46980 }, { "epoch": 1.6706070571504754, "grad_norm": 1.8674527406692505, "learning_rate": 8.947907744742754e-06, "loss": 1.6506, "step": 46990 }, { "epoch": 1.6709625811039017, "grad_norm": 1.892725944519043, "learning_rate": 8.944010472617712e-06, "loss": 1.5994, "step": 47000 }, { "epoch": 1.6713181050573283, "grad_norm": 1.900651216506958, "learning_rate": 8.94011336268604e-06, "loss": 1.6443, "step": 47010 }, { "epoch": 1.6716736290107546, "grad_norm": 1.9501687288284302, "learning_rate": 8.936216415546313e-06, "loss": 1.6676, "step": 47020 }, { "epoch": 1.672029152964181, "grad_norm": 1.8256280422210693, "learning_rate": 8.932319631797072e-06, "loss": 1.7012, "step": 47030 }, { "epoch": 1.6723846769176074, "grad_norm": 1.8548064231872559, "learning_rate": 8.928423012036842e-06, "loss": 1.6551, "step": 47040 }, { "epoch": 1.6727402008710337, "grad_norm": 1.8793046474456787, "learning_rate": 8.924526556864117e-06, "loss": 1.666, "step": 47050 }, { "epoch": 1.67309572482446, "grad_norm": 1.877183198928833, "learning_rate": 8.920630266877369e-06, "loss": 1.6759, "step": 47060 }, { "epoch": 1.6734512487778863, "grad_norm": 1.9366111755371094, "learning_rate": 8.916734142675045e-06, "loss": 1.6687, "step": 47070 }, { "epoch": 1.6738067727313126, "grad_norm": 1.7445632219314575, "learning_rate": 8.912838184855565e-06, "loss": 1.6967, "step": 47080 }, { "epoch": 1.6741622966847391, "grad_norm": 1.8954089879989624, "learning_rate": 8.908942394017325e-06, "loss": 1.6688, "step": 47090 }, { "epoch": 1.6745178206381655, "grad_norm": 1.915601134300232, "learning_rate": 8.90504677075869e-06, "loss": 1.6158, "step": 47100 }, { "epoch": 1.674873344591592, "grad_norm": 1.992204189300537, "learning_rate": 8.901151315678005e-06, "loss": 1.6899, "step": 47110 }, { "epoch": 1.6752288685450183, "grad_norm": 2.290611505508423, "learning_rate": 8.897256029373588e-06, "loss": 1.6479, "step": 47120 }, { "epoch": 1.6755843924984446, "grad_norm": 1.8793078660964966, "learning_rate": 8.89336091244373e-06, "loss": 1.6834, "step": 47130 }, { "epoch": 1.675939916451871, "grad_norm": 2.002035140991211, "learning_rate": 8.8894659654867e-06, "loss": 1.6357, "step": 47140 }, { "epoch": 1.6762954404052972, "grad_norm": 1.8884364366531372, "learning_rate": 8.885571189100736e-06, "loss": 1.6243, "step": 47150 }, { "epoch": 1.6766509643587235, "grad_norm": 1.9206151962280273, "learning_rate": 8.881676583884047e-06, "loss": 1.6377, "step": 47160 }, { "epoch": 1.67700648831215, "grad_norm": 1.73343026638031, "learning_rate": 8.877782150434822e-06, "loss": 1.6473, "step": 47170 }, { "epoch": 1.6773620122655764, "grad_norm": 1.940598487854004, "learning_rate": 8.873887889351228e-06, "loss": 1.6213, "step": 47180 }, { "epoch": 1.6777175362190029, "grad_norm": 1.9257320165634155, "learning_rate": 8.869993801231388e-06, "loss": 1.6867, "step": 47190 }, { "epoch": 1.6780730601724292, "grad_norm": 1.8307676315307617, "learning_rate": 8.866099886673415e-06, "loss": 1.6532, "step": 47200 }, { "epoch": 1.6784285841258555, "grad_norm": 1.7601826190948486, "learning_rate": 8.86220614627539e-06, "loss": 1.6535, "step": 47210 }, { "epoch": 1.6787841080792818, "grad_norm": 1.8694700002670288, "learning_rate": 8.858312580635369e-06, "loss": 1.6278, "step": 47220 }, { "epoch": 1.679139632032708, "grad_norm": 1.9378466606140137, "learning_rate": 8.854419190351376e-06, "loss": 1.5898, "step": 47230 }, { "epoch": 1.6794951559861344, "grad_norm": 1.9514213800430298, "learning_rate": 8.850525976021411e-06, "loss": 1.6166, "step": 47240 }, { "epoch": 1.679850679939561, "grad_norm": 1.9289764165878296, "learning_rate": 8.846632938243449e-06, "loss": 1.647, "step": 47250 }, { "epoch": 1.6802062038929872, "grad_norm": 1.8685369491577148, "learning_rate": 8.842740077615437e-06, "loss": 1.608, "step": 47260 }, { "epoch": 1.6805617278464138, "grad_norm": 1.9230856895446777, "learning_rate": 8.838847394735295e-06, "loss": 1.6099, "step": 47270 }, { "epoch": 1.68091725179984, "grad_norm": 1.829556941986084, "learning_rate": 8.83495489020091e-06, "loss": 1.679, "step": 47280 }, { "epoch": 1.6812727757532664, "grad_norm": 1.9097543954849243, "learning_rate": 8.831062564610149e-06, "loss": 1.6295, "step": 47290 }, { "epoch": 1.6816282997066927, "grad_norm": 1.8748795986175537, "learning_rate": 8.827170418560848e-06, "loss": 1.6632, "step": 47300 }, { "epoch": 1.681983823660119, "grad_norm": 1.975656270980835, "learning_rate": 8.823278452650818e-06, "loss": 1.6355, "step": 47310 }, { "epoch": 1.6823393476135453, "grad_norm": 1.9507516622543335, "learning_rate": 8.81938666747784e-06, "loss": 1.6695, "step": 47320 }, { "epoch": 1.6826948715669718, "grad_norm": 2.046321153640747, "learning_rate": 8.81549506363967e-06, "loss": 1.6775, "step": 47330 }, { "epoch": 1.6830503955203981, "grad_norm": 1.8780962228775024, "learning_rate": 8.81160364173403e-06, "loss": 1.6728, "step": 47340 }, { "epoch": 1.6834059194738247, "grad_norm": 1.9072388410568237, "learning_rate": 8.80771240235862e-06, "loss": 1.6772, "step": 47350 }, { "epoch": 1.683761443427251, "grad_norm": 1.8728219270706177, "learning_rate": 8.803821346111107e-06, "loss": 1.6678, "step": 47360 }, { "epoch": 1.6841169673806773, "grad_norm": 1.8046735525131226, "learning_rate": 8.799930473589135e-06, "loss": 1.6423, "step": 47370 }, { "epoch": 1.6844724913341036, "grad_norm": 1.9991408586502075, "learning_rate": 8.796039785390318e-06, "loss": 1.6361, "step": 47380 }, { "epoch": 1.68482801528753, "grad_norm": 1.8107160329818726, "learning_rate": 8.792149282112242e-06, "loss": 1.6728, "step": 47390 }, { "epoch": 1.6851835392409562, "grad_norm": 1.8943120241165161, "learning_rate": 8.788258964352462e-06, "loss": 1.6312, "step": 47400 }, { "epoch": 1.6855390631943827, "grad_norm": 1.8581575155258179, "learning_rate": 8.784368832708508e-06, "loss": 1.6802, "step": 47410 }, { "epoch": 1.685894587147809, "grad_norm": 1.838925838470459, "learning_rate": 8.780478887777878e-06, "loss": 1.6856, "step": 47420 }, { "epoch": 1.6862501111012356, "grad_norm": 1.986754298210144, "learning_rate": 8.776589130158045e-06, "loss": 1.6369, "step": 47430 }, { "epoch": 1.6866056350546619, "grad_norm": 1.8603802919387817, "learning_rate": 8.77269956044645e-06, "loss": 1.6509, "step": 47440 }, { "epoch": 1.6869611590080882, "grad_norm": 1.826055884361267, "learning_rate": 8.768810179240509e-06, "loss": 1.6599, "step": 47450 }, { "epoch": 1.6873166829615145, "grad_norm": 1.924135684967041, "learning_rate": 8.764920987137607e-06, "loss": 1.6413, "step": 47460 }, { "epoch": 1.6876722069149408, "grad_norm": 1.8157100677490234, "learning_rate": 8.761031984735093e-06, "loss": 1.6616, "step": 47470 }, { "epoch": 1.688027730868367, "grad_norm": 1.8275957107543945, "learning_rate": 8.757143172630297e-06, "loss": 1.653, "step": 47480 }, { "epoch": 1.6883832548217936, "grad_norm": 1.926822304725647, "learning_rate": 8.753254551420521e-06, "loss": 1.6631, "step": 47490 }, { "epoch": 1.68873877877522, "grad_norm": 1.8407974243164062, "learning_rate": 8.74936612170303e-06, "loss": 1.6275, "step": 47500 }, { "epoch": 1.6890943027286465, "grad_norm": 2.0156941413879395, "learning_rate": 8.745477884075058e-06, "loss": 1.6607, "step": 47510 }, { "epoch": 1.6894498266820728, "grad_norm": 1.8555457592010498, "learning_rate": 8.741589839133817e-06, "loss": 1.6482, "step": 47520 }, { "epoch": 1.689805350635499, "grad_norm": 2.3219103813171387, "learning_rate": 8.737701987476487e-06, "loss": 1.682, "step": 47530 }, { "epoch": 1.6901608745889254, "grad_norm": 1.843186378479004, "learning_rate": 8.733814329700218e-06, "loss": 1.6759, "step": 47540 }, { "epoch": 1.6905163985423517, "grad_norm": 2.1189942359924316, "learning_rate": 8.729926866402126e-06, "loss": 1.6852, "step": 47550 }, { "epoch": 1.690871922495778, "grad_norm": 2.0449013710021973, "learning_rate": 8.726039598179305e-06, "loss": 1.6711, "step": 47560 }, { "epoch": 1.6912274464492045, "grad_norm": 1.9948835372924805, "learning_rate": 8.722152525628816e-06, "loss": 1.6492, "step": 47570 }, { "epoch": 1.6915829704026308, "grad_norm": 1.9276567697525024, "learning_rate": 8.718265649347684e-06, "loss": 1.6344, "step": 47580 }, { "epoch": 1.6919384943560574, "grad_norm": 1.9229627847671509, "learning_rate": 8.714378969932914e-06, "loss": 1.645, "step": 47590 }, { "epoch": 1.6922940183094837, "grad_norm": 1.7437576055526733, "learning_rate": 8.710492487981472e-06, "loss": 1.6547, "step": 47600 }, { "epoch": 1.69264954226291, "grad_norm": 1.8802158832550049, "learning_rate": 8.706606204090299e-06, "loss": 1.6974, "step": 47610 }, { "epoch": 1.6930050662163363, "grad_norm": 1.8326317071914673, "learning_rate": 8.702720118856302e-06, "loss": 1.6751, "step": 47620 }, { "epoch": 1.6933605901697626, "grad_norm": 1.897667646408081, "learning_rate": 8.698834232876362e-06, "loss": 1.6785, "step": 47630 }, { "epoch": 1.693716114123189, "grad_norm": 2.0973215103149414, "learning_rate": 8.694948546747328e-06, "loss": 1.6683, "step": 47640 }, { "epoch": 1.6940716380766154, "grad_norm": 1.8899247646331787, "learning_rate": 8.691063061066011e-06, "loss": 1.6666, "step": 47650 }, { "epoch": 1.6944271620300417, "grad_norm": 1.9390369653701782, "learning_rate": 8.687177776429205e-06, "loss": 1.6676, "step": 47660 }, { "epoch": 1.6947826859834683, "grad_norm": 1.9885674715042114, "learning_rate": 8.683292693433658e-06, "loss": 1.6651, "step": 47670 }, { "epoch": 1.6951382099368946, "grad_norm": 1.8569226264953613, "learning_rate": 8.679407812676098e-06, "loss": 1.6536, "step": 47680 }, { "epoch": 1.6954937338903209, "grad_norm": 2.06219482421875, "learning_rate": 8.675523134753216e-06, "loss": 1.6487, "step": 47690 }, { "epoch": 1.6958492578437472, "grad_norm": 1.8178149461746216, "learning_rate": 8.671638660261677e-06, "loss": 1.6418, "step": 47700 }, { "epoch": 1.6962047817971735, "grad_norm": 1.7728937864303589, "learning_rate": 8.667754389798112e-06, "loss": 1.6617, "step": 47710 }, { "epoch": 1.6965603057505998, "grad_norm": 1.7685102224349976, "learning_rate": 8.663870323959118e-06, "loss": 1.6747, "step": 47720 }, { "epoch": 1.6969158297040263, "grad_norm": 1.9068578481674194, "learning_rate": 8.659986463341266e-06, "loss": 1.6484, "step": 47730 }, { "epoch": 1.6972713536574526, "grad_norm": 1.7723239660263062, "learning_rate": 8.656102808541089e-06, "loss": 1.6443, "step": 47740 }, { "epoch": 1.6976268776108792, "grad_norm": 1.8427810668945312, "learning_rate": 8.652219360155096e-06, "loss": 1.6363, "step": 47750 }, { "epoch": 1.6979824015643055, "grad_norm": 1.959257960319519, "learning_rate": 8.648336118779757e-06, "loss": 1.643, "step": 47760 }, { "epoch": 1.6983379255177318, "grad_norm": 2.000145196914673, "learning_rate": 8.644453085011518e-06, "loss": 1.6638, "step": 47770 }, { "epoch": 1.698693449471158, "grad_norm": 1.7659945487976074, "learning_rate": 8.640570259446782e-06, "loss": 1.6807, "step": 47780 }, { "epoch": 1.6990489734245844, "grad_norm": 1.9426976442337036, "learning_rate": 8.636687642681935e-06, "loss": 1.65, "step": 47790 }, { "epoch": 1.6994044973780107, "grad_norm": 1.8343504667282104, "learning_rate": 8.632805235313315e-06, "loss": 1.6104, "step": 47800 }, { "epoch": 1.6997600213314372, "grad_norm": 1.7841132879257202, "learning_rate": 8.628923037937238e-06, "loss": 1.6606, "step": 47810 }, { "epoch": 1.7001155452848635, "grad_norm": 1.7915278673171997, "learning_rate": 8.625041051149992e-06, "loss": 1.6563, "step": 47820 }, { "epoch": 1.70047106923829, "grad_norm": 1.7958619594573975, "learning_rate": 8.621159275547813e-06, "loss": 1.6554, "step": 47830 }, { "epoch": 1.7008265931917164, "grad_norm": 1.9656076431274414, "learning_rate": 8.617277711726927e-06, "loss": 1.6628, "step": 47840 }, { "epoch": 1.7011821171451427, "grad_norm": 1.9339438676834106, "learning_rate": 8.613396360283513e-06, "loss": 1.6671, "step": 47850 }, { "epoch": 1.701537641098569, "grad_norm": 1.8888641595840454, "learning_rate": 8.609515221813723e-06, "loss": 1.6346, "step": 47860 }, { "epoch": 1.7018931650519953, "grad_norm": 1.8836134672164917, "learning_rate": 8.605634296913677e-06, "loss": 1.6757, "step": 47870 }, { "epoch": 1.7022486890054216, "grad_norm": 1.9890522956848145, "learning_rate": 8.60175358617946e-06, "loss": 1.6638, "step": 47880 }, { "epoch": 1.7026042129588481, "grad_norm": 1.8154431581497192, "learning_rate": 8.597873090207126e-06, "loss": 1.6747, "step": 47890 }, { "epoch": 1.7029597369122744, "grad_norm": 1.8716108798980713, "learning_rate": 8.593992809592696e-06, "loss": 1.6357, "step": 47900 }, { "epoch": 1.703315260865701, "grad_norm": 1.9345163106918335, "learning_rate": 8.590112744932152e-06, "loss": 1.6609, "step": 47910 }, { "epoch": 1.7036707848191273, "grad_norm": 1.8247530460357666, "learning_rate": 8.58623289682145e-06, "loss": 1.6321, "step": 47920 }, { "epoch": 1.7040263087725536, "grad_norm": 1.8243434429168701, "learning_rate": 8.58235326585651e-06, "loss": 1.6495, "step": 47930 }, { "epoch": 1.7043818327259799, "grad_norm": 2.006085157394409, "learning_rate": 8.578473852633223e-06, "loss": 1.6593, "step": 47940 }, { "epoch": 1.7047373566794062, "grad_norm": 1.8560900688171387, "learning_rate": 8.574594657747438e-06, "loss": 1.6531, "step": 47950 }, { "epoch": 1.7050928806328325, "grad_norm": 2.0169386863708496, "learning_rate": 8.570715681794975e-06, "loss": 1.6395, "step": 47960 }, { "epoch": 1.705448404586259, "grad_norm": 1.9506243467330933, "learning_rate": 8.566836925371622e-06, "loss": 1.6153, "step": 47970 }, { "epoch": 1.7058039285396853, "grad_norm": 1.9792253971099854, "learning_rate": 8.562958389073135e-06, "loss": 1.659, "step": 47980 }, { "epoch": 1.7061594524931118, "grad_norm": 1.7830065488815308, "learning_rate": 8.559080073495225e-06, "loss": 1.6293, "step": 47990 }, { "epoch": 1.7065149764465382, "grad_norm": 1.8974437713623047, "learning_rate": 8.555201979233582e-06, "loss": 1.6888, "step": 48000 }, { "epoch": 1.7068705003999645, "grad_norm": 1.8268883228302002, "learning_rate": 8.551324106883855e-06, "loss": 1.6514, "step": 48010 }, { "epoch": 1.7072260243533908, "grad_norm": 1.7850176095962524, "learning_rate": 8.547446457041661e-06, "loss": 1.6396, "step": 48020 }, { "epoch": 1.707581548306817, "grad_norm": 1.9473520517349243, "learning_rate": 8.543569030302588e-06, "loss": 1.6424, "step": 48030 }, { "epoch": 1.7079370722602434, "grad_norm": 1.8418095111846924, "learning_rate": 8.539691827262176e-06, "loss": 1.651, "step": 48040 }, { "epoch": 1.70829259621367, "grad_norm": 1.8214665651321411, "learning_rate": 8.535814848515945e-06, "loss": 1.6395, "step": 48050 }, { "epoch": 1.7086481201670962, "grad_norm": 1.9348556995391846, "learning_rate": 8.531938094659372e-06, "loss": 1.671, "step": 48060 }, { "epoch": 1.7090036441205227, "grad_norm": 1.7536616325378418, "learning_rate": 8.528061566287903e-06, "loss": 1.652, "step": 48070 }, { "epoch": 1.709359168073949, "grad_norm": 1.8494676351547241, "learning_rate": 8.524185263996949e-06, "loss": 1.6481, "step": 48080 }, { "epoch": 1.7097146920273754, "grad_norm": 1.9387456178665161, "learning_rate": 8.520309188381883e-06, "loss": 1.6661, "step": 48090 }, { "epoch": 1.7100702159808017, "grad_norm": 2.008708953857422, "learning_rate": 8.516433340038048e-06, "loss": 1.6913, "step": 48100 }, { "epoch": 1.710425739934228, "grad_norm": 1.864981770515442, "learning_rate": 8.512557719560753e-06, "loss": 1.7076, "step": 48110 }, { "epoch": 1.7107812638876543, "grad_norm": 1.906530737876892, "learning_rate": 8.508682327545262e-06, "loss": 1.6635, "step": 48120 }, { "epoch": 1.7111367878410808, "grad_norm": 1.8444490432739258, "learning_rate": 8.504807164586819e-06, "loss": 1.6394, "step": 48130 }, { "epoch": 1.7114923117945071, "grad_norm": 1.902989149093628, "learning_rate": 8.500932231280621e-06, "loss": 1.6327, "step": 48140 }, { "epoch": 1.7118478357479336, "grad_norm": 1.8431181907653809, "learning_rate": 8.49705752822183e-06, "loss": 1.6779, "step": 48150 }, { "epoch": 1.71220335970136, "grad_norm": 1.8293133974075317, "learning_rate": 8.49318305600558e-06, "loss": 1.6268, "step": 48160 }, { "epoch": 1.7125588836547863, "grad_norm": 1.8206945657730103, "learning_rate": 8.489308815226964e-06, "loss": 1.6714, "step": 48170 }, { "epoch": 1.7129144076082126, "grad_norm": 1.8169163465499878, "learning_rate": 8.485434806481043e-06, "loss": 1.6115, "step": 48180 }, { "epoch": 1.7132699315616389, "grad_norm": 1.916739583015442, "learning_rate": 8.481561030362838e-06, "loss": 1.6676, "step": 48190 }, { "epoch": 1.7136254555150652, "grad_norm": 1.8636739253997803, "learning_rate": 8.477687487467339e-06, "loss": 1.6472, "step": 48200 }, { "epoch": 1.7139809794684917, "grad_norm": 1.800962209701538, "learning_rate": 8.473814178389498e-06, "loss": 1.6537, "step": 48210 }, { "epoch": 1.714336503421918, "grad_norm": 1.931645393371582, "learning_rate": 8.469941103724228e-06, "loss": 1.6173, "step": 48220 }, { "epoch": 1.7146920273753445, "grad_norm": 1.9857007265090942, "learning_rate": 8.466068264066412e-06, "loss": 1.6992, "step": 48230 }, { "epoch": 1.7150475513287708, "grad_norm": 2.0565314292907715, "learning_rate": 8.462195660010891e-06, "loss": 1.6542, "step": 48240 }, { "epoch": 1.7154030752821972, "grad_norm": 1.9118432998657227, "learning_rate": 8.458323292152475e-06, "loss": 1.6868, "step": 48250 }, { "epoch": 1.7157585992356235, "grad_norm": 1.9311240911483765, "learning_rate": 8.454451161085939e-06, "loss": 1.6531, "step": 48260 }, { "epoch": 1.7161141231890498, "grad_norm": 1.836564064025879, "learning_rate": 8.450579267406009e-06, "loss": 1.6497, "step": 48270 }, { "epoch": 1.716469647142476, "grad_norm": 1.8827811479568481, "learning_rate": 8.44670761170739e-06, "loss": 1.6564, "step": 48280 }, { "epoch": 1.7168251710959026, "grad_norm": 1.8383026123046875, "learning_rate": 8.442836194584742e-06, "loss": 1.6792, "step": 48290 }, { "epoch": 1.717180695049329, "grad_norm": 1.927247166633606, "learning_rate": 8.438965016632696e-06, "loss": 1.6611, "step": 48300 }, { "epoch": 1.7175362190027554, "grad_norm": 1.8960721492767334, "learning_rate": 8.435094078445832e-06, "loss": 1.6543, "step": 48310 }, { "epoch": 1.7178917429561817, "grad_norm": 2.0390431880950928, "learning_rate": 8.431223380618705e-06, "loss": 1.6784, "step": 48320 }, { "epoch": 1.718247266909608, "grad_norm": 1.7168264389038086, "learning_rate": 8.42735292374583e-06, "loss": 1.6135, "step": 48330 }, { "epoch": 1.7186027908630344, "grad_norm": 2.055449962615967, "learning_rate": 8.423482708421688e-06, "loss": 1.6575, "step": 48340 }, { "epoch": 1.7189583148164607, "grad_norm": 1.9829033613204956, "learning_rate": 8.419612735240715e-06, "loss": 1.6444, "step": 48350 }, { "epoch": 1.719313838769887, "grad_norm": 1.8641419410705566, "learning_rate": 8.415743004797316e-06, "loss": 1.6166, "step": 48360 }, { "epoch": 1.7196693627233135, "grad_norm": 1.9465245008468628, "learning_rate": 8.411873517685857e-06, "loss": 1.6515, "step": 48370 }, { "epoch": 1.7200248866767398, "grad_norm": 1.771341323852539, "learning_rate": 8.408004274500672e-06, "loss": 1.6284, "step": 48380 }, { "epoch": 1.7203804106301663, "grad_norm": 1.7727006673812866, "learning_rate": 8.404135275836048e-06, "loss": 1.6571, "step": 48390 }, { "epoch": 1.7207359345835926, "grad_norm": 1.8482820987701416, "learning_rate": 8.400266522286236e-06, "loss": 1.6374, "step": 48400 }, { "epoch": 1.721091458537019, "grad_norm": 1.762482762336731, "learning_rate": 8.396398014445458e-06, "loss": 1.6637, "step": 48410 }, { "epoch": 1.7214469824904453, "grad_norm": 1.965485692024231, "learning_rate": 8.392529752907889e-06, "loss": 1.6498, "step": 48420 }, { "epoch": 1.7218025064438716, "grad_norm": 1.8650978803634644, "learning_rate": 8.388661738267672e-06, "loss": 1.6588, "step": 48430 }, { "epoch": 1.7221580303972979, "grad_norm": 1.8982642889022827, "learning_rate": 8.384793971118912e-06, "loss": 1.6392, "step": 48440 }, { "epoch": 1.7225135543507244, "grad_norm": 1.8832117319107056, "learning_rate": 8.380926452055667e-06, "loss": 1.658, "step": 48450 }, { "epoch": 1.7228690783041507, "grad_norm": 1.882262110710144, "learning_rate": 8.37705918167197e-06, "loss": 1.6739, "step": 48460 }, { "epoch": 1.7232246022575772, "grad_norm": 1.9449636936187744, "learning_rate": 8.373192160561807e-06, "loss": 1.6682, "step": 48470 }, { "epoch": 1.7235801262110035, "grad_norm": 1.9521130323410034, "learning_rate": 8.369325389319126e-06, "loss": 1.6181, "step": 48480 }, { "epoch": 1.7239356501644298, "grad_norm": 1.724274754524231, "learning_rate": 8.36545886853784e-06, "loss": 1.6648, "step": 48490 }, { "epoch": 1.7242911741178562, "grad_norm": 1.8489983081817627, "learning_rate": 8.361592598811822e-06, "loss": 1.6589, "step": 48500 }, { "epoch": 1.7246466980712825, "grad_norm": 2.001936197280884, "learning_rate": 8.357726580734909e-06, "loss": 1.6294, "step": 48510 }, { "epoch": 1.7250022220247088, "grad_norm": 2.116563320159912, "learning_rate": 8.353860814900898e-06, "loss": 1.6479, "step": 48520 }, { "epoch": 1.7253577459781353, "grad_norm": 1.769295573234558, "learning_rate": 8.349995301903543e-06, "loss": 1.6134, "step": 48530 }, { "epoch": 1.7257132699315616, "grad_norm": 2.013415813446045, "learning_rate": 8.346130042336563e-06, "loss": 1.7189, "step": 48540 }, { "epoch": 1.7260687938849881, "grad_norm": 1.9870202541351318, "learning_rate": 8.34226503679364e-06, "loss": 1.6479, "step": 48550 }, { "epoch": 1.7264243178384144, "grad_norm": 1.8795701265335083, "learning_rate": 8.338400285868412e-06, "loss": 1.6145, "step": 48560 }, { "epoch": 1.7267798417918407, "grad_norm": 1.7985905408859253, "learning_rate": 8.334535790154485e-06, "loss": 1.6709, "step": 48570 }, { "epoch": 1.727135365745267, "grad_norm": 1.8081409931182861, "learning_rate": 8.330671550245415e-06, "loss": 1.6225, "step": 48580 }, { "epoch": 1.7274908896986934, "grad_norm": 1.790563702583313, "learning_rate": 8.32680756673473e-06, "loss": 1.5796, "step": 48590 }, { "epoch": 1.7278464136521197, "grad_norm": 1.9314616918563843, "learning_rate": 8.32294384021591e-06, "loss": 1.6238, "step": 48600 }, { "epoch": 1.7282019376055462, "grad_norm": 1.8861994743347168, "learning_rate": 8.319080371282401e-06, "loss": 1.644, "step": 48610 }, { "epoch": 1.7285574615589725, "grad_norm": 1.9431285858154297, "learning_rate": 8.315217160527608e-06, "loss": 1.6395, "step": 48620 }, { "epoch": 1.728912985512399, "grad_norm": 1.8947607278823853, "learning_rate": 8.3113542085449e-06, "loss": 1.647, "step": 48630 }, { "epoch": 1.7292685094658253, "grad_norm": 2.009699583053589, "learning_rate": 8.307491515927592e-06, "loss": 1.6631, "step": 48640 }, { "epoch": 1.7296240334192516, "grad_norm": 2.165419340133667, "learning_rate": 8.303629083268979e-06, "loss": 1.6434, "step": 48650 }, { "epoch": 1.729979557372678, "grad_norm": 1.774941325187683, "learning_rate": 8.2997669111623e-06, "loss": 1.6216, "step": 48660 }, { "epoch": 1.7303350813261043, "grad_norm": 2.1260578632354736, "learning_rate": 8.295905000200762e-06, "loss": 1.6332, "step": 48670 }, { "epoch": 1.7306906052795306, "grad_norm": 1.8239442110061646, "learning_rate": 8.29204335097753e-06, "loss": 1.6628, "step": 48680 }, { "epoch": 1.731046129232957, "grad_norm": 1.8172132968902588, "learning_rate": 8.288181964085732e-06, "loss": 1.6164, "step": 48690 }, { "epoch": 1.7314016531863834, "grad_norm": 1.971981406211853, "learning_rate": 8.284320840118454e-06, "loss": 1.6506, "step": 48700 }, { "epoch": 1.73175717713981, "grad_norm": 1.8251330852508545, "learning_rate": 8.280459979668733e-06, "loss": 1.6201, "step": 48710 }, { "epoch": 1.7321127010932362, "grad_norm": 1.8595893383026123, "learning_rate": 8.27659938332958e-06, "loss": 1.6461, "step": 48720 }, { "epoch": 1.7324682250466625, "grad_norm": 1.7532918453216553, "learning_rate": 8.272739051693956e-06, "loss": 1.606, "step": 48730 }, { "epoch": 1.7328237490000888, "grad_norm": 1.788098931312561, "learning_rate": 8.268878985354782e-06, "loss": 1.6624, "step": 48740 }, { "epoch": 1.7331792729535151, "grad_norm": 1.756616473197937, "learning_rate": 8.265019184904944e-06, "loss": 1.6264, "step": 48750 }, { "epoch": 1.7335347969069415, "grad_norm": 1.7843860387802124, "learning_rate": 8.261159650937279e-06, "loss": 1.6808, "step": 48760 }, { "epoch": 1.733890320860368, "grad_norm": 2.107708692550659, "learning_rate": 8.25730038404459e-06, "loss": 1.6513, "step": 48770 }, { "epoch": 1.7342458448137943, "grad_norm": 1.9271059036254883, "learning_rate": 8.253441384819633e-06, "loss": 1.6843, "step": 48780 }, { "epoch": 1.7346013687672208, "grad_norm": 1.7932971715927124, "learning_rate": 8.249582653855134e-06, "loss": 1.6347, "step": 48790 }, { "epoch": 1.7349568927206471, "grad_norm": 1.8732764720916748, "learning_rate": 8.24572419174376e-06, "loss": 1.6266, "step": 48800 }, { "epoch": 1.7353124166740734, "grad_norm": 1.7979843616485596, "learning_rate": 8.241865999078152e-06, "loss": 1.6351, "step": 48810 }, { "epoch": 1.7356679406274997, "grad_norm": 1.7244874238967896, "learning_rate": 8.2380080764509e-06, "loss": 1.6142, "step": 48820 }, { "epoch": 1.736023464580926, "grad_norm": 1.789110779762268, "learning_rate": 8.234150424454564e-06, "loss": 1.6237, "step": 48830 }, { "epoch": 1.7363789885343524, "grad_norm": 1.8341730833053589, "learning_rate": 8.230293043681647e-06, "loss": 1.6738, "step": 48840 }, { "epoch": 1.7367345124877789, "grad_norm": 1.818542718887329, "learning_rate": 8.226435934724624e-06, "loss": 1.6691, "step": 48850 }, { "epoch": 1.7370900364412052, "grad_norm": 1.8525408506393433, "learning_rate": 8.22257909817592e-06, "loss": 1.6211, "step": 48860 }, { "epoch": 1.7374455603946317, "grad_norm": 1.92518150806427, "learning_rate": 8.218722534627923e-06, "loss": 1.6193, "step": 48870 }, { "epoch": 1.737801084348058, "grad_norm": 2.171698808670044, "learning_rate": 8.214866244672977e-06, "loss": 1.6144, "step": 48880 }, { "epoch": 1.7381566083014843, "grad_norm": 2.083942174911499, "learning_rate": 8.211010228903382e-06, "loss": 1.6741, "step": 48890 }, { "epoch": 1.7385121322549106, "grad_norm": 1.959802508354187, "learning_rate": 8.207154487911397e-06, "loss": 1.6517, "step": 48900 }, { "epoch": 1.738867656208337, "grad_norm": 1.8355531692504883, "learning_rate": 8.203299022289244e-06, "loss": 1.6382, "step": 48910 }, { "epoch": 1.7392231801617632, "grad_norm": 2.048764705657959, "learning_rate": 8.199443832629093e-06, "loss": 1.6289, "step": 48920 }, { "epoch": 1.7395787041151898, "grad_norm": 1.98831045627594, "learning_rate": 8.195588919523083e-06, "loss": 1.6617, "step": 48930 }, { "epoch": 1.739934228068616, "grad_norm": 2.0530476570129395, "learning_rate": 8.1917342835633e-06, "loss": 1.6808, "step": 48940 }, { "epoch": 1.7402897520220426, "grad_norm": 1.8697683811187744, "learning_rate": 8.187879925341795e-06, "loss": 1.6542, "step": 48950 }, { "epoch": 1.740645275975469, "grad_norm": 1.8175350427627563, "learning_rate": 8.184025845450571e-06, "loss": 1.6497, "step": 48960 }, { "epoch": 1.7410007999288952, "grad_norm": 2.0738396644592285, "learning_rate": 8.18017204448159e-06, "loss": 1.6428, "step": 48970 }, { "epoch": 1.7413563238823215, "grad_norm": 1.8094022274017334, "learning_rate": 8.176318523026769e-06, "loss": 1.6206, "step": 48980 }, { "epoch": 1.7417118478357478, "grad_norm": 1.8019884824752808, "learning_rate": 8.172465281677992e-06, "loss": 1.6582, "step": 48990 }, { "epoch": 1.7420673717891741, "grad_norm": 1.8854589462280273, "learning_rate": 8.168612321027087e-06, "loss": 1.669, "step": 49000 }, { "epoch": 1.7424228957426007, "grad_norm": 1.8984565734863281, "learning_rate": 8.16475964166585e-06, "loss": 1.665, "step": 49010 }, { "epoch": 1.742778419696027, "grad_norm": 2.357611656188965, "learning_rate": 8.160907244186022e-06, "loss": 1.6877, "step": 49020 }, { "epoch": 1.7431339436494535, "grad_norm": 2.006403923034668, "learning_rate": 8.15705512917931e-06, "loss": 1.6531, "step": 49030 }, { "epoch": 1.7434894676028798, "grad_norm": 2.006674289703369, "learning_rate": 8.153203297237375e-06, "loss": 1.659, "step": 49040 }, { "epoch": 1.7438449915563061, "grad_norm": 1.8706060647964478, "learning_rate": 8.149351748951834e-06, "loss": 1.6299, "step": 49050 }, { "epoch": 1.7442005155097324, "grad_norm": 1.9062683582305908, "learning_rate": 8.14550048491426e-06, "loss": 1.6134, "step": 49060 }, { "epoch": 1.7445560394631587, "grad_norm": 1.959368348121643, "learning_rate": 8.141649505716187e-06, "loss": 1.596, "step": 49070 }, { "epoch": 1.744911563416585, "grad_norm": 1.8529287576675415, "learning_rate": 8.137798811949096e-06, "loss": 1.6149, "step": 49080 }, { "epoch": 1.7452670873700116, "grad_norm": 1.9746596813201904, "learning_rate": 8.13394840420443e-06, "loss": 1.6894, "step": 49090 }, { "epoch": 1.7456226113234379, "grad_norm": 1.8950711488723755, "learning_rate": 8.130098283073591e-06, "loss": 1.6461, "step": 49100 }, { "epoch": 1.7459781352768644, "grad_norm": 1.800689697265625, "learning_rate": 8.126248449147933e-06, "loss": 1.6526, "step": 49110 }, { "epoch": 1.7463336592302907, "grad_norm": 1.7372263669967651, "learning_rate": 8.122398903018762e-06, "loss": 1.6448, "step": 49120 }, { "epoch": 1.746689183183717, "grad_norm": 2.0515880584716797, "learning_rate": 8.118549645277347e-06, "loss": 1.6301, "step": 49130 }, { "epoch": 1.7470447071371433, "grad_norm": 1.994960069656372, "learning_rate": 8.114700676514912e-06, "loss": 1.6879, "step": 49140 }, { "epoch": 1.7474002310905696, "grad_norm": 2.0096182823181152, "learning_rate": 8.110851997322628e-06, "loss": 1.6471, "step": 49150 }, { "epoch": 1.747755755043996, "grad_norm": 2.0000274181365967, "learning_rate": 8.107003608291634e-06, "loss": 1.6305, "step": 49160 }, { "epoch": 1.7481112789974225, "grad_norm": 1.9292528629302979, "learning_rate": 8.103155510013016e-06, "loss": 1.702, "step": 49170 }, { "epoch": 1.7484668029508488, "grad_norm": 1.8559280633926392, "learning_rate": 8.09930770307782e-06, "loss": 1.6611, "step": 49180 }, { "epoch": 1.7488223269042753, "grad_norm": 2.0235233306884766, "learning_rate": 8.095460188077043e-06, "loss": 1.6847, "step": 49190 }, { "epoch": 1.7491778508577016, "grad_norm": 1.8203125, "learning_rate": 8.091612965601639e-06, "loss": 1.6576, "step": 49200 }, { "epoch": 1.749533374811128, "grad_norm": 1.8425822257995605, "learning_rate": 8.087766036242516e-06, "loss": 1.6762, "step": 49210 }, { "epoch": 1.7498888987645542, "grad_norm": 1.8479509353637695, "learning_rate": 8.083919400590544e-06, "loss": 1.6412, "step": 49220 }, { "epoch": 1.7502444227179805, "grad_norm": 1.9452064037322998, "learning_rate": 8.080073059236535e-06, "loss": 1.6381, "step": 49230 }, { "epoch": 1.7505999466714068, "grad_norm": 1.8753700256347656, "learning_rate": 8.076227012771266e-06, "loss": 1.6642, "step": 49240 }, { "epoch": 1.7509554706248334, "grad_norm": 1.87269127368927, "learning_rate": 8.072381261785469e-06, "loss": 1.6624, "step": 49250 }, { "epoch": 1.7513109945782597, "grad_norm": 1.8374950885772705, "learning_rate": 8.068535806869821e-06, "loss": 1.6988, "step": 49260 }, { "epoch": 1.7516665185316862, "grad_norm": 2.0532567501068115, "learning_rate": 8.064690648614966e-06, "loss": 1.6251, "step": 49270 }, { "epoch": 1.7520220424851125, "grad_norm": 1.9432693719863892, "learning_rate": 8.060845787611491e-06, "loss": 1.6493, "step": 49280 }, { "epoch": 1.7523775664385388, "grad_norm": 1.6951652765274048, "learning_rate": 8.057001224449943e-06, "loss": 1.6139, "step": 49290 }, { "epoch": 1.7527330903919651, "grad_norm": 1.902296543121338, "learning_rate": 8.053156959720826e-06, "loss": 1.6348, "step": 49300 }, { "epoch": 1.7530886143453914, "grad_norm": 2.1470212936401367, "learning_rate": 8.049312994014589e-06, "loss": 1.6944, "step": 49310 }, { "epoch": 1.7534441382988177, "grad_norm": 1.822934627532959, "learning_rate": 8.045469327921651e-06, "loss": 1.6366, "step": 49320 }, { "epoch": 1.7537996622522443, "grad_norm": 1.9577301740646362, "learning_rate": 8.041625962032367e-06, "loss": 1.6651, "step": 49330 }, { "epoch": 1.7541551862056706, "grad_norm": 1.9063875675201416, "learning_rate": 8.037782896937054e-06, "loss": 1.6522, "step": 49340 }, { "epoch": 1.754510710159097, "grad_norm": 1.7476140260696411, "learning_rate": 8.033940133225986e-06, "loss": 1.6984, "step": 49350 }, { "epoch": 1.7548662341125234, "grad_norm": 2.013690233230591, "learning_rate": 8.030097671489387e-06, "loss": 1.6184, "step": 49360 }, { "epoch": 1.7552217580659497, "grad_norm": 1.9201568365097046, "learning_rate": 8.026255512317434e-06, "loss": 1.6145, "step": 49370 }, { "epoch": 1.755577282019376, "grad_norm": 1.874620795249939, "learning_rate": 8.02241365630026e-06, "loss": 1.6559, "step": 49380 }, { "epoch": 1.7559328059728023, "grad_norm": 1.8998016119003296, "learning_rate": 8.018572104027948e-06, "loss": 1.667, "step": 49390 }, { "epoch": 1.7562883299262286, "grad_norm": 1.9586131572723389, "learning_rate": 8.014730856090535e-06, "loss": 1.6391, "step": 49400 }, { "epoch": 1.7566438538796552, "grad_norm": 1.9174072742462158, "learning_rate": 8.010889913078017e-06, "loss": 1.6337, "step": 49410 }, { "epoch": 1.7569993778330815, "grad_norm": 1.8285397291183472, "learning_rate": 8.007049275580335e-06, "loss": 1.6383, "step": 49420 }, { "epoch": 1.757354901786508, "grad_norm": 1.9597249031066895, "learning_rate": 8.003208944187394e-06, "loss": 1.6446, "step": 49430 }, { "epoch": 1.7577104257399343, "grad_norm": 1.9941136837005615, "learning_rate": 7.999368919489034e-06, "loss": 1.6533, "step": 49440 }, { "epoch": 1.7580659496933606, "grad_norm": 1.950408697128296, "learning_rate": 7.995529202075066e-06, "loss": 1.6704, "step": 49450 }, { "epoch": 1.758421473646787, "grad_norm": 1.843684434890747, "learning_rate": 7.991689792535244e-06, "loss": 1.6248, "step": 49460 }, { "epoch": 1.7587769976002132, "grad_norm": 2.0115206241607666, "learning_rate": 7.987850691459275e-06, "loss": 1.6599, "step": 49470 }, { "epoch": 1.7591325215536395, "grad_norm": 1.9936758279800415, "learning_rate": 7.984011899436826e-06, "loss": 1.684, "step": 49480 }, { "epoch": 1.759488045507066, "grad_norm": 1.991235613822937, "learning_rate": 7.980173417057507e-06, "loss": 1.6603, "step": 49490 }, { "epoch": 1.7598435694604924, "grad_norm": 1.8691331148147583, "learning_rate": 7.976335244910887e-06, "loss": 1.7209, "step": 49500 }, { "epoch": 1.760199093413919, "grad_norm": 1.801890254020691, "learning_rate": 7.972497383586489e-06, "loss": 1.6109, "step": 49510 }, { "epoch": 1.7605546173673452, "grad_norm": 1.8995479345321655, "learning_rate": 7.968659833673776e-06, "loss": 1.6582, "step": 49520 }, { "epoch": 1.7609101413207715, "grad_norm": 2.027247428894043, "learning_rate": 7.964822595762176e-06, "loss": 1.7072, "step": 49530 }, { "epoch": 1.7612656652741978, "grad_norm": 1.877119541168213, "learning_rate": 7.960985670441067e-06, "loss": 1.6737, "step": 49540 }, { "epoch": 1.7616211892276241, "grad_norm": 2.048544406890869, "learning_rate": 7.957149058299771e-06, "loss": 1.6738, "step": 49550 }, { "epoch": 1.7619767131810504, "grad_norm": 1.965006709098816, "learning_rate": 7.953312759927576e-06, "loss": 1.6252, "step": 49560 }, { "epoch": 1.762332237134477, "grad_norm": 1.9446678161621094, "learning_rate": 7.949476775913703e-06, "loss": 1.6972, "step": 49570 }, { "epoch": 1.7626877610879033, "grad_norm": 2.228520154953003, "learning_rate": 7.945641106847343e-06, "loss": 1.6422, "step": 49580 }, { "epoch": 1.7630432850413298, "grad_norm": 1.7643654346466064, "learning_rate": 7.941805753317631e-06, "loss": 1.6238, "step": 49590 }, { "epoch": 1.763398808994756, "grad_norm": 1.9489067792892456, "learning_rate": 7.937970715913647e-06, "loss": 1.6082, "step": 49600 }, { "epoch": 1.7637543329481824, "grad_norm": 2.0185320377349854, "learning_rate": 7.934135995224431e-06, "loss": 1.6458, "step": 49610 }, { "epoch": 1.7641098569016087, "grad_norm": 1.8964686393737793, "learning_rate": 7.930301591838973e-06, "loss": 1.6666, "step": 49620 }, { "epoch": 1.764465380855035, "grad_norm": 1.9244173765182495, "learning_rate": 7.926467506346215e-06, "loss": 1.6743, "step": 49630 }, { "epoch": 1.7648209048084613, "grad_norm": 1.8678314685821533, "learning_rate": 7.922633739335047e-06, "loss": 1.6113, "step": 49640 }, { "epoch": 1.7651764287618879, "grad_norm": 1.8226903676986694, "learning_rate": 7.91880029139431e-06, "loss": 1.6496, "step": 49650 }, { "epoch": 1.7655319527153142, "grad_norm": 1.890203833580017, "learning_rate": 7.914967163112799e-06, "loss": 1.6457, "step": 49660 }, { "epoch": 1.7658874766687407, "grad_norm": 1.8880730867385864, "learning_rate": 7.91113435507926e-06, "loss": 1.678, "step": 49670 }, { "epoch": 1.766243000622167, "grad_norm": 1.901037573814392, "learning_rate": 7.907301867882384e-06, "loss": 1.6444, "step": 49680 }, { "epoch": 1.7665985245755933, "grad_norm": 1.8280609846115112, "learning_rate": 7.903469702110824e-06, "loss": 1.6791, "step": 49690 }, { "epoch": 1.7669540485290196, "grad_norm": 1.799757719039917, "learning_rate": 7.89963785835317e-06, "loss": 1.6189, "step": 49700 }, { "epoch": 1.767309572482446, "grad_norm": 1.8317128419876099, "learning_rate": 7.895806337197971e-06, "loss": 1.6828, "step": 49710 }, { "epoch": 1.7676650964358722, "grad_norm": 1.954054832458496, "learning_rate": 7.891975139233726e-06, "loss": 1.6297, "step": 49720 }, { "epoch": 1.7680206203892987, "grad_norm": 1.7993971109390259, "learning_rate": 7.88814426504888e-06, "loss": 1.6793, "step": 49730 }, { "epoch": 1.768376144342725, "grad_norm": 1.8766379356384277, "learning_rate": 7.884313715231838e-06, "loss": 1.6927, "step": 49740 }, { "epoch": 1.7687316682961516, "grad_norm": 2.0255801677703857, "learning_rate": 7.880483490370943e-06, "loss": 1.6619, "step": 49750 }, { "epoch": 1.7690871922495779, "grad_norm": 1.9055756330490112, "learning_rate": 7.876653591054495e-06, "loss": 1.6556, "step": 49760 }, { "epoch": 1.7694427162030042, "grad_norm": 1.8836147785186768, "learning_rate": 7.87282401787074e-06, "loss": 1.6847, "step": 49770 }, { "epoch": 1.7697982401564305, "grad_norm": 2.059903860092163, "learning_rate": 7.868994771407876e-06, "loss": 1.6061, "step": 49780 }, { "epoch": 1.7701537641098568, "grad_norm": 1.9796390533447266, "learning_rate": 7.865165852254056e-06, "loss": 1.6801, "step": 49790 }, { "epoch": 1.7705092880632831, "grad_norm": 1.8722137212753296, "learning_rate": 7.861337260997375e-06, "loss": 1.6349, "step": 49800 }, { "epoch": 1.7708648120167096, "grad_norm": 1.951656460762024, "learning_rate": 7.857508998225881e-06, "loss": 1.6252, "step": 49810 }, { "epoch": 1.771220335970136, "grad_norm": 1.750725507736206, "learning_rate": 7.853681064527573e-06, "loss": 1.6021, "step": 49820 }, { "epoch": 1.7715758599235625, "grad_norm": 1.8703793287277222, "learning_rate": 7.849853460490396e-06, "loss": 1.6572, "step": 49830 }, { "epoch": 1.7719313838769888, "grad_norm": 1.9249235391616821, "learning_rate": 7.846026186702242e-06, "loss": 1.644, "step": 49840 }, { "epoch": 1.772286907830415, "grad_norm": 1.7748090028762817, "learning_rate": 7.842199243750962e-06, "loss": 1.6518, "step": 49850 }, { "epoch": 1.7726424317838414, "grad_norm": 1.9108740091323853, "learning_rate": 7.838372632224351e-06, "loss": 1.6537, "step": 49860 }, { "epoch": 1.7729979557372677, "grad_norm": 1.9827098846435547, "learning_rate": 7.83454635271015e-06, "loss": 1.665, "step": 49870 }, { "epoch": 1.773353479690694, "grad_norm": 1.978049874305725, "learning_rate": 7.83072040579605e-06, "loss": 1.6618, "step": 49880 }, { "epoch": 1.7737090036441205, "grad_norm": 1.8595516681671143, "learning_rate": 7.826894792069694e-06, "loss": 1.6266, "step": 49890 }, { "epoch": 1.7740645275975468, "grad_norm": 2.042062759399414, "learning_rate": 7.823069512118673e-06, "loss": 1.6352, "step": 49900 }, { "epoch": 1.7744200515509734, "grad_norm": 1.8743336200714111, "learning_rate": 7.81924456653053e-06, "loss": 1.63, "step": 49910 }, { "epoch": 1.7747755755043997, "grad_norm": 2.0691182613372803, "learning_rate": 7.815419955892744e-06, "loss": 1.6679, "step": 49920 }, { "epoch": 1.775131099457826, "grad_norm": 1.824062705039978, "learning_rate": 7.811595680792755e-06, "loss": 1.6083, "step": 49930 }, { "epoch": 1.7754866234112523, "grad_norm": 1.7824209928512573, "learning_rate": 7.807771741817947e-06, "loss": 1.6047, "step": 49940 }, { "epoch": 1.7758421473646786, "grad_norm": 2.0143516063690186, "learning_rate": 7.803948139555657e-06, "loss": 1.6744, "step": 49950 }, { "epoch": 1.776197671318105, "grad_norm": 1.9708036184310913, "learning_rate": 7.80012487459316e-06, "loss": 1.6705, "step": 49960 }, { "epoch": 1.7765531952715314, "grad_norm": 1.932173252105713, "learning_rate": 7.79630194751769e-06, "loss": 1.6251, "step": 49970 }, { "epoch": 1.7769087192249577, "grad_norm": 1.9063539505004883, "learning_rate": 7.792479358916425e-06, "loss": 1.6432, "step": 49980 }, { "epoch": 1.7772642431783843, "grad_norm": 2.011800527572632, "learning_rate": 7.788657109376488e-06, "loss": 1.6514, "step": 49990 }, { "epoch": 1.7776197671318106, "grad_norm": 2.0261800289154053, "learning_rate": 7.784835199484954e-06, "loss": 1.6612, "step": 50000 }, { "epoch": 1.7779752910852369, "grad_norm": 1.8654749393463135, "learning_rate": 7.781013629828845e-06, "loss": 1.6984, "step": 50010 }, { "epoch": 1.7783308150386632, "grad_norm": 1.7497426271438599, "learning_rate": 7.777192400995128e-06, "loss": 1.6403, "step": 50020 }, { "epoch": 1.7786863389920895, "grad_norm": 1.7313170433044434, "learning_rate": 7.773371513570723e-06, "loss": 1.6368, "step": 50030 }, { "epoch": 1.7790418629455158, "grad_norm": 1.8992712497711182, "learning_rate": 7.76955096814249e-06, "loss": 1.6526, "step": 50040 }, { "epoch": 1.7793973868989423, "grad_norm": 1.8442968130111694, "learning_rate": 7.765730765297246e-06, "loss": 1.6573, "step": 50050 }, { "epoch": 1.7797529108523686, "grad_norm": 1.8860867023468018, "learning_rate": 7.761910905621745e-06, "loss": 1.5908, "step": 50060 }, { "epoch": 1.7801084348057952, "grad_norm": 1.8785138130187988, "learning_rate": 7.7580913897027e-06, "loss": 1.6388, "step": 50070 }, { "epoch": 1.7804639587592215, "grad_norm": 1.8686470985412598, "learning_rate": 7.754272218126757e-06, "loss": 1.622, "step": 50080 }, { "epoch": 1.7808194827126478, "grad_norm": 1.92991042137146, "learning_rate": 7.750453391480522e-06, "loss": 1.6813, "step": 50090 }, { "epoch": 1.781175006666074, "grad_norm": 1.7424781322479248, "learning_rate": 7.746634910350538e-06, "loss": 1.6498, "step": 50100 }, { "epoch": 1.7815305306195004, "grad_norm": 2.404031991958618, "learning_rate": 7.742816775323306e-06, "loss": 1.6185, "step": 50110 }, { "epoch": 1.7818860545729267, "grad_norm": 1.7803823947906494, "learning_rate": 7.738998986985263e-06, "loss": 1.6103, "step": 50120 }, { "epoch": 1.7822415785263532, "grad_norm": 1.9528794288635254, "learning_rate": 7.735181545922804e-06, "loss": 1.6373, "step": 50130 }, { "epoch": 1.7825971024797795, "grad_norm": 1.7719610929489136, "learning_rate": 7.731364452722253e-06, "loss": 1.6575, "step": 50140 }, { "epoch": 1.782952626433206, "grad_norm": 1.8989477157592773, "learning_rate": 7.727547707969899e-06, "loss": 1.6341, "step": 50150 }, { "epoch": 1.7833081503866324, "grad_norm": 1.91252601146698, "learning_rate": 7.723731312251969e-06, "loss": 1.6367, "step": 50160 }, { "epoch": 1.7836636743400587, "grad_norm": 1.777572512626648, "learning_rate": 7.719915266154637e-06, "loss": 1.6309, "step": 50170 }, { "epoch": 1.784019198293485, "grad_norm": 1.8915362358093262, "learning_rate": 7.716099570264027e-06, "loss": 1.66, "step": 50180 }, { "epoch": 1.7843747222469113, "grad_norm": 2.0040836334228516, "learning_rate": 7.7122842251662e-06, "loss": 1.6722, "step": 50190 }, { "epoch": 1.7847302462003376, "grad_norm": 1.9604754447937012, "learning_rate": 7.708469231447171e-06, "loss": 1.679, "step": 50200 }, { "epoch": 1.7850857701537641, "grad_norm": 2.096215009689331, "learning_rate": 7.7046545896929e-06, "loss": 1.6247, "step": 50210 }, { "epoch": 1.7854412941071904, "grad_norm": 2.1358866691589355, "learning_rate": 7.700840300489292e-06, "loss": 1.6332, "step": 50220 }, { "epoch": 1.785796818060617, "grad_norm": 1.9483267068862915, "learning_rate": 7.697026364422204e-06, "loss": 1.6584, "step": 50230 }, { "epoch": 1.7861523420140433, "grad_norm": 1.8866466283798218, "learning_rate": 7.693212782077422e-06, "loss": 1.6412, "step": 50240 }, { "epoch": 1.7865078659674696, "grad_norm": 1.8588370084762573, "learning_rate": 7.689399554040692e-06, "loss": 1.5853, "step": 50250 }, { "epoch": 1.7868633899208959, "grad_norm": 2.025243043899536, "learning_rate": 7.685586680897706e-06, "loss": 1.6427, "step": 50260 }, { "epoch": 1.7872189138743222, "grad_norm": 1.9626860618591309, "learning_rate": 7.681774163234091e-06, "loss": 1.6968, "step": 50270 }, { "epoch": 1.7875744378277485, "grad_norm": 1.9678057432174683, "learning_rate": 7.67796200163543e-06, "loss": 1.6988, "step": 50280 }, { "epoch": 1.787929961781175, "grad_norm": 1.9846853017807007, "learning_rate": 7.674150196687247e-06, "loss": 1.659, "step": 50290 }, { "epoch": 1.7882854857346013, "grad_norm": 1.9511381387710571, "learning_rate": 7.67033874897501e-06, "loss": 1.6484, "step": 50300 }, { "epoch": 1.7886410096880279, "grad_norm": 2.210939884185791, "learning_rate": 7.666527659084135e-06, "loss": 1.6239, "step": 50310 }, { "epoch": 1.7889965336414542, "grad_norm": 1.7664453983306885, "learning_rate": 7.66271692759998e-06, "loss": 1.6703, "step": 50320 }, { "epoch": 1.7893520575948805, "grad_norm": 1.9270297288894653, "learning_rate": 7.658906555107848e-06, "loss": 1.6343, "step": 50330 }, { "epoch": 1.7897075815483068, "grad_norm": 1.9423515796661377, "learning_rate": 7.655096542192992e-06, "loss": 1.6383, "step": 50340 }, { "epoch": 1.790063105501733, "grad_norm": 1.9486314058303833, "learning_rate": 7.651286889440605e-06, "loss": 1.6595, "step": 50350 }, { "epoch": 1.7904186294551594, "grad_norm": 1.8882137537002563, "learning_rate": 7.647477597435826e-06, "loss": 1.6311, "step": 50360 }, { "epoch": 1.790774153408586, "grad_norm": 1.9961556196212769, "learning_rate": 7.643668666763736e-06, "loss": 1.656, "step": 50370 }, { "epoch": 1.7911296773620122, "grad_norm": 1.8319236040115356, "learning_rate": 7.639860098009366e-06, "loss": 1.6111, "step": 50380 }, { "epoch": 1.7914852013154388, "grad_norm": 2.002304792404175, "learning_rate": 7.636051891757688e-06, "loss": 1.6362, "step": 50390 }, { "epoch": 1.791840725268865, "grad_norm": 1.8890831470489502, "learning_rate": 7.632244048593616e-06, "loss": 1.6935, "step": 50400 }, { "epoch": 1.7921962492222914, "grad_norm": 1.926177978515625, "learning_rate": 7.62843656910201e-06, "loss": 1.6639, "step": 50410 }, { "epoch": 1.7925517731757177, "grad_norm": 1.9095516204833984, "learning_rate": 7.624629453867678e-06, "loss": 1.6437, "step": 50420 }, { "epoch": 1.792907297129144, "grad_norm": 1.785805106163025, "learning_rate": 7.620822703475368e-06, "loss": 1.66, "step": 50430 }, { "epoch": 1.7932628210825703, "grad_norm": 1.992326259613037, "learning_rate": 7.617016318509776e-06, "loss": 1.6632, "step": 50440 }, { "epoch": 1.7936183450359968, "grad_norm": 1.7599471807479858, "learning_rate": 7.613210299555534e-06, "loss": 1.6556, "step": 50450 }, { "epoch": 1.7939738689894231, "grad_norm": 2.0583577156066895, "learning_rate": 7.609404647197224e-06, "loss": 1.6079, "step": 50460 }, { "epoch": 1.7943293929428497, "grad_norm": 1.8822389841079712, "learning_rate": 7.605599362019371e-06, "loss": 1.6571, "step": 50470 }, { "epoch": 1.794684916896276, "grad_norm": 2.0984292030334473, "learning_rate": 7.601794444606443e-06, "loss": 1.6544, "step": 50480 }, { "epoch": 1.7950404408497023, "grad_norm": 1.8730183839797974, "learning_rate": 7.597989895542854e-06, "loss": 1.6628, "step": 50490 }, { "epoch": 1.7953959648031286, "grad_norm": 1.9191683530807495, "learning_rate": 7.594185715412954e-06, "loss": 1.6725, "step": 50500 }, { "epoch": 1.7957514887565549, "grad_norm": 1.9092954397201538, "learning_rate": 7.590381904801043e-06, "loss": 1.6753, "step": 50510 }, { "epoch": 1.7961070127099812, "grad_norm": 1.960010290145874, "learning_rate": 7.586578464291364e-06, "loss": 1.5978, "step": 50520 }, { "epoch": 1.7964625366634077, "grad_norm": 1.9142476320266724, "learning_rate": 7.5827753944681e-06, "loss": 1.657, "step": 50530 }, { "epoch": 1.796818060616834, "grad_norm": 1.9966776371002197, "learning_rate": 7.5789726959153795e-06, "loss": 1.6301, "step": 50540 }, { "epoch": 1.7971735845702606, "grad_norm": 1.8700714111328125, "learning_rate": 7.575170369217277e-06, "loss": 1.6687, "step": 50550 }, { "epoch": 1.7975291085236869, "grad_norm": 1.9684398174285889, "learning_rate": 7.571368414957798e-06, "loss": 1.6068, "step": 50560 }, { "epoch": 1.7978846324771132, "grad_norm": 1.9292086362838745, "learning_rate": 7.567566833720905e-06, "loss": 1.644, "step": 50570 }, { "epoch": 1.7982401564305395, "grad_norm": 1.9431310892105103, "learning_rate": 7.563765626090493e-06, "loss": 1.684, "step": 50580 }, { "epoch": 1.7985956803839658, "grad_norm": 2.0605413913726807, "learning_rate": 7.559964792650405e-06, "loss": 1.6587, "step": 50590 }, { "epoch": 1.798951204337392, "grad_norm": 1.8475911617279053, "learning_rate": 7.556164333984425e-06, "loss": 1.6829, "step": 50600 }, { "epoch": 1.7993067282908186, "grad_norm": 1.995463490486145, "learning_rate": 7.552364250676282e-06, "loss": 1.6511, "step": 50610 }, { "epoch": 1.799662252244245, "grad_norm": 1.9126129150390625, "learning_rate": 7.548564543309645e-06, "loss": 1.6134, "step": 50620 }, { "epoch": 1.8000177761976714, "grad_norm": 2.0450148582458496, "learning_rate": 7.544765212468119e-06, "loss": 1.623, "step": 50630 }, { "epoch": 1.8003733001510978, "grad_norm": 1.9064500331878662, "learning_rate": 7.540966258735265e-06, "loss": 1.6364, "step": 50640 }, { "epoch": 1.800728824104524, "grad_norm": 1.8855499029159546, "learning_rate": 7.537167682694574e-06, "loss": 1.6494, "step": 50650 }, { "epoch": 1.8010843480579504, "grad_norm": 1.9371328353881836, "learning_rate": 7.533369484929484e-06, "loss": 1.6667, "step": 50660 }, { "epoch": 1.8014398720113767, "grad_norm": 1.9383577108383179, "learning_rate": 7.52957166602338e-06, "loss": 1.6576, "step": 50670 }, { "epoch": 1.801795395964803, "grad_norm": 1.8724509477615356, "learning_rate": 7.525774226559575e-06, "loss": 1.6832, "step": 50680 }, { "epoch": 1.8021509199182295, "grad_norm": 1.9099206924438477, "learning_rate": 7.521977167121335e-06, "loss": 1.6408, "step": 50690 }, { "epoch": 1.8025064438716558, "grad_norm": 2.0748062133789062, "learning_rate": 7.5181804882918645e-06, "loss": 1.6146, "step": 50700 }, { "epoch": 1.8028619678250823, "grad_norm": 1.8202855587005615, "learning_rate": 7.5143841906543135e-06, "loss": 1.6116, "step": 50710 }, { "epoch": 1.8032174917785087, "grad_norm": 2.272087574005127, "learning_rate": 7.510588274791763e-06, "loss": 1.6305, "step": 50720 }, { "epoch": 1.803573015731935, "grad_norm": 1.9922109842300415, "learning_rate": 7.506792741287245e-06, "loss": 1.6432, "step": 50730 }, { "epoch": 1.8039285396853613, "grad_norm": 1.9014875888824463, "learning_rate": 7.502997590723729e-06, "loss": 1.6595, "step": 50740 }, { "epoch": 1.8042840636387876, "grad_norm": 1.8036856651306152, "learning_rate": 7.499202823684129e-06, "loss": 1.634, "step": 50750 }, { "epoch": 1.8046395875922139, "grad_norm": 2.0609259605407715, "learning_rate": 7.4954084407512915e-06, "loss": 1.6135, "step": 50760 }, { "epoch": 1.8049951115456404, "grad_norm": 2.009648084640503, "learning_rate": 7.491614442508015e-06, "loss": 1.6143, "step": 50770 }, { "epoch": 1.8053506354990667, "grad_norm": 1.8452153205871582, "learning_rate": 7.487820829537031e-06, "loss": 1.5965, "step": 50780 }, { "epoch": 1.8057061594524932, "grad_norm": 1.9561113119125366, "learning_rate": 7.4840276024210175e-06, "loss": 1.5791, "step": 50790 }, { "epoch": 1.8060616834059195, "grad_norm": 1.8616101741790771, "learning_rate": 7.480234761742592e-06, "loss": 1.6199, "step": 50800 }, { "epoch": 1.8064172073593459, "grad_norm": 1.8184536695480347, "learning_rate": 7.476442308084304e-06, "loss": 1.6182, "step": 50810 }, { "epoch": 1.8067727313127722, "grad_norm": 1.9425073862075806, "learning_rate": 7.472650242028656e-06, "loss": 1.6653, "step": 50820 }, { "epoch": 1.8071282552661985, "grad_norm": 1.9306671619415283, "learning_rate": 7.468858564158083e-06, "loss": 1.6021, "step": 50830 }, { "epoch": 1.8074837792196248, "grad_norm": 2.0422468185424805, "learning_rate": 7.4650672750549655e-06, "loss": 1.634, "step": 50840 }, { "epoch": 1.8078393031730513, "grad_norm": 1.7657324075698853, "learning_rate": 7.46127637530162e-06, "loss": 1.6569, "step": 50850 }, { "epoch": 1.8081948271264776, "grad_norm": 2.0055558681488037, "learning_rate": 7.4574858654803075e-06, "loss": 1.6297, "step": 50860 }, { "epoch": 1.8085503510799041, "grad_norm": 1.8196109533309937, "learning_rate": 7.453695746173224e-06, "loss": 1.6497, "step": 50870 }, { "epoch": 1.8089058750333304, "grad_norm": 1.7514034509658813, "learning_rate": 7.449906017962508e-06, "loss": 1.6158, "step": 50880 }, { "epoch": 1.8092613989867568, "grad_norm": 2.044034004211426, "learning_rate": 7.446116681430238e-06, "loss": 1.6472, "step": 50890 }, { "epoch": 1.809616922940183, "grad_norm": 1.816916584968567, "learning_rate": 7.44232773715843e-06, "loss": 1.6352, "step": 50900 }, { "epoch": 1.8099724468936094, "grad_norm": 2.0290634632110596, "learning_rate": 7.438539185729048e-06, "loss": 1.6396, "step": 50910 }, { "epoch": 1.8103279708470357, "grad_norm": 1.9274016618728638, "learning_rate": 7.434751027723984e-06, "loss": 1.6257, "step": 50920 }, { "epoch": 1.8106834948004622, "grad_norm": 1.9005857706069946, "learning_rate": 7.430963263725081e-06, "loss": 1.6422, "step": 50930 }, { "epoch": 1.8110390187538885, "grad_norm": 1.9277817010879517, "learning_rate": 7.427175894314112e-06, "loss": 1.6347, "step": 50940 }, { "epoch": 1.811394542707315, "grad_norm": 1.816381573677063, "learning_rate": 7.423388920072792e-06, "loss": 1.6504, "step": 50950 }, { "epoch": 1.8117500666607413, "grad_norm": 1.9121003150939941, "learning_rate": 7.419602341582779e-06, "loss": 1.6757, "step": 50960 }, { "epoch": 1.8121055906141676, "grad_norm": 2.0025718212127686, "learning_rate": 7.415816159425666e-06, "loss": 1.6708, "step": 50970 }, { "epoch": 1.812461114567594, "grad_norm": 1.9708784818649292, "learning_rate": 7.412030374182989e-06, "loss": 1.6861, "step": 50980 }, { "epoch": 1.8128166385210203, "grad_norm": 2.0150258541107178, "learning_rate": 7.408244986436222e-06, "loss": 1.6494, "step": 50990 }, { "epoch": 1.8131721624744466, "grad_norm": 1.8205463886260986, "learning_rate": 7.404459996766773e-06, "loss": 1.6255, "step": 51000 }, { "epoch": 1.813527686427873, "grad_norm": 1.8556621074676514, "learning_rate": 7.400675405755994e-06, "loss": 1.5942, "step": 51010 }, { "epoch": 1.8138832103812994, "grad_norm": 1.997451663017273, "learning_rate": 7.3968912139851735e-06, "loss": 1.6468, "step": 51020 }, { "epoch": 1.814238734334726, "grad_norm": 1.9072209596633911, "learning_rate": 7.393107422035547e-06, "loss": 1.6418, "step": 51030 }, { "epoch": 1.8145942582881522, "grad_norm": 1.880759835243225, "learning_rate": 7.3893240304882694e-06, "loss": 1.6816, "step": 51040 }, { "epoch": 1.8149497822415785, "grad_norm": 1.8945934772491455, "learning_rate": 7.385541039924453e-06, "loss": 1.6307, "step": 51050 }, { "epoch": 1.8153053061950049, "grad_norm": 1.8671655654907227, "learning_rate": 7.381758450925141e-06, "loss": 1.6668, "step": 51060 }, { "epoch": 1.8156608301484312, "grad_norm": 1.8045915365219116, "learning_rate": 7.377976264071314e-06, "loss": 1.6522, "step": 51070 }, { "epoch": 1.8160163541018575, "grad_norm": 1.8873698711395264, "learning_rate": 7.374194479943892e-06, "loss": 1.6658, "step": 51080 }, { "epoch": 1.816371878055284, "grad_norm": 2.0109992027282715, "learning_rate": 7.370413099123732e-06, "loss": 1.6037, "step": 51090 }, { "epoch": 1.8167274020087103, "grad_norm": 1.9579392671585083, "learning_rate": 7.366632122191635e-06, "loss": 1.6547, "step": 51100 }, { "epoch": 1.8170829259621368, "grad_norm": 1.8878474235534668, "learning_rate": 7.362851549728334e-06, "loss": 1.6412, "step": 51110 }, { "epoch": 1.8174384499155631, "grad_norm": 2.0014569759368896, "learning_rate": 7.359071382314497e-06, "loss": 1.6627, "step": 51120 }, { "epoch": 1.8177939738689894, "grad_norm": 1.850886344909668, "learning_rate": 7.3552916205307375e-06, "loss": 1.6991, "step": 51130 }, { "epoch": 1.8181494978224157, "grad_norm": 1.8765913248062134, "learning_rate": 7.351512264957602e-06, "loss": 1.6695, "step": 51140 }, { "epoch": 1.818505021775842, "grad_norm": 1.9025858640670776, "learning_rate": 7.347733316175577e-06, "loss": 1.6472, "step": 51150 }, { "epoch": 1.8188605457292684, "grad_norm": 1.9211353063583374, "learning_rate": 7.343954774765085e-06, "loss": 1.6251, "step": 51160 }, { "epoch": 1.819216069682695, "grad_norm": 1.9756635427474976, "learning_rate": 7.340176641306488e-06, "loss": 1.6854, "step": 51170 }, { "epoch": 1.8195715936361212, "grad_norm": 1.8511515855789185, "learning_rate": 7.3363989163800786e-06, "loss": 1.6373, "step": 51180 }, { "epoch": 1.8199271175895477, "grad_norm": 1.8389215469360352, "learning_rate": 7.332621600566101e-06, "loss": 1.6088, "step": 51190 }, { "epoch": 1.820282641542974, "grad_norm": 2.011458396911621, "learning_rate": 7.328844694444714e-06, "loss": 1.6239, "step": 51200 }, { "epoch": 1.8206381654964003, "grad_norm": 1.9376658201217651, "learning_rate": 7.325068198596037e-06, "loss": 1.6479, "step": 51210 }, { "epoch": 1.8209936894498266, "grad_norm": 1.9281283617019653, "learning_rate": 7.32129211360011e-06, "loss": 1.6244, "step": 51220 }, { "epoch": 1.821349213403253, "grad_norm": 1.9247887134552002, "learning_rate": 7.317516440036921e-06, "loss": 1.6828, "step": 51230 }, { "epoch": 1.8217047373566793, "grad_norm": 2.0128231048583984, "learning_rate": 7.3137411784863875e-06, "loss": 1.671, "step": 51240 }, { "epoch": 1.8220602613101058, "grad_norm": 1.8623316287994385, "learning_rate": 7.309966329528364e-06, "loss": 1.6425, "step": 51250 }, { "epoch": 1.822415785263532, "grad_norm": 1.8655879497528076, "learning_rate": 7.306191893742647e-06, "loss": 1.6237, "step": 51260 }, { "epoch": 1.8227713092169586, "grad_norm": 2.163519859313965, "learning_rate": 7.302417871708965e-06, "loss": 1.624, "step": 51270 }, { "epoch": 1.823126833170385, "grad_norm": 1.7776243686676025, "learning_rate": 7.2986442640069825e-06, "loss": 1.6252, "step": 51280 }, { "epoch": 1.8234823571238112, "grad_norm": 1.8731697797775269, "learning_rate": 7.294871071216304e-06, "loss": 1.6459, "step": 51290 }, { "epoch": 1.8238378810772375, "grad_norm": 2.144305944442749, "learning_rate": 7.29109829391647e-06, "loss": 1.6274, "step": 51300 }, { "epoch": 1.8241934050306639, "grad_norm": 2.031801223754883, "learning_rate": 7.287325932686951e-06, "loss": 1.6162, "step": 51310 }, { "epoch": 1.8245489289840902, "grad_norm": 1.8657842874526978, "learning_rate": 7.283553988107159e-06, "loss": 1.6696, "step": 51320 }, { "epoch": 1.8249044529375167, "grad_norm": 1.7387397289276123, "learning_rate": 7.279782460756444e-06, "loss": 1.6042, "step": 51330 }, { "epoch": 1.825259976890943, "grad_norm": 2.1133036613464355, "learning_rate": 7.276011351214086e-06, "loss": 1.6408, "step": 51340 }, { "epoch": 1.8256155008443695, "grad_norm": 1.9837652444839478, "learning_rate": 7.2722406600593085e-06, "loss": 1.6293, "step": 51350 }, { "epoch": 1.8259710247977958, "grad_norm": 1.8695124387741089, "learning_rate": 7.26847038787126e-06, "loss": 1.6729, "step": 51360 }, { "epoch": 1.8263265487512221, "grad_norm": 1.9198970794677734, "learning_rate": 7.264700535229034e-06, "loss": 1.5999, "step": 51370 }, { "epoch": 1.8266820727046484, "grad_norm": 2.110482692718506, "learning_rate": 7.260931102711655e-06, "loss": 1.6974, "step": 51380 }, { "epoch": 1.8270375966580747, "grad_norm": 1.60737943649292, "learning_rate": 7.257162090898082e-06, "loss": 1.6974, "step": 51390 }, { "epoch": 1.827393120611501, "grad_norm": 1.9014869928359985, "learning_rate": 7.2533935003672155e-06, "loss": 1.6543, "step": 51400 }, { "epoch": 1.8277486445649276, "grad_norm": 1.9607372283935547, "learning_rate": 7.2496253316978845e-06, "loss": 1.6592, "step": 51410 }, { "epoch": 1.828104168518354, "grad_norm": 1.8342626094818115, "learning_rate": 7.245857585468859e-06, "loss": 1.6359, "step": 51420 }, { "epoch": 1.8284596924717804, "grad_norm": 1.9489827156066895, "learning_rate": 7.242090262258843e-06, "loss": 1.6203, "step": 51430 }, { "epoch": 1.8288152164252067, "grad_norm": 1.9720518589019775, "learning_rate": 7.238323362646467e-06, "loss": 1.6486, "step": 51440 }, { "epoch": 1.829170740378633, "grad_norm": 1.8984580039978027, "learning_rate": 7.2345568872103066e-06, "loss": 1.6462, "step": 51450 }, { "epoch": 1.8295262643320593, "grad_norm": 1.8438483476638794, "learning_rate": 7.230790836528868e-06, "loss": 1.6396, "step": 51460 }, { "epoch": 1.8298817882854856, "grad_norm": 2.130063772201538, "learning_rate": 7.227025211180595e-06, "loss": 1.636, "step": 51470 }, { "epoch": 1.830237312238912, "grad_norm": 1.7676109075546265, "learning_rate": 7.223260011743864e-06, "loss": 1.668, "step": 51480 }, { "epoch": 1.8305928361923385, "grad_norm": 1.8324123620986938, "learning_rate": 7.219495238796984e-06, "loss": 1.6186, "step": 51490 }, { "epoch": 1.8309483601457648, "grad_norm": 1.9791982173919678, "learning_rate": 7.2157308929182015e-06, "loss": 1.6493, "step": 51500 }, { "epoch": 1.8313038840991913, "grad_norm": 1.9997572898864746, "learning_rate": 7.211966974685696e-06, "loss": 1.6644, "step": 51510 }, { "epoch": 1.8316594080526176, "grad_norm": 1.9054361581802368, "learning_rate": 7.208203484677585e-06, "loss": 1.6271, "step": 51520 }, { "epoch": 1.832014932006044, "grad_norm": 1.9085414409637451, "learning_rate": 7.204440423471912e-06, "loss": 1.6195, "step": 51530 }, { "epoch": 1.8323704559594702, "grad_norm": 1.890094518661499, "learning_rate": 7.20067779164666e-06, "loss": 1.639, "step": 51540 }, { "epoch": 1.8327259799128965, "grad_norm": 1.8796864748001099, "learning_rate": 7.196915589779751e-06, "loss": 1.6624, "step": 51550 }, { "epoch": 1.8330815038663228, "grad_norm": 2.056006669998169, "learning_rate": 7.193153818449028e-06, "loss": 1.6749, "step": 51560 }, { "epoch": 1.8334370278197494, "grad_norm": 1.8881081342697144, "learning_rate": 7.18939247823228e-06, "loss": 1.612, "step": 51570 }, { "epoch": 1.8337925517731757, "grad_norm": 1.8354383707046509, "learning_rate": 7.185631569707225e-06, "loss": 1.6587, "step": 51580 }, { "epoch": 1.8341480757266022, "grad_norm": 1.985830307006836, "learning_rate": 7.181871093451516e-06, "loss": 1.6561, "step": 51590 }, { "epoch": 1.8345035996800285, "grad_norm": 1.9907962083816528, "learning_rate": 7.178111050042735e-06, "loss": 1.6557, "step": 51600 }, { "epoch": 1.8348591236334548, "grad_norm": 1.9100240468978882, "learning_rate": 7.174351440058407e-06, "loss": 1.6453, "step": 51610 }, { "epoch": 1.8352146475868811, "grad_norm": 1.8884332180023193, "learning_rate": 7.1705922640759775e-06, "loss": 1.6647, "step": 51620 }, { "epoch": 1.8355701715403074, "grad_norm": 1.9803351163864136, "learning_rate": 7.1668335226728355e-06, "loss": 1.6602, "step": 51630 }, { "epoch": 1.8359256954937337, "grad_norm": 2.021226406097412, "learning_rate": 7.1630752164263015e-06, "loss": 1.6323, "step": 51640 }, { "epoch": 1.8362812194471603, "grad_norm": 2.012850761413574, "learning_rate": 7.159317345913626e-06, "loss": 1.6705, "step": 51650 }, { "epoch": 1.8366367434005866, "grad_norm": 1.9528616666793823, "learning_rate": 7.155559911711998e-06, "loss": 1.6693, "step": 51660 }, { "epoch": 1.836992267354013, "grad_norm": 2.012749433517456, "learning_rate": 7.151802914398529e-06, "loss": 1.6858, "step": 51670 }, { "epoch": 1.8373477913074394, "grad_norm": 1.9602713584899902, "learning_rate": 7.14804635455028e-06, "loss": 1.6632, "step": 51680 }, { "epoch": 1.8377033152608657, "grad_norm": 1.8004664182662964, "learning_rate": 7.144290232744224e-06, "loss": 1.6309, "step": 51690 }, { "epoch": 1.838058839214292, "grad_norm": 1.745529294013977, "learning_rate": 7.140534549557283e-06, "loss": 1.6526, "step": 51700 }, { "epoch": 1.8384143631677183, "grad_norm": 1.8921000957489014, "learning_rate": 7.136779305566306e-06, "loss": 1.6416, "step": 51710 }, { "epoch": 1.8387698871211446, "grad_norm": 2.0046639442443848, "learning_rate": 7.133024501348075e-06, "loss": 1.6262, "step": 51720 }, { "epoch": 1.8391254110745712, "grad_norm": 2.1108291149139404, "learning_rate": 7.129270137479305e-06, "loss": 1.6445, "step": 51730 }, { "epoch": 1.8394809350279975, "grad_norm": 1.8767834901809692, "learning_rate": 7.125516214536643e-06, "loss": 1.6542, "step": 51740 }, { "epoch": 1.839836458981424, "grad_norm": 1.8338385820388794, "learning_rate": 7.121762733096666e-06, "loss": 1.675, "step": 51750 }, { "epoch": 1.8401919829348503, "grad_norm": 1.8881033658981323, "learning_rate": 7.118009693735888e-06, "loss": 1.6364, "step": 51760 }, { "epoch": 1.8405475068882766, "grad_norm": 2.1846683025360107, "learning_rate": 7.114257097030749e-06, "loss": 1.6522, "step": 51770 }, { "epoch": 1.840903030841703, "grad_norm": 1.8050435781478882, "learning_rate": 7.110504943557627e-06, "loss": 1.6297, "step": 51780 }, { "epoch": 1.8412585547951292, "grad_norm": 1.8365222215652466, "learning_rate": 7.10675323389283e-06, "loss": 1.6818, "step": 51790 }, { "epoch": 1.8416140787485555, "grad_norm": 1.8047209978103638, "learning_rate": 7.103001968612593e-06, "loss": 1.6475, "step": 51800 }, { "epoch": 1.841969602701982, "grad_norm": 1.920447826385498, "learning_rate": 7.0992511482930905e-06, "loss": 1.6629, "step": 51810 }, { "epoch": 1.8423251266554084, "grad_norm": 1.8633935451507568, "learning_rate": 7.095500773510423e-06, "loss": 1.6475, "step": 51820 }, { "epoch": 1.842680650608835, "grad_norm": 2.0102744102478027, "learning_rate": 7.0917508448406256e-06, "loss": 1.6218, "step": 51830 }, { "epoch": 1.8430361745622612, "grad_norm": 2.0085699558258057, "learning_rate": 7.0880013628596675e-06, "loss": 1.6499, "step": 51840 }, { "epoch": 1.8433916985156875, "grad_norm": 1.9968560934066772, "learning_rate": 7.084252328143437e-06, "loss": 1.6404, "step": 51850 }, { "epoch": 1.8437472224691138, "grad_norm": 1.7302302122116089, "learning_rate": 7.080503741267768e-06, "loss": 1.6511, "step": 51860 }, { "epoch": 1.8441027464225401, "grad_norm": 1.8278783559799194, "learning_rate": 7.07675560280842e-06, "loss": 1.6605, "step": 51870 }, { "epoch": 1.8444582703759664, "grad_norm": 1.9455891847610474, "learning_rate": 7.0730079133410825e-06, "loss": 1.6489, "step": 51880 }, { "epoch": 1.844813794329393, "grad_norm": 2.237320899963379, "learning_rate": 7.069260673441376e-06, "loss": 1.6566, "step": 51890 }, { "epoch": 1.8451693182828193, "grad_norm": 2.096794843673706, "learning_rate": 7.065513883684853e-06, "loss": 1.6156, "step": 51900 }, { "epoch": 1.8455248422362458, "grad_norm": 1.753090500831604, "learning_rate": 7.061767544647e-06, "loss": 1.6459, "step": 51910 }, { "epoch": 1.845880366189672, "grad_norm": 1.887935996055603, "learning_rate": 7.058021656903231e-06, "loss": 1.6211, "step": 51920 }, { "epoch": 1.8462358901430984, "grad_norm": 1.9207141399383545, "learning_rate": 7.054276221028886e-06, "loss": 1.6498, "step": 51930 }, { "epoch": 1.8465914140965247, "grad_norm": 2.0247364044189453, "learning_rate": 7.050531237599244e-06, "loss": 1.6259, "step": 51940 }, { "epoch": 1.846946938049951, "grad_norm": 1.835836410522461, "learning_rate": 7.04678670718951e-06, "loss": 1.6194, "step": 51950 }, { "epoch": 1.8473024620033773, "grad_norm": 1.896708369255066, "learning_rate": 7.043042630374822e-06, "loss": 1.6359, "step": 51960 }, { "epoch": 1.8476579859568039, "grad_norm": 1.8522411584854126, "learning_rate": 7.039299007730248e-06, "loss": 1.6118, "step": 51970 }, { "epoch": 1.8480135099102302, "grad_norm": 1.8102995157241821, "learning_rate": 7.03555583983078e-06, "loss": 1.7098, "step": 51980 }, { "epoch": 1.8483690338636567, "grad_norm": 1.7815966606140137, "learning_rate": 7.031813127251348e-06, "loss": 1.6347, "step": 51990 }, { "epoch": 1.848724557817083, "grad_norm": 1.9562181234359741, "learning_rate": 7.028070870566813e-06, "loss": 1.6436, "step": 52000 }, { "epoch": 1.8490800817705093, "grad_norm": 2.038663387298584, "learning_rate": 7.024329070351954e-06, "loss": 1.6473, "step": 52010 }, { "epoch": 1.8494356057239356, "grad_norm": 1.9478405714035034, "learning_rate": 7.0205877271814914e-06, "loss": 1.6152, "step": 52020 }, { "epoch": 1.849791129677362, "grad_norm": 1.9415020942687988, "learning_rate": 7.016846841630074e-06, "loss": 1.5909, "step": 52030 }, { "epoch": 1.8501466536307882, "grad_norm": 1.7721590995788574, "learning_rate": 7.0131064142722775e-06, "loss": 1.595, "step": 52040 }, { "epoch": 1.8505021775842148, "grad_norm": 1.9393048286437988, "learning_rate": 7.00936644568261e-06, "loss": 1.6165, "step": 52050 }, { "epoch": 1.850857701537641, "grad_norm": 1.960496425628662, "learning_rate": 7.005626936435501e-06, "loss": 1.6196, "step": 52060 }, { "epoch": 1.8512132254910676, "grad_norm": 1.8558365106582642, "learning_rate": 7.0018878871053205e-06, "loss": 1.6447, "step": 52070 }, { "epoch": 1.851568749444494, "grad_norm": 1.9220082759857178, "learning_rate": 6.998149298266364e-06, "loss": 1.6293, "step": 52080 }, { "epoch": 1.8519242733979202, "grad_norm": 2.0755198001861572, "learning_rate": 6.994411170492852e-06, "loss": 1.6548, "step": 52090 }, { "epoch": 1.8522797973513465, "grad_norm": 1.7107176780700684, "learning_rate": 6.99067350435894e-06, "loss": 1.6267, "step": 52100 }, { "epoch": 1.8526353213047728, "grad_norm": 2.0055670738220215, "learning_rate": 6.986936300438709e-06, "loss": 1.6632, "step": 52110 }, { "epoch": 1.8529908452581991, "grad_norm": 1.9524128437042236, "learning_rate": 6.9831995593061695e-06, "loss": 1.6436, "step": 52120 }, { "epoch": 1.8533463692116257, "grad_norm": 1.8308955430984497, "learning_rate": 6.979463281535263e-06, "loss": 1.6158, "step": 52130 }, { "epoch": 1.853701893165052, "grad_norm": 1.8957483768463135, "learning_rate": 6.975727467699856e-06, "loss": 1.6426, "step": 52140 }, { "epoch": 1.8540574171184785, "grad_norm": 1.9096064567565918, "learning_rate": 6.971992118373751e-06, "loss": 1.6047, "step": 52150 }, { "epoch": 1.8544129410719048, "grad_norm": 1.9696846008300781, "learning_rate": 6.96825723413067e-06, "loss": 1.6335, "step": 52160 }, { "epoch": 1.854768465025331, "grad_norm": 1.9123235940933228, "learning_rate": 6.964522815544267e-06, "loss": 1.6384, "step": 52170 }, { "epoch": 1.8551239889787574, "grad_norm": 1.8498286008834839, "learning_rate": 6.960788863188128e-06, "loss": 1.69, "step": 52180 }, { "epoch": 1.8554795129321837, "grad_norm": 1.8621598482131958, "learning_rate": 6.957055377635763e-06, "loss": 1.6193, "step": 52190 }, { "epoch": 1.85583503688561, "grad_norm": 2.083085298538208, "learning_rate": 6.953322359460613e-06, "loss": 1.6396, "step": 52200 }, { "epoch": 1.8561905608390366, "grad_norm": 1.911636233329773, "learning_rate": 6.949589809236044e-06, "loss": 1.614, "step": 52210 }, { "epoch": 1.8565460847924629, "grad_norm": 1.8892377614974976, "learning_rate": 6.945857727535355e-06, "loss": 1.6627, "step": 52220 }, { "epoch": 1.8569016087458894, "grad_norm": 1.8188692331314087, "learning_rate": 6.942126114931771e-06, "loss": 1.7005, "step": 52230 }, { "epoch": 1.8572571326993157, "grad_norm": 1.85429048538208, "learning_rate": 6.938394971998441e-06, "loss": 1.6463, "step": 52240 }, { "epoch": 1.857612656652742, "grad_norm": 2.153946876525879, "learning_rate": 6.934664299308447e-06, "loss": 1.6127, "step": 52250 }, { "epoch": 1.8579681806061683, "grad_norm": 1.8729736804962158, "learning_rate": 6.930934097434798e-06, "loss": 1.6296, "step": 52260 }, { "epoch": 1.8583237045595946, "grad_norm": 2.1069846153259277, "learning_rate": 6.927204366950426e-06, "loss": 1.64, "step": 52270 }, { "epoch": 1.858679228513021, "grad_norm": 2.0351064205169678, "learning_rate": 6.9234751084282e-06, "loss": 1.5966, "step": 52280 }, { "epoch": 1.8590347524664474, "grad_norm": 1.7966029644012451, "learning_rate": 6.919746322440905e-06, "loss": 1.6567, "step": 52290 }, { "epoch": 1.8593902764198738, "grad_norm": 1.9065757989883423, "learning_rate": 6.916018009561263e-06, "loss": 1.6614, "step": 52300 }, { "epoch": 1.8597458003733003, "grad_norm": 1.963813066482544, "learning_rate": 6.9122901703619164e-06, "loss": 1.6268, "step": 52310 }, { "epoch": 1.8601013243267266, "grad_norm": 1.971492052078247, "learning_rate": 6.908562805415444e-06, "loss": 1.6525, "step": 52320 }, { "epoch": 1.860456848280153, "grad_norm": 2.0255093574523926, "learning_rate": 6.904835915294337e-06, "loss": 1.6332, "step": 52330 }, { "epoch": 1.8608123722335792, "grad_norm": 2.054584503173828, "learning_rate": 6.901109500571026e-06, "loss": 1.6178, "step": 52340 }, { "epoch": 1.8611678961870055, "grad_norm": 1.9593502283096313, "learning_rate": 6.897383561817867e-06, "loss": 1.6165, "step": 52350 }, { "epoch": 1.8615234201404318, "grad_norm": 1.9384634494781494, "learning_rate": 6.893658099607141e-06, "loss": 1.656, "step": 52360 }, { "epoch": 1.8618789440938583, "grad_norm": 2.130075454711914, "learning_rate": 6.889933114511052e-06, "loss": 1.6586, "step": 52370 }, { "epoch": 1.8622344680472847, "grad_norm": 2.0572383403778076, "learning_rate": 6.886208607101734e-06, "loss": 1.6845, "step": 52380 }, { "epoch": 1.8625899920007112, "grad_norm": 2.1621816158294678, "learning_rate": 6.882484577951254e-06, "loss": 1.6646, "step": 52390 }, { "epoch": 1.8629455159541375, "grad_norm": 1.9399415254592896, "learning_rate": 6.878761027631593e-06, "loss": 1.6042, "step": 52400 }, { "epoch": 1.8633010399075638, "grad_norm": 1.8925954103469849, "learning_rate": 6.875037956714672e-06, "loss": 1.619, "step": 52410 }, { "epoch": 1.86365656386099, "grad_norm": 1.9047517776489258, "learning_rate": 6.871315365772324e-06, "loss": 1.6381, "step": 52420 }, { "epoch": 1.8640120878144164, "grad_norm": 2.014160394668579, "learning_rate": 6.867593255376319e-06, "loss": 1.6567, "step": 52430 }, { "epoch": 1.8643676117678427, "grad_norm": 1.8610714673995972, "learning_rate": 6.863871626098349e-06, "loss": 1.6461, "step": 52440 }, { "epoch": 1.8647231357212692, "grad_norm": 1.9440066814422607, "learning_rate": 6.860150478510035e-06, "loss": 1.618, "step": 52450 }, { "epoch": 1.8650786596746955, "grad_norm": 1.9076669216156006, "learning_rate": 6.856429813182919e-06, "loss": 1.6221, "step": 52460 }, { "epoch": 1.865434183628122, "grad_norm": 1.999401330947876, "learning_rate": 6.852709630688477e-06, "loss": 1.6114, "step": 52470 }, { "epoch": 1.8657897075815484, "grad_norm": 1.7853844165802002, "learning_rate": 6.8489899315981e-06, "loss": 1.5854, "step": 52480 }, { "epoch": 1.8661452315349747, "grad_norm": 1.9500246047973633, "learning_rate": 6.845270716483114e-06, "loss": 1.6231, "step": 52490 }, { "epoch": 1.866500755488401, "grad_norm": 1.7870290279388428, "learning_rate": 6.841551985914763e-06, "loss": 1.6524, "step": 52500 }, { "epoch": 1.8668562794418273, "grad_norm": 1.970482587814331, "learning_rate": 6.837833740464224e-06, "loss": 1.6487, "step": 52510 }, { "epoch": 1.8672118033952536, "grad_norm": 1.7473478317260742, "learning_rate": 6.834115980702595e-06, "loss": 1.6268, "step": 52520 }, { "epoch": 1.8675673273486801, "grad_norm": 1.974705696105957, "learning_rate": 6.8303987072009005e-06, "loss": 1.6069, "step": 52530 }, { "epoch": 1.8679228513021064, "grad_norm": 1.9087203741073608, "learning_rate": 6.826681920530093e-06, "loss": 1.611, "step": 52540 }, { "epoch": 1.868278375255533, "grad_norm": 1.8926479816436768, "learning_rate": 6.8229656212610465e-06, "loss": 1.6678, "step": 52550 }, { "epoch": 1.8686338992089593, "grad_norm": 1.916961669921875, "learning_rate": 6.819249809964557e-06, "loss": 1.6275, "step": 52560 }, { "epoch": 1.8689894231623856, "grad_norm": 2.004983425140381, "learning_rate": 6.815534487211355e-06, "loss": 1.629, "step": 52570 }, { "epoch": 1.869344947115812, "grad_norm": 1.826501488685608, "learning_rate": 6.811819653572088e-06, "loss": 1.6336, "step": 52580 }, { "epoch": 1.8697004710692382, "grad_norm": 1.9561131000518799, "learning_rate": 6.808105309617334e-06, "loss": 1.6884, "step": 52590 }, { "epoch": 1.8700559950226645, "grad_norm": 1.9927582740783691, "learning_rate": 6.804391455917591e-06, "loss": 1.6628, "step": 52600 }, { "epoch": 1.870411518976091, "grad_norm": 1.850220799446106, "learning_rate": 6.8006780930432825e-06, "loss": 1.6724, "step": 52610 }, { "epoch": 1.8707670429295173, "grad_norm": 1.8439356088638306, "learning_rate": 6.7969652215647595e-06, "loss": 1.6149, "step": 52620 }, { "epoch": 1.8711225668829439, "grad_norm": 1.9668513536453247, "learning_rate": 6.793252842052294e-06, "loss": 1.6607, "step": 52630 }, { "epoch": 1.8714780908363702, "grad_norm": 1.8955330848693848, "learning_rate": 6.78954095507609e-06, "loss": 1.6587, "step": 52640 }, { "epoch": 1.8718336147897965, "grad_norm": 1.7684237957000732, "learning_rate": 6.785829561206263e-06, "loss": 1.6449, "step": 52650 }, { "epoch": 1.8721891387432228, "grad_norm": 1.9502431154251099, "learning_rate": 6.782118661012861e-06, "loss": 1.6575, "step": 52660 }, { "epoch": 1.872544662696649, "grad_norm": 2.061988353729248, "learning_rate": 6.778408255065858e-06, "loss": 1.68, "step": 52670 }, { "epoch": 1.8729001866500754, "grad_norm": 1.8865063190460205, "learning_rate": 6.7746983439351465e-06, "loss": 1.6484, "step": 52680 }, { "epoch": 1.873255710603502, "grad_norm": 1.9195656776428223, "learning_rate": 6.770988928190547e-06, "loss": 1.6131, "step": 52690 }, { "epoch": 1.8736112345569282, "grad_norm": 1.9573023319244385, "learning_rate": 6.767280008401801e-06, "loss": 1.6546, "step": 52700 }, { "epoch": 1.8739667585103548, "grad_norm": 1.9096479415893555, "learning_rate": 6.763571585138578e-06, "loss": 1.6396, "step": 52710 }, { "epoch": 1.874322282463781, "grad_norm": 1.8641668558120728, "learning_rate": 6.759863658970467e-06, "loss": 1.6524, "step": 52720 }, { "epoch": 1.8746778064172074, "grad_norm": 2.046111822128296, "learning_rate": 6.756156230466981e-06, "loss": 1.6586, "step": 52730 }, { "epoch": 1.8750333303706337, "grad_norm": 2.072157382965088, "learning_rate": 6.752449300197559e-06, "loss": 1.6432, "step": 52740 }, { "epoch": 1.87538885432406, "grad_norm": 1.7518972158432007, "learning_rate": 6.7487428687315615e-06, "loss": 1.6357, "step": 52750 }, { "epoch": 1.8757443782774863, "grad_norm": 2.038274049758911, "learning_rate": 6.745036936638274e-06, "loss": 1.6059, "step": 52760 }, { "epoch": 1.8760999022309128, "grad_norm": 1.9318206310272217, "learning_rate": 6.741331504486905e-06, "loss": 1.648, "step": 52770 }, { "epoch": 1.8764554261843391, "grad_norm": 2.032932758331299, "learning_rate": 6.7376265728465865e-06, "loss": 1.6737, "step": 52780 }, { "epoch": 1.8768109501377657, "grad_norm": 2.2127063274383545, "learning_rate": 6.733922142286368e-06, "loss": 1.6334, "step": 52790 }, { "epoch": 1.877166474091192, "grad_norm": 1.8505887985229492, "learning_rate": 6.730218213375237e-06, "loss": 1.6206, "step": 52800 }, { "epoch": 1.8775219980446183, "grad_norm": 2.0579097270965576, "learning_rate": 6.72651478668208e-06, "loss": 1.6816, "step": 52810 }, { "epoch": 1.8778775219980446, "grad_norm": 1.8971630334854126, "learning_rate": 6.7228118627757275e-06, "loss": 1.5917, "step": 52820 }, { "epoch": 1.878233045951471, "grad_norm": 1.8610026836395264, "learning_rate": 6.7191094422249244e-06, "loss": 1.6413, "step": 52830 }, { "epoch": 1.8785885699048972, "grad_norm": 1.9640917778015137, "learning_rate": 6.715407525598341e-06, "loss": 1.6587, "step": 52840 }, { "epoch": 1.8789440938583237, "grad_norm": 1.9591315984725952, "learning_rate": 6.711706113464569e-06, "loss": 1.638, "step": 52850 }, { "epoch": 1.87929961781175, "grad_norm": 1.8325140476226807, "learning_rate": 6.7080052063921166e-06, "loss": 1.6287, "step": 52860 }, { "epoch": 1.8796551417651766, "grad_norm": 1.9015944004058838, "learning_rate": 6.704304804949424e-06, "loss": 1.6104, "step": 52870 }, { "epoch": 1.8800106657186029, "grad_norm": 1.8562361001968384, "learning_rate": 6.700604909704851e-06, "loss": 1.5918, "step": 52880 }, { "epoch": 1.8803661896720292, "grad_norm": 2.0939865112304688, "learning_rate": 6.696905521226674e-06, "loss": 1.619, "step": 52890 }, { "epoch": 1.8807217136254555, "grad_norm": 1.8259873390197754, "learning_rate": 6.693206640083101e-06, "loss": 1.5921, "step": 52900 }, { "epoch": 1.8810772375788818, "grad_norm": 1.8358393907546997, "learning_rate": 6.689508266842254e-06, "loss": 1.6305, "step": 52910 }, { "epoch": 1.881432761532308, "grad_norm": 1.8782728910446167, "learning_rate": 6.685810402072179e-06, "loss": 1.6674, "step": 52920 }, { "epoch": 1.8817882854857346, "grad_norm": 2.064119338989258, "learning_rate": 6.682113046340846e-06, "loss": 1.6177, "step": 52930 }, { "epoch": 1.882143809439161, "grad_norm": 1.9853843450546265, "learning_rate": 6.6784162002161465e-06, "loss": 1.6494, "step": 52940 }, { "epoch": 1.8824993333925875, "grad_norm": 1.725140929222107, "learning_rate": 6.674719864265892e-06, "loss": 1.6455, "step": 52950 }, { "epoch": 1.8828548573460138, "grad_norm": 1.8963104486465454, "learning_rate": 6.671024039057821e-06, "loss": 1.6511, "step": 52960 }, { "epoch": 1.88321038129944, "grad_norm": 1.8338735103607178, "learning_rate": 6.667328725159579e-06, "loss": 1.6808, "step": 52970 }, { "epoch": 1.8835659052528664, "grad_norm": 1.9279541969299316, "learning_rate": 6.663633923138753e-06, "loss": 1.6262, "step": 52980 }, { "epoch": 1.8839214292062927, "grad_norm": 1.9289398193359375, "learning_rate": 6.659939633562833e-06, "loss": 1.6397, "step": 52990 }, { "epoch": 1.884276953159719, "grad_norm": 2.0128793716430664, "learning_rate": 6.656245856999244e-06, "loss": 1.6602, "step": 53000 }, { "epoch": 1.8846324771131455, "grad_norm": 1.7492691278457642, "learning_rate": 6.6525525940153265e-06, "loss": 1.6003, "step": 53010 }, { "epoch": 1.8849880010665718, "grad_norm": 1.9173500537872314, "learning_rate": 6.648859845178342e-06, "loss": 1.626, "step": 53020 }, { "epoch": 1.8853435250199984, "grad_norm": 1.886384129524231, "learning_rate": 6.645167611055474e-06, "loss": 1.6548, "step": 53030 }, { "epoch": 1.8856990489734247, "grad_norm": 2.015211343765259, "learning_rate": 6.641475892213824e-06, "loss": 1.7013, "step": 53040 }, { "epoch": 1.886054572926851, "grad_norm": 1.8351353406906128, "learning_rate": 6.637784689220421e-06, "loss": 1.6402, "step": 53050 }, { "epoch": 1.8864100968802773, "grad_norm": 1.9520881175994873, "learning_rate": 6.634094002642207e-06, "loss": 1.6562, "step": 53060 }, { "epoch": 1.8867656208337036, "grad_norm": 1.868396282196045, "learning_rate": 6.63040383304605e-06, "loss": 1.6822, "step": 53070 }, { "epoch": 1.88712114478713, "grad_norm": 1.8462475538253784, "learning_rate": 6.626714180998737e-06, "loss": 1.64, "step": 53080 }, { "epoch": 1.8874766687405564, "grad_norm": 2.012273073196411, "learning_rate": 6.623025047066976e-06, "loss": 1.606, "step": 53090 }, { "epoch": 1.8878321926939827, "grad_norm": 1.8039524555206299, "learning_rate": 6.619336431817393e-06, "loss": 1.6002, "step": 53100 }, { "epoch": 1.8881877166474093, "grad_norm": 1.964730143547058, "learning_rate": 6.615648335816536e-06, "loss": 1.6452, "step": 53110 }, { "epoch": 1.8885432406008356, "grad_norm": 1.8994137048721313, "learning_rate": 6.61196075963088e-06, "loss": 1.6654, "step": 53120 }, { "epoch": 1.8888987645542619, "grad_norm": 1.8373494148254395, "learning_rate": 6.608273703826804e-06, "loss": 1.6285, "step": 53130 }, { "epoch": 1.8892542885076882, "grad_norm": 1.860703468322754, "learning_rate": 6.60458716897062e-06, "loss": 1.6306, "step": 53140 }, { "epoch": 1.8896098124611145, "grad_norm": 1.8882999420166016, "learning_rate": 6.600901155628558e-06, "loss": 1.6727, "step": 53150 }, { "epoch": 1.8899653364145408, "grad_norm": 1.800947904586792, "learning_rate": 6.597215664366767e-06, "loss": 1.6457, "step": 53160 }, { "epoch": 1.8903208603679673, "grad_norm": 1.9370545148849487, "learning_rate": 6.593530695751314e-06, "loss": 1.6583, "step": 53170 }, { "epoch": 1.8906763843213936, "grad_norm": 1.8378711938858032, "learning_rate": 6.589846250348186e-06, "loss": 1.6085, "step": 53180 }, { "epoch": 1.8910319082748202, "grad_norm": 1.7905361652374268, "learning_rate": 6.586162328723294e-06, "loss": 1.637, "step": 53190 }, { "epoch": 1.8913874322282465, "grad_norm": 2.0296435356140137, "learning_rate": 6.582478931442462e-06, "loss": 1.6363, "step": 53200 }, { "epoch": 1.8917429561816728, "grad_norm": 1.907057523727417, "learning_rate": 6.578796059071437e-06, "loss": 1.6487, "step": 53210 }, { "epoch": 1.892098480135099, "grad_norm": 1.8881608247756958, "learning_rate": 6.57511371217589e-06, "loss": 1.6877, "step": 53220 }, { "epoch": 1.8924540040885254, "grad_norm": 1.935365915298462, "learning_rate": 6.571431891321401e-06, "loss": 1.6318, "step": 53230 }, { "epoch": 1.8928095280419517, "grad_norm": 2.105151653289795, "learning_rate": 6.5677505970734745e-06, "loss": 1.6447, "step": 53240 }, { "epoch": 1.8931650519953782, "grad_norm": 1.9732235670089722, "learning_rate": 6.564069829997537e-06, "loss": 1.6506, "step": 53250 }, { "epoch": 1.8935205759488045, "grad_norm": 1.8154934644699097, "learning_rate": 6.5603895906589286e-06, "loss": 1.6714, "step": 53260 }, { "epoch": 1.893876099902231, "grad_norm": 1.8948172330856323, "learning_rate": 6.556709879622916e-06, "loss": 1.6852, "step": 53270 }, { "epoch": 1.8942316238556574, "grad_norm": 1.943363070487976, "learning_rate": 6.553030697454677e-06, "loss": 1.6407, "step": 53280 }, { "epoch": 1.8945871478090837, "grad_norm": 1.8452975749969482, "learning_rate": 6.5493520447193085e-06, "loss": 1.6285, "step": 53290 }, { "epoch": 1.89494267176251, "grad_norm": 1.944027066230774, "learning_rate": 6.54567392198183e-06, "loss": 1.6307, "step": 53300 }, { "epoch": 1.8952981957159363, "grad_norm": 1.9325518608093262, "learning_rate": 6.541996329807177e-06, "loss": 1.6061, "step": 53310 }, { "epoch": 1.8956537196693626, "grad_norm": 2.05012845993042, "learning_rate": 6.538319268760205e-06, "loss": 1.62, "step": 53320 }, { "epoch": 1.896009243622789, "grad_norm": 1.8536720275878906, "learning_rate": 6.53464273940569e-06, "loss": 1.6188, "step": 53330 }, { "epoch": 1.8963647675762154, "grad_norm": 1.8141745328903198, "learning_rate": 6.530966742308322e-06, "loss": 1.6328, "step": 53340 }, { "epoch": 1.896720291529642, "grad_norm": 1.8708752393722534, "learning_rate": 6.5272912780327125e-06, "loss": 1.627, "step": 53350 }, { "epoch": 1.8970758154830683, "grad_norm": 1.889166235923767, "learning_rate": 6.523616347143388e-06, "loss": 1.6239, "step": 53360 }, { "epoch": 1.8974313394364946, "grad_norm": 1.9003798961639404, "learning_rate": 6.519941950204796e-06, "loss": 1.6434, "step": 53370 }, { "epoch": 1.8977868633899209, "grad_norm": 1.793718934059143, "learning_rate": 6.516268087781298e-06, "loss": 1.6483, "step": 53380 }, { "epoch": 1.8981423873433472, "grad_norm": 1.900206208229065, "learning_rate": 6.51259476043718e-06, "loss": 1.6316, "step": 53390 }, { "epoch": 1.8984979112967735, "grad_norm": 1.7711083889007568, "learning_rate": 6.508921968736641e-06, "loss": 1.6419, "step": 53400 }, { "epoch": 1.8988534352502, "grad_norm": 2.0549914836883545, "learning_rate": 6.505249713243798e-06, "loss": 1.685, "step": 53410 }, { "epoch": 1.8992089592036263, "grad_norm": 2.0399253368377686, "learning_rate": 6.501577994522687e-06, "loss": 1.6453, "step": 53420 }, { "epoch": 1.8995644831570528, "grad_norm": 1.9935857057571411, "learning_rate": 6.49790681313726e-06, "loss": 1.6456, "step": 53430 }, { "epoch": 1.8999200071104791, "grad_norm": 1.8322583436965942, "learning_rate": 6.4942361696513936e-06, "loss": 1.5947, "step": 53440 }, { "epoch": 1.9002755310639055, "grad_norm": 1.8570095300674438, "learning_rate": 6.490566064628865e-06, "loss": 1.6, "step": 53450 }, { "epoch": 1.9006310550173318, "grad_norm": 1.8600091934204102, "learning_rate": 6.486896498633384e-06, "loss": 1.662, "step": 53460 }, { "epoch": 1.900986578970758, "grad_norm": 2.0022993087768555, "learning_rate": 6.483227472228576e-06, "loss": 1.5742, "step": 53470 }, { "epoch": 1.9013421029241844, "grad_norm": 1.7619541883468628, "learning_rate": 6.479558985977976e-06, "loss": 1.6417, "step": 53480 }, { "epoch": 1.901697626877611, "grad_norm": 1.8340739011764526, "learning_rate": 6.475891040445043e-06, "loss": 1.6716, "step": 53490 }, { "epoch": 1.9020531508310372, "grad_norm": 1.8696131706237793, "learning_rate": 6.472223636193149e-06, "loss": 1.6595, "step": 53500 }, { "epoch": 1.9024086747844637, "grad_norm": 1.9896914958953857, "learning_rate": 6.468556773785585e-06, "loss": 1.6487, "step": 53510 }, { "epoch": 1.90276419873789, "grad_norm": 1.9387229681015015, "learning_rate": 6.464890453785559e-06, "loss": 1.6067, "step": 53520 }, { "epoch": 1.9031197226913164, "grad_norm": 2.0417120456695557, "learning_rate": 6.461224676756195e-06, "loss": 1.6311, "step": 53530 }, { "epoch": 1.9034752466447427, "grad_norm": 1.9006065130233765, "learning_rate": 6.457559443260531e-06, "loss": 1.653, "step": 53540 }, { "epoch": 1.903830770598169, "grad_norm": 1.9895280599594116, "learning_rate": 6.453894753861525e-06, "loss": 1.5814, "step": 53550 }, { "epoch": 1.9041862945515953, "grad_norm": 2.510457754135132, "learning_rate": 6.450230609122052e-06, "loss": 1.6378, "step": 53560 }, { "epoch": 1.9045418185050218, "grad_norm": 1.9443553686141968, "learning_rate": 6.446567009604898e-06, "loss": 1.6472, "step": 53570 }, { "epoch": 1.904897342458448, "grad_norm": 2.010951519012451, "learning_rate": 6.442903955872775e-06, "loss": 1.6459, "step": 53580 }, { "epoch": 1.9052528664118746, "grad_norm": 1.9579919576644897, "learning_rate": 6.439241448488298e-06, "loss": 1.6313, "step": 53590 }, { "epoch": 1.905608390365301, "grad_norm": 1.821547031402588, "learning_rate": 6.4355794880140124e-06, "loss": 1.6367, "step": 53600 }, { "epoch": 1.9059639143187272, "grad_norm": 1.8498295545578003, "learning_rate": 6.431918075012365e-06, "loss": 1.6766, "step": 53610 }, { "epoch": 1.9063194382721536, "grad_norm": 1.9463131427764893, "learning_rate": 6.42825721004573e-06, "loss": 1.6605, "step": 53620 }, { "epoch": 1.9066749622255799, "grad_norm": 1.7999359369277954, "learning_rate": 6.4245968936763905e-06, "loss": 1.6405, "step": 53630 }, { "epoch": 1.9070304861790062, "grad_norm": 1.837437629699707, "learning_rate": 6.420937126466551e-06, "loss": 1.6095, "step": 53640 }, { "epoch": 1.9073860101324327, "grad_norm": 1.9130752086639404, "learning_rate": 6.4172779089783276e-06, "loss": 1.5937, "step": 53650 }, { "epoch": 1.907741534085859, "grad_norm": 1.8691471815109253, "learning_rate": 6.413619241773757e-06, "loss": 1.6569, "step": 53660 }, { "epoch": 1.9080970580392855, "grad_norm": 1.8947511911392212, "learning_rate": 6.409961125414781e-06, "loss": 1.6334, "step": 53670 }, { "epoch": 1.9084525819927118, "grad_norm": 1.9409008026123047, "learning_rate": 6.406303560463267e-06, "loss": 1.6369, "step": 53680 }, { "epoch": 1.9088081059461381, "grad_norm": 1.8441576957702637, "learning_rate": 6.402646547480993e-06, "loss": 1.6131, "step": 53690 }, { "epoch": 1.9091636298995645, "grad_norm": 1.829941749572754, "learning_rate": 6.398990087029653e-06, "loss": 1.6402, "step": 53700 }, { "epoch": 1.9095191538529908, "grad_norm": 1.9568201303482056, "learning_rate": 6.39533417967086e-06, "loss": 1.6263, "step": 53710 }, { "epoch": 1.909874677806417, "grad_norm": 1.840928554534912, "learning_rate": 6.391678825966134e-06, "loss": 1.6257, "step": 53720 }, { "epoch": 1.9102302017598436, "grad_norm": 2.085839033126831, "learning_rate": 6.388024026476915e-06, "loss": 1.6544, "step": 53730 }, { "epoch": 1.91058572571327, "grad_norm": 1.9179319143295288, "learning_rate": 6.384369781764559e-06, "loss": 1.6979, "step": 53740 }, { "epoch": 1.9109412496666964, "grad_norm": 1.9086040258407593, "learning_rate": 6.380716092390333e-06, "loss": 1.6438, "step": 53750 }, { "epoch": 1.9112967736201227, "grad_norm": 2.0124614238739014, "learning_rate": 6.3770629589154275e-06, "loss": 1.6385, "step": 53760 }, { "epoch": 1.911652297573549, "grad_norm": 1.9612501859664917, "learning_rate": 6.3734103819009315e-06, "loss": 1.6497, "step": 53770 }, { "epoch": 1.9120078215269753, "grad_norm": 2.2222492694854736, "learning_rate": 6.369758361907861e-06, "loss": 1.6181, "step": 53780 }, { "epoch": 1.9123633454804017, "grad_norm": 2.0280916690826416, "learning_rate": 6.366106899497149e-06, "loss": 1.6248, "step": 53790 }, { "epoch": 1.912718869433828, "grad_norm": 1.8456861972808838, "learning_rate": 6.3624559952296275e-06, "loss": 1.6181, "step": 53800 }, { "epoch": 1.9130743933872545, "grad_norm": 1.9914888143539429, "learning_rate": 6.358805649666058e-06, "loss": 1.6349, "step": 53810 }, { "epoch": 1.9134299173406808, "grad_norm": 2.0794036388397217, "learning_rate": 6.35515586336711e-06, "loss": 1.632, "step": 53820 }, { "epoch": 1.9137854412941073, "grad_norm": 1.9409990310668945, "learning_rate": 6.35150663689337e-06, "loss": 1.5857, "step": 53830 }, { "epoch": 1.9141409652475336, "grad_norm": 1.886592149734497, "learning_rate": 6.347857970805336e-06, "loss": 1.6556, "step": 53840 }, { "epoch": 1.91449648920096, "grad_norm": 1.7917068004608154, "learning_rate": 6.3442098656634155e-06, "loss": 1.6261, "step": 53850 }, { "epoch": 1.9148520131543862, "grad_norm": 1.9958102703094482, "learning_rate": 6.340562322027936e-06, "loss": 1.6755, "step": 53860 }, { "epoch": 1.9152075371078126, "grad_norm": 1.9651449918746948, "learning_rate": 6.336915340459142e-06, "loss": 1.6133, "step": 53870 }, { "epoch": 1.9155630610612389, "grad_norm": 1.9258513450622559, "learning_rate": 6.333268921517184e-06, "loss": 1.6422, "step": 53880 }, { "epoch": 1.9159185850146654, "grad_norm": 1.9306154251098633, "learning_rate": 6.329623065762129e-06, "loss": 1.623, "step": 53890 }, { "epoch": 1.9162741089680917, "grad_norm": 1.9108705520629883, "learning_rate": 6.325977773753957e-06, "loss": 1.6683, "step": 53900 }, { "epoch": 1.9166296329215182, "grad_norm": 1.916987419128418, "learning_rate": 6.322333046052562e-06, "loss": 1.6664, "step": 53910 }, { "epoch": 1.9169851568749445, "grad_norm": 1.8760071992874146, "learning_rate": 6.318688883217756e-06, "loss": 1.6102, "step": 53920 }, { "epoch": 1.9173406808283708, "grad_norm": 2.035249710083008, "learning_rate": 6.315045285809251e-06, "loss": 1.6147, "step": 53930 }, { "epoch": 1.9176962047817971, "grad_norm": 1.8555046319961548, "learning_rate": 6.311402254386687e-06, "loss": 1.6164, "step": 53940 }, { "epoch": 1.9180517287352234, "grad_norm": 1.8743116855621338, "learning_rate": 6.307759789509609e-06, "loss": 1.6632, "step": 53950 }, { "epoch": 1.9184072526886498, "grad_norm": 1.9584312438964844, "learning_rate": 6.304117891737475e-06, "loss": 1.6478, "step": 53960 }, { "epoch": 1.9187627766420763, "grad_norm": 1.9928791522979736, "learning_rate": 6.300476561629662e-06, "loss": 1.6504, "step": 53970 }, { "epoch": 1.9191183005955026, "grad_norm": 2.083934783935547, "learning_rate": 6.296835799745452e-06, "loss": 1.6567, "step": 53980 }, { "epoch": 1.9194738245489291, "grad_norm": 1.8650946617126465, "learning_rate": 6.293195606644044e-06, "loss": 1.6624, "step": 53990 }, { "epoch": 1.9198293485023554, "grad_norm": 1.9788693189620972, "learning_rate": 6.289555982884548e-06, "loss": 1.6915, "step": 54000 }, { "epoch": 1.9201848724557817, "grad_norm": 2.0542051792144775, "learning_rate": 6.285916929025988e-06, "loss": 1.6699, "step": 54010 }, { "epoch": 1.920540396409208, "grad_norm": 1.9393855333328247, "learning_rate": 6.282278445627304e-06, "loss": 1.6726, "step": 54020 }, { "epoch": 1.9208959203626343, "grad_norm": 1.938590407371521, "learning_rate": 6.278640533247338e-06, "loss": 1.6455, "step": 54030 }, { "epoch": 1.9212514443160607, "grad_norm": 2.0474417209625244, "learning_rate": 6.275003192444852e-06, "loss": 1.6008, "step": 54040 }, { "epoch": 1.9216069682694872, "grad_norm": 1.933955430984497, "learning_rate": 6.2713664237785195e-06, "loss": 1.6037, "step": 54050 }, { "epoch": 1.9219624922229135, "grad_norm": 1.941810965538025, "learning_rate": 6.2677302278069266e-06, "loss": 1.6202, "step": 54060 }, { "epoch": 1.92231801617634, "grad_norm": 2.1382062435150146, "learning_rate": 6.2640946050885705e-06, "loss": 1.6369, "step": 54070 }, { "epoch": 1.9226735401297663, "grad_norm": 1.9926743507385254, "learning_rate": 6.2604595561818595e-06, "loss": 1.624, "step": 54080 }, { "epoch": 1.9230290640831926, "grad_norm": 1.8309038877487183, "learning_rate": 6.25682508164511e-06, "loss": 1.6487, "step": 54090 }, { "epoch": 1.923384588036619, "grad_norm": 2.033742904663086, "learning_rate": 6.253191182036562e-06, "loss": 1.6486, "step": 54100 }, { "epoch": 1.9237401119900452, "grad_norm": 1.9058263301849365, "learning_rate": 6.249557857914354e-06, "loss": 1.642, "step": 54110 }, { "epoch": 1.9240956359434715, "grad_norm": 2.0782461166381836, "learning_rate": 6.245925109836542e-06, "loss": 1.6093, "step": 54120 }, { "epoch": 1.924451159896898, "grad_norm": 1.862004041671753, "learning_rate": 6.242292938361096e-06, "loss": 1.6631, "step": 54130 }, { "epoch": 1.9248066838503244, "grad_norm": 1.8530863523483276, "learning_rate": 6.2386613440458936e-06, "loss": 1.6516, "step": 54140 }, { "epoch": 1.925162207803751, "grad_norm": 1.9508360624313354, "learning_rate": 6.235030327448726e-06, "loss": 1.6512, "step": 54150 }, { "epoch": 1.9255177317571772, "grad_norm": 1.7762582302093506, "learning_rate": 6.231399889127292e-06, "loss": 1.624, "step": 54160 }, { "epoch": 1.9258732557106035, "grad_norm": 2.1135988235473633, "learning_rate": 6.227770029639206e-06, "loss": 1.6372, "step": 54170 }, { "epoch": 1.9262287796640298, "grad_norm": 1.9216655492782593, "learning_rate": 6.2241407495419916e-06, "loss": 1.6046, "step": 54180 }, { "epoch": 1.9265843036174561, "grad_norm": 1.995802879333496, "learning_rate": 6.220512049393082e-06, "loss": 1.6022, "step": 54190 }, { "epoch": 1.9269398275708824, "grad_norm": 1.8913733959197998, "learning_rate": 6.216883929749826e-06, "loss": 1.6214, "step": 54200 }, { "epoch": 1.927295351524309, "grad_norm": 1.824079155921936, "learning_rate": 6.213256391169478e-06, "loss": 1.6018, "step": 54210 }, { "epoch": 1.9276508754777353, "grad_norm": 1.9052177667617798, "learning_rate": 6.209629434209203e-06, "loss": 1.6245, "step": 54220 }, { "epoch": 1.9280063994311618, "grad_norm": 2.0674831867218018, "learning_rate": 6.2060030594260836e-06, "loss": 1.6535, "step": 54230 }, { "epoch": 1.9283619233845881, "grad_norm": 1.8703824281692505, "learning_rate": 6.202377267377104e-06, "loss": 1.601, "step": 54240 }, { "epoch": 1.9287174473380144, "grad_norm": 1.8373008966445923, "learning_rate": 6.1987520586191694e-06, "loss": 1.5962, "step": 54250 }, { "epoch": 1.9290729712914407, "grad_norm": 1.872745156288147, "learning_rate": 6.19512743370908e-06, "loss": 1.6289, "step": 54260 }, { "epoch": 1.929428495244867, "grad_norm": 1.81459641456604, "learning_rate": 6.191503393203561e-06, "loss": 1.6279, "step": 54270 }, { "epoch": 1.9297840191982933, "grad_norm": 2.0255491733551025, "learning_rate": 6.187879937659242e-06, "loss": 1.6352, "step": 54280 }, { "epoch": 1.9301395431517199, "grad_norm": 1.938364863395691, "learning_rate": 6.184257067632662e-06, "loss": 1.6082, "step": 54290 }, { "epoch": 1.9304950671051462, "grad_norm": 2.025392532348633, "learning_rate": 6.180634783680272e-06, "loss": 1.6319, "step": 54300 }, { "epoch": 1.9308505910585727, "grad_norm": 2.0753066539764404, "learning_rate": 6.17701308635843e-06, "loss": 1.6358, "step": 54310 }, { "epoch": 1.931206115011999, "grad_norm": 1.9253500699996948, "learning_rate": 6.173391976223408e-06, "loss": 1.603, "step": 54320 }, { "epoch": 1.9315616389654253, "grad_norm": 1.9665428400039673, "learning_rate": 6.169771453831388e-06, "loss": 1.6557, "step": 54330 }, { "epoch": 1.9319171629188516, "grad_norm": 1.876198649406433, "learning_rate": 6.166151519738454e-06, "loss": 1.659, "step": 54340 }, { "epoch": 1.932272686872278, "grad_norm": 2.0440635681152344, "learning_rate": 6.162532174500608e-06, "loss": 1.5992, "step": 54350 }, { "epoch": 1.9326282108257042, "grad_norm": 1.954292893409729, "learning_rate": 6.1589134186737594e-06, "loss": 1.6207, "step": 54360 }, { "epoch": 1.9329837347791308, "grad_norm": 1.9511399269104004, "learning_rate": 6.155295252813726e-06, "loss": 1.644, "step": 54370 }, { "epoch": 1.933339258732557, "grad_norm": 1.899208426475525, "learning_rate": 6.151677677476235e-06, "loss": 1.6321, "step": 54380 }, { "epoch": 1.9336947826859836, "grad_norm": 1.962569236755371, "learning_rate": 6.148060693216926e-06, "loss": 1.6182, "step": 54390 }, { "epoch": 1.93405030663941, "grad_norm": 1.9431875944137573, "learning_rate": 6.144444300591341e-06, "loss": 1.6533, "step": 54400 }, { "epoch": 1.9344058305928362, "grad_norm": 1.917087435722351, "learning_rate": 6.14082850015494e-06, "loss": 1.6401, "step": 54410 }, { "epoch": 1.9347613545462625, "grad_norm": 2.0295639038085938, "learning_rate": 6.137213292463081e-06, "loss": 1.6772, "step": 54420 }, { "epoch": 1.9351168784996888, "grad_norm": 2.0047333240509033, "learning_rate": 6.13359867807104e-06, "loss": 1.6686, "step": 54430 }, { "epoch": 1.9354724024531151, "grad_norm": 1.8973724842071533, "learning_rate": 6.1299846575339995e-06, "loss": 1.6447, "step": 54440 }, { "epoch": 1.9358279264065417, "grad_norm": 1.8601739406585693, "learning_rate": 6.126371231407051e-06, "loss": 1.6251, "step": 54450 }, { "epoch": 1.936183450359968, "grad_norm": 1.846688151359558, "learning_rate": 6.122758400245195e-06, "loss": 1.6743, "step": 54460 }, { "epoch": 1.9365389743133945, "grad_norm": 1.6897163391113281, "learning_rate": 6.119146164603335e-06, "loss": 1.5958, "step": 54470 }, { "epoch": 1.9368944982668208, "grad_norm": 1.7965532541275024, "learning_rate": 6.115534525036293e-06, "loss": 1.6697, "step": 54480 }, { "epoch": 1.9372500222202471, "grad_norm": 1.9531224966049194, "learning_rate": 6.111923482098791e-06, "loss": 1.6911, "step": 54490 }, { "epoch": 1.9376055461736734, "grad_norm": 1.8963346481323242, "learning_rate": 6.108313036345465e-06, "loss": 1.6553, "step": 54500 }, { "epoch": 1.9379610701270997, "grad_norm": 1.7927358150482178, "learning_rate": 6.1047031883308565e-06, "loss": 1.635, "step": 54510 }, { "epoch": 1.938316594080526, "grad_norm": 1.7936055660247803, "learning_rate": 6.101093938609413e-06, "loss": 1.6526, "step": 54520 }, { "epoch": 1.9386721180339526, "grad_norm": 1.9310426712036133, "learning_rate": 6.097485287735493e-06, "loss": 1.6477, "step": 54530 }, { "epoch": 1.9390276419873789, "grad_norm": 2.0859527587890625, "learning_rate": 6.093877236263365e-06, "loss": 1.6356, "step": 54540 }, { "epoch": 1.9393831659408054, "grad_norm": 1.9422963857650757, "learning_rate": 6.090269784747201e-06, "loss": 1.6431, "step": 54550 }, { "epoch": 1.9397386898942317, "grad_norm": 1.8831487894058228, "learning_rate": 6.086662933741085e-06, "loss": 1.6067, "step": 54560 }, { "epoch": 1.940094213847658, "grad_norm": 2.097904682159424, "learning_rate": 6.083056683799009e-06, "loss": 1.6668, "step": 54570 }, { "epoch": 1.9404497378010843, "grad_norm": 1.8286104202270508, "learning_rate": 6.079451035474864e-06, "loss": 1.5989, "step": 54580 }, { "epoch": 1.9408052617545106, "grad_norm": 2.1394026279449463, "learning_rate": 6.075845989322457e-06, "loss": 1.6453, "step": 54590 }, { "epoch": 1.941160785707937, "grad_norm": 2.041598320007324, "learning_rate": 6.072241545895503e-06, "loss": 1.6452, "step": 54600 }, { "epoch": 1.9415163096613635, "grad_norm": 1.9624253511428833, "learning_rate": 6.068637705747617e-06, "loss": 1.6554, "step": 54610 }, { "epoch": 1.9418718336147898, "grad_norm": 1.9965091943740845, "learning_rate": 6.065034469432332e-06, "loss": 1.6203, "step": 54620 }, { "epoch": 1.9422273575682163, "grad_norm": 1.9117686748504639, "learning_rate": 6.06143183750308e-06, "loss": 1.6907, "step": 54630 }, { "epoch": 1.9425828815216426, "grad_norm": 1.9436414241790771, "learning_rate": 6.057829810513204e-06, "loss": 1.6389, "step": 54640 }, { "epoch": 1.942938405475069, "grad_norm": 2.0671372413635254, "learning_rate": 6.05422838901595e-06, "loss": 1.629, "step": 54650 }, { "epoch": 1.9432939294284952, "grad_norm": 2.0150489807128906, "learning_rate": 6.050627573564474e-06, "loss": 1.6819, "step": 54660 }, { "epoch": 1.9436494533819215, "grad_norm": 2.0774424076080322, "learning_rate": 6.047027364711842e-06, "loss": 1.6285, "step": 54670 }, { "epoch": 1.9440049773353478, "grad_norm": 1.8574728965759277, "learning_rate": 6.0434277630110195e-06, "loss": 1.6132, "step": 54680 }, { "epoch": 1.9443605012887744, "grad_norm": 1.8759816884994507, "learning_rate": 6.0398287690148864e-06, "loss": 1.6227, "step": 54690 }, { "epoch": 1.9447160252422007, "grad_norm": 2.022251844406128, "learning_rate": 6.036230383276224e-06, "loss": 1.6282, "step": 54700 }, { "epoch": 1.9450715491956272, "grad_norm": 1.8638499975204468, "learning_rate": 6.03263260634772e-06, "loss": 1.6657, "step": 54710 }, { "epoch": 1.9454270731490535, "grad_norm": 1.9763329029083252, "learning_rate": 6.029035438781973e-06, "loss": 1.6613, "step": 54720 }, { "epoch": 1.9457825971024798, "grad_norm": 2.0072896480560303, "learning_rate": 6.025438881131489e-06, "loss": 1.6344, "step": 54730 }, { "epoch": 1.9461381210559061, "grad_norm": 2.0064783096313477, "learning_rate": 6.021842933948667e-06, "loss": 1.6461, "step": 54740 }, { "epoch": 1.9464936450093324, "grad_norm": 1.7632877826690674, "learning_rate": 6.018247597785827e-06, "loss": 1.671, "step": 54750 }, { "epoch": 1.9468491689627587, "grad_norm": 1.792749285697937, "learning_rate": 6.01465287319519e-06, "loss": 1.6333, "step": 54760 }, { "epoch": 1.9472046929161853, "grad_norm": 1.7539933919906616, "learning_rate": 6.011058760728887e-06, "loss": 1.6139, "step": 54770 }, { "epoch": 1.9475602168696116, "grad_norm": 2.02398943901062, "learning_rate": 6.007465260938945e-06, "loss": 1.6289, "step": 54780 }, { "epoch": 1.947915740823038, "grad_norm": 1.932620644569397, "learning_rate": 6.003872374377304e-06, "loss": 1.6382, "step": 54790 }, { "epoch": 1.9482712647764644, "grad_norm": 1.8369656801223755, "learning_rate": 6.000280101595812e-06, "loss": 1.6346, "step": 54800 }, { "epoch": 1.9486267887298907, "grad_norm": 1.9664580821990967, "learning_rate": 5.996688443146217e-06, "loss": 1.5874, "step": 54810 }, { "epoch": 1.948982312683317, "grad_norm": 1.980931282043457, "learning_rate": 5.993097399580177e-06, "loss": 1.6441, "step": 54820 }, { "epoch": 1.9493378366367433, "grad_norm": 2.005143880844116, "learning_rate": 5.989506971449255e-06, "loss": 1.6219, "step": 54830 }, { "epoch": 1.9496933605901696, "grad_norm": 1.9172461032867432, "learning_rate": 5.985917159304912e-06, "loss": 1.6612, "step": 54840 }, { "epoch": 1.9500488845435962, "grad_norm": 1.9985624551773071, "learning_rate": 5.982327963698528e-06, "loss": 1.6124, "step": 54850 }, { "epoch": 1.9504044084970225, "grad_norm": 1.9466190338134766, "learning_rate": 5.978739385181376e-06, "loss": 1.6695, "step": 54860 }, { "epoch": 1.950759932450449, "grad_norm": 1.8929356336593628, "learning_rate": 5.975151424304641e-06, "loss": 1.6219, "step": 54870 }, { "epoch": 1.9511154564038753, "grad_norm": 1.8140504360198975, "learning_rate": 5.971564081619414e-06, "loss": 1.6497, "step": 54880 }, { "epoch": 1.9514709803573016, "grad_norm": 1.8329968452453613, "learning_rate": 5.967977357676684e-06, "loss": 1.6418, "step": 54890 }, { "epoch": 1.951826504310728, "grad_norm": 1.822810173034668, "learning_rate": 5.964391253027353e-06, "loss": 1.6689, "step": 54900 }, { "epoch": 1.9521820282641542, "grad_norm": 1.939647912979126, "learning_rate": 5.9608057682222194e-06, "loss": 1.6432, "step": 54910 }, { "epoch": 1.9525375522175805, "grad_norm": 2.139228343963623, "learning_rate": 5.957220903811993e-06, "loss": 1.6232, "step": 54920 }, { "epoch": 1.952893076171007, "grad_norm": 1.8620997667312622, "learning_rate": 5.953636660347288e-06, "loss": 1.6186, "step": 54930 }, { "epoch": 1.9532486001244334, "grad_norm": 1.9274126291275024, "learning_rate": 5.9500530383786205e-06, "loss": 1.6124, "step": 54940 }, { "epoch": 1.9536041240778599, "grad_norm": 1.7978813648223877, "learning_rate": 5.946470038456416e-06, "loss": 1.5766, "step": 54950 }, { "epoch": 1.9539596480312862, "grad_norm": 1.954411268234253, "learning_rate": 5.942887661130995e-06, "loss": 1.6011, "step": 54960 }, { "epoch": 1.9543151719847125, "grad_norm": 2.41074800491333, "learning_rate": 5.93930590695259e-06, "loss": 1.6419, "step": 54970 }, { "epoch": 1.9546706959381388, "grad_norm": 1.847296953201294, "learning_rate": 5.935724776471339e-06, "loss": 1.6543, "step": 54980 }, { "epoch": 1.955026219891565, "grad_norm": 1.9085177183151245, "learning_rate": 5.932144270237279e-06, "loss": 1.6094, "step": 54990 }, { "epoch": 1.9553817438449914, "grad_norm": 1.9735503196716309, "learning_rate": 5.9285643888003516e-06, "loss": 1.6406, "step": 55000 }, { "epoch": 1.955737267798418, "grad_norm": 1.8311965465545654, "learning_rate": 5.924985132710409e-06, "loss": 1.6452, "step": 55010 }, { "epoch": 1.9560927917518443, "grad_norm": 2.274158239364624, "learning_rate": 5.921406502517197e-06, "loss": 1.6486, "step": 55020 }, { "epoch": 1.9564483157052708, "grad_norm": 1.9504927396774292, "learning_rate": 5.917828498770375e-06, "loss": 1.6552, "step": 55030 }, { "epoch": 1.956803839658697, "grad_norm": 1.9453336000442505, "learning_rate": 5.9142511220194985e-06, "loss": 1.614, "step": 55040 }, { "epoch": 1.9571593636121234, "grad_norm": 1.955653190612793, "learning_rate": 5.910674372814036e-06, "loss": 1.62, "step": 55050 }, { "epoch": 1.9575148875655497, "grad_norm": 1.9923677444458008, "learning_rate": 5.907098251703345e-06, "loss": 1.6377, "step": 55060 }, { "epoch": 1.957870411518976, "grad_norm": 1.9610176086425781, "learning_rate": 5.903522759236702e-06, "loss": 1.6179, "step": 55070 }, { "epoch": 1.9582259354724023, "grad_norm": 1.9300296306610107, "learning_rate": 5.899947895963279e-06, "loss": 1.6107, "step": 55080 }, { "epoch": 1.9585814594258288, "grad_norm": 2.070293426513672, "learning_rate": 5.896373662432149e-06, "loss": 1.6164, "step": 55090 }, { "epoch": 1.9589369833792551, "grad_norm": 1.903748631477356, "learning_rate": 5.892800059192294e-06, "loss": 1.6447, "step": 55100 }, { "epoch": 1.9592925073326817, "grad_norm": 2.0441808700561523, "learning_rate": 5.889227086792598e-06, "loss": 1.6361, "step": 55110 }, { "epoch": 1.959648031286108, "grad_norm": 1.9533919095993042, "learning_rate": 5.885654745781848e-06, "loss": 1.6185, "step": 55120 }, { "epoch": 1.9600035552395343, "grad_norm": 2.0124576091766357, "learning_rate": 5.88208303670873e-06, "loss": 1.6174, "step": 55130 }, { "epoch": 1.9603590791929606, "grad_norm": 2.0306930541992188, "learning_rate": 5.878511960121842e-06, "loss": 1.6283, "step": 55140 }, { "epoch": 1.960714603146387, "grad_norm": 1.9483821392059326, "learning_rate": 5.8749415165696725e-06, "loss": 1.6634, "step": 55150 }, { "epoch": 1.9610701270998132, "grad_norm": 2.05631422996521, "learning_rate": 5.871371706600621e-06, "loss": 1.6079, "step": 55160 }, { "epoch": 1.9614256510532397, "grad_norm": 1.9040125608444214, "learning_rate": 5.867802530762988e-06, "loss": 1.6189, "step": 55170 }, { "epoch": 1.961781175006666, "grad_norm": 1.896045446395874, "learning_rate": 5.864233989604978e-06, "loss": 1.605, "step": 55180 }, { "epoch": 1.9621366989600926, "grad_norm": 1.9672343730926514, "learning_rate": 5.8606660836746985e-06, "loss": 1.6656, "step": 55190 }, { "epoch": 1.9624922229135189, "grad_norm": 2.0945780277252197, "learning_rate": 5.857098813520152e-06, "loss": 1.6252, "step": 55200 }, { "epoch": 1.9628477468669452, "grad_norm": 1.7114602327346802, "learning_rate": 5.853532179689256e-06, "loss": 1.657, "step": 55210 }, { "epoch": 1.9632032708203715, "grad_norm": 2.0863399505615234, "learning_rate": 5.849966182729814e-06, "loss": 1.649, "step": 55220 }, { "epoch": 1.9635587947737978, "grad_norm": 1.833872675895691, "learning_rate": 5.846400823189546e-06, "loss": 1.6404, "step": 55230 }, { "epoch": 1.963914318727224, "grad_norm": 1.8471561670303345, "learning_rate": 5.842836101616067e-06, "loss": 1.6303, "step": 55240 }, { "epoch": 1.9642698426806506, "grad_norm": 1.9319822788238525, "learning_rate": 5.839272018556899e-06, "loss": 1.6363, "step": 55250 }, { "epoch": 1.964625366634077, "grad_norm": 1.969245433807373, "learning_rate": 5.83570857455946e-06, "loss": 1.5972, "step": 55260 }, { "epoch": 1.9649808905875035, "grad_norm": 1.8855727910995483, "learning_rate": 5.832145770171074e-06, "loss": 1.6405, "step": 55270 }, { "epoch": 1.9653364145409298, "grad_norm": 1.7602448463439941, "learning_rate": 5.828583605938964e-06, "loss": 1.6767, "step": 55280 }, { "epoch": 1.965691938494356, "grad_norm": 1.832932710647583, "learning_rate": 5.825022082410263e-06, "loss": 1.6609, "step": 55290 }, { "epoch": 1.9660474624477824, "grad_norm": 1.9036272764205933, "learning_rate": 5.821461200131988e-06, "loss": 1.6552, "step": 55300 }, { "epoch": 1.9664029864012087, "grad_norm": 2.110887050628662, "learning_rate": 5.817900959651072e-06, "loss": 1.6909, "step": 55310 }, { "epoch": 1.966758510354635, "grad_norm": 1.8974913358688354, "learning_rate": 5.814341361514349e-06, "loss": 1.61, "step": 55320 }, { "epoch": 1.9671140343080615, "grad_norm": 1.991543173789978, "learning_rate": 5.810782406268546e-06, "loss": 1.6339, "step": 55330 }, { "epoch": 1.9674695582614878, "grad_norm": 1.9699667692184448, "learning_rate": 5.8072240944603e-06, "loss": 1.5963, "step": 55340 }, { "epoch": 1.9678250822149144, "grad_norm": 1.982343316078186, "learning_rate": 5.803666426636145e-06, "loss": 1.7, "step": 55350 }, { "epoch": 1.9681806061683407, "grad_norm": 1.8685624599456787, "learning_rate": 5.800109403342513e-06, "loss": 1.6561, "step": 55360 }, { "epoch": 1.968536130121767, "grad_norm": 1.9771759510040283, "learning_rate": 5.7965530251257495e-06, "loss": 1.6414, "step": 55370 }, { "epoch": 1.9688916540751933, "grad_norm": 1.860378384590149, "learning_rate": 5.792997292532081e-06, "loss": 1.5819, "step": 55380 }, { "epoch": 1.9692471780286196, "grad_norm": 1.8745434284210205, "learning_rate": 5.789442206107649e-06, "loss": 1.6408, "step": 55390 }, { "epoch": 1.969602701982046, "grad_norm": 1.8563114404678345, "learning_rate": 5.785887766398496e-06, "loss": 1.6107, "step": 55400 }, { "epoch": 1.9699582259354724, "grad_norm": 2.024580240249634, "learning_rate": 5.782333973950558e-06, "loss": 1.6539, "step": 55410 }, { "epoch": 1.9703137498888987, "grad_norm": 1.8910824060440063, "learning_rate": 5.7787808293096815e-06, "loss": 1.6408, "step": 55420 }, { "epoch": 1.9706692738423253, "grad_norm": 1.897397756576538, "learning_rate": 5.775228333021597e-06, "loss": 1.6104, "step": 55430 }, { "epoch": 1.9710247977957516, "grad_norm": 1.880532145500183, "learning_rate": 5.771676485631952e-06, "loss": 1.6784, "step": 55440 }, { "epoch": 1.9713803217491779, "grad_norm": 1.9245859384536743, "learning_rate": 5.768125287686287e-06, "loss": 1.6261, "step": 55450 }, { "epoch": 1.9717358457026042, "grad_norm": 1.824560523033142, "learning_rate": 5.764574739730043e-06, "loss": 1.6433, "step": 55460 }, { "epoch": 1.9720913696560305, "grad_norm": 1.9456140995025635, "learning_rate": 5.761024842308564e-06, "loss": 1.6675, "step": 55470 }, { "epoch": 1.9724468936094568, "grad_norm": 1.9762824773788452, "learning_rate": 5.7574755959670906e-06, "loss": 1.6517, "step": 55480 }, { "epoch": 1.9728024175628833, "grad_norm": 1.9311729669570923, "learning_rate": 5.753927001250763e-06, "loss": 1.6418, "step": 55490 }, { "epoch": 1.9731579415163096, "grad_norm": 2.19989013671875, "learning_rate": 5.750379058704626e-06, "loss": 1.6751, "step": 55500 }, { "epoch": 1.9735134654697362, "grad_norm": 1.8706977367401123, "learning_rate": 5.746831768873619e-06, "loss": 1.589, "step": 55510 }, { "epoch": 1.9738689894231625, "grad_norm": 2.1740660667419434, "learning_rate": 5.743285132302588e-06, "loss": 1.6747, "step": 55520 }, { "epoch": 1.9742245133765888, "grad_norm": 1.9190523624420166, "learning_rate": 5.739739149536272e-06, "loss": 1.6302, "step": 55530 }, { "epoch": 1.974580037330015, "grad_norm": 1.9370909929275513, "learning_rate": 5.736193821119307e-06, "loss": 1.6445, "step": 55540 }, { "epoch": 1.9749355612834414, "grad_norm": 1.9816778898239136, "learning_rate": 5.732649147596242e-06, "loss": 1.651, "step": 55550 }, { "epoch": 1.9752910852368677, "grad_norm": 1.9557394981384277, "learning_rate": 5.7291051295115065e-06, "loss": 1.6275, "step": 55560 }, { "epoch": 1.9756466091902942, "grad_norm": 1.9068324565887451, "learning_rate": 5.7255617674094445e-06, "loss": 1.6153, "step": 55570 }, { "epoch": 1.9760021331437205, "grad_norm": 1.999617338180542, "learning_rate": 5.7220190618342944e-06, "loss": 1.6201, "step": 55580 }, { "epoch": 1.976357657097147, "grad_norm": 1.916778326034546, "learning_rate": 5.718477013330193e-06, "loss": 1.6376, "step": 55590 }, { "epoch": 1.9767131810505734, "grad_norm": 1.9432013034820557, "learning_rate": 5.714935622441177e-06, "loss": 1.6546, "step": 55600 }, { "epoch": 1.9770687050039997, "grad_norm": 1.8765946626663208, "learning_rate": 5.711394889711181e-06, "loss": 1.6364, "step": 55610 }, { "epoch": 1.977424228957426, "grad_norm": 2.0579068660736084, "learning_rate": 5.707854815684042e-06, "loss": 1.624, "step": 55620 }, { "epoch": 1.9777797529108523, "grad_norm": 1.9126039743423462, "learning_rate": 5.704315400903491e-06, "loss": 1.658, "step": 55630 }, { "epoch": 1.9781352768642786, "grad_norm": 1.9604213237762451, "learning_rate": 5.700776645913159e-06, "loss": 1.6575, "step": 55640 }, { "epoch": 1.9784908008177051, "grad_norm": 1.833206295967102, "learning_rate": 5.697238551256579e-06, "loss": 1.5933, "step": 55650 }, { "epoch": 1.9788463247711314, "grad_norm": 2.0333101749420166, "learning_rate": 5.693701117477182e-06, "loss": 1.7015, "step": 55660 }, { "epoch": 1.979201848724558, "grad_norm": 1.8384674787521362, "learning_rate": 5.690164345118289e-06, "loss": 1.5944, "step": 55670 }, { "epoch": 1.9795573726779843, "grad_norm": 1.8767904043197632, "learning_rate": 5.68662823472313e-06, "loss": 1.6359, "step": 55680 }, { "epoch": 1.9799128966314106, "grad_norm": 2.3191983699798584, "learning_rate": 5.683092786834833e-06, "loss": 1.6399, "step": 55690 }, { "epoch": 1.9802684205848369, "grad_norm": 1.9148094654083252, "learning_rate": 5.6795580019964125e-06, "loss": 1.6165, "step": 55700 }, { "epoch": 1.9806239445382632, "grad_norm": 1.9649780988693237, "learning_rate": 5.676023880750794e-06, "loss": 1.6495, "step": 55710 }, { "epoch": 1.9809794684916895, "grad_norm": 1.7389864921569824, "learning_rate": 5.672490423640795e-06, "loss": 1.6209, "step": 55720 }, { "epoch": 1.981334992445116, "grad_norm": 1.940738558769226, "learning_rate": 5.668957631209133e-06, "loss": 1.5906, "step": 55730 }, { "epoch": 1.9816905163985423, "grad_norm": 1.726500153541565, "learning_rate": 5.6654255039984256e-06, "loss": 1.5935, "step": 55740 }, { "epoch": 1.9820460403519689, "grad_norm": 1.841575026512146, "learning_rate": 5.661894042551181e-06, "loss": 1.6638, "step": 55750 }, { "epoch": 1.9824015643053952, "grad_norm": 1.9646481275558472, "learning_rate": 5.658363247409811e-06, "loss": 1.6413, "step": 55760 }, { "epoch": 1.9827570882588215, "grad_norm": 1.8491153717041016, "learning_rate": 5.654833119116624e-06, "loss": 1.6231, "step": 55770 }, { "epoch": 1.9831126122122478, "grad_norm": 1.9587262868881226, "learning_rate": 5.651303658213825e-06, "loss": 1.6477, "step": 55780 }, { "epoch": 1.983468136165674, "grad_norm": 1.9501127004623413, "learning_rate": 5.647774865243523e-06, "loss": 1.6247, "step": 55790 }, { "epoch": 1.9838236601191004, "grad_norm": 1.8898067474365234, "learning_rate": 5.644246740747707e-06, "loss": 1.6378, "step": 55800 }, { "epoch": 1.984179184072527, "grad_norm": 1.9823150634765625, "learning_rate": 5.640719285268284e-06, "loss": 1.6399, "step": 55810 }, { "epoch": 1.9845347080259532, "grad_norm": 1.9178792238235474, "learning_rate": 5.637192499347044e-06, "loss": 1.6473, "step": 55820 }, { "epoch": 1.9848902319793797, "grad_norm": 2.044755458831787, "learning_rate": 5.63366638352568e-06, "loss": 1.6059, "step": 55830 }, { "epoch": 1.985245755932806, "grad_norm": 2.0040781497955322, "learning_rate": 5.630140938345784e-06, "loss": 1.6196, "step": 55840 }, { "epoch": 1.9856012798862324, "grad_norm": 1.8934400081634521, "learning_rate": 5.626616164348844e-06, "loss": 1.6292, "step": 55850 }, { "epoch": 1.9859568038396587, "grad_norm": 1.950329303741455, "learning_rate": 5.623092062076236e-06, "loss": 1.6541, "step": 55860 }, { "epoch": 1.986312327793085, "grad_norm": 1.837617039680481, "learning_rate": 5.619568632069243e-06, "loss": 1.6316, "step": 55870 }, { "epoch": 1.9866678517465113, "grad_norm": 1.951453685760498, "learning_rate": 5.616045874869045e-06, "loss": 1.6863, "step": 55880 }, { "epoch": 1.9870233756999378, "grad_norm": 1.8173454999923706, "learning_rate": 5.612523791016711e-06, "loss": 1.5941, "step": 55890 }, { "epoch": 1.9873788996533641, "grad_norm": 1.9951165914535522, "learning_rate": 5.609002381053213e-06, "loss": 1.6386, "step": 55900 }, { "epoch": 1.9877344236067906, "grad_norm": 1.8609397411346436, "learning_rate": 5.605481645519422e-06, "loss": 1.6426, "step": 55910 }, { "epoch": 1.988089947560217, "grad_norm": 1.9778833389282227, "learning_rate": 5.601961584956093e-06, "loss": 1.6824, "step": 55920 }, { "epoch": 1.9884454715136433, "grad_norm": 1.9272605180740356, "learning_rate": 5.598442199903887e-06, "loss": 1.63, "step": 55930 }, { "epoch": 1.9888009954670696, "grad_norm": 2.024440050125122, "learning_rate": 5.594923490903363e-06, "loss": 1.6416, "step": 55940 }, { "epoch": 1.9891565194204959, "grad_norm": 1.9837208986282349, "learning_rate": 5.591405458494969e-06, "loss": 1.6337, "step": 55950 }, { "epoch": 1.9895120433739222, "grad_norm": 1.9372029304504395, "learning_rate": 5.5878881032190555e-06, "loss": 1.6284, "step": 55960 }, { "epoch": 1.9898675673273487, "grad_norm": 2.217460870742798, "learning_rate": 5.584371425615865e-06, "loss": 1.6289, "step": 55970 }, { "epoch": 1.990223091280775, "grad_norm": 1.9019287824630737, "learning_rate": 5.580855426225538e-06, "loss": 1.6128, "step": 55980 }, { "epoch": 1.9905786152342015, "grad_norm": 2.099592924118042, "learning_rate": 5.577340105588109e-06, "loss": 1.6096, "step": 55990 }, { "epoch": 1.9909341391876278, "grad_norm": 1.902306079864502, "learning_rate": 5.573825464243508e-06, "loss": 1.6055, "step": 56000 }, { "epoch": 1.9912896631410542, "grad_norm": 1.955054521560669, "learning_rate": 5.570311502731569e-06, "loss": 1.627, "step": 56010 }, { "epoch": 1.9916451870944805, "grad_norm": 1.910407304763794, "learning_rate": 5.566798221592005e-06, "loss": 1.5674, "step": 56020 }, { "epoch": 1.9920007110479068, "grad_norm": 1.9375877380371094, "learning_rate": 5.563285621364436e-06, "loss": 1.6361, "step": 56030 }, { "epoch": 1.992356235001333, "grad_norm": 1.9428280591964722, "learning_rate": 5.559773702588382e-06, "loss": 1.6032, "step": 56040 }, { "epoch": 1.9927117589547596, "grad_norm": 2.0371007919311523, "learning_rate": 5.556262465803244e-06, "loss": 1.6356, "step": 56050 }, { "epoch": 1.993067282908186, "grad_norm": 1.87294340133667, "learning_rate": 5.552751911548328e-06, "loss": 1.6167, "step": 56060 }, { "epoch": 1.9934228068616124, "grad_norm": 2.201530933380127, "learning_rate": 5.5492420403628335e-06, "loss": 1.6139, "step": 56070 }, { "epoch": 1.9937783308150387, "grad_norm": 1.8459148406982422, "learning_rate": 5.5457328527858546e-06, "loss": 1.6256, "step": 56080 }, { "epoch": 1.994133854768465, "grad_norm": 1.781484842300415, "learning_rate": 5.542224349356382e-06, "loss": 1.6231, "step": 56090 }, { "epoch": 1.9944893787218914, "grad_norm": 1.799372673034668, "learning_rate": 5.5387165306133e-06, "loss": 1.6064, "step": 56100 }, { "epoch": 1.9948449026753177, "grad_norm": 1.8162728548049927, "learning_rate": 5.5352093970953844e-06, "loss": 1.6262, "step": 56110 }, { "epoch": 1.995200426628744, "grad_norm": 1.9241447448730469, "learning_rate": 5.531702949341311e-06, "loss": 1.6323, "step": 56120 }, { "epoch": 1.9955559505821705, "grad_norm": 1.852016806602478, "learning_rate": 5.5281971878896505e-06, "loss": 1.6684, "step": 56130 }, { "epoch": 1.9959114745355968, "grad_norm": 1.8842954635620117, "learning_rate": 5.524692113278861e-06, "loss": 1.6066, "step": 56140 }, { "epoch": 1.9962669984890233, "grad_norm": 1.9963688850402832, "learning_rate": 5.521187726047308e-06, "loss": 1.6465, "step": 56150 }, { "epoch": 1.9966225224424496, "grad_norm": 1.9112470149993896, "learning_rate": 5.517684026733232e-06, "loss": 1.6166, "step": 56160 }, { "epoch": 1.996978046395876, "grad_norm": 1.9450349807739258, "learning_rate": 5.514181015874791e-06, "loss": 1.6156, "step": 56170 }, { "epoch": 1.9973335703493023, "grad_norm": 1.9551585912704468, "learning_rate": 5.510678694010016e-06, "loss": 1.5933, "step": 56180 }, { "epoch": 1.9976890943027286, "grad_norm": 1.9778324365615845, "learning_rate": 5.507177061676844e-06, "loss": 1.6367, "step": 56190 }, { "epoch": 1.9980446182561549, "grad_norm": 1.8860313892364502, "learning_rate": 5.503676119413106e-06, "loss": 1.6546, "step": 56200 }, { "epoch": 1.9984001422095814, "grad_norm": 1.9805750846862793, "learning_rate": 5.500175867756523e-06, "loss": 1.6811, "step": 56210 }, { "epoch": 1.9987556661630077, "grad_norm": 1.9156203269958496, "learning_rate": 5.496676307244713e-06, "loss": 1.6241, "step": 56220 }, { "epoch": 1.9991111901164342, "grad_norm": 1.8544695377349854, "learning_rate": 5.493177438415186e-06, "loss": 1.6506, "step": 56230 }, { "epoch": 1.9994667140698605, "grad_norm": 1.9403393268585205, "learning_rate": 5.489679261805347e-06, "loss": 1.6226, "step": 56240 }, { "epoch": 1.9998222380232868, "grad_norm": 1.8933693170547485, "learning_rate": 5.486181777952493e-06, "loss": 1.6402, "step": 56250 }, { "epoch": 2.0, "eval_loss": 1.720118761062622, "eval_runtime": 9.6697, "eval_samples_per_second": 105.898, "eval_steps_per_second": 1.655, "step": 56255 } ], "logging_steps": 10, "max_steps": 84381, "num_input_tokens_seen": 0, "num_train_epochs": 3, "save_steps": 250, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": false }, "attributes": {} } }, "total_flos": 7.815794246966837e+18, "train_batch_size": 64, "trial_name": null, "trial_params": null }