{ "best_metric": null, "best_model_checkpoint": null, "epoch": 0.9994739610731194, "eval_steps": 500, "global_step": 9500, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.00010520778537611783, "grad_norm": 1.57374906539917, "learning_rate": 6.993006993006994e-07, "loss": 2.7842, "step": 1 }, { "epoch": 0.00021041557075223566, "grad_norm": 1.4931204319000244, "learning_rate": 1.3986013986013987e-06, "loss": 2.1824, "step": 2 }, { "epoch": 0.0003156233561283535, "grad_norm": 1.372463583946228, "learning_rate": 2.0979020979020983e-06, "loss": 2.2936, "step": 3 }, { "epoch": 0.0004208311415044713, "grad_norm": 1.475909948348999, "learning_rate": 2.7972027972027974e-06, "loss": 2.4173, "step": 4 }, { "epoch": 0.0005260389268805891, "grad_norm": 2.1777091026306152, "learning_rate": 3.496503496503497e-06, "loss": 2.1757, "step": 5 }, { "epoch": 0.000631246712256707, "grad_norm": 1.7631592750549316, "learning_rate": 4.195804195804197e-06, "loss": 2.4156, "step": 6 }, { "epoch": 0.0007364544976328248, "grad_norm": 1.8002638816833496, "learning_rate": 4.895104895104895e-06, "loss": 2.3946, "step": 7 }, { "epoch": 0.0008416622830089426, "grad_norm": 1.8707481622695923, "learning_rate": 5.594405594405595e-06, "loss": 2.1748, "step": 8 }, { "epoch": 0.0009468700683850605, "grad_norm": 2.459454298019409, "learning_rate": 6.2937062937062944e-06, "loss": 2.4425, "step": 9 }, { "epoch": 0.0010520778537611783, "grad_norm": 2.8795626163482666, "learning_rate": 6.993006993006994e-06, "loss": 2.0042, "step": 10 }, { "epoch": 0.0011572856391372961, "grad_norm": 1.7612422704696655, "learning_rate": 7.692307692307694e-06, "loss": 2.2756, "step": 11 }, { "epoch": 0.001262493424513414, "grad_norm": 1.7078272104263306, "learning_rate": 8.391608391608393e-06, "loss": 2.327, "step": 12 }, { "epoch": 0.0013677012098895318, "grad_norm": 1.3450385332107544, "learning_rate": 9.090909090909091e-06, "loss": 2.1059, "step": 13 }, { "epoch": 0.0014729089952656496, "grad_norm": 1.0579757690429688, "learning_rate": 9.79020979020979e-06, "loss": 2.3028, "step": 14 }, { "epoch": 0.0015781167806417674, "grad_norm": 1.1774177551269531, "learning_rate": 1.048951048951049e-05, "loss": 2.1585, "step": 15 }, { "epoch": 0.0016833245660178853, "grad_norm": 1.7530457973480225, "learning_rate": 1.118881118881119e-05, "loss": 2.4828, "step": 16 }, { "epoch": 0.001788532351394003, "grad_norm": 1.65647554397583, "learning_rate": 1.188811188811189e-05, "loss": 1.976, "step": 17 }, { "epoch": 0.001893740136770121, "grad_norm": 1.5993101596832275, "learning_rate": 1.2587412587412589e-05, "loss": 1.7929, "step": 18 }, { "epoch": 0.0019989479221462388, "grad_norm": 1.1430275440216064, "learning_rate": 1.3286713286713287e-05, "loss": 2.0432, "step": 19 }, { "epoch": 0.0021041557075223566, "grad_norm": 1.8768856525421143, "learning_rate": 1.3986013986013988e-05, "loss": 1.8982, "step": 20 }, { "epoch": 0.0022093634928984744, "grad_norm": 1.7143903970718384, "learning_rate": 1.4685314685314686e-05, "loss": 2.0638, "step": 21 }, { "epoch": 0.0023145712782745922, "grad_norm": 1.5235050916671753, "learning_rate": 1.5384615384615387e-05, "loss": 2.2032, "step": 22 }, { "epoch": 0.00241977906365071, "grad_norm": 2.3741185665130615, "learning_rate": 1.6083916083916083e-05, "loss": 2.146, "step": 23 }, { "epoch": 0.002524986849026828, "grad_norm": 1.3590655326843262, "learning_rate": 1.6783216783216786e-05, "loss": 2.2378, "step": 24 }, { "epoch": 0.0026301946344029457, "grad_norm": 1.331407904624939, "learning_rate": 1.7482517482517483e-05, "loss": 1.82, "step": 25 }, { "epoch": 0.0027354024197790636, "grad_norm": 1.4533194303512573, "learning_rate": 1.8181818181818182e-05, "loss": 1.9261, "step": 26 }, { "epoch": 0.0028406102051551814, "grad_norm": 1.5130401849746704, "learning_rate": 1.888111888111888e-05, "loss": 2.1417, "step": 27 }, { "epoch": 0.0029458179905312992, "grad_norm": 1.1877672672271729, "learning_rate": 1.958041958041958e-05, "loss": 2.1062, "step": 28 }, { "epoch": 0.003051025775907417, "grad_norm": 0.9648356437683105, "learning_rate": 2.027972027972028e-05, "loss": 2.0972, "step": 29 }, { "epoch": 0.003156233561283535, "grad_norm": 1.1514739990234375, "learning_rate": 2.097902097902098e-05, "loss": 2.4502, "step": 30 }, { "epoch": 0.0032614413466596527, "grad_norm": 0.9614094495773315, "learning_rate": 2.1678321678321677e-05, "loss": 1.953, "step": 31 }, { "epoch": 0.0033666491320357705, "grad_norm": 0.970487117767334, "learning_rate": 2.237762237762238e-05, "loss": 1.9179, "step": 32 }, { "epoch": 0.0034718569174118884, "grad_norm": 1.6860216856002808, "learning_rate": 2.307692307692308e-05, "loss": 2.0499, "step": 33 }, { "epoch": 0.003577064702788006, "grad_norm": 1.1684486865997314, "learning_rate": 2.377622377622378e-05, "loss": 2.1496, "step": 34 }, { "epoch": 0.003682272488164124, "grad_norm": 1.1669265031814575, "learning_rate": 2.4475524475524478e-05, "loss": 2.3943, "step": 35 }, { "epoch": 0.003787480273540242, "grad_norm": 2.067823886871338, "learning_rate": 2.5174825174825178e-05, "loss": 2.2909, "step": 36 }, { "epoch": 0.0038926880589163597, "grad_norm": 0.8208512663841248, "learning_rate": 2.5874125874125877e-05, "loss": 1.9907, "step": 37 }, { "epoch": 0.0039978958442924775, "grad_norm": 1.7650972604751587, "learning_rate": 2.6573426573426574e-05, "loss": 2.1261, "step": 38 }, { "epoch": 0.004103103629668595, "grad_norm": 1.0491235256195068, "learning_rate": 2.7272727272727273e-05, "loss": 2.1774, "step": 39 }, { "epoch": 0.004208311415044713, "grad_norm": 1.6899977922439575, "learning_rate": 2.7972027972027976e-05, "loss": 2.5479, "step": 40 }, { "epoch": 0.004313519200420831, "grad_norm": 0.9535022974014282, "learning_rate": 2.8671328671328672e-05, "loss": 1.9773, "step": 41 }, { "epoch": 0.004418726985796949, "grad_norm": 1.9684417247772217, "learning_rate": 2.9370629370629372e-05, "loss": 1.7069, "step": 42 }, { "epoch": 0.004523934771173067, "grad_norm": 1.4697853326797485, "learning_rate": 3.0069930069930068e-05, "loss": 2.225, "step": 43 }, { "epoch": 0.0046291425565491845, "grad_norm": 1.3747695684432983, "learning_rate": 3.0769230769230774e-05, "loss": 1.8592, "step": 44 }, { "epoch": 0.004734350341925302, "grad_norm": 1.0535547733306885, "learning_rate": 3.146853146853147e-05, "loss": 1.8008, "step": 45 }, { "epoch": 0.00483955812730142, "grad_norm": 0.7596752643585205, "learning_rate": 3.216783216783217e-05, "loss": 1.9742, "step": 46 }, { "epoch": 0.004944765912677538, "grad_norm": 1.3590867519378662, "learning_rate": 3.2867132867132866e-05, "loss": 1.8842, "step": 47 }, { "epoch": 0.005049973698053656, "grad_norm": 1.1350642442703247, "learning_rate": 3.356643356643357e-05, "loss": 1.6666, "step": 48 }, { "epoch": 0.005155181483429774, "grad_norm": 1.2734442949295044, "learning_rate": 3.4265734265734265e-05, "loss": 2.0847, "step": 49 }, { "epoch": 0.0052603892688058915, "grad_norm": 2.199073314666748, "learning_rate": 3.4965034965034965e-05, "loss": 2.0992, "step": 50 }, { "epoch": 0.005365597054182009, "grad_norm": 1.4339218139648438, "learning_rate": 3.566433566433567e-05, "loss": 2.2127, "step": 51 }, { "epoch": 0.005470804839558127, "grad_norm": 1.0405915975570679, "learning_rate": 3.6363636363636364e-05, "loss": 2.1656, "step": 52 }, { "epoch": 0.005576012624934245, "grad_norm": 1.6510354280471802, "learning_rate": 3.7062937062937064e-05, "loss": 1.8267, "step": 53 }, { "epoch": 0.005681220410310363, "grad_norm": 1.8749672174453735, "learning_rate": 3.776223776223776e-05, "loss": 2.3715, "step": 54 }, { "epoch": 0.005786428195686481, "grad_norm": 1.5232490301132202, "learning_rate": 3.846153846153846e-05, "loss": 1.9894, "step": 55 }, { "epoch": 0.0058916359810625984, "grad_norm": 1.09939706325531, "learning_rate": 3.916083916083916e-05, "loss": 2.1504, "step": 56 }, { "epoch": 0.005996843766438716, "grad_norm": 0.9933468699455261, "learning_rate": 3.986013986013986e-05, "loss": 2.1873, "step": 57 }, { "epoch": 0.006102051551814834, "grad_norm": 1.2622774839401245, "learning_rate": 4.055944055944056e-05, "loss": 1.9191, "step": 58 }, { "epoch": 0.006207259337190952, "grad_norm": 1.2729672193527222, "learning_rate": 4.125874125874126e-05, "loss": 2.1369, "step": 59 }, { "epoch": 0.00631246712256707, "grad_norm": 1.32735276222229, "learning_rate": 4.195804195804196e-05, "loss": 1.8028, "step": 60 }, { "epoch": 0.006417674907943188, "grad_norm": 1.1334835290908813, "learning_rate": 4.265734265734266e-05, "loss": 2.1593, "step": 61 }, { "epoch": 0.006522882693319305, "grad_norm": 1.2618657350540161, "learning_rate": 4.335664335664335e-05, "loss": 1.9589, "step": 62 }, { "epoch": 0.006628090478695423, "grad_norm": 1.6038740873336792, "learning_rate": 4.405594405594406e-05, "loss": 2.2707, "step": 63 }, { "epoch": 0.006733298264071541, "grad_norm": 1.2185710668563843, "learning_rate": 4.475524475524476e-05, "loss": 2.0879, "step": 64 }, { "epoch": 0.006838506049447659, "grad_norm": 1.9370228052139282, "learning_rate": 4.545454545454546e-05, "loss": 2.0566, "step": 65 }, { "epoch": 0.006943713834823777, "grad_norm": 1.3582186698913574, "learning_rate": 4.615384615384616e-05, "loss": 2.1007, "step": 66 }, { "epoch": 0.007048921620199895, "grad_norm": 0.7574198246002197, "learning_rate": 4.685314685314686e-05, "loss": 2.0049, "step": 67 }, { "epoch": 0.007154129405576012, "grad_norm": 1.2563366889953613, "learning_rate": 4.755244755244756e-05, "loss": 2.0965, "step": 68 }, { "epoch": 0.00725933719095213, "grad_norm": 1.1161051988601685, "learning_rate": 4.825174825174825e-05, "loss": 1.5938, "step": 69 }, { "epoch": 0.007364544976328248, "grad_norm": 2.441533088684082, "learning_rate": 4.8951048951048956e-05, "loss": 2.4897, "step": 70 }, { "epoch": 0.007469752761704366, "grad_norm": 1.1693131923675537, "learning_rate": 4.9650349650349656e-05, "loss": 1.9202, "step": 71 }, { "epoch": 0.007574960547080484, "grad_norm": 1.2114942073822021, "learning_rate": 5.0349650349650356e-05, "loss": 1.996, "step": 72 }, { "epoch": 0.0076801683324566015, "grad_norm": 0.6417075991630554, "learning_rate": 5.1048951048951055e-05, "loss": 2.1455, "step": 73 }, { "epoch": 0.007785376117832719, "grad_norm": 1.2000133991241455, "learning_rate": 5.1748251748251755e-05, "loss": 2.2027, "step": 74 }, { "epoch": 0.007890583903208837, "grad_norm": 1.086300015449524, "learning_rate": 5.244755244755245e-05, "loss": 1.9952, "step": 75 }, { "epoch": 0.007995791688584955, "grad_norm": 1.217038869857788, "learning_rate": 5.314685314685315e-05, "loss": 1.8331, "step": 76 }, { "epoch": 0.008100999473961073, "grad_norm": 2.1391165256500244, "learning_rate": 5.384615384615385e-05, "loss": 1.7158, "step": 77 }, { "epoch": 0.00820620725933719, "grad_norm": 1.2688722610473633, "learning_rate": 5.4545454545454546e-05, "loss": 1.8603, "step": 78 }, { "epoch": 0.008311415044713309, "grad_norm": 1.1678074598312378, "learning_rate": 5.524475524475524e-05, "loss": 1.8435, "step": 79 }, { "epoch": 0.008416622830089426, "grad_norm": 0.6559339165687561, "learning_rate": 5.594405594405595e-05, "loss": 2.1147, "step": 80 }, { "epoch": 0.008521830615465544, "grad_norm": 1.0474909543991089, "learning_rate": 5.664335664335665e-05, "loss": 2.3672, "step": 81 }, { "epoch": 0.008627038400841662, "grad_norm": 0.8283208012580872, "learning_rate": 5.7342657342657345e-05, "loss": 2.0395, "step": 82 }, { "epoch": 0.00873224618621778, "grad_norm": 0.7440145015716553, "learning_rate": 5.8041958041958044e-05, "loss": 2.5889, "step": 83 }, { "epoch": 0.008837453971593898, "grad_norm": 1.0744500160217285, "learning_rate": 5.8741258741258744e-05, "loss": 1.9353, "step": 84 }, { "epoch": 0.008942661756970016, "grad_norm": 1.0884795188903809, "learning_rate": 5.944055944055944e-05, "loss": 1.8183, "step": 85 }, { "epoch": 0.009047869542346133, "grad_norm": 1.3748575448989868, "learning_rate": 6.0139860139860136e-05, "loss": 2.0691, "step": 86 }, { "epoch": 0.009153077327722251, "grad_norm": 0.7459085583686829, "learning_rate": 6.083916083916085e-05, "loss": 2.2508, "step": 87 }, { "epoch": 0.009258285113098369, "grad_norm": 1.4073225259780884, "learning_rate": 6.153846153846155e-05, "loss": 1.9281, "step": 88 }, { "epoch": 0.009363492898474487, "grad_norm": 0.988071620464325, "learning_rate": 6.223776223776224e-05, "loss": 1.7869, "step": 89 }, { "epoch": 0.009468700683850605, "grad_norm": 1.1808884143829346, "learning_rate": 6.293706293706293e-05, "loss": 1.5635, "step": 90 }, { "epoch": 0.009573908469226722, "grad_norm": 1.0313398838043213, "learning_rate": 6.363636363636364e-05, "loss": 2.0061, "step": 91 }, { "epoch": 0.00967911625460284, "grad_norm": 0.7924686074256897, "learning_rate": 6.433566433566433e-05, "loss": 2.0563, "step": 92 }, { "epoch": 0.009784324039978958, "grad_norm": 1.0491949319839478, "learning_rate": 6.503496503496504e-05, "loss": 1.8572, "step": 93 }, { "epoch": 0.009889531825355076, "grad_norm": 0.8003563284873962, "learning_rate": 6.573426573426573e-05, "loss": 2.1663, "step": 94 }, { "epoch": 0.009994739610731194, "grad_norm": 0.8011645078659058, "learning_rate": 6.643356643356644e-05, "loss": 2.1442, "step": 95 }, { "epoch": 0.010099947396107312, "grad_norm": 1.4755507707595825, "learning_rate": 6.713286713286715e-05, "loss": 1.5279, "step": 96 }, { "epoch": 0.01020515518148343, "grad_norm": 1.2490993738174438, "learning_rate": 6.783216783216784e-05, "loss": 1.7732, "step": 97 }, { "epoch": 0.010310362966859547, "grad_norm": 0.8913796544075012, "learning_rate": 6.853146853146853e-05, "loss": 1.6675, "step": 98 }, { "epoch": 0.010415570752235665, "grad_norm": 2.243081569671631, "learning_rate": 6.923076923076924e-05, "loss": 1.9422, "step": 99 }, { "epoch": 0.010520778537611783, "grad_norm": 1.7832255363464355, "learning_rate": 6.993006993006993e-05, "loss": 1.9094, "step": 100 }, { "epoch": 0.0106259863229879, "grad_norm": 1.1693270206451416, "learning_rate": 7.062937062937062e-05, "loss": 2.2014, "step": 101 }, { "epoch": 0.010731194108364019, "grad_norm": 0.9510181546211243, "learning_rate": 7.132867132867134e-05, "loss": 2.0675, "step": 102 }, { "epoch": 0.010836401893740136, "grad_norm": 1.1362980604171753, "learning_rate": 7.202797202797204e-05, "loss": 1.8922, "step": 103 }, { "epoch": 0.010941609679116254, "grad_norm": 1.42923903465271, "learning_rate": 7.272727272727273e-05, "loss": 1.5876, "step": 104 }, { "epoch": 0.011046817464492372, "grad_norm": 0.838302493095398, "learning_rate": 7.342657342657343e-05, "loss": 2.0269, "step": 105 }, { "epoch": 0.01115202524986849, "grad_norm": 0.8059056401252747, "learning_rate": 7.412587412587413e-05, "loss": 2.2676, "step": 106 }, { "epoch": 0.011257233035244608, "grad_norm": 1.8598095178604126, "learning_rate": 7.482517482517482e-05, "loss": 2.2518, "step": 107 }, { "epoch": 0.011362440820620726, "grad_norm": 1.1147247552871704, "learning_rate": 7.552447552447553e-05, "loss": 1.8006, "step": 108 }, { "epoch": 0.011467648605996843, "grad_norm": 2.8933708667755127, "learning_rate": 7.622377622377622e-05, "loss": 1.4782, "step": 109 }, { "epoch": 0.011572856391372961, "grad_norm": 1.3423585891723633, "learning_rate": 7.692307692307693e-05, "loss": 1.6204, "step": 110 }, { "epoch": 0.011678064176749079, "grad_norm": 1.0002747774124146, "learning_rate": 7.762237762237763e-05, "loss": 1.5048, "step": 111 }, { "epoch": 0.011783271962125197, "grad_norm": 1.511420488357544, "learning_rate": 7.832167832167832e-05, "loss": 2.4107, "step": 112 }, { "epoch": 0.011888479747501315, "grad_norm": 0.7754486799240112, "learning_rate": 7.902097902097903e-05, "loss": 2.1876, "step": 113 }, { "epoch": 0.011993687532877433, "grad_norm": 0.6452352404594421, "learning_rate": 7.972027972027972e-05, "loss": 2.3313, "step": 114 }, { "epoch": 0.01209889531825355, "grad_norm": 0.9890616536140442, "learning_rate": 8.041958041958042e-05, "loss": 2.172, "step": 115 }, { "epoch": 0.012204103103629668, "grad_norm": 1.8413726091384888, "learning_rate": 8.111888111888112e-05, "loss": 1.969, "step": 116 }, { "epoch": 0.012309310889005786, "grad_norm": 0.800186812877655, "learning_rate": 8.181818181818183e-05, "loss": 2.268, "step": 117 }, { "epoch": 0.012414518674381904, "grad_norm": 1.1103674173355103, "learning_rate": 8.251748251748252e-05, "loss": 1.9077, "step": 118 }, { "epoch": 0.012519726459758022, "grad_norm": 4.220036029815674, "learning_rate": 8.321678321678323e-05, "loss": 2.0661, "step": 119 }, { "epoch": 0.01262493424513414, "grad_norm": 1.084676742553711, "learning_rate": 8.391608391608392e-05, "loss": 2.1746, "step": 120 }, { "epoch": 0.012730142030510257, "grad_norm": 1.1023154258728027, "learning_rate": 8.461538461538461e-05, "loss": 1.8782, "step": 121 }, { "epoch": 0.012835349815886375, "grad_norm": 0.8550626635551453, "learning_rate": 8.531468531468532e-05, "loss": 2.0319, "step": 122 }, { "epoch": 0.012940557601262493, "grad_norm": 0.5834708213806152, "learning_rate": 8.601398601398601e-05, "loss": 2.1768, "step": 123 }, { "epoch": 0.01304576538663861, "grad_norm": 0.9890002012252808, "learning_rate": 8.67132867132867e-05, "loss": 2.1604, "step": 124 }, { "epoch": 0.013150973172014729, "grad_norm": 1.2125439643859863, "learning_rate": 8.741258741258743e-05, "loss": 2.0555, "step": 125 }, { "epoch": 0.013256180957390847, "grad_norm": 0.8572925329208374, "learning_rate": 8.811188811188812e-05, "loss": 2.1043, "step": 126 }, { "epoch": 0.013361388742766964, "grad_norm": 0.8613000512123108, "learning_rate": 8.881118881118881e-05, "loss": 2.1363, "step": 127 }, { "epoch": 0.013466596528143082, "grad_norm": 1.0686365365982056, "learning_rate": 8.951048951048952e-05, "loss": 2.1241, "step": 128 }, { "epoch": 0.0135718043135192, "grad_norm": 0.6561310291290283, "learning_rate": 9.020979020979021e-05, "loss": 2.037, "step": 129 }, { "epoch": 0.013677012098895318, "grad_norm": 1.03804612159729, "learning_rate": 9.090909090909092e-05, "loss": 2.0004, "step": 130 }, { "epoch": 0.013782219884271436, "grad_norm": 0.842249870300293, "learning_rate": 9.160839160839161e-05, "loss": 2.1774, "step": 131 }, { "epoch": 0.013887427669647553, "grad_norm": 1.4844613075256348, "learning_rate": 9.230769230769232e-05, "loss": 2.0346, "step": 132 }, { "epoch": 0.013992635455023671, "grad_norm": 0.7268441319465637, "learning_rate": 9.300699300699301e-05, "loss": 2.3364, "step": 133 }, { "epoch": 0.01409784324039979, "grad_norm": 0.8013659715652466, "learning_rate": 9.370629370629372e-05, "loss": 2.2674, "step": 134 }, { "epoch": 0.014203051025775907, "grad_norm": 0.6665470004081726, "learning_rate": 9.440559440559441e-05, "loss": 2.0605, "step": 135 }, { "epoch": 0.014308258811152025, "grad_norm": 1.0592727661132812, "learning_rate": 9.510489510489511e-05, "loss": 1.8648, "step": 136 }, { "epoch": 0.014413466596528143, "grad_norm": 0.9042508602142334, "learning_rate": 9.580419580419581e-05, "loss": 1.9756, "step": 137 }, { "epoch": 0.01451867438190426, "grad_norm": 0.7357807755470276, "learning_rate": 9.65034965034965e-05, "loss": 1.9707, "step": 138 }, { "epoch": 0.014623882167280378, "grad_norm": 0.8247336149215698, "learning_rate": 9.72027972027972e-05, "loss": 2.1052, "step": 139 }, { "epoch": 0.014729089952656496, "grad_norm": 1.1244010925292969, "learning_rate": 9.790209790209791e-05, "loss": 1.8881, "step": 140 }, { "epoch": 0.014834297738032614, "grad_norm": 0.7070969343185425, "learning_rate": 9.86013986013986e-05, "loss": 1.9978, "step": 141 }, { "epoch": 0.014939505523408732, "grad_norm": 1.1827497482299805, "learning_rate": 9.930069930069931e-05, "loss": 1.708, "step": 142 }, { "epoch": 0.01504471330878485, "grad_norm": 1.9365792274475098, "learning_rate": 0.0001, "loss": 1.8381, "step": 143 }, { "epoch": 0.015149921094160967, "grad_norm": 0.8472030758857727, "learning_rate": 0.00010069930069930071, "loss": 2.1147, "step": 144 }, { "epoch": 0.015255128879537085, "grad_norm": 0.7535983920097351, "learning_rate": 0.0001013986013986014, "loss": 2.3344, "step": 145 }, { "epoch": 0.015360336664913203, "grad_norm": 1.1090881824493408, "learning_rate": 0.00010209790209790211, "loss": 1.7615, "step": 146 }, { "epoch": 0.015465544450289321, "grad_norm": 0.7076790928840637, "learning_rate": 0.00010279720279720279, "loss": 2.0014, "step": 147 }, { "epoch": 0.015570752235665439, "grad_norm": 0.7309038043022156, "learning_rate": 0.00010349650349650351, "loss": 2.5436, "step": 148 }, { "epoch": 0.015675960021041557, "grad_norm": 0.9821389317512512, "learning_rate": 0.00010419580419580419, "loss": 2.0839, "step": 149 }, { "epoch": 0.015781167806417674, "grad_norm": 0.5570242404937744, "learning_rate": 0.0001048951048951049, "loss": 2.1916, "step": 150 }, { "epoch": 0.015886375591793792, "grad_norm": 1.2793196439743042, "learning_rate": 0.00010559440559440561, "loss": 2.1197, "step": 151 }, { "epoch": 0.01599158337716991, "grad_norm": 1.9450187683105469, "learning_rate": 0.0001062937062937063, "loss": 2.5529, "step": 152 }, { "epoch": 0.016096791162546028, "grad_norm": 2.063152313232422, "learning_rate": 0.000106993006993007, "loss": 1.9606, "step": 153 }, { "epoch": 0.016201998947922146, "grad_norm": 1.4163166284561157, "learning_rate": 0.0001076923076923077, "loss": 2.002, "step": 154 }, { "epoch": 0.016307206733298264, "grad_norm": 0.6336003541946411, "learning_rate": 0.0001083916083916084, "loss": 2.3557, "step": 155 }, { "epoch": 0.01641241451867438, "grad_norm": 1.3749457597732544, "learning_rate": 0.00010909090909090909, "loss": 2.3967, "step": 156 }, { "epoch": 0.0165176223040505, "grad_norm": 0.8000378608703613, "learning_rate": 0.0001097902097902098, "loss": 2.0812, "step": 157 }, { "epoch": 0.016622830089426617, "grad_norm": 0.8916708827018738, "learning_rate": 0.00011048951048951048, "loss": 1.8927, "step": 158 }, { "epoch": 0.016728037874802735, "grad_norm": 0.8637097477912903, "learning_rate": 0.0001111888111888112, "loss": 1.8131, "step": 159 }, { "epoch": 0.016833245660178853, "grad_norm": 0.6609115600585938, "learning_rate": 0.0001118881118881119, "loss": 1.9201, "step": 160 }, { "epoch": 0.01693845344555497, "grad_norm": 1.2824020385742188, "learning_rate": 0.00011258741258741258, "loss": 2.3487, "step": 161 }, { "epoch": 0.01704366123093109, "grad_norm": 1.6525263786315918, "learning_rate": 0.0001132867132867133, "loss": 1.9996, "step": 162 }, { "epoch": 0.017148869016307206, "grad_norm": 1.3814963102340698, "learning_rate": 0.00011398601398601398, "loss": 1.7609, "step": 163 }, { "epoch": 0.017254076801683324, "grad_norm": 0.6455362439155579, "learning_rate": 0.00011468531468531469, "loss": 2.1223, "step": 164 }, { "epoch": 0.017359284587059442, "grad_norm": 0.7798193097114563, "learning_rate": 0.00011538461538461538, "loss": 2.0314, "step": 165 }, { "epoch": 0.01746449237243556, "grad_norm": 0.7223178744316101, "learning_rate": 0.00011608391608391609, "loss": 2.0679, "step": 166 }, { "epoch": 0.017569700157811678, "grad_norm": 0.8387543559074402, "learning_rate": 0.0001167832167832168, "loss": 2.2392, "step": 167 }, { "epoch": 0.017674907943187795, "grad_norm": 1.1753877401351929, "learning_rate": 0.00011748251748251749, "loss": 2.2198, "step": 168 }, { "epoch": 0.017780115728563913, "grad_norm": 0.6246500015258789, "learning_rate": 0.0001181818181818182, "loss": 1.9576, "step": 169 }, { "epoch": 0.01788532351394003, "grad_norm": 0.7104965448379517, "learning_rate": 0.00011888111888111889, "loss": 1.8494, "step": 170 }, { "epoch": 0.01799053129931615, "grad_norm": 0.6617504954338074, "learning_rate": 0.00011958041958041959, "loss": 2.1685, "step": 171 }, { "epoch": 0.018095739084692267, "grad_norm": 0.5817732810974121, "learning_rate": 0.00012027972027972027, "loss": 2.4582, "step": 172 }, { "epoch": 0.018200946870068384, "grad_norm": 1.5956623554229736, "learning_rate": 0.00012097902097902098, "loss": 1.9319, "step": 173 }, { "epoch": 0.018306154655444502, "grad_norm": 0.6000881195068359, "learning_rate": 0.0001216783216783217, "loss": 2.2725, "step": 174 }, { "epoch": 0.01841136244082062, "grad_norm": 1.0373468399047852, "learning_rate": 0.00012237762237762238, "loss": 2.3838, "step": 175 }, { "epoch": 0.018516570226196738, "grad_norm": 0.6030766367912292, "learning_rate": 0.0001230769230769231, "loss": 2.0372, "step": 176 }, { "epoch": 0.018621778011572856, "grad_norm": 0.7469233274459839, "learning_rate": 0.00012377622377622376, "loss": 2.1564, "step": 177 }, { "epoch": 0.018726985796948974, "grad_norm": 0.9262884259223938, "learning_rate": 0.00012447552447552448, "loss": 1.4632, "step": 178 }, { "epoch": 0.01883219358232509, "grad_norm": 0.8348685503005981, "learning_rate": 0.00012517482517482518, "loss": 2.193, "step": 179 }, { "epoch": 0.01893740136770121, "grad_norm": 0.6114562153816223, "learning_rate": 0.00012587412587412587, "loss": 2.0964, "step": 180 }, { "epoch": 0.019042609153077327, "grad_norm": 1.039941668510437, "learning_rate": 0.0001265734265734266, "loss": 1.9306, "step": 181 }, { "epoch": 0.019147816938453445, "grad_norm": 0.730789303779602, "learning_rate": 0.00012727272727272728, "loss": 1.7754, "step": 182 }, { "epoch": 0.019253024723829563, "grad_norm": 0.76559978723526, "learning_rate": 0.00012797202797202797, "loss": 2.1377, "step": 183 }, { "epoch": 0.01935823250920568, "grad_norm": 0.7957161068916321, "learning_rate": 0.00012867132867132867, "loss": 2.2704, "step": 184 }, { "epoch": 0.0194634402945818, "grad_norm": 0.9248109459877014, "learning_rate": 0.0001293706293706294, "loss": 2.0038, "step": 185 }, { "epoch": 0.019568648079957916, "grad_norm": 0.7987109422683716, "learning_rate": 0.00013006993006993008, "loss": 2.2301, "step": 186 }, { "epoch": 0.019673855865334034, "grad_norm": 0.5288488864898682, "learning_rate": 0.00013076923076923077, "loss": 1.9312, "step": 187 }, { "epoch": 0.019779063650710152, "grad_norm": 0.6533221006393433, "learning_rate": 0.00013146853146853147, "loss": 2.0679, "step": 188 }, { "epoch": 0.01988427143608627, "grad_norm": 1.0460737943649292, "learning_rate": 0.00013216783216783219, "loss": 1.9375, "step": 189 }, { "epoch": 0.019989479221462388, "grad_norm": 1.1394834518432617, "learning_rate": 0.00013286713286713288, "loss": 2.248, "step": 190 }, { "epoch": 0.020094687006838505, "grad_norm": 0.6399918794631958, "learning_rate": 0.00013356643356643357, "loss": 2.1755, "step": 191 }, { "epoch": 0.020199894792214623, "grad_norm": 1.1859235763549805, "learning_rate": 0.0001342657342657343, "loss": 1.8647, "step": 192 }, { "epoch": 0.02030510257759074, "grad_norm": 0.7891494631767273, "learning_rate": 0.00013496503496503496, "loss": 2.1801, "step": 193 }, { "epoch": 0.02041031036296686, "grad_norm": 0.8865561485290527, "learning_rate": 0.00013566433566433568, "loss": 1.9941, "step": 194 }, { "epoch": 0.020515518148342977, "grad_norm": 0.6699333786964417, "learning_rate": 0.00013636363636363637, "loss": 1.9786, "step": 195 }, { "epoch": 0.020620725933719095, "grad_norm": 0.6600997447967529, "learning_rate": 0.00013706293706293706, "loss": 2.2511, "step": 196 }, { "epoch": 0.020725933719095212, "grad_norm": 0.6689944863319397, "learning_rate": 0.00013776223776223778, "loss": 2.0213, "step": 197 }, { "epoch": 0.02083114150447133, "grad_norm": 0.8018683791160583, "learning_rate": 0.00013846153846153847, "loss": 2.0495, "step": 198 }, { "epoch": 0.020936349289847448, "grad_norm": 1.0803660154342651, "learning_rate": 0.00013916083916083917, "loss": 1.1935, "step": 199 }, { "epoch": 0.021041557075223566, "grad_norm": 2.967259168624878, "learning_rate": 0.00013986013986013986, "loss": 1.5776, "step": 200 }, { "epoch": 0.021146764860599684, "grad_norm": 0.9621496200561523, "learning_rate": 0.00014055944055944058, "loss": 1.9666, "step": 201 }, { "epoch": 0.0212519726459758, "grad_norm": 0.8522931933403015, "learning_rate": 0.00014125874125874125, "loss": 1.8651, "step": 202 }, { "epoch": 0.02135718043135192, "grad_norm": 0.942253589630127, "learning_rate": 0.00014195804195804197, "loss": 2.1782, "step": 203 }, { "epoch": 0.021462388216728037, "grad_norm": 0.6087969541549683, "learning_rate": 0.00014265734265734269, "loss": 2.165, "step": 204 }, { "epoch": 0.021567596002104155, "grad_norm": 0.6847811937332153, "learning_rate": 0.00014335664335664335, "loss": 2.1116, "step": 205 }, { "epoch": 0.021672803787480273, "grad_norm": 0.9084582328796387, "learning_rate": 0.00014405594405594407, "loss": 1.9343, "step": 206 }, { "epoch": 0.02177801157285639, "grad_norm": 1.0526587963104248, "learning_rate": 0.00014475524475524476, "loss": 2.1309, "step": 207 }, { "epoch": 0.02188321935823251, "grad_norm": 0.8651286363601685, "learning_rate": 0.00014545454545454546, "loss": 2.1103, "step": 208 }, { "epoch": 0.021988427143608626, "grad_norm": 0.6131342053413391, "learning_rate": 0.00014615384615384615, "loss": 1.9468, "step": 209 }, { "epoch": 0.022093634928984744, "grad_norm": 0.784864068031311, "learning_rate": 0.00014685314685314687, "loss": 2.1167, "step": 210 }, { "epoch": 0.022198842714360862, "grad_norm": 1.1468371152877808, "learning_rate": 0.00014755244755244756, "loss": 1.8572, "step": 211 }, { "epoch": 0.02230405049973698, "grad_norm": 0.9270665049552917, "learning_rate": 0.00014825174825174825, "loss": 1.9398, "step": 212 }, { "epoch": 0.022409258285113098, "grad_norm": 0.7162615060806274, "learning_rate": 0.00014895104895104897, "loss": 2.0499, "step": 213 }, { "epoch": 0.022514466070489215, "grad_norm": 0.6274422407150269, "learning_rate": 0.00014965034965034964, "loss": 1.8765, "step": 214 }, { "epoch": 0.022619673855865333, "grad_norm": 1.4804967641830444, "learning_rate": 0.00015034965034965036, "loss": 1.7015, "step": 215 }, { "epoch": 0.02272488164124145, "grad_norm": 0.5013079047203064, "learning_rate": 0.00015104895104895105, "loss": 2.0067, "step": 216 }, { "epoch": 0.02283008942661757, "grad_norm": 0.7269556522369385, "learning_rate": 0.00015174825174825175, "loss": 1.934, "step": 217 }, { "epoch": 0.022935297211993687, "grad_norm": 0.9243491291999817, "learning_rate": 0.00015244755244755244, "loss": 2.2308, "step": 218 }, { "epoch": 0.023040504997369805, "grad_norm": 0.7595291137695312, "learning_rate": 0.00015314685314685316, "loss": 2.0564, "step": 219 }, { "epoch": 0.023145712782745922, "grad_norm": 0.8647655248641968, "learning_rate": 0.00015384615384615385, "loss": 2.0716, "step": 220 }, { "epoch": 0.02325092056812204, "grad_norm": 1.0030921697616577, "learning_rate": 0.00015454545454545454, "loss": 1.9771, "step": 221 }, { "epoch": 0.023356128353498158, "grad_norm": 0.588725745677948, "learning_rate": 0.00015524475524475526, "loss": 2.2316, "step": 222 }, { "epoch": 0.023461336138874276, "grad_norm": 1.029255747795105, "learning_rate": 0.00015594405594405596, "loss": 2.2892, "step": 223 }, { "epoch": 0.023566543924250394, "grad_norm": 0.9255016446113586, "learning_rate": 0.00015664335664335665, "loss": 2.2594, "step": 224 }, { "epoch": 0.02367175170962651, "grad_norm": 0.84700608253479, "learning_rate": 0.00015734265734265734, "loss": 1.8739, "step": 225 }, { "epoch": 0.02377695949500263, "grad_norm": 0.8125012516975403, "learning_rate": 0.00015804195804195806, "loss": 2.0795, "step": 226 }, { "epoch": 0.023882167280378747, "grad_norm": 0.8614683151245117, "learning_rate": 0.00015874125874125876, "loss": 2.4142, "step": 227 }, { "epoch": 0.023987375065754865, "grad_norm": 1.1380670070648193, "learning_rate": 0.00015944055944055945, "loss": 2.1002, "step": 228 }, { "epoch": 0.024092582851130983, "grad_norm": 0.4754476547241211, "learning_rate": 0.00016013986013986014, "loss": 2.0722, "step": 229 }, { "epoch": 0.0241977906365071, "grad_norm": 0.861049473285675, "learning_rate": 0.00016083916083916083, "loss": 1.9022, "step": 230 }, { "epoch": 0.02430299842188322, "grad_norm": 1.181152582168579, "learning_rate": 0.00016153846153846155, "loss": 2.0356, "step": 231 }, { "epoch": 0.024408206207259336, "grad_norm": 0.8931273818016052, "learning_rate": 0.00016223776223776225, "loss": 1.7346, "step": 232 }, { "epoch": 0.024513413992635454, "grad_norm": 0.7377164959907532, "learning_rate": 0.00016293706293706294, "loss": 2.0764, "step": 233 }, { "epoch": 0.024618621778011572, "grad_norm": 1.085389494895935, "learning_rate": 0.00016363636363636366, "loss": 1.9141, "step": 234 }, { "epoch": 0.02472382956338769, "grad_norm": 1.0622587203979492, "learning_rate": 0.00016433566433566435, "loss": 1.8455, "step": 235 }, { "epoch": 0.024829037348763808, "grad_norm": 1.0064948797225952, "learning_rate": 0.00016503496503496504, "loss": 1.9076, "step": 236 }, { "epoch": 0.024934245134139926, "grad_norm": 0.5665073394775391, "learning_rate": 0.00016573426573426574, "loss": 2.0581, "step": 237 }, { "epoch": 0.025039452919516043, "grad_norm": 1.042952299118042, "learning_rate": 0.00016643356643356646, "loss": 1.9164, "step": 238 }, { "epoch": 0.02514466070489216, "grad_norm": 1.4019465446472168, "learning_rate": 0.00016713286713286712, "loss": 1.8818, "step": 239 }, { "epoch": 0.02524986849026828, "grad_norm": 1.0074125528335571, "learning_rate": 0.00016783216783216784, "loss": 1.971, "step": 240 }, { "epoch": 0.025355076275644397, "grad_norm": 0.9089799523353577, "learning_rate": 0.00016853146853146856, "loss": 2.087, "step": 241 }, { "epoch": 0.025460284061020515, "grad_norm": 0.7242212891578674, "learning_rate": 0.00016923076923076923, "loss": 2.1149, "step": 242 }, { "epoch": 0.025565491846396633, "grad_norm": 1.1103755235671997, "learning_rate": 0.00016993006993006995, "loss": 2.1568, "step": 243 }, { "epoch": 0.02567069963177275, "grad_norm": 1.03432297706604, "learning_rate": 0.00017062937062937064, "loss": 2.1965, "step": 244 }, { "epoch": 0.025775907417148868, "grad_norm": 0.8843188881874084, "learning_rate": 0.00017132867132867133, "loss": 1.7656, "step": 245 }, { "epoch": 0.025881115202524986, "grad_norm": 1.1456265449523926, "learning_rate": 0.00017202797202797203, "loss": 2.2859, "step": 246 }, { "epoch": 0.025986322987901104, "grad_norm": 1.0545116662979126, "learning_rate": 0.00017272727272727275, "loss": 2.0337, "step": 247 }, { "epoch": 0.02609153077327722, "grad_norm": 1.3290053606033325, "learning_rate": 0.0001734265734265734, "loss": 2.1952, "step": 248 }, { "epoch": 0.02619673855865334, "grad_norm": 1.2462905645370483, "learning_rate": 0.00017412587412587413, "loss": 2.1667, "step": 249 }, { "epoch": 0.026301946344029457, "grad_norm": 1.0230878591537476, "learning_rate": 0.00017482517482517485, "loss": 2.0687, "step": 250 }, { "epoch": 0.026407154129405575, "grad_norm": 0.6971817016601562, "learning_rate": 0.00017552447552447552, "loss": 1.947, "step": 251 }, { "epoch": 0.026512361914781693, "grad_norm": 1.1189419031143188, "learning_rate": 0.00017622377622377624, "loss": 1.9158, "step": 252 }, { "epoch": 0.02661756970015781, "grad_norm": 0.9112401604652405, "learning_rate": 0.00017692307692307693, "loss": 2.2175, "step": 253 }, { "epoch": 0.02672277748553393, "grad_norm": 0.6773673892021179, "learning_rate": 0.00017762237762237762, "loss": 2.0269, "step": 254 }, { "epoch": 0.026827985270910047, "grad_norm": 1.021903157234192, "learning_rate": 0.00017832167832167832, "loss": 1.4599, "step": 255 }, { "epoch": 0.026933193056286164, "grad_norm": 1.0119444131851196, "learning_rate": 0.00017902097902097904, "loss": 2.5356, "step": 256 }, { "epoch": 0.027038400841662282, "grad_norm": 0.8079578280448914, "learning_rate": 0.00017972027972027973, "loss": 1.7094, "step": 257 }, { "epoch": 0.0271436086270384, "grad_norm": 0.8587137460708618, "learning_rate": 0.00018041958041958042, "loss": 1.9044, "step": 258 }, { "epoch": 0.027248816412414518, "grad_norm": 0.4971306324005127, "learning_rate": 0.00018111888111888114, "loss": 2.7055, "step": 259 }, { "epoch": 0.027354024197790636, "grad_norm": 0.6644048094749451, "learning_rate": 0.00018181818181818183, "loss": 2.22, "step": 260 }, { "epoch": 0.027459231983166753, "grad_norm": 0.8848311901092529, "learning_rate": 0.00018251748251748253, "loss": 2.1772, "step": 261 }, { "epoch": 0.02756443976854287, "grad_norm": 0.8665701746940613, "learning_rate": 0.00018321678321678322, "loss": 2.0003, "step": 262 }, { "epoch": 0.02766964755391899, "grad_norm": 0.9104865193367004, "learning_rate": 0.0001839160839160839, "loss": 2.2117, "step": 263 }, { "epoch": 0.027774855339295107, "grad_norm": 0.7475347518920898, "learning_rate": 0.00018461538461538463, "loss": 2.0686, "step": 264 }, { "epoch": 0.027880063124671225, "grad_norm": 0.9693028926849365, "learning_rate": 0.00018531468531468533, "loss": 2.1383, "step": 265 }, { "epoch": 0.027985270910047343, "grad_norm": 0.6588386297225952, "learning_rate": 0.00018601398601398602, "loss": 1.8683, "step": 266 }, { "epoch": 0.02809047869542346, "grad_norm": 1.076177716255188, "learning_rate": 0.0001867132867132867, "loss": 1.5809, "step": 267 }, { "epoch": 0.02819568648079958, "grad_norm": 0.6180933713912964, "learning_rate": 0.00018741258741258743, "loss": 1.9758, "step": 268 }, { "epoch": 0.028300894266175696, "grad_norm": 1.6872791051864624, "learning_rate": 0.00018811188811188812, "loss": 2.3591, "step": 269 }, { "epoch": 0.028406102051551814, "grad_norm": 1.2484400272369385, "learning_rate": 0.00018881118881118882, "loss": 1.7551, "step": 270 }, { "epoch": 0.028511309836927932, "grad_norm": 0.5355095863342285, "learning_rate": 0.00018951048951048954, "loss": 2.1445, "step": 271 }, { "epoch": 0.02861651762230405, "grad_norm": 0.8856534361839294, "learning_rate": 0.00019020979020979023, "loss": 2.0174, "step": 272 }, { "epoch": 0.028721725407680167, "grad_norm": 1.0029089450836182, "learning_rate": 0.00019090909090909092, "loss": 2.0354, "step": 273 }, { "epoch": 0.028826933193056285, "grad_norm": 0.859724760055542, "learning_rate": 0.00019160839160839161, "loss": 2.0125, "step": 274 }, { "epoch": 0.028932140978432403, "grad_norm": 1.281600832939148, "learning_rate": 0.00019230769230769233, "loss": 1.8134, "step": 275 }, { "epoch": 0.02903734876380852, "grad_norm": 1.7904019355773926, "learning_rate": 0.000193006993006993, "loss": 1.7277, "step": 276 }, { "epoch": 0.02914255654918464, "grad_norm": 0.7003609538078308, "learning_rate": 0.00019370629370629372, "loss": 2.0308, "step": 277 }, { "epoch": 0.029247764334560757, "grad_norm": 0.8506051898002625, "learning_rate": 0.0001944055944055944, "loss": 1.8183, "step": 278 }, { "epoch": 0.029352972119936874, "grad_norm": 0.7373519539833069, "learning_rate": 0.0001951048951048951, "loss": 2.0156, "step": 279 }, { "epoch": 0.029458179905312992, "grad_norm": 0.8916333913803101, "learning_rate": 0.00019580419580419583, "loss": 1.9928, "step": 280 }, { "epoch": 0.02956338769068911, "grad_norm": 0.7887770533561707, "learning_rate": 0.00019650349650349652, "loss": 2.0476, "step": 281 }, { "epoch": 0.029668595476065228, "grad_norm": 1.0839449167251587, "learning_rate": 0.0001972027972027972, "loss": 2.457, "step": 282 }, { "epoch": 0.029773803261441346, "grad_norm": 0.7722465395927429, "learning_rate": 0.0001979020979020979, "loss": 2.1979, "step": 283 }, { "epoch": 0.029879011046817464, "grad_norm": 0.6390840411186218, "learning_rate": 0.00019860139860139862, "loss": 1.9645, "step": 284 }, { "epoch": 0.02998421883219358, "grad_norm": 0.6858240365982056, "learning_rate": 0.0001993006993006993, "loss": 2.0411, "step": 285 }, { "epoch": 0.0300894266175697, "grad_norm": 0.831282377243042, "learning_rate": 0.0002, "loss": 2.4271, "step": 286 }, { "epoch": 0.030194634402945817, "grad_norm": 1.3080503940582275, "learning_rate": 0.00019999999419366464, "loss": 2.4988, "step": 287 }, { "epoch": 0.030299842188321935, "grad_norm": 0.6604061126708984, "learning_rate": 0.00019999997677465928, "loss": 2.3058, "step": 288 }, { "epoch": 0.030405049973698053, "grad_norm": 0.682809591293335, "learning_rate": 0.00019999994774298586, "loss": 2.3447, "step": 289 }, { "epoch": 0.03051025775907417, "grad_norm": 0.7935196161270142, "learning_rate": 0.00019999990709864784, "loss": 1.9286, "step": 290 }, { "epoch": 0.03061546554445029, "grad_norm": 0.7283986806869507, "learning_rate": 0.00019999985484164988, "loss": 2.1939, "step": 291 }, { "epoch": 0.030720673329826406, "grad_norm": 0.6452785134315491, "learning_rate": 0.00019999979097199807, "loss": 2.186, "step": 292 }, { "epoch": 0.030825881115202524, "grad_norm": 0.7383095622062683, "learning_rate": 0.00019999971548969982, "loss": 2.4815, "step": 293 }, { "epoch": 0.030931088900578642, "grad_norm": 0.8694736361503601, "learning_rate": 0.00019999962839476393, "loss": 1.8029, "step": 294 }, { "epoch": 0.03103629668595476, "grad_norm": 1.1409188508987427, "learning_rate": 0.00019999952968720045, "loss": 1.8402, "step": 295 }, { "epoch": 0.031141504471330878, "grad_norm": 1.1897854804992676, "learning_rate": 0.0001999994193670209, "loss": 2.2032, "step": 296 }, { "epoch": 0.031246712256706995, "grad_norm": 1.0531314611434937, "learning_rate": 0.00019999929743423804, "loss": 2.0809, "step": 297 }, { "epoch": 0.03135192004208311, "grad_norm": 0.9149481654167175, "learning_rate": 0.0001999991638888661, "loss": 1.9994, "step": 298 }, { "epoch": 0.031457127827459234, "grad_norm": 0.9323375821113586, "learning_rate": 0.00019999901873092054, "loss": 2.1517, "step": 299 }, { "epoch": 0.03156233561283535, "grad_norm": 2.169660806655884, "learning_rate": 0.0001999988619604182, "loss": 1.935, "step": 300 }, { "epoch": 0.03166754339821147, "grad_norm": 1.145294189453125, "learning_rate": 0.0001999986935773773, "loss": 2.2346, "step": 301 }, { "epoch": 0.031772751183587584, "grad_norm": 0.9163552522659302, "learning_rate": 0.00019999851358181746, "loss": 1.757, "step": 302 }, { "epoch": 0.031877958968963706, "grad_norm": 0.9327316284179688, "learning_rate": 0.00019999832197375948, "loss": 2.0945, "step": 303 }, { "epoch": 0.03198316675433982, "grad_norm": 1.3414212465286255, "learning_rate": 0.00019999811875322566, "loss": 1.8407, "step": 304 }, { "epoch": 0.03208837453971594, "grad_norm": 0.7236807346343994, "learning_rate": 0.0001999979039202396, "loss": 2.1481, "step": 305 }, { "epoch": 0.032193582325092056, "grad_norm": 0.7519909143447876, "learning_rate": 0.00019999767747482623, "loss": 2.156, "step": 306 }, { "epoch": 0.03229879011046818, "grad_norm": 1.3652244806289673, "learning_rate": 0.00019999743941701188, "loss": 1.8686, "step": 307 }, { "epoch": 0.03240399789584429, "grad_norm": 0.5761735439300537, "learning_rate": 0.00019999718974682417, "loss": 2.0619, "step": 308 }, { "epoch": 0.03250920568122041, "grad_norm": 1.1688545942306519, "learning_rate": 0.0001999969284642921, "loss": 2.0442, "step": 309 }, { "epoch": 0.03261441346659653, "grad_norm": 0.6734681129455566, "learning_rate": 0.000199996655569446, "loss": 2.2581, "step": 310 }, { "epoch": 0.03271962125197265, "grad_norm": 0.9942630529403687, "learning_rate": 0.00019999637106231756, "loss": 2.0594, "step": 311 }, { "epoch": 0.03282482903734876, "grad_norm": 0.9288811087608337, "learning_rate": 0.00019999607494293985, "loss": 1.8957, "step": 312 }, { "epoch": 0.032930036822724884, "grad_norm": 2.1428439617156982, "learning_rate": 0.00019999576721134723, "loss": 2.2481, "step": 313 }, { "epoch": 0.033035244608101, "grad_norm": 1.1253790855407715, "learning_rate": 0.00019999544786757545, "loss": 2.0877, "step": 314 }, { "epoch": 0.03314045239347712, "grad_norm": 0.9545801281929016, "learning_rate": 0.00019999511691166157, "loss": 2.2495, "step": 315 }, { "epoch": 0.033245660178853234, "grad_norm": 0.9224940538406372, "learning_rate": 0.00019999477434364405, "loss": 2.0281, "step": 316 }, { "epoch": 0.033350867964229355, "grad_norm": 0.9393844604492188, "learning_rate": 0.00019999442016356266, "loss": 1.7388, "step": 317 }, { "epoch": 0.03345607574960547, "grad_norm": 0.8138694763183594, "learning_rate": 0.00019999405437145856, "loss": 1.8485, "step": 318 }, { "epoch": 0.03356128353498159, "grad_norm": 0.9134123921394348, "learning_rate": 0.00019999367696737415, "loss": 1.9404, "step": 319 }, { "epoch": 0.033666491320357705, "grad_norm": 0.7344478368759155, "learning_rate": 0.0001999932879513533, "loss": 2.4859, "step": 320 }, { "epoch": 0.03377169910573383, "grad_norm": 1.5100687742233276, "learning_rate": 0.00019999288732344122, "loss": 1.9299, "step": 321 }, { "epoch": 0.03387690689110994, "grad_norm": 0.7897876501083374, "learning_rate": 0.0001999924750836844, "loss": 2.0341, "step": 322 }, { "epoch": 0.03398211467648606, "grad_norm": 0.8573527932167053, "learning_rate": 0.00019999205123213073, "loss": 1.8272, "step": 323 }, { "epoch": 0.03408732246186218, "grad_norm": 0.6045930981636047, "learning_rate": 0.00019999161576882938, "loss": 2.3906, "step": 324 }, { "epoch": 0.0341925302472383, "grad_norm": 1.1515878438949585, "learning_rate": 0.000199991168693831, "loss": 1.8901, "step": 325 }, { "epoch": 0.03429773803261441, "grad_norm": 0.7033329606056213, "learning_rate": 0.00019999071000718742, "loss": 2.0389, "step": 326 }, { "epoch": 0.034402945817990534, "grad_norm": 0.7773725986480713, "learning_rate": 0.00019999023970895198, "loss": 1.7427, "step": 327 }, { "epoch": 0.03450815360336665, "grad_norm": 0.6639922261238098, "learning_rate": 0.0001999897577991792, "loss": 2.4179, "step": 328 }, { "epoch": 0.03461336138874277, "grad_norm": 0.8306198716163635, "learning_rate": 0.00019998926427792517, "loss": 1.8829, "step": 329 }, { "epoch": 0.034718569174118884, "grad_norm": 0.9085239768028259, "learning_rate": 0.00019998875914524714, "loss": 2.3608, "step": 330 }, { "epoch": 0.034823776959495005, "grad_norm": 1.2059913873672485, "learning_rate": 0.00019998824240120372, "loss": 2.1859, "step": 331 }, { "epoch": 0.03492898474487112, "grad_norm": 0.8465059399604797, "learning_rate": 0.000199987714045855, "loss": 1.9598, "step": 332 }, { "epoch": 0.03503419253024724, "grad_norm": 1.1591929197311401, "learning_rate": 0.00019998717407926228, "loss": 1.7713, "step": 333 }, { "epoch": 0.035139400315623355, "grad_norm": 1.2473613023757935, "learning_rate": 0.0001999866225014883, "loss": 2.0015, "step": 334 }, { "epoch": 0.035244608100999476, "grad_norm": 1.0094128847122192, "learning_rate": 0.0001999860593125971, "loss": 2.3931, "step": 335 }, { "epoch": 0.03534981588637559, "grad_norm": 0.587139368057251, "learning_rate": 0.00019998548451265405, "loss": 2.1993, "step": 336 }, { "epoch": 0.03545502367175171, "grad_norm": 1.4602278470993042, "learning_rate": 0.00019998489810172596, "loss": 2.1095, "step": 337 }, { "epoch": 0.035560231457127826, "grad_norm": 0.9542207717895508, "learning_rate": 0.00019998430007988087, "loss": 2.0584, "step": 338 }, { "epoch": 0.03566543924250395, "grad_norm": 0.9044630527496338, "learning_rate": 0.00019998369044718826, "loss": 2.2235, "step": 339 }, { "epoch": 0.03577064702788006, "grad_norm": 0.8812581300735474, "learning_rate": 0.0001999830692037189, "loss": 2.0134, "step": 340 }, { "epoch": 0.03587585481325618, "grad_norm": 0.8419604301452637, "learning_rate": 0.000199982436349545, "loss": 2.0388, "step": 341 }, { "epoch": 0.0359810625986323, "grad_norm": 0.7698401808738708, "learning_rate": 0.00019998179188473997, "loss": 1.9938, "step": 342 }, { "epoch": 0.03608627038400842, "grad_norm": 0.808614194393158, "learning_rate": 0.0001999811358093787, "loss": 2.1887, "step": 343 }, { "epoch": 0.03619147816938453, "grad_norm": 0.756734311580658, "learning_rate": 0.00019998046812353732, "loss": 2.1403, "step": 344 }, { "epoch": 0.036296685954760655, "grad_norm": 0.8613669276237488, "learning_rate": 0.00019997978882729345, "loss": 1.4033, "step": 345 }, { "epoch": 0.03640189374013677, "grad_norm": 0.8686724305152893, "learning_rate": 0.0001999790979207259, "loss": 2.2557, "step": 346 }, { "epoch": 0.03650710152551289, "grad_norm": 1.0263909101486206, "learning_rate": 0.00019997839540391495, "loss": 2.1586, "step": 347 }, { "epoch": 0.036612309310889005, "grad_norm": 1.0269930362701416, "learning_rate": 0.00019997768127694214, "loss": 2.3377, "step": 348 }, { "epoch": 0.036717517096265126, "grad_norm": 1.0349308252334595, "learning_rate": 0.00019997695553989042, "loss": 1.8215, "step": 349 }, { "epoch": 0.03682272488164124, "grad_norm": 0.8196139931678772, "learning_rate": 0.0001999762181928441, "loss": 2.0538, "step": 350 }, { "epoch": 0.03692793266701736, "grad_norm": 0.9920759797096252, "learning_rate": 0.00019997546923588875, "loss": 2.2415, "step": 351 }, { "epoch": 0.037033140452393476, "grad_norm": 1.101965069770813, "learning_rate": 0.00019997470866911136, "loss": 1.9107, "step": 352 }, { "epoch": 0.0371383482377696, "grad_norm": 1.4456695318222046, "learning_rate": 0.00019997393649260028, "loss": 2.3002, "step": 353 }, { "epoch": 0.03724355602314571, "grad_norm": 1.1259775161743164, "learning_rate": 0.00019997315270644514, "loss": 1.8325, "step": 354 }, { "epoch": 0.03734876380852183, "grad_norm": 0.7673949003219604, "learning_rate": 0.000199972357310737, "loss": 2.3046, "step": 355 }, { "epoch": 0.03745397159389795, "grad_norm": 0.8659745454788208, "learning_rate": 0.00019997155030556822, "loss": 2.1083, "step": 356 }, { "epoch": 0.03755917937927407, "grad_norm": 0.6444668769836426, "learning_rate": 0.0001999707316910325, "loss": 2.1361, "step": 357 }, { "epoch": 0.03766438716465018, "grad_norm": 0.978226900100708, "learning_rate": 0.0001999699014672249, "loss": 2.1801, "step": 358 }, { "epoch": 0.037769594950026304, "grad_norm": 1.1167306900024414, "learning_rate": 0.0001999690596342418, "loss": 2.3531, "step": 359 }, { "epoch": 0.03787480273540242, "grad_norm": 0.9561347365379333, "learning_rate": 0.00019996820619218105, "loss": 2.2767, "step": 360 }, { "epoch": 0.03798001052077854, "grad_norm": 0.6484118700027466, "learning_rate": 0.00019996734114114165, "loss": 2.2618, "step": 361 }, { "epoch": 0.038085218306154654, "grad_norm": 1.1894108057022095, "learning_rate": 0.00019996646448122414, "loss": 2.169, "step": 362 }, { "epoch": 0.038190426091530776, "grad_norm": 0.7844385504722595, "learning_rate": 0.00019996557621253027, "loss": 1.8752, "step": 363 }, { "epoch": 0.03829563387690689, "grad_norm": 0.7246622443199158, "learning_rate": 0.00019996467633516326, "loss": 2.1476, "step": 364 }, { "epoch": 0.03840084166228301, "grad_norm": 0.7702452540397644, "learning_rate": 0.0001999637648492275, "loss": 2.038, "step": 365 }, { "epoch": 0.038506049447659126, "grad_norm": 0.7791807055473328, "learning_rate": 0.00019996284175482893, "loss": 1.9363, "step": 366 }, { "epoch": 0.03861125723303525, "grad_norm": 0.650177538394928, "learning_rate": 0.00019996190705207475, "loss": 2.0905, "step": 367 }, { "epoch": 0.03871646501841136, "grad_norm": 1.108355164527893, "learning_rate": 0.00019996096074107342, "loss": 2.0743, "step": 368 }, { "epoch": 0.03882167280378748, "grad_norm": 1.025335431098938, "learning_rate": 0.0001999600028219349, "loss": 1.895, "step": 369 }, { "epoch": 0.0389268805891636, "grad_norm": 1.1372300386428833, "learning_rate": 0.0001999590332947704, "loss": 2.1421, "step": 370 }, { "epoch": 0.03903208837453972, "grad_norm": 1.4822285175323486, "learning_rate": 0.00019995805215969258, "loss": 2.3544, "step": 371 }, { "epoch": 0.03913729615991583, "grad_norm": 1.2210115194320679, "learning_rate": 0.00019995705941681523, "loss": 2.0829, "step": 372 }, { "epoch": 0.039242503945291954, "grad_norm": 0.9157518148422241, "learning_rate": 0.00019995605506625377, "loss": 1.9328, "step": 373 }, { "epoch": 0.03934771173066807, "grad_norm": 0.8865572214126587, "learning_rate": 0.00019995503910812478, "loss": 2.202, "step": 374 }, { "epoch": 0.03945291951604419, "grad_norm": 1.0392677783966064, "learning_rate": 0.00019995401154254626, "loss": 2.1567, "step": 375 }, { "epoch": 0.039558127301420304, "grad_norm": 1.0013548135757446, "learning_rate": 0.00019995297236963749, "loss": 1.9707, "step": 376 }, { "epoch": 0.039663335086796425, "grad_norm": 0.6896701455116272, "learning_rate": 0.00019995192158951919, "loss": 1.8789, "step": 377 }, { "epoch": 0.03976854287217254, "grad_norm": 0.6109434366226196, "learning_rate": 0.00019995085920231336, "loss": 2.2985, "step": 378 }, { "epoch": 0.03987375065754866, "grad_norm": 0.7272301912307739, "learning_rate": 0.00019994978520814337, "loss": 1.8993, "step": 379 }, { "epoch": 0.039978958442924775, "grad_norm": 1.2493723630905151, "learning_rate": 0.00019994869960713397, "loss": 2.2194, "step": 380 }, { "epoch": 0.040084166228300896, "grad_norm": 0.7804626822471619, "learning_rate": 0.0001999476023994112, "loss": 2.0154, "step": 381 }, { "epoch": 0.04018937401367701, "grad_norm": 0.863120973110199, "learning_rate": 0.00019994649358510248, "loss": 1.7918, "step": 382 }, { "epoch": 0.04029458179905313, "grad_norm": 1.1610263586044312, "learning_rate": 0.0001999453731643366, "loss": 2.3425, "step": 383 }, { "epoch": 0.040399789584429247, "grad_norm": 0.9565117359161377, "learning_rate": 0.00019994424113724363, "loss": 1.8774, "step": 384 }, { "epoch": 0.04050499736980537, "grad_norm": 0.676775336265564, "learning_rate": 0.00019994309750395506, "loss": 2.066, "step": 385 }, { "epoch": 0.04061020515518148, "grad_norm": 0.6846379637718201, "learning_rate": 0.00019994194226460367, "loss": 1.9599, "step": 386 }, { "epoch": 0.0407154129405576, "grad_norm": 0.5663594603538513, "learning_rate": 0.0001999407754193236, "loss": 1.9791, "step": 387 }, { "epoch": 0.04082062072593372, "grad_norm": 0.967096745967865, "learning_rate": 0.0001999395969682504, "loss": 2.1146, "step": 388 }, { "epoch": 0.04092582851130984, "grad_norm": 0.7784907817840576, "learning_rate": 0.00019993840691152093, "loss": 2.0466, "step": 389 }, { "epoch": 0.041031036296685953, "grad_norm": 0.7650101780891418, "learning_rate": 0.0001999372052492733, "loss": 2.2071, "step": 390 }, { "epoch": 0.041136244082062075, "grad_norm": 0.6302483677864075, "learning_rate": 0.00019993599198164715, "loss": 2.0403, "step": 391 }, { "epoch": 0.04124145186743819, "grad_norm": 0.6898879408836365, "learning_rate": 0.00019993476710878332, "loss": 1.7578, "step": 392 }, { "epoch": 0.04134665965281431, "grad_norm": 1.1919569969177246, "learning_rate": 0.00019993353063082404, "loss": 1.9792, "step": 393 }, { "epoch": 0.041451867438190425, "grad_norm": 0.6219474077224731, "learning_rate": 0.00019993228254791293, "loss": 2.4463, "step": 394 }, { "epoch": 0.041557075223566546, "grad_norm": 0.9380045533180237, "learning_rate": 0.00019993102286019495, "loss": 1.8507, "step": 395 }, { "epoch": 0.04166228300894266, "grad_norm": 1.5710420608520508, "learning_rate": 0.0001999297515678163, "loss": 2.1511, "step": 396 }, { "epoch": 0.04176749079431878, "grad_norm": 0.6227583289146423, "learning_rate": 0.00019992846867092473, "loss": 2.0332, "step": 397 }, { "epoch": 0.041872698579694896, "grad_norm": 0.7754364609718323, "learning_rate": 0.0001999271741696691, "loss": 1.6828, "step": 398 }, { "epoch": 0.04197790636507102, "grad_norm": 1.0617812871932983, "learning_rate": 0.0001999258680641998, "loss": 2.0997, "step": 399 }, { "epoch": 0.04208311415044713, "grad_norm": 0.8257030844688416, "learning_rate": 0.00019992455035466847, "loss": 1.948, "step": 400 }, { "epoch": 0.04218832193582325, "grad_norm": 0.9446254968643188, "learning_rate": 0.00019992322104122817, "loss": 1.7266, "step": 401 }, { "epoch": 0.04229352972119937, "grad_norm": 0.52854323387146, "learning_rate": 0.00019992188012403324, "loss": 2.2328, "step": 402 }, { "epoch": 0.04239873750657549, "grad_norm": 0.9252466559410095, "learning_rate": 0.00019992052760323941, "loss": 2.1785, "step": 403 }, { "epoch": 0.0425039452919516, "grad_norm": 0.7319321632385254, "learning_rate": 0.00019991916347900378, "loss": 2.2174, "step": 404 }, { "epoch": 0.042609153077327724, "grad_norm": 0.5902657508850098, "learning_rate": 0.00019991778775148465, "loss": 2.1312, "step": 405 }, { "epoch": 0.04271436086270384, "grad_norm": 0.8190617561340332, "learning_rate": 0.0001999164004208419, "loss": 1.658, "step": 406 }, { "epoch": 0.04281956864807996, "grad_norm": 0.9919306635856628, "learning_rate": 0.00019991500148723658, "loss": 2.158, "step": 407 }, { "epoch": 0.042924776433456074, "grad_norm": 1.2936168909072876, "learning_rate": 0.00019991359095083112, "loss": 1.8505, "step": 408 }, { "epoch": 0.043029984218832196, "grad_norm": 0.947219729423523, "learning_rate": 0.00019991216881178937, "loss": 1.8942, "step": 409 }, { "epoch": 0.04313519200420831, "grad_norm": 1.0280535221099854, "learning_rate": 0.00019991073507027646, "loss": 1.7948, "step": 410 }, { "epoch": 0.04324039978958443, "grad_norm": 0.795024573802948, "learning_rate": 0.00019990928972645887, "loss": 1.9645, "step": 411 }, { "epoch": 0.043345607574960546, "grad_norm": 0.9444951415061951, "learning_rate": 0.00019990783278050448, "loss": 2.0397, "step": 412 }, { "epoch": 0.04345081536033667, "grad_norm": 0.7790425419807434, "learning_rate": 0.00019990636423258246, "loss": 2.0172, "step": 413 }, { "epoch": 0.04355602314571278, "grad_norm": 0.8171620965003967, "learning_rate": 0.00019990488408286333, "loss": 1.9963, "step": 414 }, { "epoch": 0.0436612309310889, "grad_norm": 1.3680206537246704, "learning_rate": 0.000199903392331519, "loss": 1.5671, "step": 415 }, { "epoch": 0.04376643871646502, "grad_norm": 1.41587495803833, "learning_rate": 0.00019990188897872266, "loss": 2.2752, "step": 416 }, { "epoch": 0.04387164650184114, "grad_norm": 0.888954222202301, "learning_rate": 0.00019990037402464896, "loss": 2.1677, "step": 417 }, { "epoch": 0.04397685428721725, "grad_norm": 0.8410660028457642, "learning_rate": 0.00019989884746947378, "loss": 2.1372, "step": 418 }, { "epoch": 0.044082062072593374, "grad_norm": 0.8259899616241455, "learning_rate": 0.0001998973093133744, "loss": 1.7659, "step": 419 }, { "epoch": 0.04418726985796949, "grad_norm": 1.2934560775756836, "learning_rate": 0.00019989575955652944, "loss": 1.8465, "step": 420 }, { "epoch": 0.04429247764334561, "grad_norm": 0.7665582299232483, "learning_rate": 0.00019989419819911887, "loss": 2.2067, "step": 421 }, { "epoch": 0.044397685428721724, "grad_norm": 0.676923394203186, "learning_rate": 0.000199892625241324, "loss": 2.3273, "step": 422 }, { "epoch": 0.044502893214097845, "grad_norm": 1.0820274353027344, "learning_rate": 0.00019989104068332756, "loss": 1.8123, "step": 423 }, { "epoch": 0.04460810099947396, "grad_norm": 1.1282938718795776, "learning_rate": 0.00019988944452531345, "loss": 1.9473, "step": 424 }, { "epoch": 0.04471330878485008, "grad_norm": 1.5150783061981201, "learning_rate": 0.00019988783676746708, "loss": 1.6027, "step": 425 }, { "epoch": 0.044818516570226195, "grad_norm": 0.991240918636322, "learning_rate": 0.00019988621740997512, "loss": 1.9637, "step": 426 }, { "epoch": 0.04492372435560232, "grad_norm": 0.8624051809310913, "learning_rate": 0.00019988458645302568, "loss": 1.9826, "step": 427 }, { "epoch": 0.04502893214097843, "grad_norm": 0.9856376647949219, "learning_rate": 0.00019988294389680812, "loss": 2.3543, "step": 428 }, { "epoch": 0.04513413992635455, "grad_norm": 0.5573300719261169, "learning_rate": 0.0001998812897415132, "loss": 1.955, "step": 429 }, { "epoch": 0.04523934771173067, "grad_norm": 0.8506868481636047, "learning_rate": 0.000199879623987333, "loss": 2.0627, "step": 430 }, { "epoch": 0.04534455549710679, "grad_norm": 1.4136837720870972, "learning_rate": 0.00019987794663446095, "loss": 2.0503, "step": 431 }, { "epoch": 0.0454497632824829, "grad_norm": 0.9526495337486267, "learning_rate": 0.0001998762576830919, "loss": 1.7729, "step": 432 }, { "epoch": 0.045554971067859024, "grad_norm": 0.8205630779266357, "learning_rate": 0.00019987455713342187, "loss": 2.3268, "step": 433 }, { "epoch": 0.04566017885323514, "grad_norm": 0.9681246876716614, "learning_rate": 0.0001998728449856484, "loss": 1.862, "step": 434 }, { "epoch": 0.04576538663861126, "grad_norm": 0.7441048622131348, "learning_rate": 0.00019987112123997033, "loss": 2.1119, "step": 435 }, { "epoch": 0.045870594423987374, "grad_norm": 1.2735470533370972, "learning_rate": 0.00019986938589658783, "loss": 1.7813, "step": 436 }, { "epoch": 0.045975802209363495, "grad_norm": 1.4802747964859009, "learning_rate": 0.00019986763895570242, "loss": 2.1473, "step": 437 }, { "epoch": 0.04608100999473961, "grad_norm": 1.388631820678711, "learning_rate": 0.0001998658804175169, "loss": 1.8662, "step": 438 }, { "epoch": 0.04618621778011573, "grad_norm": 0.5619102716445923, "learning_rate": 0.00019986411028223558, "loss": 2.0551, "step": 439 }, { "epoch": 0.046291425565491845, "grad_norm": 0.7170664668083191, "learning_rate": 0.000199862328550064, "loss": 2.0751, "step": 440 }, { "epoch": 0.046396633350867966, "grad_norm": 0.7204136252403259, "learning_rate": 0.000199860535221209, "loss": 2.0988, "step": 441 }, { "epoch": 0.04650184113624408, "grad_norm": 1.1917775869369507, "learning_rate": 0.0001998587302958789, "loss": 1.8188, "step": 442 }, { "epoch": 0.0466070489216202, "grad_norm": 0.861599862575531, "learning_rate": 0.00019985691377428326, "loss": 2.3283, "step": 443 }, { "epoch": 0.046712256706996316, "grad_norm": 0.7343453168869019, "learning_rate": 0.00019985508565663305, "loss": 2.1423, "step": 444 }, { "epoch": 0.04681746449237244, "grad_norm": 1.0733033418655396, "learning_rate": 0.00019985324594314055, "loss": 1.814, "step": 445 }, { "epoch": 0.04692267227774855, "grad_norm": 0.7048352956771851, "learning_rate": 0.00019985139463401944, "loss": 2.2652, "step": 446 }, { "epoch": 0.04702788006312467, "grad_norm": 0.9343500733375549, "learning_rate": 0.00019984953172948465, "loss": 2.1747, "step": 447 }, { "epoch": 0.04713308784850079, "grad_norm": 0.9725675582885742, "learning_rate": 0.00019984765722975254, "loss": 2.0506, "step": 448 }, { "epoch": 0.04723829563387691, "grad_norm": 1.3480443954467773, "learning_rate": 0.00019984577113504076, "loss": 2.4285, "step": 449 }, { "epoch": 0.04734350341925302, "grad_norm": 1.5416226387023926, "learning_rate": 0.0001998438734455684, "loss": 1.7112, "step": 450 }, { "epoch": 0.047448711204629145, "grad_norm": 0.7121959328651428, "learning_rate": 0.0001998419641615558, "loss": 1.9964, "step": 451 }, { "epoch": 0.04755391899000526, "grad_norm": 0.905596911907196, "learning_rate": 0.00019984004328322464, "loss": 2.351, "step": 452 }, { "epoch": 0.04765912677538138, "grad_norm": 0.9009513258934021, "learning_rate": 0.00019983811081079807, "loss": 2.0917, "step": 453 }, { "epoch": 0.047764334560757495, "grad_norm": 1.4585151672363281, "learning_rate": 0.0001998361667445004, "loss": 1.8901, "step": 454 }, { "epoch": 0.047869542346133616, "grad_norm": 1.1657588481903076, "learning_rate": 0.00019983421108455746, "loss": 1.8957, "step": 455 }, { "epoch": 0.04797475013150973, "grad_norm": 0.7370551824569702, "learning_rate": 0.00019983224383119633, "loss": 1.9769, "step": 456 }, { "epoch": 0.04807995791688585, "grad_norm": 0.9683654308319092, "learning_rate": 0.00019983026498464546, "loss": 1.6403, "step": 457 }, { "epoch": 0.048185165702261966, "grad_norm": 0.7799041867256165, "learning_rate": 0.00019982827454513466, "loss": 1.9753, "step": 458 }, { "epoch": 0.04829037348763809, "grad_norm": 1.0562982559204102, "learning_rate": 0.00019982627251289504, "loss": 2.2229, "step": 459 }, { "epoch": 0.0483955812730142, "grad_norm": 0.7870259881019592, "learning_rate": 0.00019982425888815915, "loss": 2.025, "step": 460 }, { "epoch": 0.04850078905839032, "grad_norm": 1.191759467124939, "learning_rate": 0.00019982223367116076, "loss": 2.3058, "step": 461 }, { "epoch": 0.04860599684376644, "grad_norm": 1.1136091947555542, "learning_rate": 0.0001998201968621351, "loss": 1.824, "step": 462 }, { "epoch": 0.04871120462914256, "grad_norm": 0.8942115306854248, "learning_rate": 0.00019981814846131867, "loss": 2.1555, "step": 463 }, { "epoch": 0.04881641241451867, "grad_norm": 1.0691640377044678, "learning_rate": 0.00019981608846894933, "loss": 1.9872, "step": 464 }, { "epoch": 0.048921620199894794, "grad_norm": 1.0228484869003296, "learning_rate": 0.00019981401688526636, "loss": 2.1705, "step": 465 }, { "epoch": 0.04902682798527091, "grad_norm": 0.9551164507865906, "learning_rate": 0.00019981193371051026, "loss": 1.9308, "step": 466 }, { "epoch": 0.04913203577064703, "grad_norm": 1.2936720848083496, "learning_rate": 0.000199809838944923, "loss": 2.2471, "step": 467 }, { "epoch": 0.049237243556023144, "grad_norm": 0.9659333229064941, "learning_rate": 0.00019980773258874778, "loss": 1.9027, "step": 468 }, { "epoch": 0.049342451341399265, "grad_norm": 0.814227819442749, "learning_rate": 0.00019980561464222926, "loss": 1.8804, "step": 469 }, { "epoch": 0.04944765912677538, "grad_norm": 1.2761330604553223, "learning_rate": 0.00019980348510561334, "loss": 2.2427, "step": 470 }, { "epoch": 0.0495528669121515, "grad_norm": 1.2825102806091309, "learning_rate": 0.00019980134397914735, "loss": 1.4184, "step": 471 }, { "epoch": 0.049658074697527615, "grad_norm": 1.0994774103164673, "learning_rate": 0.00019979919126307993, "loss": 2.3454, "step": 472 }, { "epoch": 0.04976328248290374, "grad_norm": 1.1263169050216675, "learning_rate": 0.00019979702695766105, "loss": 2.1226, "step": 473 }, { "epoch": 0.04986849026827985, "grad_norm": 1.1242156028747559, "learning_rate": 0.00019979485106314207, "loss": 1.7008, "step": 474 }, { "epoch": 0.04997369805365597, "grad_norm": 0.7600993514060974, "learning_rate": 0.00019979266357977564, "loss": 1.9561, "step": 475 }, { "epoch": 0.05007890583903209, "grad_norm": 1.0761032104492188, "learning_rate": 0.00019979046450781577, "loss": 2.1057, "step": 476 }, { "epoch": 0.05018411362440821, "grad_norm": 0.9772080779075623, "learning_rate": 0.00019978825384751788, "loss": 2.1573, "step": 477 }, { "epoch": 0.05028932140978432, "grad_norm": 0.6181446313858032, "learning_rate": 0.0001997860315991387, "loss": 2.1602, "step": 478 }, { "epoch": 0.050394529195160444, "grad_norm": 1.589167594909668, "learning_rate": 0.0001997837977629362, "loss": 2.1132, "step": 479 }, { "epoch": 0.05049973698053656, "grad_norm": 0.6622167229652405, "learning_rate": 0.0001997815523391699, "loss": 2.331, "step": 480 }, { "epoch": 0.05060494476591268, "grad_norm": 1.0673385858535767, "learning_rate": 0.00019977929532810046, "loss": 2.387, "step": 481 }, { "epoch": 0.050710152551288794, "grad_norm": 0.897954523563385, "learning_rate": 0.00019977702672999007, "loss": 2.0587, "step": 482 }, { "epoch": 0.050815360336664915, "grad_norm": 1.2834991216659546, "learning_rate": 0.00019977474654510205, "loss": 2.2961, "step": 483 }, { "epoch": 0.05092056812204103, "grad_norm": 0.9525724649429321, "learning_rate": 0.0001997724547737013, "loss": 2.1939, "step": 484 }, { "epoch": 0.05102577590741715, "grad_norm": 1.231590747833252, "learning_rate": 0.00019977015141605392, "loss": 2.0266, "step": 485 }, { "epoch": 0.051130983692793265, "grad_norm": 2.677229642868042, "learning_rate": 0.0001997678364724274, "loss": 1.3896, "step": 486 }, { "epoch": 0.051236191478169386, "grad_norm": 0.6867842674255371, "learning_rate": 0.00019976550994309054, "loss": 1.9468, "step": 487 }, { "epoch": 0.0513413992635455, "grad_norm": 0.6714895963668823, "learning_rate": 0.00019976317182831356, "loss": 1.925, "step": 488 }, { "epoch": 0.05144660704892162, "grad_norm": 1.246391773223877, "learning_rate": 0.00019976082212836793, "loss": 2.1784, "step": 489 }, { "epoch": 0.051551814834297736, "grad_norm": 1.2437677383422852, "learning_rate": 0.00019975846084352653, "loss": 2.0721, "step": 490 }, { "epoch": 0.05165702261967386, "grad_norm": 0.6305761933326721, "learning_rate": 0.00019975608797406357, "loss": 2.1311, "step": 491 }, { "epoch": 0.05176223040504997, "grad_norm": 1.3557363748550415, "learning_rate": 0.0001997537035202546, "loss": 2.0776, "step": 492 }, { "epoch": 0.05186743819042609, "grad_norm": 1.1781286001205444, "learning_rate": 0.00019975130748237655, "loss": 1.7604, "step": 493 }, { "epoch": 0.05197264597580221, "grad_norm": 1.2240660190582275, "learning_rate": 0.0001997488998607076, "loss": 1.9454, "step": 494 }, { "epoch": 0.05207785376117833, "grad_norm": 0.7471686601638794, "learning_rate": 0.00019974648065552736, "loss": 2.2171, "step": 495 }, { "epoch": 0.05218306154655444, "grad_norm": 0.757163405418396, "learning_rate": 0.0001997440498671168, "loss": 2.1052, "step": 496 }, { "epoch": 0.052288269331930565, "grad_norm": 0.6472220420837402, "learning_rate": 0.00019974160749575818, "loss": 1.8683, "step": 497 }, { "epoch": 0.05239347711730668, "grad_norm": 1.0862151384353638, "learning_rate": 0.00019973915354173515, "loss": 1.9728, "step": 498 }, { "epoch": 0.0524986849026828, "grad_norm": 0.7950903177261353, "learning_rate": 0.00019973668800533264, "loss": 1.7395, "step": 499 }, { "epoch": 0.052603892688058915, "grad_norm": 0.8745871782302856, "learning_rate": 0.00019973421088683696, "loss": 1.7054, "step": 500 }, { "epoch": 0.052709100473435036, "grad_norm": 0.8229191899299622, "learning_rate": 0.00019973172218653578, "loss": 1.9457, "step": 501 }, { "epoch": 0.05281430825881115, "grad_norm": 1.065964698791504, "learning_rate": 0.00019972922190471812, "loss": 2.1223, "step": 502 }, { "epoch": 0.05291951604418727, "grad_norm": 1.3047000169754028, "learning_rate": 0.00019972671004167433, "loss": 2.145, "step": 503 }, { "epoch": 0.053024723829563386, "grad_norm": 0.7635353803634644, "learning_rate": 0.00019972418659769606, "loss": 2.4166, "step": 504 }, { "epoch": 0.05312993161493951, "grad_norm": 0.7105801701545715, "learning_rate": 0.00019972165157307643, "loss": 2.3084, "step": 505 }, { "epoch": 0.05323513940031562, "grad_norm": 1.485958218574524, "learning_rate": 0.00019971910496810976, "loss": 2.1874, "step": 506 }, { "epoch": 0.05334034718569174, "grad_norm": 1.4032565355300903, "learning_rate": 0.0001997165467830918, "loss": 1.6684, "step": 507 }, { "epoch": 0.05344555497106786, "grad_norm": 0.8490357398986816, "learning_rate": 0.00019971397701831962, "loss": 2.2013, "step": 508 }, { "epoch": 0.05355076275644398, "grad_norm": 1.023547649383545, "learning_rate": 0.00019971139567409165, "loss": 1.885, "step": 509 }, { "epoch": 0.05365597054182009, "grad_norm": 1.5144532918930054, "learning_rate": 0.00019970880275070762, "loss": 1.6336, "step": 510 }, { "epoch": 0.053761178327196214, "grad_norm": 0.5086830258369446, "learning_rate": 0.00019970619824846866, "loss": 2.2339, "step": 511 }, { "epoch": 0.05386638611257233, "grad_norm": 0.7949154376983643, "learning_rate": 0.00019970358216767723, "loss": 1.8825, "step": 512 }, { "epoch": 0.05397159389794845, "grad_norm": 0.9173998832702637, "learning_rate": 0.00019970095450863714, "loss": 1.5642, "step": 513 }, { "epoch": 0.054076801683324564, "grad_norm": 1.7043392658233643, "learning_rate": 0.00019969831527165348, "loss": 1.8851, "step": 514 }, { "epoch": 0.054182009468700686, "grad_norm": 0.7839592099189758, "learning_rate": 0.00019969566445703278, "loss": 2.0123, "step": 515 }, { "epoch": 0.0542872172540768, "grad_norm": 0.8127647638320923, "learning_rate": 0.00019969300206508286, "loss": 2.2239, "step": 516 }, { "epoch": 0.05439242503945292, "grad_norm": 0.8179565668106079, "learning_rate": 0.00019969032809611287, "loss": 2.2176, "step": 517 }, { "epoch": 0.054497632824829036, "grad_norm": 0.7557021975517273, "learning_rate": 0.0001996876425504334, "loss": 2.0041, "step": 518 }, { "epoch": 0.05460284061020516, "grad_norm": 0.9029064178466797, "learning_rate": 0.0001996849454283562, "loss": 2.214, "step": 519 }, { "epoch": 0.05470804839558127, "grad_norm": 0.778174638748169, "learning_rate": 0.0001996822367301946, "loss": 2.3064, "step": 520 }, { "epoch": 0.05481325618095739, "grad_norm": 0.7716636061668396, "learning_rate": 0.00019967951645626306, "loss": 2.2683, "step": 521 }, { "epoch": 0.05491846396633351, "grad_norm": 0.7747607231140137, "learning_rate": 0.00019967678460687752, "loss": 2.257, "step": 522 }, { "epoch": 0.05502367175170963, "grad_norm": 1.1563743352890015, "learning_rate": 0.00019967404118235521, "loss": 1.6323, "step": 523 }, { "epoch": 0.05512887953708574, "grad_norm": 0.7931325435638428, "learning_rate": 0.0001996712861830147, "loss": 1.4886, "step": 524 }, { "epoch": 0.055234087322461864, "grad_norm": 1.187528133392334, "learning_rate": 0.00019966851960917596, "loss": 2.2017, "step": 525 }, { "epoch": 0.05533929510783798, "grad_norm": 0.762750506401062, "learning_rate": 0.00019966574146116023, "loss": 1.953, "step": 526 }, { "epoch": 0.0554445028932141, "grad_norm": 0.7893527150154114, "learning_rate": 0.00019966295173929016, "loss": 2.2539, "step": 527 }, { "epoch": 0.055549710678590214, "grad_norm": 1.012707233428955, "learning_rate": 0.00019966015044388966, "loss": 2.0272, "step": 528 }, { "epoch": 0.055654918463966335, "grad_norm": 0.7175372838973999, "learning_rate": 0.00019965733757528405, "loss": 2.2922, "step": 529 }, { "epoch": 0.05576012624934245, "grad_norm": 1.1108931303024292, "learning_rate": 0.0001996545131338, "loss": 2.0014, "step": 530 }, { "epoch": 0.05586533403471857, "grad_norm": 1.002982258796692, "learning_rate": 0.00019965167711976552, "loss": 1.9751, "step": 531 }, { "epoch": 0.055970541820094685, "grad_norm": 1.2420021295547485, "learning_rate": 0.00019964882953350989, "loss": 1.8003, "step": 532 }, { "epoch": 0.05607574960547081, "grad_norm": 0.8945255875587463, "learning_rate": 0.00019964597037536383, "loss": 2.0404, "step": 533 }, { "epoch": 0.05618095739084692, "grad_norm": 0.8845522999763489, "learning_rate": 0.00019964309964565937, "loss": 1.8745, "step": 534 }, { "epoch": 0.05628616517622304, "grad_norm": 0.826899528503418, "learning_rate": 0.00019964021734472987, "loss": 1.8893, "step": 535 }, { "epoch": 0.05639137296159916, "grad_norm": 0.8589649200439453, "learning_rate": 0.00019963732347291, "loss": 2.2759, "step": 536 }, { "epoch": 0.05649658074697528, "grad_norm": 0.9177697896957397, "learning_rate": 0.00019963441803053588, "loss": 1.9493, "step": 537 }, { "epoch": 0.05660178853235139, "grad_norm": 1.1781582832336426, "learning_rate": 0.0001996315010179449, "loss": 1.8959, "step": 538 }, { "epoch": 0.056706996317727514, "grad_norm": 1.119227647781372, "learning_rate": 0.00019962857243547574, "loss": 2.3161, "step": 539 }, { "epoch": 0.05681220410310363, "grad_norm": 0.8425557017326355, "learning_rate": 0.00019962563228346857, "loss": 2.2769, "step": 540 }, { "epoch": 0.05691741188847975, "grad_norm": 1.2644481658935547, "learning_rate": 0.0001996226805622648, "loss": 1.9871, "step": 541 }, { "epoch": 0.057022619673855864, "grad_norm": 0.9367343187332153, "learning_rate": 0.00019961971727220715, "loss": 2.4598, "step": 542 }, { "epoch": 0.057127827459231985, "grad_norm": 0.9345009326934814, "learning_rate": 0.00019961674241363974, "loss": 2.0872, "step": 543 }, { "epoch": 0.0572330352446081, "grad_norm": 1.1807975769042969, "learning_rate": 0.00019961375598690813, "loss": 1.8824, "step": 544 }, { "epoch": 0.05733824302998422, "grad_norm": 0.9418180584907532, "learning_rate": 0.00019961075799235903, "loss": 2.2195, "step": 545 }, { "epoch": 0.057443450815360335, "grad_norm": 1.003548502922058, "learning_rate": 0.0001996077484303406, "loss": 1.5655, "step": 546 }, { "epoch": 0.057548658600736456, "grad_norm": 0.9430425763130188, "learning_rate": 0.00019960472730120237, "loss": 1.9868, "step": 547 }, { "epoch": 0.05765386638611257, "grad_norm": 0.7052996754646301, "learning_rate": 0.00019960169460529515, "loss": 2.1614, "step": 548 }, { "epoch": 0.05775907417148869, "grad_norm": 0.9624035954475403, "learning_rate": 0.0001995986503429711, "loss": 1.8665, "step": 549 }, { "epoch": 0.057864281956864806, "grad_norm": 0.8051895499229431, "learning_rate": 0.00019959559451458375, "loss": 2.1922, "step": 550 }, { "epoch": 0.05796948974224093, "grad_norm": 5.186445713043213, "learning_rate": 0.000199592527120488, "loss": 2.3402, "step": 551 }, { "epoch": 0.05807469752761704, "grad_norm": 0.7552983164787292, "learning_rate": 0.00019958944816104, "loss": 1.9901, "step": 552 }, { "epoch": 0.05817990531299316, "grad_norm": 1.3142822980880737, "learning_rate": 0.0001995863576365973, "loss": 2.0759, "step": 553 }, { "epoch": 0.05828511309836928, "grad_norm": 0.6937042474746704, "learning_rate": 0.00019958325554751886, "loss": 2.1585, "step": 554 }, { "epoch": 0.0583903208837454, "grad_norm": 0.8620643615722656, "learning_rate": 0.00019958014189416489, "loss": 2.2186, "step": 555 }, { "epoch": 0.05849552866912151, "grad_norm": 0.8564947843551636, "learning_rate": 0.00019957701667689691, "loss": 2.0109, "step": 556 }, { "epoch": 0.058600736454497634, "grad_norm": 0.6378996968269348, "learning_rate": 0.0001995738798960779, "loss": 2.0941, "step": 557 }, { "epoch": 0.05870594423987375, "grad_norm": 0.6846954822540283, "learning_rate": 0.0001995707315520721, "loss": 2.0447, "step": 558 }, { "epoch": 0.05881115202524987, "grad_norm": 1.0269975662231445, "learning_rate": 0.00019956757164524516, "loss": 1.7775, "step": 559 }, { "epoch": 0.058916359810625984, "grad_norm": 0.8012015223503113, "learning_rate": 0.00019956440017596393, "loss": 2.0707, "step": 560 }, { "epoch": 0.059021567596002106, "grad_norm": 1.592552900314331, "learning_rate": 0.0001995612171445968, "loss": 1.9245, "step": 561 }, { "epoch": 0.05912677538137822, "grad_norm": 0.7692115306854248, "learning_rate": 0.00019955802255151338, "loss": 2.3235, "step": 562 }, { "epoch": 0.05923198316675434, "grad_norm": 0.8409086465835571, "learning_rate": 0.00019955481639708463, "loss": 2.0308, "step": 563 }, { "epoch": 0.059337190952130456, "grad_norm": 0.7190937399864197, "learning_rate": 0.00019955159868168288, "loss": 2.2148, "step": 564 }, { "epoch": 0.05944239873750658, "grad_norm": 1.3858685493469238, "learning_rate": 0.00019954836940568177, "loss": 2.1553, "step": 565 }, { "epoch": 0.05954760652288269, "grad_norm": 0.7564934492111206, "learning_rate": 0.00019954512856945632, "loss": 1.713, "step": 566 }, { "epoch": 0.05965281430825881, "grad_norm": 1.1952269077301025, "learning_rate": 0.00019954187617338294, "loss": 2.1569, "step": 567 }, { "epoch": 0.05975802209363493, "grad_norm": 1.282727837562561, "learning_rate": 0.00019953861221783922, "loss": 2.0503, "step": 568 }, { "epoch": 0.05986322987901105, "grad_norm": 0.8127844333648682, "learning_rate": 0.00019953533670320422, "loss": 1.7488, "step": 569 }, { "epoch": 0.05996843766438716, "grad_norm": 1.0792090892791748, "learning_rate": 0.00019953204962985837, "loss": 1.9068, "step": 570 }, { "epoch": 0.060073645449763284, "grad_norm": 0.9024432897567749, "learning_rate": 0.00019952875099818332, "loss": 2.2592, "step": 571 }, { "epoch": 0.0601788532351394, "grad_norm": 1.1777130365371704, "learning_rate": 0.0001995254408085622, "loss": 2.4742, "step": 572 }, { "epoch": 0.06028406102051552, "grad_norm": 0.7864671945571899, "learning_rate": 0.00019952211906137932, "loss": 2.0437, "step": 573 }, { "epoch": 0.060389268805891634, "grad_norm": 0.9623721837997437, "learning_rate": 0.00019951878575702047, "loss": 2.3201, "step": 574 }, { "epoch": 0.060494476591267755, "grad_norm": 0.7268714904785156, "learning_rate": 0.00019951544089587278, "loss": 1.9149, "step": 575 }, { "epoch": 0.06059968437664387, "grad_norm": 1.013013243675232, "learning_rate": 0.00019951208447832461, "loss": 1.7478, "step": 576 }, { "epoch": 0.06070489216201999, "grad_norm": 0.6717780232429504, "learning_rate": 0.00019950871650476577, "loss": 2.208, "step": 577 }, { "epoch": 0.060810099947396105, "grad_norm": 2.0669307708740234, "learning_rate": 0.00019950533697558732, "loss": 2.185, "step": 578 }, { "epoch": 0.06091530773277223, "grad_norm": 1.002172827720642, "learning_rate": 0.0001995019458911818, "loss": 2.0923, "step": 579 }, { "epoch": 0.06102051551814834, "grad_norm": 1.053417444229126, "learning_rate": 0.00019949854325194294, "loss": 2.0336, "step": 580 }, { "epoch": 0.06112572330352446, "grad_norm": 0.8169642686843872, "learning_rate": 0.0001994951290582659, "loss": 2.1974, "step": 581 }, { "epoch": 0.06123093108890058, "grad_norm": 0.8666344881057739, "learning_rate": 0.0001994917033105471, "loss": 2.1284, "step": 582 }, { "epoch": 0.0613361388742767, "grad_norm": 0.8064733743667603, "learning_rate": 0.00019948826600918443, "loss": 2.0592, "step": 583 }, { "epoch": 0.06144134665965281, "grad_norm": 1.0283541679382324, "learning_rate": 0.00019948481715457707, "loss": 2.2653, "step": 584 }, { "epoch": 0.061546554445028934, "grad_norm": 1.1109108924865723, "learning_rate": 0.00019948135674712546, "loss": 2.0874, "step": 585 }, { "epoch": 0.06165176223040505, "grad_norm": 0.8930381536483765, "learning_rate": 0.00019947788478723153, "loss": 1.988, "step": 586 }, { "epoch": 0.06175697001578117, "grad_norm": 0.8919090032577515, "learning_rate": 0.00019947440127529836, "loss": 2.3961, "step": 587 }, { "epoch": 0.061862177801157284, "grad_norm": 0.9803879857063293, "learning_rate": 0.00019947090621173053, "loss": 2.2986, "step": 588 }, { "epoch": 0.061967385586533405, "grad_norm": 2.3181352615356445, "learning_rate": 0.00019946739959693393, "loss": 2.3345, "step": 589 }, { "epoch": 0.06207259337190952, "grad_norm": 1.0249263048171997, "learning_rate": 0.00019946388143131575, "loss": 1.9578, "step": 590 }, { "epoch": 0.06217780115728564, "grad_norm": 1.8943885564804077, "learning_rate": 0.00019946035171528455, "loss": 1.5106, "step": 591 }, { "epoch": 0.062283008942661755, "grad_norm": 1.0123568773269653, "learning_rate": 0.0001994568104492502, "loss": 2.1627, "step": 592 }, { "epoch": 0.062388216728037876, "grad_norm": 0.9259027242660522, "learning_rate": 0.00019945325763362398, "loss": 1.6539, "step": 593 }, { "epoch": 0.06249342451341399, "grad_norm": 0.9884849786758423, "learning_rate": 0.00019944969326881845, "loss": 2.0843, "step": 594 }, { "epoch": 0.0625986322987901, "grad_norm": 0.735097348690033, "learning_rate": 0.0001994461173552475, "loss": 2.4694, "step": 595 }, { "epoch": 0.06270384008416623, "grad_norm": 1.136003017425537, "learning_rate": 0.0001994425298933264, "loss": 1.7615, "step": 596 }, { "epoch": 0.06280904786954235, "grad_norm": 1.316763162612915, "learning_rate": 0.00019943893088347178, "loss": 2.3421, "step": 597 }, { "epoch": 0.06291425565491847, "grad_norm": 1.165870189666748, "learning_rate": 0.00019943532032610156, "loss": 2.354, "step": 598 }, { "epoch": 0.06301946344029458, "grad_norm": 12.435591697692871, "learning_rate": 0.00019943169822163502, "loss": 2.0802, "step": 599 }, { "epoch": 0.0631246712256707, "grad_norm": 1.253097414970398, "learning_rate": 0.00019942806457049278, "loss": 2.0145, "step": 600 }, { "epoch": 0.06322987901104682, "grad_norm": 1.299344539642334, "learning_rate": 0.00019942441937309684, "loss": 1.7266, "step": 601 }, { "epoch": 0.06333508679642294, "grad_norm": 0.9620880484580994, "learning_rate": 0.00019942076262987043, "loss": 2.2549, "step": 602 }, { "epoch": 0.06344029458179905, "grad_norm": 1.1274274587631226, "learning_rate": 0.00019941709434123826, "loss": 1.9165, "step": 603 }, { "epoch": 0.06354550236717517, "grad_norm": 0.8177148103713989, "learning_rate": 0.00019941341450762629, "loss": 2.0151, "step": 604 }, { "epoch": 0.06365071015255129, "grad_norm": 1.068963885307312, "learning_rate": 0.00019940972312946186, "loss": 2.1957, "step": 605 }, { "epoch": 0.06375591793792741, "grad_norm": 0.8297029733657837, "learning_rate": 0.00019940602020717364, "loss": 2.2105, "step": 606 }, { "epoch": 0.06386112572330352, "grad_norm": 0.8417388796806335, "learning_rate": 0.00019940230574119164, "loss": 1.9136, "step": 607 }, { "epoch": 0.06396633350867964, "grad_norm": 0.8712098598480225, "learning_rate": 0.00019939857973194717, "loss": 2.4713, "step": 608 }, { "epoch": 0.06407154129405576, "grad_norm": 1.0298516750335693, "learning_rate": 0.000199394842179873, "loss": 1.8507, "step": 609 }, { "epoch": 0.06417674907943188, "grad_norm": 0.9027281999588013, "learning_rate": 0.00019939109308540304, "loss": 2.0152, "step": 610 }, { "epoch": 0.06428195686480799, "grad_norm": 1.213963270187378, "learning_rate": 0.00019938733244897274, "loss": 1.9294, "step": 611 }, { "epoch": 0.06438716465018411, "grad_norm": 1.2193472385406494, "learning_rate": 0.00019938356027101884, "loss": 1.9002, "step": 612 }, { "epoch": 0.06449237243556023, "grad_norm": 1.7133346796035767, "learning_rate": 0.0001993797765519793, "loss": 2.3218, "step": 613 }, { "epoch": 0.06459758022093635, "grad_norm": 1.3086202144622803, "learning_rate": 0.0001993759812922936, "loss": 2.2043, "step": 614 }, { "epoch": 0.06470278800631246, "grad_norm": 1.2867093086242676, "learning_rate": 0.0001993721744924024, "loss": 2.1243, "step": 615 }, { "epoch": 0.06480799579168858, "grad_norm": 1.0671265125274658, "learning_rate": 0.00019936835615274782, "loss": 2.1034, "step": 616 }, { "epoch": 0.0649132035770647, "grad_norm": 0.6220844984054565, "learning_rate": 0.00019936452627377323, "loss": 2.1934, "step": 617 }, { "epoch": 0.06501841136244083, "grad_norm": 0.9203489422798157, "learning_rate": 0.0001993606848559234, "loss": 2.1497, "step": 618 }, { "epoch": 0.06512361914781693, "grad_norm": 1.5812958478927612, "learning_rate": 0.00019935683189964447, "loss": 2.0074, "step": 619 }, { "epoch": 0.06522882693319305, "grad_norm": 0.9502617120742798, "learning_rate": 0.00019935296740538377, "loss": 2.3318, "step": 620 }, { "epoch": 0.06533403471856918, "grad_norm": 1.632648229598999, "learning_rate": 0.00019934909137359018, "loss": 1.8631, "step": 621 }, { "epoch": 0.0654392425039453, "grad_norm": 1.4153451919555664, "learning_rate": 0.00019934520380471372, "loss": 1.6571, "step": 622 }, { "epoch": 0.0655444502893214, "grad_norm": 0.8237829804420471, "learning_rate": 0.00019934130469920588, "loss": 1.9875, "step": 623 }, { "epoch": 0.06564965807469753, "grad_norm": 0.8410417437553406, "learning_rate": 0.00019933739405751945, "loss": 2.0338, "step": 624 }, { "epoch": 0.06575486586007365, "grad_norm": 0.9069095253944397, "learning_rate": 0.00019933347188010858, "loss": 2.1402, "step": 625 }, { "epoch": 0.06586007364544977, "grad_norm": 1.0030279159545898, "learning_rate": 0.0001993295381674287, "loss": 2.1201, "step": 626 }, { "epoch": 0.06596528143082588, "grad_norm": 1.0119670629501343, "learning_rate": 0.00019932559291993665, "loss": 1.9571, "step": 627 }, { "epoch": 0.066070489216202, "grad_norm": 1.5967551469802856, "learning_rate": 0.00019932163613809055, "loss": 1.943, "step": 628 }, { "epoch": 0.06617569700157812, "grad_norm": 0.7849034070968628, "learning_rate": 0.0001993176678223499, "loss": 2.3237, "step": 629 }, { "epoch": 0.06628090478695424, "grad_norm": 0.9208400249481201, "learning_rate": 0.00019931368797317553, "loss": 2.4865, "step": 630 }, { "epoch": 0.06638611257233035, "grad_norm": 1.232565999031067, "learning_rate": 0.00019930969659102962, "loss": 2.2367, "step": 631 }, { "epoch": 0.06649132035770647, "grad_norm": 0.7089071273803711, "learning_rate": 0.0001993056936763757, "loss": 2.3407, "step": 632 }, { "epoch": 0.06659652814308259, "grad_norm": 0.9543630480766296, "learning_rate": 0.00019930167922967853, "loss": 1.8678, "step": 633 }, { "epoch": 0.06670173592845871, "grad_norm": 1.19600510597229, "learning_rate": 0.00019929765325140436, "loss": 1.9537, "step": 634 }, { "epoch": 0.06680694371383482, "grad_norm": 0.7090086936950684, "learning_rate": 0.0001992936157420207, "loss": 1.946, "step": 635 }, { "epoch": 0.06691215149921094, "grad_norm": 55.697574615478516, "learning_rate": 0.0001992895667019964, "loss": 2.0818, "step": 636 }, { "epoch": 0.06701735928458706, "grad_norm": 0.9292970299720764, "learning_rate": 0.00019928550613180164, "loss": 2.256, "step": 637 }, { "epoch": 0.06712256706996318, "grad_norm": 1.2075432538986206, "learning_rate": 0.00019928143403190806, "loss": 1.8407, "step": 638 }, { "epoch": 0.06722777485533929, "grad_norm": 0.8975555300712585, "learning_rate": 0.00019927735040278842, "loss": 2.1384, "step": 639 }, { "epoch": 0.06733298264071541, "grad_norm": 0.9104759097099304, "learning_rate": 0.00019927325524491703, "loss": 2.1348, "step": 640 }, { "epoch": 0.06743819042609153, "grad_norm": 0.8688013553619385, "learning_rate": 0.0001992691485587694, "loss": 1.7637, "step": 641 }, { "epoch": 0.06754339821146765, "grad_norm": 1.2788143157958984, "learning_rate": 0.0001992650303448224, "loss": 1.8858, "step": 642 }, { "epoch": 0.06764860599684376, "grad_norm": 0.7544511556625366, "learning_rate": 0.0001992609006035543, "loss": 1.9076, "step": 643 }, { "epoch": 0.06775381378221988, "grad_norm": 1.3862735033035278, "learning_rate": 0.00019925675933544473, "loss": 2.212, "step": 644 }, { "epoch": 0.067859021567596, "grad_norm": 1.04462730884552, "learning_rate": 0.00019925260654097448, "loss": 1.9386, "step": 645 }, { "epoch": 0.06796422935297212, "grad_norm": 0.9594143033027649, "learning_rate": 0.0001992484422206259, "loss": 2.0205, "step": 646 }, { "epoch": 0.06806943713834823, "grad_norm": 1.3281629085540771, "learning_rate": 0.00019924426637488252, "loss": 1.6588, "step": 647 }, { "epoch": 0.06817464492372435, "grad_norm": 1.1453989744186401, "learning_rate": 0.0001992400790042293, "loss": 1.8736, "step": 648 }, { "epoch": 0.06827985270910047, "grad_norm": 1.5039159059524536, "learning_rate": 0.0001992358801091525, "loss": 1.3861, "step": 649 }, { "epoch": 0.0683850604944766, "grad_norm": 1.353845477104187, "learning_rate": 0.0001992316696901397, "loss": 1.8725, "step": 650 }, { "epoch": 0.0684902682798527, "grad_norm": 0.9831173419952393, "learning_rate": 0.00019922744774767987, "loss": 2.1601, "step": 651 }, { "epoch": 0.06859547606522882, "grad_norm": 2.004272937774658, "learning_rate": 0.0001992232142822633, "loss": 1.7202, "step": 652 }, { "epoch": 0.06870068385060495, "grad_norm": 0.7142511010169983, "learning_rate": 0.00019921896929438158, "loss": 2.1088, "step": 653 }, { "epoch": 0.06880589163598107, "grad_norm": 0.8999900817871094, "learning_rate": 0.00019921471278452768, "loss": 1.9379, "step": 654 }, { "epoch": 0.06891109942135717, "grad_norm": 1.3912204504013062, "learning_rate": 0.00019921044475319585, "loss": 1.6042, "step": 655 }, { "epoch": 0.0690163072067333, "grad_norm": 0.8359988331794739, "learning_rate": 0.0001992061652008818, "loss": 2.0374, "step": 656 }, { "epoch": 0.06912151499210942, "grad_norm": 0.8871389627456665, "learning_rate": 0.00019920187412808248, "loss": 2.088, "step": 657 }, { "epoch": 0.06922672277748554, "grad_norm": 1.1207047700881958, "learning_rate": 0.00019919757153529614, "loss": 1.8852, "step": 658 }, { "epoch": 0.06933193056286165, "grad_norm": 1.3903989791870117, "learning_rate": 0.0001991932574230225, "loss": 2.1249, "step": 659 }, { "epoch": 0.06943713834823777, "grad_norm": 0.6716268062591553, "learning_rate": 0.00019918893179176253, "loss": 2.1855, "step": 660 }, { "epoch": 0.06954234613361389, "grad_norm": 1.388175368309021, "learning_rate": 0.0001991845946420185, "loss": 2.5185, "step": 661 }, { "epoch": 0.06964755391899001, "grad_norm": 1.7384852170944214, "learning_rate": 0.0001991802459742941, "loss": 1.2171, "step": 662 }, { "epoch": 0.06975276170436612, "grad_norm": 1.0405939817428589, "learning_rate": 0.0001991758857890943, "loss": 2.0206, "step": 663 }, { "epoch": 0.06985796948974224, "grad_norm": 0.9668706655502319, "learning_rate": 0.0001991715140869255, "loss": 2.1, "step": 664 }, { "epoch": 0.06996317727511836, "grad_norm": 1.021179437637329, "learning_rate": 0.00019916713086829533, "loss": 1.8814, "step": 665 }, { "epoch": 0.07006838506049448, "grad_norm": 0.6269777417182922, "learning_rate": 0.0001991627361337128, "loss": 2.5095, "step": 666 }, { "epoch": 0.07017359284587059, "grad_norm": 1.236629605293274, "learning_rate": 0.00019915832988368824, "loss": 2.1106, "step": 667 }, { "epoch": 0.07027880063124671, "grad_norm": 1.0450339317321777, "learning_rate": 0.0001991539121187334, "loss": 2.1438, "step": 668 }, { "epoch": 0.07038400841662283, "grad_norm": 0.8368414044380188, "learning_rate": 0.00019914948283936119, "loss": 2.1143, "step": 669 }, { "epoch": 0.07048921620199895, "grad_norm": 1.016280174255371, "learning_rate": 0.0001991450420460861, "loss": 2.1759, "step": 670 }, { "epoch": 0.07059442398737506, "grad_norm": 1.2927619218826294, "learning_rate": 0.00019914058973942368, "loss": 1.9304, "step": 671 }, { "epoch": 0.07069963177275118, "grad_norm": 1.0759140253067017, "learning_rate": 0.0001991361259198911, "loss": 1.7529, "step": 672 }, { "epoch": 0.0708048395581273, "grad_norm": 0.802558183670044, "learning_rate": 0.00019913165058800663, "loss": 2.4059, "step": 673 }, { "epoch": 0.07091004734350342, "grad_norm": 0.8148163557052612, "learning_rate": 0.00019912716374429, "loss": 2.1683, "step": 674 }, { "epoch": 0.07101525512887953, "grad_norm": 1.259080171585083, "learning_rate": 0.00019912266538926225, "loss": 2.1657, "step": 675 }, { "epoch": 0.07112046291425565, "grad_norm": 1.0984203815460205, "learning_rate": 0.00019911815552344582, "loss": 2.0398, "step": 676 }, { "epoch": 0.07122567069963177, "grad_norm": 1.0263009071350098, "learning_rate": 0.00019911363414736434, "loss": 1.7294, "step": 677 }, { "epoch": 0.0713308784850079, "grad_norm": 1.2345585823059082, "learning_rate": 0.00019910910126154293, "loss": 2.333, "step": 678 }, { "epoch": 0.071436086270384, "grad_norm": 0.7678645253181458, "learning_rate": 0.00019910455686650793, "loss": 2.2339, "step": 679 }, { "epoch": 0.07154129405576012, "grad_norm": 0.8823609948158264, "learning_rate": 0.0001991000009627871, "loss": 2.0109, "step": 680 }, { "epoch": 0.07164650184113625, "grad_norm": 0.9095181822776794, "learning_rate": 0.00019909543355090946, "loss": 2.3621, "step": 681 }, { "epoch": 0.07175170962651237, "grad_norm": 1.3118935823440552, "learning_rate": 0.00019909085463140546, "loss": 1.9576, "step": 682 }, { "epoch": 0.07185691741188847, "grad_norm": 1.0895490646362305, "learning_rate": 0.0001990862642048068, "loss": 1.8436, "step": 683 }, { "epoch": 0.0719621251972646, "grad_norm": 1.1863242387771606, "learning_rate": 0.00019908166227164655, "loss": 1.8009, "step": 684 }, { "epoch": 0.07206733298264072, "grad_norm": 1.0298961400985718, "learning_rate": 0.00019907704883245916, "loss": 1.9716, "step": 685 }, { "epoch": 0.07217254076801684, "grad_norm": 0.7246216535568237, "learning_rate": 0.00019907242388778033, "loss": 2.119, "step": 686 }, { "epoch": 0.07227774855339295, "grad_norm": 1.172180414199829, "learning_rate": 0.00019906778743814711, "loss": 2.3032, "step": 687 }, { "epoch": 0.07238295633876907, "grad_norm": 1.1342841386795044, "learning_rate": 0.000199063139484098, "loss": 2.4119, "step": 688 }, { "epoch": 0.07248816412414519, "grad_norm": 1.160383701324463, "learning_rate": 0.0001990584800261727, "loss": 1.9328, "step": 689 }, { "epoch": 0.07259337190952131, "grad_norm": 0.991884171962738, "learning_rate": 0.00019905380906491232, "loss": 2.1967, "step": 690 }, { "epoch": 0.07269857969489742, "grad_norm": 1.0760473012924194, "learning_rate": 0.00019904912660085927, "loss": 1.9094, "step": 691 }, { "epoch": 0.07280378748027354, "grad_norm": 1.6143467426300049, "learning_rate": 0.00019904443263455728, "loss": 1.8885, "step": 692 }, { "epoch": 0.07290899526564966, "grad_norm": 1.200732707977295, "learning_rate": 0.00019903972716655148, "loss": 1.8561, "step": 693 }, { "epoch": 0.07301420305102578, "grad_norm": 0.9255091547966003, "learning_rate": 0.0001990350101973883, "loss": 1.9438, "step": 694 }, { "epoch": 0.07311941083640189, "grad_norm": 0.8052567839622498, "learning_rate": 0.00019903028172761552, "loss": 1.8901, "step": 695 }, { "epoch": 0.07322461862177801, "grad_norm": 0.843561589717865, "learning_rate": 0.00019902554175778222, "loss": 2.1764, "step": 696 }, { "epoch": 0.07332982640715413, "grad_norm": 1.0128999948501587, "learning_rate": 0.0001990207902884388, "loss": 1.6193, "step": 697 }, { "epoch": 0.07343503419253025, "grad_norm": 1.1728532314300537, "learning_rate": 0.00019901602732013709, "loss": 1.8212, "step": 698 }, { "epoch": 0.07354024197790636, "grad_norm": 0.6884058117866516, "learning_rate": 0.00019901125285343022, "loss": 2.1301, "step": 699 }, { "epoch": 0.07364544976328248, "grad_norm": 0.8318617343902588, "learning_rate": 0.00019900646688887253, "loss": 2.3534, "step": 700 }, { "epoch": 0.0737506575486586, "grad_norm": 0.7841008901596069, "learning_rate": 0.0001990016694270199, "loss": 1.9417, "step": 701 }, { "epoch": 0.07385586533403472, "grad_norm": 0.7915363907814026, "learning_rate": 0.0001989968604684294, "loss": 2.337, "step": 702 }, { "epoch": 0.07396107311941083, "grad_norm": 0.9618809819221497, "learning_rate": 0.00019899204001365948, "loss": 2.1485, "step": 703 }, { "epoch": 0.07406628090478695, "grad_norm": 1.1301730871200562, "learning_rate": 0.00019898720806326993, "loss": 2.3046, "step": 704 }, { "epoch": 0.07417148869016307, "grad_norm": 0.922090470790863, "learning_rate": 0.00019898236461782186, "loss": 1.9871, "step": 705 }, { "epoch": 0.0742766964755392, "grad_norm": 0.9652605056762695, "learning_rate": 0.0001989775096778777, "loss": 2.1545, "step": 706 }, { "epoch": 0.0743819042609153, "grad_norm": 0.8259909749031067, "learning_rate": 0.00019897264324400128, "loss": 1.9013, "step": 707 }, { "epoch": 0.07448711204629142, "grad_norm": 0.7845616340637207, "learning_rate": 0.00019896776531675773, "loss": 2.0664, "step": 708 }, { "epoch": 0.07459231983166754, "grad_norm": 0.9520933032035828, "learning_rate": 0.0001989628758967135, "loss": 1.898, "step": 709 }, { "epoch": 0.07469752761704367, "grad_norm": 1.1913421154022217, "learning_rate": 0.00019895797498443633, "loss": 1.794, "step": 710 }, { "epoch": 0.07480273540241977, "grad_norm": 0.8856778144836426, "learning_rate": 0.00019895306258049542, "loss": 2.2846, "step": 711 }, { "epoch": 0.0749079431877959, "grad_norm": 1.1165918111801147, "learning_rate": 0.00019894813868546115, "loss": 1.8066, "step": 712 }, { "epoch": 0.07501315097317202, "grad_norm": 1.1976250410079956, "learning_rate": 0.0001989432032999054, "loss": 1.9725, "step": 713 }, { "epoch": 0.07511835875854814, "grad_norm": 1.1325089931488037, "learning_rate": 0.00019893825642440128, "loss": 2.4161, "step": 714 }, { "epoch": 0.07522356654392424, "grad_norm": 1.7667649984359741, "learning_rate": 0.0001989332980595232, "loss": 1.7692, "step": 715 }, { "epoch": 0.07532877432930037, "grad_norm": 0.7589969038963318, "learning_rate": 0.00019892832820584704, "loss": 1.9779, "step": 716 }, { "epoch": 0.07543398211467649, "grad_norm": 1.1662501096725464, "learning_rate": 0.00019892334686394985, "loss": 2.1976, "step": 717 }, { "epoch": 0.07553918990005261, "grad_norm": 0.9506317973136902, "learning_rate": 0.00019891835403441013, "loss": 2.2166, "step": 718 }, { "epoch": 0.07564439768542872, "grad_norm": 0.8529815077781677, "learning_rate": 0.00019891334971780772, "loss": 2.1537, "step": 719 }, { "epoch": 0.07574960547080484, "grad_norm": 0.7838415503501892, "learning_rate": 0.0001989083339147237, "loss": 2.0705, "step": 720 }, { "epoch": 0.07585481325618096, "grad_norm": 0.8060374855995178, "learning_rate": 0.00019890330662574056, "loss": 2.0404, "step": 721 }, { "epoch": 0.07596002104155708, "grad_norm": 1.2424758672714233, "learning_rate": 0.0001988982678514421, "loss": 2.1626, "step": 722 }, { "epoch": 0.07606522882693319, "grad_norm": 0.752369225025177, "learning_rate": 0.00019889321759241347, "loss": 2.0623, "step": 723 }, { "epoch": 0.07617043661230931, "grad_norm": 1.5461053848266602, "learning_rate": 0.00019888815584924113, "loss": 2.5155, "step": 724 }, { "epoch": 0.07627564439768543, "grad_norm": 0.9477027654647827, "learning_rate": 0.00019888308262251285, "loss": 2.526, "step": 725 }, { "epoch": 0.07638085218306155, "grad_norm": 1.75247061252594, "learning_rate": 0.00019887799791281784, "loss": 1.9596, "step": 726 }, { "epoch": 0.07648605996843766, "grad_norm": 1.1626888513565063, "learning_rate": 0.0001988729017207465, "loss": 2.2181, "step": 727 }, { "epoch": 0.07659126775381378, "grad_norm": 5.1132049560546875, "learning_rate": 0.0001988677940468907, "loss": 2.1096, "step": 728 }, { "epoch": 0.0766964755391899, "grad_norm": 1.210626482963562, "learning_rate": 0.0001988626748918435, "loss": 1.8661, "step": 729 }, { "epoch": 0.07680168332456602, "grad_norm": 1.0915313959121704, "learning_rate": 0.00019885754425619945, "loss": 1.9508, "step": 730 }, { "epoch": 0.07690689110994213, "grad_norm": 0.9805365204811096, "learning_rate": 0.0001988524021405543, "loss": 1.8635, "step": 731 }, { "epoch": 0.07701209889531825, "grad_norm": 1.2141715288162231, "learning_rate": 0.0001988472485455052, "loss": 2.0866, "step": 732 }, { "epoch": 0.07711730668069437, "grad_norm": 2.095106601715088, "learning_rate": 0.00019884208347165062, "loss": 1.658, "step": 733 }, { "epoch": 0.0772225144660705, "grad_norm": 1.0189520120620728, "learning_rate": 0.00019883690691959035, "loss": 2.0882, "step": 734 }, { "epoch": 0.0773277222514466, "grad_norm": 1.0242335796356201, "learning_rate": 0.00019883171888992557, "loss": 1.9676, "step": 735 }, { "epoch": 0.07743293003682272, "grad_norm": 1.7329810857772827, "learning_rate": 0.00019882651938325872, "loss": 2.0722, "step": 736 }, { "epoch": 0.07753813782219884, "grad_norm": 0.8744789958000183, "learning_rate": 0.00019882130840019358, "loss": 2.3554, "step": 737 }, { "epoch": 0.07764334560757497, "grad_norm": 1.0732308626174927, "learning_rate": 0.0001988160859413353, "loss": 2.057, "step": 738 }, { "epoch": 0.07774855339295107, "grad_norm": 0.9206953048706055, "learning_rate": 0.0001988108520072904, "loss": 2.1232, "step": 739 }, { "epoch": 0.0778537611783272, "grad_norm": 0.7946454286575317, "learning_rate": 0.0001988056065986666, "loss": 2.1398, "step": 740 }, { "epoch": 0.07795896896370332, "grad_norm": 1.073172926902771, "learning_rate": 0.00019880034971607308, "loss": 2.0701, "step": 741 }, { "epoch": 0.07806417674907944, "grad_norm": 0.7679953575134277, "learning_rate": 0.00019879508136012026, "loss": 2.3293, "step": 742 }, { "epoch": 0.07816938453445554, "grad_norm": 1.1941078901290894, "learning_rate": 0.00019878980153141998, "loss": 2.2848, "step": 743 }, { "epoch": 0.07827459231983167, "grad_norm": 0.7265371680259705, "learning_rate": 0.00019878451023058537, "loss": 2.1909, "step": 744 }, { "epoch": 0.07837980010520779, "grad_norm": 1.4622846841812134, "learning_rate": 0.00019877920745823085, "loss": 2.1973, "step": 745 }, { "epoch": 0.07848500789058391, "grad_norm": 1.132213830947876, "learning_rate": 0.00019877389321497227, "loss": 1.757, "step": 746 }, { "epoch": 0.07859021567596002, "grad_norm": 1.0680114030838013, "learning_rate": 0.00019876856750142673, "loss": 1.8053, "step": 747 }, { "epoch": 0.07869542346133614, "grad_norm": 1.1053400039672852, "learning_rate": 0.00019876323031821266, "loss": 2.0033, "step": 748 }, { "epoch": 0.07880063124671226, "grad_norm": 1.0279300212860107, "learning_rate": 0.0001987578816659499, "loss": 1.9289, "step": 749 }, { "epoch": 0.07890583903208838, "grad_norm": 0.8577696084976196, "learning_rate": 0.00019875252154525952, "loss": 2.3191, "step": 750 }, { "epoch": 0.07901104681746449, "grad_norm": 2.2503817081451416, "learning_rate": 0.000198747149956764, "loss": 1.8925, "step": 751 }, { "epoch": 0.07911625460284061, "grad_norm": 0.7883946299552917, "learning_rate": 0.0001987417669010871, "loss": 2.1394, "step": 752 }, { "epoch": 0.07922146238821673, "grad_norm": 0.9540475010871887, "learning_rate": 0.00019873637237885402, "loss": 2.1549, "step": 753 }, { "epoch": 0.07932667017359285, "grad_norm": 1.1005622148513794, "learning_rate": 0.0001987309663906911, "loss": 1.8121, "step": 754 }, { "epoch": 0.07943187795896896, "grad_norm": 1.2828789949417114, "learning_rate": 0.00019872554893722618, "loss": 1.9085, "step": 755 }, { "epoch": 0.07953708574434508, "grad_norm": 0.9298080205917358, "learning_rate": 0.00019872012001908833, "loss": 2.301, "step": 756 }, { "epoch": 0.0796422935297212, "grad_norm": 1.2716705799102783, "learning_rate": 0.00019871467963690807, "loss": 2.0119, "step": 757 }, { "epoch": 0.07974750131509732, "grad_norm": 1.2654720544815063, "learning_rate": 0.0001987092277913171, "loss": 1.7698, "step": 758 }, { "epoch": 0.07985270910047343, "grad_norm": 1.111207127571106, "learning_rate": 0.00019870376448294851, "loss": 1.6179, "step": 759 }, { "epoch": 0.07995791688584955, "grad_norm": 1.3444252014160156, "learning_rate": 0.00019869828971243682, "loss": 1.7896, "step": 760 }, { "epoch": 0.08006312467122567, "grad_norm": 0.7708830237388611, "learning_rate": 0.00019869280348041774, "loss": 2.241, "step": 761 }, { "epoch": 0.08016833245660179, "grad_norm": 1.6333880424499512, "learning_rate": 0.0001986873057875284, "loss": 1.9599, "step": 762 }, { "epoch": 0.0802735402419779, "grad_norm": 0.9313520789146423, "learning_rate": 0.00019868179663440718, "loss": 2.3505, "step": 763 }, { "epoch": 0.08037874802735402, "grad_norm": 1.2057533264160156, "learning_rate": 0.00019867627602169387, "loss": 2.1487, "step": 764 }, { "epoch": 0.08048395581273014, "grad_norm": 1.0157853364944458, "learning_rate": 0.00019867074395002958, "loss": 2.0869, "step": 765 }, { "epoch": 0.08058916359810626, "grad_norm": 0.8782801628112793, "learning_rate": 0.00019866520042005669, "loss": 2.1859, "step": 766 }, { "epoch": 0.08069437138348237, "grad_norm": 0.8281430006027222, "learning_rate": 0.00019865964543241897, "loss": 2.1563, "step": 767 }, { "epoch": 0.08079957916885849, "grad_norm": 0.9678866863250732, "learning_rate": 0.00019865407898776152, "loss": 2.1626, "step": 768 }, { "epoch": 0.08090478695423461, "grad_norm": 0.9700407981872559, "learning_rate": 0.00019864850108673073, "loss": 2.2104, "step": 769 }, { "epoch": 0.08100999473961074, "grad_norm": 1.1797422170639038, "learning_rate": 0.00019864291172997435, "loss": 1.8951, "step": 770 }, { "epoch": 0.08111520252498684, "grad_norm": 1.1290271282196045, "learning_rate": 0.00019863731091814146, "loss": 2.2929, "step": 771 }, { "epoch": 0.08122041031036296, "grad_norm": 1.6742466688156128, "learning_rate": 0.00019863169865188244, "loss": 1.3898, "step": 772 }, { "epoch": 0.08132561809573909, "grad_norm": 1.3946906328201294, "learning_rate": 0.00019862607493184906, "loss": 1.6763, "step": 773 }, { "epoch": 0.0814308258811152, "grad_norm": 1.0987610816955566, "learning_rate": 0.00019862043975869438, "loss": 1.9443, "step": 774 }, { "epoch": 0.08153603366649131, "grad_norm": 1.0788226127624512, "learning_rate": 0.00019861479313307273, "loss": 1.8061, "step": 775 }, { "epoch": 0.08164124145186744, "grad_norm": 0.9351698160171509, "learning_rate": 0.0001986091350556399, "loss": 2.159, "step": 776 }, { "epoch": 0.08174644923724356, "grad_norm": 1.1127021312713623, "learning_rate": 0.000198603465527053, "loss": 2.0271, "step": 777 }, { "epoch": 0.08185165702261968, "grad_norm": 0.7860623598098755, "learning_rate": 0.00019859778454797027, "loss": 1.8645, "step": 778 }, { "epoch": 0.08195686480799579, "grad_norm": 1.112596869468689, "learning_rate": 0.00019859209211905152, "loss": 1.7839, "step": 779 }, { "epoch": 0.08206207259337191, "grad_norm": 1.2952525615692139, "learning_rate": 0.00019858638824095775, "loss": 2.0274, "step": 780 }, { "epoch": 0.08216728037874803, "grad_norm": 1.0121921300888062, "learning_rate": 0.00019858067291435137, "loss": 2.2605, "step": 781 }, { "epoch": 0.08227248816412415, "grad_norm": 1.0447254180908203, "learning_rate": 0.00019857494613989606, "loss": 2.1212, "step": 782 }, { "epoch": 0.08237769594950026, "grad_norm": 1.3232123851776123, "learning_rate": 0.00019856920791825683, "loss": 1.7452, "step": 783 }, { "epoch": 0.08248290373487638, "grad_norm": 1.3245103359222412, "learning_rate": 0.0001985634582501001, "loss": 1.6841, "step": 784 }, { "epoch": 0.0825881115202525, "grad_norm": 1.0271555185317993, "learning_rate": 0.00019855769713609348, "loss": 1.9158, "step": 785 }, { "epoch": 0.08269331930562862, "grad_norm": 1.0767377614974976, "learning_rate": 0.00019855192457690607, "loss": 1.7579, "step": 786 }, { "epoch": 0.08279852709100473, "grad_norm": 0.7473664283752441, "learning_rate": 0.00019854614057320818, "loss": 2.3149, "step": 787 }, { "epoch": 0.08290373487638085, "grad_norm": 1.0371477603912354, "learning_rate": 0.0001985403451256715, "loss": 1.9446, "step": 788 }, { "epoch": 0.08300894266175697, "grad_norm": 1.0556628704071045, "learning_rate": 0.00019853453823496898, "loss": 2.0536, "step": 789 }, { "epoch": 0.08311415044713309, "grad_norm": 1.5194404125213623, "learning_rate": 0.00019852871990177503, "loss": 2.1898, "step": 790 }, { "epoch": 0.0832193582325092, "grad_norm": 1.2382123470306396, "learning_rate": 0.0001985228901267653, "loss": 2.096, "step": 791 }, { "epoch": 0.08332456601788532, "grad_norm": 0.805229902267456, "learning_rate": 0.00019851704891061676, "loss": 2.2309, "step": 792 }, { "epoch": 0.08342977380326144, "grad_norm": 1.50439453125, "learning_rate": 0.00019851119625400774, "loss": 1.664, "step": 793 }, { "epoch": 0.08353498158863756, "grad_norm": 1.2170758247375488, "learning_rate": 0.0001985053321576179, "loss": 2.2134, "step": 794 }, { "epoch": 0.08364018937401367, "grad_norm": 0.898504376411438, "learning_rate": 0.0001984994566221282, "loss": 1.7387, "step": 795 }, { "epoch": 0.08374539715938979, "grad_norm": 1.0071955919265747, "learning_rate": 0.00019849356964822093, "loss": 2.255, "step": 796 }, { "epoch": 0.08385060494476591, "grad_norm": 1.0701338052749634, "learning_rate": 0.00019848767123657976, "loss": 2.1611, "step": 797 }, { "epoch": 0.08395581273014203, "grad_norm": 0.9649009704589844, "learning_rate": 0.00019848176138788964, "loss": 2.2191, "step": 798 }, { "epoch": 0.08406102051551814, "grad_norm": 2.6447019577026367, "learning_rate": 0.00019847584010283686, "loss": 1.9287, "step": 799 }, { "epoch": 0.08416622830089426, "grad_norm": 0.8168442845344543, "learning_rate": 0.00019846990738210907, "loss": 1.7487, "step": 800 }, { "epoch": 0.08427143608627038, "grad_norm": 1.1686725616455078, "learning_rate": 0.00019846396322639514, "loss": 1.7051, "step": 801 }, { "epoch": 0.0843766438716465, "grad_norm": 0.8122353553771973, "learning_rate": 0.00019845800763638544, "loss": 1.7214, "step": 802 }, { "epoch": 0.08448185165702261, "grad_norm": 1.4718796014785767, "learning_rate": 0.0001984520406127715, "loss": 1.8946, "step": 803 }, { "epoch": 0.08458705944239873, "grad_norm": 1.2869932651519775, "learning_rate": 0.0001984460621562463, "loss": 1.6935, "step": 804 }, { "epoch": 0.08469226722777486, "grad_norm": 0.8310649991035461, "learning_rate": 0.00019844007226750408, "loss": 1.7826, "step": 805 }, { "epoch": 0.08479747501315098, "grad_norm": 1.2225552797317505, "learning_rate": 0.0001984340709472404, "loss": 1.9194, "step": 806 }, { "epoch": 0.08490268279852708, "grad_norm": 0.819175660610199, "learning_rate": 0.00019842805819615222, "loss": 2.0277, "step": 807 }, { "epoch": 0.0850078905839032, "grad_norm": 0.8296215534210205, "learning_rate": 0.00019842203401493772, "loss": 2.1235, "step": 808 }, { "epoch": 0.08511309836927933, "grad_norm": 1.2839940786361694, "learning_rate": 0.00019841599840429654, "loss": 1.6553, "step": 809 }, { "epoch": 0.08521830615465545, "grad_norm": 1.0420074462890625, "learning_rate": 0.00019840995136492955, "loss": 1.9328, "step": 810 }, { "epoch": 0.08532351394003156, "grad_norm": 0.8936055898666382, "learning_rate": 0.00019840389289753896, "loss": 2.176, "step": 811 }, { "epoch": 0.08542872172540768, "grad_norm": 0.811882495880127, "learning_rate": 0.0001983978230028283, "loss": 2.2401, "step": 812 }, { "epoch": 0.0855339295107838, "grad_norm": 0.9509261846542358, "learning_rate": 0.00019839174168150247, "loss": 2.1676, "step": 813 }, { "epoch": 0.08563913729615992, "grad_norm": 0.921415388584137, "learning_rate": 0.0001983856489342677, "loss": 2.3311, "step": 814 }, { "epoch": 0.08574434508153603, "grad_norm": 0.9342227578163147, "learning_rate": 0.00019837954476183148, "loss": 2.2151, "step": 815 }, { "epoch": 0.08584955286691215, "grad_norm": 0.9104680418968201, "learning_rate": 0.00019837342916490268, "loss": 2.3867, "step": 816 }, { "epoch": 0.08595476065228827, "grad_norm": 1.001815915107727, "learning_rate": 0.0001983673021441915, "loss": 1.998, "step": 817 }, { "epoch": 0.08605996843766439, "grad_norm": 1.704974889755249, "learning_rate": 0.00019836116370040944, "loss": 1.8599, "step": 818 }, { "epoch": 0.0861651762230405, "grad_norm": 0.82003253698349, "learning_rate": 0.0001983550138342693, "loss": 2.3351, "step": 819 }, { "epoch": 0.08627038400841662, "grad_norm": 0.7901598215103149, "learning_rate": 0.00019834885254648533, "loss": 2.0966, "step": 820 }, { "epoch": 0.08637559179379274, "grad_norm": 1.0715858936309814, "learning_rate": 0.00019834267983777292, "loss": 1.8696, "step": 821 }, { "epoch": 0.08648079957916886, "grad_norm": 1.0539333820343018, "learning_rate": 0.000198336495708849, "loss": 1.7904, "step": 822 }, { "epoch": 0.08658600736454497, "grad_norm": 1.2801223993301392, "learning_rate": 0.00019833030016043156, "loss": 2.1986, "step": 823 }, { "epoch": 0.08669121514992109, "grad_norm": 1.5818783044815063, "learning_rate": 0.00019832409319324023, "loss": 1.8651, "step": 824 }, { "epoch": 0.08679642293529721, "grad_norm": 0.8712964057922363, "learning_rate": 0.00019831787480799568, "loss": 2.2397, "step": 825 }, { "epoch": 0.08690163072067333, "grad_norm": 1.518312931060791, "learning_rate": 0.00019831164500542012, "loss": 2.1451, "step": 826 }, { "epoch": 0.08700683850604944, "grad_norm": 1.9714057445526123, "learning_rate": 0.00019830540378623694, "loss": 2.4544, "step": 827 }, { "epoch": 0.08711204629142556, "grad_norm": 1.1616429090499878, "learning_rate": 0.00019829915115117093, "loss": 1.8353, "step": 828 }, { "epoch": 0.08721725407680168, "grad_norm": 12.633820533752441, "learning_rate": 0.0001982928871009482, "loss": 2.3892, "step": 829 }, { "epoch": 0.0873224618621778, "grad_norm": 0.8267893195152283, "learning_rate": 0.00019828661163629615, "loss": 2.1383, "step": 830 }, { "epoch": 0.08742766964755391, "grad_norm": 0.9138221144676208, "learning_rate": 0.00019828032475794352, "loss": 2.3029, "step": 831 }, { "epoch": 0.08753287743293003, "grad_norm": 1.1330816745758057, "learning_rate": 0.00019827402646662047, "loss": 2.2558, "step": 832 }, { "epoch": 0.08763808521830616, "grad_norm": 0.8642826676368713, "learning_rate": 0.0001982677167630583, "loss": 1.9996, "step": 833 }, { "epoch": 0.08774329300368228, "grad_norm": 0.8680064678192139, "learning_rate": 0.00019826139564798974, "loss": 2.3248, "step": 834 }, { "epoch": 0.08784850078905838, "grad_norm": 1.49010169506073, "learning_rate": 0.0001982550631221489, "loss": 1.9746, "step": 835 }, { "epoch": 0.0879537085744345, "grad_norm": 2.9857089519500732, "learning_rate": 0.00019824871918627115, "loss": 2.3267, "step": 836 }, { "epoch": 0.08805891635981063, "grad_norm": 1.274704933166504, "learning_rate": 0.0001982423638410931, "loss": 2.3621, "step": 837 }, { "epoch": 0.08816412414518675, "grad_norm": 0.9977430105209351, "learning_rate": 0.0001982359970873529, "loss": 2.0158, "step": 838 }, { "epoch": 0.08826933193056286, "grad_norm": 0.9175136685371399, "learning_rate": 0.0001982296189257898, "loss": 1.8731, "step": 839 }, { "epoch": 0.08837453971593898, "grad_norm": 0.993022620677948, "learning_rate": 0.00019822322935714458, "loss": 1.9218, "step": 840 }, { "epoch": 0.0884797475013151, "grad_norm": 1.6498595476150513, "learning_rate": 0.00019821682838215915, "loss": 2.0277, "step": 841 }, { "epoch": 0.08858495528669122, "grad_norm": 0.8532339334487915, "learning_rate": 0.00019821041600157682, "loss": 1.7503, "step": 842 }, { "epoch": 0.08869016307206733, "grad_norm": 0.8794986605644226, "learning_rate": 0.0001982039922161423, "loss": 2.2771, "step": 843 }, { "epoch": 0.08879537085744345, "grad_norm": 1.4324920177459717, "learning_rate": 0.00019819755702660155, "loss": 2.0883, "step": 844 }, { "epoch": 0.08890057864281957, "grad_norm": 1.0365421772003174, "learning_rate": 0.00019819111043370186, "loss": 2.0581, "step": 845 }, { "epoch": 0.08900578642819569, "grad_norm": 0.9392815232276917, "learning_rate": 0.00019818465243819184, "loss": 2.3084, "step": 846 }, { "epoch": 0.0891109942135718, "grad_norm": 0.9906817674636841, "learning_rate": 0.00019817818304082146, "loss": 2.3203, "step": 847 }, { "epoch": 0.08921620199894792, "grad_norm": 1.1406745910644531, "learning_rate": 0.000198171702242342, "loss": 1.9426, "step": 848 }, { "epoch": 0.08932140978432404, "grad_norm": 0.843298077583313, "learning_rate": 0.00019816521004350596, "loss": 2.2718, "step": 849 }, { "epoch": 0.08942661756970016, "grad_norm": 0.8017824292182922, "learning_rate": 0.00019815870644506738, "loss": 1.9611, "step": 850 }, { "epoch": 0.08953182535507627, "grad_norm": 1.6382418870925903, "learning_rate": 0.00019815219144778143, "loss": 1.5162, "step": 851 }, { "epoch": 0.08963703314045239, "grad_norm": 0.6821427941322327, "learning_rate": 0.00019814566505240472, "loss": 1.8852, "step": 852 }, { "epoch": 0.08974224092582851, "grad_norm": 0.967110276222229, "learning_rate": 0.00019813912725969509, "loss": 2.2542, "step": 853 }, { "epoch": 0.08984744871120463, "grad_norm": 1.212904691696167, "learning_rate": 0.00019813257807041178, "loss": 2.1593, "step": 854 }, { "epoch": 0.08995265649658074, "grad_norm": 0.7041783928871155, "learning_rate": 0.00019812601748531533, "loss": 2.0618, "step": 855 }, { "epoch": 0.09005786428195686, "grad_norm": 1.1121630668640137, "learning_rate": 0.00019811944550516758, "loss": 1.94, "step": 856 }, { "epoch": 0.09016307206733298, "grad_norm": 1.1690653562545776, "learning_rate": 0.00019811286213073173, "loss": 2.095, "step": 857 }, { "epoch": 0.0902682798527091, "grad_norm": 1.0316269397735596, "learning_rate": 0.00019810626736277228, "loss": 1.8736, "step": 858 }, { "epoch": 0.09037348763808521, "grad_norm": 0.9707139134407043, "learning_rate": 0.00019809966120205505, "loss": 1.9554, "step": 859 }, { "epoch": 0.09047869542346133, "grad_norm": 1.1598957777023315, "learning_rate": 0.0001980930436493472, "loss": 2.1387, "step": 860 }, { "epoch": 0.09058390320883745, "grad_norm": 0.881175696849823, "learning_rate": 0.0001980864147054172, "loss": 1.9518, "step": 861 }, { "epoch": 0.09068911099421358, "grad_norm": 1.648833990097046, "learning_rate": 0.0001980797743710349, "loss": 1.8736, "step": 862 }, { "epoch": 0.09079431877958968, "grad_norm": 1.2492873668670654, "learning_rate": 0.0001980731226469713, "loss": 2.2163, "step": 863 }, { "epoch": 0.0908995265649658, "grad_norm": 0.6980217099189758, "learning_rate": 0.00019806645953399893, "loss": 1.8288, "step": 864 }, { "epoch": 0.09100473435034193, "grad_norm": 1.0036383867263794, "learning_rate": 0.00019805978503289158, "loss": 2.3498, "step": 865 }, { "epoch": 0.09110994213571805, "grad_norm": 0.788852870464325, "learning_rate": 0.00019805309914442426, "loss": 1.8863, "step": 866 }, { "epoch": 0.09121514992109415, "grad_norm": 1.2762038707733154, "learning_rate": 0.00019804640186937343, "loss": 2.1017, "step": 867 }, { "epoch": 0.09132035770647028, "grad_norm": 0.878587543964386, "learning_rate": 0.0001980396932085168, "loss": 2.1149, "step": 868 }, { "epoch": 0.0914255654918464, "grad_norm": 1.13942551612854, "learning_rate": 0.00019803297316263346, "loss": 1.8322, "step": 869 }, { "epoch": 0.09153077327722252, "grad_norm": 0.9839184284210205, "learning_rate": 0.00019802624173250374, "loss": 2.1218, "step": 870 }, { "epoch": 0.09163598106259863, "grad_norm": 1.22666335105896, "learning_rate": 0.00019801949891890938, "loss": 1.9397, "step": 871 }, { "epoch": 0.09174118884797475, "grad_norm": 1.162661075592041, "learning_rate": 0.00019801274472263335, "loss": 1.6768, "step": 872 }, { "epoch": 0.09184639663335087, "grad_norm": 1.2919373512268066, "learning_rate": 0.00019800597914446005, "loss": 2.1166, "step": 873 }, { "epoch": 0.09195160441872699, "grad_norm": 1.0327990055084229, "learning_rate": 0.0001979992021851751, "loss": 2.3549, "step": 874 }, { "epoch": 0.0920568122041031, "grad_norm": 0.6984542012214661, "learning_rate": 0.0001979924138455655, "loss": 1.9739, "step": 875 }, { "epoch": 0.09216201998947922, "grad_norm": 1.3677195310592651, "learning_rate": 0.00019798561412641958, "loss": 1.7295, "step": 876 }, { "epoch": 0.09226722777485534, "grad_norm": 1.9641640186309814, "learning_rate": 0.00019797880302852697, "loss": 2.3405, "step": 877 }, { "epoch": 0.09237243556023146, "grad_norm": 0.9281172752380371, "learning_rate": 0.00019797198055267857, "loss": 1.2817, "step": 878 }, { "epoch": 0.09247764334560757, "grad_norm": 0.967189371585846, "learning_rate": 0.0001979651466996667, "loss": 2.2958, "step": 879 }, { "epoch": 0.09258285113098369, "grad_norm": 1.9948618412017822, "learning_rate": 0.0001979583014702849, "loss": 1.6858, "step": 880 }, { "epoch": 0.09268805891635981, "grad_norm": 0.7749442458152771, "learning_rate": 0.00019795144486532814, "loss": 2.4653, "step": 881 }, { "epoch": 0.09279326670173593, "grad_norm": 1.0032241344451904, "learning_rate": 0.00019794457688559265, "loss": 2.2595, "step": 882 }, { "epoch": 0.09289847448711204, "grad_norm": 0.8965802788734436, "learning_rate": 0.00019793769753187595, "loss": 1.9857, "step": 883 }, { "epoch": 0.09300368227248816, "grad_norm": 0.8196418285369873, "learning_rate": 0.00019793080680497696, "loss": 2.0913, "step": 884 }, { "epoch": 0.09310889005786428, "grad_norm": 0.9274260401725769, "learning_rate": 0.00019792390470569583, "loss": 2.0106, "step": 885 }, { "epoch": 0.0932140978432404, "grad_norm": 0.8402416110038757, "learning_rate": 0.00019791699123483412, "loss": 2.5971, "step": 886 }, { "epoch": 0.09331930562861651, "grad_norm": 1.1058905124664307, "learning_rate": 0.00019791006639319463, "loss": 2.134, "step": 887 }, { "epoch": 0.09342451341399263, "grad_norm": 0.6589333415031433, "learning_rate": 0.00019790313018158156, "loss": 2.375, "step": 888 }, { "epoch": 0.09352972119936875, "grad_norm": 1.1351195573806763, "learning_rate": 0.00019789618260080034, "loss": 1.8826, "step": 889 }, { "epoch": 0.09363492898474488, "grad_norm": 1.2128400802612305, "learning_rate": 0.00019788922365165785, "loss": 2.023, "step": 890 }, { "epoch": 0.09374013677012098, "grad_norm": 0.9158788919448853, "learning_rate": 0.0001978822533349621, "loss": 2.2709, "step": 891 }, { "epoch": 0.0938453445554971, "grad_norm": 1.525220274925232, "learning_rate": 0.00019787527165152265, "loss": 2.0153, "step": 892 }, { "epoch": 0.09395055234087323, "grad_norm": 0.9528924226760864, "learning_rate": 0.00019786827860215014, "loss": 2.1791, "step": 893 }, { "epoch": 0.09405576012624935, "grad_norm": 1.2895303964614868, "learning_rate": 0.00019786127418765673, "loss": 1.9553, "step": 894 }, { "epoch": 0.09416096791162545, "grad_norm": 0.9034793972969055, "learning_rate": 0.0001978542584088558, "loss": 2.3079, "step": 895 }, { "epoch": 0.09426617569700158, "grad_norm": 1.507163643836975, "learning_rate": 0.0001978472312665621, "loss": 1.5934, "step": 896 }, { "epoch": 0.0943713834823777, "grad_norm": 0.9403305649757385, "learning_rate": 0.0001978401927615916, "loss": 1.7338, "step": 897 }, { "epoch": 0.09447659126775382, "grad_norm": 0.8427889347076416, "learning_rate": 0.00019783314289476168, "loss": 2.0062, "step": 898 }, { "epoch": 0.09458179905312993, "grad_norm": 0.8502089977264404, "learning_rate": 0.0001978260816668911, "loss": 2.1841, "step": 899 }, { "epoch": 0.09468700683850605, "grad_norm": 0.8778148889541626, "learning_rate": 0.00019781900907879974, "loss": 2.0756, "step": 900 }, { "epoch": 0.09479221462388217, "grad_norm": 0.8359899520874023, "learning_rate": 0.00019781192513130896, "loss": 1.9334, "step": 901 }, { "epoch": 0.09489742240925829, "grad_norm": 1.1402784585952759, "learning_rate": 0.00019780482982524142, "loss": 1.7855, "step": 902 }, { "epoch": 0.0950026301946344, "grad_norm": 0.7871297597885132, "learning_rate": 0.00019779772316142104, "loss": 2.3578, "step": 903 }, { "epoch": 0.09510783798001052, "grad_norm": 0.9127509593963623, "learning_rate": 0.0001977906051406731, "loss": 2.2987, "step": 904 }, { "epoch": 0.09521304576538664, "grad_norm": 1.1920058727264404, "learning_rate": 0.00019778347576382424, "loss": 2.1563, "step": 905 }, { "epoch": 0.09531825355076276, "grad_norm": 1.1233481168746948, "learning_rate": 0.0001977763350317023, "loss": 2.1715, "step": 906 }, { "epoch": 0.09542346133613887, "grad_norm": 0.7940162420272827, "learning_rate": 0.00019776918294513656, "loss": 2.3458, "step": 907 }, { "epoch": 0.09552866912151499, "grad_norm": 1.0063912868499756, "learning_rate": 0.00019776201950495755, "loss": 2.1605, "step": 908 }, { "epoch": 0.09563387690689111, "grad_norm": 1.514668583869934, "learning_rate": 0.00019775484471199715, "loss": 1.5532, "step": 909 }, { "epoch": 0.09573908469226723, "grad_norm": 0.8086494207382202, "learning_rate": 0.0001977476585670885, "loss": 1.8652, "step": 910 }, { "epoch": 0.09584429247764334, "grad_norm": 0.925151526927948, "learning_rate": 0.00019774046107106616, "loss": 2.1997, "step": 911 }, { "epoch": 0.09594950026301946, "grad_norm": 0.7572203278541565, "learning_rate": 0.0001977332522247659, "loss": 2.116, "step": 912 }, { "epoch": 0.09605470804839558, "grad_norm": 1.877502202987671, "learning_rate": 0.00019772603202902492, "loss": 1.468, "step": 913 }, { "epoch": 0.0961599158337717, "grad_norm": 1.2712554931640625, "learning_rate": 0.00019771880048468163, "loss": 2.0208, "step": 914 }, { "epoch": 0.09626512361914781, "grad_norm": 0.7878434062004089, "learning_rate": 0.00019771155759257584, "loss": 2.0554, "step": 915 }, { "epoch": 0.09637033140452393, "grad_norm": 0.7482603788375854, "learning_rate": 0.0001977043033535486, "loss": 1.954, "step": 916 }, { "epoch": 0.09647553918990005, "grad_norm": 0.8648151755332947, "learning_rate": 0.00019769703776844236, "loss": 1.9668, "step": 917 }, { "epoch": 0.09658074697527617, "grad_norm": 1.0494996309280396, "learning_rate": 0.0001976897608381008, "loss": 2.1018, "step": 918 }, { "epoch": 0.09668595476065228, "grad_norm": 1.2748123407363892, "learning_rate": 0.00019768247256336902, "loss": 1.8108, "step": 919 }, { "epoch": 0.0967911625460284, "grad_norm": 0.7436427474021912, "learning_rate": 0.00019767517294509338, "loss": 2.0819, "step": 920 }, { "epoch": 0.09689637033140452, "grad_norm": 0.9573638439178467, "learning_rate": 0.00019766786198412154, "loss": 1.9145, "step": 921 }, { "epoch": 0.09700157811678065, "grad_norm": 1.3364827632904053, "learning_rate": 0.00019766053968130247, "loss": 2.1283, "step": 922 }, { "epoch": 0.09710678590215675, "grad_norm": 0.971378743648529, "learning_rate": 0.00019765320603748655, "loss": 2.0985, "step": 923 }, { "epoch": 0.09721199368753287, "grad_norm": 0.8163911700248718, "learning_rate": 0.00019764586105352534, "loss": 2.3535, "step": 924 }, { "epoch": 0.097317201472909, "grad_norm": 0.8818692564964294, "learning_rate": 0.00019763850473027183, "loss": 2.0812, "step": 925 }, { "epoch": 0.09742240925828512, "grad_norm": 0.984257698059082, "learning_rate": 0.00019763113706858031, "loss": 1.9479, "step": 926 }, { "epoch": 0.09752761704366122, "grad_norm": 0.9912437200546265, "learning_rate": 0.00019762375806930632, "loss": 1.6832, "step": 927 }, { "epoch": 0.09763282482903735, "grad_norm": 0.8805968761444092, "learning_rate": 0.0001976163677333068, "loss": 2.3275, "step": 928 }, { "epoch": 0.09773803261441347, "grad_norm": 1.2597976922988892, "learning_rate": 0.00019760896606143988, "loss": 1.8149, "step": 929 }, { "epoch": 0.09784324039978959, "grad_norm": 0.709547758102417, "learning_rate": 0.0001976015530545652, "loss": 2.4011, "step": 930 }, { "epoch": 0.0979484481851657, "grad_norm": 1.3702518939971924, "learning_rate": 0.00019759412871354353, "loss": 1.4587, "step": 931 }, { "epoch": 0.09805365597054182, "grad_norm": 1.471561312675476, "learning_rate": 0.00019758669303923706, "loss": 1.9036, "step": 932 }, { "epoch": 0.09815886375591794, "grad_norm": 0.7236111164093018, "learning_rate": 0.0001975792460325093, "loss": 2.8316, "step": 933 }, { "epoch": 0.09826407154129406, "grad_norm": 0.9491783380508423, "learning_rate": 0.000197571787694225, "loss": 2.0152, "step": 934 }, { "epoch": 0.09836927932667017, "grad_norm": 1.1442753076553345, "learning_rate": 0.0001975643180252503, "loss": 1.9371, "step": 935 }, { "epoch": 0.09847448711204629, "grad_norm": 1.107806921005249, "learning_rate": 0.00019755683702645262, "loss": 2.0472, "step": 936 }, { "epoch": 0.09857969489742241, "grad_norm": 1.2097374200820923, "learning_rate": 0.0001975493446987007, "loss": 2.0863, "step": 937 }, { "epoch": 0.09868490268279853, "grad_norm": 0.8795415759086609, "learning_rate": 0.0001975418410428646, "loss": 2.0176, "step": 938 }, { "epoch": 0.09879011046817464, "grad_norm": 0.8208872079849243, "learning_rate": 0.0001975343260598157, "loss": 2.0678, "step": 939 }, { "epoch": 0.09889531825355076, "grad_norm": 1.147039532661438, "learning_rate": 0.0001975267997504267, "loss": 1.71, "step": 940 }, { "epoch": 0.09900052603892688, "grad_norm": 1.0926604270935059, "learning_rate": 0.00019751926211557157, "loss": 2.2779, "step": 941 }, { "epoch": 0.099105733824303, "grad_norm": 1.0695338249206543, "learning_rate": 0.00019751171315612567, "loss": 2.0222, "step": 942 }, { "epoch": 0.09921094160967911, "grad_norm": 1.0999269485473633, "learning_rate": 0.00019750415287296563, "loss": 2.1906, "step": 943 }, { "epoch": 0.09931614939505523, "grad_norm": 1.2324326038360596, "learning_rate": 0.00019749658126696934, "loss": 2.123, "step": 944 }, { "epoch": 0.09942135718043135, "grad_norm": 0.9449137449264526, "learning_rate": 0.00019748899833901614, "loss": 1.8318, "step": 945 }, { "epoch": 0.09952656496580747, "grad_norm": 1.1237130165100098, "learning_rate": 0.0001974814040899866, "loss": 1.9734, "step": 946 }, { "epoch": 0.09963177275118358, "grad_norm": 0.7438077926635742, "learning_rate": 0.00019747379852076263, "loss": 2.406, "step": 947 }, { "epoch": 0.0997369805365597, "grad_norm": 0.7513858675956726, "learning_rate": 0.00019746618163222736, "loss": 1.8843, "step": 948 }, { "epoch": 0.09984218832193582, "grad_norm": 0.7331326007843018, "learning_rate": 0.0001974585534252654, "loss": 2.3763, "step": 949 }, { "epoch": 0.09994739610731194, "grad_norm": 1.0843267440795898, "learning_rate": 0.00019745091390076252, "loss": 1.962, "step": 950 }, { "epoch": 0.10005260389268805, "grad_norm": 1.1343190670013428, "learning_rate": 0.00019744326305960595, "loss": 2.0071, "step": 951 }, { "epoch": 0.10015781167806417, "grad_norm": 1.4789917469024658, "learning_rate": 0.0001974356009026841, "loss": 1.715, "step": 952 }, { "epoch": 0.1002630194634403, "grad_norm": 1.1908003091812134, "learning_rate": 0.00019742792743088675, "loss": 1.8924, "step": 953 }, { "epoch": 0.10036822724881642, "grad_norm": 0.7355921864509583, "learning_rate": 0.000197420242645105, "loss": 2.3287, "step": 954 }, { "epoch": 0.10047343503419252, "grad_norm": 0.7553874254226685, "learning_rate": 0.0001974125465462313, "loss": 1.7495, "step": 955 }, { "epoch": 0.10057864281956864, "grad_norm": 1.1832842826843262, "learning_rate": 0.00019740483913515932, "loss": 2.0124, "step": 956 }, { "epoch": 0.10068385060494477, "grad_norm": 1.039902925491333, "learning_rate": 0.0001973971204127841, "loss": 2.0525, "step": 957 }, { "epoch": 0.10078905839032089, "grad_norm": 0.8891255855560303, "learning_rate": 0.00019738939038000205, "loss": 2.1702, "step": 958 }, { "epoch": 0.100894266175697, "grad_norm": 1.692345142364502, "learning_rate": 0.00019738164903771078, "loss": 1.9213, "step": 959 }, { "epoch": 0.10099947396107312, "grad_norm": 0.9816641807556152, "learning_rate": 0.00019737389638680924, "loss": 1.9241, "step": 960 }, { "epoch": 0.10110468174644924, "grad_norm": 0.90645432472229, "learning_rate": 0.0001973661324281978, "loss": 1.6803, "step": 961 }, { "epoch": 0.10120988953182536, "grad_norm": 1.1190398931503296, "learning_rate": 0.00019735835716277802, "loss": 2.0615, "step": 962 }, { "epoch": 0.10131509731720147, "grad_norm": 0.9607227444648743, "learning_rate": 0.0001973505705914528, "loss": 1.9322, "step": 963 }, { "epoch": 0.10142030510257759, "grad_norm": 0.7287774682044983, "learning_rate": 0.00019734277271512638, "loss": 2.3326, "step": 964 }, { "epoch": 0.10152551288795371, "grad_norm": 1.8522443771362305, "learning_rate": 0.00019733496353470433, "loss": 2.1133, "step": 965 }, { "epoch": 0.10163072067332983, "grad_norm": 0.899685800075531, "learning_rate": 0.00019732714305109345, "loss": 2.0211, "step": 966 }, { "epoch": 0.10173592845870594, "grad_norm": 1.8534700870513916, "learning_rate": 0.00019731931126520195, "loss": 1.6331, "step": 967 }, { "epoch": 0.10184113624408206, "grad_norm": 0.8898396492004395, "learning_rate": 0.00019731146817793932, "loss": 2.2339, "step": 968 }, { "epoch": 0.10194634402945818, "grad_norm": 1.305379867553711, "learning_rate": 0.00019730361379021632, "loss": 1.9012, "step": 969 }, { "epoch": 0.1020515518148343, "grad_norm": 1.1972163915634155, "learning_rate": 0.00019729574810294507, "loss": 1.6715, "step": 970 }, { "epoch": 0.10215675960021041, "grad_norm": 1.1665350198745728, "learning_rate": 0.00019728787111703895, "loss": 2.3843, "step": 971 }, { "epoch": 0.10226196738558653, "grad_norm": 0.8711062073707581, "learning_rate": 0.00019727998283341274, "loss": 2.2372, "step": 972 }, { "epoch": 0.10236717517096265, "grad_norm": 0.9705300331115723, "learning_rate": 0.00019727208325298246, "loss": 2.2943, "step": 973 }, { "epoch": 0.10247238295633877, "grad_norm": 1.7931549549102783, "learning_rate": 0.00019726417237666546, "loss": 2.1751, "step": 974 }, { "epoch": 0.10257759074171488, "grad_norm": 1.630979061126709, "learning_rate": 0.00019725625020538038, "loss": 1.9252, "step": 975 }, { "epoch": 0.102682798527091, "grad_norm": 1.412156105041504, "learning_rate": 0.00019724831674004724, "loss": 1.9631, "step": 976 }, { "epoch": 0.10278800631246712, "grad_norm": 0.7268504500389099, "learning_rate": 0.00019724037198158733, "loss": 1.8943, "step": 977 }, { "epoch": 0.10289321409784324, "grad_norm": 1.439155101776123, "learning_rate": 0.00019723241593092318, "loss": 1.9662, "step": 978 }, { "epoch": 0.10299842188321935, "grad_norm": 1.4355570077896118, "learning_rate": 0.00019722444858897878, "loss": 2.2064, "step": 979 }, { "epoch": 0.10310362966859547, "grad_norm": 0.9133340716362, "learning_rate": 0.00019721646995667932, "loss": 2.6105, "step": 980 }, { "epoch": 0.1032088374539716, "grad_norm": 0.9703789949417114, "learning_rate": 0.0001972084800349513, "loss": 2.2393, "step": 981 }, { "epoch": 0.10331404523934772, "grad_norm": 1.9817613363265991, "learning_rate": 0.00019720047882472262, "loss": 1.6778, "step": 982 }, { "epoch": 0.10341925302472382, "grad_norm": 0.9752820134162903, "learning_rate": 0.00019719246632692242, "loss": 2.002, "step": 983 }, { "epoch": 0.10352446081009994, "grad_norm": 1.4282121658325195, "learning_rate": 0.00019718444254248114, "loss": 1.8544, "step": 984 }, { "epoch": 0.10362966859547607, "grad_norm": 0.9460940957069397, "learning_rate": 0.00019717640747233056, "loss": 2.1763, "step": 985 }, { "epoch": 0.10373487638085219, "grad_norm": 1.4641571044921875, "learning_rate": 0.00019716836111740378, "loss": 2.2195, "step": 986 }, { "epoch": 0.1038400841662283, "grad_norm": 0.9056519865989685, "learning_rate": 0.00019716030347863517, "loss": 2.0533, "step": 987 }, { "epoch": 0.10394529195160442, "grad_norm": 1.7422082424163818, "learning_rate": 0.00019715223455696047, "loss": 2.1961, "step": 988 }, { "epoch": 0.10405049973698054, "grad_norm": 0.7487703561782837, "learning_rate": 0.0001971441543533167, "loss": 2.3884, "step": 989 }, { "epoch": 0.10415570752235666, "grad_norm": 1.123172402381897, "learning_rate": 0.0001971360628686422, "loss": 2.3124, "step": 990 }, { "epoch": 0.10426091530773277, "grad_norm": 0.9539334177970886, "learning_rate": 0.00019712796010387654, "loss": 2.1406, "step": 991 }, { "epoch": 0.10436612309310889, "grad_norm": 1.5190744400024414, "learning_rate": 0.0001971198460599607, "loss": 2.081, "step": 992 }, { "epoch": 0.10447133087848501, "grad_norm": 0.7697191834449768, "learning_rate": 0.00019711172073783696, "loss": 2.2589, "step": 993 }, { "epoch": 0.10457653866386113, "grad_norm": 1.2157846689224243, "learning_rate": 0.0001971035841384489, "loss": 1.9017, "step": 994 }, { "epoch": 0.10468174644923724, "grad_norm": 0.8707440495491028, "learning_rate": 0.00019709543626274131, "loss": 2.2788, "step": 995 }, { "epoch": 0.10478695423461336, "grad_norm": 1.0644099712371826, "learning_rate": 0.00019708727711166047, "loss": 1.8108, "step": 996 }, { "epoch": 0.10489216201998948, "grad_norm": 1.3928571939468384, "learning_rate": 0.00019707910668615382, "loss": 1.811, "step": 997 }, { "epoch": 0.1049973698053656, "grad_norm": 1.6720000505447388, "learning_rate": 0.00019707092498717023, "loss": 1.6834, "step": 998 }, { "epoch": 0.10510257759074171, "grad_norm": 1.0016800165176392, "learning_rate": 0.00019706273201565972, "loss": 2.008, "step": 999 }, { "epoch": 0.10520778537611783, "grad_norm": 0.7917373180389404, "learning_rate": 0.00019705452777257377, "loss": 2.1877, "step": 1000 }, { "epoch": 0.10531299316149395, "grad_norm": 1.1289230585098267, "learning_rate": 0.00019704631225886515, "loss": 2.135, "step": 1001 }, { "epoch": 0.10541820094687007, "grad_norm": 1.262541651725769, "learning_rate": 0.00019703808547548782, "loss": 2.1767, "step": 1002 }, { "epoch": 0.10552340873224618, "grad_norm": 0.9078851938247681, "learning_rate": 0.00019702984742339715, "loss": 2.0435, "step": 1003 }, { "epoch": 0.1056286165176223, "grad_norm": 1.3077322244644165, "learning_rate": 0.00019702159810354978, "loss": 1.8528, "step": 1004 }, { "epoch": 0.10573382430299842, "grad_norm": 1.0650526285171509, "learning_rate": 0.00019701333751690378, "loss": 2.2322, "step": 1005 }, { "epoch": 0.10583903208837454, "grad_norm": 1.7082046270370483, "learning_rate": 0.0001970050656644183, "loss": 2.0592, "step": 1006 }, { "epoch": 0.10594423987375065, "grad_norm": 0.9092219471931458, "learning_rate": 0.000196996782547054, "loss": 1.929, "step": 1007 }, { "epoch": 0.10604944765912677, "grad_norm": 1.0924969911575317, "learning_rate": 0.00019698848816577274, "loss": 2.0751, "step": 1008 }, { "epoch": 0.1061546554445029, "grad_norm": 0.8354095220565796, "learning_rate": 0.0001969801825215377, "loss": 2.3727, "step": 1009 }, { "epoch": 0.10625986322987901, "grad_norm": 0.8786736726760864, "learning_rate": 0.00019697186561531345, "loss": 2.0541, "step": 1010 }, { "epoch": 0.10636507101525512, "grad_norm": 1.0986034870147705, "learning_rate": 0.00019696353744806574, "loss": 2.3069, "step": 1011 }, { "epoch": 0.10647027880063124, "grad_norm": 0.7767875790596008, "learning_rate": 0.00019695519802076175, "loss": 2.2888, "step": 1012 }, { "epoch": 0.10657548658600736, "grad_norm": 1.035365104675293, "learning_rate": 0.00019694684733436986, "loss": 1.9233, "step": 1013 }, { "epoch": 0.10668069437138349, "grad_norm": 1.2048399448394775, "learning_rate": 0.00019693848538985983, "loss": 2.3406, "step": 1014 }, { "epoch": 0.1067859021567596, "grad_norm": 0.9446375370025635, "learning_rate": 0.0001969301121882027, "loss": 1.9142, "step": 1015 }, { "epoch": 0.10689110994213571, "grad_norm": 1.7339941263198853, "learning_rate": 0.0001969217277303708, "loss": 1.6881, "step": 1016 }, { "epoch": 0.10699631772751184, "grad_norm": 1.0482121706008911, "learning_rate": 0.00019691333201733786, "loss": 2.0258, "step": 1017 }, { "epoch": 0.10710152551288796, "grad_norm": 1.113286018371582, "learning_rate": 0.00019690492505007877, "loss": 1.7817, "step": 1018 }, { "epoch": 0.10720673329826406, "grad_norm": 1.4576612710952759, "learning_rate": 0.00019689650682956986, "loss": 2.085, "step": 1019 }, { "epoch": 0.10731194108364019, "grad_norm": 1.79568612575531, "learning_rate": 0.00019688807735678866, "loss": 2.0266, "step": 1020 }, { "epoch": 0.10741714886901631, "grad_norm": 1.656327486038208, "learning_rate": 0.00019687963663271409, "loss": 2.1882, "step": 1021 }, { "epoch": 0.10752235665439243, "grad_norm": 1.273909091949463, "learning_rate": 0.00019687118465832636, "loss": 1.9803, "step": 1022 }, { "epoch": 0.10762756443976854, "grad_norm": 1.4731061458587646, "learning_rate": 0.00019686272143460692, "loss": 1.8951, "step": 1023 }, { "epoch": 0.10773277222514466, "grad_norm": 0.7379525899887085, "learning_rate": 0.00019685424696253858, "loss": 2.0547, "step": 1024 }, { "epoch": 0.10783798001052078, "grad_norm": 0.7799714803695679, "learning_rate": 0.0001968457612431055, "loss": 1.9213, "step": 1025 }, { "epoch": 0.1079431877958969, "grad_norm": 2.3484795093536377, "learning_rate": 0.00019683726427729306, "loss": 2.5578, "step": 1026 }, { "epoch": 0.10804839558127301, "grad_norm": 1.0442508459091187, "learning_rate": 0.000196828756066088, "loss": 2.1757, "step": 1027 }, { "epoch": 0.10815360336664913, "grad_norm": 1.0097154378890991, "learning_rate": 0.00019682023661047836, "loss": 1.8894, "step": 1028 }, { "epoch": 0.10825881115202525, "grad_norm": 1.412798523902893, "learning_rate": 0.00019681170591145345, "loss": 2.2574, "step": 1029 }, { "epoch": 0.10836401893740137, "grad_norm": 0.9488776922225952, "learning_rate": 0.00019680316397000395, "loss": 1.6457, "step": 1030 }, { "epoch": 0.10846922672277748, "grad_norm": 0.8774062395095825, "learning_rate": 0.00019679461078712178, "loss": 2.0935, "step": 1031 }, { "epoch": 0.1085744345081536, "grad_norm": 1.0559821128845215, "learning_rate": 0.00019678604636380018, "loss": 1.9539, "step": 1032 }, { "epoch": 0.10867964229352972, "grad_norm": 1.1021692752838135, "learning_rate": 0.0001967774707010337, "loss": 2.1199, "step": 1033 }, { "epoch": 0.10878485007890584, "grad_norm": 1.441303014755249, "learning_rate": 0.0001967688837998183, "loss": 1.7238, "step": 1034 }, { "epoch": 0.10889005786428195, "grad_norm": 1.2004631757736206, "learning_rate": 0.00019676028566115102, "loss": 2.3156, "step": 1035 }, { "epoch": 0.10899526564965807, "grad_norm": 1.1571768522262573, "learning_rate": 0.0001967516762860304, "loss": 2.2756, "step": 1036 }, { "epoch": 0.10910047343503419, "grad_norm": 0.7429097890853882, "learning_rate": 0.0001967430556754562, "loss": 1.4574, "step": 1037 }, { "epoch": 0.10920568122041031, "grad_norm": 1.2119436264038086, "learning_rate": 0.00019673442383042952, "loss": 2.3933, "step": 1038 }, { "epoch": 0.10931088900578642, "grad_norm": 1.4365086555480957, "learning_rate": 0.00019672578075195272, "loss": 1.9312, "step": 1039 }, { "epoch": 0.10941609679116254, "grad_norm": 1.0024199485778809, "learning_rate": 0.00019671712644102956, "loss": 1.9121, "step": 1040 }, { "epoch": 0.10952130457653866, "grad_norm": 0.6678622961044312, "learning_rate": 0.00019670846089866496, "loss": 1.9272, "step": 1041 }, { "epoch": 0.10962651236191479, "grad_norm": 1.1651445627212524, "learning_rate": 0.00019669978412586528, "loss": 2.1199, "step": 1042 }, { "epoch": 0.10973172014729089, "grad_norm": 1.0801807641983032, "learning_rate": 0.00019669109612363803, "loss": 1.9533, "step": 1043 }, { "epoch": 0.10983692793266701, "grad_norm": 1.6487115621566772, "learning_rate": 0.00019668239689299224, "loss": 1.4197, "step": 1044 }, { "epoch": 0.10994213571804314, "grad_norm": 1.0398926734924316, "learning_rate": 0.00019667368643493804, "loss": 1.8756, "step": 1045 }, { "epoch": 0.11004734350341926, "grad_norm": 1.2380412817001343, "learning_rate": 0.00019666496475048698, "loss": 1.731, "step": 1046 }, { "epoch": 0.11015255128879536, "grad_norm": 1.0695997476577759, "learning_rate": 0.00019665623184065187, "loss": 1.816, "step": 1047 }, { "epoch": 0.11025775907417149, "grad_norm": 1.0123080015182495, "learning_rate": 0.00019664748770644686, "loss": 2.0089, "step": 1048 }, { "epoch": 0.1103629668595476, "grad_norm": 0.980810821056366, "learning_rate": 0.00019663873234888733, "loss": 2.0396, "step": 1049 }, { "epoch": 0.11046817464492373, "grad_norm": 1.2700474262237549, "learning_rate": 0.00019662996576899004, "loss": 2.3745, "step": 1050 }, { "epoch": 0.11057338243029984, "grad_norm": 1.1388194561004639, "learning_rate": 0.00019662118796777303, "loss": 1.8479, "step": 1051 }, { "epoch": 0.11067859021567596, "grad_norm": 1.463036298751831, "learning_rate": 0.0001966123989462556, "loss": 1.9751, "step": 1052 }, { "epoch": 0.11078379800105208, "grad_norm": 1.2596359252929688, "learning_rate": 0.00019660359870545845, "loss": 2.0322, "step": 1053 }, { "epoch": 0.1108890057864282, "grad_norm": 0.8281905055046082, "learning_rate": 0.00019659478724640348, "loss": 1.9301, "step": 1054 }, { "epoch": 0.1109942135718043, "grad_norm": 1.469774603843689, "learning_rate": 0.00019658596457011393, "loss": 1.5288, "step": 1055 }, { "epoch": 0.11109942135718043, "grad_norm": 0.8355114459991455, "learning_rate": 0.0001965771306776144, "loss": 2.1748, "step": 1056 }, { "epoch": 0.11120462914255655, "grad_norm": 0.8642022013664246, "learning_rate": 0.00019656828556993068, "loss": 1.6075, "step": 1057 }, { "epoch": 0.11130983692793267, "grad_norm": 0.6303163170814514, "learning_rate": 0.00019655942924808994, "loss": 2.3366, "step": 1058 }, { "epoch": 0.11141504471330879, "grad_norm": 0.9480810761451721, "learning_rate": 0.00019655056171312069, "loss": 1.8519, "step": 1059 }, { "epoch": 0.1115202524986849, "grad_norm": 0.8360817432403564, "learning_rate": 0.0001965416829660526, "loss": 2.1247, "step": 1060 }, { "epoch": 0.11162546028406102, "grad_norm": 0.9040228724479675, "learning_rate": 0.0001965327930079168, "loss": 2.2492, "step": 1061 }, { "epoch": 0.11173066806943714, "grad_norm": 0.8252432942390442, "learning_rate": 0.00019652389183974557, "loss": 1.7289, "step": 1062 }, { "epoch": 0.11183587585481326, "grad_norm": 0.783162534236908, "learning_rate": 0.00019651497946257266, "loss": 2.3029, "step": 1063 }, { "epoch": 0.11194108364018937, "grad_norm": 0.8001965284347534, "learning_rate": 0.00019650605587743302, "loss": 1.6801, "step": 1064 }, { "epoch": 0.11204629142556549, "grad_norm": 1.3523527383804321, "learning_rate": 0.00019649712108536286, "loss": 2.1644, "step": 1065 }, { "epoch": 0.11215149921094161, "grad_norm": 1.2581042051315308, "learning_rate": 0.00019648817508739983, "loss": 2.1609, "step": 1066 }, { "epoch": 0.11225670699631773, "grad_norm": 1.0684157609939575, "learning_rate": 0.00019647921788458272, "loss": 2.4841, "step": 1067 }, { "epoch": 0.11236191478169384, "grad_norm": 0.9492899775505066, "learning_rate": 0.00019647024947795175, "loss": 1.8177, "step": 1068 }, { "epoch": 0.11246712256706996, "grad_norm": 1.1650155782699585, "learning_rate": 0.00019646126986854837, "loss": 1.6481, "step": 1069 }, { "epoch": 0.11257233035244608, "grad_norm": 1.2191792726516724, "learning_rate": 0.00019645227905741534, "loss": 1.7948, "step": 1070 }, { "epoch": 0.1126775381378222, "grad_norm": 1.074036717414856, "learning_rate": 0.0001964432770455968, "loss": 1.764, "step": 1071 }, { "epoch": 0.11278274592319831, "grad_norm": 1.0380913019180298, "learning_rate": 0.00019643426383413805, "loss": 1.526, "step": 1072 }, { "epoch": 0.11288795370857443, "grad_norm": 1.4645812511444092, "learning_rate": 0.0001964252394240858, "loss": 2.016, "step": 1073 }, { "epoch": 0.11299316149395056, "grad_norm": 0.8805195093154907, "learning_rate": 0.000196416203816488, "loss": 2.0084, "step": 1074 }, { "epoch": 0.11309836927932668, "grad_norm": 1.593756079673767, "learning_rate": 0.00019640715701239395, "loss": 2.1586, "step": 1075 }, { "epoch": 0.11320357706470278, "grad_norm": 0.9434992671012878, "learning_rate": 0.00019639809901285423, "loss": 1.8971, "step": 1076 }, { "epoch": 0.1133087848500789, "grad_norm": 1.0768349170684814, "learning_rate": 0.00019638902981892068, "loss": 2.289, "step": 1077 }, { "epoch": 0.11341399263545503, "grad_norm": 1.1029759645462036, "learning_rate": 0.0001963799494316465, "loss": 2.2014, "step": 1078 }, { "epoch": 0.11351920042083115, "grad_norm": 1.1551514863967896, "learning_rate": 0.0001963708578520862, "loss": 2.3707, "step": 1079 }, { "epoch": 0.11362440820620726, "grad_norm": 1.2115519046783447, "learning_rate": 0.00019636175508129552, "loss": 1.4232, "step": 1080 }, { "epoch": 0.11372961599158338, "grad_norm": 1.1642786264419556, "learning_rate": 0.0001963526411203315, "loss": 2.1658, "step": 1081 }, { "epoch": 0.1138348237769595, "grad_norm": 1.1630796194076538, "learning_rate": 0.00019634351597025255, "loss": 2.0171, "step": 1082 }, { "epoch": 0.11394003156233562, "grad_norm": 1.058016061782837, "learning_rate": 0.00019633437963211832, "loss": 1.5272, "step": 1083 }, { "epoch": 0.11404523934771173, "grad_norm": 1.1244498491287231, "learning_rate": 0.00019632523210698987, "loss": 2.4017, "step": 1084 }, { "epoch": 0.11415044713308785, "grad_norm": 1.0353667736053467, "learning_rate": 0.00019631607339592937, "loss": 1.9151, "step": 1085 }, { "epoch": 0.11425565491846397, "grad_norm": 0.735931396484375, "learning_rate": 0.00019630690350000042, "loss": 2.3843, "step": 1086 }, { "epoch": 0.11436086270384009, "grad_norm": 0.9920666217803955, "learning_rate": 0.00019629772242026793, "loss": 2.1867, "step": 1087 }, { "epoch": 0.1144660704892162, "grad_norm": 0.9778599739074707, "learning_rate": 0.000196288530157798, "loss": 2.3535, "step": 1088 }, { "epoch": 0.11457127827459232, "grad_norm": 1.1692768335342407, "learning_rate": 0.00019627932671365813, "loss": 1.9833, "step": 1089 }, { "epoch": 0.11467648605996844, "grad_norm": 1.3122508525848389, "learning_rate": 0.00019627011208891713, "loss": 1.8988, "step": 1090 }, { "epoch": 0.11478169384534456, "grad_norm": 1.044777512550354, "learning_rate": 0.00019626088628464498, "loss": 1.92, "step": 1091 }, { "epoch": 0.11488690163072067, "grad_norm": 0.8848743438720703, "learning_rate": 0.00019625164930191311, "loss": 2.4497, "step": 1092 }, { "epoch": 0.11499210941609679, "grad_norm": 0.9451887607574463, "learning_rate": 0.00019624240114179416, "loss": 1.7285, "step": 1093 }, { "epoch": 0.11509731720147291, "grad_norm": 0.9410362243652344, "learning_rate": 0.00019623314180536205, "loss": 1.9463, "step": 1094 }, { "epoch": 0.11520252498684903, "grad_norm": 1.023637294769287, "learning_rate": 0.00019622387129369212, "loss": 1.8648, "step": 1095 }, { "epoch": 0.11530773277222514, "grad_norm": 1.2272717952728271, "learning_rate": 0.00019621458960786083, "loss": 2.1191, "step": 1096 }, { "epoch": 0.11541294055760126, "grad_norm": 1.150048851966858, "learning_rate": 0.0001962052967489461, "loss": 1.7046, "step": 1097 }, { "epoch": 0.11551814834297738, "grad_norm": 0.7519487738609314, "learning_rate": 0.00019619599271802706, "loss": 2.2551, "step": 1098 }, { "epoch": 0.1156233561283535, "grad_norm": 1.0038561820983887, "learning_rate": 0.00019618667751618416, "loss": 2.2381, "step": 1099 }, { "epoch": 0.11572856391372961, "grad_norm": 0.9891253113746643, "learning_rate": 0.0001961773511444991, "loss": 2.2862, "step": 1100 }, { "epoch": 0.11583377169910573, "grad_norm": 0.9823799729347229, "learning_rate": 0.00019616801360405499, "loss": 1.7907, "step": 1101 }, { "epoch": 0.11593897948448185, "grad_norm": 2.5130529403686523, "learning_rate": 0.0001961586648959361, "loss": 2.1288, "step": 1102 }, { "epoch": 0.11604418726985798, "grad_norm": 0.940342366695404, "learning_rate": 0.00019614930502122812, "loss": 2.1486, "step": 1103 }, { "epoch": 0.11614939505523408, "grad_norm": 1.1634947061538696, "learning_rate": 0.00019613993398101795, "loss": 1.3548, "step": 1104 }, { "epoch": 0.1162546028406102, "grad_norm": 1.2739289999008179, "learning_rate": 0.00019613055177639384, "loss": 2.4006, "step": 1105 }, { "epoch": 0.11635981062598633, "grad_norm": 0.7770984768867493, "learning_rate": 0.0001961211584084453, "loss": 2.2417, "step": 1106 }, { "epoch": 0.11646501841136245, "grad_norm": 0.9923128485679626, "learning_rate": 0.00019611175387826315, "loss": 1.9447, "step": 1107 }, { "epoch": 0.11657022619673856, "grad_norm": 1.3460139036178589, "learning_rate": 0.00019610233818693953, "loss": 1.8778, "step": 1108 }, { "epoch": 0.11667543398211468, "grad_norm": 0.7878180146217346, "learning_rate": 0.0001960929113355678, "loss": 1.8391, "step": 1109 }, { "epoch": 0.1167806417674908, "grad_norm": 1.1408841609954834, "learning_rate": 0.00019608347332524272, "loss": 1.8246, "step": 1110 }, { "epoch": 0.11688584955286692, "grad_norm": 1.3990023136138916, "learning_rate": 0.00019607402415706027, "loss": 1.8562, "step": 1111 }, { "epoch": 0.11699105733824303, "grad_norm": 1.045021414756775, "learning_rate": 0.00019606456383211777, "loss": 1.9724, "step": 1112 }, { "epoch": 0.11709626512361915, "grad_norm": 0.6789584755897522, "learning_rate": 0.0001960550923515138, "loss": 2.149, "step": 1113 }, { "epoch": 0.11720147290899527, "grad_norm": 0.8170668482780457, "learning_rate": 0.00019604560971634826, "loss": 2.0608, "step": 1114 }, { "epoch": 0.11730668069437139, "grad_norm": 0.6909334659576416, "learning_rate": 0.00019603611592772233, "loss": 2.1037, "step": 1115 }, { "epoch": 0.1174118884797475, "grad_norm": 0.8929466605186462, "learning_rate": 0.0001960266109867385, "loss": 1.7609, "step": 1116 }, { "epoch": 0.11751709626512362, "grad_norm": 1.2429050207138062, "learning_rate": 0.00019601709489450056, "loss": 1.6607, "step": 1117 }, { "epoch": 0.11762230405049974, "grad_norm": 0.7718959450721741, "learning_rate": 0.00019600756765211354, "loss": 2.273, "step": 1118 }, { "epoch": 0.11772751183587586, "grad_norm": 1.4449671506881714, "learning_rate": 0.00019599802926068384, "loss": 1.9564, "step": 1119 }, { "epoch": 0.11783271962125197, "grad_norm": 0.955116868019104, "learning_rate": 0.00019598847972131914, "loss": 1.8775, "step": 1120 }, { "epoch": 0.11793792740662809, "grad_norm": 1.5084177255630493, "learning_rate": 0.00019597891903512835, "loss": 2.0589, "step": 1121 }, { "epoch": 0.11804313519200421, "grad_norm": 1.039664626121521, "learning_rate": 0.00019596934720322176, "loss": 2.331, "step": 1122 }, { "epoch": 0.11814834297738033, "grad_norm": 1.2235255241394043, "learning_rate": 0.00019595976422671086, "loss": 1.7996, "step": 1123 }, { "epoch": 0.11825355076275644, "grad_norm": 0.9834601283073425, "learning_rate": 0.00019595017010670858, "loss": 2.4876, "step": 1124 }, { "epoch": 0.11835875854813256, "grad_norm": 1.0267283916473389, "learning_rate": 0.00019594056484432897, "loss": 2.1381, "step": 1125 }, { "epoch": 0.11846396633350868, "grad_norm": 1.187152624130249, "learning_rate": 0.00019593094844068748, "loss": 2.1336, "step": 1126 }, { "epoch": 0.1185691741188848, "grad_norm": 0.8260311484336853, "learning_rate": 0.00019592132089690085, "loss": 2.0608, "step": 1127 }, { "epoch": 0.11867438190426091, "grad_norm": 0.8362654447555542, "learning_rate": 0.0001959116822140871, "loss": 2.2765, "step": 1128 }, { "epoch": 0.11877958968963703, "grad_norm": 0.9070550799369812, "learning_rate": 0.00019590203239336552, "loss": 2.1471, "step": 1129 }, { "epoch": 0.11888479747501315, "grad_norm": 0.8633410334587097, "learning_rate": 0.0001958923714358567, "loss": 2.2632, "step": 1130 }, { "epoch": 0.11899000526038928, "grad_norm": 1.0233980417251587, "learning_rate": 0.00019588269934268257, "loss": 2.0631, "step": 1131 }, { "epoch": 0.11909521304576538, "grad_norm": 1.4428153038024902, "learning_rate": 0.00019587301611496632, "loss": 1.646, "step": 1132 }, { "epoch": 0.1192004208311415, "grad_norm": 1.2302439212799072, "learning_rate": 0.00019586332175383238, "loss": 2.0555, "step": 1133 }, { "epoch": 0.11930562861651763, "grad_norm": 0.8173778057098389, "learning_rate": 0.00019585361626040654, "loss": 1.8569, "step": 1134 }, { "epoch": 0.11941083640189375, "grad_norm": 1.5377962589263916, "learning_rate": 0.00019584389963581592, "loss": 1.9263, "step": 1135 }, { "epoch": 0.11951604418726985, "grad_norm": 0.9011675119400024, "learning_rate": 0.00019583417188118882, "loss": 1.7328, "step": 1136 }, { "epoch": 0.11962125197264598, "grad_norm": 1.0108987092971802, "learning_rate": 0.0001958244329976549, "loss": 1.8854, "step": 1137 }, { "epoch": 0.1197264597580221, "grad_norm": 1.1729274988174438, "learning_rate": 0.00019581468298634515, "loss": 2.1649, "step": 1138 }, { "epoch": 0.11983166754339822, "grad_norm": 1.0889396667480469, "learning_rate": 0.00019580492184839175, "loss": 2.0792, "step": 1139 }, { "epoch": 0.11993687532877433, "grad_norm": 1.3217434883117676, "learning_rate": 0.00019579514958492826, "loss": 2.1042, "step": 1140 }, { "epoch": 0.12004208311415045, "grad_norm": 1.0219428539276123, "learning_rate": 0.00019578536619708952, "loss": 2.3675, "step": 1141 }, { "epoch": 0.12014729089952657, "grad_norm": 1.771694302558899, "learning_rate": 0.0001957755716860116, "loss": 2.1605, "step": 1142 }, { "epoch": 0.12025249868490269, "grad_norm": 1.005615472793579, "learning_rate": 0.0001957657660528319, "loss": 1.9396, "step": 1143 }, { "epoch": 0.1203577064702788, "grad_norm": 0.8795095086097717, "learning_rate": 0.00019575594929868918, "loss": 2.2603, "step": 1144 }, { "epoch": 0.12046291425565492, "grad_norm": 1.1998109817504883, "learning_rate": 0.00019574612142472334, "loss": 2.5464, "step": 1145 }, { "epoch": 0.12056812204103104, "grad_norm": 0.9663223624229431, "learning_rate": 0.00019573628243207573, "loss": 1.8903, "step": 1146 }, { "epoch": 0.12067332982640716, "grad_norm": 0.9603292346000671, "learning_rate": 0.0001957264323218889, "loss": 2.1182, "step": 1147 }, { "epoch": 0.12077853761178327, "grad_norm": 0.8244251608848572, "learning_rate": 0.00019571657109530667, "loss": 2.0749, "step": 1148 }, { "epoch": 0.12088374539715939, "grad_norm": 1.2357412576675415, "learning_rate": 0.00019570669875347427, "loss": 1.8142, "step": 1149 }, { "epoch": 0.12098895318253551, "grad_norm": 1.6487301588058472, "learning_rate": 0.00019569681529753806, "loss": 2.057, "step": 1150 }, { "epoch": 0.12109416096791163, "grad_norm": 1.3535126447677612, "learning_rate": 0.00019568692072864581, "loss": 1.8645, "step": 1151 }, { "epoch": 0.12119936875328774, "grad_norm": 0.983331024646759, "learning_rate": 0.0001956770150479466, "loss": 2.2773, "step": 1152 }, { "epoch": 0.12130457653866386, "grad_norm": 0.625877320766449, "learning_rate": 0.00019566709825659064, "loss": 2.2146, "step": 1153 }, { "epoch": 0.12140978432403998, "grad_norm": 1.0601180791854858, "learning_rate": 0.0001956571703557296, "loss": 2.2351, "step": 1154 }, { "epoch": 0.1215149921094161, "grad_norm": 1.0137640237808228, "learning_rate": 0.00019564723134651634, "loss": 2.5221, "step": 1155 }, { "epoch": 0.12162019989479221, "grad_norm": 1.044480323791504, "learning_rate": 0.0001956372812301051, "loss": 2.14, "step": 1156 }, { "epoch": 0.12172540768016833, "grad_norm": 1.44232976436615, "learning_rate": 0.00019562732000765127, "loss": 1.9134, "step": 1157 }, { "epoch": 0.12183061546554445, "grad_norm": 1.2370481491088867, "learning_rate": 0.0001956173476803117, "loss": 1.727, "step": 1158 }, { "epoch": 0.12193582325092057, "grad_norm": 1.006666898727417, "learning_rate": 0.00019560736424924439, "loss": 2.221, "step": 1159 }, { "epoch": 0.12204103103629668, "grad_norm": 0.8660513162612915, "learning_rate": 0.0001955973697156087, "loss": 2.1285, "step": 1160 }, { "epoch": 0.1221462388216728, "grad_norm": 0.7105367183685303, "learning_rate": 0.00019558736408056525, "loss": 2.0834, "step": 1161 }, { "epoch": 0.12225144660704892, "grad_norm": 1.236364483833313, "learning_rate": 0.000195577347345276, "loss": 1.8932, "step": 1162 }, { "epoch": 0.12235665439242505, "grad_norm": 1.5001050233840942, "learning_rate": 0.0001955673195109041, "loss": 2.263, "step": 1163 }, { "epoch": 0.12246186217780115, "grad_norm": 0.8300707936286926, "learning_rate": 0.0001955572805786141, "loss": 2.3241, "step": 1164 }, { "epoch": 0.12256706996317727, "grad_norm": 1.1796188354492188, "learning_rate": 0.00019554723054957175, "loss": 1.7304, "step": 1165 }, { "epoch": 0.1226722777485534, "grad_norm": 1.1886162757873535, "learning_rate": 0.00019553716942494415, "loss": 1.8389, "step": 1166 }, { "epoch": 0.12277748553392952, "grad_norm": 1.0573186874389648, "learning_rate": 0.00019552709720589966, "loss": 2.0146, "step": 1167 }, { "epoch": 0.12288269331930562, "grad_norm": 0.8099238276481628, "learning_rate": 0.00019551701389360795, "loss": 1.593, "step": 1168 }, { "epoch": 0.12298790110468175, "grad_norm": 1.0094003677368164, "learning_rate": 0.00019550691948923992, "loss": 2.4218, "step": 1169 }, { "epoch": 0.12309310889005787, "grad_norm": 0.8765414953231812, "learning_rate": 0.00019549681399396785, "loss": 2.2399, "step": 1170 }, { "epoch": 0.12319831667543399, "grad_norm": 1.4000588655471802, "learning_rate": 0.00019548669740896525, "loss": 1.7225, "step": 1171 }, { "epoch": 0.1233035244608101, "grad_norm": 1.909455418586731, "learning_rate": 0.0001954765697354069, "loss": 2.0569, "step": 1172 }, { "epoch": 0.12340873224618622, "grad_norm": 1.0092931985855103, "learning_rate": 0.00019546643097446888, "loss": 1.8983, "step": 1173 }, { "epoch": 0.12351394003156234, "grad_norm": 1.216364860534668, "learning_rate": 0.0001954562811273286, "loss": 2.0644, "step": 1174 }, { "epoch": 0.12361914781693846, "grad_norm": 1.245184302330017, "learning_rate": 0.00019544612019516472, "loss": 1.4739, "step": 1175 }, { "epoch": 0.12372435560231457, "grad_norm": 1.5006418228149414, "learning_rate": 0.00019543594817915722, "loss": 2.0464, "step": 1176 }, { "epoch": 0.12382956338769069, "grad_norm": 1.2883630990982056, "learning_rate": 0.00019542576508048732, "loss": 1.9773, "step": 1177 }, { "epoch": 0.12393477117306681, "grad_norm": 1.4748666286468506, "learning_rate": 0.00019541557090033753, "loss": 2.0239, "step": 1178 }, { "epoch": 0.12403997895844293, "grad_norm": 1.251774549484253, "learning_rate": 0.0001954053656398917, "loss": 1.8751, "step": 1179 }, { "epoch": 0.12414518674381904, "grad_norm": 1.2670092582702637, "learning_rate": 0.00019539514930033493, "loss": 2.093, "step": 1180 }, { "epoch": 0.12425039452919516, "grad_norm": 0.845373272895813, "learning_rate": 0.00019538492188285358, "loss": 2.2009, "step": 1181 }, { "epoch": 0.12435560231457128, "grad_norm": 1.3684707880020142, "learning_rate": 0.00019537468338863537, "loss": 2.1124, "step": 1182 }, { "epoch": 0.1244608100999474, "grad_norm": 1.0904892683029175, "learning_rate": 0.0001953644338188692, "loss": 2.2674, "step": 1183 }, { "epoch": 0.12456601788532351, "grad_norm": 1.0843944549560547, "learning_rate": 0.0001953541731747454, "loss": 2.1427, "step": 1184 }, { "epoch": 0.12467122567069963, "grad_norm": 1.4140195846557617, "learning_rate": 0.00019534390145745545, "loss": 1.9423, "step": 1185 }, { "epoch": 0.12477643345607575, "grad_norm": 1.2071352005004883, "learning_rate": 0.00019533361866819218, "loss": 1.7771, "step": 1186 }, { "epoch": 0.12488164124145187, "grad_norm": 0.9805304408073425, "learning_rate": 0.0001953233248081497, "loss": 2.1534, "step": 1187 }, { "epoch": 0.12498684902682798, "grad_norm": 0.8106799721717834, "learning_rate": 0.0001953130198785234, "loss": 1.8775, "step": 1188 }, { "epoch": 0.12509205681220412, "grad_norm": 1.2186012268066406, "learning_rate": 0.00019530270388050998, "loss": 2.0908, "step": 1189 }, { "epoch": 0.1251972645975802, "grad_norm": 1.270003318786621, "learning_rate": 0.00019529237681530735, "loss": 1.9182, "step": 1190 }, { "epoch": 0.12530247238295633, "grad_norm": 1.1836543083190918, "learning_rate": 0.00019528203868411482, "loss": 2.2695, "step": 1191 }, { "epoch": 0.12540768016833245, "grad_norm": 1.4582277536392212, "learning_rate": 0.00019527168948813288, "loss": 1.7637, "step": 1192 }, { "epoch": 0.12551288795370857, "grad_norm": 1.24919593334198, "learning_rate": 0.00019526132922856334, "loss": 1.8924, "step": 1193 }, { "epoch": 0.1256180957390847, "grad_norm": 1.0060888528823853, "learning_rate": 0.00019525095790660937, "loss": 2.2829, "step": 1194 }, { "epoch": 0.12572330352446082, "grad_norm": 1.7646416425704956, "learning_rate": 0.00019524057552347527, "loss": 2.0088, "step": 1195 }, { "epoch": 0.12582851130983694, "grad_norm": 1.0595672130584717, "learning_rate": 0.00019523018208036677, "loss": 1.9792, "step": 1196 }, { "epoch": 0.12593371909521306, "grad_norm": 0.9665113091468811, "learning_rate": 0.00019521977757849083, "loss": 1.8552, "step": 1197 }, { "epoch": 0.12603892688058915, "grad_norm": 0.996906042098999, "learning_rate": 0.00019520936201905566, "loss": 1.9122, "step": 1198 }, { "epoch": 0.12614413466596527, "grad_norm": 1.0974137783050537, "learning_rate": 0.0001951989354032708, "loss": 2.3548, "step": 1199 }, { "epoch": 0.1262493424513414, "grad_norm": 1.2760165929794312, "learning_rate": 0.00019518849773234704, "loss": 1.9927, "step": 1200 }, { "epoch": 0.12635455023671752, "grad_norm": 0.8448911309242249, "learning_rate": 0.0001951780490074965, "loss": 2.1343, "step": 1201 }, { "epoch": 0.12645975802209364, "grad_norm": 0.8463537096977234, "learning_rate": 0.00019516758922993256, "loss": 2.4417, "step": 1202 }, { "epoch": 0.12656496580746976, "grad_norm": 0.971531331539154, "learning_rate": 0.0001951571184008698, "loss": 2.0901, "step": 1203 }, { "epoch": 0.12667017359284588, "grad_norm": 1.1150871515274048, "learning_rate": 0.00019514663652152428, "loss": 2.3631, "step": 1204 }, { "epoch": 0.126775381378222, "grad_norm": 1.44049870967865, "learning_rate": 0.00019513614359311315, "loss": 1.9413, "step": 1205 }, { "epoch": 0.1268805891635981, "grad_norm": 1.1765494346618652, "learning_rate": 0.00019512563961685494, "loss": 2.2888, "step": 1206 }, { "epoch": 0.12698579694897422, "grad_norm": 1.1426244974136353, "learning_rate": 0.00019511512459396944, "loss": 1.7372, "step": 1207 }, { "epoch": 0.12709100473435034, "grad_norm": 1.2937824726104736, "learning_rate": 0.00019510459852567773, "loss": 1.7583, "step": 1208 }, { "epoch": 0.12719621251972646, "grad_norm": 1.0427653789520264, "learning_rate": 0.0001950940614132022, "loss": 2.2694, "step": 1209 }, { "epoch": 0.12730142030510258, "grad_norm": 1.0768921375274658, "learning_rate": 0.00019508351325776642, "loss": 2.0166, "step": 1210 }, { "epoch": 0.1274066280904787, "grad_norm": 1.1876052618026733, "learning_rate": 0.00019507295406059533, "loss": 2.2435, "step": 1211 }, { "epoch": 0.12751183587585482, "grad_norm": 1.4047654867172241, "learning_rate": 0.0001950623838229152, "loss": 1.9735, "step": 1212 }, { "epoch": 0.12761704366123094, "grad_norm": 1.5884112119674683, "learning_rate": 0.00019505180254595343, "loss": 1.8768, "step": 1213 }, { "epoch": 0.12772225144660704, "grad_norm": 0.8576595187187195, "learning_rate": 0.00019504121023093888, "loss": 2.186, "step": 1214 }, { "epoch": 0.12782745923198316, "grad_norm": 1.0435853004455566, "learning_rate": 0.00019503060687910148, "loss": 1.963, "step": 1215 }, { "epoch": 0.12793266701735928, "grad_norm": 1.279901385307312, "learning_rate": 0.0001950199924916727, "loss": 1.7715, "step": 1216 }, { "epoch": 0.1280378748027354, "grad_norm": 0.7529373168945312, "learning_rate": 0.00019500936706988502, "loss": 2.0821, "step": 1217 }, { "epoch": 0.12814308258811152, "grad_norm": 1.64811372756958, "learning_rate": 0.00019499873061497246, "loss": 2.1265, "step": 1218 }, { "epoch": 0.12824829037348764, "grad_norm": 1.1517812013626099, "learning_rate": 0.00019498808312817006, "loss": 1.9678, "step": 1219 }, { "epoch": 0.12835349815886377, "grad_norm": 1.0397827625274658, "learning_rate": 0.00019497742461071441, "loss": 1.9882, "step": 1220 }, { "epoch": 0.1284587059442399, "grad_norm": 1.1834861040115356, "learning_rate": 0.0001949667550638432, "loss": 2.175, "step": 1221 }, { "epoch": 0.12856391372961598, "grad_norm": 1.0176396369934082, "learning_rate": 0.00019495607448879546, "loss": 1.9798, "step": 1222 }, { "epoch": 0.1286691215149921, "grad_norm": 1.2823526859283447, "learning_rate": 0.00019494538288681145, "loss": 1.8454, "step": 1223 }, { "epoch": 0.12877432930036822, "grad_norm": 1.0017826557159424, "learning_rate": 0.00019493468025913276, "loss": 2.1296, "step": 1224 }, { "epoch": 0.12887953708574434, "grad_norm": 0.8220755457878113, "learning_rate": 0.00019492396660700226, "loss": 1.9987, "step": 1225 }, { "epoch": 0.12898474487112047, "grad_norm": 0.9506868124008179, "learning_rate": 0.00019491324193166408, "loss": 2.3586, "step": 1226 }, { "epoch": 0.1290899526564966, "grad_norm": 0.796829342842102, "learning_rate": 0.00019490250623436367, "loss": 1.4214, "step": 1227 }, { "epoch": 0.1291951604418727, "grad_norm": 1.117042899131775, "learning_rate": 0.00019489175951634775, "loss": 1.7607, "step": 1228 }, { "epoch": 0.12930036822724883, "grad_norm": 1.1482354402542114, "learning_rate": 0.00019488100177886427, "loss": 1.782, "step": 1229 }, { "epoch": 0.12940557601262492, "grad_norm": 0.8992315530776978, "learning_rate": 0.00019487023302316243, "loss": 2.3632, "step": 1230 }, { "epoch": 0.12951078379800104, "grad_norm": 0.9906787872314453, "learning_rate": 0.00019485945325049288, "loss": 1.8036, "step": 1231 }, { "epoch": 0.12961599158337717, "grad_norm": 1.0407614707946777, "learning_rate": 0.00019484866246210738, "loss": 1.9722, "step": 1232 }, { "epoch": 0.1297211993687533, "grad_norm": 1.3917356729507446, "learning_rate": 0.00019483786065925904, "loss": 1.6965, "step": 1233 }, { "epoch": 0.1298264071541294, "grad_norm": 0.9043229818344116, "learning_rate": 0.0001948270478432022, "loss": 1.9409, "step": 1234 }, { "epoch": 0.12993161493950553, "grad_norm": 1.2875796556472778, "learning_rate": 0.0001948162240151926, "loss": 2.0062, "step": 1235 }, { "epoch": 0.13003682272488165, "grad_norm": 1.3274993896484375, "learning_rate": 0.00019480538917648711, "loss": 1.8964, "step": 1236 }, { "epoch": 0.13014203051025777, "grad_norm": 0.974236786365509, "learning_rate": 0.00019479454332834396, "loss": 1.4836, "step": 1237 }, { "epoch": 0.13024723829563387, "grad_norm": 0.9269886016845703, "learning_rate": 0.00019478368647202264, "loss": 2.19, "step": 1238 }, { "epoch": 0.13035244608101, "grad_norm": 0.7717685699462891, "learning_rate": 0.0001947728186087839, "loss": 2.4556, "step": 1239 }, { "epoch": 0.1304576538663861, "grad_norm": 1.7023308277130127, "learning_rate": 0.00019476193973988988, "loss": 2.433, "step": 1240 }, { "epoch": 0.13056286165176223, "grad_norm": 1.3712750673294067, "learning_rate": 0.0001947510498666038, "loss": 2.1469, "step": 1241 }, { "epoch": 0.13066806943713835, "grad_norm": 1.1237108707427979, "learning_rate": 0.0001947401489901903, "loss": 2.3985, "step": 1242 }, { "epoch": 0.13077327722251447, "grad_norm": 1.0519534349441528, "learning_rate": 0.0001947292371119153, "loss": 2.3266, "step": 1243 }, { "epoch": 0.1308784850078906, "grad_norm": 0.9474523663520813, "learning_rate": 0.0001947183142330459, "loss": 2.0388, "step": 1244 }, { "epoch": 0.13098369279326671, "grad_norm": 1.21075439453125, "learning_rate": 0.00019470738035485058, "loss": 1.7828, "step": 1245 }, { "epoch": 0.1310889005786428, "grad_norm": 0.9895322322845459, "learning_rate": 0.00019469643547859904, "loss": 2.0604, "step": 1246 }, { "epoch": 0.13119410836401893, "grad_norm": 1.5811057090759277, "learning_rate": 0.0001946854796055623, "loss": 1.7441, "step": 1247 }, { "epoch": 0.13129931614939505, "grad_norm": 1.3504817485809326, "learning_rate": 0.00019467451273701256, "loss": 2.4175, "step": 1248 }, { "epoch": 0.13140452393477117, "grad_norm": 1.2100443840026855, "learning_rate": 0.00019466353487422345, "loss": 2.0371, "step": 1249 }, { "epoch": 0.1315097317201473, "grad_norm": 0.8898875713348389, "learning_rate": 0.00019465254601846974, "loss": 2.3904, "step": 1250 }, { "epoch": 0.13161493950552342, "grad_norm": 1.0012081861495972, "learning_rate": 0.00019464154617102755, "loss": 1.8023, "step": 1251 }, { "epoch": 0.13172014729089954, "grad_norm": 1.0802556276321411, "learning_rate": 0.00019463053533317425, "loss": 2.0063, "step": 1252 }, { "epoch": 0.13182535507627566, "grad_norm": 1.0085639953613281, "learning_rate": 0.00019461951350618849, "loss": 1.7158, "step": 1253 }, { "epoch": 0.13193056286165175, "grad_norm": 0.7718974351882935, "learning_rate": 0.00019460848069135017, "loss": 2.2337, "step": 1254 }, { "epoch": 0.13203577064702787, "grad_norm": 1.247361421585083, "learning_rate": 0.0001945974368899406, "loss": 2.0403, "step": 1255 }, { "epoch": 0.132140978432404, "grad_norm": 0.9886590838432312, "learning_rate": 0.00019458638210324212, "loss": 2.1477, "step": 1256 }, { "epoch": 0.13224618621778012, "grad_norm": 0.9976163506507874, "learning_rate": 0.00019457531633253856, "loss": 2.0871, "step": 1257 }, { "epoch": 0.13235139400315624, "grad_norm": 0.9172899127006531, "learning_rate": 0.00019456423957911497, "loss": 2.0631, "step": 1258 }, { "epoch": 0.13245660178853236, "grad_norm": 1.053161382675171, "learning_rate": 0.0001945531518442576, "loss": 2.0908, "step": 1259 }, { "epoch": 0.13256180957390848, "grad_norm": 1.0828722715377808, "learning_rate": 0.00019454205312925408, "loss": 1.9512, "step": 1260 }, { "epoch": 0.1326670173592846, "grad_norm": 0.9323759078979492, "learning_rate": 0.00019453094343539325, "loss": 2.3588, "step": 1261 }, { "epoch": 0.1327722251446607, "grad_norm": 1.2428725957870483, "learning_rate": 0.00019451982276396526, "loss": 1.9449, "step": 1262 }, { "epoch": 0.13287743293003682, "grad_norm": 1.8878873586654663, "learning_rate": 0.00019450869111626147, "loss": 1.4895, "step": 1263 }, { "epoch": 0.13298264071541294, "grad_norm": 1.343825101852417, "learning_rate": 0.0001944975484935746, "loss": 1.713, "step": 1264 }, { "epoch": 0.13308784850078906, "grad_norm": 0.9605780243873596, "learning_rate": 0.0001944863948971986, "loss": 2.0469, "step": 1265 }, { "epoch": 0.13319305628616518, "grad_norm": 1.6616069078445435, "learning_rate": 0.0001944752303284287, "loss": 2.0865, "step": 1266 }, { "epoch": 0.1332982640715413, "grad_norm": 1.17947518825531, "learning_rate": 0.0001944640547885614, "loss": 1.984, "step": 1267 }, { "epoch": 0.13340347185691742, "grad_norm": 1.6048225164413452, "learning_rate": 0.00019445286827889446, "loss": 1.7243, "step": 1268 }, { "epoch": 0.13350867964229354, "grad_norm": 0.9817343950271606, "learning_rate": 0.00019444167080072698, "loss": 1.7584, "step": 1269 }, { "epoch": 0.13361388742766964, "grad_norm": 1.3782764673233032, "learning_rate": 0.00019443046235535923, "loss": 1.9988, "step": 1270 }, { "epoch": 0.13371909521304576, "grad_norm": 1.0830388069152832, "learning_rate": 0.00019441924294409289, "loss": 2.0023, "step": 1271 }, { "epoch": 0.13382430299842188, "grad_norm": 1.0523784160614014, "learning_rate": 0.00019440801256823074, "loss": 2.1114, "step": 1272 }, { "epoch": 0.133929510783798, "grad_norm": 0.7290390729904175, "learning_rate": 0.00019439677122907697, "loss": 2.0477, "step": 1273 }, { "epoch": 0.13403471856917412, "grad_norm": 1.1659435033798218, "learning_rate": 0.00019438551892793701, "loss": 2.0237, "step": 1274 }, { "epoch": 0.13413992635455024, "grad_norm": 0.9051696062088013, "learning_rate": 0.00019437425566611754, "loss": 1.4642, "step": 1275 }, { "epoch": 0.13424513413992636, "grad_norm": 0.9961209297180176, "learning_rate": 0.0001943629814449265, "loss": 2.1073, "step": 1276 }, { "epoch": 0.13435034192530249, "grad_norm": 1.1717655658721924, "learning_rate": 0.0001943516962656732, "loss": 1.878, "step": 1277 }, { "epoch": 0.13445554971067858, "grad_norm": 1.0735725164413452, "learning_rate": 0.00019434040012966807, "loss": 1.8939, "step": 1278 }, { "epoch": 0.1345607574960547, "grad_norm": 1.1567238569259644, "learning_rate": 0.00019432909303822296, "loss": 2.2329, "step": 1279 }, { "epoch": 0.13466596528143082, "grad_norm": 2.026787757873535, "learning_rate": 0.00019431777499265087, "loss": 1.5744, "step": 1280 }, { "epoch": 0.13477117306680694, "grad_norm": 1.0909669399261475, "learning_rate": 0.00019430644599426614, "loss": 2.273, "step": 1281 }, { "epoch": 0.13487638085218306, "grad_norm": 1.2396634817123413, "learning_rate": 0.0001942951060443844, "loss": 1.8932, "step": 1282 }, { "epoch": 0.13498158863755919, "grad_norm": 1.163996696472168, "learning_rate": 0.00019428375514432254, "loss": 2.2442, "step": 1283 }, { "epoch": 0.1350867964229353, "grad_norm": 0.9469679594039917, "learning_rate": 0.0001942723932953986, "loss": 2.4131, "step": 1284 }, { "epoch": 0.13519200420831143, "grad_norm": 0.993421196937561, "learning_rate": 0.00019426102049893208, "loss": 2.3047, "step": 1285 }, { "epoch": 0.13529721199368752, "grad_norm": 1.0937494039535522, "learning_rate": 0.00019424963675624364, "loss": 1.9778, "step": 1286 }, { "epoch": 0.13540241977906364, "grad_norm": 1.0145161151885986, "learning_rate": 0.00019423824206865527, "loss": 2.3466, "step": 1287 }, { "epoch": 0.13550762756443976, "grad_norm": 1.2493735551834106, "learning_rate": 0.00019422683643749013, "loss": 2.1595, "step": 1288 }, { "epoch": 0.13561283534981589, "grad_norm": 0.7461625933647156, "learning_rate": 0.00019421541986407276, "loss": 1.9241, "step": 1289 }, { "epoch": 0.135718043135192, "grad_norm": 1.2610676288604736, "learning_rate": 0.00019420399234972894, "loss": 1.9008, "step": 1290 }, { "epoch": 0.13582325092056813, "grad_norm": 1.5543406009674072, "learning_rate": 0.0001941925538957857, "loss": 1.8748, "step": 1291 }, { "epoch": 0.13592845870594425, "grad_norm": 0.6136517524719238, "learning_rate": 0.00019418110450357135, "loss": 1.6854, "step": 1292 }, { "epoch": 0.13603366649132037, "grad_norm": 1.5480318069458008, "learning_rate": 0.00019416964417441542, "loss": 2.0514, "step": 1293 }, { "epoch": 0.13613887427669646, "grad_norm": 0.9149646759033203, "learning_rate": 0.00019415817290964883, "loss": 2.1665, "step": 1294 }, { "epoch": 0.13624408206207259, "grad_norm": 0.9369128346443176, "learning_rate": 0.0001941466907106037, "loss": 1.9612, "step": 1295 }, { "epoch": 0.1363492898474487, "grad_norm": 0.8914728164672852, "learning_rate": 0.0001941351975786134, "loss": 1.5696, "step": 1296 }, { "epoch": 0.13645449763282483, "grad_norm": 1.2152458429336548, "learning_rate": 0.00019412369351501255, "loss": 2.3433, "step": 1297 }, { "epoch": 0.13655970541820095, "grad_norm": 0.6514436602592468, "learning_rate": 0.0001941121785211371, "loss": 2.3054, "step": 1298 }, { "epoch": 0.13666491320357707, "grad_norm": 1.1356117725372314, "learning_rate": 0.0001941006525983243, "loss": 2.2101, "step": 1299 }, { "epoch": 0.1367701209889532, "grad_norm": 1.195465326309204, "learning_rate": 0.00019408911574791255, "loss": 1.6433, "step": 1300 }, { "epoch": 0.1368753287743293, "grad_norm": 1.0879439115524292, "learning_rate": 0.00019407756797124164, "loss": 2.0753, "step": 1301 }, { "epoch": 0.1369805365597054, "grad_norm": 1.4740591049194336, "learning_rate": 0.00019406600926965255, "loss": 2.1329, "step": 1302 }, { "epoch": 0.13708574434508153, "grad_norm": 1.2994344234466553, "learning_rate": 0.00019405443964448757, "loss": 1.9924, "step": 1303 }, { "epoch": 0.13719095213045765, "grad_norm": 1.0827962160110474, "learning_rate": 0.0001940428590970902, "loss": 2.1453, "step": 1304 }, { "epoch": 0.13729615991583377, "grad_norm": 0.9276465177536011, "learning_rate": 0.0001940312676288053, "loss": 2.3533, "step": 1305 }, { "epoch": 0.1374013677012099, "grad_norm": 1.7329941987991333, "learning_rate": 0.00019401966524097892, "loss": 1.963, "step": 1306 }, { "epoch": 0.137506575486586, "grad_norm": 1.348925232887268, "learning_rate": 0.00019400805193495839, "loss": 1.9953, "step": 1307 }, { "epoch": 0.13761178327196213, "grad_norm": 1.512202262878418, "learning_rate": 0.00019399642771209238, "loss": 1.7091, "step": 1308 }, { "epoch": 0.13771699105733826, "grad_norm": 1.0821994543075562, "learning_rate": 0.00019398479257373073, "loss": 2.0883, "step": 1309 }, { "epoch": 0.13782219884271435, "grad_norm": 0.9924389719963074, "learning_rate": 0.00019397314652122463, "loss": 2.0062, "step": 1310 }, { "epoch": 0.13792740662809047, "grad_norm": 1.1338258981704712, "learning_rate": 0.00019396148955592643, "loss": 2.1751, "step": 1311 }, { "epoch": 0.1380326144134666, "grad_norm": 1.3012595176696777, "learning_rate": 0.00019394982167918987, "loss": 1.8652, "step": 1312 }, { "epoch": 0.1381378221988427, "grad_norm": 1.109009861946106, "learning_rate": 0.0001939381428923699, "loss": 2.3266, "step": 1313 }, { "epoch": 0.13824302998421883, "grad_norm": 0.9063061475753784, "learning_rate": 0.00019392645319682273, "loss": 1.8569, "step": 1314 }, { "epoch": 0.13834823776959496, "grad_norm": 0.9491289854049683, "learning_rate": 0.00019391475259390584, "loss": 2.031, "step": 1315 }, { "epoch": 0.13845344555497108, "grad_norm": 1.2872332334518433, "learning_rate": 0.00019390304108497794, "loss": 2.0533, "step": 1316 }, { "epoch": 0.1385586533403472, "grad_norm": 1.0027546882629395, "learning_rate": 0.00019389131867139913, "loss": 2.1953, "step": 1317 }, { "epoch": 0.1386638611257233, "grad_norm": 0.9697312712669373, "learning_rate": 0.00019387958535453068, "loss": 1.8958, "step": 1318 }, { "epoch": 0.1387690689110994, "grad_norm": 0.9787667393684387, "learning_rate": 0.00019386784113573508, "loss": 2.074, "step": 1319 }, { "epoch": 0.13887427669647553, "grad_norm": 1.1985487937927246, "learning_rate": 0.00019385608601637624, "loss": 2.2367, "step": 1320 }, { "epoch": 0.13897948448185166, "grad_norm": 0.9592642188072205, "learning_rate": 0.00019384431999781916, "loss": 1.7508, "step": 1321 }, { "epoch": 0.13908469226722778, "grad_norm": 2.198991060256958, "learning_rate": 0.0001938325430814302, "loss": 1.7532, "step": 1322 }, { "epoch": 0.1391899000526039, "grad_norm": 1.6652568578720093, "learning_rate": 0.00019382075526857705, "loss": 1.5727, "step": 1323 }, { "epoch": 0.13929510783798002, "grad_norm": 0.9190332889556885, "learning_rate": 0.00019380895656062846, "loss": 2.206, "step": 1324 }, { "epoch": 0.13940031562335614, "grad_norm": 1.020340085029602, "learning_rate": 0.00019379714695895472, "loss": 2.0502, "step": 1325 }, { "epoch": 0.13950552340873223, "grad_norm": 1.3590896129608154, "learning_rate": 0.00019378532646492714, "loss": 2.3099, "step": 1326 }, { "epoch": 0.13961073119410836, "grad_norm": 0.7917930483818054, "learning_rate": 0.00019377349507991842, "loss": 2.2146, "step": 1327 }, { "epoch": 0.13971593897948448, "grad_norm": 1.0664583444595337, "learning_rate": 0.00019376165280530252, "loss": 2.5567, "step": 1328 }, { "epoch": 0.1398211467648606, "grad_norm": 0.7282881736755371, "learning_rate": 0.00019374979964245463, "loss": 1.9319, "step": 1329 }, { "epoch": 0.13992635455023672, "grad_norm": 0.9543462991714478, "learning_rate": 0.0001937379355927512, "loss": 1.9968, "step": 1330 }, { "epoch": 0.14003156233561284, "grad_norm": 0.8911160826683044, "learning_rate": 0.00019372606065757003, "loss": 1.9732, "step": 1331 }, { "epoch": 0.14013677012098896, "grad_norm": 0.8454022407531738, "learning_rate": 0.00019371417483829003, "loss": 1.6059, "step": 1332 }, { "epoch": 0.14024197790636508, "grad_norm": 0.9383593797683716, "learning_rate": 0.00019370227813629147, "loss": 2.2838, "step": 1333 }, { "epoch": 0.14034718569174118, "grad_norm": 1.4767099618911743, "learning_rate": 0.00019369037055295594, "loss": 1.5007, "step": 1334 }, { "epoch": 0.1404523934771173, "grad_norm": 1.4801223278045654, "learning_rate": 0.00019367845208966618, "loss": 2.0174, "step": 1335 }, { "epoch": 0.14055760126249342, "grad_norm": 0.995274543762207, "learning_rate": 0.00019366652274780628, "loss": 2.0762, "step": 1336 }, { "epoch": 0.14066280904786954, "grad_norm": 0.9528397917747498, "learning_rate": 0.0001936545825287615, "loss": 2.177, "step": 1337 }, { "epoch": 0.14076801683324566, "grad_norm": 1.0369575023651123, "learning_rate": 0.00019364263143391847, "loss": 1.8945, "step": 1338 }, { "epoch": 0.14087322461862178, "grad_norm": 0.8390545845031738, "learning_rate": 0.00019363066946466502, "loss": 1.9435, "step": 1339 }, { "epoch": 0.1409784324039979, "grad_norm": 1.2658418416976929, "learning_rate": 0.0001936186966223902, "loss": 2.3394, "step": 1340 }, { "epoch": 0.14108364018937403, "grad_norm": 0.9371610283851624, "learning_rate": 0.00019360671290848447, "loss": 2.1424, "step": 1341 }, { "epoch": 0.14118884797475012, "grad_norm": 0.9509402513504028, "learning_rate": 0.00019359471832433936, "loss": 1.9365, "step": 1342 }, { "epoch": 0.14129405576012624, "grad_norm": 1.1386808156967163, "learning_rate": 0.00019358271287134784, "loss": 1.7742, "step": 1343 }, { "epoch": 0.14139926354550236, "grad_norm": 0.946628987789154, "learning_rate": 0.00019357069655090404, "loss": 2.1615, "step": 1344 }, { "epoch": 0.14150447133087848, "grad_norm": 0.8672672510147095, "learning_rate": 0.00019355866936440337, "loss": 1.9622, "step": 1345 }, { "epoch": 0.1416096791162546, "grad_norm": 1.2565221786499023, "learning_rate": 0.0001935466313132425, "loss": 1.6286, "step": 1346 }, { "epoch": 0.14171488690163073, "grad_norm": 1.0216825008392334, "learning_rate": 0.00019353458239881936, "loss": 1.6907, "step": 1347 }, { "epoch": 0.14182009468700685, "grad_norm": 1.3723381757736206, "learning_rate": 0.00019352252262253318, "loss": 1.64, "step": 1348 }, { "epoch": 0.14192530247238297, "grad_norm": 1.0351169109344482, "learning_rate": 0.00019351045198578445, "loss": 2.2069, "step": 1349 }, { "epoch": 0.14203051025775906, "grad_norm": 1.1500434875488281, "learning_rate": 0.00019349837048997478, "loss": 1.8967, "step": 1350 }, { "epoch": 0.14213571804313518, "grad_norm": 1.0211580991744995, "learning_rate": 0.00019348627813650727, "loss": 1.6682, "step": 1351 }, { "epoch": 0.1422409258285113, "grad_norm": 0.8861009478569031, "learning_rate": 0.00019347417492678615, "loss": 2.383, "step": 1352 }, { "epoch": 0.14234613361388743, "grad_norm": 1.089370608329773, "learning_rate": 0.00019346206086221686, "loss": 2.0606, "step": 1353 }, { "epoch": 0.14245134139926355, "grad_norm": 0.9320117235183716, "learning_rate": 0.00019344993594420622, "loss": 2.165, "step": 1354 }, { "epoch": 0.14255654918463967, "grad_norm": 0.9916388988494873, "learning_rate": 0.00019343780017416223, "loss": 2.1013, "step": 1355 }, { "epoch": 0.1426617569700158, "grad_norm": 0.894123375415802, "learning_rate": 0.00019342565355349417, "loss": 2.3444, "step": 1356 }, { "epoch": 0.1427669647553919, "grad_norm": 0.835313081741333, "learning_rate": 0.00019341349608361267, "loss": 2.1591, "step": 1357 }, { "epoch": 0.142872172540768, "grad_norm": 0.9938138723373413, "learning_rate": 0.00019340132776592942, "loss": 2.1381, "step": 1358 }, { "epoch": 0.14297738032614413, "grad_norm": 1.408219337463379, "learning_rate": 0.00019338914860185752, "loss": 1.9765, "step": 1359 }, { "epoch": 0.14308258811152025, "grad_norm": 1.1644525527954102, "learning_rate": 0.00019337695859281137, "loss": 1.788, "step": 1360 }, { "epoch": 0.14318779589689637, "grad_norm": 0.6040117144584656, "learning_rate": 0.00019336475774020648, "loss": 2.1476, "step": 1361 }, { "epoch": 0.1432930036822725, "grad_norm": 0.9746436476707458, "learning_rate": 0.0001933525460454597, "loss": 1.6193, "step": 1362 }, { "epoch": 0.1433982114676486, "grad_norm": 1.6310737133026123, "learning_rate": 0.00019334032350998919, "loss": 2.009, "step": 1363 }, { "epoch": 0.14350341925302473, "grad_norm": 0.9230230450630188, "learning_rate": 0.00019332809013521428, "loss": 2.1239, "step": 1364 }, { "epoch": 0.14360862703840085, "grad_norm": 0.9308440685272217, "learning_rate": 0.00019331584592255553, "loss": 1.8617, "step": 1365 }, { "epoch": 0.14371383482377695, "grad_norm": 1.1313167810440063, "learning_rate": 0.0001933035908734349, "loss": 1.4825, "step": 1366 }, { "epoch": 0.14381904260915307, "grad_norm": 1.0187263488769531, "learning_rate": 0.0001932913249892755, "loss": 2.1542, "step": 1367 }, { "epoch": 0.1439242503945292, "grad_norm": 1.1843492984771729, "learning_rate": 0.00019327904827150176, "loss": 1.8278, "step": 1368 }, { "epoch": 0.1440294581799053, "grad_norm": 1.6638554334640503, "learning_rate": 0.00019326676072153927, "loss": 1.9298, "step": 1369 }, { "epoch": 0.14413466596528143, "grad_norm": 0.9473015666007996, "learning_rate": 0.00019325446234081498, "loss": 2.2619, "step": 1370 }, { "epoch": 0.14423987375065755, "grad_norm": 0.9866594672203064, "learning_rate": 0.00019324215313075706, "loss": 1.7889, "step": 1371 }, { "epoch": 0.14434508153603368, "grad_norm": 1.3924474716186523, "learning_rate": 0.00019322983309279495, "loss": 1.8161, "step": 1372 }, { "epoch": 0.1444502893214098, "grad_norm": 1.1654231548309326, "learning_rate": 0.00019321750222835933, "loss": 1.8105, "step": 1373 }, { "epoch": 0.1445554971067859, "grad_norm": 0.7712641358375549, "learning_rate": 0.0001932051605388821, "loss": 1.7791, "step": 1374 }, { "epoch": 0.144660704892162, "grad_norm": 0.7534984350204468, "learning_rate": 0.00019319280802579654, "loss": 1.9788, "step": 1375 }, { "epoch": 0.14476591267753813, "grad_norm": 0.9810335040092468, "learning_rate": 0.00019318044469053702, "loss": 2.0382, "step": 1376 }, { "epoch": 0.14487112046291425, "grad_norm": 1.1605186462402344, "learning_rate": 0.0001931680705345393, "loss": 2.2038, "step": 1377 }, { "epoch": 0.14497632824829038, "grad_norm": 1.2045458555221558, "learning_rate": 0.00019315568555924035, "loss": 2.3353, "step": 1378 }, { "epoch": 0.1450815360336665, "grad_norm": 1.1022682189941406, "learning_rate": 0.0001931432897660784, "loss": 1.8617, "step": 1379 }, { "epoch": 0.14518674381904262, "grad_norm": 1.5427874326705933, "learning_rate": 0.0001931308831564929, "loss": 2.1449, "step": 1380 }, { "epoch": 0.14529195160441874, "grad_norm": 1.1985043287277222, "learning_rate": 0.00019311846573192461, "loss": 2.0909, "step": 1381 }, { "epoch": 0.14539715938979483, "grad_norm": 1.052314043045044, "learning_rate": 0.00019310603749381558, "loss": 1.9035, "step": 1382 }, { "epoch": 0.14550236717517095, "grad_norm": 1.17412269115448, "learning_rate": 0.00019309359844360893, "loss": 1.6764, "step": 1383 }, { "epoch": 0.14560757496054708, "grad_norm": 1.25741446018219, "learning_rate": 0.00019308114858274932, "loss": 2.1154, "step": 1384 }, { "epoch": 0.1457127827459232, "grad_norm": 1.7540018558502197, "learning_rate": 0.0001930686879126824, "loss": 1.1782, "step": 1385 }, { "epoch": 0.14581799053129932, "grad_norm": 1.423569917678833, "learning_rate": 0.00019305621643485522, "loss": 2.217, "step": 1386 }, { "epoch": 0.14592319831667544, "grad_norm": 1.670623540878296, "learning_rate": 0.00019304373415071605, "loss": 2.1301, "step": 1387 }, { "epoch": 0.14602840610205156, "grad_norm": 1.0802117586135864, "learning_rate": 0.00019303124106171443, "loss": 2.0065, "step": 1388 }, { "epoch": 0.14613361388742768, "grad_norm": 0.7987265586853027, "learning_rate": 0.00019301873716930107, "loss": 2.3087, "step": 1389 }, { "epoch": 0.14623882167280378, "grad_norm": 1.3220068216323853, "learning_rate": 0.00019300622247492814, "loss": 2.1964, "step": 1390 }, { "epoch": 0.1463440294581799, "grad_norm": 1.0792020559310913, "learning_rate": 0.00019299369698004884, "loss": 2.0078, "step": 1391 }, { "epoch": 0.14644923724355602, "grad_norm": 2.057413101196289, "learning_rate": 0.0001929811606861177, "loss": 1.783, "step": 1392 }, { "epoch": 0.14655444502893214, "grad_norm": 1.4398462772369385, "learning_rate": 0.0001929686135945906, "loss": 1.3909, "step": 1393 }, { "epoch": 0.14665965281430826, "grad_norm": 1.0750278234481812, "learning_rate": 0.0001929560557069245, "loss": 1.8938, "step": 1394 }, { "epoch": 0.14676486059968438, "grad_norm": 0.8204072713851929, "learning_rate": 0.00019294348702457773, "loss": 2.4199, "step": 1395 }, { "epoch": 0.1468700683850605, "grad_norm": 0.9489021897315979, "learning_rate": 0.0001929309075490099, "loss": 2.0696, "step": 1396 }, { "epoch": 0.14697527617043663, "grad_norm": 0.8120673298835754, "learning_rate": 0.00019291831728168182, "loss": 2.0151, "step": 1397 }, { "epoch": 0.14708048395581272, "grad_norm": 1.4392826557159424, "learning_rate": 0.00019290571622405548, "loss": 1.8832, "step": 1398 }, { "epoch": 0.14718569174118884, "grad_norm": 1.1055705547332764, "learning_rate": 0.00019289310437759427, "loss": 1.829, "step": 1399 }, { "epoch": 0.14729089952656496, "grad_norm": 0.5811572074890137, "learning_rate": 0.00019288048174376273, "loss": 2.2987, "step": 1400 }, { "epoch": 0.14739610731194108, "grad_norm": 1.0770584344863892, "learning_rate": 0.0001928678483240267, "loss": 1.9941, "step": 1401 }, { "epoch": 0.1475013150973172, "grad_norm": 1.227790117263794, "learning_rate": 0.00019285520411985326, "loss": 2.0154, "step": 1402 }, { "epoch": 0.14760652288269333, "grad_norm": 0.6818186640739441, "learning_rate": 0.0001928425491327107, "loss": 2.2323, "step": 1403 }, { "epoch": 0.14771173066806945, "grad_norm": 1.290170431137085, "learning_rate": 0.00019282988336406865, "loss": 2.1058, "step": 1404 }, { "epoch": 0.14781693845344557, "grad_norm": 0.9615659713745117, "learning_rate": 0.0001928172068153979, "loss": 2.0933, "step": 1405 }, { "epoch": 0.14792214623882166, "grad_norm": 0.9037755131721497, "learning_rate": 0.00019280451948817059, "loss": 2.0112, "step": 1406 }, { "epoch": 0.14802735402419778, "grad_norm": 1.2614983320236206, "learning_rate": 0.00019279182138386003, "loss": 1.8352, "step": 1407 }, { "epoch": 0.1481325618095739, "grad_norm": 1.1680060625076294, "learning_rate": 0.0001927791125039408, "loss": 2.0655, "step": 1408 }, { "epoch": 0.14823776959495003, "grad_norm": 1.1708638668060303, "learning_rate": 0.00019276639284988875, "loss": 1.9568, "step": 1409 }, { "epoch": 0.14834297738032615, "grad_norm": 1.2957854270935059, "learning_rate": 0.00019275366242318097, "loss": 1.9057, "step": 1410 }, { "epoch": 0.14844818516570227, "grad_norm": 1.3663761615753174, "learning_rate": 0.00019274092122529584, "loss": 1.9964, "step": 1411 }, { "epoch": 0.1485533929510784, "grad_norm": 1.0850753784179688, "learning_rate": 0.00019272816925771288, "loss": 1.8218, "step": 1412 }, { "epoch": 0.1486586007364545, "grad_norm": 0.7503268718719482, "learning_rate": 0.00019271540652191296, "loss": 2.2849, "step": 1413 }, { "epoch": 0.1487638085218306, "grad_norm": 1.1095421314239502, "learning_rate": 0.0001927026330193782, "loss": 2.0907, "step": 1414 }, { "epoch": 0.14886901630720673, "grad_norm": 1.947831630706787, "learning_rate": 0.00019268984875159191, "loss": 2.3621, "step": 1415 }, { "epoch": 0.14897422409258285, "grad_norm": 1.045089602470398, "learning_rate": 0.00019267705372003876, "loss": 1.8236, "step": 1416 }, { "epoch": 0.14907943187795897, "grad_norm": 1.137406826019287, "learning_rate": 0.0001926642479262045, "loss": 2.1149, "step": 1417 }, { "epoch": 0.1491846396633351, "grad_norm": 1.469742774963379, "learning_rate": 0.00019265143137157627, "loss": 1.8987, "step": 1418 }, { "epoch": 0.1492898474487112, "grad_norm": 1.202427864074707, "learning_rate": 0.00019263860405764241, "loss": 1.8768, "step": 1419 }, { "epoch": 0.14939505523408733, "grad_norm": 0.9847029447555542, "learning_rate": 0.0001926257659858925, "loss": 1.9719, "step": 1420 }, { "epoch": 0.14950026301946345, "grad_norm": 1.1331363916397095, "learning_rate": 0.00019261291715781743, "loss": 2.2542, "step": 1421 }, { "epoch": 0.14960547080483955, "grad_norm": 1.19657564163208, "learning_rate": 0.00019260005757490922, "loss": 2.2169, "step": 1422 }, { "epoch": 0.14971067859021567, "grad_norm": 1.2917791604995728, "learning_rate": 0.00019258718723866127, "loss": 2.0383, "step": 1423 }, { "epoch": 0.1498158863755918, "grad_norm": 1.163900375366211, "learning_rate": 0.00019257430615056816, "loss": 1.9061, "step": 1424 }, { "epoch": 0.1499210941609679, "grad_norm": 1.1655508279800415, "learning_rate": 0.00019256141431212568, "loss": 2.2014, "step": 1425 }, { "epoch": 0.15002630194634403, "grad_norm": 0.8750263452529907, "learning_rate": 0.00019254851172483098, "loss": 2.0095, "step": 1426 }, { "epoch": 0.15013150973172015, "grad_norm": 1.1399056911468506, "learning_rate": 0.00019253559839018235, "loss": 1.8426, "step": 1427 }, { "epoch": 0.15023671751709627, "grad_norm": 1.3897678852081299, "learning_rate": 0.00019252267430967942, "loss": 1.3708, "step": 1428 }, { "epoch": 0.1503419253024724, "grad_norm": 0.97458416223526, "learning_rate": 0.00019250973948482298, "loss": 1.6044, "step": 1429 }, { "epoch": 0.1504471330878485, "grad_norm": 1.1727815866470337, "learning_rate": 0.0001924967939171151, "loss": 2.0115, "step": 1430 }, { "epoch": 0.1505523408732246, "grad_norm": 1.3556091785430908, "learning_rate": 0.00019248383760805916, "loss": 1.7871, "step": 1431 }, { "epoch": 0.15065754865860073, "grad_norm": 0.9026738405227661, "learning_rate": 0.00019247087055915968, "loss": 1.9852, "step": 1432 }, { "epoch": 0.15076275644397685, "grad_norm": 1.1884734630584717, "learning_rate": 0.0001924578927719225, "loss": 2.3177, "step": 1433 }, { "epoch": 0.15086796422935297, "grad_norm": 1.2027161121368408, "learning_rate": 0.00019244490424785468, "loss": 1.6248, "step": 1434 }, { "epoch": 0.1509731720147291, "grad_norm": 1.37662935256958, "learning_rate": 0.00019243190498846458, "loss": 2.0508, "step": 1435 }, { "epoch": 0.15107837980010522, "grad_norm": 1.3299387693405151, "learning_rate": 0.00019241889499526169, "loss": 1.862, "step": 1436 }, { "epoch": 0.15118358758548134, "grad_norm": 1.6885327100753784, "learning_rate": 0.00019240587426975686, "loss": 2.6639, "step": 1437 }, { "epoch": 0.15128879537085743, "grad_norm": 1.0421466827392578, "learning_rate": 0.00019239284281346214, "loss": 1.8263, "step": 1438 }, { "epoch": 0.15139400315623355, "grad_norm": 1.2554149627685547, "learning_rate": 0.00019237980062789082, "loss": 2.4153, "step": 1439 }, { "epoch": 0.15149921094160967, "grad_norm": 0.9526044130325317, "learning_rate": 0.00019236674771455747, "loss": 2.236, "step": 1440 }, { "epoch": 0.1516044187269858, "grad_norm": 0.9509533643722534, "learning_rate": 0.00019235368407497788, "loss": 1.966, "step": 1441 }, { "epoch": 0.15170962651236192, "grad_norm": 1.2150377035140991, "learning_rate": 0.00019234060971066902, "loss": 1.6399, "step": 1442 }, { "epoch": 0.15181483429773804, "grad_norm": 1.228989601135254, "learning_rate": 0.00019232752462314923, "loss": 2.1176, "step": 1443 }, { "epoch": 0.15192004208311416, "grad_norm": 1.2654527425765991, "learning_rate": 0.0001923144288139381, "loss": 1.5625, "step": 1444 }, { "epoch": 0.15202524986849028, "grad_norm": 1.086130976676941, "learning_rate": 0.00019230132228455628, "loss": 1.8363, "step": 1445 }, { "epoch": 0.15213045765386637, "grad_norm": 1.0267728567123413, "learning_rate": 0.00019228820503652586, "loss": 1.7803, "step": 1446 }, { "epoch": 0.1522356654392425, "grad_norm": 1.3312456607818604, "learning_rate": 0.00019227507707137006, "loss": 1.7026, "step": 1447 }, { "epoch": 0.15234087322461862, "grad_norm": 1.434946060180664, "learning_rate": 0.00019226193839061347, "loss": 1.7181, "step": 1448 }, { "epoch": 0.15244608100999474, "grad_norm": 2.0896921157836914, "learning_rate": 0.00019224878899578175, "loss": 2.235, "step": 1449 }, { "epoch": 0.15255128879537086, "grad_norm": 0.7048740386962891, "learning_rate": 0.00019223562888840193, "loss": 2.3047, "step": 1450 }, { "epoch": 0.15265649658074698, "grad_norm": 1.5920569896697998, "learning_rate": 0.00019222245807000223, "loss": 2.2711, "step": 1451 }, { "epoch": 0.1527617043661231, "grad_norm": 0.8454103469848633, "learning_rate": 0.00019220927654211217, "loss": 1.8763, "step": 1452 }, { "epoch": 0.15286691215149922, "grad_norm": 0.9767968654632568, "learning_rate": 0.0001921960843062625, "loss": 2.1151, "step": 1453 }, { "epoch": 0.15297211993687532, "grad_norm": 1.5461039543151855, "learning_rate": 0.00019218288136398513, "loss": 1.5243, "step": 1454 }, { "epoch": 0.15307732772225144, "grad_norm": 1.1929031610488892, "learning_rate": 0.0001921696677168133, "loss": 1.914, "step": 1455 }, { "epoch": 0.15318253550762756, "grad_norm": 1.3064931631088257, "learning_rate": 0.00019215644336628148, "loss": 1.9663, "step": 1456 }, { "epoch": 0.15328774329300368, "grad_norm": 1.21585214138031, "learning_rate": 0.0001921432083139253, "loss": 2.0572, "step": 1457 }, { "epoch": 0.1533929510783798, "grad_norm": 1.0481001138687134, "learning_rate": 0.00019212996256128182, "loss": 2.2602, "step": 1458 }, { "epoch": 0.15349815886375592, "grad_norm": 1.2363125085830688, "learning_rate": 0.00019211670610988913, "loss": 1.9804, "step": 1459 }, { "epoch": 0.15360336664913204, "grad_norm": 0.9035821557044983, "learning_rate": 0.0001921034389612867, "loss": 2.1159, "step": 1460 }, { "epoch": 0.15370857443450817, "grad_norm": 1.1142661571502686, "learning_rate": 0.00019209016111701522, "loss": 2.3541, "step": 1461 }, { "epoch": 0.15381378221988426, "grad_norm": 0.9691978096961975, "learning_rate": 0.00019207687257861655, "loss": 2.0898, "step": 1462 }, { "epoch": 0.15391899000526038, "grad_norm": 1.0944209098815918, "learning_rate": 0.00019206357334763388, "loss": 1.7832, "step": 1463 }, { "epoch": 0.1540241977906365, "grad_norm": 1.0283271074295044, "learning_rate": 0.00019205026342561157, "loss": 2.3259, "step": 1464 }, { "epoch": 0.15412940557601262, "grad_norm": 0.9159666299819946, "learning_rate": 0.0001920369428140953, "loss": 2.1943, "step": 1465 }, { "epoch": 0.15423461336138874, "grad_norm": 1.10454261302948, "learning_rate": 0.00019202361151463194, "loss": 1.7206, "step": 1466 }, { "epoch": 0.15433982114676487, "grad_norm": 1.2136410474777222, "learning_rate": 0.00019201026952876958, "loss": 2.1148, "step": 1467 }, { "epoch": 0.154445028932141, "grad_norm": 1.4272185564041138, "learning_rate": 0.00019199691685805763, "loss": 2.401, "step": 1468 }, { "epoch": 0.1545502367175171, "grad_norm": 1.2169204950332642, "learning_rate": 0.00019198355350404667, "loss": 2.039, "step": 1469 }, { "epoch": 0.1546554445028932, "grad_norm": 0.9736878275871277, "learning_rate": 0.0001919701794682885, "loss": 2.3821, "step": 1470 }, { "epoch": 0.15476065228826932, "grad_norm": 1.0728427171707153, "learning_rate": 0.00019195679475233625, "loss": 2.079, "step": 1471 }, { "epoch": 0.15486586007364544, "grad_norm": 0.9223616719245911, "learning_rate": 0.00019194339935774422, "loss": 1.9857, "step": 1472 }, { "epoch": 0.15497106785902157, "grad_norm": 0.7518747448921204, "learning_rate": 0.00019192999328606803, "loss": 2.2304, "step": 1473 }, { "epoch": 0.1550762756443977, "grad_norm": 1.1213191747665405, "learning_rate": 0.0001919165765388644, "loss": 2.1738, "step": 1474 }, { "epoch": 0.1551814834297738, "grad_norm": 1.642525553703308, "learning_rate": 0.00019190314911769142, "loss": 1.8865, "step": 1475 }, { "epoch": 0.15528669121514993, "grad_norm": 0.9955556392669678, "learning_rate": 0.00019188971102410837, "loss": 2.1058, "step": 1476 }, { "epoch": 0.15539189900052605, "grad_norm": 0.9576829075813293, "learning_rate": 0.00019187626225967576, "loss": 1.9758, "step": 1477 }, { "epoch": 0.15549710678590214, "grad_norm": 1.4288785457611084, "learning_rate": 0.00019186280282595535, "loss": 1.7091, "step": 1478 }, { "epoch": 0.15560231457127827, "grad_norm": 1.115845799446106, "learning_rate": 0.00019184933272451015, "loss": 2.1405, "step": 1479 }, { "epoch": 0.1557075223566544, "grad_norm": 1.4371943473815918, "learning_rate": 0.0001918358519569044, "loss": 2.0125, "step": 1480 }, { "epoch": 0.1558127301420305, "grad_norm": 1.4342418909072876, "learning_rate": 0.00019182236052470354, "loss": 2.0421, "step": 1481 }, { "epoch": 0.15591793792740663, "grad_norm": 1.1712485551834106, "learning_rate": 0.00019180885842947436, "loss": 2.1814, "step": 1482 }, { "epoch": 0.15602314571278275, "grad_norm": 0.816021740436554, "learning_rate": 0.00019179534567278475, "loss": 2.2745, "step": 1483 }, { "epoch": 0.15612835349815887, "grad_norm": 1.6961162090301514, "learning_rate": 0.0001917818222562039, "loss": 1.8956, "step": 1484 }, { "epoch": 0.156233561283535, "grad_norm": 1.0247284173965454, "learning_rate": 0.0001917682881813023, "loss": 1.9351, "step": 1485 }, { "epoch": 0.1563387690689111, "grad_norm": 0.9849500060081482, "learning_rate": 0.00019175474344965157, "loss": 1.6681, "step": 1486 }, { "epoch": 0.1564439768542872, "grad_norm": 1.3697773218154907, "learning_rate": 0.00019174118806282458, "loss": 1.9333, "step": 1487 }, { "epoch": 0.15654918463966333, "grad_norm": 1.421255111694336, "learning_rate": 0.00019172762202239558, "loss": 1.9458, "step": 1488 }, { "epoch": 0.15665439242503945, "grad_norm": 1.1675089597702026, "learning_rate": 0.00019171404532993986, "loss": 2.0317, "step": 1489 }, { "epoch": 0.15675960021041557, "grad_norm": 0.8886706233024597, "learning_rate": 0.00019170045798703406, "loss": 2.0854, "step": 1490 }, { "epoch": 0.1568648079957917, "grad_norm": 1.510884404182434, "learning_rate": 0.00019168685999525607, "loss": 2.1653, "step": 1491 }, { "epoch": 0.15697001578116782, "grad_norm": 1.1406757831573486, "learning_rate": 0.00019167325135618487, "loss": 2.0393, "step": 1492 }, { "epoch": 0.15707522356654394, "grad_norm": 0.9392661452293396, "learning_rate": 0.00019165963207140095, "loss": 2.1535, "step": 1493 }, { "epoch": 0.15718043135192003, "grad_norm": 0.9734488725662231, "learning_rate": 0.00019164600214248575, "loss": 1.9538, "step": 1494 }, { "epoch": 0.15728563913729615, "grad_norm": 1.0362365245819092, "learning_rate": 0.0001916323615710221, "loss": 1.6926, "step": 1495 }, { "epoch": 0.15739084692267227, "grad_norm": 1.099932074546814, "learning_rate": 0.00019161871035859403, "loss": 1.9106, "step": 1496 }, { "epoch": 0.1574960547080484, "grad_norm": 1.4597340822219849, "learning_rate": 0.0001916050485067868, "loss": 2.1146, "step": 1497 }, { "epoch": 0.15760126249342452, "grad_norm": 1.3075284957885742, "learning_rate": 0.00019159137601718697, "loss": 2.6296, "step": 1498 }, { "epoch": 0.15770647027880064, "grad_norm": 1.0450359582901, "learning_rate": 0.00019157769289138225, "loss": 1.7802, "step": 1499 }, { "epoch": 0.15781167806417676, "grad_norm": 1.9684633016586304, "learning_rate": 0.0001915639991309616, "loss": 2.1394, "step": 1500 }, { "epoch": 0.15791688584955288, "grad_norm": 1.4822932481765747, "learning_rate": 0.00019155029473751526, "loss": 1.9256, "step": 1501 }, { "epoch": 0.15802209363492897, "grad_norm": 0.9977061748504639, "learning_rate": 0.00019153657971263463, "loss": 1.7428, "step": 1502 }, { "epoch": 0.1581273014203051, "grad_norm": 1.2301387786865234, "learning_rate": 0.00019152285405791243, "loss": 1.8108, "step": 1503 }, { "epoch": 0.15823250920568122, "grad_norm": 0.9384319186210632, "learning_rate": 0.00019150911777494258, "loss": 2.55, "step": 1504 }, { "epoch": 0.15833771699105734, "grad_norm": 0.9354838728904724, "learning_rate": 0.00019149537086532022, "loss": 2.1337, "step": 1505 }, { "epoch": 0.15844292477643346, "grad_norm": 1.6690521240234375, "learning_rate": 0.0001914816133306417, "loss": 2.1056, "step": 1506 }, { "epoch": 0.15854813256180958, "grad_norm": 1.3110581636428833, "learning_rate": 0.0001914678451725047, "loss": 2.1933, "step": 1507 }, { "epoch": 0.1586533403471857, "grad_norm": 1.4883490800857544, "learning_rate": 0.000191454066392508, "loss": 1.751, "step": 1508 }, { "epoch": 0.15875854813256182, "grad_norm": 1.056282639503479, "learning_rate": 0.00019144027699225172, "loss": 1.4755, "step": 1509 }, { "epoch": 0.15886375591793792, "grad_norm": 1.4437083005905151, "learning_rate": 0.00019142647697333723, "loss": 2.0506, "step": 1510 }, { "epoch": 0.15896896370331404, "grad_norm": 1.2706918716430664, "learning_rate": 0.00019141266633736697, "loss": 2.2959, "step": 1511 }, { "epoch": 0.15907417148869016, "grad_norm": 0.7875407934188843, "learning_rate": 0.00019139884508594484, "loss": 1.9302, "step": 1512 }, { "epoch": 0.15917937927406628, "grad_norm": 1.6068692207336426, "learning_rate": 0.00019138501322067577, "loss": 2.0472, "step": 1513 }, { "epoch": 0.1592845870594424, "grad_norm": 1.250916600227356, "learning_rate": 0.00019137117074316602, "loss": 2.3526, "step": 1514 }, { "epoch": 0.15938979484481852, "grad_norm": 0.9924522638320923, "learning_rate": 0.00019135731765502313, "loss": 1.804, "step": 1515 }, { "epoch": 0.15949500263019464, "grad_norm": 0.9752918481826782, "learning_rate": 0.00019134345395785572, "loss": 1.527, "step": 1516 }, { "epoch": 0.15960021041557076, "grad_norm": 0.8332353830337524, "learning_rate": 0.00019132957965327382, "loss": 2.2386, "step": 1517 }, { "epoch": 0.15970541820094686, "grad_norm": 1.0230636596679688, "learning_rate": 0.0001913156947428886, "loss": 1.935, "step": 1518 }, { "epoch": 0.15981062598632298, "grad_norm": 1.3288902044296265, "learning_rate": 0.00019130179922831241, "loss": 2.178, "step": 1519 }, { "epoch": 0.1599158337716991, "grad_norm": 1.1439303159713745, "learning_rate": 0.00019128789311115892, "loss": 2.2228, "step": 1520 }, { "epoch": 0.16002104155707522, "grad_norm": 1.0876315832138062, "learning_rate": 0.00019127397639304305, "loss": 2.0773, "step": 1521 }, { "epoch": 0.16012624934245134, "grad_norm": 1.0651696920394897, "learning_rate": 0.00019126004907558085, "loss": 2.0508, "step": 1522 }, { "epoch": 0.16023145712782746, "grad_norm": 1.309206485748291, "learning_rate": 0.00019124611116038963, "loss": 2.4417, "step": 1523 }, { "epoch": 0.16033666491320359, "grad_norm": 1.2913589477539062, "learning_rate": 0.00019123216264908802, "loss": 2.1339, "step": 1524 }, { "epoch": 0.1604418726985797, "grad_norm": 1.7865016460418701, "learning_rate": 0.00019121820354329577, "loss": 2.0545, "step": 1525 }, { "epoch": 0.1605470804839558, "grad_norm": 1.2007263898849487, "learning_rate": 0.00019120423384463392, "loss": 1.865, "step": 1526 }, { "epoch": 0.16065228826933192, "grad_norm": 1.5868077278137207, "learning_rate": 0.0001911902535547247, "loss": 2.2881, "step": 1527 }, { "epoch": 0.16075749605470804, "grad_norm": 0.9651376605033875, "learning_rate": 0.00019117626267519162, "loss": 1.9428, "step": 1528 }, { "epoch": 0.16086270384008416, "grad_norm": 1.0492637157440186, "learning_rate": 0.0001911622612076594, "loss": 1.8641, "step": 1529 }, { "epoch": 0.16096791162546029, "grad_norm": 1.4848203659057617, "learning_rate": 0.000191148249153754, "loss": 2.256, "step": 1530 }, { "epoch": 0.1610731194108364, "grad_norm": 1.3082940578460693, "learning_rate": 0.00019113422651510255, "loss": 2.2692, "step": 1531 }, { "epoch": 0.16117832719621253, "grad_norm": 0.7588707804679871, "learning_rate": 0.00019112019329333346, "loss": 2.3249, "step": 1532 }, { "epoch": 0.16128353498158865, "grad_norm": 0.7588993906974792, "learning_rate": 0.0001911061494900764, "loss": 2.1544, "step": 1533 }, { "epoch": 0.16138874276696474, "grad_norm": 1.9041491746902466, "learning_rate": 0.00019109209510696217, "loss": 1.9289, "step": 1534 }, { "epoch": 0.16149395055234086, "grad_norm": 1.3282427787780762, "learning_rate": 0.00019107803014562294, "loss": 1.6547, "step": 1535 }, { "epoch": 0.16159915833771699, "grad_norm": 1.084847092628479, "learning_rate": 0.00019106395460769196, "loss": 1.9799, "step": 1536 }, { "epoch": 0.1617043661230931, "grad_norm": 1.2308772802352905, "learning_rate": 0.0001910498684948038, "loss": 1.7734, "step": 1537 }, { "epoch": 0.16180957390846923, "grad_norm": 1.1046706438064575, "learning_rate": 0.0001910357718085942, "loss": 1.8926, "step": 1538 }, { "epoch": 0.16191478169384535, "grad_norm": 0.9326602816581726, "learning_rate": 0.00019102166455070024, "loss": 1.679, "step": 1539 }, { "epoch": 0.16201998947922147, "grad_norm": 2.735374927520752, "learning_rate": 0.0001910075467227601, "loss": 2.2685, "step": 1540 }, { "epoch": 0.1621251972645976, "grad_norm": 0.7285892367362976, "learning_rate": 0.00019099341832641323, "loss": 2.1306, "step": 1541 }, { "epoch": 0.16223040504997369, "grad_norm": 1.3543635606765747, "learning_rate": 0.0001909792793633003, "loss": 1.7381, "step": 1542 }, { "epoch": 0.1623356128353498, "grad_norm": 1.4372515678405762, "learning_rate": 0.00019096512983506327, "loss": 1.3866, "step": 1543 }, { "epoch": 0.16244082062072593, "grad_norm": 1.202860951423645, "learning_rate": 0.00019095096974334523, "loss": 1.9439, "step": 1544 }, { "epoch": 0.16254602840610205, "grad_norm": 1.5187021493911743, "learning_rate": 0.0001909367990897906, "loss": 1.6743, "step": 1545 }, { "epoch": 0.16265123619147817, "grad_norm": 0.9257557392120361, "learning_rate": 0.00019092261787604492, "loss": 2.1086, "step": 1546 }, { "epoch": 0.1627564439768543, "grad_norm": 1.202397108078003, "learning_rate": 0.00019090842610375503, "loss": 2.1001, "step": 1547 }, { "epoch": 0.1628616517622304, "grad_norm": 1.3095595836639404, "learning_rate": 0.000190894223774569, "loss": 2.525, "step": 1548 }, { "epoch": 0.16296685954760654, "grad_norm": 1.2810163497924805, "learning_rate": 0.00019088001089013603, "loss": 2.0124, "step": 1549 }, { "epoch": 0.16307206733298263, "grad_norm": 2.0133657455444336, "learning_rate": 0.00019086578745210666, "loss": 1.9361, "step": 1550 }, { "epoch": 0.16317727511835875, "grad_norm": 1.1870371103286743, "learning_rate": 0.00019085155346213264, "loss": 1.7296, "step": 1551 }, { "epoch": 0.16328248290373487, "grad_norm": 1.0325980186462402, "learning_rate": 0.00019083730892186686, "loss": 2.4486, "step": 1552 }, { "epoch": 0.163387690689111, "grad_norm": 1.313650131225586, "learning_rate": 0.00019082305383296352, "loss": 2.4732, "step": 1553 }, { "epoch": 0.16349289847448711, "grad_norm": 0.8540735840797424, "learning_rate": 0.00019080878819707802, "loss": 1.8071, "step": 1554 }, { "epoch": 0.16359810625986324, "grad_norm": 1.3443700075149536, "learning_rate": 0.00019079451201586695, "loss": 2.3741, "step": 1555 }, { "epoch": 0.16370331404523936, "grad_norm": 1.2170543670654297, "learning_rate": 0.0001907802252909882, "loss": 2.0448, "step": 1556 }, { "epoch": 0.16380852183061548, "grad_norm": 0.8038926720619202, "learning_rate": 0.0001907659280241008, "loss": 2.1864, "step": 1557 }, { "epoch": 0.16391372961599157, "grad_norm": 1.2641410827636719, "learning_rate": 0.00019075162021686505, "loss": 1.7906, "step": 1558 }, { "epoch": 0.1640189374013677, "grad_norm": 0.9859477877616882, "learning_rate": 0.0001907373018709425, "loss": 2.3066, "step": 1559 }, { "epoch": 0.16412414518674381, "grad_norm": 1.0549753904342651, "learning_rate": 0.00019072297298799589, "loss": 2.0271, "step": 1560 }, { "epoch": 0.16422935297211994, "grad_norm": 1.015430212020874, "learning_rate": 0.0001907086335696892, "loss": 2.1745, "step": 1561 }, { "epoch": 0.16433456075749606, "grad_norm": 0.9478970766067505, "learning_rate": 0.00019069428361768754, "loss": 2.1743, "step": 1562 }, { "epoch": 0.16443976854287218, "grad_norm": 0.8643477559089661, "learning_rate": 0.00019067992313365735, "loss": 2.4163, "step": 1563 }, { "epoch": 0.1645449763282483, "grad_norm": 1.1437067985534668, "learning_rate": 0.00019066555211926634, "loss": 2.0313, "step": 1564 }, { "epoch": 0.16465018411362442, "grad_norm": 1.8715989589691162, "learning_rate": 0.00019065117057618332, "loss": 2.3086, "step": 1565 }, { "epoch": 0.16475539189900051, "grad_norm": 1.1657463312149048, "learning_rate": 0.00019063677850607834, "loss": 2.4182, "step": 1566 }, { "epoch": 0.16486059968437664, "grad_norm": 1.0688217878341675, "learning_rate": 0.00019062237591062272, "loss": 2.3177, "step": 1567 }, { "epoch": 0.16496580746975276, "grad_norm": 1.5269672870635986, "learning_rate": 0.000190607962791489, "loss": 1.4833, "step": 1568 }, { "epoch": 0.16507101525512888, "grad_norm": 0.9268459677696228, "learning_rate": 0.00019059353915035096, "loss": 2.1505, "step": 1569 }, { "epoch": 0.165176223040505, "grad_norm": 1.0593661069869995, "learning_rate": 0.00019057910498888352, "loss": 2.1309, "step": 1570 }, { "epoch": 0.16528143082588112, "grad_norm": 1.552679181098938, "learning_rate": 0.00019056466030876288, "loss": 1.8734, "step": 1571 }, { "epoch": 0.16538663861125724, "grad_norm": 1.9197827577590942, "learning_rate": 0.00019055020511166647, "loss": 1.7033, "step": 1572 }, { "epoch": 0.16549184639663336, "grad_norm": 1.1575535535812378, "learning_rate": 0.0001905357393992729, "loss": 2.3521, "step": 1573 }, { "epoch": 0.16559705418200946, "grad_norm": 1.0774550437927246, "learning_rate": 0.00019052126317326207, "loss": 2.1065, "step": 1574 }, { "epoch": 0.16570226196738558, "grad_norm": 1.0978038311004639, "learning_rate": 0.000190506776435315, "loss": 1.7549, "step": 1575 }, { "epoch": 0.1658074697527617, "grad_norm": 0.8305821418762207, "learning_rate": 0.00019049227918711402, "loss": 2.5738, "step": 1576 }, { "epoch": 0.16591267753813782, "grad_norm": 1.5406334400177002, "learning_rate": 0.00019047777143034266, "loss": 1.5855, "step": 1577 }, { "epoch": 0.16601788532351394, "grad_norm": 1.2726386785507202, "learning_rate": 0.00019046325316668562, "loss": 1.9179, "step": 1578 }, { "epoch": 0.16612309310889006, "grad_norm": 0.9605535864830017, "learning_rate": 0.0001904487243978289, "loss": 2.2189, "step": 1579 }, { "epoch": 0.16622830089426618, "grad_norm": 1.0368754863739014, "learning_rate": 0.00019043418512545963, "loss": 2.1578, "step": 1580 }, { "epoch": 0.1663335086796423, "grad_norm": 0.9062163829803467, "learning_rate": 0.00019041963535126625, "loss": 2.2097, "step": 1581 }, { "epoch": 0.1664387164650184, "grad_norm": 1.6533317565917969, "learning_rate": 0.00019040507507693836, "loss": 2.1455, "step": 1582 }, { "epoch": 0.16654392425039452, "grad_norm": 0.9693316221237183, "learning_rate": 0.0001903905043041668, "loss": 2.1971, "step": 1583 }, { "epoch": 0.16664913203577064, "grad_norm": 0.8169092535972595, "learning_rate": 0.00019037592303464362, "loss": 2.4642, "step": 1584 }, { "epoch": 0.16675433982114676, "grad_norm": 1.0367541313171387, "learning_rate": 0.0001903613312700621, "loss": 2.2354, "step": 1585 }, { "epoch": 0.16685954760652288, "grad_norm": 1.3058645725250244, "learning_rate": 0.00019034672901211672, "loss": 1.8324, "step": 1586 }, { "epoch": 0.166964755391899, "grad_norm": 1.1962265968322754, "learning_rate": 0.0001903321162625032, "loss": 2.0343, "step": 1587 }, { "epoch": 0.16706996317727513, "grad_norm": 1.3322851657867432, "learning_rate": 0.0001903174930229185, "loss": 2.0793, "step": 1588 }, { "epoch": 0.16717517096265125, "grad_norm": 0.9945839643478394, "learning_rate": 0.00019030285929506075, "loss": 1.8876, "step": 1589 }, { "epoch": 0.16728037874802734, "grad_norm": 1.0389971733093262, "learning_rate": 0.0001902882150806293, "loss": 2.1234, "step": 1590 }, { "epoch": 0.16738558653340346, "grad_norm": 1.1944520473480225, "learning_rate": 0.00019027356038132473, "loss": 2.3191, "step": 1591 }, { "epoch": 0.16749079431877958, "grad_norm": 1.4879711866378784, "learning_rate": 0.00019025889519884887, "loss": 1.7512, "step": 1592 }, { "epoch": 0.1675960021041557, "grad_norm": 0.7883063554763794, "learning_rate": 0.00019024421953490472, "loss": 2.2303, "step": 1593 }, { "epoch": 0.16770120988953183, "grad_norm": 1.214263677597046, "learning_rate": 0.00019022953339119654, "loss": 1.8914, "step": 1594 }, { "epoch": 0.16780641767490795, "grad_norm": 1.0355852842330933, "learning_rate": 0.00019021483676942973, "loss": 2.1548, "step": 1595 }, { "epoch": 0.16791162546028407, "grad_norm": 1.011046290397644, "learning_rate": 0.00019020012967131106, "loss": 1.8183, "step": 1596 }, { "epoch": 0.1680168332456602, "grad_norm": 1.062743067741394, "learning_rate": 0.0001901854120985483, "loss": 1.9263, "step": 1597 }, { "epoch": 0.16812204103103628, "grad_norm": 0.8611815571784973, "learning_rate": 0.00019017068405285058, "loss": 2.2722, "step": 1598 }, { "epoch": 0.1682272488164124, "grad_norm": 1.0582350492477417, "learning_rate": 0.0001901559455359283, "loss": 2.1025, "step": 1599 }, { "epoch": 0.16833245660178853, "grad_norm": 0.7138919234275818, "learning_rate": 0.00019014119654949294, "loss": 1.8953, "step": 1600 }, { "epoch": 0.16843766438716465, "grad_norm": 1.2566789388656616, "learning_rate": 0.00019012643709525722, "loss": 2.494, "step": 1601 }, { "epoch": 0.16854287217254077, "grad_norm": 2.009523391723633, "learning_rate": 0.00019011166717493517, "loss": 1.0732, "step": 1602 }, { "epoch": 0.1686480799579169, "grad_norm": 1.5301228761672974, "learning_rate": 0.0001900968867902419, "loss": 1.6838, "step": 1603 }, { "epoch": 0.168753287743293, "grad_norm": 1.3879108428955078, "learning_rate": 0.0001900820959428939, "loss": 2.1531, "step": 1604 }, { "epoch": 0.16885849552866913, "grad_norm": 1.2042590379714966, "learning_rate": 0.0001900672946346087, "loss": 2.2596, "step": 1605 }, { "epoch": 0.16896370331404523, "grad_norm": 1.7592346668243408, "learning_rate": 0.00019005248286710518, "loss": 2.145, "step": 1606 }, { "epoch": 0.16906891109942135, "grad_norm": 0.7835527658462524, "learning_rate": 0.00019003766064210336, "loss": 2.4961, "step": 1607 }, { "epoch": 0.16917411888479747, "grad_norm": 1.1461858749389648, "learning_rate": 0.00019002282796132448, "loss": 1.8269, "step": 1608 }, { "epoch": 0.1692793266701736, "grad_norm": 1.6687688827514648, "learning_rate": 0.000190007984826491, "loss": 1.8541, "step": 1609 }, { "epoch": 0.1693845344555497, "grad_norm": 0.8094751238822937, "learning_rate": 0.0001899931312393267, "loss": 2.0899, "step": 1610 }, { "epoch": 0.16948974224092583, "grad_norm": 1.5134220123291016, "learning_rate": 0.00018997826720155636, "loss": 2.0814, "step": 1611 }, { "epoch": 0.16959495002630195, "grad_norm": 1.034388542175293, "learning_rate": 0.00018996339271490616, "loss": 1.7283, "step": 1612 }, { "epoch": 0.16970015781167808, "grad_norm": 0.7677897214889526, "learning_rate": 0.0001899485077811034, "loss": 2.0904, "step": 1613 }, { "epoch": 0.16980536559705417, "grad_norm": 1.4724828004837036, "learning_rate": 0.00018993361240187665, "loss": 1.9291, "step": 1614 }, { "epoch": 0.1699105733824303, "grad_norm": 1.181376338005066, "learning_rate": 0.00018991870657895558, "loss": 1.8633, "step": 1615 }, { "epoch": 0.1700157811678064, "grad_norm": 1.2235981225967407, "learning_rate": 0.00018990379031407124, "loss": 1.8902, "step": 1616 }, { "epoch": 0.17012098895318253, "grad_norm": 1.796578288078308, "learning_rate": 0.0001898888636089558, "loss": 1.7133, "step": 1617 }, { "epoch": 0.17022619673855865, "grad_norm": 0.8562111854553223, "learning_rate": 0.00018987392646534258, "loss": 1.918, "step": 1618 }, { "epoch": 0.17033140452393478, "grad_norm": 1.1729540824890137, "learning_rate": 0.00018985897888496627, "loss": 2.0491, "step": 1619 }, { "epoch": 0.1704366123093109, "grad_norm": 1.2273286581039429, "learning_rate": 0.0001898440208695626, "loss": 1.9927, "step": 1620 }, { "epoch": 0.17054182009468702, "grad_norm": 1.4244452714920044, "learning_rate": 0.00018982905242086867, "loss": 1.7184, "step": 1621 }, { "epoch": 0.1706470278800631, "grad_norm": 1.2891523838043213, "learning_rate": 0.00018981407354062268, "loss": 2.1326, "step": 1622 }, { "epoch": 0.17075223566543923, "grad_norm": 1.7193819284439087, "learning_rate": 0.00018979908423056408, "loss": 2.0726, "step": 1623 }, { "epoch": 0.17085744345081536, "grad_norm": 1.0913747549057007, "learning_rate": 0.00018978408449243353, "loss": 2.1798, "step": 1624 }, { "epoch": 0.17096265123619148, "grad_norm": 1.2384060621261597, "learning_rate": 0.00018976907432797287, "loss": 2.2418, "step": 1625 }, { "epoch": 0.1710678590215676, "grad_norm": 1.3474977016448975, "learning_rate": 0.00018975405373892524, "loss": 2.2404, "step": 1626 }, { "epoch": 0.17117306680694372, "grad_norm": 1.227752685546875, "learning_rate": 0.0001897390227270349, "loss": 2.2127, "step": 1627 }, { "epoch": 0.17127827459231984, "grad_norm": 1.1790754795074463, "learning_rate": 0.00018972398129404736, "loss": 2.3705, "step": 1628 }, { "epoch": 0.17138348237769596, "grad_norm": 1.323519229888916, "learning_rate": 0.00018970892944170933, "loss": 1.9858, "step": 1629 }, { "epoch": 0.17148869016307206, "grad_norm": 0.9499973058700562, "learning_rate": 0.0001896938671717687, "loss": 1.9775, "step": 1630 }, { "epoch": 0.17159389794844818, "grad_norm": 0.8820815086364746, "learning_rate": 0.00018967879448597463, "loss": 2.2519, "step": 1631 }, { "epoch": 0.1716991057338243, "grad_norm": 1.1254478693008423, "learning_rate": 0.00018966371138607748, "loss": 2.1188, "step": 1632 }, { "epoch": 0.17180431351920042, "grad_norm": 0.9858778715133667, "learning_rate": 0.00018964861787382876, "loss": 2.0559, "step": 1633 }, { "epoch": 0.17190952130457654, "grad_norm": 0.9913214445114136, "learning_rate": 0.0001896335139509813, "loss": 2.1155, "step": 1634 }, { "epoch": 0.17201472908995266, "grad_norm": 0.8039067983627319, "learning_rate": 0.00018961839961928898, "loss": 1.7209, "step": 1635 }, { "epoch": 0.17211993687532878, "grad_norm": 1.1919358968734741, "learning_rate": 0.00018960327488050705, "loss": 1.7773, "step": 1636 }, { "epoch": 0.1722251446607049, "grad_norm": 1.3590924739837646, "learning_rate": 0.00018958813973639184, "loss": 1.134, "step": 1637 }, { "epoch": 0.172330352446081, "grad_norm": 0.8363476991653442, "learning_rate": 0.00018957299418870095, "loss": 2.165, "step": 1638 }, { "epoch": 0.17243556023145712, "grad_norm": 0.8952641487121582, "learning_rate": 0.00018955783823919325, "loss": 2.4521, "step": 1639 }, { "epoch": 0.17254076801683324, "grad_norm": 1.4345828294754028, "learning_rate": 0.0001895426718896287, "loss": 1.983, "step": 1640 }, { "epoch": 0.17264597580220936, "grad_norm": 1.2068872451782227, "learning_rate": 0.00018952749514176848, "loss": 2.0875, "step": 1641 }, { "epoch": 0.17275118358758548, "grad_norm": 1.1335731744766235, "learning_rate": 0.00018951230799737508, "loss": 2.2086, "step": 1642 }, { "epoch": 0.1728563913729616, "grad_norm": 1.1118179559707642, "learning_rate": 0.0001894971104582121, "loss": 2.2505, "step": 1643 }, { "epoch": 0.17296159915833773, "grad_norm": 1.528136968612671, "learning_rate": 0.0001894819025260444, "loss": 1.9484, "step": 1644 }, { "epoch": 0.17306680694371385, "grad_norm": 1.4697853326797485, "learning_rate": 0.00018946668420263802, "loss": 2.1765, "step": 1645 }, { "epoch": 0.17317201472908994, "grad_norm": 1.0284318923950195, "learning_rate": 0.0001894514554897602, "loss": 2.173, "step": 1646 }, { "epoch": 0.17327722251446606, "grad_norm": 0.9980498552322388, "learning_rate": 0.0001894362163891794, "loss": 2.1735, "step": 1647 }, { "epoch": 0.17338243029984218, "grad_norm": 1.4780910015106201, "learning_rate": 0.00018942096690266534, "loss": 1.8087, "step": 1648 }, { "epoch": 0.1734876380852183, "grad_norm": 0.8312754034996033, "learning_rate": 0.00018940570703198878, "loss": 2.2845, "step": 1649 }, { "epoch": 0.17359284587059443, "grad_norm": 0.8467996120452881, "learning_rate": 0.00018939043677892192, "loss": 2.1111, "step": 1650 }, { "epoch": 0.17369805365597055, "grad_norm": 1.1280200481414795, "learning_rate": 0.00018937515614523797, "loss": 1.7534, "step": 1651 }, { "epoch": 0.17380326144134667, "grad_norm": 1.277297019958496, "learning_rate": 0.00018935986513271146, "loss": 2.1373, "step": 1652 }, { "epoch": 0.1739084692267228, "grad_norm": 0.9894921779632568, "learning_rate": 0.00018934456374311806, "loss": 1.9866, "step": 1653 }, { "epoch": 0.17401367701209888, "grad_norm": 1.4973275661468506, "learning_rate": 0.00018932925197823468, "loss": 1.9207, "step": 1654 }, { "epoch": 0.174118884797475, "grad_norm": 0.9163431525230408, "learning_rate": 0.0001893139298398394, "loss": 2.214, "step": 1655 }, { "epoch": 0.17422409258285113, "grad_norm": 1.1179120540618896, "learning_rate": 0.00018929859732971162, "loss": 2.3422, "step": 1656 }, { "epoch": 0.17432930036822725, "grad_norm": 1.4268755912780762, "learning_rate": 0.00018928325444963172, "loss": 1.8902, "step": 1657 }, { "epoch": 0.17443450815360337, "grad_norm": 0.9193028211593628, "learning_rate": 0.0001892679012013815, "loss": 2.2039, "step": 1658 }, { "epoch": 0.1745397159389795, "grad_norm": 0.7367107272148132, "learning_rate": 0.00018925253758674386, "loss": 2.1194, "step": 1659 }, { "epoch": 0.1746449237243556, "grad_norm": 1.8018954992294312, "learning_rate": 0.00018923716360750293, "loss": 2.2415, "step": 1660 }, { "epoch": 0.17475013150973173, "grad_norm": 1.6338098049163818, "learning_rate": 0.00018922177926544405, "loss": 1.9905, "step": 1661 }, { "epoch": 0.17485533929510783, "grad_norm": 1.4636485576629639, "learning_rate": 0.00018920638456235375, "loss": 2.3444, "step": 1662 }, { "epoch": 0.17496054708048395, "grad_norm": 0.698484480381012, "learning_rate": 0.00018919097950001977, "loss": 1.9483, "step": 1663 }, { "epoch": 0.17506575486586007, "grad_norm": 0.85833340883255, "learning_rate": 0.00018917556408023102, "loss": 2.0118, "step": 1664 }, { "epoch": 0.1751709626512362, "grad_norm": 1.2028058767318726, "learning_rate": 0.00018916013830477766, "loss": 2.0637, "step": 1665 }, { "epoch": 0.1752761704366123, "grad_norm": 0.8081074357032776, "learning_rate": 0.00018914470217545103, "loss": 1.7637, "step": 1666 }, { "epoch": 0.17538137822198843, "grad_norm": 0.9753775000572205, "learning_rate": 0.0001891292556940437, "loss": 2.4205, "step": 1667 }, { "epoch": 0.17548658600736455, "grad_norm": 0.8399510383605957, "learning_rate": 0.00018911379886234938, "loss": 1.6638, "step": 1668 }, { "epoch": 0.17559179379274067, "grad_norm": 1.3433152437210083, "learning_rate": 0.00018909833168216306, "loss": 2.049, "step": 1669 }, { "epoch": 0.17569700157811677, "grad_norm": 0.8703423142433167, "learning_rate": 0.00018908285415528088, "loss": 1.973, "step": 1670 }, { "epoch": 0.1758022093634929, "grad_norm": 1.1213691234588623, "learning_rate": 0.0001890673662835002, "loss": 2.2099, "step": 1671 }, { "epoch": 0.175907417148869, "grad_norm": 1.0827136039733887, "learning_rate": 0.00018905186806861957, "loss": 2.2668, "step": 1672 }, { "epoch": 0.17601262493424513, "grad_norm": 1.356737494468689, "learning_rate": 0.0001890363595124387, "loss": 2.206, "step": 1673 }, { "epoch": 0.17611783271962125, "grad_norm": 1.1863815784454346, "learning_rate": 0.00018902084061675863, "loss": 1.8536, "step": 1674 }, { "epoch": 0.17622304050499737, "grad_norm": 0.8727356791496277, "learning_rate": 0.00018900531138338144, "loss": 2.0044, "step": 1675 }, { "epoch": 0.1763282482903735, "grad_norm": 1.1327565908432007, "learning_rate": 0.00018898977181411054, "loss": 1.8945, "step": 1676 }, { "epoch": 0.17643345607574962, "grad_norm": 0.8317870497703552, "learning_rate": 0.0001889742219107505, "loss": 2.3596, "step": 1677 }, { "epoch": 0.1765386638611257, "grad_norm": 1.6865041255950928, "learning_rate": 0.00018895866167510704, "loss": 2.0753, "step": 1678 }, { "epoch": 0.17664387164650183, "grad_norm": 1.1657840013504028, "learning_rate": 0.00018894309110898712, "loss": 2.2913, "step": 1679 }, { "epoch": 0.17674907943187795, "grad_norm": 1.2765939235687256, "learning_rate": 0.0001889275102141989, "loss": 1.9399, "step": 1680 }, { "epoch": 0.17685428721725407, "grad_norm": 1.0933256149291992, "learning_rate": 0.0001889119189925518, "loss": 2.0644, "step": 1681 }, { "epoch": 0.1769594950026302, "grad_norm": 1.196715235710144, "learning_rate": 0.0001888963174458563, "loss": 2.2077, "step": 1682 }, { "epoch": 0.17706470278800632, "grad_norm": 0.8405833840370178, "learning_rate": 0.00018888070557592418, "loss": 2.35, "step": 1683 }, { "epoch": 0.17716991057338244, "grad_norm": 0.6497626304626465, "learning_rate": 0.0001888650833845684, "loss": 2.0005, "step": 1684 }, { "epoch": 0.17727511835875856, "grad_norm": 1.1812434196472168, "learning_rate": 0.00018884945087360312, "loss": 1.9776, "step": 1685 }, { "epoch": 0.17738032614413465, "grad_norm": 1.2201387882232666, "learning_rate": 0.00018883380804484367, "loss": 1.9608, "step": 1686 }, { "epoch": 0.17748553392951077, "grad_norm": 1.2153204679489136, "learning_rate": 0.00018881815490010662, "loss": 2.1224, "step": 1687 }, { "epoch": 0.1775907417148869, "grad_norm": 0.8220624327659607, "learning_rate": 0.00018880249144120973, "loss": 2.0308, "step": 1688 }, { "epoch": 0.17769594950026302, "grad_norm": 1.218924880027771, "learning_rate": 0.0001887868176699719, "loss": 2.0924, "step": 1689 }, { "epoch": 0.17780115728563914, "grad_norm": 1.4680471420288086, "learning_rate": 0.0001887711335882133, "loss": 1.4879, "step": 1690 }, { "epoch": 0.17790636507101526, "grad_norm": 0.9292452335357666, "learning_rate": 0.00018875543919775534, "loss": 2.3925, "step": 1691 }, { "epoch": 0.17801157285639138, "grad_norm": 0.935463011264801, "learning_rate": 0.00018873973450042044, "loss": 1.7671, "step": 1692 }, { "epoch": 0.1781167806417675, "grad_norm": 1.0145299434661865, "learning_rate": 0.00018872401949803237, "loss": 1.9564, "step": 1693 }, { "epoch": 0.1782219884271436, "grad_norm": 1.4179667234420776, "learning_rate": 0.00018870829419241608, "loss": 1.9507, "step": 1694 }, { "epoch": 0.17832719621251972, "grad_norm": 1.5500625371932983, "learning_rate": 0.0001886925585853977, "loss": 1.6376, "step": 1695 }, { "epoch": 0.17843240399789584, "grad_norm": 1.2018243074417114, "learning_rate": 0.0001886768126788046, "loss": 1.9299, "step": 1696 }, { "epoch": 0.17853761178327196, "grad_norm": 1.6733994483947754, "learning_rate": 0.0001886610564744652, "loss": 2.1827, "step": 1697 }, { "epoch": 0.17864281956864808, "grad_norm": 1.0895193815231323, "learning_rate": 0.00018864528997420928, "loss": 2.2253, "step": 1698 }, { "epoch": 0.1787480273540242, "grad_norm": 1.5276850461959839, "learning_rate": 0.0001886295131798677, "loss": 2.1097, "step": 1699 }, { "epoch": 0.17885323513940032, "grad_norm": 1.0547844171524048, "learning_rate": 0.00018861372609327263, "loss": 1.8359, "step": 1700 }, { "epoch": 0.17895844292477645, "grad_norm": 1.3544683456420898, "learning_rate": 0.00018859792871625736, "loss": 1.8435, "step": 1701 }, { "epoch": 0.17906365071015254, "grad_norm": 1.3501803874969482, "learning_rate": 0.0001885821210506564, "loss": 1.7588, "step": 1702 }, { "epoch": 0.17916885849552866, "grad_norm": 1.4099911451339722, "learning_rate": 0.00018856630309830536, "loss": 2.0634, "step": 1703 }, { "epoch": 0.17927406628090478, "grad_norm": 1.3355512619018555, "learning_rate": 0.0001885504748610412, "loss": 1.6948, "step": 1704 }, { "epoch": 0.1793792740662809, "grad_norm": 1.5482978820800781, "learning_rate": 0.000188534636340702, "loss": 1.7464, "step": 1705 }, { "epoch": 0.17948448185165702, "grad_norm": 1.2930607795715332, "learning_rate": 0.000188518787539127, "loss": 2.3852, "step": 1706 }, { "epoch": 0.17958968963703315, "grad_norm": 1.4282091856002808, "learning_rate": 0.00018850292845815672, "loss": 1.8695, "step": 1707 }, { "epoch": 0.17969489742240927, "grad_norm": 1.1130648851394653, "learning_rate": 0.00018848705909963275, "loss": 2.0868, "step": 1708 }, { "epoch": 0.1798001052077854, "grad_norm": 1.1909211874008179, "learning_rate": 0.000188471179465398, "loss": 1.8749, "step": 1709 }, { "epoch": 0.17990531299316148, "grad_norm": 0.7360309958457947, "learning_rate": 0.00018845528955729654, "loss": 1.9737, "step": 1710 }, { "epoch": 0.1800105207785376, "grad_norm": 1.4310061931610107, "learning_rate": 0.00018843938937717356, "loss": 2.1225, "step": 1711 }, { "epoch": 0.18011572856391372, "grad_norm": 1.2631195783615112, "learning_rate": 0.00018842347892687552, "loss": 1.7833, "step": 1712 }, { "epoch": 0.18022093634928985, "grad_norm": 1.025079607963562, "learning_rate": 0.00018840755820825002, "loss": 2.2132, "step": 1713 }, { "epoch": 0.18032614413466597, "grad_norm": 0.9161180257797241, "learning_rate": 0.0001883916272231459, "loss": 2.1401, "step": 1714 }, { "epoch": 0.1804313519200421, "grad_norm": 0.9029863476753235, "learning_rate": 0.0001883756859734132, "loss": 1.8179, "step": 1715 }, { "epoch": 0.1805365597054182, "grad_norm": 1.060342788696289, "learning_rate": 0.00018835973446090312, "loss": 1.8488, "step": 1716 }, { "epoch": 0.18064176749079433, "grad_norm": 1.106149435043335, "learning_rate": 0.000188343772687468, "loss": 2.1516, "step": 1717 }, { "epoch": 0.18074697527617042, "grad_norm": 1.5603445768356323, "learning_rate": 0.0001883278006549615, "loss": 1.7992, "step": 1718 }, { "epoch": 0.18085218306154655, "grad_norm": 1.4736990928649902, "learning_rate": 0.00018831181836523832, "loss": 1.5836, "step": 1719 }, { "epoch": 0.18095739084692267, "grad_norm": 1.0077097415924072, "learning_rate": 0.00018829582582015453, "loss": 1.9346, "step": 1720 }, { "epoch": 0.1810625986322988, "grad_norm": 1.0348749160766602, "learning_rate": 0.0001882798230215672, "loss": 1.9961, "step": 1721 }, { "epoch": 0.1811678064176749, "grad_norm": 1.658117413520813, "learning_rate": 0.00018826380997133475, "loss": 1.9501, "step": 1722 }, { "epoch": 0.18127301420305103, "grad_norm": 0.9015898704528809, "learning_rate": 0.00018824778667131669, "loss": 2.412, "step": 1723 }, { "epoch": 0.18137822198842715, "grad_norm": 1.5265978574752808, "learning_rate": 0.00018823175312337374, "loss": 1.9471, "step": 1724 }, { "epoch": 0.18148342977380327, "grad_norm": 1.099857211112976, "learning_rate": 0.00018821570932936785, "loss": 2.1251, "step": 1725 }, { "epoch": 0.18158863755917937, "grad_norm": 0.8147949576377869, "learning_rate": 0.0001881996552911621, "loss": 1.7347, "step": 1726 }, { "epoch": 0.1816938453445555, "grad_norm": 1.1035587787628174, "learning_rate": 0.00018818359101062087, "loss": 2.0451, "step": 1727 }, { "epoch": 0.1817990531299316, "grad_norm": 1.2525732517242432, "learning_rate": 0.00018816751648960956, "loss": 2.0103, "step": 1728 }, { "epoch": 0.18190426091530773, "grad_norm": 1.0820599794387817, "learning_rate": 0.0001881514317299949, "loss": 2.0033, "step": 1729 }, { "epoch": 0.18200946870068385, "grad_norm": 1.1944822072982788, "learning_rate": 0.00018813533673364474, "loss": 1.5899, "step": 1730 }, { "epoch": 0.18211467648605997, "grad_norm": 0.9098942875862122, "learning_rate": 0.00018811923150242814, "loss": 1.6848, "step": 1731 }, { "epoch": 0.1822198842714361, "grad_norm": 1.7374067306518555, "learning_rate": 0.00018810311603821534, "loss": 1.9592, "step": 1732 }, { "epoch": 0.18232509205681222, "grad_norm": 1.2925838232040405, "learning_rate": 0.00018808699034287784, "loss": 2.1075, "step": 1733 }, { "epoch": 0.1824302998421883, "grad_norm": 1.4516898393630981, "learning_rate": 0.0001880708544182882, "loss": 1.8392, "step": 1734 }, { "epoch": 0.18253550762756443, "grad_norm": 1.116715908050537, "learning_rate": 0.00018805470826632024, "loss": 2.0185, "step": 1735 }, { "epoch": 0.18264071541294055, "grad_norm": 0.9944667816162109, "learning_rate": 0.00018803855188884896, "loss": 2.3831, "step": 1736 }, { "epoch": 0.18274592319831667, "grad_norm": 0.8294582962989807, "learning_rate": 0.00018802238528775055, "loss": 2.1609, "step": 1737 }, { "epoch": 0.1828511309836928, "grad_norm": 1.0243922472000122, "learning_rate": 0.0001880062084649024, "loss": 2.3259, "step": 1738 }, { "epoch": 0.18295633876906892, "grad_norm": 1.0900909900665283, "learning_rate": 0.00018799002142218306, "loss": 2.0693, "step": 1739 }, { "epoch": 0.18306154655444504, "grad_norm": 1.310567855834961, "learning_rate": 0.00018797382416147227, "loss": 2.1476, "step": 1740 }, { "epoch": 0.18316675433982116, "grad_norm": 0.9449117183685303, "learning_rate": 0.00018795761668465098, "loss": 2.0165, "step": 1741 }, { "epoch": 0.18327196212519725, "grad_norm": 1.4861035346984863, "learning_rate": 0.0001879413989936013, "loss": 1.583, "step": 1742 }, { "epoch": 0.18337716991057337, "grad_norm": 1.134390950202942, "learning_rate": 0.00018792517109020654, "loss": 1.9263, "step": 1743 }, { "epoch": 0.1834823776959495, "grad_norm": 1.9425288438796997, "learning_rate": 0.00018790893297635118, "loss": 2.2563, "step": 1744 }, { "epoch": 0.18358758548132562, "grad_norm": 1.343895673751831, "learning_rate": 0.0001878926846539209, "loss": 1.8078, "step": 1745 }, { "epoch": 0.18369279326670174, "grad_norm": 1.0244340896606445, "learning_rate": 0.00018787642612480261, "loss": 2.0321, "step": 1746 }, { "epoch": 0.18379800105207786, "grad_norm": 1.5598095655441284, "learning_rate": 0.0001878601573908843, "loss": 2.149, "step": 1747 }, { "epoch": 0.18390320883745398, "grad_norm": 1.1912977695465088, "learning_rate": 0.00018784387845405525, "loss": 2.3885, "step": 1748 }, { "epoch": 0.1840084166228301, "grad_norm": 1.210438847541809, "learning_rate": 0.00018782758931620584, "loss": 1.8181, "step": 1749 }, { "epoch": 0.1841136244082062, "grad_norm": 1.1713554859161377, "learning_rate": 0.0001878112899792277, "loss": 1.9201, "step": 1750 }, { "epoch": 0.18421883219358232, "grad_norm": 1.1458523273468018, "learning_rate": 0.0001877949804450136, "loss": 1.8281, "step": 1751 }, { "epoch": 0.18432403997895844, "grad_norm": 0.9206444621086121, "learning_rate": 0.00018777866071545751, "loss": 2.0485, "step": 1752 }, { "epoch": 0.18442924776433456, "grad_norm": 1.196629285812378, "learning_rate": 0.0001877623307924546, "loss": 2.2331, "step": 1753 }, { "epoch": 0.18453445554971068, "grad_norm": 1.2622110843658447, "learning_rate": 0.00018774599067790127, "loss": 1.6533, "step": 1754 }, { "epoch": 0.1846396633350868, "grad_norm": 1.3255887031555176, "learning_rate": 0.0001877296403736949, "loss": 1.9875, "step": 1755 }, { "epoch": 0.18474487112046292, "grad_norm": 0.9285155534744263, "learning_rate": 0.00018771327988173435, "loss": 2.0829, "step": 1756 }, { "epoch": 0.18485007890583904, "grad_norm": 1.3429813385009766, "learning_rate": 0.0001876969092039194, "loss": 2.3519, "step": 1757 }, { "epoch": 0.18495528669121514, "grad_norm": 0.8573184013366699, "learning_rate": 0.0001876805283421512, "loss": 2.2899, "step": 1758 }, { "epoch": 0.18506049447659126, "grad_norm": 1.3685758113861084, "learning_rate": 0.00018766413729833192, "loss": 2.1059, "step": 1759 }, { "epoch": 0.18516570226196738, "grad_norm": 1.2804449796676636, "learning_rate": 0.0001876477360743651, "loss": 2.1972, "step": 1760 }, { "epoch": 0.1852709100473435, "grad_norm": 1.6442826986312866, "learning_rate": 0.00018763132467215527, "loss": 1.6334, "step": 1761 }, { "epoch": 0.18537611783271962, "grad_norm": 1.3029580116271973, "learning_rate": 0.00018761490309360826, "loss": 1.8276, "step": 1762 }, { "epoch": 0.18548132561809574, "grad_norm": 1.166380524635315, "learning_rate": 0.00018759847134063108, "loss": 1.6849, "step": 1763 }, { "epoch": 0.18558653340347187, "grad_norm": 1.6139475107192993, "learning_rate": 0.0001875820294151319, "loss": 1.8161, "step": 1764 }, { "epoch": 0.185691741188848, "grad_norm": 1.228589653968811, "learning_rate": 0.00018756557731902, "loss": 1.7924, "step": 1765 }, { "epoch": 0.18579694897422408, "grad_norm": 1.2991068363189697, "learning_rate": 0.00018754911505420598, "loss": 1.9833, "step": 1766 }, { "epoch": 0.1859021567596002, "grad_norm": 1.323332667350769, "learning_rate": 0.00018753264262260153, "loss": 2.072, "step": 1767 }, { "epoch": 0.18600736454497632, "grad_norm": 1.4269095659255981, "learning_rate": 0.0001875161600261195, "loss": 2.2458, "step": 1768 }, { "epoch": 0.18611257233035244, "grad_norm": 1.6868364810943604, "learning_rate": 0.000187499667266674, "loss": 1.4101, "step": 1769 }, { "epoch": 0.18621778011572857, "grad_norm": 1.9576934576034546, "learning_rate": 0.0001874831643461803, "loss": 2.193, "step": 1770 }, { "epoch": 0.1863229879011047, "grad_norm": 1.4027034044265747, "learning_rate": 0.00018746665126655477, "loss": 2.0634, "step": 1771 }, { "epoch": 0.1864281956864808, "grad_norm": 1.0985709428787231, "learning_rate": 0.00018745012802971503, "loss": 2.1046, "step": 1772 }, { "epoch": 0.18653340347185693, "grad_norm": 0.9427407383918762, "learning_rate": 0.00018743359463757996, "loss": 2.001, "step": 1773 }, { "epoch": 0.18663861125723302, "grad_norm": 1.0140053033828735, "learning_rate": 0.0001874170510920694, "loss": 2.0172, "step": 1774 }, { "epoch": 0.18674381904260914, "grad_norm": 1.3929756879806519, "learning_rate": 0.00018740049739510454, "loss": 1.9716, "step": 1775 }, { "epoch": 0.18684902682798527, "grad_norm": 0.9349243640899658, "learning_rate": 0.00018738393354860775, "loss": 2.0728, "step": 1776 }, { "epoch": 0.1869542346133614, "grad_norm": 0.9166633486747742, "learning_rate": 0.00018736735955450251, "loss": 1.9776, "step": 1777 }, { "epoch": 0.1870594423987375, "grad_norm": 1.3984131813049316, "learning_rate": 0.0001873507754147135, "loss": 2.0968, "step": 1778 }, { "epoch": 0.18716465018411363, "grad_norm": 1.1432921886444092, "learning_rate": 0.0001873341811311666, "loss": 1.687, "step": 1779 }, { "epoch": 0.18726985796948975, "grad_norm": 1.310202956199646, "learning_rate": 0.00018731757670578878, "loss": 1.7605, "step": 1780 }, { "epoch": 0.18737506575486587, "grad_norm": 1.1294410228729248, "learning_rate": 0.00018730096214050832, "loss": 1.805, "step": 1781 }, { "epoch": 0.18748027354024197, "grad_norm": 0.7705867886543274, "learning_rate": 0.0001872843374372546, "loss": 2.1706, "step": 1782 }, { "epoch": 0.1875854813256181, "grad_norm": 1.1728864908218384, "learning_rate": 0.00018726770259795821, "loss": 2.1194, "step": 1783 }, { "epoch": 0.1876906891109942, "grad_norm": 1.0230637788772583, "learning_rate": 0.0001872510576245509, "loss": 2.2653, "step": 1784 }, { "epoch": 0.18779589689637033, "grad_norm": 1.1115864515304565, "learning_rate": 0.00018723440251896552, "loss": 2.1956, "step": 1785 }, { "epoch": 0.18790110468174645, "grad_norm": 1.0122426748275757, "learning_rate": 0.00018721773728313628, "loss": 1.6465, "step": 1786 }, { "epoch": 0.18800631246712257, "grad_norm": 0.9406682848930359, "learning_rate": 0.0001872010619189984, "loss": 1.7737, "step": 1787 }, { "epoch": 0.1881115202524987, "grad_norm": 1.0271477699279785, "learning_rate": 0.00018718437642848833, "loss": 2.3859, "step": 1788 }, { "epoch": 0.18821672803787481, "grad_norm": 1.2405545711517334, "learning_rate": 0.00018716768081354374, "loss": 1.8296, "step": 1789 }, { "epoch": 0.1883219358232509, "grad_norm": 1.5526014566421509, "learning_rate": 0.0001871509750761034, "loss": 2.0319, "step": 1790 }, { "epoch": 0.18842714360862703, "grad_norm": 1.2660319805145264, "learning_rate": 0.00018713425921810733, "loss": 2.2965, "step": 1791 }, { "epoch": 0.18853235139400315, "grad_norm": 0.9081699848175049, "learning_rate": 0.00018711753324149663, "loss": 2.2645, "step": 1792 }, { "epoch": 0.18863755917937927, "grad_norm": 0.7778252363204956, "learning_rate": 0.00018710079714821367, "loss": 2.1503, "step": 1793 }, { "epoch": 0.1887427669647554, "grad_norm": 0.903505802154541, "learning_rate": 0.00018708405094020197, "loss": 2.1276, "step": 1794 }, { "epoch": 0.18884797475013151, "grad_norm": 0.9733725190162659, "learning_rate": 0.00018706729461940617, "loss": 2.1536, "step": 1795 }, { "epoch": 0.18895318253550764, "grad_norm": 1.1625339984893799, "learning_rate": 0.00018705052818777219, "loss": 1.7845, "step": 1796 }, { "epoch": 0.18905839032088376, "grad_norm": 0.9139503836631775, "learning_rate": 0.000187033751647247, "loss": 2.1074, "step": 1797 }, { "epoch": 0.18916359810625985, "grad_norm": 1.0095267295837402, "learning_rate": 0.00018701696499977884, "loss": 1.7807, "step": 1798 }, { "epoch": 0.18926880589163597, "grad_norm": 0.9188694357872009, "learning_rate": 0.00018700016824731706, "loss": 1.9302, "step": 1799 }, { "epoch": 0.1893740136770121, "grad_norm": 0.9856252074241638, "learning_rate": 0.0001869833613918122, "loss": 2.1511, "step": 1800 }, { "epoch": 0.18947922146238821, "grad_norm": 1.3189128637313843, "learning_rate": 0.00018696654443521607, "loss": 1.8588, "step": 1801 }, { "epoch": 0.18958442924776434, "grad_norm": 1.3261507749557495, "learning_rate": 0.00018694971737948145, "loss": 1.9224, "step": 1802 }, { "epoch": 0.18968963703314046, "grad_norm": 1.4043636322021484, "learning_rate": 0.00018693288022656252, "loss": 1.8017, "step": 1803 }, { "epoch": 0.18979484481851658, "grad_norm": 1.6043760776519775, "learning_rate": 0.00018691603297841446, "loss": 1.9915, "step": 1804 }, { "epoch": 0.1899000526038927, "grad_norm": 0.9396890997886658, "learning_rate": 0.0001868991756369937, "loss": 1.8151, "step": 1805 }, { "epoch": 0.1900052603892688, "grad_norm": 1.0986272096633911, "learning_rate": 0.0001868823082042578, "loss": 2.1091, "step": 1806 }, { "epoch": 0.19011046817464491, "grad_norm": 0.7267233729362488, "learning_rate": 0.00018686543068216556, "loss": 2.2471, "step": 1807 }, { "epoch": 0.19021567596002104, "grad_norm": 0.7171627283096313, "learning_rate": 0.0001868485430726769, "loss": 1.785, "step": 1808 }, { "epoch": 0.19032088374539716, "grad_norm": 1.197615146636963, "learning_rate": 0.0001868316453777529, "loss": 2.0766, "step": 1809 }, { "epoch": 0.19042609153077328, "grad_norm": 1.0680128335952759, "learning_rate": 0.00018681473759935585, "loss": 2.1709, "step": 1810 }, { "epoch": 0.1905312993161494, "grad_norm": 1.3420791625976562, "learning_rate": 0.00018679781973944922, "loss": 2.1537, "step": 1811 }, { "epoch": 0.19063650710152552, "grad_norm": 0.9118467569351196, "learning_rate": 0.00018678089179999762, "loss": 1.8085, "step": 1812 }, { "epoch": 0.19074171488690164, "grad_norm": 0.9484670162200928, "learning_rate": 0.00018676395378296678, "loss": 2.1784, "step": 1813 }, { "epoch": 0.19084692267227774, "grad_norm": 0.7800343632698059, "learning_rate": 0.0001867470056903237, "loss": 2.0045, "step": 1814 }, { "epoch": 0.19095213045765386, "grad_norm": 1.0197774171829224, "learning_rate": 0.00018673004752403651, "loss": 2.1594, "step": 1815 }, { "epoch": 0.19105733824302998, "grad_norm": 1.9036952257156372, "learning_rate": 0.0001867130792860745, "loss": 2.3911, "step": 1816 }, { "epoch": 0.1911625460284061, "grad_norm": 1.0456516742706299, "learning_rate": 0.00018669610097840812, "loss": 2.0251, "step": 1817 }, { "epoch": 0.19126775381378222, "grad_norm": 1.23896324634552, "learning_rate": 0.00018667911260300904, "loss": 2.091, "step": 1818 }, { "epoch": 0.19137296159915834, "grad_norm": 1.0979390144348145, "learning_rate": 0.00018666211416184999, "loss": 1.8643, "step": 1819 }, { "epoch": 0.19147816938453446, "grad_norm": 1.4400193691253662, "learning_rate": 0.00018664510565690506, "loss": 1.9773, "step": 1820 }, { "epoch": 0.19158337716991058, "grad_norm": 1.1655758619308472, "learning_rate": 0.0001866280870901493, "loss": 2.0926, "step": 1821 }, { "epoch": 0.19168858495528668, "grad_norm": 1.0224357843399048, "learning_rate": 0.00018661105846355902, "loss": 1.7359, "step": 1822 }, { "epoch": 0.1917937927406628, "grad_norm": 1.4255973100662231, "learning_rate": 0.00018659401977911175, "loss": 2.5401, "step": 1823 }, { "epoch": 0.19189900052603892, "grad_norm": 1.0111758708953857, "learning_rate": 0.0001865769710387861, "loss": 2.319, "step": 1824 }, { "epoch": 0.19200420831141504, "grad_norm": 1.0198891162872314, "learning_rate": 0.00018655991224456191, "loss": 1.7324, "step": 1825 }, { "epoch": 0.19210941609679116, "grad_norm": 1.0950661897659302, "learning_rate": 0.00018654284339842013, "loss": 1.8189, "step": 1826 }, { "epoch": 0.19221462388216728, "grad_norm": 1.0327266454696655, "learning_rate": 0.0001865257645023429, "loss": 1.9624, "step": 1827 }, { "epoch": 0.1923198316675434, "grad_norm": 1.1767691373825073, "learning_rate": 0.0001865086755583136, "loss": 2.2126, "step": 1828 }, { "epoch": 0.19242503945291953, "grad_norm": 0.9736087918281555, "learning_rate": 0.0001864915765683167, "loss": 2.2341, "step": 1829 }, { "epoch": 0.19253024723829562, "grad_norm": 1.7082476615905762, "learning_rate": 0.00018647446753433777, "loss": 1.7682, "step": 1830 }, { "epoch": 0.19263545502367174, "grad_norm": 1.0709311962127686, "learning_rate": 0.00018645734845836368, "loss": 1.7948, "step": 1831 }, { "epoch": 0.19274066280904786, "grad_norm": 1.171373724937439, "learning_rate": 0.00018644021934238243, "loss": 2.1489, "step": 1832 }, { "epoch": 0.19284587059442398, "grad_norm": 1.0802509784698486, "learning_rate": 0.00018642308018838316, "loss": 2.4032, "step": 1833 }, { "epoch": 0.1929510783798001, "grad_norm": 1.047210693359375, "learning_rate": 0.00018640593099835618, "loss": 2.2197, "step": 1834 }, { "epoch": 0.19305628616517623, "grad_norm": 0.7914498448371887, "learning_rate": 0.00018638877177429292, "loss": 1.5993, "step": 1835 }, { "epoch": 0.19316149395055235, "grad_norm": 0.8980052471160889, "learning_rate": 0.0001863716025181861, "loss": 2.2428, "step": 1836 }, { "epoch": 0.19326670173592847, "grad_norm": 1.1372369527816772, "learning_rate": 0.00018635442323202946, "loss": 2.2588, "step": 1837 }, { "epoch": 0.19337190952130456, "grad_norm": 0.995132327079773, "learning_rate": 0.00018633723391781802, "loss": 2.2008, "step": 1838 }, { "epoch": 0.19347711730668068, "grad_norm": 1.5439562797546387, "learning_rate": 0.00018632003457754793, "loss": 2.0667, "step": 1839 }, { "epoch": 0.1935823250920568, "grad_norm": 1.945412039756775, "learning_rate": 0.00018630282521321645, "loss": 2.2433, "step": 1840 }, { "epoch": 0.19368753287743293, "grad_norm": 0.7718631029129028, "learning_rate": 0.00018628560582682207, "loss": 2.3626, "step": 1841 }, { "epoch": 0.19379274066280905, "grad_norm": 1.3353549242019653, "learning_rate": 0.0001862683764203644, "loss": 1.9693, "step": 1842 }, { "epoch": 0.19389794844818517, "grad_norm": 1.200984239578247, "learning_rate": 0.00018625113699584426, "loss": 2.0508, "step": 1843 }, { "epoch": 0.1940031562335613, "grad_norm": 1.5393805503845215, "learning_rate": 0.00018623388755526364, "loss": 1.7895, "step": 1844 }, { "epoch": 0.1941083640189374, "grad_norm": 1.4993560314178467, "learning_rate": 0.00018621662810062558, "loss": 1.9832, "step": 1845 }, { "epoch": 0.1942135718043135, "grad_norm": 1.0732812881469727, "learning_rate": 0.00018619935863393444, "loss": 1.6443, "step": 1846 }, { "epoch": 0.19431877958968963, "grad_norm": 1.4361530542373657, "learning_rate": 0.0001861820791571956, "loss": 2.2628, "step": 1847 }, { "epoch": 0.19442398737506575, "grad_norm": 0.9760112166404724, "learning_rate": 0.00018616478967241568, "loss": 2.043, "step": 1848 }, { "epoch": 0.19452919516044187, "grad_norm": 1.2888370752334595, "learning_rate": 0.00018614749018160248, "loss": 2.0368, "step": 1849 }, { "epoch": 0.194634402945818, "grad_norm": 1.2557207345962524, "learning_rate": 0.00018613018068676493, "loss": 2.0163, "step": 1850 }, { "epoch": 0.1947396107311941, "grad_norm": 1.2219709157943726, "learning_rate": 0.00018611286118991313, "loss": 1.4421, "step": 1851 }, { "epoch": 0.19484481851657023, "grad_norm": 0.9364914894104004, "learning_rate": 0.0001860955316930583, "loss": 1.4738, "step": 1852 }, { "epoch": 0.19495002630194636, "grad_norm": 1.3384712934494019, "learning_rate": 0.0001860781921982129, "loss": 2.2564, "step": 1853 }, { "epoch": 0.19505523408732245, "grad_norm": 2.036452054977417, "learning_rate": 0.00018606084270739049, "loss": 2.0251, "step": 1854 }, { "epoch": 0.19516044187269857, "grad_norm": 1.2831714153289795, "learning_rate": 0.00018604348322260578, "loss": 1.8063, "step": 1855 }, { "epoch": 0.1952656496580747, "grad_norm": 0.8491331934928894, "learning_rate": 0.0001860261137458747, "loss": 1.6988, "step": 1856 }, { "epoch": 0.1953708574434508, "grad_norm": 1.3487755060195923, "learning_rate": 0.00018600873427921435, "loss": 1.9603, "step": 1857 }, { "epoch": 0.19547606522882693, "grad_norm": 1.7233006954193115, "learning_rate": 0.00018599134482464287, "loss": 1.8172, "step": 1858 }, { "epoch": 0.19558127301420306, "grad_norm": 1.1606876850128174, "learning_rate": 0.0001859739453841797, "loss": 1.8372, "step": 1859 }, { "epoch": 0.19568648079957918, "grad_norm": 1.3234946727752686, "learning_rate": 0.00018595653595984536, "loss": 2.0716, "step": 1860 }, { "epoch": 0.1957916885849553, "grad_norm": 1.0269123315811157, "learning_rate": 0.0001859391165536615, "loss": 1.8958, "step": 1861 }, { "epoch": 0.1958968963703314, "grad_norm": 1.1604347229003906, "learning_rate": 0.0001859216871676511, "loss": 2.1997, "step": 1862 }, { "epoch": 0.1960021041557075, "grad_norm": 0.8779052495956421, "learning_rate": 0.00018590424780383805, "loss": 1.8132, "step": 1863 }, { "epoch": 0.19610731194108363, "grad_norm": 1.0170305967330933, "learning_rate": 0.0001858867984642476, "loss": 2.1858, "step": 1864 }, { "epoch": 0.19621251972645976, "grad_norm": 0.9716585874557495, "learning_rate": 0.00018586933915090605, "loss": 2.1944, "step": 1865 }, { "epoch": 0.19631772751183588, "grad_norm": 1.326249361038208, "learning_rate": 0.0001858518698658409, "loss": 1.8595, "step": 1866 }, { "epoch": 0.196422935297212, "grad_norm": 0.9899535179138184, "learning_rate": 0.00018583439061108084, "loss": 2.2828, "step": 1867 }, { "epoch": 0.19652814308258812, "grad_norm": 0.9519728422164917, "learning_rate": 0.0001858169013886556, "loss": 2.2894, "step": 1868 }, { "epoch": 0.19663335086796424, "grad_norm": 1.11469304561615, "learning_rate": 0.0001857994022005962, "loss": 2.0972, "step": 1869 }, { "epoch": 0.19673855865334033, "grad_norm": 0.7735962271690369, "learning_rate": 0.00018578189304893479, "loss": 1.8865, "step": 1870 }, { "epoch": 0.19684376643871646, "grad_norm": 1.504098653793335, "learning_rate": 0.00018576437393570458, "loss": 2.2802, "step": 1871 }, { "epoch": 0.19694897422409258, "grad_norm": 0.8838121891021729, "learning_rate": 0.00018574684486294006, "loss": 2.0275, "step": 1872 }, { "epoch": 0.1970541820094687, "grad_norm": 0.868427574634552, "learning_rate": 0.0001857293058326768, "loss": 2.2436, "step": 1873 }, { "epoch": 0.19715938979484482, "grad_norm": 1.472787857055664, "learning_rate": 0.00018571175684695154, "loss": 2.3729, "step": 1874 }, { "epoch": 0.19726459758022094, "grad_norm": 0.9417343139648438, "learning_rate": 0.00018569419790780218, "loss": 2.0141, "step": 1875 }, { "epoch": 0.19736980536559706, "grad_norm": 1.3230361938476562, "learning_rate": 0.00018567662901726784, "loss": 2.1847, "step": 1876 }, { "epoch": 0.19747501315097318, "grad_norm": 0.8992961049079895, "learning_rate": 0.00018565905017738868, "loss": 2.3444, "step": 1877 }, { "epoch": 0.19758022093634928, "grad_norm": 1.455939769744873, "learning_rate": 0.00018564146139020608, "loss": 2.0235, "step": 1878 }, { "epoch": 0.1976854287217254, "grad_norm": 1.1618740558624268, "learning_rate": 0.00018562386265776263, "loss": 2.627, "step": 1879 }, { "epoch": 0.19779063650710152, "grad_norm": 0.8262205719947815, "learning_rate": 0.00018560625398210192, "loss": 2.0536, "step": 1880 }, { "epoch": 0.19789584429247764, "grad_norm": 0.9469594955444336, "learning_rate": 0.00018558863536526885, "loss": 2.3183, "step": 1881 }, { "epoch": 0.19800105207785376, "grad_norm": 1.1301559209823608, "learning_rate": 0.00018557100680930937, "loss": 2.3325, "step": 1882 }, { "epoch": 0.19810625986322988, "grad_norm": 0.8819060921669006, "learning_rate": 0.00018555336831627063, "loss": 1.6358, "step": 1883 }, { "epoch": 0.198211467648606, "grad_norm": 1.2020803689956665, "learning_rate": 0.000185535719888201, "loss": 2.0548, "step": 1884 }, { "epoch": 0.19831667543398213, "grad_norm": 1.0018656253814697, "learning_rate": 0.00018551806152714985, "loss": 2.1452, "step": 1885 }, { "epoch": 0.19842188321935822, "grad_norm": 1.1717722415924072, "learning_rate": 0.00018550039323516783, "loss": 2.0638, "step": 1886 }, { "epoch": 0.19852709100473434, "grad_norm": 1.4248872995376587, "learning_rate": 0.00018548271501430668, "loss": 1.6008, "step": 1887 }, { "epoch": 0.19863229879011046, "grad_norm": 1.5258474349975586, "learning_rate": 0.00018546502686661934, "loss": 1.6953, "step": 1888 }, { "epoch": 0.19873750657548658, "grad_norm": 1.2465327978134155, "learning_rate": 0.00018544732879415986, "loss": 2.2588, "step": 1889 }, { "epoch": 0.1988427143608627, "grad_norm": 1.0645866394042969, "learning_rate": 0.00018542962079898346, "loss": 2.2034, "step": 1890 }, { "epoch": 0.19894792214623883, "grad_norm": 1.7710703611373901, "learning_rate": 0.00018541190288314647, "loss": 2.1372, "step": 1891 }, { "epoch": 0.19905312993161495, "grad_norm": 1.4344483613967896, "learning_rate": 0.00018539417504870648, "loss": 1.7988, "step": 1892 }, { "epoch": 0.19915833771699107, "grad_norm": 1.4004452228546143, "learning_rate": 0.00018537643729772216, "loss": 1.9769, "step": 1893 }, { "epoch": 0.19926354550236716, "grad_norm": 0.8972628116607666, "learning_rate": 0.00018535868963225326, "loss": 2.2142, "step": 1894 }, { "epoch": 0.19936875328774328, "grad_norm": 0.7937827706336975, "learning_rate": 0.00018534093205436087, "loss": 2.0791, "step": 1895 }, { "epoch": 0.1994739610731194, "grad_norm": 1.6337438821792603, "learning_rate": 0.00018532316456610704, "loss": 1.486, "step": 1896 }, { "epoch": 0.19957916885849553, "grad_norm": 2.0083436965942383, "learning_rate": 0.00018530538716955504, "loss": 1.8192, "step": 1897 }, { "epoch": 0.19968437664387165, "grad_norm": 1.693180799484253, "learning_rate": 0.0001852875998667694, "loss": 1.4399, "step": 1898 }, { "epoch": 0.19978958442924777, "grad_norm": 0.8119178414344788, "learning_rate": 0.0001852698026598156, "loss": 2.0997, "step": 1899 }, { "epoch": 0.1998947922146239, "grad_norm": 1.1830841302871704, "learning_rate": 0.0001852519955507604, "loss": 1.7776, "step": 1900 }, { "epoch": 0.2, "grad_norm": 0.7960675358772278, "learning_rate": 0.00018523417854167168, "loss": 2.2009, "step": 1901 }, { "epoch": 0.2001052077853761, "grad_norm": 0.9493720531463623, "learning_rate": 0.00018521635163461846, "loss": 1.9135, "step": 1902 }, { "epoch": 0.20021041557075223, "grad_norm": 1.2081838846206665, "learning_rate": 0.00018519851483167097, "loss": 1.9611, "step": 1903 }, { "epoch": 0.20031562335612835, "grad_norm": 1.2762458324432373, "learning_rate": 0.00018518066813490047, "loss": 2.1839, "step": 1904 }, { "epoch": 0.20042083114150447, "grad_norm": 1.249342679977417, "learning_rate": 0.0001851628115463795, "loss": 1.6887, "step": 1905 }, { "epoch": 0.2005260389268806, "grad_norm": 1.3049440383911133, "learning_rate": 0.00018514494506818166, "loss": 2.3789, "step": 1906 }, { "epoch": 0.2006312467122567, "grad_norm": 1.0053954124450684, "learning_rate": 0.0001851270687023817, "loss": 2.2105, "step": 1907 }, { "epoch": 0.20073645449763283, "grad_norm": 1.1582366228103638, "learning_rate": 0.0001851091824510556, "loss": 1.9553, "step": 1908 }, { "epoch": 0.20084166228300895, "grad_norm": 1.2567706108093262, "learning_rate": 0.00018509128631628036, "loss": 2.3391, "step": 1909 }, { "epoch": 0.20094687006838505, "grad_norm": 0.823357105255127, "learning_rate": 0.00018507338030013427, "loss": 2.1023, "step": 1910 }, { "epoch": 0.20105207785376117, "grad_norm": 0.7399005889892578, "learning_rate": 0.0001850554644046967, "loss": 1.947, "step": 1911 }, { "epoch": 0.2011572856391373, "grad_norm": 1.4578897953033447, "learning_rate": 0.00018503753863204807, "loss": 1.2573, "step": 1912 }, { "epoch": 0.2012624934245134, "grad_norm": 0.9766344428062439, "learning_rate": 0.00018501960298427013, "loss": 1.9681, "step": 1913 }, { "epoch": 0.20136770120988953, "grad_norm": 1.1950548887252808, "learning_rate": 0.00018500165746344562, "loss": 2.1528, "step": 1914 }, { "epoch": 0.20147290899526565, "grad_norm": 1.4169249534606934, "learning_rate": 0.0001849837020716586, "loss": 2.0959, "step": 1915 }, { "epoch": 0.20157811678064178, "grad_norm": 1.3415583372116089, "learning_rate": 0.00018496573681099401, "loss": 1.7363, "step": 1916 }, { "epoch": 0.2016833245660179, "grad_norm": 1.3929237127304077, "learning_rate": 0.00018494776168353827, "loss": 1.8159, "step": 1917 }, { "epoch": 0.201788532351394, "grad_norm": 1.296832799911499, "learning_rate": 0.00018492977669137868, "loss": 2.2187, "step": 1918 }, { "epoch": 0.2018937401367701, "grad_norm": 0.8477041125297546, "learning_rate": 0.00018491178183660376, "loss": 2.0901, "step": 1919 }, { "epoch": 0.20199894792214623, "grad_norm": 1.353830099105835, "learning_rate": 0.00018489377712130326, "loss": 2.3666, "step": 1920 }, { "epoch": 0.20210415570752235, "grad_norm": 1.109371304512024, "learning_rate": 0.00018487576254756793, "loss": 2.1329, "step": 1921 }, { "epoch": 0.20220936349289848, "grad_norm": 1.1594985723495483, "learning_rate": 0.0001848577381174898, "loss": 2.059, "step": 1922 }, { "epoch": 0.2023145712782746, "grad_norm": 1.2802788019180298, "learning_rate": 0.00018483970383316198, "loss": 2.0002, "step": 1923 }, { "epoch": 0.20241977906365072, "grad_norm": 1.8601975440979004, "learning_rate": 0.00018482165969667874, "loss": 2.3373, "step": 1924 }, { "epoch": 0.20252498684902684, "grad_norm": 1.242659330368042, "learning_rate": 0.00018480360571013544, "loss": 1.755, "step": 1925 }, { "epoch": 0.20263019463440293, "grad_norm": 1.3230918645858765, "learning_rate": 0.00018478554187562868, "loss": 2.2089, "step": 1926 }, { "epoch": 0.20273540241977905, "grad_norm": 1.1816704273223877, "learning_rate": 0.00018476746819525613, "loss": 1.6534, "step": 1927 }, { "epoch": 0.20284061020515518, "grad_norm": 1.1363966464996338, "learning_rate": 0.00018474938467111663, "loss": 2.1807, "step": 1928 }, { "epoch": 0.2029458179905313, "grad_norm": 1.0613765716552734, "learning_rate": 0.00018473129130531016, "loss": 1.87, "step": 1929 }, { "epoch": 0.20305102577590742, "grad_norm": 0.7839847207069397, "learning_rate": 0.00018471318809993784, "loss": 1.6391, "step": 1930 }, { "epoch": 0.20315623356128354, "grad_norm": 1.5794448852539062, "learning_rate": 0.00018469507505710194, "loss": 2.2534, "step": 1931 }, { "epoch": 0.20326144134665966, "grad_norm": 1.1429455280303955, "learning_rate": 0.0001846769521789059, "loss": 1.6465, "step": 1932 }, { "epoch": 0.20336664913203578, "grad_norm": 1.0982143878936768, "learning_rate": 0.0001846588194674542, "loss": 1.8907, "step": 1933 }, { "epoch": 0.20347185691741188, "grad_norm": 1.2898775339126587, "learning_rate": 0.00018464067692485254, "loss": 1.6681, "step": 1934 }, { "epoch": 0.203577064702788, "grad_norm": 1.1818253993988037, "learning_rate": 0.00018462252455320785, "loss": 2.2638, "step": 1935 }, { "epoch": 0.20368227248816412, "grad_norm": 1.7338184118270874, "learning_rate": 0.000184604362354628, "loss": 2.2689, "step": 1936 }, { "epoch": 0.20378748027354024, "grad_norm": 1.2104718685150146, "learning_rate": 0.00018458619033122218, "loss": 1.9735, "step": 1937 }, { "epoch": 0.20389268805891636, "grad_norm": 0.9602882266044617, "learning_rate": 0.00018456800848510056, "loss": 2.2646, "step": 1938 }, { "epoch": 0.20399789584429248, "grad_norm": 0.9840019941329956, "learning_rate": 0.00018454981681837463, "loss": 1.8798, "step": 1939 }, { "epoch": 0.2041031036296686, "grad_norm": 0.9403349161148071, "learning_rate": 0.0001845316153331569, "loss": 2.063, "step": 1940 }, { "epoch": 0.20420831141504472, "grad_norm": 1.7658482789993286, "learning_rate": 0.000184513404031561, "loss": 2.1487, "step": 1941 }, { "epoch": 0.20431351920042082, "grad_norm": 1.6682535409927368, "learning_rate": 0.00018449518291570183, "loss": 2.1085, "step": 1942 }, { "epoch": 0.20441872698579694, "grad_norm": 1.355338215827942, "learning_rate": 0.00018447695198769526, "loss": 2.2586, "step": 1943 }, { "epoch": 0.20452393477117306, "grad_norm": 1.2086683511734009, "learning_rate": 0.00018445871124965843, "loss": 2.0692, "step": 1944 }, { "epoch": 0.20462914255654918, "grad_norm": 1.9813165664672852, "learning_rate": 0.00018444046070370963, "loss": 1.7036, "step": 1945 }, { "epoch": 0.2047343503419253, "grad_norm": 1.0692365169525146, "learning_rate": 0.00018442220035196812, "loss": 2.2982, "step": 1946 }, { "epoch": 0.20483955812730142, "grad_norm": 1.4883077144622803, "learning_rate": 0.00018440393019655452, "loss": 2.304, "step": 1947 }, { "epoch": 0.20494476591267755, "grad_norm": 2.0655646324157715, "learning_rate": 0.00018438565023959043, "loss": 2.0391, "step": 1948 }, { "epoch": 0.20504997369805367, "grad_norm": 1.2688896656036377, "learning_rate": 0.00018436736048319866, "loss": 1.6661, "step": 1949 }, { "epoch": 0.20515518148342976, "grad_norm": 1.0065981149673462, "learning_rate": 0.00018434906092950313, "loss": 2.1238, "step": 1950 }, { "epoch": 0.20526038926880588, "grad_norm": 1.2444024085998535, "learning_rate": 0.00018433075158062891, "loss": 2.0663, "step": 1951 }, { "epoch": 0.205365597054182, "grad_norm": 1.5449178218841553, "learning_rate": 0.00018431243243870223, "loss": 2.1242, "step": 1952 }, { "epoch": 0.20547080483955812, "grad_norm": 0.7938444018363953, "learning_rate": 0.00018429410350585034, "loss": 2.2292, "step": 1953 }, { "epoch": 0.20557601262493425, "grad_norm": 1.2025595903396606, "learning_rate": 0.00018427576478420186, "loss": 2.057, "step": 1954 }, { "epoch": 0.20568122041031037, "grad_norm": 1.0300257205963135, "learning_rate": 0.00018425741627588627, "loss": 1.9549, "step": 1955 }, { "epoch": 0.2057864281956865, "grad_norm": 1.1756985187530518, "learning_rate": 0.0001842390579830344, "loss": 1.7581, "step": 1956 }, { "epoch": 0.2058916359810626, "grad_norm": 1.201172947883606, "learning_rate": 0.00018422068990777812, "loss": 1.9362, "step": 1957 }, { "epoch": 0.2059968437664387, "grad_norm": 1.3182251453399658, "learning_rate": 0.00018420231205225048, "loss": 2.057, "step": 1958 }, { "epoch": 0.20610205155181482, "grad_norm": 1.0256531238555908, "learning_rate": 0.00018418392441858555, "loss": 1.3062, "step": 1959 }, { "epoch": 0.20620725933719095, "grad_norm": 1.0511935949325562, "learning_rate": 0.00018416552700891873, "loss": 2.1073, "step": 1960 }, { "epoch": 0.20631246712256707, "grad_norm": 1.2152209281921387, "learning_rate": 0.0001841471198253864, "loss": 2.1933, "step": 1961 }, { "epoch": 0.2064176749079432, "grad_norm": 1.1236765384674072, "learning_rate": 0.00018412870287012612, "loss": 1.9926, "step": 1962 }, { "epoch": 0.2065228826933193, "grad_norm": 1.0651775598526, "learning_rate": 0.00018411027614527665, "loss": 2.0337, "step": 1963 }, { "epoch": 0.20662809047869543, "grad_norm": 1.3447325229644775, "learning_rate": 0.00018409183965297776, "loss": 2.0808, "step": 1964 }, { "epoch": 0.20673329826407155, "grad_norm": 1.0653175115585327, "learning_rate": 0.0001840733933953704, "loss": 2.1391, "step": 1965 }, { "epoch": 0.20683850604944765, "grad_norm": 0.9823555946350098, "learning_rate": 0.0001840549373745968, "loss": 2.2805, "step": 1966 }, { "epoch": 0.20694371383482377, "grad_norm": 1.1034152507781982, "learning_rate": 0.00018403647159280002, "loss": 2.1311, "step": 1967 }, { "epoch": 0.2070489216201999, "grad_norm": 1.0444360971450806, "learning_rate": 0.00018401799605212457, "loss": 2.3337, "step": 1968 }, { "epoch": 0.207154129405576, "grad_norm": 0.9108467102050781, "learning_rate": 0.00018399951075471588, "loss": 1.8849, "step": 1969 }, { "epoch": 0.20725933719095213, "grad_norm": 1.4065741300582886, "learning_rate": 0.0001839810157027206, "loss": 1.8689, "step": 1970 }, { "epoch": 0.20736454497632825, "grad_norm": 1.062463641166687, "learning_rate": 0.00018396251089828654, "loss": 1.9281, "step": 1971 }, { "epoch": 0.20746975276170437, "grad_norm": 0.9371905326843262, "learning_rate": 0.00018394399634356256, "loss": 2.3794, "step": 1972 }, { "epoch": 0.2075749605470805, "grad_norm": 1.0700284242630005, "learning_rate": 0.0001839254720406987, "loss": 2.0316, "step": 1973 }, { "epoch": 0.2076801683324566, "grad_norm": 1.718322992324829, "learning_rate": 0.00018390693799184613, "loss": 1.9874, "step": 1974 }, { "epoch": 0.2077853761178327, "grad_norm": 1.9158811569213867, "learning_rate": 0.00018388839419915715, "loss": 1.595, "step": 1975 }, { "epoch": 0.20789058390320883, "grad_norm": 0.9104257225990295, "learning_rate": 0.00018386984066478518, "loss": 1.8649, "step": 1976 }, { "epoch": 0.20799579168858495, "grad_norm": 1.0303380489349365, "learning_rate": 0.00018385127739088482, "loss": 1.9061, "step": 1977 }, { "epoch": 0.20810099947396107, "grad_norm": 0.9922066926956177, "learning_rate": 0.0001838327043796117, "loss": 1.966, "step": 1978 }, { "epoch": 0.2082062072593372, "grad_norm": 1.0381168127059937, "learning_rate": 0.0001838141216331227, "loss": 2.3589, "step": 1979 }, { "epoch": 0.20831141504471332, "grad_norm": 1.5640273094177246, "learning_rate": 0.00018379552915357575, "loss": 1.9377, "step": 1980 }, { "epoch": 0.20841662283008944, "grad_norm": 1.299121618270874, "learning_rate": 0.00018377692694312994, "loss": 2.0013, "step": 1981 }, { "epoch": 0.20852183061546553, "grad_norm": 1.0334281921386719, "learning_rate": 0.0001837583150039454, "loss": 1.9686, "step": 1982 }, { "epoch": 0.20862703840084165, "grad_norm": 1.1700999736785889, "learning_rate": 0.00018373969333818364, "loss": 1.6355, "step": 1983 }, { "epoch": 0.20873224618621777, "grad_norm": 1.4064104557037354, "learning_rate": 0.00018372106194800703, "loss": 2.3571, "step": 1984 }, { "epoch": 0.2088374539715939, "grad_norm": 1.18000328540802, "learning_rate": 0.00018370242083557914, "loss": 2.0538, "step": 1985 }, { "epoch": 0.20894266175697002, "grad_norm": 0.7155065536499023, "learning_rate": 0.00018368377000306475, "loss": 2.2469, "step": 1986 }, { "epoch": 0.20904786954234614, "grad_norm": 0.7191174626350403, "learning_rate": 0.00018366510945262972, "loss": 2.0709, "step": 1987 }, { "epoch": 0.20915307732772226, "grad_norm": 1.1349201202392578, "learning_rate": 0.00018364643918644108, "loss": 1.3495, "step": 1988 }, { "epoch": 0.20925828511309838, "grad_norm": 0.8776866793632507, "learning_rate": 0.00018362775920666684, "loss": 2.28, "step": 1989 }, { "epoch": 0.20936349289847447, "grad_norm": 1.0915156602859497, "learning_rate": 0.00018360906951547633, "loss": 2.3553, "step": 1990 }, { "epoch": 0.2094687006838506, "grad_norm": 1.0722743272781372, "learning_rate": 0.00018359037011503988, "loss": 2.0794, "step": 1991 }, { "epoch": 0.20957390846922672, "grad_norm": 1.764479398727417, "learning_rate": 0.000183571661007529, "loss": 2.4184, "step": 1992 }, { "epoch": 0.20967911625460284, "grad_norm": 0.9234581589698792, "learning_rate": 0.00018355294219511633, "loss": 2.3174, "step": 1993 }, { "epoch": 0.20978432403997896, "grad_norm": 1.334053635597229, "learning_rate": 0.00018353421367997563, "loss": 1.8774, "step": 1994 }, { "epoch": 0.20988953182535508, "grad_norm": 0.8430811166763306, "learning_rate": 0.00018351547546428175, "loss": 2.094, "step": 1995 }, { "epoch": 0.2099947396107312, "grad_norm": 1.3585329055786133, "learning_rate": 0.00018349672755021073, "loss": 1.9657, "step": 1996 }, { "epoch": 0.21009994739610732, "grad_norm": 1.2807389497756958, "learning_rate": 0.00018347796993993968, "loss": 1.1998, "step": 1997 }, { "epoch": 0.21020515518148342, "grad_norm": 1.249908685684204, "learning_rate": 0.00018345920263564683, "loss": 2.0678, "step": 1998 }, { "epoch": 0.21031036296685954, "grad_norm": 1.2682554721832275, "learning_rate": 0.00018344042563951167, "loss": 2.2967, "step": 1999 }, { "epoch": 0.21041557075223566, "grad_norm": 1.0497610569000244, "learning_rate": 0.0001834216389537146, "loss": 2.2477, "step": 2000 }, { "epoch": 0.21052077853761178, "grad_norm": 1.0887739658355713, "learning_rate": 0.00018340284258043732, "loss": 1.9668, "step": 2001 }, { "epoch": 0.2106259863229879, "grad_norm": 1.3399494886398315, "learning_rate": 0.00018338403652186255, "loss": 1.4247, "step": 2002 }, { "epoch": 0.21073119410836402, "grad_norm": 1.225252389907837, "learning_rate": 0.0001833652207801742, "loss": 1.7742, "step": 2003 }, { "epoch": 0.21083640189374014, "grad_norm": 0.9872320294380188, "learning_rate": 0.0001833463953575573, "loss": 2.131, "step": 2004 }, { "epoch": 0.21094160967911627, "grad_norm": 1.161528468132019, "learning_rate": 0.00018332756025619796, "loss": 2.1038, "step": 2005 }, { "epoch": 0.21104681746449236, "grad_norm": 0.8207192420959473, "learning_rate": 0.00018330871547828342, "loss": 1.9778, "step": 2006 }, { "epoch": 0.21115202524986848, "grad_norm": 1.2393897771835327, "learning_rate": 0.00018328986102600207, "loss": 1.8538, "step": 2007 }, { "epoch": 0.2112572330352446, "grad_norm": 1.052492380142212, "learning_rate": 0.00018327099690154344, "loss": 2.0008, "step": 2008 }, { "epoch": 0.21136244082062072, "grad_norm": 1.1104390621185303, "learning_rate": 0.00018325212310709815, "loss": 2.2228, "step": 2009 }, { "epoch": 0.21146764860599684, "grad_norm": 1.1710113286972046, "learning_rate": 0.00018323323964485795, "loss": 2.3255, "step": 2010 }, { "epoch": 0.21157285639137297, "grad_norm": 1.5669982433319092, "learning_rate": 0.00018321434651701567, "loss": 2.0485, "step": 2011 }, { "epoch": 0.2116780641767491, "grad_norm": 1.486722707748413, "learning_rate": 0.00018319544372576537, "loss": 1.396, "step": 2012 }, { "epoch": 0.2117832719621252, "grad_norm": 0.9690135717391968, "learning_rate": 0.00018317653127330216, "loss": 1.9004, "step": 2013 }, { "epoch": 0.2118884797475013, "grad_norm": 1.1157475709915161, "learning_rate": 0.00018315760916182228, "loss": 1.9181, "step": 2014 }, { "epoch": 0.21199368753287742, "grad_norm": 1.8870946168899536, "learning_rate": 0.00018313867739352304, "loss": 1.8504, "step": 2015 }, { "epoch": 0.21209889531825354, "grad_norm": 1.5294593572616577, "learning_rate": 0.000183119735970603, "loss": 2.0109, "step": 2016 }, { "epoch": 0.21220410310362967, "grad_norm": 1.3320099115371704, "learning_rate": 0.00018310078489526172, "loss": 2.149, "step": 2017 }, { "epoch": 0.2123093108890058, "grad_norm": 1.6566550731658936, "learning_rate": 0.0001830818241696999, "loss": 1.8748, "step": 2018 }, { "epoch": 0.2124145186743819, "grad_norm": 1.4480758905410767, "learning_rate": 0.00018306285379611947, "loss": 1.9223, "step": 2019 }, { "epoch": 0.21251972645975803, "grad_norm": 1.1992216110229492, "learning_rate": 0.00018304387377672331, "loss": 2.334, "step": 2020 }, { "epoch": 0.21262493424513415, "grad_norm": 1.6246322393417358, "learning_rate": 0.00018302488411371556, "loss": 1.818, "step": 2021 }, { "epoch": 0.21273014203051024, "grad_norm": 1.3721061944961548, "learning_rate": 0.00018300588480930143, "loss": 2.0362, "step": 2022 }, { "epoch": 0.21283534981588637, "grad_norm": 1.2342585325241089, "learning_rate": 0.00018298687586568721, "loss": 1.7764, "step": 2023 }, { "epoch": 0.2129405576012625, "grad_norm": 1.8841065168380737, "learning_rate": 0.00018296785728508038, "loss": 1.5991, "step": 2024 }, { "epoch": 0.2130457653866386, "grad_norm": 1.1739152669906616, "learning_rate": 0.00018294882906968947, "loss": 2.1558, "step": 2025 }, { "epoch": 0.21315097317201473, "grad_norm": 0.8841888904571533, "learning_rate": 0.00018292979122172418, "loss": 1.7489, "step": 2026 }, { "epoch": 0.21325618095739085, "grad_norm": 1.0950676202774048, "learning_rate": 0.00018291074374339534, "loss": 2.2481, "step": 2027 }, { "epoch": 0.21336138874276697, "grad_norm": 1.693406581878662, "learning_rate": 0.00018289168663691486, "loss": 2.3203, "step": 2028 }, { "epoch": 0.2134665965281431, "grad_norm": 0.8693836331367493, "learning_rate": 0.0001828726199044957, "loss": 2.1194, "step": 2029 }, { "epoch": 0.2135718043135192, "grad_norm": 0.9883258938789368, "learning_rate": 0.00018285354354835215, "loss": 2.0936, "step": 2030 }, { "epoch": 0.2136770120988953, "grad_norm": 1.0930025577545166, "learning_rate": 0.00018283445757069942, "loss": 1.9489, "step": 2031 }, { "epoch": 0.21378221988427143, "grad_norm": 1.8851250410079956, "learning_rate": 0.00018281536197375386, "loss": 1.5551, "step": 2032 }, { "epoch": 0.21388742766964755, "grad_norm": 0.933934211730957, "learning_rate": 0.00018279625675973304, "loss": 1.7964, "step": 2033 }, { "epoch": 0.21399263545502367, "grad_norm": 0.9329856038093567, "learning_rate": 0.00018277714193085554, "loss": 1.8394, "step": 2034 }, { "epoch": 0.2140978432403998, "grad_norm": 2.0386810302734375, "learning_rate": 0.00018275801748934115, "loss": 2.0189, "step": 2035 }, { "epoch": 0.21420305102577591, "grad_norm": 1.8662831783294678, "learning_rate": 0.0001827388834374107, "loss": 1.7599, "step": 2036 }, { "epoch": 0.21430825881115204, "grad_norm": 1.1685835123062134, "learning_rate": 0.0001827197397772862, "loss": 1.8818, "step": 2037 }, { "epoch": 0.21441346659652813, "grad_norm": 0.8799912333488464, "learning_rate": 0.00018270058651119063, "loss": 2.1813, "step": 2038 }, { "epoch": 0.21451867438190425, "grad_norm": 0.8218079805374146, "learning_rate": 0.00018268142364134834, "loss": 2.3432, "step": 2039 }, { "epoch": 0.21462388216728037, "grad_norm": 1.1055721044540405, "learning_rate": 0.00018266225116998457, "loss": 1.585, "step": 2040 }, { "epoch": 0.2147290899526565, "grad_norm": 1.1287375688552856, "learning_rate": 0.00018264306909932575, "loss": 2.0216, "step": 2041 }, { "epoch": 0.21483429773803261, "grad_norm": 1.085099220275879, "learning_rate": 0.0001826238774315995, "loss": 1.9095, "step": 2042 }, { "epoch": 0.21493950552340874, "grad_norm": 1.0446321964263916, "learning_rate": 0.0001826046761690344, "loss": 2.0777, "step": 2043 }, { "epoch": 0.21504471330878486, "grad_norm": 1.3021361827850342, "learning_rate": 0.0001825854653138603, "loss": 2.0682, "step": 2044 }, { "epoch": 0.21514992109416098, "grad_norm": 0.9500402808189392, "learning_rate": 0.00018256624486830803, "loss": 2.3274, "step": 2045 }, { "epoch": 0.21525512887953707, "grad_norm": 1.077355980873108, "learning_rate": 0.00018254701483460964, "loss": 1.9925, "step": 2046 }, { "epoch": 0.2153603366649132, "grad_norm": 1.6127686500549316, "learning_rate": 0.00018252777521499821, "loss": 1.9386, "step": 2047 }, { "epoch": 0.21546554445028931, "grad_norm": 0.7945027351379395, "learning_rate": 0.00018250852601170805, "loss": 2.1476, "step": 2048 }, { "epoch": 0.21557075223566544, "grad_norm": 1.248733401298523, "learning_rate": 0.00018248926722697444, "loss": 2.1239, "step": 2049 }, { "epoch": 0.21567596002104156, "grad_norm": 1.2039722204208374, "learning_rate": 0.00018246999886303383, "loss": 2.132, "step": 2050 }, { "epoch": 0.21578116780641768, "grad_norm": 1.0198581218719482, "learning_rate": 0.00018245072092212388, "loss": 1.758, "step": 2051 }, { "epoch": 0.2158863755917938, "grad_norm": 1.2120723724365234, "learning_rate": 0.00018243143340648316, "loss": 2.1045, "step": 2052 }, { "epoch": 0.21599158337716992, "grad_norm": 0.8687598705291748, "learning_rate": 0.00018241213631835153, "loss": 1.568, "step": 2053 }, { "epoch": 0.21609679116254601, "grad_norm": 1.0547373294830322, "learning_rate": 0.0001823928296599699, "loss": 2.3057, "step": 2054 }, { "epoch": 0.21620199894792214, "grad_norm": 0.9490458369255066, "learning_rate": 0.00018237351343358026, "loss": 2.2151, "step": 2055 }, { "epoch": 0.21630720673329826, "grad_norm": 1.1601332426071167, "learning_rate": 0.00018235418764142575, "loss": 2.0495, "step": 2056 }, { "epoch": 0.21641241451867438, "grad_norm": 1.8616313934326172, "learning_rate": 0.00018233485228575063, "loss": 1.877, "step": 2057 }, { "epoch": 0.2165176223040505, "grad_norm": 1.3617507219314575, "learning_rate": 0.00018231550736880024, "loss": 2.1049, "step": 2058 }, { "epoch": 0.21662283008942662, "grad_norm": 1.3656481504440308, "learning_rate": 0.00018229615289282102, "loss": 2.0189, "step": 2059 }, { "epoch": 0.21672803787480274, "grad_norm": 1.2620902061462402, "learning_rate": 0.0001822767888600606, "loss": 2.1029, "step": 2060 }, { "epoch": 0.21683324566017886, "grad_norm": 0.9363958239555359, "learning_rate": 0.00018225741527276755, "loss": 1.9721, "step": 2061 }, { "epoch": 0.21693845344555496, "grad_norm": 0.7752951979637146, "learning_rate": 0.0001822380321331918, "loss": 1.9241, "step": 2062 }, { "epoch": 0.21704366123093108, "grad_norm": 1.2256505489349365, "learning_rate": 0.00018221863944358412, "loss": 2.0667, "step": 2063 }, { "epoch": 0.2171488690163072, "grad_norm": 1.6273034811019897, "learning_rate": 0.00018219923720619663, "loss": 1.9696, "step": 2064 }, { "epoch": 0.21725407680168332, "grad_norm": 1.1030242443084717, "learning_rate": 0.00018217982542328238, "loss": 2.0116, "step": 2065 }, { "epoch": 0.21735928458705944, "grad_norm": 0.9453311562538147, "learning_rate": 0.00018216040409709563, "loss": 1.7222, "step": 2066 }, { "epoch": 0.21746449237243556, "grad_norm": 1.2243266105651855, "learning_rate": 0.00018214097322989168, "loss": 2.2173, "step": 2067 }, { "epoch": 0.21756970015781169, "grad_norm": 2.4034857749938965, "learning_rate": 0.000182121532823927, "loss": 1.7776, "step": 2068 }, { "epoch": 0.2176749079431878, "grad_norm": 1.4039760828018188, "learning_rate": 0.00018210208288145914, "loss": 2.3769, "step": 2069 }, { "epoch": 0.2177801157285639, "grad_norm": 1.2816754579544067, "learning_rate": 0.00018208262340474677, "loss": 1.9379, "step": 2070 }, { "epoch": 0.21788532351394002, "grad_norm": 1.6516449451446533, "learning_rate": 0.0001820631543960496, "loss": 1.7285, "step": 2071 }, { "epoch": 0.21799053129931614, "grad_norm": 1.015907645225525, "learning_rate": 0.00018204367585762855, "loss": 1.749, "step": 2072 }, { "epoch": 0.21809573908469226, "grad_norm": 1.1783592700958252, "learning_rate": 0.00018202418779174556, "loss": 2.1641, "step": 2073 }, { "epoch": 0.21820094687006839, "grad_norm": 0.8992342352867126, "learning_rate": 0.00018200469020066378, "loss": 1.8566, "step": 2074 }, { "epoch": 0.2183061546554445, "grad_norm": 1.1242965459823608, "learning_rate": 0.00018198518308664734, "loss": 1.4988, "step": 2075 }, { "epoch": 0.21841136244082063, "grad_norm": 1.6040006875991821, "learning_rate": 0.00018196566645196157, "loss": 2.4172, "step": 2076 }, { "epoch": 0.21851657022619675, "grad_norm": 1.6206622123718262, "learning_rate": 0.00018194614029887286, "loss": 1.8929, "step": 2077 }, { "epoch": 0.21862177801157284, "grad_norm": 1.8726282119750977, "learning_rate": 0.0001819266046296487, "loss": 1.8977, "step": 2078 }, { "epoch": 0.21872698579694896, "grad_norm": 0.9685164093971252, "learning_rate": 0.00018190705944655776, "loss": 2.2144, "step": 2079 }, { "epoch": 0.21883219358232509, "grad_norm": 1.0125783681869507, "learning_rate": 0.00018188750475186968, "loss": 1.9572, "step": 2080 }, { "epoch": 0.2189374013677012, "grad_norm": 1.1588927507400513, "learning_rate": 0.00018186794054785534, "loss": 1.7875, "step": 2081 }, { "epoch": 0.21904260915307733, "grad_norm": 0.96497642993927, "learning_rate": 0.00018184836683678667, "loss": 2.2885, "step": 2082 }, { "epoch": 0.21914781693845345, "grad_norm": 1.0914496183395386, "learning_rate": 0.00018182878362093665, "loss": 1.9495, "step": 2083 }, { "epoch": 0.21925302472382957, "grad_norm": 1.0270262956619263, "learning_rate": 0.00018180919090257945, "loss": 1.7052, "step": 2084 }, { "epoch": 0.2193582325092057, "grad_norm": 1.0222289562225342, "learning_rate": 0.00018178958868399033, "loss": 2.1131, "step": 2085 }, { "epoch": 0.21946344029458179, "grad_norm": 2.0089921951293945, "learning_rate": 0.00018176997696744556, "loss": 2.0778, "step": 2086 }, { "epoch": 0.2195686480799579, "grad_norm": 1.4937543869018555, "learning_rate": 0.00018175035575522264, "loss": 1.8517, "step": 2087 }, { "epoch": 0.21967385586533403, "grad_norm": 1.0364367961883545, "learning_rate": 0.00018173072504960012, "loss": 1.9049, "step": 2088 }, { "epoch": 0.21977906365071015, "grad_norm": 1.225338339805603, "learning_rate": 0.00018171108485285763, "loss": 2.0639, "step": 2089 }, { "epoch": 0.21988427143608627, "grad_norm": 1.0059758424758911, "learning_rate": 0.0001816914351672759, "loss": 1.9738, "step": 2090 }, { "epoch": 0.2199894792214624, "grad_norm": 1.3843426704406738, "learning_rate": 0.00018167177599513683, "loss": 2.1056, "step": 2091 }, { "epoch": 0.2200946870068385, "grad_norm": 1.4146124124526978, "learning_rate": 0.00018165210733872336, "loss": 2.014, "step": 2092 }, { "epoch": 0.22019989479221463, "grad_norm": 0.9480724334716797, "learning_rate": 0.00018163242920031953, "loss": 2.1525, "step": 2093 }, { "epoch": 0.22030510257759073, "grad_norm": 1.4912493228912354, "learning_rate": 0.00018161274158221048, "loss": 2.697, "step": 2094 }, { "epoch": 0.22041031036296685, "grad_norm": 1.2136460542678833, "learning_rate": 0.00018159304448668253, "loss": 1.5769, "step": 2095 }, { "epoch": 0.22051551814834297, "grad_norm": 0.991580069065094, "learning_rate": 0.00018157333791602297, "loss": 2.2276, "step": 2096 }, { "epoch": 0.2206207259337191, "grad_norm": 0.8912844061851501, "learning_rate": 0.00018155362187252032, "loss": 2.0501, "step": 2097 }, { "epoch": 0.2207259337190952, "grad_norm": 1.1324025392532349, "learning_rate": 0.00018153389635846412, "loss": 2.2222, "step": 2098 }, { "epoch": 0.22083114150447133, "grad_norm": 0.7306355237960815, "learning_rate": 0.000181514161376145, "loss": 2.0178, "step": 2099 }, { "epoch": 0.22093634928984746, "grad_norm": 1.2878000736236572, "learning_rate": 0.00018149441692785474, "loss": 2.1164, "step": 2100 }, { "epoch": 0.22104155707522358, "grad_norm": 1.2069727182388306, "learning_rate": 0.00018147466301588622, "loss": 1.9003, "step": 2101 }, { "epoch": 0.22114676486059967, "grad_norm": 0.8253376483917236, "learning_rate": 0.00018145489964253332, "loss": 1.7612, "step": 2102 }, { "epoch": 0.2212519726459758, "grad_norm": 1.2956628799438477, "learning_rate": 0.0001814351268100912, "loss": 2.1136, "step": 2103 }, { "epoch": 0.2213571804313519, "grad_norm": 0.9152336120605469, "learning_rate": 0.00018141534452085595, "loss": 1.8014, "step": 2104 }, { "epoch": 0.22146238821672803, "grad_norm": 1.4479109048843384, "learning_rate": 0.00018139555277712482, "loss": 1.951, "step": 2105 }, { "epoch": 0.22156759600210416, "grad_norm": 1.5760619640350342, "learning_rate": 0.0001813757515811962, "loss": 1.9071, "step": 2106 }, { "epoch": 0.22167280378748028, "grad_norm": 1.1229628324508667, "learning_rate": 0.0001813559409353695, "loss": 1.8727, "step": 2107 }, { "epoch": 0.2217780115728564, "grad_norm": 0.8988067507743835, "learning_rate": 0.0001813361208419453, "loss": 2.2383, "step": 2108 }, { "epoch": 0.22188321935823252, "grad_norm": 1.3481158018112183, "learning_rate": 0.0001813162913032252, "loss": 2.0623, "step": 2109 }, { "epoch": 0.2219884271436086, "grad_norm": 1.297059416770935, "learning_rate": 0.00018129645232151193, "loss": 2.4296, "step": 2110 }, { "epoch": 0.22209363492898473, "grad_norm": 1.9435932636260986, "learning_rate": 0.0001812766038991094, "loss": 2.1651, "step": 2111 }, { "epoch": 0.22219884271436086, "grad_norm": 1.3611021041870117, "learning_rate": 0.00018125674603832248, "loss": 1.9258, "step": 2112 }, { "epoch": 0.22230405049973698, "grad_norm": 1.4265265464782715, "learning_rate": 0.00018123687874145721, "loss": 1.6352, "step": 2113 }, { "epoch": 0.2224092582851131, "grad_norm": 0.9899458289146423, "learning_rate": 0.00018121700201082072, "loss": 2.3061, "step": 2114 }, { "epoch": 0.22251446607048922, "grad_norm": 0.9266303777694702, "learning_rate": 0.00018119711584872123, "loss": 2.1004, "step": 2115 }, { "epoch": 0.22261967385586534, "grad_norm": 1.2452328205108643, "learning_rate": 0.00018117722025746806, "loss": 2.2262, "step": 2116 }, { "epoch": 0.22272488164124146, "grad_norm": 1.2869268655776978, "learning_rate": 0.0001811573152393716, "loss": 2.3989, "step": 2117 }, { "epoch": 0.22283008942661758, "grad_norm": 0.9200719594955444, "learning_rate": 0.00018113740079674337, "loss": 1.9065, "step": 2118 }, { "epoch": 0.22293529721199368, "grad_norm": 0.869301974773407, "learning_rate": 0.00018111747693189595, "loss": 1.7604, "step": 2119 }, { "epoch": 0.2230405049973698, "grad_norm": 1.119777798652649, "learning_rate": 0.00018109754364714305, "loss": 2.128, "step": 2120 }, { "epoch": 0.22314571278274592, "grad_norm": 1.4669591188430786, "learning_rate": 0.00018107760094479948, "loss": 2.174, "step": 2121 }, { "epoch": 0.22325092056812204, "grad_norm": 0.9276453256607056, "learning_rate": 0.0001810576488271811, "loss": 2.1165, "step": 2122 }, { "epoch": 0.22335612835349816, "grad_norm": 0.9469357132911682, "learning_rate": 0.00018103768729660485, "loss": 2.2046, "step": 2123 }, { "epoch": 0.22346133613887428, "grad_norm": 1.525495171546936, "learning_rate": 0.00018101771635538883, "loss": 1.4605, "step": 2124 }, { "epoch": 0.2235665439242504, "grad_norm": 1.022232174873352, "learning_rate": 0.00018099773600585223, "loss": 2.0554, "step": 2125 }, { "epoch": 0.22367175170962653, "grad_norm": 1.015611171722412, "learning_rate": 0.00018097774625031523, "loss": 2.064, "step": 2126 }, { "epoch": 0.22377695949500262, "grad_norm": 1.5566353797912598, "learning_rate": 0.0001809577470910992, "loss": 1.9989, "step": 2127 }, { "epoch": 0.22388216728037874, "grad_norm": 1.2023249864578247, "learning_rate": 0.0001809377385305266, "loss": 1.9456, "step": 2128 }, { "epoch": 0.22398737506575486, "grad_norm": 1.1629719734191895, "learning_rate": 0.00018091772057092097, "loss": 2.3635, "step": 2129 }, { "epoch": 0.22409258285113098, "grad_norm": 1.1971515417099, "learning_rate": 0.00018089769321460688, "loss": 2.101, "step": 2130 }, { "epoch": 0.2241977906365071, "grad_norm": 0.9968559741973877, "learning_rate": 0.00018087765646391008, "loss": 1.94, "step": 2131 }, { "epoch": 0.22430299842188323, "grad_norm": 1.0076571702957153, "learning_rate": 0.00018085761032115736, "loss": 2.1199, "step": 2132 }, { "epoch": 0.22440820620725935, "grad_norm": 1.0761382579803467, "learning_rate": 0.00018083755478867658, "loss": 1.8256, "step": 2133 }, { "epoch": 0.22451341399263547, "grad_norm": 1.2399243116378784, "learning_rate": 0.00018081748986879679, "loss": 1.6247, "step": 2134 }, { "epoch": 0.22461862177801156, "grad_norm": 0.6975569725036621, "learning_rate": 0.00018079741556384802, "loss": 2.0154, "step": 2135 }, { "epoch": 0.22472382956338768, "grad_norm": 0.8129298686981201, "learning_rate": 0.00018077733187616142, "loss": 1.9586, "step": 2136 }, { "epoch": 0.2248290373487638, "grad_norm": 1.2736451625823975, "learning_rate": 0.0001807572388080693, "loss": 2.3598, "step": 2137 }, { "epoch": 0.22493424513413993, "grad_norm": 1.5190147161483765, "learning_rate": 0.00018073713636190494, "loss": 2.1019, "step": 2138 }, { "epoch": 0.22503945291951605, "grad_norm": 1.2464752197265625, "learning_rate": 0.0001807170245400028, "loss": 2.0247, "step": 2139 }, { "epoch": 0.22514466070489217, "grad_norm": 1.5845669507980347, "learning_rate": 0.0001806969033446984, "loss": 1.6235, "step": 2140 }, { "epoch": 0.2252498684902683, "grad_norm": 1.4465845823287964, "learning_rate": 0.00018067677277832834, "loss": 1.8367, "step": 2141 }, { "epoch": 0.2253550762756444, "grad_norm": 1.5385208129882812, "learning_rate": 0.00018065663284323028, "loss": 1.9726, "step": 2142 }, { "epoch": 0.2254602840610205, "grad_norm": 1.2862590551376343, "learning_rate": 0.0001806364835417431, "loss": 2.216, "step": 2143 }, { "epoch": 0.22556549184639663, "grad_norm": 1.4813519716262817, "learning_rate": 0.0001806163248762066, "loss": 2.0377, "step": 2144 }, { "epoch": 0.22567069963177275, "grad_norm": 3.828671455383301, "learning_rate": 0.00018059615684896176, "loss": 2.1483, "step": 2145 }, { "epoch": 0.22577590741714887, "grad_norm": 1.3726669549942017, "learning_rate": 0.00018057597946235062, "loss": 1.8516, "step": 2146 }, { "epoch": 0.225881115202525, "grad_norm": 1.2943693399429321, "learning_rate": 0.0001805557927187163, "loss": 1.9399, "step": 2147 }, { "epoch": 0.2259863229879011, "grad_norm": 1.1112357378005981, "learning_rate": 0.00018053559662040302, "loss": 2.064, "step": 2148 }, { "epoch": 0.22609153077327723, "grad_norm": 1.11170494556427, "learning_rate": 0.00018051539116975613, "loss": 1.9364, "step": 2149 }, { "epoch": 0.22619673855865335, "grad_norm": 0.9152960181236267, "learning_rate": 0.000180495176369122, "loss": 2.0502, "step": 2150 }, { "epoch": 0.22630194634402945, "grad_norm": 1.5197174549102783, "learning_rate": 0.00018047495222084812, "loss": 2.0064, "step": 2151 }, { "epoch": 0.22640715412940557, "grad_norm": 1.3327728509902954, "learning_rate": 0.000180454718727283, "loss": 1.2452, "step": 2152 }, { "epoch": 0.2265123619147817, "grad_norm": 1.113082766532898, "learning_rate": 0.00018043447589077634, "loss": 1.7021, "step": 2153 }, { "epoch": 0.2266175697001578, "grad_norm": 1.596432089805603, "learning_rate": 0.00018041422371367885, "loss": 1.9555, "step": 2154 }, { "epoch": 0.22672277748553393, "grad_norm": 1.9314936399459839, "learning_rate": 0.00018039396219834237, "loss": 1.774, "step": 2155 }, { "epoch": 0.22682798527091005, "grad_norm": 1.492937445640564, "learning_rate": 0.00018037369134711977, "loss": 2.2366, "step": 2156 }, { "epoch": 0.22693319305628618, "grad_norm": 1.4675370454788208, "learning_rate": 0.00018035341116236507, "loss": 1.9737, "step": 2157 }, { "epoch": 0.2270384008416623, "grad_norm": 1.595471978187561, "learning_rate": 0.00018033312164643332, "loss": 2.141, "step": 2158 }, { "epoch": 0.2271436086270384, "grad_norm": 1.6596059799194336, "learning_rate": 0.0001803128228016807, "loss": 1.5989, "step": 2159 }, { "epoch": 0.2272488164124145, "grad_norm": 1.2517389059066772, "learning_rate": 0.00018029251463046444, "loss": 1.805, "step": 2160 }, { "epoch": 0.22735402419779063, "grad_norm": 1.7200475931167603, "learning_rate": 0.00018027219713514283, "loss": 0.9951, "step": 2161 }, { "epoch": 0.22745923198316675, "grad_norm": 1.616018295288086, "learning_rate": 0.00018025187031807532, "loss": 1.9761, "step": 2162 }, { "epoch": 0.22756443976854288, "grad_norm": 1.3265496492385864, "learning_rate": 0.00018023153418162235, "loss": 1.8146, "step": 2163 }, { "epoch": 0.227669647553919, "grad_norm": 1.3496745824813843, "learning_rate": 0.0001802111887281455, "loss": 1.7593, "step": 2164 }, { "epoch": 0.22777485533929512, "grad_norm": 1.171179175376892, "learning_rate": 0.0001801908339600075, "loss": 1.7885, "step": 2165 }, { "epoch": 0.22788006312467124, "grad_norm": 1.5108046531677246, "learning_rate": 0.00018017046987957197, "loss": 1.9684, "step": 2166 }, { "epoch": 0.22798527091004733, "grad_norm": 1.351288914680481, "learning_rate": 0.00018015009648920374, "loss": 1.671, "step": 2167 }, { "epoch": 0.22809047869542345, "grad_norm": 1.2217696905136108, "learning_rate": 0.00018012971379126875, "loss": 1.9302, "step": 2168 }, { "epoch": 0.22819568648079958, "grad_norm": 1.644564151763916, "learning_rate": 0.00018010932178813397, "loss": 1.7754, "step": 2169 }, { "epoch": 0.2283008942661757, "grad_norm": 1.167407751083374, "learning_rate": 0.00018008892048216744, "loss": 1.9996, "step": 2170 }, { "epoch": 0.22840610205155182, "grad_norm": 1.467685341835022, "learning_rate": 0.00018006850987573834, "loss": 1.9082, "step": 2171 }, { "epoch": 0.22851130983692794, "grad_norm": 1.2934365272521973, "learning_rate": 0.0001800480899712168, "loss": 2.4512, "step": 2172 }, { "epoch": 0.22861651762230406, "grad_norm": 1.2714877128601074, "learning_rate": 0.00018002766077097415, "loss": 2.1337, "step": 2173 }, { "epoch": 0.22872172540768018, "grad_norm": 1.2429519891738892, "learning_rate": 0.0001800072222773828, "loss": 1.7869, "step": 2174 }, { "epoch": 0.22882693319305628, "grad_norm": 1.2201809883117676, "learning_rate": 0.00017998677449281621, "loss": 1.6531, "step": 2175 }, { "epoch": 0.2289321409784324, "grad_norm": 1.0419690608978271, "learning_rate": 0.00017996631741964888, "loss": 2.3172, "step": 2176 }, { "epoch": 0.22903734876380852, "grad_norm": 1.217642903327942, "learning_rate": 0.0001799458510602564, "loss": 1.8395, "step": 2177 }, { "epoch": 0.22914255654918464, "grad_norm": 1.229682445526123, "learning_rate": 0.0001799253754170155, "loss": 2.1528, "step": 2178 }, { "epoch": 0.22924776433456076, "grad_norm": 0.8546053767204285, "learning_rate": 0.00017990489049230396, "loss": 2.0306, "step": 2179 }, { "epoch": 0.22935297211993688, "grad_norm": 1.6027251482009888, "learning_rate": 0.0001798843962885006, "loss": 2.03, "step": 2180 }, { "epoch": 0.229458179905313, "grad_norm": 1.3929436206817627, "learning_rate": 0.00017986389280798533, "loss": 2.0689, "step": 2181 }, { "epoch": 0.22956338769068912, "grad_norm": 1.221983790397644, "learning_rate": 0.00017984338005313922, "loss": 2.0643, "step": 2182 }, { "epoch": 0.22966859547606522, "grad_norm": 1.0649733543395996, "learning_rate": 0.00017982285802634426, "loss": 1.6149, "step": 2183 }, { "epoch": 0.22977380326144134, "grad_norm": 1.2897831201553345, "learning_rate": 0.00017980232672998368, "loss": 2.0364, "step": 2184 }, { "epoch": 0.22987901104681746, "grad_norm": 0.9529754519462585, "learning_rate": 0.00017978178616644166, "loss": 1.6717, "step": 2185 }, { "epoch": 0.22998421883219358, "grad_norm": 1.1082221269607544, "learning_rate": 0.00017976123633810354, "loss": 2.2986, "step": 2186 }, { "epoch": 0.2300894266175697, "grad_norm": 1.5675081014633179, "learning_rate": 0.00017974067724735567, "loss": 1.7103, "step": 2187 }, { "epoch": 0.23019463440294582, "grad_norm": 1.7995283603668213, "learning_rate": 0.00017972010889658554, "loss": 2.0048, "step": 2188 }, { "epoch": 0.23029984218832195, "grad_norm": 1.0870211124420166, "learning_rate": 0.00017969953128818168, "loss": 2.0764, "step": 2189 }, { "epoch": 0.23040504997369807, "grad_norm": 1.2951061725616455, "learning_rate": 0.0001796789444245337, "loss": 2.4356, "step": 2190 }, { "epoch": 0.23051025775907416, "grad_norm": 1.5553895235061646, "learning_rate": 0.00017965834830803228, "loss": 2.0726, "step": 2191 }, { "epoch": 0.23061546554445028, "grad_norm": 1.519358515739441, "learning_rate": 0.0001796377429410692, "loss": 2.0531, "step": 2192 }, { "epoch": 0.2307206733298264, "grad_norm": 1.6568381786346436, "learning_rate": 0.00017961712832603724, "loss": 1.7599, "step": 2193 }, { "epoch": 0.23082588111520252, "grad_norm": 0.9388352036476135, "learning_rate": 0.00017959650446533037, "loss": 2.2012, "step": 2194 }, { "epoch": 0.23093108890057865, "grad_norm": 1.2496514320373535, "learning_rate": 0.0001795758713613435, "loss": 1.9036, "step": 2195 }, { "epoch": 0.23103629668595477, "grad_norm": 1.169584035873413, "learning_rate": 0.00017955522901647275, "loss": 2.3784, "step": 2196 }, { "epoch": 0.2311415044713309, "grad_norm": 0.9535863399505615, "learning_rate": 0.00017953457743311523, "loss": 1.9531, "step": 2197 }, { "epoch": 0.231246712256707, "grad_norm": 0.8409315347671509, "learning_rate": 0.00017951391661366912, "loss": 1.9737, "step": 2198 }, { "epoch": 0.2313519200420831, "grad_norm": 1.8287293910980225, "learning_rate": 0.00017949324656053373, "loss": 1.6721, "step": 2199 }, { "epoch": 0.23145712782745922, "grad_norm": 1.6631537675857544, "learning_rate": 0.00017947256727610935, "loss": 1.6647, "step": 2200 }, { "epoch": 0.23156233561283535, "grad_norm": 2.273388147354126, "learning_rate": 0.00017945187876279746, "loss": 1.5766, "step": 2201 }, { "epoch": 0.23166754339821147, "grad_norm": 1.3821104764938354, "learning_rate": 0.0001794311810230005, "loss": 2.6111, "step": 2202 }, { "epoch": 0.2317727511835876, "grad_norm": 0.8510130643844604, "learning_rate": 0.00017941047405912203, "loss": 2.1294, "step": 2203 }, { "epoch": 0.2318779589689637, "grad_norm": 1.8961708545684814, "learning_rate": 0.00017938975787356673, "loss": 2.1342, "step": 2204 }, { "epoch": 0.23198316675433983, "grad_norm": 1.116129994392395, "learning_rate": 0.00017936903246874026, "loss": 1.8121, "step": 2205 }, { "epoch": 0.23208837453971595, "grad_norm": 1.1960958242416382, "learning_rate": 0.0001793482978470494, "loss": 1.9618, "step": 2206 }, { "epoch": 0.23219358232509205, "grad_norm": 1.3476707935333252, "learning_rate": 0.00017932755401090203, "loss": 2.1292, "step": 2207 }, { "epoch": 0.23229879011046817, "grad_norm": 1.285470962524414, "learning_rate": 0.00017930680096270697, "loss": 2.2751, "step": 2208 }, { "epoch": 0.2324039978958443, "grad_norm": 1.4089267253875732, "learning_rate": 0.00017928603870487434, "loss": 1.8112, "step": 2209 }, { "epoch": 0.2325092056812204, "grad_norm": 0.8862564563751221, "learning_rate": 0.00017926526723981506, "loss": 2.086, "step": 2210 }, { "epoch": 0.23261441346659653, "grad_norm": 1.1878046989440918, "learning_rate": 0.00017924448656994133, "loss": 1.8758, "step": 2211 }, { "epoch": 0.23271962125197265, "grad_norm": 1.1390782594680786, "learning_rate": 0.00017922369669766633, "loss": 1.7815, "step": 2212 }, { "epoch": 0.23282482903734877, "grad_norm": 1.1724015474319458, "learning_rate": 0.0001792028976254043, "loss": 1.8632, "step": 2213 }, { "epoch": 0.2329300368227249, "grad_norm": 1.2103734016418457, "learning_rate": 0.00017918208935557058, "loss": 1.739, "step": 2214 }, { "epoch": 0.233035244608101, "grad_norm": 1.0550634860992432, "learning_rate": 0.00017916127189058158, "loss": 2.301, "step": 2215 }, { "epoch": 0.2331404523934771, "grad_norm": 1.6345337629318237, "learning_rate": 0.0001791404452328547, "loss": 1.9167, "step": 2216 }, { "epoch": 0.23324566017885323, "grad_norm": 0.9604158997535706, "learning_rate": 0.00017911960938480858, "loss": 1.8264, "step": 2217 }, { "epoch": 0.23335086796422935, "grad_norm": 1.6715139150619507, "learning_rate": 0.00017909876434886273, "loss": 1.826, "step": 2218 }, { "epoch": 0.23345607574960547, "grad_norm": 1.1755411624908447, "learning_rate": 0.00017907791012743783, "loss": 1.5755, "step": 2219 }, { "epoch": 0.2335612835349816, "grad_norm": 1.8296947479248047, "learning_rate": 0.00017905704672295563, "loss": 1.5263, "step": 2220 }, { "epoch": 0.23366649132035772, "grad_norm": 1.162333607673645, "learning_rate": 0.00017903617413783893, "loss": 2.2662, "step": 2221 }, { "epoch": 0.23377169910573384, "grad_norm": 1.608921766281128, "learning_rate": 0.0001790152923745116, "loss": 2.2386, "step": 2222 }, { "epoch": 0.23387690689110993, "grad_norm": 0.9484199285507202, "learning_rate": 0.00017899440143539854, "loss": 1.5288, "step": 2223 }, { "epoch": 0.23398211467648605, "grad_norm": 0.7746570110321045, "learning_rate": 0.00017897350132292577, "loss": 1.7404, "step": 2224 }, { "epoch": 0.23408732246186217, "grad_norm": 1.0333335399627686, "learning_rate": 0.00017895259203952032, "loss": 2.0346, "step": 2225 }, { "epoch": 0.2341925302472383, "grad_norm": 1.308410882949829, "learning_rate": 0.00017893167358761037, "loss": 2.0028, "step": 2226 }, { "epoch": 0.23429773803261442, "grad_norm": 1.7827560901641846, "learning_rate": 0.00017891074596962508, "loss": 1.665, "step": 2227 }, { "epoch": 0.23440294581799054, "grad_norm": 2.48459792137146, "learning_rate": 0.0001788898091879947, "loss": 1.9477, "step": 2228 }, { "epoch": 0.23450815360336666, "grad_norm": 1.5465545654296875, "learning_rate": 0.00017886886324515054, "loss": 2.1428, "step": 2229 }, { "epoch": 0.23461336138874278, "grad_norm": 1.2667497396469116, "learning_rate": 0.00017884790814352502, "loss": 1.6537, "step": 2230 }, { "epoch": 0.23471856917411887, "grad_norm": 1.3898303508758545, "learning_rate": 0.00017882694388555157, "loss": 2.175, "step": 2231 }, { "epoch": 0.234823776959495, "grad_norm": 1.1822909116744995, "learning_rate": 0.0001788059704736647, "loss": 1.6444, "step": 2232 }, { "epoch": 0.23492898474487112, "grad_norm": 1.4326550960540771, "learning_rate": 0.00017878498791029998, "loss": 2.4223, "step": 2233 }, { "epoch": 0.23503419253024724, "grad_norm": 1.0603430271148682, "learning_rate": 0.00017876399619789406, "loss": 1.8874, "step": 2234 }, { "epoch": 0.23513940031562336, "grad_norm": 0.9951735138893127, "learning_rate": 0.0001787429953388846, "loss": 1.9595, "step": 2235 }, { "epoch": 0.23524460810099948, "grad_norm": 0.889345645904541, "learning_rate": 0.0001787219853357104, "loss": 1.5804, "step": 2236 }, { "epoch": 0.2353498158863756, "grad_norm": 1.0618045330047607, "learning_rate": 0.00017870096619081123, "loss": 1.8707, "step": 2237 }, { "epoch": 0.23545502367175172, "grad_norm": 0.9510637521743774, "learning_rate": 0.00017867993790662804, "loss": 2.1408, "step": 2238 }, { "epoch": 0.23556023145712782, "grad_norm": 0.9027166366577148, "learning_rate": 0.00017865890048560277, "loss": 2.3268, "step": 2239 }, { "epoch": 0.23566543924250394, "grad_norm": 2.181145191192627, "learning_rate": 0.00017863785393017838, "loss": 2.1089, "step": 2240 }, { "epoch": 0.23577064702788006, "grad_norm": 0.9953977465629578, "learning_rate": 0.00017861679824279897, "loss": 2.0335, "step": 2241 }, { "epoch": 0.23587585481325618, "grad_norm": 1.1575350761413574, "learning_rate": 0.00017859573342590964, "loss": 1.7528, "step": 2242 }, { "epoch": 0.2359810625986323, "grad_norm": 1.475678563117981, "learning_rate": 0.00017857465948195662, "loss": 2.1524, "step": 2243 }, { "epoch": 0.23608627038400842, "grad_norm": 1.2343095541000366, "learning_rate": 0.00017855357641338712, "loss": 2.0534, "step": 2244 }, { "epoch": 0.23619147816938454, "grad_norm": 1.7605794668197632, "learning_rate": 0.0001785324842226495, "loss": 2.1463, "step": 2245 }, { "epoch": 0.23629668595476067, "grad_norm": 1.5780607461929321, "learning_rate": 0.00017851138291219301, "loss": 1.9925, "step": 2246 }, { "epoch": 0.23640189374013676, "grad_norm": 1.1927117109298706, "learning_rate": 0.00017849027248446824, "loss": 1.7036, "step": 2247 }, { "epoch": 0.23650710152551288, "grad_norm": 0.9393693208694458, "learning_rate": 0.00017846915294192654, "loss": 1.765, "step": 2248 }, { "epoch": 0.236612309310889, "grad_norm": 2.274392604827881, "learning_rate": 0.00017844802428702052, "loss": 1.7931, "step": 2249 }, { "epoch": 0.23671751709626512, "grad_norm": 1.2075339555740356, "learning_rate": 0.00017842688652220377, "loss": 2.3247, "step": 2250 }, { "epoch": 0.23682272488164124, "grad_norm": 1.0093438625335693, "learning_rate": 0.00017840573964993093, "loss": 2.0814, "step": 2251 }, { "epoch": 0.23692793266701737, "grad_norm": 1.147470474243164, "learning_rate": 0.00017838458367265772, "loss": 1.8764, "step": 2252 }, { "epoch": 0.2370331404523935, "grad_norm": 1.6974701881408691, "learning_rate": 0.00017836341859284093, "loss": 1.4435, "step": 2253 }, { "epoch": 0.2371383482377696, "grad_norm": 1.7535107135772705, "learning_rate": 0.00017834224441293836, "loss": 1.4787, "step": 2254 }, { "epoch": 0.2372435560231457, "grad_norm": 1.3474122285842896, "learning_rate": 0.00017832106113540897, "loss": 1.3433, "step": 2255 }, { "epoch": 0.23734876380852182, "grad_norm": 1.8467276096343994, "learning_rate": 0.0001782998687627126, "loss": 2.1831, "step": 2256 }, { "epoch": 0.23745397159389794, "grad_norm": 1.08902907371521, "learning_rate": 0.00017827866729731035, "loss": 2.0054, "step": 2257 }, { "epoch": 0.23755917937927407, "grad_norm": 0.9274120926856995, "learning_rate": 0.0001782574567416642, "loss": 2.1129, "step": 2258 }, { "epoch": 0.2376643871646502, "grad_norm": 0.9804845452308655, "learning_rate": 0.00017823623709823733, "loss": 1.9947, "step": 2259 }, { "epoch": 0.2377695949500263, "grad_norm": 2.3834564685821533, "learning_rate": 0.00017821500836949386, "loss": 1.8823, "step": 2260 }, { "epoch": 0.23787480273540243, "grad_norm": 1.2635396718978882, "learning_rate": 0.000178193770557899, "loss": 2.2756, "step": 2261 }, { "epoch": 0.23798001052077855, "grad_norm": 0.8009362816810608, "learning_rate": 0.00017817252366591907, "loss": 2.1824, "step": 2262 }, { "epoch": 0.23808521830615464, "grad_norm": 1.8110861778259277, "learning_rate": 0.0001781512676960214, "loss": 1.3582, "step": 2263 }, { "epoch": 0.23819042609153077, "grad_norm": 1.50228750705719, "learning_rate": 0.00017813000265067433, "loss": 1.5663, "step": 2264 }, { "epoch": 0.2382956338769069, "grad_norm": 1.1287262439727783, "learning_rate": 0.00017810872853234733, "loss": 1.9131, "step": 2265 }, { "epoch": 0.238400841662283, "grad_norm": 1.1263632774353027, "learning_rate": 0.0001780874453435109, "loss": 1.8182, "step": 2266 }, { "epoch": 0.23850604944765913, "grad_norm": 1.2594873905181885, "learning_rate": 0.0001780661530866366, "loss": 1.8225, "step": 2267 }, { "epoch": 0.23861125723303525, "grad_norm": 1.1252297163009644, "learning_rate": 0.00017804485176419697, "loss": 2.0544, "step": 2268 }, { "epoch": 0.23871646501841137, "grad_norm": 1.2109135389328003, "learning_rate": 0.00017802354137866572, "loss": 2.2275, "step": 2269 }, { "epoch": 0.2388216728037875, "grad_norm": 1.0338249206542969, "learning_rate": 0.00017800222193251752, "loss": 2.1604, "step": 2270 }, { "epoch": 0.2389268805891636, "grad_norm": 1.1437997817993164, "learning_rate": 0.00017798089342822816, "loss": 2.406, "step": 2271 }, { "epoch": 0.2390320883745397, "grad_norm": 1.0176033973693848, "learning_rate": 0.00017795955586827442, "loss": 2.3426, "step": 2272 }, { "epoch": 0.23913729615991583, "grad_norm": 1.5589518547058105, "learning_rate": 0.00017793820925513418, "loss": 2.1739, "step": 2273 }, { "epoch": 0.23924250394529195, "grad_norm": 0.8495704531669617, "learning_rate": 0.00017791685359128633, "loss": 2.1634, "step": 2274 }, { "epoch": 0.23934771173066807, "grad_norm": 1.0862325429916382, "learning_rate": 0.00017789548887921087, "loss": 2.2329, "step": 2275 }, { "epoch": 0.2394529195160442, "grad_norm": 1.8624638319015503, "learning_rate": 0.00017787411512138875, "loss": 1.7344, "step": 2276 }, { "epoch": 0.23955812730142031, "grad_norm": 1.0375189781188965, "learning_rate": 0.0001778527323203021, "loss": 1.7976, "step": 2277 }, { "epoch": 0.23966333508679644, "grad_norm": 0.92099928855896, "learning_rate": 0.000177831340478434, "loss": 2.173, "step": 2278 }, { "epoch": 0.23976854287217253, "grad_norm": 2.462179183959961, "learning_rate": 0.00017780993959826865, "loss": 1.6677, "step": 2279 }, { "epoch": 0.23987375065754865, "grad_norm": 1.415610432624817, "learning_rate": 0.00017778852968229123, "loss": 2.1977, "step": 2280 }, { "epoch": 0.23997895844292477, "grad_norm": 1.6684101819992065, "learning_rate": 0.000177767110732988, "loss": 1.7925, "step": 2281 }, { "epoch": 0.2400841662283009, "grad_norm": 0.9390462040901184, "learning_rate": 0.00017774568275284627, "loss": 1.7441, "step": 2282 }, { "epoch": 0.24018937401367702, "grad_norm": 1.377502679824829, "learning_rate": 0.00017772424574435443, "loss": 1.847, "step": 2283 }, { "epoch": 0.24029458179905314, "grad_norm": 1.1089184284210205, "learning_rate": 0.00017770279971000185, "loss": 2.3168, "step": 2284 }, { "epoch": 0.24039978958442926, "grad_norm": 1.1749531030654907, "learning_rate": 0.00017768134465227903, "loss": 2.303, "step": 2285 }, { "epoch": 0.24050499736980538, "grad_norm": 1.2190381288528442, "learning_rate": 0.00017765988057367747, "loss": 1.7057, "step": 2286 }, { "epoch": 0.24061020515518147, "grad_norm": 0.8646054267883301, "learning_rate": 0.00017763840747668966, "loss": 1.7069, "step": 2287 }, { "epoch": 0.2407154129405576, "grad_norm": 0.8684479594230652, "learning_rate": 0.00017761692536380928, "loss": 1.9386, "step": 2288 }, { "epoch": 0.24082062072593372, "grad_norm": 1.0403144359588623, "learning_rate": 0.00017759543423753093, "loss": 1.9768, "step": 2289 }, { "epoch": 0.24092582851130984, "grad_norm": 1.298659086227417, "learning_rate": 0.00017757393410035033, "loss": 1.9757, "step": 2290 }, { "epoch": 0.24103103629668596, "grad_norm": 1.4886418581008911, "learning_rate": 0.00017755242495476418, "loss": 1.6223, "step": 2291 }, { "epoch": 0.24113624408206208, "grad_norm": 1.2547330856323242, "learning_rate": 0.00017753090680327032, "loss": 2.0024, "step": 2292 }, { "epoch": 0.2412414518674382, "grad_norm": 1.3677291870117188, "learning_rate": 0.00017750937964836755, "loss": 1.797, "step": 2293 }, { "epoch": 0.24134665965281432, "grad_norm": 1.4770119190216064, "learning_rate": 0.00017748784349255577, "loss": 2.2053, "step": 2294 }, { "epoch": 0.24145186743819042, "grad_norm": 0.817295491695404, "learning_rate": 0.00017746629833833585, "loss": 1.9076, "step": 2295 }, { "epoch": 0.24155707522356654, "grad_norm": 1.4393826723098755, "learning_rate": 0.00017744474418820985, "loss": 1.6945, "step": 2296 }, { "epoch": 0.24166228300894266, "grad_norm": 1.3872381448745728, "learning_rate": 0.00017742318104468067, "loss": 1.8003, "step": 2297 }, { "epoch": 0.24176749079431878, "grad_norm": 1.1846232414245605, "learning_rate": 0.00017740160891025245, "loss": 1.9204, "step": 2298 }, { "epoch": 0.2418726985796949, "grad_norm": 0.8881604075431824, "learning_rate": 0.00017738002778743027, "loss": 2.1186, "step": 2299 }, { "epoch": 0.24197790636507102, "grad_norm": 1.211893081665039, "learning_rate": 0.00017735843767872024, "loss": 2.0164, "step": 2300 }, { "epoch": 0.24208311415044714, "grad_norm": 1.456984281539917, "learning_rate": 0.0001773368385866296, "loss": 1.7958, "step": 2301 }, { "epoch": 0.24218832193582326, "grad_norm": 1.3485262393951416, "learning_rate": 0.00017731523051366658, "loss": 1.935, "step": 2302 }, { "epoch": 0.24229352972119936, "grad_norm": 1.3391015529632568, "learning_rate": 0.0001772936134623404, "loss": 1.9303, "step": 2303 }, { "epoch": 0.24239873750657548, "grad_norm": 1.4360216856002808, "learning_rate": 0.00017727198743516142, "loss": 1.9268, "step": 2304 }, { "epoch": 0.2425039452919516, "grad_norm": 1.5892516374588013, "learning_rate": 0.00017725035243464099, "loss": 1.6656, "step": 2305 }, { "epoch": 0.24260915307732772, "grad_norm": 1.308066964149475, "learning_rate": 0.0001772287084632915, "loss": 1.7519, "step": 2306 }, { "epoch": 0.24271436086270384, "grad_norm": 1.079184651374817, "learning_rate": 0.00017720705552362644, "loss": 2.3011, "step": 2307 }, { "epoch": 0.24281956864807996, "grad_norm": 1.0954896211624146, "learning_rate": 0.00017718539361816023, "loss": 2.275, "step": 2308 }, { "epoch": 0.24292477643345609, "grad_norm": 1.0315781831741333, "learning_rate": 0.00017716372274940843, "loss": 1.8872, "step": 2309 }, { "epoch": 0.2430299842188322, "grad_norm": 0.9103153944015503, "learning_rate": 0.00017714204291988762, "loss": 2.0967, "step": 2310 }, { "epoch": 0.2431351920042083, "grad_norm": 1.182989239692688, "learning_rate": 0.00017712035413211535, "loss": 1.6962, "step": 2311 }, { "epoch": 0.24324039978958442, "grad_norm": 0.8709691166877747, "learning_rate": 0.00017709865638861034, "loss": 1.6754, "step": 2312 }, { "epoch": 0.24334560757496054, "grad_norm": 1.3362677097320557, "learning_rate": 0.0001770769496918922, "loss": 1.6432, "step": 2313 }, { "epoch": 0.24345081536033666, "grad_norm": 1.2893030643463135, "learning_rate": 0.00017705523404448176, "loss": 2.0481, "step": 2314 }, { "epoch": 0.24355602314571279, "grad_norm": 0.913367748260498, "learning_rate": 0.00017703350944890068, "loss": 2.1723, "step": 2315 }, { "epoch": 0.2436612309310889, "grad_norm": 0.9429225325584412, "learning_rate": 0.00017701177590767183, "loss": 2.47, "step": 2316 }, { "epoch": 0.24376643871646503, "grad_norm": 0.9314096570014954, "learning_rate": 0.00017699003342331904, "loss": 2.2355, "step": 2317 }, { "epoch": 0.24387164650184115, "grad_norm": 1.0147193670272827, "learning_rate": 0.0001769682819983672, "loss": 2.2912, "step": 2318 }, { "epoch": 0.24397685428721724, "grad_norm": 0.9757153987884521, "learning_rate": 0.00017694652163534222, "loss": 1.817, "step": 2319 }, { "epoch": 0.24408206207259336, "grad_norm": 1.4220627546310425, "learning_rate": 0.00017692475233677105, "loss": 2.0166, "step": 2320 }, { "epoch": 0.24418726985796949, "grad_norm": 1.103240728378296, "learning_rate": 0.0001769029741051817, "loss": 1.9228, "step": 2321 }, { "epoch": 0.2442924776433456, "grad_norm": 1.1101772785186768, "learning_rate": 0.0001768811869431032, "loss": 2.1224, "step": 2322 }, { "epoch": 0.24439768542872173, "grad_norm": 1.5481605529785156, "learning_rate": 0.00017685939085306562, "loss": 2.0526, "step": 2323 }, { "epoch": 0.24450289321409785, "grad_norm": 1.3062520027160645, "learning_rate": 0.00017683758583760008, "loss": 1.8925, "step": 2324 }, { "epoch": 0.24460810099947397, "grad_norm": 1.5073399543762207, "learning_rate": 0.00017681577189923873, "loss": 2.0869, "step": 2325 }, { "epoch": 0.2447133087848501, "grad_norm": 0.8444550037384033, "learning_rate": 0.00017679394904051473, "loss": 2.1741, "step": 2326 }, { "epoch": 0.24481851657022619, "grad_norm": 0.9408750534057617, "learning_rate": 0.0001767721172639623, "loss": 1.9257, "step": 2327 }, { "epoch": 0.2449237243556023, "grad_norm": 1.572348713874817, "learning_rate": 0.0001767502765721167, "loss": 2.2418, "step": 2328 }, { "epoch": 0.24502893214097843, "grad_norm": 1.0508718490600586, "learning_rate": 0.0001767284269675142, "loss": 2.1225, "step": 2329 }, { "epoch": 0.24513413992635455, "grad_norm": 1.1722843647003174, "learning_rate": 0.00017670656845269214, "loss": 2.103, "step": 2330 }, { "epoch": 0.24523934771173067, "grad_norm": 1.4209791421890259, "learning_rate": 0.00017668470103018887, "loss": 1.5394, "step": 2331 }, { "epoch": 0.2453445554971068, "grad_norm": 1.2421069145202637, "learning_rate": 0.00017666282470254381, "loss": 2.0012, "step": 2332 }, { "epoch": 0.2454497632824829, "grad_norm": 0.9292833209037781, "learning_rate": 0.00017664093947229736, "loss": 2.2945, "step": 2333 }, { "epoch": 0.24555497106785903, "grad_norm": 1.0548170804977417, "learning_rate": 0.00017661904534199097, "loss": 2.0605, "step": 2334 }, { "epoch": 0.24566017885323513, "grad_norm": 0.8535258769989014, "learning_rate": 0.00017659714231416714, "loss": 2.3665, "step": 2335 }, { "epoch": 0.24576538663861125, "grad_norm": 1.2200136184692383, "learning_rate": 0.00017657523039136942, "loss": 2.0179, "step": 2336 }, { "epoch": 0.24587059442398737, "grad_norm": 1.0717980861663818, "learning_rate": 0.00017655330957614234, "loss": 2.1129, "step": 2337 }, { "epoch": 0.2459758022093635, "grad_norm": 0.9226016402244568, "learning_rate": 0.0001765313798710315, "loss": 2.2021, "step": 2338 }, { "epoch": 0.2460810099947396, "grad_norm": 1.2788020372390747, "learning_rate": 0.00017650944127858354, "loss": 1.7879, "step": 2339 }, { "epoch": 0.24618621778011573, "grad_norm": 1.342400074005127, "learning_rate": 0.00017648749380134608, "loss": 1.8949, "step": 2340 }, { "epoch": 0.24629142556549186, "grad_norm": 0.979184627532959, "learning_rate": 0.00017646553744186784, "loss": 1.773, "step": 2341 }, { "epoch": 0.24639663335086798, "grad_norm": 1.4060213565826416, "learning_rate": 0.00017644357220269856, "loss": 2.2494, "step": 2342 }, { "epoch": 0.24650184113624407, "grad_norm": 1.8778382539749146, "learning_rate": 0.0001764215980863889, "loss": 2.1433, "step": 2343 }, { "epoch": 0.2466070489216202, "grad_norm": 1.214531421661377, "learning_rate": 0.00017639961509549078, "loss": 1.936, "step": 2344 }, { "epoch": 0.2467122567069963, "grad_norm": 1.4594982862472534, "learning_rate": 0.0001763776232325569, "loss": 1.9461, "step": 2345 }, { "epoch": 0.24681746449237243, "grad_norm": 1.5665651559829712, "learning_rate": 0.00017635562250014112, "loss": 2.5673, "step": 2346 }, { "epoch": 0.24692267227774856, "grad_norm": 2.044776439666748, "learning_rate": 0.00017633361290079837, "loss": 2.6654, "step": 2347 }, { "epoch": 0.24702788006312468, "grad_norm": 1.7862943410873413, "learning_rate": 0.0001763115944370845, "loss": 2.3661, "step": 2348 }, { "epoch": 0.2471330878485008, "grad_norm": 1.3187406063079834, "learning_rate": 0.00017628956711155644, "loss": 1.5839, "step": 2349 }, { "epoch": 0.24723829563387692, "grad_norm": 1.1463435888290405, "learning_rate": 0.0001762675309267722, "loss": 2.1677, "step": 2350 }, { "epoch": 0.247343503419253, "grad_norm": 1.8317863941192627, "learning_rate": 0.00017624548588529072, "loss": 2.1135, "step": 2351 }, { "epoch": 0.24744871120462913, "grad_norm": 1.2014631032943726, "learning_rate": 0.00017622343198967202, "loss": 1.9363, "step": 2352 }, { "epoch": 0.24755391899000526, "grad_norm": 1.3410941362380981, "learning_rate": 0.00017620136924247719, "loss": 2.149, "step": 2353 }, { "epoch": 0.24765912677538138, "grad_norm": 1.2935651540756226, "learning_rate": 0.00017617929764626825, "loss": 2.3697, "step": 2354 }, { "epoch": 0.2477643345607575, "grad_norm": 1.6687452793121338, "learning_rate": 0.00017615721720360834, "loss": 1.979, "step": 2355 }, { "epoch": 0.24786954234613362, "grad_norm": 1.249712347984314, "learning_rate": 0.00017613512791706155, "loss": 1.6578, "step": 2356 }, { "epoch": 0.24797475013150974, "grad_norm": 1.1926326751708984, "learning_rate": 0.0001761130297891931, "loss": 2.3114, "step": 2357 }, { "epoch": 0.24807995791688586, "grad_norm": 1.159462332725525, "learning_rate": 0.00017609092282256912, "loss": 1.8345, "step": 2358 }, { "epoch": 0.24818516570226196, "grad_norm": 1.2362000942230225, "learning_rate": 0.00017606880701975683, "loss": 2.6279, "step": 2359 }, { "epoch": 0.24829037348763808, "grad_norm": 1.116899013519287, "learning_rate": 0.00017604668238332448, "loss": 2.3413, "step": 2360 }, { "epoch": 0.2483955812730142, "grad_norm": 1.3379223346710205, "learning_rate": 0.0001760245489158413, "loss": 1.7864, "step": 2361 }, { "epoch": 0.24850078905839032, "grad_norm": 1.2308326959609985, "learning_rate": 0.00017600240661987763, "loss": 2.0573, "step": 2362 }, { "epoch": 0.24860599684376644, "grad_norm": 1.1983873844146729, "learning_rate": 0.00017598025549800473, "loss": 1.7869, "step": 2363 }, { "epoch": 0.24871120462914256, "grad_norm": 1.1547279357910156, "learning_rate": 0.00017595809555279494, "loss": 1.9424, "step": 2364 }, { "epoch": 0.24881641241451868, "grad_norm": 1.0015462636947632, "learning_rate": 0.00017593592678682166, "loss": 2.0275, "step": 2365 }, { "epoch": 0.2489216201998948, "grad_norm": 0.9650968313217163, "learning_rate": 0.00017591374920265923, "loss": 1.815, "step": 2366 }, { "epoch": 0.2490268279852709, "grad_norm": 1.2822868824005127, "learning_rate": 0.00017589156280288311, "loss": 2.1693, "step": 2367 }, { "epoch": 0.24913203577064702, "grad_norm": 1.4724416732788086, "learning_rate": 0.00017586936759006968, "loss": 2.15, "step": 2368 }, { "epoch": 0.24923724355602314, "grad_norm": 1.5108137130737305, "learning_rate": 0.00017584716356679647, "loss": 1.6316, "step": 2369 }, { "epoch": 0.24934245134139926, "grad_norm": 1.047283411026001, "learning_rate": 0.0001758249507356419, "loss": 1.9975, "step": 2370 }, { "epoch": 0.24944765912677538, "grad_norm": 1.0887430906295776, "learning_rate": 0.00017580272909918545, "loss": 1.6365, "step": 2371 }, { "epoch": 0.2495528669121515, "grad_norm": 0.8896718621253967, "learning_rate": 0.0001757804986600077, "loss": 1.9713, "step": 2372 }, { "epoch": 0.24965807469752763, "grad_norm": 0.9527761340141296, "learning_rate": 0.00017575825942069018, "loss": 1.6953, "step": 2373 }, { "epoch": 0.24976328248290375, "grad_norm": 1.2873328924179077, "learning_rate": 0.00017573601138381548, "loss": 2.5118, "step": 2374 }, { "epoch": 0.24986849026827984, "grad_norm": 0.9107836484909058, "learning_rate": 0.00017571375455196714, "loss": 1.8383, "step": 2375 }, { "epoch": 0.24997369805365596, "grad_norm": 1.5500174760818481, "learning_rate": 0.00017569148892772983, "loss": 1.7582, "step": 2376 }, { "epoch": 0.2500789058390321, "grad_norm": 1.8706609010696411, "learning_rate": 0.0001756692145136891, "loss": 1.4764, "step": 2377 }, { "epoch": 0.25018411362440823, "grad_norm": 1.1527718305587769, "learning_rate": 0.00017564693131243172, "loss": 2.3996, "step": 2378 }, { "epoch": 0.2502893214097843, "grad_norm": 1.112915277481079, "learning_rate": 0.0001756246393265453, "loss": 2.4006, "step": 2379 }, { "epoch": 0.2503945291951604, "grad_norm": 1.4158530235290527, "learning_rate": 0.00017560233855861855, "loss": 2.4866, "step": 2380 }, { "epoch": 0.25049973698053657, "grad_norm": 1.486147403717041, "learning_rate": 0.00017558002901124113, "loss": 1.7218, "step": 2381 }, { "epoch": 0.25060494476591266, "grad_norm": 1.7639609575271606, "learning_rate": 0.00017555771068700386, "loss": 1.7781, "step": 2382 }, { "epoch": 0.2507101525512888, "grad_norm": 1.4476866722106934, "learning_rate": 0.00017553538358849844, "loss": 1.8637, "step": 2383 }, { "epoch": 0.2508153603366649, "grad_norm": 1.6569194793701172, "learning_rate": 0.00017551304771831766, "loss": 1.8196, "step": 2384 }, { "epoch": 0.25092056812204105, "grad_norm": 1.3412871360778809, "learning_rate": 0.0001754907030790553, "loss": 1.9298, "step": 2385 }, { "epoch": 0.25102577590741715, "grad_norm": 1.0702909231185913, "learning_rate": 0.00017546834967330617, "loss": 2.0842, "step": 2386 }, { "epoch": 0.25113098369279324, "grad_norm": 0.887639045715332, "learning_rate": 0.00017544598750366614, "loss": 2.1446, "step": 2387 }, { "epoch": 0.2512361914781694, "grad_norm": 0.9364414811134338, "learning_rate": 0.000175423616572732, "loss": 2.218, "step": 2388 }, { "epoch": 0.2513413992635455, "grad_norm": 1.431705355644226, "learning_rate": 0.00017540123688310162, "loss": 1.7036, "step": 2389 }, { "epoch": 0.25144660704892163, "grad_norm": 1.2119871377944946, "learning_rate": 0.00017537884843737392, "loss": 2.1576, "step": 2390 }, { "epoch": 0.2515518148342977, "grad_norm": 1.0442527532577515, "learning_rate": 0.00017535645123814873, "loss": 2.1174, "step": 2391 }, { "epoch": 0.2516570226196739, "grad_norm": 1.7288084030151367, "learning_rate": 0.000175334045288027, "loss": 1.7428, "step": 2392 }, { "epoch": 0.25176223040504997, "grad_norm": 0.9551234245300293, "learning_rate": 0.00017531163058961066, "loss": 1.8753, "step": 2393 }, { "epoch": 0.2518674381904261, "grad_norm": 1.1774919033050537, "learning_rate": 0.0001752892071455027, "loss": 2.1542, "step": 2394 }, { "epoch": 0.2519726459758022, "grad_norm": 1.0245354175567627, "learning_rate": 0.000175266774958307, "loss": 1.8211, "step": 2395 }, { "epoch": 0.2520778537611783, "grad_norm": 1.1799890995025635, "learning_rate": 0.0001752443340306286, "loss": 1.9991, "step": 2396 }, { "epoch": 0.25218306154655445, "grad_norm": 2.0197417736053467, "learning_rate": 0.00017522188436507342, "loss": 1.7232, "step": 2397 }, { "epoch": 0.25228826933193055, "grad_norm": 1.235841155052185, "learning_rate": 0.0001751994259642485, "loss": 2.0587, "step": 2398 }, { "epoch": 0.2523934771173067, "grad_norm": 1.3500566482543945, "learning_rate": 0.00017517695883076192, "loss": 1.611, "step": 2399 }, { "epoch": 0.2524986849026828, "grad_norm": 1.6199384927749634, "learning_rate": 0.00017515448296722262, "loss": 1.7509, "step": 2400 }, { "epoch": 0.25260389268805894, "grad_norm": 1.55356764793396, "learning_rate": 0.00017513199837624073, "loss": 1.9007, "step": 2401 }, { "epoch": 0.25270910047343503, "grad_norm": 1.423980474472046, "learning_rate": 0.00017510950506042727, "loss": 1.6977, "step": 2402 }, { "epoch": 0.2528143082588111, "grad_norm": 1.0989725589752197, "learning_rate": 0.00017508700302239428, "loss": 1.8371, "step": 2403 }, { "epoch": 0.2529195160441873, "grad_norm": 1.6548444032669067, "learning_rate": 0.00017506449226475492, "loss": 1.9493, "step": 2404 }, { "epoch": 0.25302472382956337, "grad_norm": 1.6886351108551025, "learning_rate": 0.00017504197279012321, "loss": 2.4116, "step": 2405 }, { "epoch": 0.2531299316149395, "grad_norm": 0.9853400588035583, "learning_rate": 0.00017501944460111436, "loss": 2.1326, "step": 2406 }, { "epoch": 0.2532351394003156, "grad_norm": 0.8209208250045776, "learning_rate": 0.00017499690770034443, "loss": 2.1675, "step": 2407 }, { "epoch": 0.25334034718569176, "grad_norm": 1.097050666809082, "learning_rate": 0.00017497436209043055, "loss": 2.0179, "step": 2408 }, { "epoch": 0.25344555497106785, "grad_norm": 1.007564663887024, "learning_rate": 0.00017495180777399088, "loss": 2.0509, "step": 2409 }, { "epoch": 0.253550762756444, "grad_norm": 2.713827133178711, "learning_rate": 0.00017492924475364462, "loss": 1.6709, "step": 2410 }, { "epoch": 0.2536559705418201, "grad_norm": 1.4967379570007324, "learning_rate": 0.00017490667303201186, "loss": 1.4405, "step": 2411 }, { "epoch": 0.2537611783271962, "grad_norm": 1.0364857912063599, "learning_rate": 0.00017488409261171386, "loss": 2.3166, "step": 2412 }, { "epoch": 0.25386638611257234, "grad_norm": 1.656065583229065, "learning_rate": 0.00017486150349537276, "loss": 1.8276, "step": 2413 }, { "epoch": 0.25397159389794843, "grad_norm": 1.0365979671478271, "learning_rate": 0.00017483890568561173, "loss": 2.1835, "step": 2414 }, { "epoch": 0.2540768016833246, "grad_norm": 0.7417734861373901, "learning_rate": 0.0001748162991850551, "loss": 2.2011, "step": 2415 }, { "epoch": 0.2541820094687007, "grad_norm": 1.164849042892456, "learning_rate": 0.00017479368399632797, "loss": 2.0921, "step": 2416 }, { "epoch": 0.2542872172540768, "grad_norm": 1.243613362312317, "learning_rate": 0.0001747710601220566, "loss": 1.3508, "step": 2417 }, { "epoch": 0.2543924250394529, "grad_norm": 1.5702705383300781, "learning_rate": 0.0001747484275648682, "loss": 2.0768, "step": 2418 }, { "epoch": 0.254497632824829, "grad_norm": 1.5284066200256348, "learning_rate": 0.0001747257863273911, "loss": 2.144, "step": 2419 }, { "epoch": 0.25460284061020516, "grad_norm": 1.222301721572876, "learning_rate": 0.0001747031364122545, "loss": 1.833, "step": 2420 }, { "epoch": 0.25470804839558125, "grad_norm": 1.341837763786316, "learning_rate": 0.00017468047782208865, "loss": 1.9542, "step": 2421 }, { "epoch": 0.2548132561809574, "grad_norm": 1.10076904296875, "learning_rate": 0.00017465781055952482, "loss": 1.6218, "step": 2422 }, { "epoch": 0.2549184639663335, "grad_norm": 1.2603121995925903, "learning_rate": 0.0001746351346271953, "loss": 2.0813, "step": 2423 }, { "epoch": 0.25502367175170965, "grad_norm": 1.3991905450820923, "learning_rate": 0.00017461245002773336, "loss": 2.2184, "step": 2424 }, { "epoch": 0.25512887953708574, "grad_norm": 0.874963641166687, "learning_rate": 0.00017458975676377326, "loss": 2.2639, "step": 2425 }, { "epoch": 0.2552340873224619, "grad_norm": 1.275423288345337, "learning_rate": 0.00017456705483795038, "loss": 1.8021, "step": 2426 }, { "epoch": 0.255339295107838, "grad_norm": 1.325136423110962, "learning_rate": 0.00017454434425290093, "loss": 2.2287, "step": 2427 }, { "epoch": 0.2554445028932141, "grad_norm": 1.783543348312378, "learning_rate": 0.00017452162501126227, "loss": 1.6924, "step": 2428 }, { "epoch": 0.2555497106785902, "grad_norm": 1.0781643390655518, "learning_rate": 0.0001744988971156727, "loss": 1.8557, "step": 2429 }, { "epoch": 0.2556549184639663, "grad_norm": 1.3726019859313965, "learning_rate": 0.00017447616056877148, "loss": 1.7486, "step": 2430 }, { "epoch": 0.25576012624934247, "grad_norm": 1.8802924156188965, "learning_rate": 0.000174453415373199, "loss": 1.8817, "step": 2431 }, { "epoch": 0.25586533403471856, "grad_norm": 1.2403502464294434, "learning_rate": 0.00017443066153159656, "loss": 2.513, "step": 2432 }, { "epoch": 0.2559705418200947, "grad_norm": 0.8978909850120544, "learning_rate": 0.00017440789904660652, "loss": 2.163, "step": 2433 }, { "epoch": 0.2560757496054708, "grad_norm": 1.4527589082717896, "learning_rate": 0.00017438512792087218, "loss": 2.2242, "step": 2434 }, { "epoch": 0.2561809573908469, "grad_norm": 1.3555766344070435, "learning_rate": 0.00017436234815703788, "loss": 2.0565, "step": 2435 }, { "epoch": 0.25628616517622305, "grad_norm": 1.2842960357666016, "learning_rate": 0.0001743395597577489, "loss": 2.1045, "step": 2436 }, { "epoch": 0.25639137296159914, "grad_norm": 0.9545568823814392, "learning_rate": 0.0001743167627256517, "loss": 1.7778, "step": 2437 }, { "epoch": 0.2564965807469753, "grad_norm": 1.2293113470077515, "learning_rate": 0.00017429395706339355, "loss": 2.1365, "step": 2438 }, { "epoch": 0.2566017885323514, "grad_norm": 1.2413405179977417, "learning_rate": 0.0001742711427736228, "loss": 2.3555, "step": 2439 }, { "epoch": 0.25670699631772753, "grad_norm": 0.9717143774032593, "learning_rate": 0.00017424831985898883, "loss": 1.5027, "step": 2440 }, { "epoch": 0.2568122041031036, "grad_norm": 1.797363042831421, "learning_rate": 0.0001742254883221419, "loss": 1.6288, "step": 2441 }, { "epoch": 0.2569174118884798, "grad_norm": 1.5854530334472656, "learning_rate": 0.0001742026481657335, "loss": 2.0474, "step": 2442 }, { "epoch": 0.25702261967385587, "grad_norm": 1.1225696802139282, "learning_rate": 0.0001741797993924159, "loss": 2.2607, "step": 2443 }, { "epoch": 0.25712782745923196, "grad_norm": 1.4167970418930054, "learning_rate": 0.00017415694200484247, "loss": 1.8007, "step": 2444 }, { "epoch": 0.2572330352446081, "grad_norm": 1.275691032409668, "learning_rate": 0.00017413407600566755, "loss": 1.6061, "step": 2445 }, { "epoch": 0.2573382430299842, "grad_norm": 1.4618197679519653, "learning_rate": 0.00017411120139754652, "loss": 2.1667, "step": 2446 }, { "epoch": 0.25744345081536035, "grad_norm": 1.4069091081619263, "learning_rate": 0.00017408831818313566, "loss": 1.4578, "step": 2447 }, { "epoch": 0.25754865860073645, "grad_norm": 1.2108229398727417, "learning_rate": 0.0001740654263650924, "loss": 2.1124, "step": 2448 }, { "epoch": 0.2576538663861126, "grad_norm": 1.4401681423187256, "learning_rate": 0.0001740425259460751, "loss": 1.7578, "step": 2449 }, { "epoch": 0.2577590741714887, "grad_norm": 1.814821481704712, "learning_rate": 0.00017401961692874304, "loss": 2.2632, "step": 2450 }, { "epoch": 0.2578642819568648, "grad_norm": 1.439861536026001, "learning_rate": 0.00017399669931575663, "loss": 2.0677, "step": 2451 }, { "epoch": 0.25796948974224093, "grad_norm": 1.9982361793518066, "learning_rate": 0.0001739737731097772, "loss": 2.1271, "step": 2452 }, { "epoch": 0.258074697527617, "grad_norm": 1.41429603099823, "learning_rate": 0.00017395083831346707, "loss": 1.7051, "step": 2453 }, { "epoch": 0.2581799053129932, "grad_norm": 1.6925151348114014, "learning_rate": 0.0001739278949294896, "loss": 2.1179, "step": 2454 }, { "epoch": 0.25828511309836927, "grad_norm": 1.9045064449310303, "learning_rate": 0.0001739049429605091, "loss": 1.7847, "step": 2455 }, { "epoch": 0.2583903208837454, "grad_norm": 1.0189872980117798, "learning_rate": 0.00017388198240919102, "loss": 2.0259, "step": 2456 }, { "epoch": 0.2584955286691215, "grad_norm": 1.117735505104065, "learning_rate": 0.00017385901327820157, "loss": 2.3478, "step": 2457 }, { "epoch": 0.25860073645449766, "grad_norm": 1.060653567314148, "learning_rate": 0.0001738360355702081, "loss": 2.1214, "step": 2458 }, { "epoch": 0.25870594423987375, "grad_norm": 1.3654861450195312, "learning_rate": 0.00017381304928787897, "loss": 1.7643, "step": 2459 }, { "epoch": 0.25881115202524985, "grad_norm": 0.8313367366790771, "learning_rate": 0.00017379005443388348, "loss": 1.7377, "step": 2460 }, { "epoch": 0.258916359810626, "grad_norm": 0.9707303643226624, "learning_rate": 0.00017376705101089198, "loss": 2.323, "step": 2461 }, { "epoch": 0.2590215675960021, "grad_norm": 1.2245289087295532, "learning_rate": 0.0001737440390215757, "loss": 1.6078, "step": 2462 }, { "epoch": 0.25912677538137824, "grad_norm": 1.0803886651992798, "learning_rate": 0.00017372101846860707, "loss": 1.9575, "step": 2463 }, { "epoch": 0.25923198316675433, "grad_norm": 0.9316054582595825, "learning_rate": 0.00017369798935465926, "loss": 1.5473, "step": 2464 }, { "epoch": 0.2593371909521305, "grad_norm": 1.0661040544509888, "learning_rate": 0.00017367495168240667, "loss": 2.1103, "step": 2465 }, { "epoch": 0.2594423987375066, "grad_norm": 0.8651044368743896, "learning_rate": 0.00017365190545452452, "loss": 2.0468, "step": 2466 }, { "epoch": 0.25954760652288267, "grad_norm": 1.3294917345046997, "learning_rate": 0.00017362885067368915, "loss": 1.7681, "step": 2467 }, { "epoch": 0.2596528143082588, "grad_norm": 1.514784574508667, "learning_rate": 0.0001736057873425778, "loss": 2.0482, "step": 2468 }, { "epoch": 0.2597580220936349, "grad_norm": 1.4999936819076538, "learning_rate": 0.00017358271546386874, "loss": 1.8877, "step": 2469 }, { "epoch": 0.25986322987901106, "grad_norm": 0.9134921431541443, "learning_rate": 0.00017355963504024123, "loss": 2.1311, "step": 2470 }, { "epoch": 0.25996843766438715, "grad_norm": 1.9929500818252563, "learning_rate": 0.0001735365460743755, "loss": 1.5913, "step": 2471 }, { "epoch": 0.2600736454497633, "grad_norm": 1.1672104597091675, "learning_rate": 0.00017351344856895287, "loss": 1.9764, "step": 2472 }, { "epoch": 0.2601788532351394, "grad_norm": 1.5550178289413452, "learning_rate": 0.0001734903425266555, "loss": 2.003, "step": 2473 }, { "epoch": 0.26028406102051554, "grad_norm": 0.8885960578918457, "learning_rate": 0.00017346722795016665, "loss": 1.7482, "step": 2474 }, { "epoch": 0.26038926880589164, "grad_norm": 0.9775580763816833, "learning_rate": 0.00017344410484217056, "loss": 2.274, "step": 2475 }, { "epoch": 0.26049447659126773, "grad_norm": 1.9509919881820679, "learning_rate": 0.00017342097320535244, "loss": 2.0511, "step": 2476 }, { "epoch": 0.2605996843766439, "grad_norm": 1.423757553100586, "learning_rate": 0.00017339783304239843, "loss": 1.6038, "step": 2477 }, { "epoch": 0.26070489216202, "grad_norm": 1.0820887088775635, "learning_rate": 0.0001733746843559958, "loss": 1.811, "step": 2478 }, { "epoch": 0.2608100999473961, "grad_norm": 1.143119215965271, "learning_rate": 0.00017335152714883267, "loss": 1.6442, "step": 2479 }, { "epoch": 0.2609153077327722, "grad_norm": 0.8639788627624512, "learning_rate": 0.00017332836142359823, "loss": 2.1835, "step": 2480 }, { "epoch": 0.26102051551814837, "grad_norm": 1.1601319313049316, "learning_rate": 0.00017330518718298264, "loss": 1.8785, "step": 2481 }, { "epoch": 0.26112572330352446, "grad_norm": 1.2111576795578003, "learning_rate": 0.00017328200442967706, "loss": 2.1807, "step": 2482 }, { "epoch": 0.26123093108890055, "grad_norm": 1.271608591079712, "learning_rate": 0.00017325881316637362, "loss": 2.0741, "step": 2483 }, { "epoch": 0.2613361388742767, "grad_norm": 1.105197548866272, "learning_rate": 0.00017323561339576543, "loss": 1.9965, "step": 2484 }, { "epoch": 0.2614413466596528, "grad_norm": 1.9587740898132324, "learning_rate": 0.00017321240512054663, "loss": 1.8361, "step": 2485 }, { "epoch": 0.26154655444502894, "grad_norm": 1.2962583303451538, "learning_rate": 0.0001731891883434123, "loss": 2.3446, "step": 2486 }, { "epoch": 0.26165176223040504, "grad_norm": 1.0470434427261353, "learning_rate": 0.00017316596306705853, "loss": 2.0182, "step": 2487 }, { "epoch": 0.2617569700157812, "grad_norm": 2.4733150005340576, "learning_rate": 0.0001731427292941824, "loss": 2.066, "step": 2488 }, { "epoch": 0.2618621778011573, "grad_norm": 1.580335259437561, "learning_rate": 0.00017311948702748196, "loss": 2.2136, "step": 2489 }, { "epoch": 0.26196738558653343, "grad_norm": 1.2528669834136963, "learning_rate": 0.0001730962362696563, "loss": 2.0774, "step": 2490 }, { "epoch": 0.2620725933719095, "grad_norm": 1.2130522727966309, "learning_rate": 0.0001730729770234054, "loss": 1.7999, "step": 2491 }, { "epoch": 0.2621778011572856, "grad_norm": 1.3857465982437134, "learning_rate": 0.00017304970929143032, "loss": 1.8678, "step": 2492 }, { "epoch": 0.26228300894266177, "grad_norm": 1.4128077030181885, "learning_rate": 0.00017302643307643304, "loss": 2.1356, "step": 2493 }, { "epoch": 0.26238821672803786, "grad_norm": 1.1475266218185425, "learning_rate": 0.00017300314838111653, "loss": 1.8912, "step": 2494 }, { "epoch": 0.262493424513414, "grad_norm": 0.7525075674057007, "learning_rate": 0.0001729798552081848, "loss": 1.9335, "step": 2495 }, { "epoch": 0.2625986322987901, "grad_norm": 1.9416919946670532, "learning_rate": 0.00017295655356034284, "loss": 2.0356, "step": 2496 }, { "epoch": 0.26270384008416625, "grad_norm": 1.7642756700515747, "learning_rate": 0.00017293324344029652, "loss": 1.6941, "step": 2497 }, { "epoch": 0.26280904786954234, "grad_norm": 1.2156851291656494, "learning_rate": 0.00017290992485075282, "loss": 1.9932, "step": 2498 }, { "epoch": 0.26291425565491844, "grad_norm": 1.551274061203003, "learning_rate": 0.00017288659779441962, "loss": 2.309, "step": 2499 }, { "epoch": 0.2630194634402946, "grad_norm": 1.3318612575531006, "learning_rate": 0.00017286326227400583, "loss": 1.9814, "step": 2500 }, { "epoch": 0.2631246712256707, "grad_norm": 0.949734628200531, "learning_rate": 0.00017283991829222133, "loss": 1.8459, "step": 2501 }, { "epoch": 0.26322987901104683, "grad_norm": 1.3344646692276, "learning_rate": 0.00017281656585177698, "loss": 1.2661, "step": 2502 }, { "epoch": 0.2633350867964229, "grad_norm": 1.1102912425994873, "learning_rate": 0.0001727932049553846, "loss": 1.8715, "step": 2503 }, { "epoch": 0.2634402945817991, "grad_norm": 1.1450645923614502, "learning_rate": 0.00017276983560575703, "loss": 1.9675, "step": 2504 }, { "epoch": 0.26354550236717517, "grad_norm": 2.4335386753082275, "learning_rate": 0.0001727464578056081, "loss": 1.688, "step": 2505 }, { "epoch": 0.2636507101525513, "grad_norm": 1.290065884590149, "learning_rate": 0.00017272307155765258, "loss": 2.4197, "step": 2506 }, { "epoch": 0.2637559179379274, "grad_norm": 1.097562313079834, "learning_rate": 0.00017269967686460617, "loss": 1.8908, "step": 2507 }, { "epoch": 0.2638611257233035, "grad_norm": 1.137505292892456, "learning_rate": 0.00017267627372918575, "loss": 1.9866, "step": 2508 }, { "epoch": 0.26396633350867965, "grad_norm": 1.3950042724609375, "learning_rate": 0.00017265286215410893, "loss": 2.0389, "step": 2509 }, { "epoch": 0.26407154129405574, "grad_norm": 1.7362642288208008, "learning_rate": 0.00017262944214209452, "loss": 1.6798, "step": 2510 }, { "epoch": 0.2641767490794319, "grad_norm": 1.443983793258667, "learning_rate": 0.0001726060136958621, "loss": 2.0108, "step": 2511 }, { "epoch": 0.264281956864808, "grad_norm": 1.4797279834747314, "learning_rate": 0.00017258257681813244, "loss": 2.1318, "step": 2512 }, { "epoch": 0.26438716465018414, "grad_norm": 3.060091257095337, "learning_rate": 0.00017255913151162714, "loss": 2.1173, "step": 2513 }, { "epoch": 0.26449237243556023, "grad_norm": 1.2308971881866455, "learning_rate": 0.00017253567777906882, "loss": 1.9013, "step": 2514 }, { "epoch": 0.2645975802209363, "grad_norm": 1.0173864364624023, "learning_rate": 0.00017251221562318108, "loss": 2.111, "step": 2515 }, { "epoch": 0.2647027880063125, "grad_norm": 1.6821645498275757, "learning_rate": 0.0001724887450466885, "loss": 2.4451, "step": 2516 }, { "epoch": 0.26480799579168857, "grad_norm": 1.388883352279663, "learning_rate": 0.0001724652660523167, "loss": 2.4442, "step": 2517 }, { "epoch": 0.2649132035770647, "grad_norm": 1.0989224910736084, "learning_rate": 0.00017244177864279215, "loss": 1.6966, "step": 2518 }, { "epoch": 0.2650184113624408, "grad_norm": 1.3560047149658203, "learning_rate": 0.0001724182828208424, "loss": 1.9355, "step": 2519 }, { "epoch": 0.26512361914781696, "grad_norm": 0.8736411333084106, "learning_rate": 0.00017239477858919594, "loss": 1.9303, "step": 2520 }, { "epoch": 0.26522882693319305, "grad_norm": 0.9820002317428589, "learning_rate": 0.00017237126595058224, "loss": 2.123, "step": 2521 }, { "epoch": 0.2653340347185692, "grad_norm": 1.382918357849121, "learning_rate": 0.0001723477449077317, "loss": 2.0202, "step": 2522 }, { "epoch": 0.2654392425039453, "grad_norm": 1.1429475545883179, "learning_rate": 0.00017232421546337583, "loss": 1.7081, "step": 2523 }, { "epoch": 0.2655444502893214, "grad_norm": 1.5039023160934448, "learning_rate": 0.00017230067762024693, "loss": 1.82, "step": 2524 }, { "epoch": 0.26564965807469754, "grad_norm": 1.7104127407073975, "learning_rate": 0.00017227713138107844, "loss": 2.1606, "step": 2525 }, { "epoch": 0.26575486586007363, "grad_norm": 1.3519093990325928, "learning_rate": 0.0001722535767486047, "loss": 1.8595, "step": 2526 }, { "epoch": 0.2658600736454498, "grad_norm": 1.0122334957122803, "learning_rate": 0.000172230013725561, "loss": 2.1709, "step": 2527 }, { "epoch": 0.2659652814308259, "grad_norm": 1.7543777227401733, "learning_rate": 0.00017220644231468366, "loss": 2.0135, "step": 2528 }, { "epoch": 0.266070489216202, "grad_norm": 0.9146006107330322, "learning_rate": 0.00017218286251870994, "loss": 1.9128, "step": 2529 }, { "epoch": 0.2661756970015781, "grad_norm": 1.8088133335113525, "learning_rate": 0.0001721592743403781, "loss": 1.7378, "step": 2530 }, { "epoch": 0.2662809047869542, "grad_norm": 0.819983720779419, "learning_rate": 0.00017213567778242731, "loss": 2.0829, "step": 2531 }, { "epoch": 0.26638611257233036, "grad_norm": 0.8384549021720886, "learning_rate": 0.00017211207284759784, "loss": 1.9466, "step": 2532 }, { "epoch": 0.26649132035770645, "grad_norm": 2.0535356998443604, "learning_rate": 0.00017208845953863076, "loss": 2.1554, "step": 2533 }, { "epoch": 0.2665965281430826, "grad_norm": 0.8103688955307007, "learning_rate": 0.00017206483785826832, "loss": 2.2735, "step": 2534 }, { "epoch": 0.2667017359284587, "grad_norm": 1.2302720546722412, "learning_rate": 0.00017204120780925353, "loss": 1.7633, "step": 2535 }, { "epoch": 0.26680694371383484, "grad_norm": 1.417521595954895, "learning_rate": 0.0001720175693943305, "loss": 1.8596, "step": 2536 }, { "epoch": 0.26691215149921094, "grad_norm": 1.3307616710662842, "learning_rate": 0.00017199392261624429, "loss": 1.6087, "step": 2537 }, { "epoch": 0.2670173592845871, "grad_norm": 1.0965129137039185, "learning_rate": 0.0001719702674777409, "loss": 2.2497, "step": 2538 }, { "epoch": 0.2671225670699632, "grad_norm": 1.9108279943466187, "learning_rate": 0.00017194660398156737, "loss": 1.6981, "step": 2539 }, { "epoch": 0.2672277748553393, "grad_norm": 1.4486432075500488, "learning_rate": 0.0001719229321304716, "loss": 1.6847, "step": 2540 }, { "epoch": 0.2673329826407154, "grad_norm": 1.0305055379867554, "learning_rate": 0.00017189925192720258, "loss": 2.2104, "step": 2541 }, { "epoch": 0.2674381904260915, "grad_norm": 1.1246416568756104, "learning_rate": 0.0001718755633745102, "loss": 1.5966, "step": 2542 }, { "epoch": 0.26754339821146766, "grad_norm": 1.09429931640625, "learning_rate": 0.00017185186647514531, "loss": 2.6052, "step": 2543 }, { "epoch": 0.26764860599684376, "grad_norm": 1.0281459093093872, "learning_rate": 0.0001718281612318598, "loss": 1.998, "step": 2544 }, { "epoch": 0.2677538137822199, "grad_norm": 1.2437368631362915, "learning_rate": 0.0001718044476474064, "loss": 2.2582, "step": 2545 }, { "epoch": 0.267859021567596, "grad_norm": 1.0594416856765747, "learning_rate": 0.00017178072572453896, "loss": 1.4396, "step": 2546 }, { "epoch": 0.2679642293529721, "grad_norm": 1.3116780519485474, "learning_rate": 0.00017175699546601223, "loss": 2.0078, "step": 2547 }, { "epoch": 0.26806943713834824, "grad_norm": 1.8310174942016602, "learning_rate": 0.00017173325687458188, "loss": 1.6103, "step": 2548 }, { "epoch": 0.26817464492372434, "grad_norm": 1.762455940246582, "learning_rate": 0.00017170950995300466, "loss": 1.9907, "step": 2549 }, { "epoch": 0.2682798527091005, "grad_norm": 1.2596118450164795, "learning_rate": 0.00017168575470403815, "loss": 1.6975, "step": 2550 }, { "epoch": 0.2683850604944766, "grad_norm": 1.0478299856185913, "learning_rate": 0.000171661991130441, "loss": 1.7204, "step": 2551 }, { "epoch": 0.26849026827985273, "grad_norm": 2.9845941066741943, "learning_rate": 0.0001716382192349728, "loss": 1.4462, "step": 2552 }, { "epoch": 0.2685954760652288, "grad_norm": 1.6229699850082397, "learning_rate": 0.00017161443902039412, "loss": 1.9217, "step": 2553 }, { "epoch": 0.26870068385060497, "grad_norm": 1.0252301692962646, "learning_rate": 0.00017159065048946644, "loss": 1.9798, "step": 2554 }, { "epoch": 0.26880589163598106, "grad_norm": 1.1335787773132324, "learning_rate": 0.00017156685364495226, "loss": 2.2337, "step": 2555 }, { "epoch": 0.26891109942135716, "grad_norm": 1.192686676979065, "learning_rate": 0.00017154304848961504, "loss": 2.0498, "step": 2556 }, { "epoch": 0.2690163072067333, "grad_norm": 1.2302987575531006, "learning_rate": 0.00017151923502621918, "loss": 2.0112, "step": 2557 }, { "epoch": 0.2691215149921094, "grad_norm": 1.11729097366333, "learning_rate": 0.00017149541325753008, "loss": 1.5503, "step": 2558 }, { "epoch": 0.26922672277748555, "grad_norm": 1.276808261871338, "learning_rate": 0.00017147158318631402, "loss": 1.7576, "step": 2559 }, { "epoch": 0.26933193056286164, "grad_norm": 1.1111189126968384, "learning_rate": 0.0001714477448153384, "loss": 1.7231, "step": 2560 }, { "epoch": 0.2694371383482378, "grad_norm": 1.3912397623062134, "learning_rate": 0.00017142389814737142, "loss": 1.9727, "step": 2561 }, { "epoch": 0.2695423461336139, "grad_norm": 1.2298855781555176, "learning_rate": 0.00017140004318518236, "loss": 1.8264, "step": 2562 }, { "epoch": 0.26964755391899, "grad_norm": 0.9467676877975464, "learning_rate": 0.0001713761799315414, "loss": 2.2141, "step": 2563 }, { "epoch": 0.26975276170436613, "grad_norm": 1.3311164379119873, "learning_rate": 0.00017135230838921967, "loss": 1.7519, "step": 2564 }, { "epoch": 0.2698579694897422, "grad_norm": 1.4334806203842163, "learning_rate": 0.00017132842856098937, "loss": 2.1874, "step": 2565 }, { "epoch": 0.26996317727511837, "grad_norm": 1.0612467527389526, "learning_rate": 0.0001713045404496235, "loss": 2.026, "step": 2566 }, { "epoch": 0.27006838506049446, "grad_norm": 2.4904496669769287, "learning_rate": 0.00017128064405789618, "loss": 1.9635, "step": 2567 }, { "epoch": 0.2701735928458706, "grad_norm": 1.3276349306106567, "learning_rate": 0.00017125673938858237, "loss": 1.9939, "step": 2568 }, { "epoch": 0.2702788006312467, "grad_norm": 1.099576711654663, "learning_rate": 0.0001712328264444581, "loss": 2.0464, "step": 2569 }, { "epoch": 0.27038400841662286, "grad_norm": 1.4065220355987549, "learning_rate": 0.00017120890522830017, "loss": 2.1369, "step": 2570 }, { "epoch": 0.27048921620199895, "grad_norm": 1.4357450008392334, "learning_rate": 0.00017118497574288664, "loss": 1.6478, "step": 2571 }, { "epoch": 0.27059442398737504, "grad_norm": 1.6182183027267456, "learning_rate": 0.00017116103799099625, "loss": 1.3441, "step": 2572 }, { "epoch": 0.2706996317727512, "grad_norm": 1.831085443496704, "learning_rate": 0.00017113709197540887, "loss": 1.8925, "step": 2573 }, { "epoch": 0.2708048395581273, "grad_norm": 1.2130355834960938, "learning_rate": 0.0001711131376989052, "loss": 1.8865, "step": 2574 }, { "epoch": 0.27091004734350344, "grad_norm": 0.9673578143119812, "learning_rate": 0.00017108917516426704, "loss": 1.6841, "step": 2575 }, { "epoch": 0.27101525512887953, "grad_norm": 1.0448040962219238, "learning_rate": 0.00017106520437427708, "loss": 2.3308, "step": 2576 }, { "epoch": 0.2711204629142557, "grad_norm": 0.8160276412963867, "learning_rate": 0.00017104122533171895, "loss": 2.3082, "step": 2577 }, { "epoch": 0.27122567069963177, "grad_norm": 1.134219765663147, "learning_rate": 0.00017101723803937722, "loss": 2.3576, "step": 2578 }, { "epoch": 0.27133087848500786, "grad_norm": 1.3649998903274536, "learning_rate": 0.00017099324250003753, "loss": 2.0624, "step": 2579 }, { "epoch": 0.271436086270384, "grad_norm": 1.4927576780319214, "learning_rate": 0.00017096923871648634, "loss": 1.884, "step": 2580 }, { "epoch": 0.2715412940557601, "grad_norm": 1.5744551420211792, "learning_rate": 0.00017094522669151117, "loss": 1.2819, "step": 2581 }, { "epoch": 0.27164650184113626, "grad_norm": 1.319411277770996, "learning_rate": 0.00017092120642790042, "loss": 1.7153, "step": 2582 }, { "epoch": 0.27175170962651235, "grad_norm": 1.3168647289276123, "learning_rate": 0.00017089717792844353, "loss": 1.0528, "step": 2583 }, { "epoch": 0.2718569174118885, "grad_norm": 1.3358632326126099, "learning_rate": 0.00017087314119593078, "loss": 2.0915, "step": 2584 }, { "epoch": 0.2719621251972646, "grad_norm": 1.1743086576461792, "learning_rate": 0.00017084909623315357, "loss": 1.875, "step": 2585 }, { "epoch": 0.27206733298264074, "grad_norm": 1.6610081195831299, "learning_rate": 0.00017082504304290408, "loss": 2.0099, "step": 2586 }, { "epoch": 0.27217254076801684, "grad_norm": 1.2851457595825195, "learning_rate": 0.0001708009816279756, "loss": 1.8666, "step": 2587 }, { "epoch": 0.27227774855339293, "grad_norm": 0.9746414422988892, "learning_rate": 0.00017077691199116223, "loss": 2.0146, "step": 2588 }, { "epoch": 0.2723829563387691, "grad_norm": 0.9980962872505188, "learning_rate": 0.00017075283413525916, "loss": 2.1908, "step": 2589 }, { "epoch": 0.27248816412414517, "grad_norm": 1.0672366619110107, "learning_rate": 0.00017072874806306246, "loss": 2.4187, "step": 2590 }, { "epoch": 0.2725933719095213, "grad_norm": 1.1528050899505615, "learning_rate": 0.00017070465377736914, "loss": 1.5965, "step": 2591 }, { "epoch": 0.2726985796948974, "grad_norm": 1.3461138010025024, "learning_rate": 0.00017068055128097718, "loss": 2.3055, "step": 2592 }, { "epoch": 0.27280378748027356, "grad_norm": 0.876352071762085, "learning_rate": 0.00017065644057668555, "loss": 1.8871, "step": 2593 }, { "epoch": 0.27290899526564966, "grad_norm": 1.1798096895217896, "learning_rate": 0.00017063232166729413, "loss": 1.7881, "step": 2594 }, { "epoch": 0.27301420305102575, "grad_norm": 0.9815201163291931, "learning_rate": 0.00017060819455560382, "loss": 1.9022, "step": 2595 }, { "epoch": 0.2731194108364019, "grad_norm": 1.1576168537139893, "learning_rate": 0.00017058405924441636, "loss": 1.8402, "step": 2596 }, { "epoch": 0.273224618621778, "grad_norm": 1.1924322843551636, "learning_rate": 0.00017055991573653454, "loss": 1.7346, "step": 2597 }, { "epoch": 0.27332982640715414, "grad_norm": 1.4050421714782715, "learning_rate": 0.00017053576403476206, "loss": 1.8869, "step": 2598 }, { "epoch": 0.27343503419253024, "grad_norm": 1.221558928489685, "learning_rate": 0.00017051160414190353, "loss": 2.056, "step": 2599 }, { "epoch": 0.2735402419779064, "grad_norm": 1.141769528388977, "learning_rate": 0.00017048743606076463, "loss": 2.0981, "step": 2600 }, { "epoch": 0.2736454497632825, "grad_norm": 1.7314319610595703, "learning_rate": 0.0001704632597941519, "loss": 1.6496, "step": 2601 }, { "epoch": 0.2737506575486586, "grad_norm": 1.8792369365692139, "learning_rate": 0.0001704390753448728, "loss": 2.0167, "step": 2602 }, { "epoch": 0.2738558653340347, "grad_norm": 1.1898305416107178, "learning_rate": 0.00017041488271573587, "loss": 2.1775, "step": 2603 }, { "epoch": 0.2739610731194108, "grad_norm": 1.3105398416519165, "learning_rate": 0.00017039068190955047, "loss": 2.2253, "step": 2604 }, { "epoch": 0.27406628090478696, "grad_norm": 1.4331053495407104, "learning_rate": 0.00017036647292912696, "loss": 1.6136, "step": 2605 }, { "epoch": 0.27417148869016306, "grad_norm": 1.059443712234497, "learning_rate": 0.00017034225577727667, "loss": 1.6369, "step": 2606 }, { "epoch": 0.2742766964755392, "grad_norm": 1.0023260116577148, "learning_rate": 0.00017031803045681188, "loss": 2.3698, "step": 2607 }, { "epoch": 0.2743819042609153, "grad_norm": 2.0491862297058105, "learning_rate": 0.00017029379697054573, "loss": 1.6626, "step": 2608 }, { "epoch": 0.27448711204629145, "grad_norm": 1.3866617679595947, "learning_rate": 0.0001702695553212924, "loss": 1.8494, "step": 2609 }, { "epoch": 0.27459231983166754, "grad_norm": 1.2196578979492188, "learning_rate": 0.00017024530551186702, "loss": 1.9425, "step": 2610 }, { "epoch": 0.27469752761704364, "grad_norm": 0.9433722496032715, "learning_rate": 0.00017022104754508562, "loss": 2.2177, "step": 2611 }, { "epoch": 0.2748027354024198, "grad_norm": 1.2304083108901978, "learning_rate": 0.0001701967814237652, "loss": 1.6013, "step": 2612 }, { "epoch": 0.2749079431877959, "grad_norm": 1.4996156692504883, "learning_rate": 0.0001701725071507237, "loss": 1.9624, "step": 2613 }, { "epoch": 0.275013150973172, "grad_norm": 2.1100172996520996, "learning_rate": 0.00017014822472878, "loss": 1.4203, "step": 2614 }, { "epoch": 0.2751183587585481, "grad_norm": 1.0386134386062622, "learning_rate": 0.00017012393416075398, "loss": 2.0622, "step": 2615 }, { "epoch": 0.27522356654392427, "grad_norm": 2.1451127529144287, "learning_rate": 0.0001700996354494664, "loss": 1.6151, "step": 2616 }, { "epoch": 0.27532877432930036, "grad_norm": 1.6166155338287354, "learning_rate": 0.000170075328597739, "loss": 1.8475, "step": 2617 }, { "epoch": 0.2754339821146765, "grad_norm": 1.1394798755645752, "learning_rate": 0.00017005101360839442, "loss": 1.9892, "step": 2618 }, { "epoch": 0.2755391899000526, "grad_norm": 1.3837336301803589, "learning_rate": 0.00017002669048425632, "loss": 2.2187, "step": 2619 }, { "epoch": 0.2756443976854287, "grad_norm": 1.8999110460281372, "learning_rate": 0.00017000235922814922, "loss": 2.209, "step": 2620 }, { "epoch": 0.27574960547080485, "grad_norm": 1.2286471128463745, "learning_rate": 0.00016997801984289866, "loss": 1.8712, "step": 2621 }, { "epoch": 0.27585481325618094, "grad_norm": 1.0605058670043945, "learning_rate": 0.00016995367233133113, "loss": 2.3471, "step": 2622 }, { "epoch": 0.2759600210415571, "grad_norm": 1.838855266571045, "learning_rate": 0.00016992931669627392, "loss": 1.7718, "step": 2623 }, { "epoch": 0.2760652288269332, "grad_norm": 1.4109746217727661, "learning_rate": 0.00016990495294055548, "loss": 1.8921, "step": 2624 }, { "epoch": 0.27617043661230933, "grad_norm": 1.4461547136306763, "learning_rate": 0.00016988058106700505, "loss": 2.1139, "step": 2625 }, { "epoch": 0.2762756443976854, "grad_norm": 1.5509105920791626, "learning_rate": 0.00016985620107845282, "loss": 1.7556, "step": 2626 }, { "epoch": 0.2763808521830615, "grad_norm": 0.8982523679733276, "learning_rate": 0.00016983181297773, "loss": 1.9906, "step": 2627 }, { "epoch": 0.27648605996843767, "grad_norm": 1.2106822729110718, "learning_rate": 0.0001698074167676687, "loss": 1.9958, "step": 2628 }, { "epoch": 0.27659126775381376, "grad_norm": 1.0443248748779297, "learning_rate": 0.00016978301245110195, "loss": 1.5866, "step": 2629 }, { "epoch": 0.2766964755391899, "grad_norm": 0.8765720129013062, "learning_rate": 0.00016975860003086378, "loss": 2.1551, "step": 2630 }, { "epoch": 0.276801683324566, "grad_norm": 1.5277172327041626, "learning_rate": 0.00016973417950978906, "loss": 1.5894, "step": 2631 }, { "epoch": 0.27690689110994215, "grad_norm": 1.0646064281463623, "learning_rate": 0.00016970975089071371, "loss": 2.0302, "step": 2632 }, { "epoch": 0.27701209889531825, "grad_norm": 0.9117512702941895, "learning_rate": 0.00016968531417647456, "loss": 2.1135, "step": 2633 }, { "epoch": 0.2771173066806944, "grad_norm": 1.429713487625122, "learning_rate": 0.0001696608693699093, "loss": 1.8757, "step": 2634 }, { "epoch": 0.2772225144660705, "grad_norm": 1.1439270973205566, "learning_rate": 0.00016963641647385673, "loss": 1.9884, "step": 2635 }, { "epoch": 0.2773277222514466, "grad_norm": 0.8106045126914978, "learning_rate": 0.00016961195549115637, "loss": 2.0197, "step": 2636 }, { "epoch": 0.27743293003682273, "grad_norm": 1.2242646217346191, "learning_rate": 0.00016958748642464887, "loss": 2.0144, "step": 2637 }, { "epoch": 0.2775381378221988, "grad_norm": 1.4185434579849243, "learning_rate": 0.00016956300927717575, "loss": 2.1412, "step": 2638 }, { "epoch": 0.277643345607575, "grad_norm": 1.560724139213562, "learning_rate": 0.0001695385240515794, "loss": 2.1001, "step": 2639 }, { "epoch": 0.27774855339295107, "grad_norm": 1.4623364210128784, "learning_rate": 0.00016951403075070323, "loss": 1.9271, "step": 2640 }, { "epoch": 0.2778537611783272, "grad_norm": 0.8837911486625671, "learning_rate": 0.00016948952937739155, "loss": 1.621, "step": 2641 }, { "epoch": 0.2779589689637033, "grad_norm": 1.3567240238189697, "learning_rate": 0.00016946501993448968, "loss": 1.9028, "step": 2642 }, { "epoch": 0.2780641767490794, "grad_norm": 1.2848210334777832, "learning_rate": 0.00016944050242484378, "loss": 1.6725, "step": 2643 }, { "epoch": 0.27816938453445555, "grad_norm": 1.0903668403625488, "learning_rate": 0.00016941597685130098, "loss": 1.9121, "step": 2644 }, { "epoch": 0.27827459231983165, "grad_norm": 1.519652247428894, "learning_rate": 0.0001693914432167094, "loss": 2.1452, "step": 2645 }, { "epoch": 0.2783798001052078, "grad_norm": 1.244167447090149, "learning_rate": 0.000169366901523918, "loss": 2.3742, "step": 2646 }, { "epoch": 0.2784850078905839, "grad_norm": 1.5084514617919922, "learning_rate": 0.00016934235177577673, "loss": 1.4103, "step": 2647 }, { "epoch": 0.27859021567596004, "grad_norm": 1.1031715869903564, "learning_rate": 0.00016931779397513652, "loss": 1.1998, "step": 2648 }, { "epoch": 0.27869542346133613, "grad_norm": 1.4771610498428345, "learning_rate": 0.0001692932281248491, "loss": 1.7789, "step": 2649 }, { "epoch": 0.2788006312467123, "grad_norm": 1.3364812135696411, "learning_rate": 0.00016926865422776737, "loss": 1.576, "step": 2650 }, { "epoch": 0.2789058390320884, "grad_norm": 1.3395233154296875, "learning_rate": 0.00016924407228674485, "loss": 2.1849, "step": 2651 }, { "epoch": 0.27901104681746447, "grad_norm": 1.4787706136703491, "learning_rate": 0.00016921948230463625, "loss": 1.7253, "step": 2652 }, { "epoch": 0.2791162546028406, "grad_norm": 1.3256549835205078, "learning_rate": 0.0001691948842842971, "loss": 1.7977, "step": 2653 }, { "epoch": 0.2792214623882167, "grad_norm": 1.2367287874221802, "learning_rate": 0.0001691702782285839, "loss": 2.3646, "step": 2654 }, { "epoch": 0.27932667017359286, "grad_norm": 1.422200083732605, "learning_rate": 0.00016914566414035403, "loss": 1.379, "step": 2655 }, { "epoch": 0.27943187795896895, "grad_norm": 1.134897232055664, "learning_rate": 0.0001691210420224659, "loss": 2.3472, "step": 2656 }, { "epoch": 0.2795370857443451, "grad_norm": 1.5080149173736572, "learning_rate": 0.00016909641187777877, "loss": 1.8669, "step": 2657 }, { "epoch": 0.2796422935297212, "grad_norm": 0.9561126828193665, "learning_rate": 0.00016907177370915287, "loss": 2.1019, "step": 2658 }, { "epoch": 0.2797475013150973, "grad_norm": 1.668290138244629, "learning_rate": 0.00016904712751944931, "loss": 2.1505, "step": 2659 }, { "epoch": 0.27985270910047344, "grad_norm": 1.1527786254882812, "learning_rate": 0.0001690224733115302, "loss": 2.1848, "step": 2660 }, { "epoch": 0.27995791688584953, "grad_norm": 1.4785900115966797, "learning_rate": 0.0001689978110882586, "loss": 2.2729, "step": 2661 }, { "epoch": 0.2800631246712257, "grad_norm": 1.4332002401351929, "learning_rate": 0.00016897314085249834, "loss": 2.0227, "step": 2662 }, { "epoch": 0.2801683324566018, "grad_norm": 1.4204810857772827, "learning_rate": 0.00016894846260711438, "loss": 2.3247, "step": 2663 }, { "epoch": 0.2802735402419779, "grad_norm": 1.2798484563827515, "learning_rate": 0.00016892377635497252, "loss": 1.9749, "step": 2664 }, { "epoch": 0.280378748027354, "grad_norm": 1.1675047874450684, "learning_rate": 0.00016889908209893943, "loss": 2.3451, "step": 2665 }, { "epoch": 0.28048395581273017, "grad_norm": 0.8004907369613647, "learning_rate": 0.00016887437984188286, "loss": 2.2131, "step": 2666 }, { "epoch": 0.28058916359810626, "grad_norm": 1.09357750415802, "learning_rate": 0.00016884966958667132, "loss": 2.1521, "step": 2667 }, { "epoch": 0.28069437138348236, "grad_norm": 0.926487147808075, "learning_rate": 0.00016882495133617437, "loss": 2.0104, "step": 2668 }, { "epoch": 0.2807995791688585, "grad_norm": 1.332603096961975, "learning_rate": 0.0001688002250932625, "loss": 1.4487, "step": 2669 }, { "epoch": 0.2809047869542346, "grad_norm": 0.9102864265441895, "learning_rate": 0.000168775490860807, "loss": 2.0708, "step": 2670 }, { "epoch": 0.28100999473961075, "grad_norm": 1.8600473403930664, "learning_rate": 0.0001687507486416802, "loss": 1.901, "step": 2671 }, { "epoch": 0.28111520252498684, "grad_norm": 1.7245584726333618, "learning_rate": 0.00016872599843875544, "loss": 1.7357, "step": 2672 }, { "epoch": 0.281220410310363, "grad_norm": 0.940122663974762, "learning_rate": 0.00016870124025490673, "loss": 1.7142, "step": 2673 }, { "epoch": 0.2813256180957391, "grad_norm": 1.2285487651824951, "learning_rate": 0.0001686764740930092, "loss": 2.1235, "step": 2674 }, { "epoch": 0.2814308258811152, "grad_norm": 1.5378491878509521, "learning_rate": 0.0001686516999559389, "loss": 1.9053, "step": 2675 }, { "epoch": 0.2815360336664913, "grad_norm": 1.1821976900100708, "learning_rate": 0.00016862691784657273, "loss": 2.2413, "step": 2676 }, { "epoch": 0.2816412414518674, "grad_norm": 0.931596040725708, "learning_rate": 0.0001686021277677886, "loss": 1.9538, "step": 2677 }, { "epoch": 0.28174644923724357, "grad_norm": 2.422799825668335, "learning_rate": 0.00016857732972246528, "loss": 2.2234, "step": 2678 }, { "epoch": 0.28185165702261966, "grad_norm": 1.1872382164001465, "learning_rate": 0.00016855252371348245, "loss": 2.0544, "step": 2679 }, { "epoch": 0.2819568648079958, "grad_norm": 0.8804065585136414, "learning_rate": 0.0001685277097437208, "loss": 1.9305, "step": 2680 }, { "epoch": 0.2820620725933719, "grad_norm": 0.8961731195449829, "learning_rate": 0.00016850288781606186, "loss": 1.8034, "step": 2681 }, { "epoch": 0.28216728037874805, "grad_norm": 1.362924575805664, "learning_rate": 0.00016847805793338818, "loss": 2.5519, "step": 2682 }, { "epoch": 0.28227248816412415, "grad_norm": 1.105214238166809, "learning_rate": 0.00016845322009858307, "loss": 2.212, "step": 2683 }, { "epoch": 0.28237769594950024, "grad_norm": 1.1936612129211426, "learning_rate": 0.00016842837431453093, "loss": 2.4178, "step": 2684 }, { "epoch": 0.2824829037348764, "grad_norm": 1.873302936553955, "learning_rate": 0.000168403520584117, "loss": 1.7252, "step": 2685 }, { "epoch": 0.2825881115202525, "grad_norm": 1.440673589706421, "learning_rate": 0.0001683786589102275, "loss": 2.1752, "step": 2686 }, { "epoch": 0.28269331930562863, "grad_norm": 1.075239658355713, "learning_rate": 0.0001683537892957495, "loss": 1.8977, "step": 2687 }, { "epoch": 0.2827985270910047, "grad_norm": 1.338554859161377, "learning_rate": 0.00016832891174357103, "loss": 1.5485, "step": 2688 }, { "epoch": 0.2829037348763809, "grad_norm": 1.2033013105392456, "learning_rate": 0.00016830402625658104, "loss": 2.182, "step": 2689 }, { "epoch": 0.28300894266175697, "grad_norm": 1.282525658607483, "learning_rate": 0.00016827913283766938, "loss": 1.8707, "step": 2690 }, { "epoch": 0.28311415044713306, "grad_norm": 1.3480403423309326, "learning_rate": 0.0001682542314897269, "loss": 1.5163, "step": 2691 }, { "epoch": 0.2832193582325092, "grad_norm": 1.0361692905426025, "learning_rate": 0.00016822932221564524, "loss": 2.2251, "step": 2692 }, { "epoch": 0.2833245660178853, "grad_norm": 1.486932635307312, "learning_rate": 0.0001682044050183171, "loss": 1.9494, "step": 2693 }, { "epoch": 0.28342977380326145, "grad_norm": 1.127858281135559, "learning_rate": 0.00016817947990063598, "loss": 2.0038, "step": 2694 }, { "epoch": 0.28353498158863755, "grad_norm": 1.3391180038452148, "learning_rate": 0.00016815454686549636, "loss": 1.6913, "step": 2695 }, { "epoch": 0.2836401893740137, "grad_norm": 1.6128203868865967, "learning_rate": 0.00016812960591579366, "loss": 1.7916, "step": 2696 }, { "epoch": 0.2837453971593898, "grad_norm": 1.50484037399292, "learning_rate": 0.00016810465705442416, "loss": 1.7511, "step": 2697 }, { "epoch": 0.28385060494476594, "grad_norm": 1.4077752828598022, "learning_rate": 0.00016807970028428508, "loss": 2.2318, "step": 2698 }, { "epoch": 0.28395581273014203, "grad_norm": 1.3436038494110107, "learning_rate": 0.0001680547356082746, "loss": 2.0339, "step": 2699 }, { "epoch": 0.2840610205155181, "grad_norm": 1.6516331434249878, "learning_rate": 0.00016802976302929178, "loss": 1.5578, "step": 2700 }, { "epoch": 0.2841662283008943, "grad_norm": 2.065028429031372, "learning_rate": 0.0001680047825502366, "loss": 1.8351, "step": 2701 }, { "epoch": 0.28427143608627037, "grad_norm": 1.5128923654556274, "learning_rate": 0.00016797979417400996, "loss": 1.9321, "step": 2702 }, { "epoch": 0.2843766438716465, "grad_norm": 2.0304954051971436, "learning_rate": 0.00016795479790351366, "loss": 1.3774, "step": 2703 }, { "epoch": 0.2844818516570226, "grad_norm": 1.0998564958572388, "learning_rate": 0.00016792979374165046, "loss": 2.1468, "step": 2704 }, { "epoch": 0.28458705944239876, "grad_norm": 2.0375499725341797, "learning_rate": 0.00016790478169132397, "loss": 2.3572, "step": 2705 }, { "epoch": 0.28469226722777485, "grad_norm": 1.1634581089019775, "learning_rate": 0.00016787976175543882, "loss": 2.1935, "step": 2706 }, { "epoch": 0.28479747501315095, "grad_norm": 1.2639390230178833, "learning_rate": 0.00016785473393690045, "loss": 1.9523, "step": 2707 }, { "epoch": 0.2849026827985271, "grad_norm": 1.566070795059204, "learning_rate": 0.00016782969823861526, "loss": 2.3277, "step": 2708 }, { "epoch": 0.2850078905839032, "grad_norm": 1.1275237798690796, "learning_rate": 0.0001678046546634906, "loss": 1.8413, "step": 2709 }, { "epoch": 0.28511309836927934, "grad_norm": 1.370336890220642, "learning_rate": 0.00016777960321443463, "loss": 2.0708, "step": 2710 }, { "epoch": 0.28521830615465543, "grad_norm": 1.3864731788635254, "learning_rate": 0.00016775454389435655, "loss": 1.8423, "step": 2711 }, { "epoch": 0.2853235139400316, "grad_norm": 1.9456918239593506, "learning_rate": 0.0001677294767061664, "loss": 2.1698, "step": 2712 }, { "epoch": 0.2854287217254077, "grad_norm": 1.2052172422409058, "learning_rate": 0.00016770440165277516, "loss": 2.0182, "step": 2713 }, { "epoch": 0.2855339295107838, "grad_norm": 1.085540533065796, "learning_rate": 0.0001676793187370947, "loss": 2.1485, "step": 2714 }, { "epoch": 0.2856391372961599, "grad_norm": 1.0382028818130493, "learning_rate": 0.0001676542279620378, "loss": 1.9553, "step": 2715 }, { "epoch": 0.285744345081536, "grad_norm": 1.1799119710922241, "learning_rate": 0.0001676291293305182, "loss": 1.8491, "step": 2716 }, { "epoch": 0.28584955286691216, "grad_norm": 1.027492880821228, "learning_rate": 0.00016760402284545048, "loss": 2.1497, "step": 2717 }, { "epoch": 0.28595476065228825, "grad_norm": 1.3454300165176392, "learning_rate": 0.00016757890850975025, "loss": 2.153, "step": 2718 }, { "epoch": 0.2860599684376644, "grad_norm": 1.4455000162124634, "learning_rate": 0.00016755378632633388, "loss": 1.9732, "step": 2719 }, { "epoch": 0.2861651762230405, "grad_norm": 1.3513553142547607, "learning_rate": 0.00016752865629811873, "loss": 1.5569, "step": 2720 }, { "epoch": 0.28627038400841665, "grad_norm": 0.8183251619338989, "learning_rate": 0.00016750351842802314, "loss": 1.4716, "step": 2721 }, { "epoch": 0.28637559179379274, "grad_norm": 2.2286715507507324, "learning_rate": 0.00016747837271896622, "loss": 1.947, "step": 2722 }, { "epoch": 0.28648079957916883, "grad_norm": 0.9977225065231323, "learning_rate": 0.00016745321917386804, "loss": 2.348, "step": 2723 }, { "epoch": 0.286586007364545, "grad_norm": 0.8259665369987488, "learning_rate": 0.00016742805779564968, "loss": 2.4167, "step": 2724 }, { "epoch": 0.2866912151499211, "grad_norm": 0.9765976667404175, "learning_rate": 0.00016740288858723302, "loss": 1.8912, "step": 2725 }, { "epoch": 0.2867964229352972, "grad_norm": 1.2133702039718628, "learning_rate": 0.0001673777115515408, "loss": 2.5914, "step": 2726 }, { "epoch": 0.2869016307206733, "grad_norm": 1.3183568716049194, "learning_rate": 0.00016735252669149685, "loss": 1.7136, "step": 2727 }, { "epoch": 0.28700683850604947, "grad_norm": 1.4817814826965332, "learning_rate": 0.00016732733401002574, "loss": 1.8334, "step": 2728 }, { "epoch": 0.28711204629142556, "grad_norm": 1.6449335813522339, "learning_rate": 0.00016730213351005303, "loss": 2.3261, "step": 2729 }, { "epoch": 0.2872172540768017, "grad_norm": 1.0374553203582764, "learning_rate": 0.0001672769251945052, "loss": 2.4651, "step": 2730 }, { "epoch": 0.2873224618621778, "grad_norm": 1.436585783958435, "learning_rate": 0.0001672517090663096, "loss": 1.9923, "step": 2731 }, { "epoch": 0.2874276696475539, "grad_norm": 1.1871074438095093, "learning_rate": 0.00016722648512839446, "loss": 2.0888, "step": 2732 }, { "epoch": 0.28753287743293005, "grad_norm": 1.0876706838607788, "learning_rate": 0.00016720125338368894, "loss": 1.427, "step": 2733 }, { "epoch": 0.28763808521830614, "grad_norm": 0.8011285662651062, "learning_rate": 0.00016717601383512318, "loss": 1.8155, "step": 2734 }, { "epoch": 0.2877432930036823, "grad_norm": 1.4743174314498901, "learning_rate": 0.00016715076648562814, "loss": 2.1159, "step": 2735 }, { "epoch": 0.2878485007890584, "grad_norm": 2.063098907470703, "learning_rate": 0.00016712551133813572, "loss": 2.3086, "step": 2736 }, { "epoch": 0.28795370857443453, "grad_norm": 1.3487850427627563, "learning_rate": 0.0001671002483955787, "loss": 2.1455, "step": 2737 }, { "epoch": 0.2880589163598106, "grad_norm": 1.5087181329727173, "learning_rate": 0.00016707497766089082, "loss": 1.8298, "step": 2738 }, { "epoch": 0.2881641241451867, "grad_norm": 1.2308396100997925, "learning_rate": 0.00016704969913700662, "loss": 2.4025, "step": 2739 }, { "epoch": 0.28826933193056287, "grad_norm": 1.2315316200256348, "learning_rate": 0.00016702441282686166, "loss": 1.8982, "step": 2740 }, { "epoch": 0.28837453971593896, "grad_norm": 2.0835769176483154, "learning_rate": 0.00016699911873339232, "loss": 1.9045, "step": 2741 }, { "epoch": 0.2884797475013151, "grad_norm": 0.9750843048095703, "learning_rate": 0.00016697381685953596, "loss": 1.8595, "step": 2742 }, { "epoch": 0.2885849552866912, "grad_norm": 1.373468279838562, "learning_rate": 0.0001669485072082308, "loss": 2.2024, "step": 2743 }, { "epoch": 0.28869016307206735, "grad_norm": 1.492432951927185, "learning_rate": 0.00016692318978241594, "loss": 2.0474, "step": 2744 }, { "epoch": 0.28879537085744345, "grad_norm": 2.0323355197906494, "learning_rate": 0.00016689786458503141, "loss": 2.0644, "step": 2745 }, { "epoch": 0.2889005786428196, "grad_norm": 1.1842961311340332, "learning_rate": 0.0001668725316190182, "loss": 2.1992, "step": 2746 }, { "epoch": 0.2890057864281957, "grad_norm": 1.259732961654663, "learning_rate": 0.00016684719088731807, "loss": 1.7769, "step": 2747 }, { "epoch": 0.2891109942135718, "grad_norm": 0.8478556275367737, "learning_rate": 0.0001668218423928738, "loss": 1.7708, "step": 2748 }, { "epoch": 0.28921620199894793, "grad_norm": 0.826978325843811, "learning_rate": 0.000166796486138629, "loss": 1.8427, "step": 2749 }, { "epoch": 0.289321409784324, "grad_norm": 0.9389429688453674, "learning_rate": 0.00016677112212752824, "loss": 1.7696, "step": 2750 }, { "epoch": 0.2894266175697002, "grad_norm": 1.0777794122695923, "learning_rate": 0.0001667457503625169, "loss": 1.8971, "step": 2751 }, { "epoch": 0.28953182535507627, "grad_norm": 1.1226226091384888, "learning_rate": 0.00016672037084654139, "loss": 1.8703, "step": 2752 }, { "epoch": 0.2896370331404524, "grad_norm": 1.2667369842529297, "learning_rate": 0.0001666949835825489, "loss": 1.8995, "step": 2753 }, { "epoch": 0.2897422409258285, "grad_norm": 1.5252774953842163, "learning_rate": 0.0001666695885734876, "loss": 2.2547, "step": 2754 }, { "epoch": 0.2898474487112046, "grad_norm": 1.3169254064559937, "learning_rate": 0.0001666441858223065, "loss": 1.7265, "step": 2755 }, { "epoch": 0.28995265649658075, "grad_norm": 1.177160382270813, "learning_rate": 0.00016661877533195556, "loss": 1.8653, "step": 2756 }, { "epoch": 0.29005786428195685, "grad_norm": 1.028063416481018, "learning_rate": 0.00016659335710538564, "loss": 2.0327, "step": 2757 }, { "epoch": 0.290163072067333, "grad_norm": 1.1455798149108887, "learning_rate": 0.00016656793114554842, "loss": 2.0376, "step": 2758 }, { "epoch": 0.2902682798527091, "grad_norm": 1.1332205533981323, "learning_rate": 0.00016654249745539656, "loss": 1.9765, "step": 2759 }, { "epoch": 0.29037348763808524, "grad_norm": 2.2703967094421387, "learning_rate": 0.00016651705603788362, "loss": 2.2095, "step": 2760 }, { "epoch": 0.29047869542346133, "grad_norm": 1.228402853012085, "learning_rate": 0.00016649160689596396, "loss": 2.0485, "step": 2761 }, { "epoch": 0.2905839032088375, "grad_norm": 0.901681125164032, "learning_rate": 0.00016646615003259295, "loss": 1.8973, "step": 2762 }, { "epoch": 0.2906891109942136, "grad_norm": 1.9370111227035522, "learning_rate": 0.00016644068545072682, "loss": 2.3251, "step": 2763 }, { "epoch": 0.29079431877958967, "grad_norm": 1.6907079219818115, "learning_rate": 0.00016641521315332265, "loss": 1.8123, "step": 2764 }, { "epoch": 0.2908995265649658, "grad_norm": 0.9530484080314636, "learning_rate": 0.0001663897331433385, "loss": 1.4562, "step": 2765 }, { "epoch": 0.2910047343503419, "grad_norm": 0.8451758623123169, "learning_rate": 0.00016636424542373324, "loss": 1.7969, "step": 2766 }, { "epoch": 0.29110994213571806, "grad_norm": 0.9732938408851624, "learning_rate": 0.00016633874999746667, "loss": 1.7659, "step": 2767 }, { "epoch": 0.29121514992109415, "grad_norm": 1.020836591720581, "learning_rate": 0.00016631324686749958, "loss": 2.0531, "step": 2768 }, { "epoch": 0.2913203577064703, "grad_norm": 1.1106858253479004, "learning_rate": 0.0001662877360367934, "loss": 1.9912, "step": 2769 }, { "epoch": 0.2914255654918464, "grad_norm": 0.9008655548095703, "learning_rate": 0.0001662622175083108, "loss": 1.8145, "step": 2770 }, { "epoch": 0.2915307732772225, "grad_norm": 0.9763224124908447, "learning_rate": 0.00016623669128501504, "loss": 2.3397, "step": 2771 }, { "epoch": 0.29163598106259864, "grad_norm": 1.1344367265701294, "learning_rate": 0.00016621115736987046, "loss": 1.7173, "step": 2772 }, { "epoch": 0.29174118884797473, "grad_norm": 1.7770837545394897, "learning_rate": 0.00016618561576584216, "loss": 1.9058, "step": 2773 }, { "epoch": 0.2918463966333509, "grad_norm": 0.8264276385307312, "learning_rate": 0.00016616006647589626, "loss": 2.0614, "step": 2774 }, { "epoch": 0.291951604418727, "grad_norm": 1.0680514574050903, "learning_rate": 0.0001661345095029997, "loss": 1.7038, "step": 2775 }, { "epoch": 0.2920568122041031, "grad_norm": 1.3225088119506836, "learning_rate": 0.00016610894485012033, "loss": 1.9587, "step": 2776 }, { "epoch": 0.2921620199894792, "grad_norm": 2.3858277797698975, "learning_rate": 0.0001660833725202269, "loss": 2.4027, "step": 2777 }, { "epoch": 0.29226722777485536, "grad_norm": 1.472761869430542, "learning_rate": 0.00016605779251628903, "loss": 1.394, "step": 2778 }, { "epoch": 0.29237243556023146, "grad_norm": 1.8934521675109863, "learning_rate": 0.00016603220484127723, "loss": 1.8053, "step": 2779 }, { "epoch": 0.29247764334560755, "grad_norm": 1.2446554899215698, "learning_rate": 0.00016600660949816291, "loss": 2.0428, "step": 2780 }, { "epoch": 0.2925828511309837, "grad_norm": 1.7331035137176514, "learning_rate": 0.00016598100648991838, "loss": 1.9776, "step": 2781 }, { "epoch": 0.2926880589163598, "grad_norm": 1.300978422164917, "learning_rate": 0.00016595539581951686, "loss": 1.6838, "step": 2782 }, { "epoch": 0.29279326670173594, "grad_norm": 1.042041540145874, "learning_rate": 0.0001659297774899324, "loss": 1.783, "step": 2783 }, { "epoch": 0.29289847448711204, "grad_norm": 1.229884147644043, "learning_rate": 0.00016590415150413997, "loss": 1.9093, "step": 2784 }, { "epoch": 0.2930036822724882, "grad_norm": 1.859123706817627, "learning_rate": 0.00016587851786511543, "loss": 2.0027, "step": 2785 }, { "epoch": 0.2931088900578643, "grad_norm": 1.4690340757369995, "learning_rate": 0.00016585287657583557, "loss": 1.4002, "step": 2786 }, { "epoch": 0.2932140978432404, "grad_norm": 1.5459439754486084, "learning_rate": 0.00016582722763927802, "loss": 1.4447, "step": 2787 }, { "epoch": 0.2933193056286165, "grad_norm": 1.0940881967544556, "learning_rate": 0.00016580157105842123, "loss": 1.9658, "step": 2788 }, { "epoch": 0.2934245134139926, "grad_norm": 1.102705478668213, "learning_rate": 0.00016577590683624472, "loss": 2.0337, "step": 2789 }, { "epoch": 0.29352972119936876, "grad_norm": 1.6445786952972412, "learning_rate": 0.00016575023497572872, "loss": 1.674, "step": 2790 }, { "epoch": 0.29363492898474486, "grad_norm": 1.018031120300293, "learning_rate": 0.00016572455547985446, "loss": 2.1392, "step": 2791 }, { "epoch": 0.293740136770121, "grad_norm": 1.7538018226623535, "learning_rate": 0.00016569886835160399, "loss": 1.8097, "step": 2792 }, { "epoch": 0.2938453445554971, "grad_norm": 1.397821307182312, "learning_rate": 0.00016567317359396028, "loss": 2.0166, "step": 2793 }, { "epoch": 0.29395055234087325, "grad_norm": 1.5573742389678955, "learning_rate": 0.0001656474712099072, "loss": 1.9029, "step": 2794 }, { "epoch": 0.29405576012624934, "grad_norm": 0.9295175671577454, "learning_rate": 0.0001656217612024294, "loss": 2.1548, "step": 2795 }, { "epoch": 0.29416096791162544, "grad_norm": 0.8610682487487793, "learning_rate": 0.00016559604357451263, "loss": 2.2194, "step": 2796 }, { "epoch": 0.2942661756970016, "grad_norm": 1.385067343711853, "learning_rate": 0.00016557031832914327, "loss": 1.9511, "step": 2797 }, { "epoch": 0.2943713834823777, "grad_norm": 0.9952557682991028, "learning_rate": 0.00016554458546930878, "loss": 2.6092, "step": 2798 }, { "epoch": 0.29447659126775383, "grad_norm": 1.1171859502792358, "learning_rate": 0.0001655188449979974, "loss": 1.2882, "step": 2799 }, { "epoch": 0.2945817990531299, "grad_norm": 1.6443709135055542, "learning_rate": 0.00016549309691819833, "loss": 1.6552, "step": 2800 }, { "epoch": 0.29468700683850607, "grad_norm": 0.9413725137710571, "learning_rate": 0.00016546734123290156, "loss": 2.4733, "step": 2801 }, { "epoch": 0.29479221462388217, "grad_norm": 1.1774075031280518, "learning_rate": 0.000165441577945098, "loss": 1.8616, "step": 2802 }, { "epoch": 0.29489742240925826, "grad_norm": 0.9130268692970276, "learning_rate": 0.00016541580705777955, "loss": 2.1065, "step": 2803 }, { "epoch": 0.2950026301946344, "grad_norm": 1.0994932651519775, "learning_rate": 0.0001653900285739388, "loss": 2.1053, "step": 2804 }, { "epoch": 0.2951078379800105, "grad_norm": 0.8119322657585144, "learning_rate": 0.00016536424249656933, "loss": 2.3161, "step": 2805 }, { "epoch": 0.29521304576538665, "grad_norm": 1.0407122373580933, "learning_rate": 0.00016533844882866568, "loss": 2.0664, "step": 2806 }, { "epoch": 0.29531825355076274, "grad_norm": 1.3417972326278687, "learning_rate": 0.00016531264757322308, "loss": 1.924, "step": 2807 }, { "epoch": 0.2954234613361389, "grad_norm": 1.2302452325820923, "learning_rate": 0.0001652868387332378, "loss": 1.8267, "step": 2808 }, { "epoch": 0.295528669121515, "grad_norm": 1.2221964597702026, "learning_rate": 0.00016526102231170691, "loss": 2.0054, "step": 2809 }, { "epoch": 0.29563387690689114, "grad_norm": 1.5506309270858765, "learning_rate": 0.0001652351983116284, "loss": 1.9843, "step": 2810 }, { "epoch": 0.29573908469226723, "grad_norm": 1.3232911825180054, "learning_rate": 0.00016520936673600117, "loss": 2.2001, "step": 2811 }, { "epoch": 0.2958442924776433, "grad_norm": 1.1889708042144775, "learning_rate": 0.00016518352758782486, "loss": 1.3271, "step": 2812 }, { "epoch": 0.29594950026301947, "grad_norm": 1.4930849075317383, "learning_rate": 0.00016515768087010013, "loss": 2.121, "step": 2813 }, { "epoch": 0.29605470804839557, "grad_norm": 1.3592177629470825, "learning_rate": 0.0001651318265858285, "loss": 2.0658, "step": 2814 }, { "epoch": 0.2961599158337717, "grad_norm": 1.2253750562667847, "learning_rate": 0.00016510596473801232, "loss": 1.8405, "step": 2815 }, { "epoch": 0.2962651236191478, "grad_norm": 1.1184381246566772, "learning_rate": 0.00016508009532965485, "loss": 1.7584, "step": 2816 }, { "epoch": 0.29637033140452396, "grad_norm": 1.332506537437439, "learning_rate": 0.0001650542183637602, "loss": 1.8449, "step": 2817 }, { "epoch": 0.29647553918990005, "grad_norm": 1.4818626642227173, "learning_rate": 0.00016502833384333338, "loss": 2.0897, "step": 2818 }, { "epoch": 0.29658074697527614, "grad_norm": 0.7966058254241943, "learning_rate": 0.0001650024417713803, "loss": 1.9058, "step": 2819 }, { "epoch": 0.2966859547606523, "grad_norm": 1.4730877876281738, "learning_rate": 0.00016497654215090772, "loss": 1.8389, "step": 2820 }, { "epoch": 0.2967911625460284, "grad_norm": 1.3068904876708984, "learning_rate": 0.00016495063498492326, "loss": 2.4661, "step": 2821 }, { "epoch": 0.29689637033140454, "grad_norm": 1.5194685459136963, "learning_rate": 0.00016492472027643541, "loss": 1.685, "step": 2822 }, { "epoch": 0.29700157811678063, "grad_norm": 0.9527503848075867, "learning_rate": 0.00016489879802845361, "loss": 2.2541, "step": 2823 }, { "epoch": 0.2971067859021568, "grad_norm": 1.2973871231079102, "learning_rate": 0.0001648728682439881, "loss": 2.3992, "step": 2824 }, { "epoch": 0.29721199368753287, "grad_norm": 1.154152750968933, "learning_rate": 0.00016484693092605002, "loss": 1.7631, "step": 2825 }, { "epoch": 0.297317201472909, "grad_norm": 1.4371882677078247, "learning_rate": 0.00016482098607765137, "loss": 1.8837, "step": 2826 }, { "epoch": 0.2974224092582851, "grad_norm": 1.1324549913406372, "learning_rate": 0.00016479503370180507, "loss": 2.2255, "step": 2827 }, { "epoch": 0.2975276170436612, "grad_norm": 0.9485172629356384, "learning_rate": 0.0001647690738015249, "loss": 2.1955, "step": 2828 }, { "epoch": 0.29763282482903736, "grad_norm": 1.0813753604888916, "learning_rate": 0.0001647431063798254, "loss": 1.8017, "step": 2829 }, { "epoch": 0.29773803261441345, "grad_norm": 1.0293214321136475, "learning_rate": 0.0001647171314397222, "loss": 1.9523, "step": 2830 }, { "epoch": 0.2978432403997896, "grad_norm": 1.2349849939346313, "learning_rate": 0.00016469114898423165, "loss": 1.8325, "step": 2831 }, { "epoch": 0.2979484481851657, "grad_norm": 1.0925050973892212, "learning_rate": 0.00016466515901637096, "loss": 2.1776, "step": 2832 }, { "epoch": 0.29805365597054184, "grad_norm": 1.1909685134887695, "learning_rate": 0.0001646391615391583, "loss": 1.9048, "step": 2833 }, { "epoch": 0.29815886375591794, "grad_norm": 1.5420279502868652, "learning_rate": 0.00016461315655561263, "loss": 1.8361, "step": 2834 }, { "epoch": 0.29826407154129403, "grad_norm": 1.3840932846069336, "learning_rate": 0.00016458714406875392, "loss": 1.9673, "step": 2835 }, { "epoch": 0.2983692793266702, "grad_norm": 1.4501628875732422, "learning_rate": 0.0001645611240816028, "loss": 1.3108, "step": 2836 }, { "epoch": 0.29847448711204627, "grad_norm": 1.304292917251587, "learning_rate": 0.00016453509659718093, "loss": 1.4608, "step": 2837 }, { "epoch": 0.2985796948974224, "grad_norm": 1.1828423738479614, "learning_rate": 0.00016450906161851082, "loss": 1.8168, "step": 2838 }, { "epoch": 0.2986849026827985, "grad_norm": 1.2528148889541626, "learning_rate": 0.00016448301914861584, "loss": 1.5209, "step": 2839 }, { "epoch": 0.29879011046817466, "grad_norm": 0.9708895683288574, "learning_rate": 0.00016445696919052013, "loss": 2.2244, "step": 2840 }, { "epoch": 0.29889531825355076, "grad_norm": 1.3018192052841187, "learning_rate": 0.00016443091174724885, "loss": 1.6512, "step": 2841 }, { "epoch": 0.2990005260389269, "grad_norm": 1.1458721160888672, "learning_rate": 0.00016440484682182799, "loss": 2.0022, "step": 2842 }, { "epoch": 0.299105733824303, "grad_norm": 2.0912718772888184, "learning_rate": 0.00016437877441728433, "loss": 1.9703, "step": 2843 }, { "epoch": 0.2992109416096791, "grad_norm": 1.1203174591064453, "learning_rate": 0.00016435269453664558, "loss": 1.9248, "step": 2844 }, { "epoch": 0.29931614939505524, "grad_norm": 1.6757680177688599, "learning_rate": 0.00016432660718294033, "loss": 1.8856, "step": 2845 }, { "epoch": 0.29942135718043134, "grad_norm": 1.9626332521438599, "learning_rate": 0.00016430051235919802, "loss": 1.3917, "step": 2846 }, { "epoch": 0.2995265649658075, "grad_norm": 1.4652724266052246, "learning_rate": 0.00016427441006844893, "loss": 2.1621, "step": 2847 }, { "epoch": 0.2996317727511836, "grad_norm": 1.40691339969635, "learning_rate": 0.00016424830031372425, "loss": 1.8587, "step": 2848 }, { "epoch": 0.2997369805365597, "grad_norm": 1.054113745689392, "learning_rate": 0.000164222183098056, "loss": 1.9425, "step": 2849 }, { "epoch": 0.2998421883219358, "grad_norm": 1.6056944131851196, "learning_rate": 0.00016419605842447714, "loss": 1.5958, "step": 2850 }, { "epoch": 0.2999473961073119, "grad_norm": 1.3629722595214844, "learning_rate": 0.00016416992629602142, "loss": 1.524, "step": 2851 }, { "epoch": 0.30005260389268806, "grad_norm": 1.3340497016906738, "learning_rate": 0.00016414378671572344, "loss": 1.2919, "step": 2852 }, { "epoch": 0.30015781167806416, "grad_norm": 1.4080469608306885, "learning_rate": 0.00016411763968661873, "loss": 2.1183, "step": 2853 }, { "epoch": 0.3002630194634403, "grad_norm": 1.1511781215667725, "learning_rate": 0.00016409148521174367, "loss": 1.4441, "step": 2854 }, { "epoch": 0.3003682272488164, "grad_norm": 1.404482126235962, "learning_rate": 0.00016406532329413546, "loss": 1.9554, "step": 2855 }, { "epoch": 0.30047343503419255, "grad_norm": 1.4892315864562988, "learning_rate": 0.00016403915393683221, "loss": 1.9983, "step": 2856 }, { "epoch": 0.30057864281956864, "grad_norm": 1.159440040588379, "learning_rate": 0.00016401297714287294, "loss": 2.1476, "step": 2857 }, { "epoch": 0.3006838506049448, "grad_norm": 1.5921545028686523, "learning_rate": 0.00016398679291529738, "loss": 2.1005, "step": 2858 }, { "epoch": 0.3007890583903209, "grad_norm": 0.9857234358787537, "learning_rate": 0.00016396060125714628, "loss": 1.659, "step": 2859 }, { "epoch": 0.300894266175697, "grad_norm": 1.3065168857574463, "learning_rate": 0.00016393440217146114, "loss": 2.1434, "step": 2860 }, { "epoch": 0.3009994739610731, "grad_norm": 1.689706563949585, "learning_rate": 0.00016390819566128445, "loss": 1.8146, "step": 2861 }, { "epoch": 0.3011046817464492, "grad_norm": 2.270273208618164, "learning_rate": 0.00016388198172965942, "loss": 2.1251, "step": 2862 }, { "epoch": 0.30120988953182537, "grad_norm": 1.3508756160736084, "learning_rate": 0.00016385576037963021, "loss": 1.9374, "step": 2863 }, { "epoch": 0.30131509731720146, "grad_norm": 1.7013899087905884, "learning_rate": 0.00016382953161424185, "loss": 1.9598, "step": 2864 }, { "epoch": 0.3014203051025776, "grad_norm": 1.457298755645752, "learning_rate": 0.00016380329543654013, "loss": 2.2662, "step": 2865 }, { "epoch": 0.3015255128879537, "grad_norm": 1.280228853225708, "learning_rate": 0.00016377705184957185, "loss": 1.9162, "step": 2866 }, { "epoch": 0.3016307206733298, "grad_norm": 1.6916749477386475, "learning_rate": 0.00016375080085638451, "loss": 2.1827, "step": 2867 }, { "epoch": 0.30173592845870595, "grad_norm": 1.3950283527374268, "learning_rate": 0.00016372454246002663, "loss": 1.6382, "step": 2868 }, { "epoch": 0.30184113624408204, "grad_norm": 1.4823285341262817, "learning_rate": 0.00016369827666354745, "loss": 1.5104, "step": 2869 }, { "epoch": 0.3019463440294582, "grad_norm": 0.9238750338554382, "learning_rate": 0.00016367200346999714, "loss": 2.1355, "step": 2870 }, { "epoch": 0.3020515518148343, "grad_norm": 1.290266990661621, "learning_rate": 0.00016364572288242677, "loss": 1.9956, "step": 2871 }, { "epoch": 0.30215675960021043, "grad_norm": 0.9198346138000488, "learning_rate": 0.00016361943490388815, "loss": 2.0828, "step": 2872 }, { "epoch": 0.3022619673855865, "grad_norm": 1.6661968231201172, "learning_rate": 0.00016359313953743406, "loss": 1.7579, "step": 2873 }, { "epoch": 0.3023671751709627, "grad_norm": 1.031909465789795, "learning_rate": 0.00016356683678611807, "loss": 2.0152, "step": 2874 }, { "epoch": 0.30247238295633877, "grad_norm": 1.662083387374878, "learning_rate": 0.00016354052665299468, "loss": 2.1388, "step": 2875 }, { "epoch": 0.30257759074171486, "grad_norm": 1.226048231124878, "learning_rate": 0.00016351420914111916, "loss": 2.4793, "step": 2876 }, { "epoch": 0.302682798527091, "grad_norm": 1.6051714420318604, "learning_rate": 0.00016348788425354766, "loss": 1.7133, "step": 2877 }, { "epoch": 0.3027880063124671, "grad_norm": 0.972946047782898, "learning_rate": 0.00016346155199333721, "loss": 1.8606, "step": 2878 }, { "epoch": 0.30289321409784326, "grad_norm": 1.1665260791778564, "learning_rate": 0.00016343521236354574, "loss": 2.1413, "step": 2879 }, { "epoch": 0.30299842188321935, "grad_norm": 1.250375509262085, "learning_rate": 0.00016340886536723192, "loss": 1.6386, "step": 2880 }, { "epoch": 0.3031036296685955, "grad_norm": 1.2102625370025635, "learning_rate": 0.00016338251100745537, "loss": 1.9187, "step": 2881 }, { "epoch": 0.3032088374539716, "grad_norm": 2.1711556911468506, "learning_rate": 0.00016335614928727652, "loss": 1.7001, "step": 2882 }, { "epoch": 0.3033140452393477, "grad_norm": 2.243464231491089, "learning_rate": 0.0001633297802097567, "loss": 1.7878, "step": 2883 }, { "epoch": 0.30341925302472383, "grad_norm": 1.4908397197723389, "learning_rate": 0.00016330340377795804, "loss": 1.846, "step": 2884 }, { "epoch": 0.3035244608100999, "grad_norm": 1.6034961938858032, "learning_rate": 0.00016327701999494353, "loss": 1.6955, "step": 2885 }, { "epoch": 0.3036296685954761, "grad_norm": 1.2690043449401855, "learning_rate": 0.0001632506288637771, "loss": 2.009, "step": 2886 }, { "epoch": 0.30373487638085217, "grad_norm": 1.138432502746582, "learning_rate": 0.00016322423038752336, "loss": 1.8181, "step": 2887 }, { "epoch": 0.3038400841662283, "grad_norm": 0.7713850140571594, "learning_rate": 0.000163197824569248, "loss": 2.1781, "step": 2888 }, { "epoch": 0.3039452919516044, "grad_norm": 1.5833183526992798, "learning_rate": 0.00016317141141201731, "loss": 2.0445, "step": 2889 }, { "epoch": 0.30405049973698056, "grad_norm": 1.196471095085144, "learning_rate": 0.0001631449909188987, "loss": 1.6292, "step": 2890 }, { "epoch": 0.30415570752235666, "grad_norm": 1.7439603805541992, "learning_rate": 0.0001631185630929602, "loss": 2.1841, "step": 2891 }, { "epoch": 0.30426091530773275, "grad_norm": 1.4162222146987915, "learning_rate": 0.00016309212793727077, "loss": 1.9838, "step": 2892 }, { "epoch": 0.3043661230931089, "grad_norm": 1.5100573301315308, "learning_rate": 0.00016306568545490033, "loss": 2.1413, "step": 2893 }, { "epoch": 0.304471330878485, "grad_norm": 1.3554414510726929, "learning_rate": 0.00016303923564891948, "loss": 2.3482, "step": 2894 }, { "epoch": 0.30457653866386114, "grad_norm": 1.4444999694824219, "learning_rate": 0.0001630127785223998, "loss": 2.3344, "step": 2895 }, { "epoch": 0.30468174644923723, "grad_norm": 1.267393708229065, "learning_rate": 0.00016298631407841361, "loss": 1.4887, "step": 2896 }, { "epoch": 0.3047869542346134, "grad_norm": 1.479801893234253, "learning_rate": 0.00016295984232003426, "loss": 2.2112, "step": 2897 }, { "epoch": 0.3048921620199895, "grad_norm": 1.5580319166183472, "learning_rate": 0.0001629333632503357, "loss": 1.7964, "step": 2898 }, { "epoch": 0.30499736980536557, "grad_norm": 1.1315181255340576, "learning_rate": 0.00016290687687239283, "loss": 1.6472, "step": 2899 }, { "epoch": 0.3051025775907417, "grad_norm": 1.8991228342056274, "learning_rate": 0.00016288038318928156, "loss": 1.6517, "step": 2900 }, { "epoch": 0.3052077853761178, "grad_norm": 1.7622857093811035, "learning_rate": 0.00016285388220407847, "loss": 1.8343, "step": 2901 }, { "epoch": 0.30531299316149396, "grad_norm": 1.0036766529083252, "learning_rate": 0.00016282737391986097, "loss": 2.2478, "step": 2902 }, { "epoch": 0.30541820094687006, "grad_norm": 1.0161508321762085, "learning_rate": 0.00016280085833970744, "loss": 2.0217, "step": 2903 }, { "epoch": 0.3055234087322462, "grad_norm": 1.6816000938415527, "learning_rate": 0.00016277433546669703, "loss": 1.6079, "step": 2904 }, { "epoch": 0.3056286165176223, "grad_norm": 1.2823631763458252, "learning_rate": 0.00016274780530390977, "loss": 1.6762, "step": 2905 }, { "epoch": 0.30573382430299845, "grad_norm": 1.2397310733795166, "learning_rate": 0.00016272126785442644, "loss": 1.9937, "step": 2906 }, { "epoch": 0.30583903208837454, "grad_norm": 1.5026395320892334, "learning_rate": 0.0001626947231213289, "loss": 1.6613, "step": 2907 }, { "epoch": 0.30594423987375063, "grad_norm": 1.2583715915679932, "learning_rate": 0.00016266817110769955, "loss": 2.0048, "step": 2908 }, { "epoch": 0.3060494476591268, "grad_norm": 0.9579928517341614, "learning_rate": 0.00016264161181662188, "loss": 2.0814, "step": 2909 }, { "epoch": 0.3061546554445029, "grad_norm": 1.003151297569275, "learning_rate": 0.0001626150452511801, "loss": 2.2672, "step": 2910 }, { "epoch": 0.306259863229879, "grad_norm": 1.4802247285842896, "learning_rate": 0.00016258847141445928, "loss": 2.5276, "step": 2911 }, { "epoch": 0.3063650710152551, "grad_norm": 1.5277745723724365, "learning_rate": 0.00016256189030954538, "loss": 2.1005, "step": 2912 }, { "epoch": 0.30647027880063127, "grad_norm": 1.1211442947387695, "learning_rate": 0.00016253530193952517, "loss": 1.8205, "step": 2913 }, { "epoch": 0.30657548658600736, "grad_norm": 1.083178997039795, "learning_rate": 0.0001625087063074863, "loss": 2.128, "step": 2914 }, { "epoch": 0.30668069437138346, "grad_norm": 2.118847370147705, "learning_rate": 0.00016248210341651716, "loss": 1.9704, "step": 2915 }, { "epoch": 0.3067859021567596, "grad_norm": 1.371398687362671, "learning_rate": 0.00016245549326970713, "loss": 1.5593, "step": 2916 }, { "epoch": 0.3068911099421357, "grad_norm": 1.180167317390442, "learning_rate": 0.0001624288758701463, "loss": 1.7496, "step": 2917 }, { "epoch": 0.30699631772751185, "grad_norm": 1.3497270345687866, "learning_rate": 0.00016240225122092573, "loss": 2.0314, "step": 2918 }, { "epoch": 0.30710152551288794, "grad_norm": 1.6975984573364258, "learning_rate": 0.00016237561932513718, "loss": 1.77, "step": 2919 }, { "epoch": 0.3072067332982641, "grad_norm": 1.6393413543701172, "learning_rate": 0.00016234898018587337, "loss": 1.93, "step": 2920 }, { "epoch": 0.3073119410836402, "grad_norm": 1.7652052640914917, "learning_rate": 0.00016232233380622779, "loss": 1.8103, "step": 2921 }, { "epoch": 0.30741714886901633, "grad_norm": 1.489980936050415, "learning_rate": 0.00016229568018929483, "loss": 1.8286, "step": 2922 }, { "epoch": 0.3075223566543924, "grad_norm": 1.4050182104110718, "learning_rate": 0.00016226901933816962, "loss": 1.5679, "step": 2923 }, { "epoch": 0.3076275644397685, "grad_norm": 1.0518805980682373, "learning_rate": 0.0001622423512559483, "loss": 1.6626, "step": 2924 }, { "epoch": 0.30773277222514467, "grad_norm": 1.4546114206314087, "learning_rate": 0.00016221567594572762, "loss": 1.9034, "step": 2925 }, { "epoch": 0.30783798001052076, "grad_norm": 0.9748585820198059, "learning_rate": 0.00016218899341060542, "loss": 2.2178, "step": 2926 }, { "epoch": 0.3079431877958969, "grad_norm": 1.3530354499816895, "learning_rate": 0.00016216230365368017, "loss": 2.2276, "step": 2927 }, { "epoch": 0.308048395581273, "grad_norm": 0.9455937743186951, "learning_rate": 0.00016213560667805127, "loss": 1.9019, "step": 2928 }, { "epoch": 0.30815360336664915, "grad_norm": 1.0966365337371826, "learning_rate": 0.00016210890248681906, "loss": 1.5131, "step": 2929 }, { "epoch": 0.30825881115202525, "grad_norm": 0.9960981607437134, "learning_rate": 0.00016208219108308444, "loss": 1.9477, "step": 2930 }, { "epoch": 0.30836401893740134, "grad_norm": 1.0510518550872803, "learning_rate": 0.00016205547246994945, "loss": 1.7093, "step": 2931 }, { "epoch": 0.3084692267227775, "grad_norm": 1.185834527015686, "learning_rate": 0.00016202874665051674, "loss": 2.012, "step": 2932 }, { "epoch": 0.3085744345081536, "grad_norm": 0.9046087861061096, "learning_rate": 0.00016200201362788995, "loss": 1.9285, "step": 2933 }, { "epoch": 0.30867964229352973, "grad_norm": 1.357476830482483, "learning_rate": 0.00016197527340517352, "loss": 1.8586, "step": 2934 }, { "epoch": 0.3087848500789058, "grad_norm": 0.9783453941345215, "learning_rate": 0.00016194852598547263, "loss": 2.1739, "step": 2935 }, { "epoch": 0.308890057864282, "grad_norm": 1.2122621536254883, "learning_rate": 0.00016192177137189345, "loss": 1.7506, "step": 2936 }, { "epoch": 0.30899526564965807, "grad_norm": 1.8045505285263062, "learning_rate": 0.00016189500956754284, "loss": 1.656, "step": 2937 }, { "epoch": 0.3091004734350342, "grad_norm": 1.205743670463562, "learning_rate": 0.00016186824057552856, "loss": 1.9285, "step": 2938 }, { "epoch": 0.3092056812204103, "grad_norm": 1.1010884046554565, "learning_rate": 0.00016184146439895928, "loss": 2.1431, "step": 2939 }, { "epoch": 0.3093108890057864, "grad_norm": 0.7133980393409729, "learning_rate": 0.00016181468104094435, "loss": 2.1475, "step": 2940 }, { "epoch": 0.30941609679116255, "grad_norm": 1.0357547998428345, "learning_rate": 0.00016178789050459407, "loss": 1.8142, "step": 2941 }, { "epoch": 0.30952130457653865, "grad_norm": 1.1060856580734253, "learning_rate": 0.0001617610927930195, "loss": 1.4338, "step": 2942 }, { "epoch": 0.3096265123619148, "grad_norm": 2.330672264099121, "learning_rate": 0.00016173428790933265, "loss": 1.5461, "step": 2943 }, { "epoch": 0.3097317201472909, "grad_norm": 1.4529541730880737, "learning_rate": 0.0001617074758566462, "loss": 1.8743, "step": 2944 }, { "epoch": 0.30983692793266704, "grad_norm": 0.984163224697113, "learning_rate": 0.00016168065663807376, "loss": 2.2996, "step": 2945 }, { "epoch": 0.30994213571804313, "grad_norm": 1.3428571224212646, "learning_rate": 0.00016165383025672981, "loss": 2.0059, "step": 2946 }, { "epoch": 0.3100473435034192, "grad_norm": 0.8479394316673279, "learning_rate": 0.00016162699671572956, "loss": 2.0439, "step": 2947 }, { "epoch": 0.3101525512887954, "grad_norm": 0.9885584712028503, "learning_rate": 0.0001616001560181891, "loss": 2.2262, "step": 2948 }, { "epoch": 0.31025775907417147, "grad_norm": 1.3757953643798828, "learning_rate": 0.0001615733081672254, "loss": 1.9734, "step": 2949 }, { "epoch": 0.3103629668595476, "grad_norm": 1.0954573154449463, "learning_rate": 0.00016154645316595616, "loss": 1.7084, "step": 2950 }, { "epoch": 0.3104681746449237, "grad_norm": 1.236785888671875, "learning_rate": 0.00016151959101749996, "loss": 1.4454, "step": 2951 }, { "epoch": 0.31057338243029986, "grad_norm": 1.077486515045166, "learning_rate": 0.00016149272172497626, "loss": 1.6408, "step": 2952 }, { "epoch": 0.31067859021567595, "grad_norm": 0.9998572468757629, "learning_rate": 0.00016146584529150526, "loss": 2.4455, "step": 2953 }, { "epoch": 0.3107837980010521, "grad_norm": 1.189342737197876, "learning_rate": 0.00016143896172020808, "loss": 1.8845, "step": 2954 }, { "epoch": 0.3108890057864282, "grad_norm": 1.0967530012130737, "learning_rate": 0.00016141207101420655, "loss": 2.104, "step": 2955 }, { "epoch": 0.3109942135718043, "grad_norm": 1.4149389266967773, "learning_rate": 0.00016138517317662346, "loss": 1.6627, "step": 2956 }, { "epoch": 0.31109942135718044, "grad_norm": 0.9122399687767029, "learning_rate": 0.00016135826821058233, "loss": 1.7756, "step": 2957 }, { "epoch": 0.31120462914255653, "grad_norm": 1.4171504974365234, "learning_rate": 0.00016133135611920757, "loss": 1.9604, "step": 2958 }, { "epoch": 0.3113098369279327, "grad_norm": 1.4796384572982788, "learning_rate": 0.0001613044369056244, "loss": 1.6938, "step": 2959 }, { "epoch": 0.3114150447133088, "grad_norm": 1.3851723670959473, "learning_rate": 0.0001612775105729588, "loss": 1.7427, "step": 2960 }, { "epoch": 0.3115202524986849, "grad_norm": 1.3645011186599731, "learning_rate": 0.00016125057712433773, "loss": 1.7735, "step": 2961 }, { "epoch": 0.311625460284061, "grad_norm": 1.73271906375885, "learning_rate": 0.00016122363656288882, "loss": 2.1165, "step": 2962 }, { "epoch": 0.3117306680694371, "grad_norm": 0.9199891686439514, "learning_rate": 0.0001611966888917406, "loss": 1.6816, "step": 2963 }, { "epoch": 0.31183587585481326, "grad_norm": 1.2702879905700684, "learning_rate": 0.00016116973411402238, "loss": 2.1454, "step": 2964 }, { "epoch": 0.31194108364018935, "grad_norm": 1.4072431325912476, "learning_rate": 0.0001611427722328644, "loss": 1.9006, "step": 2965 }, { "epoch": 0.3120462914255655, "grad_norm": 0.7870166301727295, "learning_rate": 0.0001611158032513976, "loss": 1.8407, "step": 2966 }, { "epoch": 0.3121514992109416, "grad_norm": 1.3497145175933838, "learning_rate": 0.00016108882717275384, "loss": 1.6683, "step": 2967 }, { "epoch": 0.31225670699631775, "grad_norm": 1.090868592262268, "learning_rate": 0.00016106184400006569, "loss": 1.9826, "step": 2968 }, { "epoch": 0.31236191478169384, "grad_norm": 1.025602102279663, "learning_rate": 0.00016103485373646672, "loss": 2.011, "step": 2969 }, { "epoch": 0.31246712256707, "grad_norm": 1.3139764070510864, "learning_rate": 0.00016100785638509114, "loss": 2.148, "step": 2970 }, { "epoch": 0.3125723303524461, "grad_norm": 1.5432015657424927, "learning_rate": 0.00016098085194907413, "loss": 1.8339, "step": 2971 }, { "epoch": 0.3126775381378222, "grad_norm": 1.3246941566467285, "learning_rate": 0.00016095384043155156, "loss": 1.5852, "step": 2972 }, { "epoch": 0.3127827459231983, "grad_norm": 1.229443907737732, "learning_rate": 0.00016092682183566025, "loss": 1.4615, "step": 2973 }, { "epoch": 0.3128879537085744, "grad_norm": 1.3812941312789917, "learning_rate": 0.0001608997961645377, "loss": 2.0711, "step": 2974 }, { "epoch": 0.31299316149395057, "grad_norm": 1.3320645093917847, "learning_rate": 0.0001608727634213224, "loss": 2.0261, "step": 2975 }, { "epoch": 0.31309836927932666, "grad_norm": 1.8210208415985107, "learning_rate": 0.00016084572360915348, "loss": 1.314, "step": 2976 }, { "epoch": 0.3132035770647028, "grad_norm": 0.9022918939590454, "learning_rate": 0.00016081867673117106, "loss": 1.8564, "step": 2977 }, { "epoch": 0.3133087848500789, "grad_norm": 1.5104812383651733, "learning_rate": 0.00016079162279051602, "loss": 1.8883, "step": 2978 }, { "epoch": 0.313413992635455, "grad_norm": 1.4188772439956665, "learning_rate": 0.00016076456179032998, "loss": 2.1038, "step": 2979 }, { "epoch": 0.31351920042083115, "grad_norm": 1.3051005601882935, "learning_rate": 0.00016073749373375545, "loss": 2.2416, "step": 2980 }, { "epoch": 0.31362440820620724, "grad_norm": 1.0661115646362305, "learning_rate": 0.00016071041862393578, "loss": 1.8527, "step": 2981 }, { "epoch": 0.3137296159915834, "grad_norm": 1.1394139528274536, "learning_rate": 0.00016068333646401516, "loss": 1.468, "step": 2982 }, { "epoch": 0.3138348237769595, "grad_norm": 1.1041537523269653, "learning_rate": 0.00016065624725713847, "loss": 1.4634, "step": 2983 }, { "epoch": 0.31394003156233563, "grad_norm": 1.4715754985809326, "learning_rate": 0.00016062915100645153, "loss": 1.8815, "step": 2984 }, { "epoch": 0.3140452393477117, "grad_norm": 1.0146689414978027, "learning_rate": 0.0001606020477151009, "loss": 1.9733, "step": 2985 }, { "epoch": 0.3141504471330879, "grad_norm": 1.4208067655563354, "learning_rate": 0.00016057493738623406, "loss": 1.6583, "step": 2986 }, { "epoch": 0.31425565491846397, "grad_norm": 0.9717585444450378, "learning_rate": 0.0001605478200229992, "loss": 1.8575, "step": 2987 }, { "epoch": 0.31436086270384006, "grad_norm": 2.195236921310425, "learning_rate": 0.0001605206956285454, "loss": 1.4584, "step": 2988 }, { "epoch": 0.3144660704892162, "grad_norm": 1.941888451576233, "learning_rate": 0.00016049356420602247, "loss": 1.9882, "step": 2989 }, { "epoch": 0.3145712782745923, "grad_norm": 1.0348442792892456, "learning_rate": 0.00016046642575858115, "loss": 2.0475, "step": 2990 }, { "epoch": 0.31467648605996845, "grad_norm": 1.9281662702560425, "learning_rate": 0.00016043928028937292, "loss": 2.1587, "step": 2991 }, { "epoch": 0.31478169384534455, "grad_norm": 1.6395374536514282, "learning_rate": 0.00016041212780155007, "loss": 1.9435, "step": 2992 }, { "epoch": 0.3148869016307207, "grad_norm": 1.1363707780838013, "learning_rate": 0.0001603849682982658, "loss": 1.6754, "step": 2993 }, { "epoch": 0.3149921094160968, "grad_norm": 1.3249703645706177, "learning_rate": 0.00016035780178267394, "loss": 2.2371, "step": 2994 }, { "epoch": 0.3150973172014729, "grad_norm": 1.0637754201889038, "learning_rate": 0.00016033062825792935, "loss": 2.1387, "step": 2995 }, { "epoch": 0.31520252498684903, "grad_norm": 0.9320810437202454, "learning_rate": 0.00016030344772718756, "loss": 2.0091, "step": 2996 }, { "epoch": 0.3153077327722251, "grad_norm": 1.375025987625122, "learning_rate": 0.00016027626019360496, "loss": 2.2975, "step": 2997 }, { "epoch": 0.3154129405576013, "grad_norm": 1.532082200050354, "learning_rate": 0.00016024906566033874, "loss": 2.0983, "step": 2998 }, { "epoch": 0.31551814834297737, "grad_norm": 0.982284426689148, "learning_rate": 0.00016022186413054693, "loss": 1.4862, "step": 2999 }, { "epoch": 0.3156233561283535, "grad_norm": 1.5432162284851074, "learning_rate": 0.00016019465560738834, "loss": 2.0209, "step": 3000 }, { "epoch": 0.3157285639137296, "grad_norm": 1.4942352771759033, "learning_rate": 0.0001601674400940226, "loss": 2.1858, "step": 3001 }, { "epoch": 0.31583377169910576, "grad_norm": 1.9148730039596558, "learning_rate": 0.0001601402175936102, "loss": 1.2212, "step": 3002 }, { "epoch": 0.31593897948448185, "grad_norm": 1.283167839050293, "learning_rate": 0.00016011298810931232, "loss": 1.617, "step": 3003 }, { "epoch": 0.31604418726985795, "grad_norm": 1.2399694919586182, "learning_rate": 0.00016008575164429113, "loss": 1.8931, "step": 3004 }, { "epoch": 0.3161493950552341, "grad_norm": 1.6097055673599243, "learning_rate": 0.00016005850820170943, "loss": 1.6942, "step": 3005 }, { "epoch": 0.3162546028406102, "grad_norm": 0.8781225085258484, "learning_rate": 0.00016003125778473097, "loss": 1.8621, "step": 3006 }, { "epoch": 0.31635981062598634, "grad_norm": 1.7202116250991821, "learning_rate": 0.0001600040003965202, "loss": 2.01, "step": 3007 }, { "epoch": 0.31646501841136243, "grad_norm": 1.6193592548370361, "learning_rate": 0.00015997673604024244, "loss": 1.5022, "step": 3008 }, { "epoch": 0.3165702261967386, "grad_norm": 1.6493405103683472, "learning_rate": 0.00015994946471906382, "loss": 1.9556, "step": 3009 }, { "epoch": 0.3166754339821147, "grad_norm": 1.438834309577942, "learning_rate": 0.0001599221864361513, "loss": 2.3363, "step": 3010 }, { "epoch": 0.31678064176749077, "grad_norm": 1.1828105449676514, "learning_rate": 0.00015989490119467257, "loss": 2.0282, "step": 3011 }, { "epoch": 0.3168858495528669, "grad_norm": 1.8960435390472412, "learning_rate": 0.00015986760899779618, "loss": 1.9431, "step": 3012 }, { "epoch": 0.316991057338243, "grad_norm": 1.4587379693984985, "learning_rate": 0.0001598403098486915, "loss": 1.9177, "step": 3013 }, { "epoch": 0.31709626512361916, "grad_norm": 2.3861501216888428, "learning_rate": 0.00015981300375052872, "loss": 1.7569, "step": 3014 }, { "epoch": 0.31720147290899525, "grad_norm": 1.6264148950576782, "learning_rate": 0.00015978569070647876, "loss": 1.835, "step": 3015 }, { "epoch": 0.3173066806943714, "grad_norm": 1.7277299165725708, "learning_rate": 0.0001597583707197134, "loss": 2.2641, "step": 3016 }, { "epoch": 0.3174118884797475, "grad_norm": 1.0360989570617676, "learning_rate": 0.00015973104379340524, "loss": 1.9667, "step": 3017 }, { "epoch": 0.31751709626512364, "grad_norm": 0.7866742014884949, "learning_rate": 0.00015970370993072762, "loss": 1.7332, "step": 3018 }, { "epoch": 0.31762230405049974, "grad_norm": 1.069047212600708, "learning_rate": 0.0001596763691348548, "loss": 2.3551, "step": 3019 }, { "epoch": 0.31772751183587583, "grad_norm": 1.3254644870758057, "learning_rate": 0.00015964902140896175, "loss": 2.3056, "step": 3020 }, { "epoch": 0.317832719621252, "grad_norm": 0.88875812292099, "learning_rate": 0.00015962166675622424, "loss": 2.1039, "step": 3021 }, { "epoch": 0.3179379274066281, "grad_norm": 1.263662338256836, "learning_rate": 0.0001595943051798189, "loss": 2.3112, "step": 3022 }, { "epoch": 0.3180431351920042, "grad_norm": 0.8133673667907715, "learning_rate": 0.00015956693668292313, "loss": 2.2302, "step": 3023 }, { "epoch": 0.3181483429773803, "grad_norm": 3.3079943656921387, "learning_rate": 0.00015953956126871517, "loss": 2.2323, "step": 3024 }, { "epoch": 0.31825355076275647, "grad_norm": 1.0143603086471558, "learning_rate": 0.00015951217894037402, "loss": 2.016, "step": 3025 }, { "epoch": 0.31835875854813256, "grad_norm": 1.2601126432418823, "learning_rate": 0.0001594847897010795, "loss": 2.0657, "step": 3026 }, { "epoch": 0.31846396633350865, "grad_norm": 0.7915671467781067, "learning_rate": 0.00015945739355401222, "loss": 1.8548, "step": 3027 }, { "epoch": 0.3185691741188848, "grad_norm": 1.0004899501800537, "learning_rate": 0.0001594299905023536, "loss": 2.4322, "step": 3028 }, { "epoch": 0.3186743819042609, "grad_norm": 0.8855611681938171, "learning_rate": 0.0001594025805492859, "loss": 2.1558, "step": 3029 }, { "epoch": 0.31877958968963704, "grad_norm": 1.2269375324249268, "learning_rate": 0.00015937516369799216, "loss": 2.2797, "step": 3030 }, { "epoch": 0.31888479747501314, "grad_norm": 1.0635052919387817, "learning_rate": 0.00015934773995165613, "loss": 2.2713, "step": 3031 }, { "epoch": 0.3189900052603893, "grad_norm": 1.1982238292694092, "learning_rate": 0.0001593203093134625, "loss": 1.9763, "step": 3032 }, { "epoch": 0.3190952130457654, "grad_norm": 1.1229660511016846, "learning_rate": 0.0001592928717865967, "loss": 2.0421, "step": 3033 }, { "epoch": 0.31920042083114153, "grad_norm": 0.9558283686637878, "learning_rate": 0.00015926542737424492, "loss": 2.0596, "step": 3034 }, { "epoch": 0.3193056286165176, "grad_norm": 1.1449071168899536, "learning_rate": 0.00015923797607959422, "loss": 2.0033, "step": 3035 }, { "epoch": 0.3194108364018937, "grad_norm": 2.168914794921875, "learning_rate": 0.00015921051790583247, "loss": 1.6018, "step": 3036 }, { "epoch": 0.31951604418726987, "grad_norm": 0.9856159090995789, "learning_rate": 0.00015918305285614822, "loss": 1.8371, "step": 3037 }, { "epoch": 0.31962125197264596, "grad_norm": 1.085970163345337, "learning_rate": 0.0001591555809337309, "loss": 2.1444, "step": 3038 }, { "epoch": 0.3197264597580221, "grad_norm": 1.2033791542053223, "learning_rate": 0.0001591281021417708, "loss": 1.6321, "step": 3039 }, { "epoch": 0.3198316675433982, "grad_norm": 1.5555825233459473, "learning_rate": 0.0001591006164834589, "loss": 1.6976, "step": 3040 }, { "epoch": 0.31993687532877435, "grad_norm": 1.6176249980926514, "learning_rate": 0.00015907312396198697, "loss": 2.028, "step": 3041 }, { "epoch": 0.32004208311415044, "grad_norm": 1.1690542697906494, "learning_rate": 0.00015904562458054773, "loss": 1.9713, "step": 3042 }, { "epoch": 0.32014729089952654, "grad_norm": 1.2883795499801636, "learning_rate": 0.00015901811834233452, "loss": 2.1284, "step": 3043 }, { "epoch": 0.3202524986849027, "grad_norm": 1.3291345834732056, "learning_rate": 0.00015899060525054157, "loss": 1.5719, "step": 3044 }, { "epoch": 0.3203577064702788, "grad_norm": 1.5232844352722168, "learning_rate": 0.0001589630853083639, "loss": 1.8812, "step": 3045 }, { "epoch": 0.32046291425565493, "grad_norm": 0.951919436454773, "learning_rate": 0.0001589355585189973, "loss": 2.0791, "step": 3046 }, { "epoch": 0.320568122041031, "grad_norm": 1.3650517463684082, "learning_rate": 0.0001589080248856383, "loss": 1.834, "step": 3047 }, { "epoch": 0.32067332982640717, "grad_norm": 1.4583851099014282, "learning_rate": 0.00015888048441148442, "loss": 2.0812, "step": 3048 }, { "epoch": 0.32077853761178327, "grad_norm": 1.3375087976455688, "learning_rate": 0.00015885293709973374, "loss": 1.7964, "step": 3049 }, { "epoch": 0.3208837453971594, "grad_norm": 1.1333143711090088, "learning_rate": 0.0001588253829535853, "loss": 2.2698, "step": 3050 }, { "epoch": 0.3209889531825355, "grad_norm": 0.9990382790565491, "learning_rate": 0.0001587978219762388, "loss": 1.4186, "step": 3051 }, { "epoch": 0.3210941609679116, "grad_norm": 1.230421543121338, "learning_rate": 0.0001587702541708949, "loss": 1.8636, "step": 3052 }, { "epoch": 0.32119936875328775, "grad_norm": 1.4861817359924316, "learning_rate": 0.00015874267954075485, "loss": 2.0363, "step": 3053 }, { "epoch": 0.32130457653866384, "grad_norm": 1.4042917490005493, "learning_rate": 0.0001587150980890209, "loss": 1.6804, "step": 3054 }, { "epoch": 0.32140978432404, "grad_norm": 1.5827716588974, "learning_rate": 0.00015868750981889594, "loss": 1.7993, "step": 3055 }, { "epoch": 0.3215149921094161, "grad_norm": 1.274047613143921, "learning_rate": 0.00015865991473358373, "loss": 1.8368, "step": 3056 }, { "epoch": 0.32162019989479224, "grad_norm": 1.2829877138137817, "learning_rate": 0.00015863231283628877, "loss": 1.9838, "step": 3057 }, { "epoch": 0.32172540768016833, "grad_norm": 0.8687334060668945, "learning_rate": 0.00015860470413021642, "loss": 1.5084, "step": 3058 }, { "epoch": 0.3218306154655444, "grad_norm": 1.9248266220092773, "learning_rate": 0.00015857708861857274, "loss": 1.6113, "step": 3059 }, { "epoch": 0.32193582325092057, "grad_norm": 1.0948201417922974, "learning_rate": 0.00015854946630456467, "loss": 2.1865, "step": 3060 }, { "epoch": 0.32204103103629667, "grad_norm": 1.5295681953430176, "learning_rate": 0.00015852183719139985, "loss": 2.0163, "step": 3061 }, { "epoch": 0.3221462388216728, "grad_norm": 2.258373737335205, "learning_rate": 0.00015849420128228678, "loss": 2.2256, "step": 3062 }, { "epoch": 0.3222514466070489, "grad_norm": 1.5048192739486694, "learning_rate": 0.00015846655858043477, "loss": 1.7284, "step": 3063 }, { "epoch": 0.32235665439242506, "grad_norm": 1.728192925453186, "learning_rate": 0.0001584389090890538, "loss": 2.0966, "step": 3064 }, { "epoch": 0.32246186217780115, "grad_norm": 1.150688886642456, "learning_rate": 0.00015841125281135473, "loss": 1.9364, "step": 3065 }, { "epoch": 0.3225670699631773, "grad_norm": 1.404353141784668, "learning_rate": 0.0001583835897505493, "loss": 1.6709, "step": 3066 }, { "epoch": 0.3226722777485534, "grad_norm": 1.3692048788070679, "learning_rate": 0.00015835591990984974, "loss": 1.8023, "step": 3067 }, { "epoch": 0.3227774855339295, "grad_norm": 1.486565351486206, "learning_rate": 0.00015832824329246946, "loss": 1.8788, "step": 3068 }, { "epoch": 0.32288269331930564, "grad_norm": 0.9628434777259827, "learning_rate": 0.0001583005599016223, "loss": 2.3548, "step": 3069 }, { "epoch": 0.32298790110468173, "grad_norm": 0.9888852834701538, "learning_rate": 0.0001582728697405231, "loss": 2.1615, "step": 3070 }, { "epoch": 0.3230931088900579, "grad_norm": 1.8097474575042725, "learning_rate": 0.00015824517281238745, "loss": 1.4427, "step": 3071 }, { "epoch": 0.32319831667543397, "grad_norm": 1.5706349611282349, "learning_rate": 0.00015821746912043165, "loss": 1.9168, "step": 3072 }, { "epoch": 0.3233035244608101, "grad_norm": 3.1145801544189453, "learning_rate": 0.0001581897586678729, "loss": 1.4371, "step": 3073 }, { "epoch": 0.3234087322461862, "grad_norm": 1.5123764276504517, "learning_rate": 0.00015816204145792904, "loss": 1.9067, "step": 3074 }, { "epoch": 0.3235139400315623, "grad_norm": 0.8363044261932373, "learning_rate": 0.00015813431749381887, "loss": 2.0816, "step": 3075 }, { "epoch": 0.32361914781693846, "grad_norm": 1.4123753309249878, "learning_rate": 0.00015810658677876184, "loss": 1.9094, "step": 3076 }, { "epoch": 0.32372435560231455, "grad_norm": 1.268897533416748, "learning_rate": 0.0001580788493159782, "loss": 1.6229, "step": 3077 }, { "epoch": 0.3238295633876907, "grad_norm": 1.2455761432647705, "learning_rate": 0.00015805110510868907, "loss": 1.9262, "step": 3078 }, { "epoch": 0.3239347711730668, "grad_norm": 1.2342606782913208, "learning_rate": 0.00015802335416011625, "loss": 1.8135, "step": 3079 }, { "epoch": 0.32403997895844294, "grad_norm": 1.7797157764434814, "learning_rate": 0.00015799559647348236, "loss": 1.5395, "step": 3080 }, { "epoch": 0.32414518674381904, "grad_norm": 1.31904935836792, "learning_rate": 0.00015796783205201086, "loss": 1.5797, "step": 3081 }, { "epoch": 0.3242503945291952, "grad_norm": 1.5645530223846436, "learning_rate": 0.00015794006089892587, "loss": 1.6131, "step": 3082 }, { "epoch": 0.3243556023145713, "grad_norm": 1.483992099761963, "learning_rate": 0.00015791228301745245, "loss": 2.2207, "step": 3083 }, { "epoch": 0.32446081009994737, "grad_norm": 1.0606383085250854, "learning_rate": 0.00015788449841081626, "loss": 1.5808, "step": 3084 }, { "epoch": 0.3245660178853235, "grad_norm": 1.3656562566757202, "learning_rate": 0.00015785670708224389, "loss": 2.1471, "step": 3085 }, { "epoch": 0.3246712256706996, "grad_norm": 1.2391986846923828, "learning_rate": 0.00015782890903496264, "loss": 2.1729, "step": 3086 }, { "epoch": 0.32477643345607576, "grad_norm": 1.3538752794265747, "learning_rate": 0.0001578011042722006, "loss": 2.518, "step": 3087 }, { "epoch": 0.32488164124145186, "grad_norm": 1.5246939659118652, "learning_rate": 0.0001577732927971867, "loss": 2.2928, "step": 3088 }, { "epoch": 0.324986849026828, "grad_norm": 1.872592806816101, "learning_rate": 0.0001577454746131505, "loss": 2.0892, "step": 3089 }, { "epoch": 0.3250920568122041, "grad_norm": 1.573846697807312, "learning_rate": 0.00015771764972332254, "loss": 2.2432, "step": 3090 }, { "epoch": 0.3251972645975802, "grad_norm": 0.8727262020111084, "learning_rate": 0.00015768981813093393, "loss": 1.7948, "step": 3091 }, { "epoch": 0.32530247238295634, "grad_norm": 1.4603215456008911, "learning_rate": 0.00015766197983921673, "loss": 2.5792, "step": 3092 }, { "epoch": 0.32540768016833244, "grad_norm": 1.198878526687622, "learning_rate": 0.00015763413485140365, "loss": 2.1937, "step": 3093 }, { "epoch": 0.3255128879537086, "grad_norm": 1.553514003753662, "learning_rate": 0.00015760628317072834, "loss": 2.3651, "step": 3094 }, { "epoch": 0.3256180957390847, "grad_norm": 1.511500358581543, "learning_rate": 0.00015757842480042502, "loss": 2.1293, "step": 3095 }, { "epoch": 0.3257233035244608, "grad_norm": 1.5197930335998535, "learning_rate": 0.00015755055974372883, "loss": 2.1377, "step": 3096 }, { "epoch": 0.3258285113098369, "grad_norm": 1.308167815208435, "learning_rate": 0.00015752268800387563, "loss": 1.8966, "step": 3097 }, { "epoch": 0.32593371909521307, "grad_norm": 0.7427685260772705, "learning_rate": 0.0001574948095841021, "loss": 2.4438, "step": 3098 }, { "epoch": 0.32603892688058916, "grad_norm": 1.212828278541565, "learning_rate": 0.00015746692448764568, "loss": 1.8894, "step": 3099 }, { "epoch": 0.32614413466596526, "grad_norm": 1.2902162075042725, "learning_rate": 0.00015743903271774455, "loss": 1.7276, "step": 3100 }, { "epoch": 0.3262493424513414, "grad_norm": 1.072805404663086, "learning_rate": 0.0001574111342776377, "loss": 1.969, "step": 3101 }, { "epoch": 0.3263545502367175, "grad_norm": 3.502418279647827, "learning_rate": 0.00015738322917056486, "loss": 2.6105, "step": 3102 }, { "epoch": 0.32645975802209365, "grad_norm": 1.2981481552124023, "learning_rate": 0.00015735531739976657, "loss": 2.0216, "step": 3103 }, { "epoch": 0.32656496580746974, "grad_norm": 0.8937673568725586, "learning_rate": 0.00015732739896848414, "loss": 1.7309, "step": 3104 }, { "epoch": 0.3266701735928459, "grad_norm": 1.0238783359527588, "learning_rate": 0.00015729947387995962, "loss": 1.7886, "step": 3105 }, { "epoch": 0.326775381378222, "grad_norm": 1.0271786451339722, "learning_rate": 0.00015727154213743592, "loss": 1.7114, "step": 3106 }, { "epoch": 0.3268805891635981, "grad_norm": 1.1980253458023071, "learning_rate": 0.0001572436037441566, "loss": 2.0706, "step": 3107 }, { "epoch": 0.32698579694897423, "grad_norm": 1.797404408454895, "learning_rate": 0.0001572156587033661, "loss": 2.1992, "step": 3108 }, { "epoch": 0.3270910047343503, "grad_norm": 1.8935017585754395, "learning_rate": 0.00015718770701830955, "loss": 2.225, "step": 3109 }, { "epoch": 0.32719621251972647, "grad_norm": 1.513788104057312, "learning_rate": 0.0001571597486922329, "loss": 2.0227, "step": 3110 }, { "epoch": 0.32730142030510256, "grad_norm": 0.831935703754425, "learning_rate": 0.00015713178372838286, "loss": 1.5778, "step": 3111 }, { "epoch": 0.3274066280904787, "grad_norm": 1.2550475597381592, "learning_rate": 0.0001571038121300069, "loss": 1.7091, "step": 3112 }, { "epoch": 0.3275118358758548, "grad_norm": 1.2048956155776978, "learning_rate": 0.00015707583390035327, "loss": 2.2076, "step": 3113 }, { "epoch": 0.32761704366123096, "grad_norm": 1.4993027448654175, "learning_rate": 0.00015704784904267097, "loss": 2.2555, "step": 3114 }, { "epoch": 0.32772225144660705, "grad_norm": 1.5597890615463257, "learning_rate": 0.00015701985756020985, "loss": 1.8569, "step": 3115 }, { "epoch": 0.32782745923198314, "grad_norm": 1.346541166305542, "learning_rate": 0.00015699185945622043, "loss": 2.031, "step": 3116 }, { "epoch": 0.3279326670173593, "grad_norm": 1.1803152561187744, "learning_rate": 0.00015696385473395403, "loss": 1.9566, "step": 3117 }, { "epoch": 0.3280378748027354, "grad_norm": 1.5781463384628296, "learning_rate": 0.00015693584339666279, "loss": 2.0671, "step": 3118 }, { "epoch": 0.32814308258811153, "grad_norm": 1.163499355316162, "learning_rate": 0.0001569078254475995, "loss": 2.3857, "step": 3119 }, { "epoch": 0.32824829037348763, "grad_norm": 1.9654390811920166, "learning_rate": 0.00015687980089001787, "loss": 2.0257, "step": 3120 }, { "epoch": 0.3283534981588638, "grad_norm": 1.0074301958084106, "learning_rate": 0.00015685176972717223, "loss": 2.3659, "step": 3121 }, { "epoch": 0.32845870594423987, "grad_norm": 1.2155638933181763, "learning_rate": 0.00015682373196231782, "loss": 2.0024, "step": 3122 }, { "epoch": 0.32856391372961596, "grad_norm": 1.3712031841278076, "learning_rate": 0.0001567956875987105, "loss": 1.7298, "step": 3123 }, { "epoch": 0.3286691215149921, "grad_norm": 1.682850956916809, "learning_rate": 0.000156767636639607, "loss": 2.3542, "step": 3124 }, { "epoch": 0.3287743293003682, "grad_norm": 1.4702867269515991, "learning_rate": 0.00015673957908826479, "loss": 2.0367, "step": 3125 }, { "epoch": 0.32887953708574436, "grad_norm": 1.9041625261306763, "learning_rate": 0.00015671151494794211, "loss": 2.015, "step": 3126 }, { "epoch": 0.32898474487112045, "grad_norm": 1.1154800653457642, "learning_rate": 0.00015668344422189794, "loss": 1.9378, "step": 3127 }, { "epoch": 0.3290899526564966, "grad_norm": 1.1576582193374634, "learning_rate": 0.00015665536691339207, "loss": 2.1949, "step": 3128 }, { "epoch": 0.3291951604418727, "grad_norm": 0.9152946472167969, "learning_rate": 0.00015662728302568498, "loss": 1.8082, "step": 3129 }, { "epoch": 0.32930036822724884, "grad_norm": 1.0784518718719482, "learning_rate": 0.00015659919256203795, "loss": 2.1528, "step": 3130 }, { "epoch": 0.32940557601262493, "grad_norm": 1.0181763172149658, "learning_rate": 0.00015657109552571312, "loss": 2.1109, "step": 3131 }, { "epoch": 0.32951078379800103, "grad_norm": 1.47239351272583, "learning_rate": 0.00015654299191997324, "loss": 2.1156, "step": 3132 }, { "epoch": 0.3296159915833772, "grad_norm": 1.092498540878296, "learning_rate": 0.0001565148817480819, "loss": 1.7449, "step": 3133 }, { "epoch": 0.32972119936875327, "grad_norm": 0.9805752635002136, "learning_rate": 0.00015648676501330342, "loss": 1.804, "step": 3134 }, { "epoch": 0.3298264071541294, "grad_norm": 1.1172640323638916, "learning_rate": 0.00015645864171890295, "loss": 1.6379, "step": 3135 }, { "epoch": 0.3299316149395055, "grad_norm": 1.313032627105713, "learning_rate": 0.0001564305118681463, "loss": 2.2116, "step": 3136 }, { "epoch": 0.33003682272488166, "grad_norm": 1.0228915214538574, "learning_rate": 0.00015640237546430018, "loss": 2.1348, "step": 3137 }, { "epoch": 0.33014203051025776, "grad_norm": 1.2504109144210815, "learning_rate": 0.00015637423251063185, "loss": 1.7011, "step": 3138 }, { "epoch": 0.33024723829563385, "grad_norm": 1.2419320344924927, "learning_rate": 0.00015634608301040958, "loss": 1.5222, "step": 3139 }, { "epoch": 0.33035244608101, "grad_norm": 1.6612623929977417, "learning_rate": 0.00015631792696690225, "loss": 1.9523, "step": 3140 }, { "epoch": 0.3304576538663861, "grad_norm": 1.404956340789795, "learning_rate": 0.00015628976438337948, "loss": 1.6778, "step": 3141 }, { "epoch": 0.33056286165176224, "grad_norm": 2.2217705249786377, "learning_rate": 0.00015626159526311174, "loss": 1.7111, "step": 3142 }, { "epoch": 0.33066806943713833, "grad_norm": 1.682693362236023, "learning_rate": 0.0001562334196093702, "loss": 1.9422, "step": 3143 }, { "epoch": 0.3307732772225145, "grad_norm": 2.735732078552246, "learning_rate": 0.00015620523742542687, "loss": 1.8769, "step": 3144 }, { "epoch": 0.3308784850078906, "grad_norm": 1.4678375720977783, "learning_rate": 0.00015617704871455433, "loss": 2.3066, "step": 3145 }, { "epoch": 0.3309836927932667, "grad_norm": 1.5375454425811768, "learning_rate": 0.00015614885348002612, "loss": 1.7219, "step": 3146 }, { "epoch": 0.3310889005786428, "grad_norm": 1.09225332736969, "learning_rate": 0.00015612065172511646, "loss": 1.9515, "step": 3147 }, { "epoch": 0.3311941083640189, "grad_norm": 1.3868629932403564, "learning_rate": 0.0001560924434531003, "loss": 2.0467, "step": 3148 }, { "epoch": 0.33129931614939506, "grad_norm": 1.3525406122207642, "learning_rate": 0.00015606422866725343, "loss": 1.7597, "step": 3149 }, { "epoch": 0.33140452393477116, "grad_norm": 1.2624315023422241, "learning_rate": 0.00015603600737085227, "loss": 1.9116, "step": 3150 }, { "epoch": 0.3315097317201473, "grad_norm": 1.287229299545288, "learning_rate": 0.00015600777956717408, "loss": 1.8875, "step": 3151 }, { "epoch": 0.3316149395055234, "grad_norm": 1.0664699077606201, "learning_rate": 0.0001559795452594969, "loss": 1.8313, "step": 3152 }, { "epoch": 0.33172014729089955, "grad_norm": 1.5198394060134888, "learning_rate": 0.00015595130445109946, "loss": 1.6447, "step": 3153 }, { "epoch": 0.33182535507627564, "grad_norm": 1.22080659866333, "learning_rate": 0.0001559230571452613, "loss": 2.5198, "step": 3154 }, { "epoch": 0.33193056286165173, "grad_norm": 1.2474714517593384, "learning_rate": 0.00015589480334526266, "loss": 1.736, "step": 3155 }, { "epoch": 0.3320357706470279, "grad_norm": 1.2309380769729614, "learning_rate": 0.00015586654305438456, "loss": 1.5404, "step": 3156 }, { "epoch": 0.332140978432404, "grad_norm": 1.3968786001205444, "learning_rate": 0.00015583827627590875, "loss": 1.7834, "step": 3157 }, { "epoch": 0.3322461862177801, "grad_norm": 1.3345340490341187, "learning_rate": 0.00015581000301311782, "loss": 1.7573, "step": 3158 }, { "epoch": 0.3323513940031562, "grad_norm": 1.5625921487808228, "learning_rate": 0.00015578172326929498, "loss": 1.8578, "step": 3159 }, { "epoch": 0.33245660178853237, "grad_norm": 0.9418458342552185, "learning_rate": 0.0001557534370477243, "loss": 2.0889, "step": 3160 }, { "epoch": 0.33256180957390846, "grad_norm": 1.5350128412246704, "learning_rate": 0.00015572514435169063, "loss": 1.1002, "step": 3161 }, { "epoch": 0.3326670173592846, "grad_norm": 1.865135908126831, "learning_rate": 0.0001556968451844794, "loss": 1.707, "step": 3162 }, { "epoch": 0.3327722251446607, "grad_norm": 1.427790641784668, "learning_rate": 0.00015566853954937694, "loss": 1.8425, "step": 3163 }, { "epoch": 0.3328774329300368, "grad_norm": 1.1270205974578857, "learning_rate": 0.0001556402274496703, "loss": 1.5166, "step": 3164 }, { "epoch": 0.33298264071541295, "grad_norm": 1.270928978919983, "learning_rate": 0.0001556119088886473, "loss": 1.4399, "step": 3165 }, { "epoch": 0.33308784850078904, "grad_norm": 1.3536688089370728, "learning_rate": 0.0001555835838695964, "loss": 1.5239, "step": 3166 }, { "epoch": 0.3331930562861652, "grad_norm": 1.0648913383483887, "learning_rate": 0.00015555525239580698, "loss": 2.1152, "step": 3167 }, { "epoch": 0.3332982640715413, "grad_norm": 1.2379913330078125, "learning_rate": 0.00015552691447056903, "loss": 2.2395, "step": 3168 }, { "epoch": 0.33340347185691743, "grad_norm": 1.243102788925171, "learning_rate": 0.0001554985700971733, "loss": 1.8046, "step": 3169 }, { "epoch": 0.3335086796422935, "grad_norm": 1.4841232299804688, "learning_rate": 0.00015547021927891144, "loss": 1.4817, "step": 3170 }, { "epoch": 0.3336138874276697, "grad_norm": 1.3383890390396118, "learning_rate": 0.00015544186201907562, "loss": 1.7118, "step": 3171 }, { "epoch": 0.33371909521304577, "grad_norm": 1.7296276092529297, "learning_rate": 0.00015541349832095896, "loss": 1.689, "step": 3172 }, { "epoch": 0.33382430299842186, "grad_norm": 1.2010234594345093, "learning_rate": 0.0001553851281878552, "loss": 2.0383, "step": 3173 }, { "epoch": 0.333929510783798, "grad_norm": 1.05833899974823, "learning_rate": 0.00015535675162305887, "loss": 1.6341, "step": 3174 }, { "epoch": 0.3340347185691741, "grad_norm": 1.8304420709609985, "learning_rate": 0.0001553283686298653, "loss": 2.2299, "step": 3175 }, { "epoch": 0.33413992635455025, "grad_norm": 1.1802849769592285, "learning_rate": 0.00015529997921157044, "loss": 1.7604, "step": 3176 }, { "epoch": 0.33424513413992635, "grad_norm": 1.0604053735733032, "learning_rate": 0.00015527158337147112, "loss": 2.3878, "step": 3177 }, { "epoch": 0.3343503419253025, "grad_norm": 1.17803955078125, "learning_rate": 0.0001552431811128648, "loss": 2.2497, "step": 3178 }, { "epoch": 0.3344555497106786, "grad_norm": 1.691798210144043, "learning_rate": 0.0001552147724390498, "loss": 2.2167, "step": 3179 }, { "epoch": 0.3345607574960547, "grad_norm": 1.0652263164520264, "learning_rate": 0.00015518635735332507, "loss": 1.471, "step": 3180 }, { "epoch": 0.33466596528143083, "grad_norm": 1.2501590251922607, "learning_rate": 0.00015515793585899038, "loss": 1.8941, "step": 3181 }, { "epoch": 0.3347711730668069, "grad_norm": 0.7410596013069153, "learning_rate": 0.00015512950795934627, "loss": 1.897, "step": 3182 }, { "epoch": 0.3348763808521831, "grad_norm": 1.589612603187561, "learning_rate": 0.0001551010736576939, "loss": 2.0134, "step": 3183 }, { "epoch": 0.33498158863755917, "grad_norm": 1.5273739099502563, "learning_rate": 0.00015507263295733528, "loss": 2.0521, "step": 3184 }, { "epoch": 0.3350867964229353, "grad_norm": 1.4499183893203735, "learning_rate": 0.00015504418586157316, "loss": 2.2479, "step": 3185 }, { "epoch": 0.3351920042083114, "grad_norm": 1.040443778038025, "learning_rate": 0.000155015732373711, "loss": 2.3157, "step": 3186 }, { "epoch": 0.33529721199368756, "grad_norm": 1.6593208312988281, "learning_rate": 0.000154987272497053, "loss": 2.2596, "step": 3187 }, { "epoch": 0.33540241977906365, "grad_norm": 2.389070510864258, "learning_rate": 0.0001549588062349041, "loss": 1.6402, "step": 3188 }, { "epoch": 0.33550762756443975, "grad_norm": 1.5094181299209595, "learning_rate": 0.00015493033359057003, "loss": 1.8529, "step": 3189 }, { "epoch": 0.3356128353498159, "grad_norm": 0.9902091026306152, "learning_rate": 0.00015490185456735719, "loss": 2.0056, "step": 3190 }, { "epoch": 0.335718043135192, "grad_norm": 1.285557508468628, "learning_rate": 0.00015487336916857278, "loss": 2.1874, "step": 3191 }, { "epoch": 0.33582325092056814, "grad_norm": 1.2860695123672485, "learning_rate": 0.00015484487739752468, "loss": 2.1247, "step": 3192 }, { "epoch": 0.33592845870594423, "grad_norm": 1.0790120363235474, "learning_rate": 0.00015481637925752155, "loss": 2.0198, "step": 3193 }, { "epoch": 0.3360336664913204, "grad_norm": 1.1655452251434326, "learning_rate": 0.00015478787475187283, "loss": 1.522, "step": 3194 }, { "epoch": 0.3361388742766965, "grad_norm": 1.2023252248764038, "learning_rate": 0.00015475936388388862, "loss": 1.7807, "step": 3195 }, { "epoch": 0.33624408206207257, "grad_norm": 1.0464221239089966, "learning_rate": 0.00015473084665687984, "loss": 1.6904, "step": 3196 }, { "epoch": 0.3363492898474487, "grad_norm": 0.9979501366615295, "learning_rate": 0.00015470232307415803, "loss": 1.8852, "step": 3197 }, { "epoch": 0.3364544976328248, "grad_norm": 1.2959654331207275, "learning_rate": 0.00015467379313903557, "loss": 2.162, "step": 3198 }, { "epoch": 0.33655970541820096, "grad_norm": 1.7765225172042847, "learning_rate": 0.00015464525685482557, "loss": 1.5029, "step": 3199 }, { "epoch": 0.33666491320357705, "grad_norm": 1.5161840915679932, "learning_rate": 0.00015461671422484178, "loss": 1.7194, "step": 3200 }, { "epoch": 0.3367701209889532, "grad_norm": 1.1318210363388062, "learning_rate": 0.00015458816525239886, "loss": 1.8918, "step": 3201 }, { "epoch": 0.3368753287743293, "grad_norm": 1.2241750955581665, "learning_rate": 0.00015455960994081205, "loss": 1.732, "step": 3202 }, { "epoch": 0.33698053655970545, "grad_norm": 1.930464267730713, "learning_rate": 0.0001545310482933974, "loss": 1.8916, "step": 3203 }, { "epoch": 0.33708574434508154, "grad_norm": 1.4464246034622192, "learning_rate": 0.0001545024803134717, "loss": 1.728, "step": 3204 }, { "epoch": 0.33719095213045763, "grad_norm": 1.4596894979476929, "learning_rate": 0.00015447390600435238, "loss": 1.5601, "step": 3205 }, { "epoch": 0.3372961599158338, "grad_norm": 1.1431268453598022, "learning_rate": 0.00015444532536935777, "loss": 2.2355, "step": 3206 }, { "epoch": 0.3374013677012099, "grad_norm": 1.3519384860992432, "learning_rate": 0.0001544167384118068, "loss": 1.5459, "step": 3207 }, { "epoch": 0.337506575486586, "grad_norm": 1.1788008213043213, "learning_rate": 0.00015438814513501922, "loss": 2.1492, "step": 3208 }, { "epoch": 0.3376117832719621, "grad_norm": 1.527076244354248, "learning_rate": 0.00015435954554231541, "loss": 1.882, "step": 3209 }, { "epoch": 0.33771699105733827, "grad_norm": 1.9754518270492554, "learning_rate": 0.0001543309396370166, "loss": 1.8531, "step": 3210 }, { "epoch": 0.33782219884271436, "grad_norm": 1.0372203588485718, "learning_rate": 0.00015430232742244467, "loss": 2.1218, "step": 3211 }, { "epoch": 0.33792740662809045, "grad_norm": 1.702439785003662, "learning_rate": 0.00015427370890192224, "loss": 1.7968, "step": 3212 }, { "epoch": 0.3380326144134666, "grad_norm": 1.84778892993927, "learning_rate": 0.0001542450840787727, "loss": 1.9588, "step": 3213 }, { "epoch": 0.3381378221988427, "grad_norm": 1.402566909790039, "learning_rate": 0.00015421645295632023, "loss": 2.245, "step": 3214 }, { "epoch": 0.33824302998421885, "grad_norm": 1.550502061843872, "learning_rate": 0.0001541878155378896, "loss": 1.999, "step": 3215 }, { "epoch": 0.33834823776959494, "grad_norm": 2.043599843978882, "learning_rate": 0.00015415917182680638, "loss": 1.7732, "step": 3216 }, { "epoch": 0.3384534455549711, "grad_norm": 1.1463595628738403, "learning_rate": 0.00015413052182639683, "loss": 1.9726, "step": 3217 }, { "epoch": 0.3385586533403472, "grad_norm": 1.1954436302185059, "learning_rate": 0.0001541018655399881, "loss": 2.0425, "step": 3218 }, { "epoch": 0.33866386112572333, "grad_norm": 1.8237320184707642, "learning_rate": 0.00015407320297090786, "loss": 2.3845, "step": 3219 }, { "epoch": 0.3387690689110994, "grad_norm": 1.442068338394165, "learning_rate": 0.0001540445341224846, "loss": 1.988, "step": 3220 }, { "epoch": 0.3388742766964755, "grad_norm": 1.3725106716156006, "learning_rate": 0.00015401585899804755, "loss": 2.2294, "step": 3221 }, { "epoch": 0.33897948448185167, "grad_norm": 1.1935747861862183, "learning_rate": 0.00015398717760092666, "loss": 2.0404, "step": 3222 }, { "epoch": 0.33908469226722776, "grad_norm": 1.4062366485595703, "learning_rate": 0.00015395848993445265, "loss": 1.838, "step": 3223 }, { "epoch": 0.3391899000526039, "grad_norm": 1.1401127576828003, "learning_rate": 0.00015392979600195684, "loss": 1.7744, "step": 3224 }, { "epoch": 0.33929510783798, "grad_norm": 1.2197401523590088, "learning_rate": 0.00015390109580677144, "loss": 1.9057, "step": 3225 }, { "epoch": 0.33940031562335615, "grad_norm": 1.2865253686904907, "learning_rate": 0.00015387238935222927, "loss": 1.8107, "step": 3226 }, { "epoch": 0.33950552340873225, "grad_norm": 1.3906419277191162, "learning_rate": 0.0001538436766416639, "loss": 1.5141, "step": 3227 }, { "epoch": 0.33961073119410834, "grad_norm": 1.4416093826293945, "learning_rate": 0.00015381495767840967, "loss": 1.9024, "step": 3228 }, { "epoch": 0.3397159389794845, "grad_norm": 1.0017902851104736, "learning_rate": 0.00015378623246580165, "loss": 1.5883, "step": 3229 }, { "epoch": 0.3398211467648606, "grad_norm": 1.0945497751235962, "learning_rate": 0.00015375750100717555, "loss": 2.3554, "step": 3230 }, { "epoch": 0.33992635455023673, "grad_norm": 1.2821848392486572, "learning_rate": 0.00015372876330586784, "loss": 2.1862, "step": 3231 }, { "epoch": 0.3400315623356128, "grad_norm": 0.8067452311515808, "learning_rate": 0.00015370001936521583, "loss": 2.2071, "step": 3232 }, { "epoch": 0.340136770120989, "grad_norm": 1.01970374584198, "learning_rate": 0.00015367126918855738, "loss": 1.802, "step": 3233 }, { "epoch": 0.34024197790636507, "grad_norm": 0.9139577746391296, "learning_rate": 0.00015364251277923114, "loss": 1.951, "step": 3234 }, { "epoch": 0.3403471856917412, "grad_norm": 1.34104323387146, "learning_rate": 0.00015361375014057656, "loss": 1.7029, "step": 3235 }, { "epoch": 0.3404523934771173, "grad_norm": 1.2936007976531982, "learning_rate": 0.00015358498127593376, "loss": 2.0298, "step": 3236 }, { "epoch": 0.3405576012624934, "grad_norm": 1.3565287590026855, "learning_rate": 0.00015355620618864348, "loss": 1.7207, "step": 3237 }, { "epoch": 0.34066280904786955, "grad_norm": 1.0391281843185425, "learning_rate": 0.00015352742488204733, "loss": 1.8902, "step": 3238 }, { "epoch": 0.34076801683324565, "grad_norm": 1.0751245021820068, "learning_rate": 0.0001534986373594876, "loss": 1.6464, "step": 3239 }, { "epoch": 0.3408732246186218, "grad_norm": 1.167717456817627, "learning_rate": 0.0001534698436243073, "loss": 1.8188, "step": 3240 }, { "epoch": 0.3409784324039979, "grad_norm": 1.1624265909194946, "learning_rate": 0.00015344104367985014, "loss": 2.048, "step": 3241 }, { "epoch": 0.34108364018937404, "grad_norm": 1.119253158569336, "learning_rate": 0.00015341223752946052, "loss": 2.1498, "step": 3242 }, { "epoch": 0.34118884797475013, "grad_norm": 2.0956761837005615, "learning_rate": 0.00015338342517648367, "loss": 2.0089, "step": 3243 }, { "epoch": 0.3412940557601262, "grad_norm": 2.5349323749542236, "learning_rate": 0.0001533546066242654, "loss": 1.4671, "step": 3244 }, { "epoch": 0.3413992635455024, "grad_norm": 1.1138498783111572, "learning_rate": 0.0001533257818761524, "loss": 2.0825, "step": 3245 }, { "epoch": 0.34150447133087847, "grad_norm": 1.0680429935455322, "learning_rate": 0.00015329695093549192, "loss": 1.474, "step": 3246 }, { "epoch": 0.3416096791162546, "grad_norm": 1.8456655740737915, "learning_rate": 0.00015326811380563204, "loss": 2.3713, "step": 3247 }, { "epoch": 0.3417148869016307, "grad_norm": 1.1764317750930786, "learning_rate": 0.0001532392704899215, "loss": 2.155, "step": 3248 }, { "epoch": 0.34182009468700686, "grad_norm": 1.6202481985092163, "learning_rate": 0.0001532104209917098, "loss": 1.6939, "step": 3249 }, { "epoch": 0.34192530247238295, "grad_norm": 0.7920756340026855, "learning_rate": 0.00015318156531434713, "loss": 1.8199, "step": 3250 }, { "epoch": 0.3420305102577591, "grad_norm": 1.4184741973876953, "learning_rate": 0.00015315270346118442, "loss": 1.917, "step": 3251 }, { "epoch": 0.3421357180431352, "grad_norm": 1.0654038190841675, "learning_rate": 0.00015312383543557328, "loss": 1.8263, "step": 3252 }, { "epoch": 0.3422409258285113, "grad_norm": 0.845812976360321, "learning_rate": 0.00015309496124086603, "loss": 1.9213, "step": 3253 }, { "epoch": 0.34234613361388744, "grad_norm": 1.5473440885543823, "learning_rate": 0.0001530660808804158, "loss": 1.8193, "step": 3254 }, { "epoch": 0.34245134139926353, "grad_norm": 2.408548593521118, "learning_rate": 0.00015303719435757633, "loss": 1.9995, "step": 3255 }, { "epoch": 0.3425565491846397, "grad_norm": 1.4737664461135864, "learning_rate": 0.0001530083016757021, "loss": 1.8523, "step": 3256 }, { "epoch": 0.3426617569700158, "grad_norm": 1.6030080318450928, "learning_rate": 0.0001529794028381484, "loss": 1.8994, "step": 3257 }, { "epoch": 0.3427669647553919, "grad_norm": 1.5346298217773438, "learning_rate": 0.00015295049784827108, "loss": 1.7178, "step": 3258 }, { "epoch": 0.342872172540768, "grad_norm": 1.2144919633865356, "learning_rate": 0.0001529215867094268, "loss": 2.0721, "step": 3259 }, { "epoch": 0.3429773803261441, "grad_norm": 1.6161445379257202, "learning_rate": 0.00015289266942497293, "loss": 2.2569, "step": 3260 }, { "epoch": 0.34308258811152026, "grad_norm": 1.4736202955245972, "learning_rate": 0.00015286374599826754, "loss": 1.9167, "step": 3261 }, { "epoch": 0.34318779589689635, "grad_norm": 1.4252387285232544, "learning_rate": 0.0001528348164326694, "loss": 2.426, "step": 3262 }, { "epoch": 0.3432930036822725, "grad_norm": 0.9563824534416199, "learning_rate": 0.000152805880731538, "loss": 2.0023, "step": 3263 }, { "epoch": 0.3433982114676486, "grad_norm": 1.182991862297058, "learning_rate": 0.00015277693889823355, "loss": 1.8019, "step": 3264 }, { "epoch": 0.34350341925302474, "grad_norm": 0.8522358536720276, "learning_rate": 0.000152747990936117, "loss": 1.7264, "step": 3265 }, { "epoch": 0.34360862703840084, "grad_norm": 1.2024897336959839, "learning_rate": 0.0001527190368485499, "loss": 1.8793, "step": 3266 }, { "epoch": 0.343713834823777, "grad_norm": 1.762102484703064, "learning_rate": 0.0001526900766388947, "loss": 1.6688, "step": 3267 }, { "epoch": 0.3438190426091531, "grad_norm": 1.8461966514587402, "learning_rate": 0.00015266111031051442, "loss": 1.385, "step": 3268 }, { "epoch": 0.3439242503945292, "grad_norm": 1.0345498323440552, "learning_rate": 0.00015263213786677278, "loss": 1.694, "step": 3269 }, { "epoch": 0.3440294581799053, "grad_norm": 1.057798981666565, "learning_rate": 0.00015260315931103427, "loss": 1.4546, "step": 3270 }, { "epoch": 0.3441346659652814, "grad_norm": 1.6364519596099854, "learning_rate": 0.00015257417464666412, "loss": 1.8252, "step": 3271 }, { "epoch": 0.34423987375065757, "grad_norm": 0.9418036341667175, "learning_rate": 0.0001525451838770282, "loss": 2.3333, "step": 3272 }, { "epoch": 0.34434508153603366, "grad_norm": 1.4215340614318848, "learning_rate": 0.00015251618700549307, "loss": 1.7159, "step": 3273 }, { "epoch": 0.3444502893214098, "grad_norm": 0.8506739139556885, "learning_rate": 0.0001524871840354261, "loss": 2.1433, "step": 3274 }, { "epoch": 0.3445554971067859, "grad_norm": 1.7179830074310303, "learning_rate": 0.00015245817497019524, "loss": 2.0006, "step": 3275 }, { "epoch": 0.344660704892162, "grad_norm": 1.3353736400604248, "learning_rate": 0.0001524291598131693, "loss": 2.0854, "step": 3276 }, { "epoch": 0.34476591267753814, "grad_norm": 1.2626408338546753, "learning_rate": 0.00015240013856771768, "loss": 1.9261, "step": 3277 }, { "epoch": 0.34487112046291424, "grad_norm": 1.2768040895462036, "learning_rate": 0.00015237111123721052, "loss": 2.1543, "step": 3278 }, { "epoch": 0.3449763282482904, "grad_norm": 1.1452258825302124, "learning_rate": 0.00015234207782501865, "loss": 1.9403, "step": 3279 }, { "epoch": 0.3450815360336665, "grad_norm": 1.7478617429733276, "learning_rate": 0.0001523130383345136, "loss": 1.8556, "step": 3280 }, { "epoch": 0.34518674381904263, "grad_norm": 1.261497974395752, "learning_rate": 0.00015228399276906774, "loss": 2.3515, "step": 3281 }, { "epoch": 0.3452919516044187, "grad_norm": 2.1745786666870117, "learning_rate": 0.00015225494113205393, "loss": 1.8923, "step": 3282 }, { "epoch": 0.34539715938979487, "grad_norm": 1.2414960861206055, "learning_rate": 0.0001522258834268459, "loss": 1.8377, "step": 3283 }, { "epoch": 0.34550236717517097, "grad_norm": 1.666604995727539, "learning_rate": 0.00015219681965681798, "loss": 2.1966, "step": 3284 }, { "epoch": 0.34560757496054706, "grad_norm": 2.886913776397705, "learning_rate": 0.0001521677498253453, "loss": 1.9478, "step": 3285 }, { "epoch": 0.3457127827459232, "grad_norm": 1.258230447769165, "learning_rate": 0.00015213867393580358, "loss": 2.0871, "step": 3286 }, { "epoch": 0.3458179905312993, "grad_norm": 1.2245984077453613, "learning_rate": 0.0001521095919915694, "loss": 1.9927, "step": 3287 }, { "epoch": 0.34592319831667545, "grad_norm": 1.6182457208633423, "learning_rate": 0.00015208050399601985, "loss": 1.6089, "step": 3288 }, { "epoch": 0.34602840610205154, "grad_norm": 1.1847516298294067, "learning_rate": 0.00015205140995253283, "loss": 2.0237, "step": 3289 }, { "epoch": 0.3461336138874277, "grad_norm": 1.0616167783737183, "learning_rate": 0.00015202230986448704, "loss": 1.7928, "step": 3290 }, { "epoch": 0.3462388216728038, "grad_norm": 1.7937065362930298, "learning_rate": 0.0001519932037352617, "loss": 2.0342, "step": 3291 }, { "epoch": 0.3463440294581799, "grad_norm": 1.3131473064422607, "learning_rate": 0.0001519640915682368, "loss": 1.8552, "step": 3292 }, { "epoch": 0.34644923724355603, "grad_norm": 1.2970378398895264, "learning_rate": 0.0001519349733667931, "loss": 1.7052, "step": 3293 }, { "epoch": 0.3465544450289321, "grad_norm": 1.1303088665008545, "learning_rate": 0.00015190584913431194, "loss": 1.8543, "step": 3294 }, { "epoch": 0.3466596528143083, "grad_norm": 1.530392050743103, "learning_rate": 0.00015187671887417542, "loss": 2.0367, "step": 3295 }, { "epoch": 0.34676486059968437, "grad_norm": 1.7276886701583862, "learning_rate": 0.00015184758258976637, "loss": 2.6418, "step": 3296 }, { "epoch": 0.3468700683850605, "grad_norm": 1.323357343673706, "learning_rate": 0.0001518184402844683, "loss": 2.1965, "step": 3297 }, { "epoch": 0.3469752761704366, "grad_norm": 4.387045383453369, "learning_rate": 0.00015178929196166537, "loss": 1.9068, "step": 3298 }, { "epoch": 0.34708048395581276, "grad_norm": 1.7642402648925781, "learning_rate": 0.00015176013762474252, "loss": 1.7479, "step": 3299 }, { "epoch": 0.34718569174118885, "grad_norm": 1.1075654029846191, "learning_rate": 0.00015173097727708533, "loss": 1.7473, "step": 3300 }, { "epoch": 0.34729089952656494, "grad_norm": 1.3891255855560303, "learning_rate": 0.0001517018109220801, "loss": 1.863, "step": 3301 }, { "epoch": 0.3473961073119411, "grad_norm": 0.9241618514060974, "learning_rate": 0.0001516726385631138, "loss": 2.2975, "step": 3302 }, { "epoch": 0.3475013150973172, "grad_norm": 2.6608288288116455, "learning_rate": 0.00015164346020357417, "loss": 2.5467, "step": 3303 }, { "epoch": 0.34760652288269334, "grad_norm": 1.2339940071105957, "learning_rate": 0.00015161427584684954, "loss": 1.4252, "step": 3304 }, { "epoch": 0.34771173066806943, "grad_norm": 1.0386180877685547, "learning_rate": 0.00015158508549632902, "loss": 1.5439, "step": 3305 }, { "epoch": 0.3478169384534456, "grad_norm": 1.0006986856460571, "learning_rate": 0.0001515558891554024, "loss": 2.3416, "step": 3306 }, { "epoch": 0.3479221462388217, "grad_norm": 1.0212254524230957, "learning_rate": 0.0001515266868274601, "loss": 1.6541, "step": 3307 }, { "epoch": 0.34802735402419777, "grad_norm": 1.2508071660995483, "learning_rate": 0.0001514974785158934, "loss": 1.5524, "step": 3308 }, { "epoch": 0.3481325618095739, "grad_norm": 1.5572774410247803, "learning_rate": 0.00015146826422409405, "loss": 2.1225, "step": 3309 }, { "epoch": 0.34823776959495, "grad_norm": 1.8511630296707153, "learning_rate": 0.00015143904395545466, "loss": 1.6325, "step": 3310 }, { "epoch": 0.34834297738032616, "grad_norm": 1.0825985670089722, "learning_rate": 0.00015140981771336848, "loss": 1.9844, "step": 3311 }, { "epoch": 0.34844818516570225, "grad_norm": 1.2613874673843384, "learning_rate": 0.00015138058550122945, "loss": 1.5684, "step": 3312 }, { "epoch": 0.3485533929510784, "grad_norm": 1.5934251546859741, "learning_rate": 0.00015135134732243227, "loss": 2.6311, "step": 3313 }, { "epoch": 0.3486586007364545, "grad_norm": 1.325727939605713, "learning_rate": 0.00015132210318037214, "loss": 1.2294, "step": 3314 }, { "epoch": 0.34876380852183064, "grad_norm": 1.7962899208068848, "learning_rate": 0.00015129285307844523, "loss": 2.3393, "step": 3315 }, { "epoch": 0.34886901630720674, "grad_norm": 1.5050058364868164, "learning_rate": 0.00015126359702004818, "loss": 1.8036, "step": 3316 }, { "epoch": 0.34897422409258283, "grad_norm": 1.342183232307434, "learning_rate": 0.0001512343350085784, "loss": 1.9512, "step": 3317 }, { "epoch": 0.349079431877959, "grad_norm": 1.6918140649795532, "learning_rate": 0.00015120506704743402, "loss": 2.1275, "step": 3318 }, { "epoch": 0.3491846396633351, "grad_norm": 1.7969458103179932, "learning_rate": 0.00015117579314001382, "loss": 1.9085, "step": 3319 }, { "epoch": 0.3492898474487112, "grad_norm": 1.1267280578613281, "learning_rate": 0.00015114651328971727, "loss": 1.8183, "step": 3320 }, { "epoch": 0.3493950552340873, "grad_norm": 1.4564090967178345, "learning_rate": 0.00015111722749994457, "loss": 1.8921, "step": 3321 }, { "epoch": 0.34950026301946346, "grad_norm": 1.4575985670089722, "learning_rate": 0.00015108793577409656, "loss": 1.7664, "step": 3322 }, { "epoch": 0.34960547080483956, "grad_norm": 1.3683162927627563, "learning_rate": 0.0001510586381155748, "loss": 1.585, "step": 3323 }, { "epoch": 0.34971067859021565, "grad_norm": 2.316188097000122, "learning_rate": 0.0001510293345277815, "loss": 1.8608, "step": 3324 }, { "epoch": 0.3498158863755918, "grad_norm": 0.8995792269706726, "learning_rate": 0.0001510000250141196, "loss": 1.475, "step": 3325 }, { "epoch": 0.3499210941609679, "grad_norm": 1.4614365100860596, "learning_rate": 0.0001509707095779928, "loss": 2.1418, "step": 3326 }, { "epoch": 0.35002630194634404, "grad_norm": 2.0038840770721436, "learning_rate": 0.00015094138822280533, "loss": 2.1766, "step": 3327 }, { "epoch": 0.35013150973172014, "grad_norm": 2.1047608852386475, "learning_rate": 0.00015091206095196215, "loss": 1.7852, "step": 3328 }, { "epoch": 0.3502367175170963, "grad_norm": 1.098832607269287, "learning_rate": 0.000150882727768869, "loss": 1.973, "step": 3329 }, { "epoch": 0.3503419253024724, "grad_norm": 1.2280389070510864, "learning_rate": 0.00015085338867693225, "loss": 1.7759, "step": 3330 }, { "epoch": 0.35044713308784853, "grad_norm": 1.6479296684265137, "learning_rate": 0.0001508240436795589, "loss": 1.9789, "step": 3331 }, { "epoch": 0.3505523408732246, "grad_norm": 1.37543785572052, "learning_rate": 0.00015079469278015672, "loss": 2.2909, "step": 3332 }, { "epoch": 0.3506575486586007, "grad_norm": 0.9722198247909546, "learning_rate": 0.00015076533598213415, "loss": 1.6292, "step": 3333 }, { "epoch": 0.35076275644397686, "grad_norm": 1.5459274053573608, "learning_rate": 0.00015073597328890025, "loss": 1.8489, "step": 3334 }, { "epoch": 0.35086796422935296, "grad_norm": 1.3140374422073364, "learning_rate": 0.00015070660470386485, "loss": 2.0101, "step": 3335 }, { "epoch": 0.3509731720147291, "grad_norm": 1.515724778175354, "learning_rate": 0.00015067723023043844, "loss": 1.9427, "step": 3336 }, { "epoch": 0.3510783798001052, "grad_norm": 1.626247763633728, "learning_rate": 0.00015064784987203216, "loss": 1.574, "step": 3337 }, { "epoch": 0.35118358758548135, "grad_norm": 1.1140034198760986, "learning_rate": 0.00015061846363205784, "loss": 2.173, "step": 3338 }, { "epoch": 0.35128879537085744, "grad_norm": 1.1657525300979614, "learning_rate": 0.000150589071513928, "loss": 2.1967, "step": 3339 }, { "epoch": 0.35139400315623354, "grad_norm": 0.87020343542099, "learning_rate": 0.00015055967352105588, "loss": 1.7997, "step": 3340 }, { "epoch": 0.3514992109416097, "grad_norm": 1.1044055223464966, "learning_rate": 0.0001505302696568554, "loss": 2.051, "step": 3341 }, { "epoch": 0.3516044187269858, "grad_norm": 2.5715765953063965, "learning_rate": 0.00015050085992474106, "loss": 2.1982, "step": 3342 }, { "epoch": 0.35170962651236193, "grad_norm": 1.7368378639221191, "learning_rate": 0.00015047144432812814, "loss": 1.2449, "step": 3343 }, { "epoch": 0.351814834297738, "grad_norm": 1.0947346687316895, "learning_rate": 0.00015044202287043263, "loss": 1.4047, "step": 3344 }, { "epoch": 0.35192004208311417, "grad_norm": 1.4075251817703247, "learning_rate": 0.00015041259555507108, "loss": 1.8252, "step": 3345 }, { "epoch": 0.35202524986849026, "grad_norm": 1.3330639600753784, "learning_rate": 0.00015038316238546082, "loss": 1.9027, "step": 3346 }, { "epoch": 0.3521304576538664, "grad_norm": 1.0962144136428833, "learning_rate": 0.00015035372336501984, "loss": 1.3782, "step": 3347 }, { "epoch": 0.3522356654392425, "grad_norm": 1.612261414527893, "learning_rate": 0.00015032427849716675, "loss": 2.1975, "step": 3348 }, { "epoch": 0.3523408732246186, "grad_norm": 1.682795763015747, "learning_rate": 0.0001502948277853209, "loss": 1.8072, "step": 3349 }, { "epoch": 0.35244608100999475, "grad_norm": 1.4576448202133179, "learning_rate": 0.00015026537123290234, "loss": 1.7883, "step": 3350 }, { "epoch": 0.35255128879537084, "grad_norm": 1.385787844657898, "learning_rate": 0.00015023590884333173, "loss": 2.0371, "step": 3351 }, { "epoch": 0.352656496580747, "grad_norm": 1.0585753917694092, "learning_rate": 0.00015020644062003046, "loss": 1.912, "step": 3352 }, { "epoch": 0.3527617043661231, "grad_norm": 1.1872546672821045, "learning_rate": 0.00015017696656642056, "loss": 2.1347, "step": 3353 }, { "epoch": 0.35286691215149923, "grad_norm": 1.5553888082504272, "learning_rate": 0.00015014748668592477, "loss": 1.8306, "step": 3354 }, { "epoch": 0.35297211993687533, "grad_norm": 1.696855902671814, "learning_rate": 0.00015011800098196646, "loss": 1.5191, "step": 3355 }, { "epoch": 0.3530773277222514, "grad_norm": 1.31337571144104, "learning_rate": 0.00015008850945796975, "loss": 2.1424, "step": 3356 }, { "epoch": 0.35318253550762757, "grad_norm": 1.327221393585205, "learning_rate": 0.00015005901211735938, "loss": 1.8816, "step": 3357 }, { "epoch": 0.35328774329300366, "grad_norm": 1.1838815212249756, "learning_rate": 0.0001500295089635608, "loss": 2.271, "step": 3358 }, { "epoch": 0.3533929510783798, "grad_norm": 1.5247220993041992, "learning_rate": 0.00015000000000000001, "loss": 1.7908, "step": 3359 }, { "epoch": 0.3534981588637559, "grad_norm": 1.6162418127059937, "learning_rate": 0.0001499704852301039, "loss": 2.1344, "step": 3360 }, { "epoch": 0.35360336664913206, "grad_norm": 1.533591628074646, "learning_rate": 0.0001499409646572999, "loss": 2.2785, "step": 3361 }, { "epoch": 0.35370857443450815, "grad_norm": 1.3864167928695679, "learning_rate": 0.00014991143828501613, "loss": 2.2172, "step": 3362 }, { "epoch": 0.3538137822198843, "grad_norm": 1.580679178237915, "learning_rate": 0.00014988190611668135, "loss": 1.6399, "step": 3363 }, { "epoch": 0.3539189900052604, "grad_norm": 1.053759217262268, "learning_rate": 0.00014985236815572513, "loss": 1.3566, "step": 3364 }, { "epoch": 0.3540241977906365, "grad_norm": 1.2287757396697998, "learning_rate": 0.0001498228244055775, "loss": 2.0034, "step": 3365 }, { "epoch": 0.35412940557601263, "grad_norm": 1.3084381818771362, "learning_rate": 0.00014979327486966938, "loss": 1.9535, "step": 3366 }, { "epoch": 0.35423461336138873, "grad_norm": 1.1982232332229614, "learning_rate": 0.0001497637195514322, "loss": 2.0076, "step": 3367 }, { "epoch": 0.3543398211467649, "grad_norm": 1.4315484762191772, "learning_rate": 0.00014973415845429813, "loss": 1.9953, "step": 3368 }, { "epoch": 0.35444502893214097, "grad_norm": 1.0541669130325317, "learning_rate": 0.0001497045915817, "loss": 1.6665, "step": 3369 }, { "epoch": 0.3545502367175171, "grad_norm": 1.2444473505020142, "learning_rate": 0.00014967501893707133, "loss": 2.4482, "step": 3370 }, { "epoch": 0.3546554445028932, "grad_norm": 1.091001033782959, "learning_rate": 0.00014964544052384628, "loss": 2.1292, "step": 3371 }, { "epoch": 0.3547606522882693, "grad_norm": 1.2684603929519653, "learning_rate": 0.0001496158563454597, "loss": 1.6962, "step": 3372 }, { "epoch": 0.35486586007364546, "grad_norm": 1.5170433521270752, "learning_rate": 0.0001495862664053471, "loss": 1.6657, "step": 3373 }, { "epoch": 0.35497106785902155, "grad_norm": 1.2288258075714111, "learning_rate": 0.0001495566707069447, "loss": 1.9427, "step": 3374 }, { "epoch": 0.3550762756443977, "grad_norm": 1.205127239227295, "learning_rate": 0.0001495270692536893, "loss": 1.958, "step": 3375 }, { "epoch": 0.3551814834297738, "grad_norm": 1.3871060609817505, "learning_rate": 0.0001494974620490184, "loss": 1.9658, "step": 3376 }, { "epoch": 0.35528669121514994, "grad_norm": 1.6164618730545044, "learning_rate": 0.00014946784909637028, "loss": 1.354, "step": 3377 }, { "epoch": 0.35539189900052603, "grad_norm": 1.082649827003479, "learning_rate": 0.00014943823039918373, "loss": 1.5321, "step": 3378 }, { "epoch": 0.3554971067859022, "grad_norm": 1.9146784543991089, "learning_rate": 0.00014940860596089828, "loss": 1.5985, "step": 3379 }, { "epoch": 0.3556023145712783, "grad_norm": 1.5512475967407227, "learning_rate": 0.0001493789757849541, "loss": 1.7136, "step": 3380 }, { "epoch": 0.35570752235665437, "grad_norm": 1.2535208463668823, "learning_rate": 0.00014934933987479206, "loss": 1.5282, "step": 3381 }, { "epoch": 0.3558127301420305, "grad_norm": 1.6112395524978638, "learning_rate": 0.0001493196982338537, "loss": 2.1735, "step": 3382 }, { "epoch": 0.3559179379274066, "grad_norm": 1.6096539497375488, "learning_rate": 0.00014929005086558117, "loss": 2.2356, "step": 3383 }, { "epoch": 0.35602314571278276, "grad_norm": 1.7732410430908203, "learning_rate": 0.00014926039777341733, "loss": 1.8119, "step": 3384 }, { "epoch": 0.35612835349815886, "grad_norm": 2.6006152629852295, "learning_rate": 0.00014923073896080575, "loss": 1.6363, "step": 3385 }, { "epoch": 0.356233561283535, "grad_norm": 1.6109579801559448, "learning_rate": 0.00014920107443119052, "loss": 1.8525, "step": 3386 }, { "epoch": 0.3563387690689111, "grad_norm": 1.49036705493927, "learning_rate": 0.00014917140418801655, "loss": 1.6045, "step": 3387 }, { "epoch": 0.3564439768542872, "grad_norm": 1.5587002038955688, "learning_rate": 0.00014914172823472934, "loss": 2.4678, "step": 3388 }, { "epoch": 0.35654918463966334, "grad_norm": 1.2528430223464966, "learning_rate": 0.00014911204657477506, "loss": 2.4837, "step": 3389 }, { "epoch": 0.35665439242503943, "grad_norm": 1.163336157798767, "learning_rate": 0.00014908235921160055, "loss": 2.1102, "step": 3390 }, { "epoch": 0.3567596002104156, "grad_norm": 1.2272528409957886, "learning_rate": 0.00014905266614865324, "loss": 2.1122, "step": 3391 }, { "epoch": 0.3568648079957917, "grad_norm": 1.4555405378341675, "learning_rate": 0.00014902296738938134, "loss": 1.264, "step": 3392 }, { "epoch": 0.3569700157811678, "grad_norm": 2.172147274017334, "learning_rate": 0.00014899326293723371, "loss": 1.9308, "step": 3393 }, { "epoch": 0.3570752235665439, "grad_norm": 1.3015145063400269, "learning_rate": 0.00014896355279565976, "loss": 2.0421, "step": 3394 }, { "epoch": 0.35718043135192007, "grad_norm": 1.9815869331359863, "learning_rate": 0.00014893383696810964, "loss": 1.5845, "step": 3395 }, { "epoch": 0.35728563913729616, "grad_norm": 0.8126930594444275, "learning_rate": 0.0001489041154580342, "loss": 2.2649, "step": 3396 }, { "epoch": 0.35739084692267226, "grad_norm": 1.3474737405776978, "learning_rate": 0.00014887438826888483, "loss": 1.7113, "step": 3397 }, { "epoch": 0.3574960547080484, "grad_norm": 1.247473120689392, "learning_rate": 0.00014884465540411368, "loss": 2.136, "step": 3398 }, { "epoch": 0.3576012624934245, "grad_norm": 1.0129485130310059, "learning_rate": 0.00014881491686717362, "loss": 2.1247, "step": 3399 }, { "epoch": 0.35770647027880065, "grad_norm": 1.0661226511001587, "learning_rate": 0.00014878517266151794, "loss": 1.9584, "step": 3400 }, { "epoch": 0.35781167806417674, "grad_norm": 1.4464398622512817, "learning_rate": 0.00014875542279060085, "loss": 1.9324, "step": 3401 }, { "epoch": 0.3579168858495529, "grad_norm": 1.864518165588379, "learning_rate": 0.00014872566725787701, "loss": 1.1379, "step": 3402 }, { "epoch": 0.358022093634929, "grad_norm": 1.319411277770996, "learning_rate": 0.00014869590606680192, "loss": 1.796, "step": 3403 }, { "epoch": 0.3581273014203051, "grad_norm": 0.8547179698944092, "learning_rate": 0.0001486661392208316, "loss": 1.9455, "step": 3404 }, { "epoch": 0.3582325092056812, "grad_norm": 1.2236049175262451, "learning_rate": 0.00014863636672342277, "loss": 1.4572, "step": 3405 }, { "epoch": 0.3583377169910573, "grad_norm": 1.5455988645553589, "learning_rate": 0.00014860658857803285, "loss": 1.9523, "step": 3406 }, { "epoch": 0.35844292477643347, "grad_norm": 1.231605052947998, "learning_rate": 0.00014857680478811984, "loss": 2.3316, "step": 3407 }, { "epoch": 0.35854813256180956, "grad_norm": 1.5083273649215698, "learning_rate": 0.00014854701535714244, "loss": 2.0371, "step": 3408 }, { "epoch": 0.3586533403471857, "grad_norm": 1.7702112197875977, "learning_rate": 0.00014851722028856005, "loss": 2.159, "step": 3409 }, { "epoch": 0.3587585481325618, "grad_norm": 1.20704984664917, "learning_rate": 0.0001484874195858326, "loss": 1.1983, "step": 3410 }, { "epoch": 0.35886375591793795, "grad_norm": 1.5127192735671997, "learning_rate": 0.00014845761325242077, "loss": 1.638, "step": 3411 }, { "epoch": 0.35896896370331405, "grad_norm": 1.1735661029815674, "learning_rate": 0.0001484278012917859, "loss": 1.6514, "step": 3412 }, { "epoch": 0.35907417148869014, "grad_norm": 1.142444133758545, "learning_rate": 0.00014839798370738994, "loss": 1.6814, "step": 3413 }, { "epoch": 0.3591793792740663, "grad_norm": 2.166907787322998, "learning_rate": 0.00014836816050269548, "loss": 2.0189, "step": 3414 }, { "epoch": 0.3592845870594424, "grad_norm": 0.9603028893470764, "learning_rate": 0.00014833833168116582, "loss": 2.2928, "step": 3415 }, { "epoch": 0.35938979484481853, "grad_norm": 1.3047958612442017, "learning_rate": 0.00014830849724626488, "loss": 2.1418, "step": 3416 }, { "epoch": 0.3594950026301946, "grad_norm": 2.171341896057129, "learning_rate": 0.00014827865720145724, "loss": 1.9426, "step": 3417 }, { "epoch": 0.3596002104155708, "grad_norm": 1.029051661491394, "learning_rate": 0.0001482488115502081, "loss": 2.0982, "step": 3418 }, { "epoch": 0.35970541820094687, "grad_norm": 1.3975342512130737, "learning_rate": 0.00014821896029598337, "loss": 2.189, "step": 3419 }, { "epoch": 0.35981062598632296, "grad_norm": 1.4755433797836304, "learning_rate": 0.00014818910344224957, "loss": 1.6457, "step": 3420 }, { "epoch": 0.3599158337716991, "grad_norm": 1.5850578546524048, "learning_rate": 0.00014815924099247384, "loss": 2.2274, "step": 3421 }, { "epoch": 0.3600210415570752, "grad_norm": 1.3168679475784302, "learning_rate": 0.00014812937295012406, "loss": 2.4136, "step": 3422 }, { "epoch": 0.36012624934245135, "grad_norm": 1.5056153535842896, "learning_rate": 0.00014809949931866867, "loss": 1.8313, "step": 3423 }, { "epoch": 0.36023145712782745, "grad_norm": 3.0403895378112793, "learning_rate": 0.00014806962010157683, "loss": 2.1145, "step": 3424 }, { "epoch": 0.3603366649132036, "grad_norm": 0.8465742468833923, "learning_rate": 0.00014803973530231828, "loss": 2.0201, "step": 3425 }, { "epoch": 0.3604418726985797, "grad_norm": 1.618240237236023, "learning_rate": 0.00014800984492436346, "loss": 2.0447, "step": 3426 }, { "epoch": 0.36054708048395584, "grad_norm": 1.5372968912124634, "learning_rate": 0.00014797994897118347, "loss": 1.9388, "step": 3427 }, { "epoch": 0.36065228826933193, "grad_norm": 1.471268892288208, "learning_rate": 0.00014795004744625, "loss": 2.3546, "step": 3428 }, { "epoch": 0.360757496054708, "grad_norm": 1.4779199361801147, "learning_rate": 0.00014792014035303535, "loss": 1.9209, "step": 3429 }, { "epoch": 0.3608627038400842, "grad_norm": 0.9379653930664062, "learning_rate": 0.0001478902276950127, "loss": 1.705, "step": 3430 }, { "epoch": 0.36096791162546027, "grad_norm": 1.2077665328979492, "learning_rate": 0.00014786030947565554, "loss": 1.7868, "step": 3431 }, { "epoch": 0.3610731194108364, "grad_norm": 1.1456810235977173, "learning_rate": 0.00014783038569843822, "loss": 2.3369, "step": 3432 }, { "epoch": 0.3611783271962125, "grad_norm": 1.7450131177902222, "learning_rate": 0.00014780045636683578, "loss": 1.9921, "step": 3433 }, { "epoch": 0.36128353498158866, "grad_norm": 1.1912232637405396, "learning_rate": 0.00014777052148432372, "loss": 1.9747, "step": 3434 }, { "epoch": 0.36138874276696475, "grad_norm": 1.3234916925430298, "learning_rate": 0.00014774058105437827, "loss": 1.6403, "step": 3435 }, { "epoch": 0.36149395055234085, "grad_norm": 1.4043394327163696, "learning_rate": 0.00014771063508047636, "loss": 1.8832, "step": 3436 }, { "epoch": 0.361599158337717, "grad_norm": 1.1788146495819092, "learning_rate": 0.00014768068356609554, "loss": 1.997, "step": 3437 }, { "epoch": 0.3617043661230931, "grad_norm": 1.0332565307617188, "learning_rate": 0.00014765072651471393, "loss": 1.7426, "step": 3438 }, { "epoch": 0.36180957390846924, "grad_norm": 1.34288489818573, "learning_rate": 0.00014762076392981033, "loss": 2.0445, "step": 3439 }, { "epoch": 0.36191478169384533, "grad_norm": 1.6269721984863281, "learning_rate": 0.00014759079581486424, "loss": 1.7493, "step": 3440 }, { "epoch": 0.3620199894792215, "grad_norm": 1.204362392425537, "learning_rate": 0.00014756082217335577, "loss": 2.179, "step": 3441 }, { "epoch": 0.3621251972645976, "grad_norm": 1.8871209621429443, "learning_rate": 0.0001475308430087656, "loss": 1.5849, "step": 3442 }, { "epoch": 0.3622304050499737, "grad_norm": 1.0620596408843994, "learning_rate": 0.00014750085832457519, "loss": 2.2185, "step": 3443 }, { "epoch": 0.3623356128353498, "grad_norm": 1.24690842628479, "learning_rate": 0.00014747086812426648, "loss": 1.5221, "step": 3444 }, { "epoch": 0.3624408206207259, "grad_norm": 1.5834826231002808, "learning_rate": 0.0001474408724113222, "loss": 1.4759, "step": 3445 }, { "epoch": 0.36254602840610206, "grad_norm": 1.1942899227142334, "learning_rate": 0.0001474108711892256, "loss": 2.2882, "step": 3446 }, { "epoch": 0.36265123619147815, "grad_norm": 1.3137141466140747, "learning_rate": 0.00014738086446146065, "loss": 1.6804, "step": 3447 }, { "epoch": 0.3627564439768543, "grad_norm": 1.0239195823669434, "learning_rate": 0.00014735085223151198, "loss": 2.2197, "step": 3448 }, { "epoch": 0.3628616517622304, "grad_norm": 1.18763267993927, "learning_rate": 0.00014732083450286472, "loss": 1.7646, "step": 3449 }, { "epoch": 0.36296685954760655, "grad_norm": 1.4239435195922852, "learning_rate": 0.00014729081127900476, "loss": 1.8619, "step": 3450 }, { "epoch": 0.36307206733298264, "grad_norm": 1.079707384109497, "learning_rate": 0.00014726078256341863, "loss": 2.0, "step": 3451 }, { "epoch": 0.36317727511835873, "grad_norm": 1.7194312810897827, "learning_rate": 0.00014723074835959346, "loss": 1.6595, "step": 3452 }, { "epoch": 0.3632824829037349, "grad_norm": 1.498183250427246, "learning_rate": 0.000147200708671017, "loss": 1.6809, "step": 3453 }, { "epoch": 0.363387690689111, "grad_norm": 1.7695226669311523, "learning_rate": 0.00014717066350117768, "loss": 1.6466, "step": 3454 }, { "epoch": 0.3634928984744871, "grad_norm": 1.1211014986038208, "learning_rate": 0.00014714061285356453, "loss": 1.772, "step": 3455 }, { "epoch": 0.3635981062598632, "grad_norm": 1.286805510520935, "learning_rate": 0.00014711055673166724, "loss": 2.2176, "step": 3456 }, { "epoch": 0.36370331404523937, "grad_norm": 2.3629977703094482, "learning_rate": 0.0001470804951389761, "loss": 1.4311, "step": 3457 }, { "epoch": 0.36380852183061546, "grad_norm": 1.3376028537750244, "learning_rate": 0.00014705042807898214, "loss": 1.7872, "step": 3458 }, { "epoch": 0.3639137296159916, "grad_norm": 2.238438129425049, "learning_rate": 0.0001470203555551769, "loss": 1.8577, "step": 3459 }, { "epoch": 0.3640189374013677, "grad_norm": 1.5337350368499756, "learning_rate": 0.00014699027757105254, "loss": 1.7483, "step": 3460 }, { "epoch": 0.3641241451867438, "grad_norm": 2.3909807205200195, "learning_rate": 0.00014696019413010204, "loss": 2.0391, "step": 3461 }, { "epoch": 0.36422935297211995, "grad_norm": 1.3781287670135498, "learning_rate": 0.00014693010523581882, "loss": 1.8577, "step": 3462 }, { "epoch": 0.36433456075749604, "grad_norm": 1.3029046058654785, "learning_rate": 0.00014690001089169702, "loss": 1.7383, "step": 3463 }, { "epoch": 0.3644397685428722, "grad_norm": 1.002429485321045, "learning_rate": 0.00014686991110123135, "loss": 1.7997, "step": 3464 }, { "epoch": 0.3645449763282483, "grad_norm": 1.2660531997680664, "learning_rate": 0.0001468398058679173, "loss": 1.7173, "step": 3465 }, { "epoch": 0.36465018411362443, "grad_norm": 1.3863353729248047, "learning_rate": 0.0001468096951952508, "loss": 1.7758, "step": 3466 }, { "epoch": 0.3647553918990005, "grad_norm": 1.7203501462936401, "learning_rate": 0.00014677957908672856, "loss": 1.7279, "step": 3467 }, { "epoch": 0.3648605996843766, "grad_norm": 1.1552826166152954, "learning_rate": 0.0001467494575458478, "loss": 1.8039, "step": 3468 }, { "epoch": 0.36496580746975277, "grad_norm": 0.9511840343475342, "learning_rate": 0.00014671933057610654, "loss": 1.8241, "step": 3469 }, { "epoch": 0.36507101525512886, "grad_norm": 1.4740869998931885, "learning_rate": 0.00014668919818100322, "loss": 2.1223, "step": 3470 }, { "epoch": 0.365176223040505, "grad_norm": 2.0013620853424072, "learning_rate": 0.00014665906036403706, "loss": 2.3534, "step": 3471 }, { "epoch": 0.3652814308258811, "grad_norm": 1.2783856391906738, "learning_rate": 0.0001466289171287079, "loss": 1.7754, "step": 3472 }, { "epoch": 0.36538663861125725, "grad_norm": 1.5455641746520996, "learning_rate": 0.00014659876847851607, "loss": 2.1852, "step": 3473 }, { "epoch": 0.36549184639663335, "grad_norm": 1.3689314126968384, "learning_rate": 0.00014656861441696278, "loss": 1.7998, "step": 3474 }, { "epoch": 0.3655970541820095, "grad_norm": 1.5268868207931519, "learning_rate": 0.00014653845494754962, "loss": 1.4686, "step": 3475 }, { "epoch": 0.3657022619673856, "grad_norm": 1.6166832447052002, "learning_rate": 0.00014650829007377894, "loss": 1.9012, "step": 3476 }, { "epoch": 0.3658074697527617, "grad_norm": 1.4161622524261475, "learning_rate": 0.00014647811979915366, "loss": 2.1802, "step": 3477 }, { "epoch": 0.36591267753813783, "grad_norm": 1.2434589862823486, "learning_rate": 0.00014644794412717736, "loss": 1.9538, "step": 3478 }, { "epoch": 0.3660178853235139, "grad_norm": 1.0335787534713745, "learning_rate": 0.00014641776306135431, "loss": 1.8217, "step": 3479 }, { "epoch": 0.3661230931088901, "grad_norm": 1.653498649597168, "learning_rate": 0.00014638757660518923, "loss": 1.8545, "step": 3480 }, { "epoch": 0.36622830089426617, "grad_norm": 0.8305730223655701, "learning_rate": 0.00014635738476218767, "loss": 1.4702, "step": 3481 }, { "epoch": 0.3663335086796423, "grad_norm": 1.3280541896820068, "learning_rate": 0.00014632718753585566, "loss": 1.7956, "step": 3482 }, { "epoch": 0.3664387164650184, "grad_norm": 1.6454240083694458, "learning_rate": 0.0001462969849296999, "loss": 1.5739, "step": 3483 }, { "epoch": 0.3665439242503945, "grad_norm": 1.0778430700302124, "learning_rate": 0.00014626677694722773, "loss": 2.2551, "step": 3484 }, { "epoch": 0.36664913203577065, "grad_norm": 1.3582223653793335, "learning_rate": 0.00014623656359194712, "loss": 1.9954, "step": 3485 }, { "epoch": 0.36675433982114675, "grad_norm": 1.7454923391342163, "learning_rate": 0.00014620634486736667, "loss": 2.0445, "step": 3486 }, { "epoch": 0.3668595476065229, "grad_norm": 1.1220197677612305, "learning_rate": 0.00014617612077699548, "loss": 1.8598, "step": 3487 }, { "epoch": 0.366964755391899, "grad_norm": 1.2056738138198853, "learning_rate": 0.00014614589132434347, "loss": 2.0579, "step": 3488 }, { "epoch": 0.36706996317727514, "grad_norm": 1.1674832105636597, "learning_rate": 0.00014611565651292106, "loss": 1.7296, "step": 3489 }, { "epoch": 0.36717517096265123, "grad_norm": 1.2299489974975586, "learning_rate": 0.00014608541634623929, "loss": 1.6829, "step": 3490 }, { "epoch": 0.3672803787480274, "grad_norm": 1.580704689025879, "learning_rate": 0.00014605517082780988, "loss": 2.2437, "step": 3491 }, { "epoch": 0.3673855865334035, "grad_norm": 1.4246400594711304, "learning_rate": 0.00014602491996114516, "loss": 1.8395, "step": 3492 }, { "epoch": 0.36749079431877957, "grad_norm": 1.4935017824172974, "learning_rate": 0.00014599466374975802, "loss": 2.118, "step": 3493 }, { "epoch": 0.3675960021041557, "grad_norm": 1.4232902526855469, "learning_rate": 0.00014596440219716205, "loss": 1.8154, "step": 3494 }, { "epoch": 0.3677012098895318, "grad_norm": 1.707642912864685, "learning_rate": 0.00014593413530687138, "loss": 1.7055, "step": 3495 }, { "epoch": 0.36780641767490796, "grad_norm": 1.4270106554031372, "learning_rate": 0.0001459038630824009, "loss": 2.0895, "step": 3496 }, { "epoch": 0.36791162546028405, "grad_norm": 1.169714331626892, "learning_rate": 0.00014587358552726592, "loss": 1.8576, "step": 3497 }, { "epoch": 0.3680168332456602, "grad_norm": 1.1575000286102295, "learning_rate": 0.0001458433026449825, "loss": 1.8767, "step": 3498 }, { "epoch": 0.3681220410310363, "grad_norm": 1.5656603574752808, "learning_rate": 0.0001458130144390673, "loss": 1.8864, "step": 3499 }, { "epoch": 0.3682272488164124, "grad_norm": 0.9591088891029358, "learning_rate": 0.0001457827209130376, "loss": 1.8874, "step": 3500 }, { "epoch": 0.36833245660178854, "grad_norm": 1.8865973949432373, "learning_rate": 0.00014575242207041128, "loss": 1.846, "step": 3501 }, { "epoch": 0.36843766438716463, "grad_norm": 1.3061480522155762, "learning_rate": 0.00014572211791470685, "loss": 2.1671, "step": 3502 }, { "epoch": 0.3685428721725408, "grad_norm": 1.3023380041122437, "learning_rate": 0.00014569180844944344, "loss": 1.8949, "step": 3503 }, { "epoch": 0.3686480799579169, "grad_norm": 1.3070828914642334, "learning_rate": 0.00014566149367814074, "loss": 1.9491, "step": 3504 }, { "epoch": 0.368753287743293, "grad_norm": 1.1962894201278687, "learning_rate": 0.00014563117360431914, "loss": 1.5836, "step": 3505 }, { "epoch": 0.3688584955286691, "grad_norm": 1.190346121788025, "learning_rate": 0.00014560084823149965, "loss": 2.0028, "step": 3506 }, { "epoch": 0.36896370331404527, "grad_norm": 1.0240740776062012, "learning_rate": 0.00014557051756320378, "loss": 1.9108, "step": 3507 }, { "epoch": 0.36906891109942136, "grad_norm": 1.517669916152954, "learning_rate": 0.0001455401816029538, "loss": 1.7805, "step": 3508 }, { "epoch": 0.36917411888479745, "grad_norm": 1.1098333597183228, "learning_rate": 0.00014550984035427243, "loss": 1.9193, "step": 3509 }, { "epoch": 0.3692793266701736, "grad_norm": 1.9058518409729004, "learning_rate": 0.00014547949382068322, "loss": 2.471, "step": 3510 }, { "epoch": 0.3693845344555497, "grad_norm": 0.9477731585502625, "learning_rate": 0.0001454491420057101, "loss": 1.2372, "step": 3511 }, { "epoch": 0.36948974224092584, "grad_norm": 2.694037437438965, "learning_rate": 0.00014541878491287783, "loss": 2.3677, "step": 3512 }, { "epoch": 0.36959495002630194, "grad_norm": 0.9963229298591614, "learning_rate": 0.0001453884225457116, "loss": 1.8101, "step": 3513 }, { "epoch": 0.3697001578116781, "grad_norm": 1.3422759771347046, "learning_rate": 0.00014535805490773732, "loss": 1.9981, "step": 3514 }, { "epoch": 0.3698053655970542, "grad_norm": 0.9793868660926819, "learning_rate": 0.0001453276820024815, "loss": 2.0641, "step": 3515 }, { "epoch": 0.3699105733824303, "grad_norm": 1.3728710412979126, "learning_rate": 0.0001452973038334712, "loss": 2.2349, "step": 3516 }, { "epoch": 0.3700157811678064, "grad_norm": 1.3043874502182007, "learning_rate": 0.0001452669204042342, "loss": 2.091, "step": 3517 }, { "epoch": 0.3701209889531825, "grad_norm": 1.4674620628356934, "learning_rate": 0.0001452365317182988, "loss": 1.8651, "step": 3518 }, { "epoch": 0.37022619673855867, "grad_norm": 1.3150261640548706, "learning_rate": 0.00014520613777919392, "loss": 1.8597, "step": 3519 }, { "epoch": 0.37033140452393476, "grad_norm": 1.2791160345077515, "learning_rate": 0.00014517573859044907, "loss": 1.9082, "step": 3520 }, { "epoch": 0.3704366123093109, "grad_norm": 1.371948480606079, "learning_rate": 0.00014514533415559453, "loss": 1.6619, "step": 3521 }, { "epoch": 0.370541820094687, "grad_norm": 1.731662392616272, "learning_rate": 0.00014511492447816097, "loss": 2.1011, "step": 3522 }, { "epoch": 0.37064702788006315, "grad_norm": 1.2484428882598877, "learning_rate": 0.0001450845095616798, "loss": 1.4152, "step": 3523 }, { "epoch": 0.37075223566543924, "grad_norm": 1.5884274244308472, "learning_rate": 0.00014505408940968296, "loss": 2.0706, "step": 3524 }, { "epoch": 0.37085744345081534, "grad_norm": 1.000995397567749, "learning_rate": 0.00014502366402570309, "loss": 2.1573, "step": 3525 }, { "epoch": 0.3709626512361915, "grad_norm": 0.8821168541908264, "learning_rate": 0.00014499323341327338, "loss": 1.902, "step": 3526 }, { "epoch": 0.3710678590215676, "grad_norm": 1.4428273439407349, "learning_rate": 0.00014496279757592766, "loss": 2.1331, "step": 3527 }, { "epoch": 0.37117306680694373, "grad_norm": 1.5744878053665161, "learning_rate": 0.00014493235651720027, "loss": 1.7205, "step": 3528 }, { "epoch": 0.3712782745923198, "grad_norm": 1.5465056896209717, "learning_rate": 0.00014490191024062632, "loss": 1.5546, "step": 3529 }, { "epoch": 0.371383482377696, "grad_norm": 1.1197417974472046, "learning_rate": 0.00014487145874974135, "loss": 1.3374, "step": 3530 }, { "epoch": 0.37148869016307207, "grad_norm": 0.9378413558006287, "learning_rate": 0.00014484100204808167, "loss": 2.3889, "step": 3531 }, { "epoch": 0.37159389794844816, "grad_norm": 1.0312168598175049, "learning_rate": 0.00014481054013918408, "loss": 2.095, "step": 3532 }, { "epoch": 0.3716991057338243, "grad_norm": 1.6550025939941406, "learning_rate": 0.00014478007302658598, "loss": 1.6941, "step": 3533 }, { "epoch": 0.3718043135192004, "grad_norm": 1.5724120140075684, "learning_rate": 0.0001447496007138255, "loss": 2.3644, "step": 3534 }, { "epoch": 0.37190952130457655, "grad_norm": 1.3543592691421509, "learning_rate": 0.00014471912320444122, "loss": 2.1931, "step": 3535 }, { "epoch": 0.37201472908995264, "grad_norm": 1.7770755290985107, "learning_rate": 0.00014468864050197242, "loss": 1.5458, "step": 3536 }, { "epoch": 0.3721199368753288, "grad_norm": 1.3519712686538696, "learning_rate": 0.00014465815260995894, "loss": 2.119, "step": 3537 }, { "epoch": 0.3722251446607049, "grad_norm": 2.1993541717529297, "learning_rate": 0.0001446276595319413, "loss": 1.3163, "step": 3538 }, { "epoch": 0.37233035244608104, "grad_norm": 1.6572602987289429, "learning_rate": 0.00014459716127146049, "loss": 1.7625, "step": 3539 }, { "epoch": 0.37243556023145713, "grad_norm": 1.320532202720642, "learning_rate": 0.0001445666578320582, "loss": 1.377, "step": 3540 }, { "epoch": 0.3725407680168332, "grad_norm": 1.6189240217208862, "learning_rate": 0.00014453614921727668, "loss": 1.7391, "step": 3541 }, { "epoch": 0.3726459758022094, "grad_norm": 1.0008089542388916, "learning_rate": 0.00014450563543065881, "loss": 1.8331, "step": 3542 }, { "epoch": 0.37275118358758547, "grad_norm": 1.0552862882614136, "learning_rate": 0.00014447511647574805, "loss": 2.0291, "step": 3543 }, { "epoch": 0.3728563913729616, "grad_norm": 1.1746324300765991, "learning_rate": 0.00014444459235608847, "loss": 2.281, "step": 3544 }, { "epoch": 0.3729615991583377, "grad_norm": 1.2236127853393555, "learning_rate": 0.00014441406307522475, "loss": 1.8409, "step": 3545 }, { "epoch": 0.37306680694371386, "grad_norm": 1.948947548866272, "learning_rate": 0.0001443835286367021, "loss": 1.3529, "step": 3546 }, { "epoch": 0.37317201472908995, "grad_norm": 1.3793987035751343, "learning_rate": 0.00014435298904406642, "loss": 1.852, "step": 3547 }, { "epoch": 0.37327722251446604, "grad_norm": 0.9412884712219238, "learning_rate": 0.00014432244430086423, "loss": 2.2095, "step": 3548 }, { "epoch": 0.3733824302998422, "grad_norm": 1.2927563190460205, "learning_rate": 0.00014429189441064248, "loss": 1.9944, "step": 3549 }, { "epoch": 0.3734876380852183, "grad_norm": 1.234215497970581, "learning_rate": 0.00014426133937694887, "loss": 1.7411, "step": 3550 }, { "epoch": 0.37359284587059444, "grad_norm": 1.1995365619659424, "learning_rate": 0.00014423077920333173, "loss": 2.0875, "step": 3551 }, { "epoch": 0.37369805365597053, "grad_norm": 1.4042108058929443, "learning_rate": 0.00014420021389333982, "loss": 2.1245, "step": 3552 }, { "epoch": 0.3738032614413467, "grad_norm": 1.579077959060669, "learning_rate": 0.0001441696434505226, "loss": 1.2663, "step": 3553 }, { "epoch": 0.3739084692267228, "grad_norm": 1.5505826473236084, "learning_rate": 0.00014413906787843014, "loss": 1.8943, "step": 3554 }, { "epoch": 0.3740136770120989, "grad_norm": 1.1203827857971191, "learning_rate": 0.00014410848718061312, "loss": 1.9703, "step": 3555 }, { "epoch": 0.374118884797475, "grad_norm": 1.4529035091400146, "learning_rate": 0.00014407790136062267, "loss": 2.0986, "step": 3556 }, { "epoch": 0.3742240925828511, "grad_norm": 2.0236105918884277, "learning_rate": 0.0001440473104220107, "loss": 1.363, "step": 3557 }, { "epoch": 0.37432930036822726, "grad_norm": 1.0123807191848755, "learning_rate": 0.0001440167143683296, "loss": 1.7753, "step": 3558 }, { "epoch": 0.37443450815360335, "grad_norm": 0.9799262285232544, "learning_rate": 0.00014398611320313244, "loss": 1.4174, "step": 3559 }, { "epoch": 0.3745397159389795, "grad_norm": 1.738737940788269, "learning_rate": 0.00014395550692997277, "loss": 1.9, "step": 3560 }, { "epoch": 0.3746449237243556, "grad_norm": 1.3359789848327637, "learning_rate": 0.00014392489555240486, "loss": 1.7223, "step": 3561 }, { "epoch": 0.37475013150973174, "grad_norm": 2.408348798751831, "learning_rate": 0.00014389427907398342, "loss": 1.6607, "step": 3562 }, { "epoch": 0.37485533929510784, "grad_norm": 2.228571891784668, "learning_rate": 0.0001438636574982639, "loss": 1.2856, "step": 3563 }, { "epoch": 0.37496054708048393, "grad_norm": 1.1137131452560425, "learning_rate": 0.0001438330308288023, "loss": 2.1641, "step": 3564 }, { "epoch": 0.3750657548658601, "grad_norm": 1.3347491025924683, "learning_rate": 0.00014380239906915514, "loss": 2.0624, "step": 3565 }, { "epoch": 0.3751709626512362, "grad_norm": 1.4927902221679688, "learning_rate": 0.00014377176222287965, "loss": 1.6468, "step": 3566 }, { "epoch": 0.3752761704366123, "grad_norm": 1.1425906419754028, "learning_rate": 0.0001437411202935335, "loss": 2.0824, "step": 3567 }, { "epoch": 0.3753813782219884, "grad_norm": 1.4617263078689575, "learning_rate": 0.00014371047328467511, "loss": 2.1863, "step": 3568 }, { "epoch": 0.37548658600736456, "grad_norm": 1.414243459701538, "learning_rate": 0.00014367982119986342, "loss": 1.8408, "step": 3569 }, { "epoch": 0.37559179379274066, "grad_norm": 1.367104172706604, "learning_rate": 0.00014364916404265788, "loss": 1.6474, "step": 3570 }, { "epoch": 0.3756970015781168, "grad_norm": 2.073486089706421, "learning_rate": 0.0001436185018166187, "loss": 1.8869, "step": 3571 }, { "epoch": 0.3758022093634929, "grad_norm": 0.7273818850517273, "learning_rate": 0.0001435878345253065, "loss": 1.4154, "step": 3572 }, { "epoch": 0.375907417148869, "grad_norm": 1.1615116596221924, "learning_rate": 0.00014355716217228265, "loss": 2.0725, "step": 3573 }, { "epoch": 0.37601262493424514, "grad_norm": 1.9539501667022705, "learning_rate": 0.00014352648476110896, "loss": 1.8466, "step": 3574 }, { "epoch": 0.37611783271962124, "grad_norm": 1.2255717515945435, "learning_rate": 0.0001434958022953479, "loss": 2.1784, "step": 3575 }, { "epoch": 0.3762230405049974, "grad_norm": 1.502297282218933, "learning_rate": 0.00014346511477856259, "loss": 1.8617, "step": 3576 }, { "epoch": 0.3763282482903735, "grad_norm": 1.4873766899108887, "learning_rate": 0.0001434344222143166, "loss": 2.0098, "step": 3577 }, { "epoch": 0.37643345607574963, "grad_norm": 1.7163630723953247, "learning_rate": 0.0001434037246061742, "loss": 2.5738, "step": 3578 }, { "epoch": 0.3765386638611257, "grad_norm": 1.179367184638977, "learning_rate": 0.0001433730219577002, "loss": 1.5599, "step": 3579 }, { "epoch": 0.3766438716465018, "grad_norm": 1.8585902452468872, "learning_rate": 0.00014334231427245994, "loss": 1.9643, "step": 3580 }, { "epoch": 0.37674907943187796, "grad_norm": 1.168847918510437, "learning_rate": 0.00014331160155401948, "loss": 2.2399, "step": 3581 }, { "epoch": 0.37685428721725406, "grad_norm": 1.2563929557800293, "learning_rate": 0.00014328088380594534, "loss": 2.0344, "step": 3582 }, { "epoch": 0.3769594950026302, "grad_norm": 1.3818440437316895, "learning_rate": 0.0001432501610318047, "loss": 1.6415, "step": 3583 }, { "epoch": 0.3770647027880063, "grad_norm": 1.4234402179718018, "learning_rate": 0.00014321943323516526, "loss": 1.7941, "step": 3584 }, { "epoch": 0.37716991057338245, "grad_norm": 1.5692628622055054, "learning_rate": 0.00014318870041959538, "loss": 1.653, "step": 3585 }, { "epoch": 0.37727511835875854, "grad_norm": 1.3026686906814575, "learning_rate": 0.00014315796258866393, "loss": 2.1109, "step": 3586 }, { "epoch": 0.3773803261441347, "grad_norm": 1.237284541130066, "learning_rate": 0.00014312721974594038, "loss": 1.7561, "step": 3587 }, { "epoch": 0.3774855339295108, "grad_norm": 1.4320054054260254, "learning_rate": 0.00014309647189499481, "loss": 2.1673, "step": 3588 }, { "epoch": 0.3775907417148869, "grad_norm": 1.15114164352417, "learning_rate": 0.0001430657190393979, "loss": 1.7654, "step": 3589 }, { "epoch": 0.37769594950026303, "grad_norm": 1.3742176294326782, "learning_rate": 0.00014303496118272084, "loss": 1.8697, "step": 3590 }, { "epoch": 0.3778011572856391, "grad_norm": 1.233818531036377, "learning_rate": 0.00014300419832853544, "loss": 1.5884, "step": 3591 }, { "epoch": 0.37790636507101527, "grad_norm": 1.7550405263900757, "learning_rate": 0.0001429734304804141, "loss": 1.99, "step": 3592 }, { "epoch": 0.37801157285639136, "grad_norm": 1.5846307277679443, "learning_rate": 0.0001429426576419298, "loss": 1.879, "step": 3593 }, { "epoch": 0.3781167806417675, "grad_norm": 1.696427822113037, "learning_rate": 0.00014291187981665607, "loss": 1.921, "step": 3594 }, { "epoch": 0.3782219884271436, "grad_norm": 1.756914496421814, "learning_rate": 0.00014288109700816705, "loss": 1.7842, "step": 3595 }, { "epoch": 0.3783271962125197, "grad_norm": 1.9030567407608032, "learning_rate": 0.0001428503092200374, "loss": 1.8851, "step": 3596 }, { "epoch": 0.37843240399789585, "grad_norm": 0.8810452818870544, "learning_rate": 0.0001428195164558425, "loss": 1.7901, "step": 3597 }, { "epoch": 0.37853761178327194, "grad_norm": 1.4262439012527466, "learning_rate": 0.00014278871871915814, "loss": 1.9453, "step": 3598 }, { "epoch": 0.3786428195686481, "grad_norm": 1.6858327388763428, "learning_rate": 0.00014275791601356074, "loss": 1.8671, "step": 3599 }, { "epoch": 0.3787480273540242, "grad_norm": 0.9039170742034912, "learning_rate": 0.0001427271083426274, "loss": 2.199, "step": 3600 }, { "epoch": 0.37885323513940034, "grad_norm": 1.2409204244613647, "learning_rate": 0.00014269629570993564, "loss": 1.6864, "step": 3601 }, { "epoch": 0.37895844292477643, "grad_norm": 1.6588237285614014, "learning_rate": 0.00014266547811906364, "loss": 2.1573, "step": 3602 }, { "epoch": 0.3790636507101526, "grad_norm": 1.5845474004745483, "learning_rate": 0.00014263465557359017, "loss": 1.7634, "step": 3603 }, { "epoch": 0.37916885849552867, "grad_norm": 1.337897539138794, "learning_rate": 0.00014260382807709457, "loss": 2.0049, "step": 3604 }, { "epoch": 0.37927406628090476, "grad_norm": 1.3715331554412842, "learning_rate": 0.00014257299563315667, "loss": 1.8703, "step": 3605 }, { "epoch": 0.3793792740662809, "grad_norm": 0.8518646955490112, "learning_rate": 0.00014254215824535698, "loss": 2.0015, "step": 3606 }, { "epoch": 0.379484481851657, "grad_norm": 0.998729407787323, "learning_rate": 0.00014251131591727656, "loss": 1.7552, "step": 3607 }, { "epoch": 0.37958968963703316, "grad_norm": 1.5885865688323975, "learning_rate": 0.00014248046865249697, "loss": 1.7364, "step": 3608 }, { "epoch": 0.37969489742240925, "grad_norm": 1.1192876100540161, "learning_rate": 0.00014244961645460048, "loss": 1.9686, "step": 3609 }, { "epoch": 0.3798001052077854, "grad_norm": 1.3762092590332031, "learning_rate": 0.0001424187593271698, "loss": 1.668, "step": 3610 }, { "epoch": 0.3799053129931615, "grad_norm": 1.3695660829544067, "learning_rate": 0.0001423878972737883, "loss": 1.939, "step": 3611 }, { "epoch": 0.3800105207785376, "grad_norm": 1.5928248167037964, "learning_rate": 0.00014235703029803984, "loss": 1.6483, "step": 3612 }, { "epoch": 0.38011572856391374, "grad_norm": 1.027282953262329, "learning_rate": 0.00014232615840350894, "loss": 1.3274, "step": 3613 }, { "epoch": 0.38022093634928983, "grad_norm": 1.2726476192474365, "learning_rate": 0.00014229528159378065, "loss": 2.1561, "step": 3614 }, { "epoch": 0.380326144134666, "grad_norm": 1.3544089794158936, "learning_rate": 0.00014226439987244057, "loss": 1.5668, "step": 3615 }, { "epoch": 0.38043135192004207, "grad_norm": 1.9423713684082031, "learning_rate": 0.00014223351324307493, "loss": 1.7549, "step": 3616 }, { "epoch": 0.3805365597054182, "grad_norm": 0.9007306694984436, "learning_rate": 0.00014220262170927046, "loss": 2.1485, "step": 3617 }, { "epoch": 0.3806417674907943, "grad_norm": 1.2513463497161865, "learning_rate": 0.0001421717252746145, "loss": 1.9919, "step": 3618 }, { "epoch": 0.38074697527617046, "grad_norm": 1.4759798049926758, "learning_rate": 0.00014214082394269493, "loss": 2.2475, "step": 3619 }, { "epoch": 0.38085218306154656, "grad_norm": 1.4133992195129395, "learning_rate": 0.00014210991771710025, "loss": 2.229, "step": 3620 }, { "epoch": 0.38095739084692265, "grad_norm": 1.1135759353637695, "learning_rate": 0.0001420790066014195, "loss": 1.7897, "step": 3621 }, { "epoch": 0.3810625986322988, "grad_norm": 1.0141302347183228, "learning_rate": 0.00014204809059924228, "loss": 1.5876, "step": 3622 }, { "epoch": 0.3811678064176749, "grad_norm": 1.2272731065750122, "learning_rate": 0.00014201716971415875, "loss": 1.9002, "step": 3623 }, { "epoch": 0.38127301420305104, "grad_norm": 1.4292423725128174, "learning_rate": 0.00014198624394975968, "loss": 1.5974, "step": 3624 }, { "epoch": 0.38137822198842714, "grad_norm": 1.052093744277954, "learning_rate": 0.00014195531330963635, "loss": 1.9424, "step": 3625 }, { "epoch": 0.3814834297738033, "grad_norm": 1.377421498298645, "learning_rate": 0.00014192437779738062, "loss": 2.4788, "step": 3626 }, { "epoch": 0.3815886375591794, "grad_norm": 1.4875088930130005, "learning_rate": 0.00014189343741658497, "loss": 1.8751, "step": 3627 }, { "epoch": 0.38169384534455547, "grad_norm": 1.2840195894241333, "learning_rate": 0.0001418624921708424, "loss": 1.8626, "step": 3628 }, { "epoch": 0.3817990531299316, "grad_norm": 1.7244127988815308, "learning_rate": 0.00014183154206374643, "loss": 1.7683, "step": 3629 }, { "epoch": 0.3819042609153077, "grad_norm": 1.5196006298065186, "learning_rate": 0.0001418005870988912, "loss": 2.0749, "step": 3630 }, { "epoch": 0.38200946870068386, "grad_norm": 1.3021456003189087, "learning_rate": 0.0001417696272798715, "loss": 2.3025, "step": 3631 }, { "epoch": 0.38211467648605996, "grad_norm": 0.9717100262641907, "learning_rate": 0.0001417386626102825, "loss": 2.2728, "step": 3632 }, { "epoch": 0.3822198842714361, "grad_norm": 1.2531250715255737, "learning_rate": 0.00014170769309372006, "loss": 2.11, "step": 3633 }, { "epoch": 0.3823250920568122, "grad_norm": 3.0939295291900635, "learning_rate": 0.00014167671873378056, "loss": 2.119, "step": 3634 }, { "epoch": 0.38243029984218835, "grad_norm": 1.3304295539855957, "learning_rate": 0.00014164573953406095, "loss": 1.6821, "step": 3635 }, { "epoch": 0.38253550762756444, "grad_norm": 1.434133768081665, "learning_rate": 0.00014161475549815877, "loss": 2.2729, "step": 3636 }, { "epoch": 0.38264071541294054, "grad_norm": 1.4510753154754639, "learning_rate": 0.00014158376662967202, "loss": 2.2107, "step": 3637 }, { "epoch": 0.3827459231983167, "grad_norm": 1.247135877609253, "learning_rate": 0.0001415527729321994, "loss": 1.846, "step": 3638 }, { "epoch": 0.3828511309836928, "grad_norm": 0.9109544157981873, "learning_rate": 0.00014152177440934012, "loss": 1.9013, "step": 3639 }, { "epoch": 0.3829563387690689, "grad_norm": 0.9982622265815735, "learning_rate": 0.00014149077106469387, "loss": 2.0598, "step": 3640 }, { "epoch": 0.383061546554445, "grad_norm": 1.1758394241333008, "learning_rate": 0.00014145976290186102, "loss": 2.3168, "step": 3641 }, { "epoch": 0.38316675433982117, "grad_norm": 0.8653580546379089, "learning_rate": 0.00014142874992444243, "loss": 1.5041, "step": 3642 }, { "epoch": 0.38327196212519726, "grad_norm": 1.015748143196106, "learning_rate": 0.0001413977321360395, "loss": 1.9884, "step": 3643 }, { "epoch": 0.38337716991057336, "grad_norm": 1.737280249595642, "learning_rate": 0.00014136670954025427, "loss": 1.7972, "step": 3644 }, { "epoch": 0.3834823776959495, "grad_norm": 2.5558969974517822, "learning_rate": 0.0001413356821406893, "loss": 2.1549, "step": 3645 }, { "epoch": 0.3835875854813256, "grad_norm": 1.4405326843261719, "learning_rate": 0.0001413046499409477, "loss": 1.9342, "step": 3646 }, { "epoch": 0.38369279326670175, "grad_norm": 1.235929012298584, "learning_rate": 0.0001412736129446331, "loss": 2.0326, "step": 3647 }, { "epoch": 0.38379800105207784, "grad_norm": 1.8266171216964722, "learning_rate": 0.0001412425711553497, "loss": 1.9242, "step": 3648 }, { "epoch": 0.383903208837454, "grad_norm": 1.1843401193618774, "learning_rate": 0.00014121152457670234, "loss": 2.4682, "step": 3649 }, { "epoch": 0.3840084166228301, "grad_norm": 1.2029701471328735, "learning_rate": 0.00014118047321229633, "loss": 1.5255, "step": 3650 }, { "epoch": 0.38411362440820623, "grad_norm": 1.264588713645935, "learning_rate": 0.00014114941706573758, "loss": 1.5459, "step": 3651 }, { "epoch": 0.3842188321935823, "grad_norm": 1.7081525325775146, "learning_rate": 0.00014111835614063253, "loss": 2.1296, "step": 3652 }, { "epoch": 0.3843240399789584, "grad_norm": 0.9113522171974182, "learning_rate": 0.0001410872904405882, "loss": 1.5649, "step": 3653 }, { "epoch": 0.38442924776433457, "grad_norm": 1.2940986156463623, "learning_rate": 0.0001410562199692121, "loss": 1.6233, "step": 3654 }, { "epoch": 0.38453445554971066, "grad_norm": 1.201011300086975, "learning_rate": 0.00014102514473011233, "loss": 1.9928, "step": 3655 }, { "epoch": 0.3846396633350868, "grad_norm": 1.310477614402771, "learning_rate": 0.0001409940647268977, "loss": 1.7539, "step": 3656 }, { "epoch": 0.3847448711204629, "grad_norm": 0.9373623132705688, "learning_rate": 0.00014096297996317724, "loss": 1.9289, "step": 3657 }, { "epoch": 0.38485007890583905, "grad_norm": 1.0480597019195557, "learning_rate": 0.00014093189044256084, "loss": 2.0449, "step": 3658 }, { "epoch": 0.38495528669121515, "grad_norm": 1.2501201629638672, "learning_rate": 0.00014090079616865882, "loss": 1.7853, "step": 3659 }, { "epoch": 0.38506049447659124, "grad_norm": 1.2125840187072754, "learning_rate": 0.00014086969714508196, "loss": 1.3741, "step": 3660 }, { "epoch": 0.3851657022619674, "grad_norm": 0.8791213631629944, "learning_rate": 0.00014083859337544175, "loss": 1.7084, "step": 3661 }, { "epoch": 0.3852709100473435, "grad_norm": 1.8772777318954468, "learning_rate": 0.00014080748486335022, "loss": 1.6383, "step": 3662 }, { "epoch": 0.38537611783271963, "grad_norm": 1.3970454931259155, "learning_rate": 0.0001407763716124198, "loss": 1.5493, "step": 3663 }, { "epoch": 0.3854813256180957, "grad_norm": 1.5599162578582764, "learning_rate": 0.00014074525362626366, "loss": 2.0028, "step": 3664 }, { "epoch": 0.3855865334034719, "grad_norm": 1.0183049440383911, "learning_rate": 0.00014071413090849534, "loss": 2.151, "step": 3665 }, { "epoch": 0.38569174118884797, "grad_norm": 1.4794666767120361, "learning_rate": 0.0001406830034627291, "loss": 1.8065, "step": 3666 }, { "epoch": 0.3857969489742241, "grad_norm": 1.77559232711792, "learning_rate": 0.00014065187129257964, "loss": 2.3033, "step": 3667 }, { "epoch": 0.3859021567596002, "grad_norm": 2.2555582523345947, "learning_rate": 0.00014062073440166222, "loss": 1.48, "step": 3668 }, { "epoch": 0.3860073645449763, "grad_norm": 0.9742986559867859, "learning_rate": 0.00014058959279359266, "loss": 1.6655, "step": 3669 }, { "epoch": 0.38611257233035245, "grad_norm": 1.42844557762146, "learning_rate": 0.00014055844647198738, "loss": 2.1812, "step": 3670 }, { "epoch": 0.38621778011572855, "grad_norm": 1.0865979194641113, "learning_rate": 0.00014052729544046326, "loss": 1.8439, "step": 3671 }, { "epoch": 0.3863229879011047, "grad_norm": 1.070984125137329, "learning_rate": 0.00014049613970263774, "loss": 1.7071, "step": 3672 }, { "epoch": 0.3864281956864808, "grad_norm": 1.4193851947784424, "learning_rate": 0.0001404649792621289, "loss": 2.1123, "step": 3673 }, { "epoch": 0.38653340347185694, "grad_norm": 1.3338897228240967, "learning_rate": 0.00014043381412255526, "loss": 2.0661, "step": 3674 }, { "epoch": 0.38663861125723303, "grad_norm": 1.063714861869812, "learning_rate": 0.00014040264428753592, "loss": 1.5893, "step": 3675 }, { "epoch": 0.3867438190426091, "grad_norm": 1.06549870967865, "learning_rate": 0.00014037146976069055, "loss": 1.7869, "step": 3676 }, { "epoch": 0.3868490268279853, "grad_norm": 1.382040023803711, "learning_rate": 0.00014034029054563933, "loss": 1.5798, "step": 3677 }, { "epoch": 0.38695423461336137, "grad_norm": 1.4620635509490967, "learning_rate": 0.000140309106646003, "loss": 1.7959, "step": 3678 }, { "epoch": 0.3870594423987375, "grad_norm": 1.2155455350875854, "learning_rate": 0.0001402779180654029, "loss": 1.5766, "step": 3679 }, { "epoch": 0.3871646501841136, "grad_norm": 1.3501040935516357, "learning_rate": 0.00014024672480746078, "loss": 1.7963, "step": 3680 }, { "epoch": 0.38726985796948976, "grad_norm": 1.1991915702819824, "learning_rate": 0.00014021552687579902, "loss": 1.9882, "step": 3681 }, { "epoch": 0.38737506575486585, "grad_norm": 2.334041118621826, "learning_rate": 0.00014018432427404055, "loss": 1.3908, "step": 3682 }, { "epoch": 0.387480273540242, "grad_norm": 1.5623788833618164, "learning_rate": 0.0001401531170058088, "loss": 1.2739, "step": 3683 }, { "epoch": 0.3875854813256181, "grad_norm": 1.153784155845642, "learning_rate": 0.00014012190507472783, "loss": 1.6715, "step": 3684 }, { "epoch": 0.3876906891109942, "grad_norm": 0.9832100868225098, "learning_rate": 0.00014009068848442214, "loss": 2.1983, "step": 3685 }, { "epoch": 0.38779589689637034, "grad_norm": 1.071271538734436, "learning_rate": 0.0001400594672385168, "loss": 2.1699, "step": 3686 }, { "epoch": 0.38790110468174643, "grad_norm": 1.600494146347046, "learning_rate": 0.00014002824134063747, "loss": 1.9091, "step": 3687 }, { "epoch": 0.3880063124671226, "grad_norm": 1.5296038389205933, "learning_rate": 0.00013999701079441028, "loss": 2.0367, "step": 3688 }, { "epoch": 0.3881115202524987, "grad_norm": 1.072365403175354, "learning_rate": 0.0001399657756034619, "loss": 2.0738, "step": 3689 }, { "epoch": 0.3882167280378748, "grad_norm": 1.1414501667022705, "learning_rate": 0.00013993453577141964, "loss": 1.5334, "step": 3690 }, { "epoch": 0.3883219358232509, "grad_norm": 1.220701813697815, "learning_rate": 0.00013990329130191123, "loss": 1.9406, "step": 3691 }, { "epoch": 0.388427143608627, "grad_norm": 1.4676032066345215, "learning_rate": 0.000139872042198565, "loss": 1.6478, "step": 3692 }, { "epoch": 0.38853235139400316, "grad_norm": 2.2082393169403076, "learning_rate": 0.0001398407884650098, "loss": 2.5011, "step": 3693 }, { "epoch": 0.38863755917937925, "grad_norm": 1.4031996726989746, "learning_rate": 0.000139809530104875, "loss": 1.8676, "step": 3694 }, { "epoch": 0.3887427669647554, "grad_norm": 1.0090667009353638, "learning_rate": 0.00013977826712179058, "loss": 1.7266, "step": 3695 }, { "epoch": 0.3888479747501315, "grad_norm": 1.4006050825119019, "learning_rate": 0.000139746999519387, "loss": 1.8296, "step": 3696 }, { "epoch": 0.38895318253550765, "grad_norm": 1.0253705978393555, "learning_rate": 0.00013971572730129525, "loss": 1.7789, "step": 3697 }, { "epoch": 0.38905839032088374, "grad_norm": 1.1264349222183228, "learning_rate": 0.00013968445047114685, "loss": 2.0904, "step": 3698 }, { "epoch": 0.3891635981062599, "grad_norm": 1.5611605644226074, "learning_rate": 0.0001396531690325739, "loss": 2.101, "step": 3699 }, { "epoch": 0.389268805891636, "grad_norm": 1.7648766040802002, "learning_rate": 0.00013962188298920902, "loss": 2.1288, "step": 3700 }, { "epoch": 0.3893740136770121, "grad_norm": 2.0247011184692383, "learning_rate": 0.00013959059234468536, "loss": 1.7906, "step": 3701 }, { "epoch": 0.3894792214623882, "grad_norm": 1.0505448579788208, "learning_rate": 0.00013955929710263653, "loss": 1.995, "step": 3702 }, { "epoch": 0.3895844292477643, "grad_norm": 1.6498749256134033, "learning_rate": 0.00013952799726669682, "loss": 1.8126, "step": 3703 }, { "epoch": 0.38968963703314047, "grad_norm": 1.861400842666626, "learning_rate": 0.00013949669284050092, "loss": 2.3192, "step": 3704 }, { "epoch": 0.38979484481851656, "grad_norm": 1.1413242816925049, "learning_rate": 0.00013946538382768418, "loss": 1.4746, "step": 3705 }, { "epoch": 0.3899000526038927, "grad_norm": 1.281286597251892, "learning_rate": 0.00013943407023188234, "loss": 2.3798, "step": 3706 }, { "epoch": 0.3900052603892688, "grad_norm": 1.3880997896194458, "learning_rate": 0.00013940275205673178, "loss": 2.0053, "step": 3707 }, { "epoch": 0.3901104681746449, "grad_norm": 1.1959868669509888, "learning_rate": 0.0001393714293058694, "loss": 2.0179, "step": 3708 }, { "epoch": 0.39021567596002105, "grad_norm": 1.6288052797317505, "learning_rate": 0.00013934010198293257, "loss": 2.1802, "step": 3709 }, { "epoch": 0.39032088374539714, "grad_norm": 1.229329228401184, "learning_rate": 0.00013930877009155922, "loss": 1.8899, "step": 3710 }, { "epoch": 0.3904260915307733, "grad_norm": 1.2303109169006348, "learning_rate": 0.00013927743363538787, "loss": 1.6863, "step": 3711 }, { "epoch": 0.3905312993161494, "grad_norm": 1.653576374053955, "learning_rate": 0.0001392460926180575, "loss": 1.765, "step": 3712 }, { "epoch": 0.39063650710152553, "grad_norm": 1.6338762044906616, "learning_rate": 0.0001392147470432076, "loss": 2.0503, "step": 3713 }, { "epoch": 0.3907417148869016, "grad_norm": 1.4691399335861206, "learning_rate": 0.00013918339691447825, "loss": 2.1288, "step": 3714 }, { "epoch": 0.3908469226722778, "grad_norm": 1.0673540830612183, "learning_rate": 0.0001391520422355101, "loss": 1.8172, "step": 3715 }, { "epoch": 0.39095213045765387, "grad_norm": 0.9647770524024963, "learning_rate": 0.00013912068300994413, "loss": 1.7418, "step": 3716 }, { "epoch": 0.39105733824302996, "grad_norm": 1.7503970861434937, "learning_rate": 0.0001390893192414221, "loss": 1.78, "step": 3717 }, { "epoch": 0.3911625460284061, "grad_norm": 0.7777706980705261, "learning_rate": 0.00013905795093358615, "loss": 1.9819, "step": 3718 }, { "epoch": 0.3912677538137822, "grad_norm": 1.5159608125686646, "learning_rate": 0.00013902657809007897, "loss": 1.7175, "step": 3719 }, { "epoch": 0.39137296159915835, "grad_norm": 1.442254662513733, "learning_rate": 0.00013899520071454377, "loss": 2.1266, "step": 3720 }, { "epoch": 0.39147816938453445, "grad_norm": 1.2013967037200928, "learning_rate": 0.00013896381881062437, "loss": 1.9828, "step": 3721 }, { "epoch": 0.3915833771699106, "grad_norm": 2.616184949874878, "learning_rate": 0.00013893243238196495, "loss": 1.7142, "step": 3722 }, { "epoch": 0.3916885849552867, "grad_norm": 1.076119065284729, "learning_rate": 0.0001389010414322104, "loss": 2.2307, "step": 3723 }, { "epoch": 0.3917937927406628, "grad_norm": 1.1124643087387085, "learning_rate": 0.00013886964596500595, "loss": 1.7956, "step": 3724 }, { "epoch": 0.39189900052603893, "grad_norm": 1.8706750869750977, "learning_rate": 0.00013883824598399756, "loss": 2.3762, "step": 3725 }, { "epoch": 0.392004208311415, "grad_norm": 1.2666409015655518, "learning_rate": 0.00013880684149283152, "loss": 1.5874, "step": 3726 }, { "epoch": 0.3921094160967912, "grad_norm": 1.3697994947433472, "learning_rate": 0.00013877543249515476, "loss": 2.4001, "step": 3727 }, { "epoch": 0.39221462388216727, "grad_norm": 1.538824200630188, "learning_rate": 0.00013874401899461474, "loss": 1.8197, "step": 3728 }, { "epoch": 0.3923198316675434, "grad_norm": 1.4980809688568115, "learning_rate": 0.00013871260099485936, "loss": 1.434, "step": 3729 }, { "epoch": 0.3924250394529195, "grad_norm": 1.348250150680542, "learning_rate": 0.0001386811784995371, "loss": 1.6379, "step": 3730 }, { "epoch": 0.39253024723829566, "grad_norm": 2.0274887084960938, "learning_rate": 0.00013864975151229697, "loss": 1.7713, "step": 3731 }, { "epoch": 0.39263545502367175, "grad_norm": 1.4459730386734009, "learning_rate": 0.00013861832003678846, "loss": 1.68, "step": 3732 }, { "epoch": 0.39274066280904785, "grad_norm": 1.5965417623519897, "learning_rate": 0.00013858688407666163, "loss": 1.962, "step": 3733 }, { "epoch": 0.392845870594424, "grad_norm": 1.5508756637573242, "learning_rate": 0.00013855544363556698, "loss": 2.1783, "step": 3734 }, { "epoch": 0.3929510783798001, "grad_norm": 1.3883273601531982, "learning_rate": 0.00013852399871715562, "loss": 2.3433, "step": 3735 }, { "epoch": 0.39305628616517624, "grad_norm": 1.741829752922058, "learning_rate": 0.00013849254932507917, "loss": 2.3758, "step": 3736 }, { "epoch": 0.39316149395055233, "grad_norm": 1.35794198513031, "learning_rate": 0.00013846109546298971, "loss": 2.0261, "step": 3737 }, { "epoch": 0.3932667017359285, "grad_norm": 1.0668870210647583, "learning_rate": 0.00013842963713453987, "loss": 2.2452, "step": 3738 }, { "epoch": 0.3933719095213046, "grad_norm": 1.3212822675704956, "learning_rate": 0.00013839817434338286, "loss": 1.9696, "step": 3739 }, { "epoch": 0.39347711730668067, "grad_norm": 0.7865563035011292, "learning_rate": 0.00013836670709317225, "loss": 1.6841, "step": 3740 }, { "epoch": 0.3935823250920568, "grad_norm": 1.023016095161438, "learning_rate": 0.0001383352353875623, "loss": 2.1734, "step": 3741 }, { "epoch": 0.3936875328774329, "grad_norm": 0.966575562953949, "learning_rate": 0.00013830375923020772, "loss": 1.735, "step": 3742 }, { "epoch": 0.39379274066280906, "grad_norm": 1.255327582359314, "learning_rate": 0.00013827227862476372, "loss": 2.0901, "step": 3743 }, { "epoch": 0.39389794844818515, "grad_norm": 1.4241955280303955, "learning_rate": 0.00013824079357488598, "loss": 1.5569, "step": 3744 }, { "epoch": 0.3940031562335613, "grad_norm": 0.807941734790802, "learning_rate": 0.00013820930408423086, "loss": 1.9292, "step": 3745 }, { "epoch": 0.3941083640189374, "grad_norm": 1.034824013710022, "learning_rate": 0.00013817781015645507, "loss": 2.1646, "step": 3746 }, { "epoch": 0.39421357180431355, "grad_norm": 1.143803358078003, "learning_rate": 0.00013814631179521588, "loss": 2.2347, "step": 3747 }, { "epoch": 0.39431877958968964, "grad_norm": 1.7627426385879517, "learning_rate": 0.0001381148090041711, "loss": 1.8837, "step": 3748 }, { "epoch": 0.39442398737506573, "grad_norm": 1.153597354888916, "learning_rate": 0.0001380833017869791, "loss": 1.989, "step": 3749 }, { "epoch": 0.3945291951604419, "grad_norm": 1.3962596654891968, "learning_rate": 0.00013805179014729865, "loss": 2.4745, "step": 3750 }, { "epoch": 0.394634402945818, "grad_norm": 1.1619459390640259, "learning_rate": 0.0001380202740887891, "loss": 2.1719, "step": 3751 }, { "epoch": 0.3947396107311941, "grad_norm": 1.4817026853561401, "learning_rate": 0.00013798875361511033, "loss": 2.6195, "step": 3752 }, { "epoch": 0.3948448185165702, "grad_norm": 1.251517415046692, "learning_rate": 0.00013795722872992272, "loss": 1.9008, "step": 3753 }, { "epoch": 0.39495002630194637, "grad_norm": 1.6343916654586792, "learning_rate": 0.0001379256994368871, "loss": 1.8265, "step": 3754 }, { "epoch": 0.39505523408732246, "grad_norm": 1.4928406476974487, "learning_rate": 0.0001378941657396649, "loss": 2.0495, "step": 3755 }, { "epoch": 0.39516044187269855, "grad_norm": 0.9346429109573364, "learning_rate": 0.00013786262764191803, "loss": 2.0691, "step": 3756 }, { "epoch": 0.3952656496580747, "grad_norm": 1.5070823431015015, "learning_rate": 0.00013783108514730884, "loss": 1.6465, "step": 3757 }, { "epoch": 0.3953708574434508, "grad_norm": 1.225148320198059, "learning_rate": 0.00013779953825950034, "loss": 1.3778, "step": 3758 }, { "epoch": 0.39547606522882695, "grad_norm": 1.4228816032409668, "learning_rate": 0.00013776798698215593, "loss": 1.559, "step": 3759 }, { "epoch": 0.39558127301420304, "grad_norm": 1.1593899726867676, "learning_rate": 0.00013773643131893956, "loss": 2.1666, "step": 3760 }, { "epoch": 0.3956864807995792, "grad_norm": 1.2001755237579346, "learning_rate": 0.00013770487127351568, "loss": 1.5535, "step": 3761 }, { "epoch": 0.3957916885849553, "grad_norm": 1.1403510570526123, "learning_rate": 0.00013767330684954926, "loss": 1.765, "step": 3762 }, { "epoch": 0.39589689637033143, "grad_norm": 1.4081346988677979, "learning_rate": 0.00013764173805070576, "loss": 2.1388, "step": 3763 }, { "epoch": 0.3960021041557075, "grad_norm": 0.9673276543617249, "learning_rate": 0.00013761016488065118, "loss": 2.1821, "step": 3764 }, { "epoch": 0.3961073119410836, "grad_norm": 1.567577838897705, "learning_rate": 0.00013757858734305203, "loss": 2.0253, "step": 3765 }, { "epoch": 0.39621251972645977, "grad_norm": 1.2986990213394165, "learning_rate": 0.00013754700544157524, "loss": 2.1731, "step": 3766 }, { "epoch": 0.39631772751183586, "grad_norm": 1.9788020849227905, "learning_rate": 0.00013751541917988836, "loss": 1.8981, "step": 3767 }, { "epoch": 0.396422935297212, "grad_norm": 1.5377720594406128, "learning_rate": 0.0001374838285616594, "loss": 2.0393, "step": 3768 }, { "epoch": 0.3965281430825881, "grad_norm": 0.8659994006156921, "learning_rate": 0.00013745223359055682, "loss": 1.7419, "step": 3769 }, { "epoch": 0.39663335086796425, "grad_norm": 1.1457507610321045, "learning_rate": 0.0001374206342702497, "loss": 2.1031, "step": 3770 }, { "epoch": 0.39673855865334035, "grad_norm": 0.8939380645751953, "learning_rate": 0.00013738903060440757, "loss": 2.0608, "step": 3771 }, { "epoch": 0.39684376643871644, "grad_norm": 1.0141644477844238, "learning_rate": 0.0001373574225967004, "loss": 1.9432, "step": 3772 }, { "epoch": 0.3969489742240926, "grad_norm": 1.7639262676239014, "learning_rate": 0.0001373258102507988, "loss": 1.7746, "step": 3773 }, { "epoch": 0.3970541820094687, "grad_norm": 1.8275938034057617, "learning_rate": 0.00013729419357037372, "loss": 1.9925, "step": 3774 }, { "epoch": 0.39715938979484483, "grad_norm": 1.8225183486938477, "learning_rate": 0.00013726257255909676, "loss": 2.0511, "step": 3775 }, { "epoch": 0.3972645975802209, "grad_norm": 1.1750774383544922, "learning_rate": 0.00013723094722063996, "loss": 2.0788, "step": 3776 }, { "epoch": 0.3973698053655971, "grad_norm": 1.4258540868759155, "learning_rate": 0.00013719931755867587, "loss": 2.0758, "step": 3777 }, { "epoch": 0.39747501315097317, "grad_norm": 0.9708757400512695, "learning_rate": 0.0001371676835768775, "loss": 1.8594, "step": 3778 }, { "epoch": 0.3975802209363493, "grad_norm": 1.1266876459121704, "learning_rate": 0.00013713604527891844, "loss": 1.6579, "step": 3779 }, { "epoch": 0.3976854287217254, "grad_norm": 2.256415605545044, "learning_rate": 0.00013710440266847274, "loss": 2.2314, "step": 3780 }, { "epoch": 0.3977906365071015, "grad_norm": 1.4716403484344482, "learning_rate": 0.0001370727557492149, "loss": 1.7153, "step": 3781 }, { "epoch": 0.39789584429247765, "grad_norm": 1.2496949434280396, "learning_rate": 0.00013704110452482005, "loss": 2.0778, "step": 3782 }, { "epoch": 0.39800105207785375, "grad_norm": 0.9806689620018005, "learning_rate": 0.0001370094489989637, "loss": 2.1003, "step": 3783 }, { "epoch": 0.3981062598632299, "grad_norm": 1.0199190378189087, "learning_rate": 0.00013697778917532192, "loss": 2.0489, "step": 3784 }, { "epoch": 0.398211467648606, "grad_norm": 0.8735719919204712, "learning_rate": 0.00013694612505757122, "loss": 1.2784, "step": 3785 }, { "epoch": 0.39831667543398214, "grad_norm": 1.2320698499679565, "learning_rate": 0.00013691445664938866, "loss": 2.0641, "step": 3786 }, { "epoch": 0.39842188321935823, "grad_norm": 1.306445598602295, "learning_rate": 0.00013688278395445185, "loss": 1.8181, "step": 3787 }, { "epoch": 0.3985270910047343, "grad_norm": 1.5891810655593872, "learning_rate": 0.00013685110697643878, "loss": 1.7734, "step": 3788 }, { "epoch": 0.3986322987901105, "grad_norm": 1.2579609155654907, "learning_rate": 0.00013681942571902803, "loss": 1.9562, "step": 3789 }, { "epoch": 0.39873750657548657, "grad_norm": 1.3183737993240356, "learning_rate": 0.00013678774018589855, "loss": 1.824, "step": 3790 }, { "epoch": 0.3988427143608627, "grad_norm": 1.2794685363769531, "learning_rate": 0.00013675605038072997, "loss": 1.4231, "step": 3791 }, { "epoch": 0.3989479221462388, "grad_norm": 1.0361131429672241, "learning_rate": 0.00013672435630720232, "loss": 1.7537, "step": 3792 }, { "epoch": 0.39905312993161496, "grad_norm": 1.2648097276687622, "learning_rate": 0.00013669265796899607, "loss": 2.534, "step": 3793 }, { "epoch": 0.39915833771699105, "grad_norm": 1.1882283687591553, "learning_rate": 0.00013666095536979232, "loss": 1.7441, "step": 3794 }, { "epoch": 0.3992635455023672, "grad_norm": 1.5789291858673096, "learning_rate": 0.00013662924851327247, "loss": 1.7312, "step": 3795 }, { "epoch": 0.3993687532877433, "grad_norm": 1.9894529581069946, "learning_rate": 0.00013659753740311866, "loss": 1.8041, "step": 3796 }, { "epoch": 0.3994739610731194, "grad_norm": 1.1728602647781372, "learning_rate": 0.00013656582204301334, "loss": 2.1131, "step": 3797 }, { "epoch": 0.39957916885849554, "grad_norm": 2.1701881885528564, "learning_rate": 0.00013653410243663952, "loss": 2.1482, "step": 3798 }, { "epoch": 0.39968437664387163, "grad_norm": 1.0504519939422607, "learning_rate": 0.00013650237858768067, "loss": 2.0582, "step": 3799 }, { "epoch": 0.3997895844292478, "grad_norm": 2.3325071334838867, "learning_rate": 0.00013647065049982078, "loss": 1.565, "step": 3800 }, { "epoch": 0.3998947922146239, "grad_norm": 1.386509895324707, "learning_rate": 0.0001364389181767444, "loss": 1.8805, "step": 3801 }, { "epoch": 0.4, "grad_norm": 1.0210331678390503, "learning_rate": 0.0001364071816221364, "loss": 2.0216, "step": 3802 }, { "epoch": 0.4001052077853761, "grad_norm": 1.7572848796844482, "learning_rate": 0.00013637544083968227, "loss": 2.1344, "step": 3803 }, { "epoch": 0.4002104155707522, "grad_norm": 1.721763014793396, "learning_rate": 0.00013634369583306798, "loss": 1.8024, "step": 3804 }, { "epoch": 0.40031562335612836, "grad_norm": 1.5794200897216797, "learning_rate": 0.00013631194660598, "loss": 1.9257, "step": 3805 }, { "epoch": 0.40042083114150445, "grad_norm": 1.026024341583252, "learning_rate": 0.00013628019316210522, "loss": 2.4013, "step": 3806 }, { "epoch": 0.4005260389268806, "grad_norm": 4.058107852935791, "learning_rate": 0.0001362484355051311, "loss": 1.554, "step": 3807 }, { "epoch": 0.4006312467122567, "grad_norm": 1.9049569368362427, "learning_rate": 0.00013621667363874552, "loss": 2.2071, "step": 3808 }, { "epoch": 0.40073645449763284, "grad_norm": 1.2772477865219116, "learning_rate": 0.00013618490756663686, "loss": 1.6058, "step": 3809 }, { "epoch": 0.40084166228300894, "grad_norm": 1.0035552978515625, "learning_rate": 0.00013615313729249405, "loss": 1.8947, "step": 3810 }, { "epoch": 0.4009468700683851, "grad_norm": 0.8478926420211792, "learning_rate": 0.00013612136282000644, "loss": 1.8861, "step": 3811 }, { "epoch": 0.4010520778537612, "grad_norm": 1.2318350076675415, "learning_rate": 0.00013608958415286396, "loss": 1.8174, "step": 3812 }, { "epoch": 0.4011572856391373, "grad_norm": 1.1952483654022217, "learning_rate": 0.00013605780129475687, "loss": 1.8322, "step": 3813 }, { "epoch": 0.4012624934245134, "grad_norm": 1.1804511547088623, "learning_rate": 0.00013602601424937604, "loss": 1.7156, "step": 3814 }, { "epoch": 0.4013677012098895, "grad_norm": 1.3360835313796997, "learning_rate": 0.00013599422302041286, "loss": 1.7492, "step": 3815 }, { "epoch": 0.40147290899526566, "grad_norm": 1.1277227401733398, "learning_rate": 0.00013596242761155903, "loss": 1.4064, "step": 3816 }, { "epoch": 0.40157811678064176, "grad_norm": 1.0985256433486938, "learning_rate": 0.00013593062802650692, "loss": 1.8555, "step": 3817 }, { "epoch": 0.4016833245660179, "grad_norm": 1.0841742753982544, "learning_rate": 0.0001358988242689493, "loss": 1.4974, "step": 3818 }, { "epoch": 0.401788532351394, "grad_norm": 1.3948936462402344, "learning_rate": 0.0001358670163425795, "loss": 1.8494, "step": 3819 }, { "epoch": 0.4018937401367701, "grad_norm": 2.593919515609741, "learning_rate": 0.0001358352042510911, "loss": 1.9506, "step": 3820 }, { "epoch": 0.40199894792214624, "grad_norm": 1.0042921304702759, "learning_rate": 0.00013580338799817844, "loss": 2.05, "step": 3821 }, { "epoch": 0.40210415570752234, "grad_norm": 1.3841592073440552, "learning_rate": 0.00013577156758753627, "loss": 1.8187, "step": 3822 }, { "epoch": 0.4022093634928985, "grad_norm": 2.3145501613616943, "learning_rate": 0.00013573974302285972, "loss": 2.1793, "step": 3823 }, { "epoch": 0.4023145712782746, "grad_norm": 1.6691625118255615, "learning_rate": 0.00013570791430784452, "loss": 1.8489, "step": 3824 }, { "epoch": 0.40241977906365073, "grad_norm": 1.7601234912872314, "learning_rate": 0.0001356760814461868, "loss": 1.055, "step": 3825 }, { "epoch": 0.4025249868490268, "grad_norm": 1.5484458208084106, "learning_rate": 0.00013564424444158324, "loss": 1.8766, "step": 3826 }, { "epoch": 0.40263019463440297, "grad_norm": 1.6130404472351074, "learning_rate": 0.00013561240329773092, "loss": 1.8005, "step": 3827 }, { "epoch": 0.40273540241977906, "grad_norm": 1.7206354141235352, "learning_rate": 0.00013558055801832748, "loss": 1.5215, "step": 3828 }, { "epoch": 0.40284061020515516, "grad_norm": 1.2879303693771362, "learning_rate": 0.00013554870860707106, "loss": 2.0524, "step": 3829 }, { "epoch": 0.4029458179905313, "grad_norm": 0.978996217250824, "learning_rate": 0.0001355168550676601, "loss": 1.7266, "step": 3830 }, { "epoch": 0.4030510257759074, "grad_norm": 1.726261019706726, "learning_rate": 0.00013548499740379373, "loss": 2.3547, "step": 3831 }, { "epoch": 0.40315623356128355, "grad_norm": 1.3626611232757568, "learning_rate": 0.00013545313561917144, "loss": 1.5038, "step": 3832 }, { "epoch": 0.40326144134665964, "grad_norm": 1.0368597507476807, "learning_rate": 0.00013542126971749328, "loss": 1.8802, "step": 3833 }, { "epoch": 0.4033666491320358, "grad_norm": 1.394249439239502, "learning_rate": 0.00013538939970245972, "loss": 1.7687, "step": 3834 }, { "epoch": 0.4034718569174119, "grad_norm": 1.244553565979004, "learning_rate": 0.0001353575255777717, "loss": 1.9525, "step": 3835 }, { "epoch": 0.403577064702788, "grad_norm": 1.686198353767395, "learning_rate": 0.00013532564734713068, "loss": 1.6175, "step": 3836 }, { "epoch": 0.40368227248816413, "grad_norm": 2.307166814804077, "learning_rate": 0.00013529376501423852, "loss": 2.3844, "step": 3837 }, { "epoch": 0.4037874802735402, "grad_norm": 1.9087625741958618, "learning_rate": 0.00013526187858279765, "loss": 1.272, "step": 3838 }, { "epoch": 0.40389268805891637, "grad_norm": 1.4016311168670654, "learning_rate": 0.00013522998805651096, "loss": 1.9822, "step": 3839 }, { "epoch": 0.40399789584429247, "grad_norm": 1.2222285270690918, "learning_rate": 0.00013519809343908178, "loss": 2.1131, "step": 3840 }, { "epoch": 0.4041031036296686, "grad_norm": 1.7945427894592285, "learning_rate": 0.00013516619473421387, "loss": 1.8994, "step": 3841 }, { "epoch": 0.4042083114150447, "grad_norm": 1.665122628211975, "learning_rate": 0.0001351342919456116, "loss": 1.7643, "step": 3842 }, { "epoch": 0.40431351920042086, "grad_norm": 1.1571388244628906, "learning_rate": 0.00013510238507697967, "loss": 2.1814, "step": 3843 }, { "epoch": 0.40441872698579695, "grad_norm": 1.2247772216796875, "learning_rate": 0.00013507047413202335, "loss": 1.9968, "step": 3844 }, { "epoch": 0.40452393477117304, "grad_norm": 1.482480525970459, "learning_rate": 0.00013503855911444837, "loss": 1.9141, "step": 3845 }, { "epoch": 0.4046291425565492, "grad_norm": 1.5328335762023926, "learning_rate": 0.00013500664002796093, "loss": 1.8682, "step": 3846 }, { "epoch": 0.4047343503419253, "grad_norm": 1.9800300598144531, "learning_rate": 0.0001349747168762676, "loss": 1.7248, "step": 3847 }, { "epoch": 0.40483955812730144, "grad_norm": 1.3006877899169922, "learning_rate": 0.0001349427896630756, "loss": 1.5546, "step": 3848 }, { "epoch": 0.40494476591267753, "grad_norm": 1.683077096939087, "learning_rate": 0.0001349108583920925, "loss": 2.1227, "step": 3849 }, { "epoch": 0.4050499736980537, "grad_norm": 1.1028867959976196, "learning_rate": 0.00013487892306702638, "loss": 1.8122, "step": 3850 }, { "epoch": 0.40515518148342977, "grad_norm": 1.3588521480560303, "learning_rate": 0.00013484698369158578, "loss": 1.7322, "step": 3851 }, { "epoch": 0.40526038926880587, "grad_norm": 1.1769453287124634, "learning_rate": 0.0001348150402694797, "loss": 2.0872, "step": 3852 }, { "epoch": 0.405365597054182, "grad_norm": 1.4604272842407227, "learning_rate": 0.00013478309280441763, "loss": 2.044, "step": 3853 }, { "epoch": 0.4054708048395581, "grad_norm": 1.201572299003601, "learning_rate": 0.00013475114130010954, "loss": 2.048, "step": 3854 }, { "epoch": 0.40557601262493426, "grad_norm": 2.324699878692627, "learning_rate": 0.00013471918576026583, "loss": 2.0551, "step": 3855 }, { "epoch": 0.40568122041031035, "grad_norm": 1.0302995443344116, "learning_rate": 0.00013468722618859743, "loss": 1.7291, "step": 3856 }, { "epoch": 0.4057864281956865, "grad_norm": 1.6787930727005005, "learning_rate": 0.00013465526258881565, "loss": 2.4362, "step": 3857 }, { "epoch": 0.4058916359810626, "grad_norm": 1.2186408042907715, "learning_rate": 0.00013462329496463236, "loss": 1.8517, "step": 3858 }, { "epoch": 0.40599684376643874, "grad_norm": 0.8594940900802612, "learning_rate": 0.0001345913233197598, "loss": 1.8623, "step": 3859 }, { "epoch": 0.40610205155181484, "grad_norm": 1.7866066694259644, "learning_rate": 0.00013455934765791084, "loss": 1.7403, "step": 3860 }, { "epoch": 0.40620725933719093, "grad_norm": 1.9410916566848755, "learning_rate": 0.00013452736798279856, "loss": 1.7438, "step": 3861 }, { "epoch": 0.4063124671225671, "grad_norm": 0.9494317173957825, "learning_rate": 0.0001344953842981368, "loss": 1.8397, "step": 3862 }, { "epoch": 0.40641767490794317, "grad_norm": 1.590301752090454, "learning_rate": 0.0001344633966076396, "loss": 2.0589, "step": 3863 }, { "epoch": 0.4065228826933193, "grad_norm": 1.0571048259735107, "learning_rate": 0.0001344314049150217, "loss": 2.1632, "step": 3864 }, { "epoch": 0.4066280904786954, "grad_norm": 1.2266767024993896, "learning_rate": 0.00013439940922399806, "loss": 2.1405, "step": 3865 }, { "epoch": 0.40673329826407156, "grad_norm": 1.6102620363235474, "learning_rate": 0.00013436740953828432, "loss": 2.0555, "step": 3866 }, { "epoch": 0.40683850604944766, "grad_norm": 1.640577793121338, "learning_rate": 0.0001343354058615965, "loss": 1.3221, "step": 3867 }, { "epoch": 0.40694371383482375, "grad_norm": 1.5055091381072998, "learning_rate": 0.00013430339819765105, "loss": 1.9195, "step": 3868 }, { "epoch": 0.4070489216201999, "grad_norm": 1.0952521562576294, "learning_rate": 0.0001342713865501649, "loss": 1.8598, "step": 3869 }, { "epoch": 0.407154129405576, "grad_norm": 0.8317450881004333, "learning_rate": 0.00013423937092285555, "loss": 1.9036, "step": 3870 }, { "epoch": 0.40725933719095214, "grad_norm": 1.3598711490631104, "learning_rate": 0.00013420735131944073, "loss": 1.9391, "step": 3871 }, { "epoch": 0.40736454497632824, "grad_norm": 1.854884147644043, "learning_rate": 0.0001341753277436389, "loss": 1.5506, "step": 3872 }, { "epoch": 0.4074697527617044, "grad_norm": 1.2758795022964478, "learning_rate": 0.00013414330019916875, "loss": 1.6345, "step": 3873 }, { "epoch": 0.4075749605470805, "grad_norm": 1.4134248495101929, "learning_rate": 0.0001341112686897496, "loss": 1.3446, "step": 3874 }, { "epoch": 0.4076801683324566, "grad_norm": 1.3641563653945923, "learning_rate": 0.00013407923321910115, "loss": 2.0881, "step": 3875 }, { "epoch": 0.4077853761178327, "grad_norm": 1.287705659866333, "learning_rate": 0.00013404719379094354, "loss": 2.2842, "step": 3876 }, { "epoch": 0.4078905839032088, "grad_norm": 1.201897382736206, "learning_rate": 0.00013401515040899746, "loss": 1.7941, "step": 3877 }, { "epoch": 0.40799579168858496, "grad_norm": 1.092402696609497, "learning_rate": 0.00013398310307698397, "loss": 1.8657, "step": 3878 }, { "epoch": 0.40810099947396106, "grad_norm": 1.349609136581421, "learning_rate": 0.0001339510517986246, "loss": 1.6198, "step": 3879 }, { "epoch": 0.4082062072593372, "grad_norm": 1.9795316457748413, "learning_rate": 0.0001339189965776414, "loss": 1.7579, "step": 3880 }, { "epoch": 0.4083114150447133, "grad_norm": 1.3774399757385254, "learning_rate": 0.0001338869374177568, "loss": 1.903, "step": 3881 }, { "epoch": 0.40841662283008945, "grad_norm": 1.0936564207077026, "learning_rate": 0.00013385487432269376, "loss": 2.2156, "step": 3882 }, { "epoch": 0.40852183061546554, "grad_norm": 1.4462928771972656, "learning_rate": 0.00013382280729617568, "loss": 2.0977, "step": 3883 }, { "epoch": 0.40862703840084164, "grad_norm": 2.339925527572632, "learning_rate": 0.00013379073634192632, "loss": 1.6663, "step": 3884 }, { "epoch": 0.4087322461862178, "grad_norm": 1.1641333103179932, "learning_rate": 0.00013375866146367, "loss": 2.5582, "step": 3885 }, { "epoch": 0.4088374539715939, "grad_norm": 0.9723519086837769, "learning_rate": 0.00013372658266513153, "loss": 1.5445, "step": 3886 }, { "epoch": 0.40894266175697, "grad_norm": 0.9060115218162537, "learning_rate": 0.00013369449995003608, "loss": 2.0697, "step": 3887 }, { "epoch": 0.4090478695423461, "grad_norm": 1.7128041982650757, "learning_rate": 0.00013366241332210928, "loss": 1.4517, "step": 3888 }, { "epoch": 0.40915307732772227, "grad_norm": 2.7365641593933105, "learning_rate": 0.00013363032278507726, "loss": 1.2883, "step": 3889 }, { "epoch": 0.40925828511309836, "grad_norm": 1.3242186307907104, "learning_rate": 0.00013359822834266662, "loss": 2.3144, "step": 3890 }, { "epoch": 0.4093634928984745, "grad_norm": 1.483139991760254, "learning_rate": 0.00013356612999860436, "loss": 1.9398, "step": 3891 }, { "epoch": 0.4094687006838506, "grad_norm": 1.365196943283081, "learning_rate": 0.00013353402775661795, "loss": 1.5895, "step": 3892 }, { "epoch": 0.4095739084692267, "grad_norm": 1.608028769493103, "learning_rate": 0.0001335019216204353, "loss": 1.9566, "step": 3893 }, { "epoch": 0.40967911625460285, "grad_norm": 1.6372560262680054, "learning_rate": 0.00013346981159378485, "loss": 2.0881, "step": 3894 }, { "epoch": 0.40978432403997894, "grad_norm": 1.121978521347046, "learning_rate": 0.00013343769768039537, "loss": 1.9097, "step": 3895 }, { "epoch": 0.4098895318253551, "grad_norm": 1.4127432107925415, "learning_rate": 0.00013340557988399617, "loss": 2.1085, "step": 3896 }, { "epoch": 0.4099947396107312, "grad_norm": 1.146700143814087, "learning_rate": 0.00013337345820831696, "loss": 1.546, "step": 3897 }, { "epoch": 0.41009994739610733, "grad_norm": 1.1912237405776978, "learning_rate": 0.000133341332657088, "loss": 1.4891, "step": 3898 }, { "epoch": 0.4102051551814834, "grad_norm": 1.4994091987609863, "learning_rate": 0.0001333092032340398, "loss": 2.1801, "step": 3899 }, { "epoch": 0.4103103629668595, "grad_norm": 1.059524655342102, "learning_rate": 0.00013327706994290355, "loss": 1.9326, "step": 3900 }, { "epoch": 0.41041557075223567, "grad_norm": 1.365159034729004, "learning_rate": 0.00013324493278741073, "loss": 1.833, "step": 3901 }, { "epoch": 0.41052077853761176, "grad_norm": 2.979818344116211, "learning_rate": 0.00013321279177129337, "loss": 2.8629, "step": 3902 }, { "epoch": 0.4106259863229879, "grad_norm": 2.018228530883789, "learning_rate": 0.00013318064689828385, "loss": 1.9213, "step": 3903 }, { "epoch": 0.410731194108364, "grad_norm": 1.5214287042617798, "learning_rate": 0.00013314849817211508, "loss": 1.7545, "step": 3904 }, { "epoch": 0.41083640189374016, "grad_norm": 1.0850343704223633, "learning_rate": 0.00013311634559652036, "loss": 1.5041, "step": 3905 }, { "epoch": 0.41094160967911625, "grad_norm": 1.1685420274734497, "learning_rate": 0.00013308418917523348, "loss": 1.9563, "step": 3906 }, { "epoch": 0.4110468174644924, "grad_norm": 1.6198590993881226, "learning_rate": 0.00013305202891198862, "loss": 1.7929, "step": 3907 }, { "epoch": 0.4111520252498685, "grad_norm": 2.0444579124450684, "learning_rate": 0.0001330198648105205, "loss": 2.1797, "step": 3908 }, { "epoch": 0.4112572330352446, "grad_norm": 1.8956327438354492, "learning_rate": 0.00013298769687456426, "loss": 2.1496, "step": 3909 }, { "epoch": 0.41136244082062073, "grad_norm": 1.3414783477783203, "learning_rate": 0.00013295552510785534, "loss": 1.4914, "step": 3910 }, { "epoch": 0.4114676486059968, "grad_norm": 1.368857502937317, "learning_rate": 0.00013292334951412984, "loss": 1.7118, "step": 3911 }, { "epoch": 0.411572856391373, "grad_norm": 1.7612696886062622, "learning_rate": 0.00013289117009712418, "loss": 2.0468, "step": 3912 }, { "epoch": 0.41167806417674907, "grad_norm": 1.1317222118377686, "learning_rate": 0.00013285898686057524, "loss": 1.9907, "step": 3913 }, { "epoch": 0.4117832719621252, "grad_norm": 1.8835352659225464, "learning_rate": 0.00013282679980822034, "loss": 1.4493, "step": 3914 }, { "epoch": 0.4118884797475013, "grad_norm": 1.1284127235412598, "learning_rate": 0.00013279460894379729, "loss": 1.5342, "step": 3915 }, { "epoch": 0.4119936875328774, "grad_norm": 1.2283440828323364, "learning_rate": 0.0001327624142710443, "loss": 2.1995, "step": 3916 }, { "epoch": 0.41209889531825356, "grad_norm": 1.2161046266555786, "learning_rate": 0.00013273021579370003, "loss": 2.1285, "step": 3917 }, { "epoch": 0.41220410310362965, "grad_norm": 1.6340278387069702, "learning_rate": 0.00013269801351550354, "loss": 1.4938, "step": 3918 }, { "epoch": 0.4123093108890058, "grad_norm": 1.4137933254241943, "learning_rate": 0.00013266580744019445, "loss": 1.6995, "step": 3919 }, { "epoch": 0.4124145186743819, "grad_norm": 1.0340012311935425, "learning_rate": 0.0001326335975715127, "loss": 1.9991, "step": 3920 }, { "epoch": 0.41251972645975804, "grad_norm": 1.2276008129119873, "learning_rate": 0.00013260138391319872, "loss": 1.3765, "step": 3921 }, { "epoch": 0.41262493424513413, "grad_norm": 1.1833198070526123, "learning_rate": 0.00013256916646899337, "loss": 2.0268, "step": 3922 }, { "epoch": 0.4127301420305103, "grad_norm": 2.14572811126709, "learning_rate": 0.00013253694524263798, "loss": 2.0048, "step": 3923 }, { "epoch": 0.4128353498158864, "grad_norm": 1.3830515146255493, "learning_rate": 0.00013250472023787425, "loss": 2.113, "step": 3924 }, { "epoch": 0.41294055760126247, "grad_norm": 1.290831208229065, "learning_rate": 0.00013247249145844443, "loss": 1.6246, "step": 3925 }, { "epoch": 0.4130457653866386, "grad_norm": 1.4509975910186768, "learning_rate": 0.00013244025890809112, "loss": 1.8658, "step": 3926 }, { "epoch": 0.4131509731720147, "grad_norm": 1.2265394926071167, "learning_rate": 0.00013240802259055734, "loss": 1.763, "step": 3927 }, { "epoch": 0.41325618095739086, "grad_norm": 1.2114838361740112, "learning_rate": 0.0001323757825095866, "loss": 1.7796, "step": 3928 }, { "epoch": 0.41336138874276696, "grad_norm": 1.1204650402069092, "learning_rate": 0.00013234353866892285, "loss": 1.5069, "step": 3929 }, { "epoch": 0.4134665965281431, "grad_norm": 1.245553970336914, "learning_rate": 0.00013231129107231052, "loss": 1.9431, "step": 3930 }, { "epoch": 0.4135718043135192, "grad_norm": 1.2581690549850464, "learning_rate": 0.00013227903972349428, "loss": 1.7897, "step": 3931 }, { "epoch": 0.4136770120988953, "grad_norm": 1.647939920425415, "learning_rate": 0.00013224678462621947, "loss": 2.0146, "step": 3932 }, { "epoch": 0.41378221988427144, "grad_norm": 1.1378495693206787, "learning_rate": 0.00013221452578423176, "loss": 1.7066, "step": 3933 }, { "epoch": 0.41388742766964753, "grad_norm": 1.322014570236206, "learning_rate": 0.00013218226320127724, "loss": 1.4162, "step": 3934 }, { "epoch": 0.4139926354550237, "grad_norm": 0.965604841709137, "learning_rate": 0.00013214999688110249, "loss": 2.0709, "step": 3935 }, { "epoch": 0.4140978432403998, "grad_norm": 1.4124212265014648, "learning_rate": 0.00013211772682745446, "loss": 2.03, "step": 3936 }, { "epoch": 0.4142030510257759, "grad_norm": 1.284655213356018, "learning_rate": 0.00013208545304408057, "loss": 1.9079, "step": 3937 }, { "epoch": 0.414308258811152, "grad_norm": 1.4193320274353027, "learning_rate": 0.00013205317553472868, "loss": 1.6901, "step": 3938 }, { "epoch": 0.41441346659652817, "grad_norm": 1.1792535781860352, "learning_rate": 0.00013202089430314705, "loss": 1.5996, "step": 3939 }, { "epoch": 0.41451867438190426, "grad_norm": 1.242659568786621, "learning_rate": 0.00013198860935308444, "loss": 2.35, "step": 3940 }, { "epoch": 0.41462388216728036, "grad_norm": 1.4708364009857178, "learning_rate": 0.0001319563206882899, "loss": 1.83, "step": 3941 }, { "epoch": 0.4147290899526565, "grad_norm": 1.332457184791565, "learning_rate": 0.00013192402831251312, "loss": 2.0127, "step": 3942 }, { "epoch": 0.4148342977380326, "grad_norm": 1.8387818336486816, "learning_rate": 0.00013189173222950403, "loss": 2.4401, "step": 3943 }, { "epoch": 0.41493950552340875, "grad_norm": 1.1250078678131104, "learning_rate": 0.0001318594324430131, "loss": 2.0481, "step": 3944 }, { "epoch": 0.41504471330878484, "grad_norm": 1.0061681270599365, "learning_rate": 0.00013182712895679118, "loss": 1.6706, "step": 3945 }, { "epoch": 0.415149921094161, "grad_norm": 1.2926052808761597, "learning_rate": 0.0001317948217745896, "loss": 1.596, "step": 3946 }, { "epoch": 0.4152551288795371, "grad_norm": 1.2874207496643066, "learning_rate": 0.00013176251090016007, "loss": 1.7986, "step": 3947 }, { "epoch": 0.4153603366649132, "grad_norm": 1.1514983177185059, "learning_rate": 0.00013173019633725474, "loss": 2.1873, "step": 3948 }, { "epoch": 0.4154655444502893, "grad_norm": 1.636364459991455, "learning_rate": 0.00013169787808962617, "loss": 1.9381, "step": 3949 }, { "epoch": 0.4155707522356654, "grad_norm": 1.7293845415115356, "learning_rate": 0.00013166555616102744, "loss": 1.8849, "step": 3950 }, { "epoch": 0.41567596002104157, "grad_norm": 1.3633328676223755, "learning_rate": 0.0001316332305552119, "loss": 1.2241, "step": 3951 }, { "epoch": 0.41578116780641766, "grad_norm": 0.8640151023864746, "learning_rate": 0.00013160090127593344, "loss": 1.9409, "step": 3952 }, { "epoch": 0.4158863755917938, "grad_norm": 1.580431580543518, "learning_rate": 0.00013156856832694642, "loss": 1.7308, "step": 3953 }, { "epoch": 0.4159915833771699, "grad_norm": 0.9848331212997437, "learning_rate": 0.0001315362317120055, "loss": 1.8526, "step": 3954 }, { "epoch": 0.41609679116254605, "grad_norm": 1.7985378503799438, "learning_rate": 0.00013150389143486586, "loss": 1.7818, "step": 3955 }, { "epoch": 0.41620199894792215, "grad_norm": 1.3943655490875244, "learning_rate": 0.000131471547499283, "loss": 1.8059, "step": 3956 }, { "epoch": 0.41630720673329824, "grad_norm": 1.2380815744400024, "learning_rate": 0.00013143919990901302, "loss": 1.652, "step": 3957 }, { "epoch": 0.4164124145186744, "grad_norm": 1.415740966796875, "learning_rate": 0.00013140684866781225, "loss": 2.4166, "step": 3958 }, { "epoch": 0.4165176223040505, "grad_norm": 1.2207032442092896, "learning_rate": 0.00013137449377943755, "loss": 1.7993, "step": 3959 }, { "epoch": 0.41662283008942663, "grad_norm": 1.1489773988723755, "learning_rate": 0.00013134213524764623, "loss": 1.7678, "step": 3960 }, { "epoch": 0.4167280378748027, "grad_norm": 1.381227970123291, "learning_rate": 0.00013130977307619594, "loss": 2.0441, "step": 3961 }, { "epoch": 0.4168332456601789, "grad_norm": 1.6043063402175903, "learning_rate": 0.0001312774072688448, "loss": 1.9146, "step": 3962 }, { "epoch": 0.41693845344555497, "grad_norm": 1.3174347877502441, "learning_rate": 0.00013124503782935133, "loss": 1.7681, "step": 3963 }, { "epoch": 0.41704366123093106, "grad_norm": 1.854496955871582, "learning_rate": 0.00013121266476147454, "loss": 2.158, "step": 3964 }, { "epoch": 0.4171488690163072, "grad_norm": 1.0530141592025757, "learning_rate": 0.00013118028806897373, "loss": 1.8426, "step": 3965 }, { "epoch": 0.4172540768016833, "grad_norm": 1.5964198112487793, "learning_rate": 0.00013114790775560877, "loss": 1.3975, "step": 3966 }, { "epoch": 0.41735928458705945, "grad_norm": 1.1978175640106201, "learning_rate": 0.00013111552382513985, "loss": 2.0528, "step": 3967 }, { "epoch": 0.41746449237243555, "grad_norm": 1.6343151330947876, "learning_rate": 0.0001310831362813276, "loss": 1.9448, "step": 3968 }, { "epoch": 0.4175697001578117, "grad_norm": 1.598139762878418, "learning_rate": 0.0001310507451279331, "loss": 1.6996, "step": 3969 }, { "epoch": 0.4176749079431878, "grad_norm": 0.9592291712760925, "learning_rate": 0.00013101835036871781, "loss": 1.1519, "step": 3970 }, { "epoch": 0.41778011572856394, "grad_norm": 2.241469621658325, "learning_rate": 0.00013098595200744366, "loss": 1.9488, "step": 3971 }, { "epoch": 0.41788532351394003, "grad_norm": 2.4150736331939697, "learning_rate": 0.0001309535500478729, "loss": 2.2178, "step": 3972 }, { "epoch": 0.4179905312993161, "grad_norm": 1.3556127548217773, "learning_rate": 0.00013092114449376828, "loss": 1.5269, "step": 3973 }, { "epoch": 0.4180957390846923, "grad_norm": 1.4668810367584229, "learning_rate": 0.00013088873534889304, "loss": 2.0874, "step": 3974 }, { "epoch": 0.41820094687006837, "grad_norm": 1.5009702444076538, "learning_rate": 0.00013085632261701063, "loss": 1.7267, "step": 3975 }, { "epoch": 0.4183061546554445, "grad_norm": 1.6611928939819336, "learning_rate": 0.0001308239063018851, "loss": 1.5624, "step": 3976 }, { "epoch": 0.4184113624408206, "grad_norm": 1.1202343702316284, "learning_rate": 0.00013079148640728077, "loss": 1.78, "step": 3977 }, { "epoch": 0.41851657022619676, "grad_norm": 1.4417529106140137, "learning_rate": 0.0001307590629369626, "loss": 2.0658, "step": 3978 }, { "epoch": 0.41862177801157285, "grad_norm": 1.2309139966964722, "learning_rate": 0.0001307266358946957, "loss": 1.5864, "step": 3979 }, { "epoch": 0.41872698579694895, "grad_norm": 1.4191454648971558, "learning_rate": 0.00013069420528424579, "loss": 1.9842, "step": 3980 }, { "epoch": 0.4188321935823251, "grad_norm": 1.4356913566589355, "learning_rate": 0.00013066177110937884, "loss": 1.7109, "step": 3981 }, { "epoch": 0.4189374013677012, "grad_norm": 2.5019426345825195, "learning_rate": 0.00013062933337386142, "loss": 1.8636, "step": 3982 }, { "epoch": 0.41904260915307734, "grad_norm": 1.5507819652557373, "learning_rate": 0.00013059689208146035, "loss": 1.5129, "step": 3983 }, { "epoch": 0.41914781693845343, "grad_norm": 1.7946397066116333, "learning_rate": 0.00013056444723594297, "loss": 1.8957, "step": 3984 }, { "epoch": 0.4192530247238296, "grad_norm": 1.1665949821472168, "learning_rate": 0.000130531998841077, "loss": 1.995, "step": 3985 }, { "epoch": 0.4193582325092057, "grad_norm": 1.3788167238235474, "learning_rate": 0.00013049954690063048, "loss": 1.5433, "step": 3986 }, { "epoch": 0.4194634402945818, "grad_norm": 1.3582801818847656, "learning_rate": 0.00013046709141837205, "loss": 1.6558, "step": 3987 }, { "epoch": 0.4195686480799579, "grad_norm": 1.2572566270828247, "learning_rate": 0.00013043463239807064, "loss": 1.9563, "step": 3988 }, { "epoch": 0.419673855865334, "grad_norm": 1.366934061050415, "learning_rate": 0.00013040216984349555, "loss": 1.987, "step": 3989 }, { "epoch": 0.41977906365071016, "grad_norm": 1.2102018594741821, "learning_rate": 0.0001303697037584166, "loss": 2.09, "step": 3990 }, { "epoch": 0.41988427143608625, "grad_norm": 1.2199288606643677, "learning_rate": 0.000130337234146604, "loss": 2.0518, "step": 3991 }, { "epoch": 0.4199894792214624, "grad_norm": 1.3983739614486694, "learning_rate": 0.00013030476101182824, "loss": 1.9292, "step": 3992 }, { "epoch": 0.4200946870068385, "grad_norm": 1.5765870809555054, "learning_rate": 0.0001302722843578604, "loss": 1.7341, "step": 3993 }, { "epoch": 0.42019989479221465, "grad_norm": 1.6028943061828613, "learning_rate": 0.00013023980418847185, "loss": 2.1453, "step": 3994 }, { "epoch": 0.42030510257759074, "grad_norm": 1.2651803493499756, "learning_rate": 0.00013020732050743442, "loss": 1.869, "step": 3995 }, { "epoch": 0.42041031036296683, "grad_norm": 1.994928240776062, "learning_rate": 0.00013017483331852035, "loss": 2.1251, "step": 3996 }, { "epoch": 0.420515518148343, "grad_norm": 1.914730191230774, "learning_rate": 0.00013014234262550222, "loss": 1.464, "step": 3997 }, { "epoch": 0.4206207259337191, "grad_norm": 1.306941032409668, "learning_rate": 0.00013010984843215312, "loss": 1.7878, "step": 3998 }, { "epoch": 0.4207259337190952, "grad_norm": 2.770078659057617, "learning_rate": 0.00013007735074224645, "loss": 1.7626, "step": 3999 }, { "epoch": 0.4208311415044713, "grad_norm": 1.6361559629440308, "learning_rate": 0.0001300448495595561, "loss": 1.6572, "step": 4000 }, { "epoch": 0.42093634928984747, "grad_norm": 1.2924569845199585, "learning_rate": 0.0001300123448878563, "loss": 1.9668, "step": 4001 }, { "epoch": 0.42104155707522356, "grad_norm": 1.4006869792938232, "learning_rate": 0.00012997983673092173, "loss": 1.384, "step": 4002 }, { "epoch": 0.4211467648605997, "grad_norm": 1.6290597915649414, "learning_rate": 0.00012994732509252744, "loss": 2.2109, "step": 4003 }, { "epoch": 0.4212519726459758, "grad_norm": 1.2416553497314453, "learning_rate": 0.00012991480997644886, "loss": 2.3437, "step": 4004 }, { "epoch": 0.4213571804313519, "grad_norm": 1.4514647722244263, "learning_rate": 0.00012988229138646192, "loss": 1.9429, "step": 4005 }, { "epoch": 0.42146238821672805, "grad_norm": 1.3162492513656616, "learning_rate": 0.00012984976932634292, "loss": 1.8274, "step": 4006 }, { "epoch": 0.42156759600210414, "grad_norm": 1.3386199474334717, "learning_rate": 0.00012981724379986846, "loss": 1.7178, "step": 4007 }, { "epoch": 0.4216728037874803, "grad_norm": 1.1760342121124268, "learning_rate": 0.00012978471481081566, "loss": 1.4665, "step": 4008 }, { "epoch": 0.4217780115728564, "grad_norm": 1.1634572744369507, "learning_rate": 0.00012975218236296204, "loss": 1.805, "step": 4009 }, { "epoch": 0.42188321935823253, "grad_norm": 1.3325061798095703, "learning_rate": 0.00012971964646008542, "loss": 2.1894, "step": 4010 }, { "epoch": 0.4219884271436086, "grad_norm": 1.4473451375961304, "learning_rate": 0.00012968710710596417, "loss": 2.0688, "step": 4011 }, { "epoch": 0.4220936349289847, "grad_norm": 1.3676494359970093, "learning_rate": 0.0001296545643043769, "loss": 2.0859, "step": 4012 }, { "epoch": 0.42219884271436087, "grad_norm": 1.168908953666687, "learning_rate": 0.00012962201805910274, "loss": 1.6922, "step": 4013 }, { "epoch": 0.42230405049973696, "grad_norm": 1.1409196853637695, "learning_rate": 0.00012958946837392113, "loss": 1.9286, "step": 4014 }, { "epoch": 0.4224092582851131, "grad_norm": 1.3931876420974731, "learning_rate": 0.00012955691525261203, "loss": 1.5437, "step": 4015 }, { "epoch": 0.4225144660704892, "grad_norm": 1.6057072877883911, "learning_rate": 0.00012952435869895569, "loss": 2.0498, "step": 4016 }, { "epoch": 0.42261967385586535, "grad_norm": 2.05791974067688, "learning_rate": 0.00012949179871673278, "loss": 1.7363, "step": 4017 }, { "epoch": 0.42272488164124145, "grad_norm": 1.1081055402755737, "learning_rate": 0.00012945923530972438, "loss": 2.24, "step": 4018 }, { "epoch": 0.4228300894266176, "grad_norm": 1.165257215499878, "learning_rate": 0.00012942666848171202, "loss": 2.1699, "step": 4019 }, { "epoch": 0.4229352972119937, "grad_norm": 2.037813901901245, "learning_rate": 0.00012939409823647753, "loss": 1.3196, "step": 4020 }, { "epoch": 0.4230405049973698, "grad_norm": 1.1846753358840942, "learning_rate": 0.00012936152457780322, "loss": 1.6817, "step": 4021 }, { "epoch": 0.42314571278274593, "grad_norm": 0.9720628261566162, "learning_rate": 0.00012932894750947177, "loss": 1.6067, "step": 4022 }, { "epoch": 0.423250920568122, "grad_norm": 1.4518640041351318, "learning_rate": 0.00012929636703526618, "loss": 1.9264, "step": 4023 }, { "epoch": 0.4233561283534982, "grad_norm": 1.9571900367736816, "learning_rate": 0.00012926378315896998, "loss": 2.0874, "step": 4024 }, { "epoch": 0.42346133613887427, "grad_norm": 1.8690723180770874, "learning_rate": 0.00012923119588436702, "loss": 1.3981, "step": 4025 }, { "epoch": 0.4235665439242504, "grad_norm": 0.9987134337425232, "learning_rate": 0.0001291986052152415, "loss": 2.2083, "step": 4026 }, { "epoch": 0.4236717517096265, "grad_norm": 1.6420297622680664, "learning_rate": 0.0001291660111553781, "loss": 2.006, "step": 4027 }, { "epoch": 0.4237769594950026, "grad_norm": 1.2390166521072388, "learning_rate": 0.0001291334137085619, "loss": 1.4468, "step": 4028 }, { "epoch": 0.42388216728037875, "grad_norm": 1.2712844610214233, "learning_rate": 0.00012910081287857827, "loss": 2.3897, "step": 4029 }, { "epoch": 0.42398737506575485, "grad_norm": 1.1292376518249512, "learning_rate": 0.0001290682086692131, "loss": 2.0741, "step": 4030 }, { "epoch": 0.424092582851131, "grad_norm": 1.479219913482666, "learning_rate": 0.00012903560108425258, "loss": 1.9733, "step": 4031 }, { "epoch": 0.4241977906365071, "grad_norm": 0.8995711207389832, "learning_rate": 0.00012900299012748328, "loss": 1.9121, "step": 4032 }, { "epoch": 0.42430299842188324, "grad_norm": 1.6135600805282593, "learning_rate": 0.00012897037580269225, "loss": 1.7079, "step": 4033 }, { "epoch": 0.42440820620725933, "grad_norm": 1.3254718780517578, "learning_rate": 0.0001289377581136669, "loss": 1.5347, "step": 4034 }, { "epoch": 0.4245134139926355, "grad_norm": 1.225527286529541, "learning_rate": 0.00012890513706419497, "loss": 0.9884, "step": 4035 }, { "epoch": 0.4246186217780116, "grad_norm": 1.1481863260269165, "learning_rate": 0.00012887251265806466, "loss": 1.9885, "step": 4036 }, { "epoch": 0.42472382956338767, "grad_norm": 1.3110417127609253, "learning_rate": 0.00012883988489906454, "loss": 1.7185, "step": 4037 }, { "epoch": 0.4248290373487638, "grad_norm": 1.7494990825653076, "learning_rate": 0.00012880725379098352, "loss": 1.8137, "step": 4038 }, { "epoch": 0.4249342451341399, "grad_norm": 1.0107097625732422, "learning_rate": 0.00012877461933761102, "loss": 1.7371, "step": 4039 }, { "epoch": 0.42503945291951606, "grad_norm": 1.2290560007095337, "learning_rate": 0.00012874198154273672, "loss": 2.0912, "step": 4040 }, { "epoch": 0.42514466070489215, "grad_norm": 1.1773860454559326, "learning_rate": 0.00012870934041015071, "loss": 1.9689, "step": 4041 }, { "epoch": 0.4252498684902683, "grad_norm": 1.1857486963272095, "learning_rate": 0.00012867669594364357, "loss": 1.6265, "step": 4042 }, { "epoch": 0.4253550762756444, "grad_norm": 1.5043977499008179, "learning_rate": 0.00012864404814700618, "loss": 1.9106, "step": 4043 }, { "epoch": 0.4254602840610205, "grad_norm": 1.6733112335205078, "learning_rate": 0.00012861139702402977, "loss": 1.4501, "step": 4044 }, { "epoch": 0.42556549184639664, "grad_norm": 1.2191860675811768, "learning_rate": 0.00012857874257850605, "loss": 1.5926, "step": 4045 }, { "epoch": 0.42567069963177273, "grad_norm": 1.3937748670578003, "learning_rate": 0.00012854608481422707, "loss": 1.2054, "step": 4046 }, { "epoch": 0.4257759074171489, "grad_norm": 1.3559739589691162, "learning_rate": 0.00012851342373498525, "loss": 1.5141, "step": 4047 }, { "epoch": 0.425881115202525, "grad_norm": 1.3075133562088013, "learning_rate": 0.0001284807593445734, "loss": 1.3719, "step": 4048 }, { "epoch": 0.4259863229879011, "grad_norm": 1.1074446439743042, "learning_rate": 0.00012844809164678478, "loss": 1.8366, "step": 4049 }, { "epoch": 0.4260915307732772, "grad_norm": 2.5052926540374756, "learning_rate": 0.00012841542064541292, "loss": 1.7895, "step": 4050 }, { "epoch": 0.42619673855865337, "grad_norm": 1.4112364053726196, "learning_rate": 0.00012838274634425188, "loss": 1.6224, "step": 4051 }, { "epoch": 0.42630194634402946, "grad_norm": 1.2035661935806274, "learning_rate": 0.00012835006874709594, "loss": 1.7851, "step": 4052 }, { "epoch": 0.42640715412940555, "grad_norm": 1.3116462230682373, "learning_rate": 0.00012831738785773985, "loss": 1.5662, "step": 4053 }, { "epoch": 0.4265123619147817, "grad_norm": 1.2714296579360962, "learning_rate": 0.00012828470367997884, "loss": 1.3814, "step": 4054 }, { "epoch": 0.4266175697001578, "grad_norm": 2.2298810482025146, "learning_rate": 0.00012825201621760826, "loss": 1.8722, "step": 4055 }, { "epoch": 0.42672277748553394, "grad_norm": 1.7081495523452759, "learning_rate": 0.00012821932547442408, "loss": 2.1527, "step": 4056 }, { "epoch": 0.42682798527091004, "grad_norm": 1.4093936681747437, "learning_rate": 0.00012818663145422256, "loss": 2.1676, "step": 4057 }, { "epoch": 0.4269331930562862, "grad_norm": 1.707614541053772, "learning_rate": 0.00012815393416080035, "loss": 1.9155, "step": 4058 }, { "epoch": 0.4270384008416623, "grad_norm": 1.1860383749008179, "learning_rate": 0.00012812123359795446, "loss": 1.788, "step": 4059 }, { "epoch": 0.4271436086270384, "grad_norm": 1.6916757822036743, "learning_rate": 0.00012808852976948232, "loss": 1.3056, "step": 4060 }, { "epoch": 0.4272488164124145, "grad_norm": 1.7355988025665283, "learning_rate": 0.00012805582267918172, "loss": 2.3532, "step": 4061 }, { "epoch": 0.4273540241977906, "grad_norm": 0.883721113204956, "learning_rate": 0.00012802311233085082, "loss": 2.0607, "step": 4062 }, { "epoch": 0.42745923198316677, "grad_norm": 1.3145248889923096, "learning_rate": 0.00012799039872828812, "loss": 2.2087, "step": 4063 }, { "epoch": 0.42756443976854286, "grad_norm": 1.7425910234451294, "learning_rate": 0.00012795768187529263, "loss": 0.9703, "step": 4064 }, { "epoch": 0.427669647553919, "grad_norm": 0.8028357028961182, "learning_rate": 0.00012792496177566363, "loss": 1.613, "step": 4065 }, { "epoch": 0.4277748553392951, "grad_norm": 1.5005557537078857, "learning_rate": 0.00012789223843320073, "loss": 1.8242, "step": 4066 }, { "epoch": 0.42788006312467125, "grad_norm": 1.2664399147033691, "learning_rate": 0.00012785951185170403, "loss": 2.2433, "step": 4067 }, { "epoch": 0.42798527091004734, "grad_norm": 1.4799624681472778, "learning_rate": 0.000127826782034974, "loss": 2.0403, "step": 4068 }, { "epoch": 0.42809047869542344, "grad_norm": 1.3635642528533936, "learning_rate": 0.00012779404898681136, "loss": 2.2521, "step": 4069 }, { "epoch": 0.4281956864807996, "grad_norm": 1.3099942207336426, "learning_rate": 0.00012776131271101732, "loss": 2.0154, "step": 4070 }, { "epoch": 0.4283008942661757, "grad_norm": 1.4328498840332031, "learning_rate": 0.00012772857321139352, "loss": 1.9576, "step": 4071 }, { "epoch": 0.42840610205155183, "grad_norm": 1.504550576210022, "learning_rate": 0.00012769583049174177, "loss": 2.4232, "step": 4072 }, { "epoch": 0.4285113098369279, "grad_norm": 1.3973898887634277, "learning_rate": 0.0001276630845558644, "loss": 1.5293, "step": 4073 }, { "epoch": 0.42861651762230407, "grad_norm": 1.2882184982299805, "learning_rate": 0.00012763033540756416, "loss": 2.3094, "step": 4074 }, { "epoch": 0.42872172540768017, "grad_norm": 1.2002280950546265, "learning_rate": 0.00012759758305064405, "loss": 1.8278, "step": 4075 }, { "epoch": 0.42882693319305626, "grad_norm": 1.0488744974136353, "learning_rate": 0.0001275648274889075, "loss": 1.8504, "step": 4076 }, { "epoch": 0.4289321409784324, "grad_norm": 1.2440074682235718, "learning_rate": 0.00012753206872615825, "loss": 1.8653, "step": 4077 }, { "epoch": 0.4290373487638085, "grad_norm": 0.9582875967025757, "learning_rate": 0.00012749930676620057, "loss": 2.0311, "step": 4078 }, { "epoch": 0.42914255654918465, "grad_norm": 1.1008398532867432, "learning_rate": 0.00012746654161283896, "loss": 1.6135, "step": 4079 }, { "epoch": 0.42924776433456074, "grad_norm": 1.095392107963562, "learning_rate": 0.00012743377326987826, "loss": 1.7668, "step": 4080 }, { "epoch": 0.4293529721199369, "grad_norm": 1.0410737991333008, "learning_rate": 0.00012740100174112384, "loss": 1.607, "step": 4081 }, { "epoch": 0.429458179905313, "grad_norm": 0.979989767074585, "learning_rate": 0.00012736822703038133, "loss": 1.4283, "step": 4082 }, { "epoch": 0.42956338769068914, "grad_norm": 1.1817355155944824, "learning_rate": 0.00012733544914145673, "loss": 1.7454, "step": 4083 }, { "epoch": 0.42966859547606523, "grad_norm": 1.5766743421554565, "learning_rate": 0.00012730266807815642, "loss": 2.0112, "step": 4084 }, { "epoch": 0.4297738032614413, "grad_norm": 1.0003412961959839, "learning_rate": 0.0001272698838442872, "loss": 1.9937, "step": 4085 }, { "epoch": 0.42987901104681747, "grad_norm": 2.305222272872925, "learning_rate": 0.00012723709644365614, "loss": 2.0902, "step": 4086 }, { "epoch": 0.42998421883219357, "grad_norm": 1.1228468418121338, "learning_rate": 0.00012720430588007077, "loss": 2.1054, "step": 4087 }, { "epoch": 0.4300894266175697, "grad_norm": 1.6916412115097046, "learning_rate": 0.00012717151215733892, "loss": 1.3402, "step": 4088 }, { "epoch": 0.4301946344029458, "grad_norm": 1.376791000366211, "learning_rate": 0.0001271387152792689, "loss": 2.2196, "step": 4089 }, { "epoch": 0.43029984218832196, "grad_norm": 1.307098150253296, "learning_rate": 0.00012710591524966918, "loss": 1.9181, "step": 4090 }, { "epoch": 0.43040504997369805, "grad_norm": 0.9531205892562866, "learning_rate": 0.00012707311207234878, "loss": 1.4014, "step": 4091 }, { "epoch": 0.43051025775907414, "grad_norm": 1.9267292022705078, "learning_rate": 0.00012704030575111705, "loss": 1.4603, "step": 4092 }, { "epoch": 0.4306154655444503, "grad_norm": 1.3264367580413818, "learning_rate": 0.00012700749628978363, "loss": 2.0973, "step": 4093 }, { "epoch": 0.4307206733298264, "grad_norm": 1.430148720741272, "learning_rate": 0.00012697468369215863, "loss": 1.5874, "step": 4094 }, { "epoch": 0.43082588111520254, "grad_norm": 1.3423150777816772, "learning_rate": 0.00012694186796205243, "loss": 1.6053, "step": 4095 }, { "epoch": 0.43093108890057863, "grad_norm": 1.2820467948913574, "learning_rate": 0.00012690904910327578, "loss": 1.8371, "step": 4096 }, { "epoch": 0.4310362966859548, "grad_norm": 1.965772271156311, "learning_rate": 0.00012687622711963993, "loss": 1.9467, "step": 4097 }, { "epoch": 0.43114150447133087, "grad_norm": 1.4754441976547241, "learning_rate": 0.0001268434020149563, "loss": 1.8387, "step": 4098 }, { "epoch": 0.431246712256707, "grad_norm": 1.0966287851333618, "learning_rate": 0.00012681057379303678, "loss": 2.0815, "step": 4099 }, { "epoch": 0.4313519200420831, "grad_norm": 1.2483052015304565, "learning_rate": 0.00012677774245769362, "loss": 2.1525, "step": 4100 }, { "epoch": 0.4314571278274592, "grad_norm": 1.1769499778747559, "learning_rate": 0.00012674490801273938, "loss": 2.0793, "step": 4101 }, { "epoch": 0.43156233561283536, "grad_norm": 1.8558199405670166, "learning_rate": 0.00012671207046198706, "loss": 2.3923, "step": 4102 }, { "epoch": 0.43166754339821145, "grad_norm": 1.0593931674957275, "learning_rate": 0.00012667922980924998, "loss": 2.0428, "step": 4103 }, { "epoch": 0.4317727511835876, "grad_norm": 0.8765531182289124, "learning_rate": 0.00012664638605834177, "loss": 1.7572, "step": 4104 }, { "epoch": 0.4318779589689637, "grad_norm": 0.9581341743469238, "learning_rate": 0.00012661353921307648, "loss": 2.2846, "step": 4105 }, { "epoch": 0.43198316675433984, "grad_norm": 1.0098820924758911, "learning_rate": 0.00012658068927726853, "loss": 1.7325, "step": 4106 }, { "epoch": 0.43208837453971594, "grad_norm": 1.224402666091919, "learning_rate": 0.00012654783625473266, "loss": 1.6651, "step": 4107 }, { "epoch": 0.43219358232509203, "grad_norm": 0.9913700222969055, "learning_rate": 0.00012651498014928402, "loss": 2.4144, "step": 4108 }, { "epoch": 0.4322987901104682, "grad_norm": 1.0635404586791992, "learning_rate": 0.00012648212096473798, "loss": 1.4415, "step": 4109 }, { "epoch": 0.43240399789584427, "grad_norm": 0.8817548751831055, "learning_rate": 0.00012644925870491052, "loss": 1.8636, "step": 4110 }, { "epoch": 0.4325092056812204, "grad_norm": 1.284338116645813, "learning_rate": 0.0001264163933736177, "loss": 1.6044, "step": 4111 }, { "epoch": 0.4326144134665965, "grad_norm": 0.9684668779373169, "learning_rate": 0.00012638352497467608, "loss": 2.1041, "step": 4112 }, { "epoch": 0.43271962125197266, "grad_norm": 1.4305078983306885, "learning_rate": 0.00012635065351190261, "loss": 1.9746, "step": 4113 }, { "epoch": 0.43282482903734876, "grad_norm": 1.0623332262039185, "learning_rate": 0.0001263177789891145, "loss": 1.7623, "step": 4114 }, { "epoch": 0.4329300368227249, "grad_norm": 1.431246042251587, "learning_rate": 0.00012628490141012937, "loss": 2.0701, "step": 4115 }, { "epoch": 0.433035244608101, "grad_norm": 1.5637847185134888, "learning_rate": 0.00012625202077876525, "loss": 1.6522, "step": 4116 }, { "epoch": 0.4331404523934771, "grad_norm": 1.6422128677368164, "learning_rate": 0.00012621913709884037, "loss": 1.8042, "step": 4117 }, { "epoch": 0.43324566017885324, "grad_norm": 1.0828219652175903, "learning_rate": 0.0001261862503741734, "loss": 1.9911, "step": 4118 }, { "epoch": 0.43335086796422934, "grad_norm": 1.0087640285491943, "learning_rate": 0.00012615336060858344, "loss": 2.199, "step": 4119 }, { "epoch": 0.4334560757496055, "grad_norm": 1.5731931924819946, "learning_rate": 0.00012612046780588986, "loss": 1.9389, "step": 4120 }, { "epoch": 0.4335612835349816, "grad_norm": 1.271106481552124, "learning_rate": 0.00012608757196991234, "loss": 1.8302, "step": 4121 }, { "epoch": 0.4336664913203577, "grad_norm": 1.1410576105117798, "learning_rate": 0.000126054673104471, "loss": 1.6422, "step": 4122 }, { "epoch": 0.4337716991057338, "grad_norm": 1.52702796459198, "learning_rate": 0.00012602177121338626, "loss": 2.0576, "step": 4123 }, { "epoch": 0.4338769068911099, "grad_norm": 1.1503021717071533, "learning_rate": 0.0001259888663004789, "loss": 1.6653, "step": 4124 }, { "epoch": 0.43398211467648606, "grad_norm": 1.6308045387268066, "learning_rate": 0.00012595595836957006, "loss": 1.885, "step": 4125 }, { "epoch": 0.43408732246186216, "grad_norm": 1.5235882997512817, "learning_rate": 0.0001259230474244813, "loss": 1.9792, "step": 4126 }, { "epoch": 0.4341925302472383, "grad_norm": 1.6996004581451416, "learning_rate": 0.00012589013346903438, "loss": 1.9094, "step": 4127 }, { "epoch": 0.4342977380326144, "grad_norm": 1.2587865591049194, "learning_rate": 0.0001258572165070515, "loss": 1.8968, "step": 4128 }, { "epoch": 0.43440294581799055, "grad_norm": 1.0128796100616455, "learning_rate": 0.00012582429654235523, "loss": 1.1083, "step": 4129 }, { "epoch": 0.43450815360336664, "grad_norm": 1.4834532737731934, "learning_rate": 0.00012579137357876844, "loss": 2.2179, "step": 4130 }, { "epoch": 0.4346133613887428, "grad_norm": 1.3126288652420044, "learning_rate": 0.00012575844762011438, "loss": 2.4677, "step": 4131 }, { "epoch": 0.4347185691741189, "grad_norm": 0.8772363066673279, "learning_rate": 0.0001257255186702166, "loss": 1.4076, "step": 4132 }, { "epoch": 0.434823776959495, "grad_norm": 1.3988395929336548, "learning_rate": 0.00012569258673289903, "loss": 1.8661, "step": 4133 }, { "epoch": 0.4349289847448711, "grad_norm": 1.469456434249878, "learning_rate": 0.000125659651811986, "loss": 1.8328, "step": 4134 }, { "epoch": 0.4350341925302472, "grad_norm": 1.3229504823684692, "learning_rate": 0.00012562671391130208, "loss": 1.7977, "step": 4135 }, { "epoch": 0.43513940031562337, "grad_norm": 1.2461540699005127, "learning_rate": 0.00012559377303467226, "loss": 1.7205, "step": 4136 }, { "epoch": 0.43524460810099946, "grad_norm": 1.4295867681503296, "learning_rate": 0.00012556082918592187, "loss": 2.3218, "step": 4137 }, { "epoch": 0.4353498158863756, "grad_norm": 1.1670809984207153, "learning_rate": 0.00012552788236887654, "loss": 1.7761, "step": 4138 }, { "epoch": 0.4354550236717517, "grad_norm": 2.9805972576141357, "learning_rate": 0.0001254949325873623, "loss": 1.9536, "step": 4139 }, { "epoch": 0.4355602314571278, "grad_norm": 1.0447720289230347, "learning_rate": 0.0001254619798452055, "loss": 1.6888, "step": 4140 }, { "epoch": 0.43566543924250395, "grad_norm": 1.6240369081497192, "learning_rate": 0.00012542902414623282, "loss": 2.3843, "step": 4141 }, { "epoch": 0.43577064702788004, "grad_norm": 1.2419333457946777, "learning_rate": 0.0001253960654942713, "loss": 2.0027, "step": 4142 }, { "epoch": 0.4358758548132562, "grad_norm": 1.9167548418045044, "learning_rate": 0.00012536310389314832, "loss": 1.8499, "step": 4143 }, { "epoch": 0.4359810625986323, "grad_norm": 0.9558454155921936, "learning_rate": 0.0001253301393466916, "loss": 2.0347, "step": 4144 }, { "epoch": 0.43608627038400843, "grad_norm": 1.5771024227142334, "learning_rate": 0.0001252971718587292, "loss": 2.0212, "step": 4145 }, { "epoch": 0.43619147816938453, "grad_norm": 1.3093889951705933, "learning_rate": 0.00012526420143308954, "loss": 1.4546, "step": 4146 }, { "epoch": 0.4362966859547607, "grad_norm": 1.1525970697402954, "learning_rate": 0.00012523122807360138, "loss": 1.8394, "step": 4147 }, { "epoch": 0.43640189374013677, "grad_norm": 1.2308920621871948, "learning_rate": 0.00012519825178409377, "loss": 1.8985, "step": 4148 }, { "epoch": 0.43650710152551286, "grad_norm": 1.5874372720718384, "learning_rate": 0.00012516527256839616, "loss": 1.814, "step": 4149 }, { "epoch": 0.436612309310889, "grad_norm": 1.3634819984436035, "learning_rate": 0.0001251322904303383, "loss": 1.764, "step": 4150 }, { "epoch": 0.4367175170962651, "grad_norm": 1.272567868232727, "learning_rate": 0.00012509930537375036, "loss": 2.2667, "step": 4151 }, { "epoch": 0.43682272488164126, "grad_norm": 2.495673418045044, "learning_rate": 0.0001250663174024627, "loss": 1.88, "step": 4152 }, { "epoch": 0.43692793266701735, "grad_norm": 1.3136742115020752, "learning_rate": 0.00012503332652030613, "loss": 1.7169, "step": 4153 }, { "epoch": 0.4370331404523935, "grad_norm": 1.5158318281173706, "learning_rate": 0.0001250003327311118, "loss": 1.7009, "step": 4154 }, { "epoch": 0.4371383482377696, "grad_norm": 1.6757663488388062, "learning_rate": 0.00012496733603871115, "loss": 2.4002, "step": 4155 }, { "epoch": 0.4372435560231457, "grad_norm": 1.6522952318191528, "learning_rate": 0.000124934336446936, "loss": 1.8635, "step": 4156 }, { "epoch": 0.43734876380852183, "grad_norm": 1.3090764284133911, "learning_rate": 0.00012490133395961844, "loss": 1.7477, "step": 4157 }, { "epoch": 0.43745397159389793, "grad_norm": 1.1116405725479126, "learning_rate": 0.000124868328580591, "loss": 1.7589, "step": 4158 }, { "epoch": 0.4375591793792741, "grad_norm": 1.0621029138565063, "learning_rate": 0.0001248353203136864, "loss": 1.7158, "step": 4159 }, { "epoch": 0.43766438716465017, "grad_norm": 1.3473455905914307, "learning_rate": 0.00012480230916273784, "loss": 1.8552, "step": 4160 }, { "epoch": 0.4377695949500263, "grad_norm": 1.4094268083572388, "learning_rate": 0.00012476929513157881, "loss": 1.7809, "step": 4161 }, { "epoch": 0.4378748027354024, "grad_norm": 1.2711224555969238, "learning_rate": 0.00012473627822404314, "loss": 1.9564, "step": 4162 }, { "epoch": 0.43798001052077856, "grad_norm": 1.670932650566101, "learning_rate": 0.00012470325844396487, "loss": 1.7578, "step": 4163 }, { "epoch": 0.43808521830615466, "grad_norm": 1.6926169395446777, "learning_rate": 0.00012467023579517856, "loss": 1.9468, "step": 4164 }, { "epoch": 0.43819042609153075, "grad_norm": 1.197359323501587, "learning_rate": 0.000124637210281519, "loss": 1.4122, "step": 4165 }, { "epoch": 0.4382956338769069, "grad_norm": 1.7274643182754517, "learning_rate": 0.00012460418190682134, "loss": 2.1852, "step": 4166 }, { "epoch": 0.438400841662283, "grad_norm": 1.5755963325500488, "learning_rate": 0.00012457115067492108, "loss": 2.3088, "step": 4167 }, { "epoch": 0.43850604944765914, "grad_norm": 1.3150062561035156, "learning_rate": 0.000124538116589654, "loss": 1.9563, "step": 4168 }, { "epoch": 0.43861125723303523, "grad_norm": 1.4447141885757446, "learning_rate": 0.0001245050796548562, "loss": 1.6322, "step": 4169 }, { "epoch": 0.4387164650184114, "grad_norm": 1.5568017959594727, "learning_rate": 0.0001244720398743642, "loss": 1.272, "step": 4170 }, { "epoch": 0.4388216728037875, "grad_norm": 1.4406249523162842, "learning_rate": 0.00012443899725201482, "loss": 1.4789, "step": 4171 }, { "epoch": 0.43892688058916357, "grad_norm": 2.007683753967285, "learning_rate": 0.0001244059517916452, "loss": 2.0801, "step": 4172 }, { "epoch": 0.4390320883745397, "grad_norm": 2.023632287979126, "learning_rate": 0.00012437290349709271, "loss": 2.739, "step": 4173 }, { "epoch": 0.4391372961599158, "grad_norm": 1.7203633785247803, "learning_rate": 0.0001243398523721952, "loss": 2.2986, "step": 4174 }, { "epoch": 0.43924250394529196, "grad_norm": 1.1265063285827637, "learning_rate": 0.0001243067984207908, "loss": 1.9861, "step": 4175 }, { "epoch": 0.43934771173066806, "grad_norm": 1.3561371564865112, "learning_rate": 0.00012427374164671794, "loss": 2.1845, "step": 4176 }, { "epoch": 0.4394529195160442, "grad_norm": 1.0997103452682495, "learning_rate": 0.00012424068205381538, "loss": 2.1557, "step": 4177 }, { "epoch": 0.4395581273014203, "grad_norm": 1.7254860401153564, "learning_rate": 0.00012420761964592223, "loss": 1.6301, "step": 4178 }, { "epoch": 0.43966333508679645, "grad_norm": 1.4888262748718262, "learning_rate": 0.00012417455442687795, "loss": 1.9758, "step": 4179 }, { "epoch": 0.43976854287217254, "grad_norm": 1.2371491193771362, "learning_rate": 0.00012414148640052227, "loss": 2.0836, "step": 4180 }, { "epoch": 0.43987375065754863, "grad_norm": 1.2289551496505737, "learning_rate": 0.00012410841557069523, "loss": 2.0425, "step": 4181 }, { "epoch": 0.4399789584429248, "grad_norm": 1.2869768142700195, "learning_rate": 0.0001240753419412373, "loss": 1.9797, "step": 4182 }, { "epoch": 0.4400841662283009, "grad_norm": 1.227531909942627, "learning_rate": 0.00012404226551598923, "loss": 2.0121, "step": 4183 }, { "epoch": 0.440189374013677, "grad_norm": 1.2489982843399048, "learning_rate": 0.000124009186298792, "loss": 1.5442, "step": 4184 }, { "epoch": 0.4402945817990531, "grad_norm": 1.3487237691879272, "learning_rate": 0.000123976104293487, "loss": 1.7614, "step": 4185 }, { "epoch": 0.44039978958442927, "grad_norm": 1.567232370376587, "learning_rate": 0.000123943019503916, "loss": 1.7655, "step": 4186 }, { "epoch": 0.44050499736980536, "grad_norm": 3.1679434776306152, "learning_rate": 0.00012390993193392097, "loss": 1.0603, "step": 4187 }, { "epoch": 0.44061020515518146, "grad_norm": 1.1254740953445435, "learning_rate": 0.00012387684158734425, "loss": 1.781, "step": 4188 }, { "epoch": 0.4407154129405576, "grad_norm": 1.0286729335784912, "learning_rate": 0.0001238437484680286, "loss": 1.839, "step": 4189 }, { "epoch": 0.4408206207259337, "grad_norm": 1.7165943384170532, "learning_rate": 0.0001238106525798169, "loss": 1.6264, "step": 4190 }, { "epoch": 0.44092582851130985, "grad_norm": 1.339667797088623, "learning_rate": 0.00012377755392655254, "loss": 1.8619, "step": 4191 }, { "epoch": 0.44103103629668594, "grad_norm": 1.1098575592041016, "learning_rate": 0.00012374445251207914, "loss": 1.8949, "step": 4192 }, { "epoch": 0.4411362440820621, "grad_norm": 1.4389253854751587, "learning_rate": 0.00012371134834024067, "loss": 1.6791, "step": 4193 }, { "epoch": 0.4412414518674382, "grad_norm": 1.6168919801712036, "learning_rate": 0.00012367824141488142, "loss": 1.251, "step": 4194 }, { "epoch": 0.44134665965281433, "grad_norm": 1.0156736373901367, "learning_rate": 0.00012364513173984592, "loss": 2.281, "step": 4195 }, { "epoch": 0.4414518674381904, "grad_norm": 1.645992636680603, "learning_rate": 0.00012361201931897916, "loss": 1.8607, "step": 4196 }, { "epoch": 0.4415570752235665, "grad_norm": 1.0895994901657104, "learning_rate": 0.00012357890415612635, "loss": 2.0752, "step": 4197 }, { "epoch": 0.44166228300894267, "grad_norm": 1.4120984077453613, "learning_rate": 0.00012354578625513302, "loss": 1.6247, "step": 4198 }, { "epoch": 0.44176749079431876, "grad_norm": 1.073986291885376, "learning_rate": 0.00012351266561984507, "loss": 1.4361, "step": 4199 }, { "epoch": 0.4418726985796949, "grad_norm": 1.2587485313415527, "learning_rate": 0.0001234795422541087, "loss": 2.4348, "step": 4200 }, { "epoch": 0.441977906365071, "grad_norm": 1.7424877882003784, "learning_rate": 0.00012344641616177042, "loss": 1.1056, "step": 4201 }, { "epoch": 0.44208311415044715, "grad_norm": 1.4434423446655273, "learning_rate": 0.00012341328734667698, "loss": 2.2263, "step": 4202 }, { "epoch": 0.44218832193582325, "grad_norm": 1.3455250263214111, "learning_rate": 0.00012338015581267567, "loss": 1.872, "step": 4203 }, { "epoch": 0.44229352972119934, "grad_norm": 1.5791774988174438, "learning_rate": 0.00012334702156361377, "loss": 2.61, "step": 4204 }, { "epoch": 0.4423987375065755, "grad_norm": 1.0933024883270264, "learning_rate": 0.0001233138846033392, "loss": 2.0891, "step": 4205 }, { "epoch": 0.4425039452919516, "grad_norm": 1.2376352548599243, "learning_rate": 0.00012328074493569993, "loss": 1.4209, "step": 4206 }, { "epoch": 0.44260915307732773, "grad_norm": 1.549952507019043, "learning_rate": 0.00012324760256454445, "loss": 1.6895, "step": 4207 }, { "epoch": 0.4427143608627038, "grad_norm": 1.5059970617294312, "learning_rate": 0.0001232144574937214, "loss": 1.8844, "step": 4208 }, { "epoch": 0.44281956864808, "grad_norm": 1.6260056495666504, "learning_rate": 0.00012318130972707985, "loss": 2.5282, "step": 4209 }, { "epoch": 0.44292477643345607, "grad_norm": 1.3028151988983154, "learning_rate": 0.0001231481592684692, "loss": 2.0735, "step": 4210 }, { "epoch": 0.4430299842188322, "grad_norm": 0.9203398823738098, "learning_rate": 0.00012311500612173897, "loss": 2.0104, "step": 4211 }, { "epoch": 0.4431351920042083, "grad_norm": 1.2784510850906372, "learning_rate": 0.0001230818502907392, "loss": 1.7065, "step": 4212 }, { "epoch": 0.4432403997895844, "grad_norm": 1.1374523639678955, "learning_rate": 0.0001230486917793202, "loss": 1.569, "step": 4213 }, { "epoch": 0.44334560757496055, "grad_norm": 1.1995996236801147, "learning_rate": 0.00012301553059133248, "loss": 1.6635, "step": 4214 }, { "epoch": 0.44345081536033665, "grad_norm": 1.210629940032959, "learning_rate": 0.000122982366730627, "loss": 1.8382, "step": 4215 }, { "epoch": 0.4435560231457128, "grad_norm": 1.6628928184509277, "learning_rate": 0.00012294920020105497, "loss": 1.4067, "step": 4216 }, { "epoch": 0.4436612309310889, "grad_norm": 1.001809000968933, "learning_rate": 0.00012291603100646786, "loss": 1.9476, "step": 4217 }, { "epoch": 0.44376643871646504, "grad_norm": 2.0265283584594727, "learning_rate": 0.00012288285915071752, "loss": 1.9624, "step": 4218 }, { "epoch": 0.44387164650184113, "grad_norm": 1.6050817966461182, "learning_rate": 0.00012284968463765613, "loss": 1.7624, "step": 4219 }, { "epoch": 0.4439768542872172, "grad_norm": 1.2535775899887085, "learning_rate": 0.00012281650747113612, "loss": 2.0082, "step": 4220 }, { "epoch": 0.4440820620725934, "grad_norm": 1.6964945793151855, "learning_rate": 0.00012278332765501017, "loss": 2.1988, "step": 4221 }, { "epoch": 0.44418726985796947, "grad_norm": 1.662829041481018, "learning_rate": 0.00012275014519313145, "loss": 2.4153, "step": 4222 }, { "epoch": 0.4442924776433456, "grad_norm": 1.6172958612442017, "learning_rate": 0.00012271696008935324, "loss": 1.6942, "step": 4223 }, { "epoch": 0.4443976854287217, "grad_norm": 1.380727767944336, "learning_rate": 0.0001226837723475293, "loss": 1.9723, "step": 4224 }, { "epoch": 0.44450289321409786, "grad_norm": 1.6496597528457642, "learning_rate": 0.00012265058197151357, "loss": 1.5928, "step": 4225 }, { "epoch": 0.44460810099947395, "grad_norm": 1.51160728931427, "learning_rate": 0.00012261738896516034, "loss": 2.1494, "step": 4226 }, { "epoch": 0.4447133087848501, "grad_norm": 1.5989267826080322, "learning_rate": 0.0001225841933323242, "loss": 1.6661, "step": 4227 }, { "epoch": 0.4448185165702262, "grad_norm": 1.2797974348068237, "learning_rate": 0.00012255099507686007, "loss": 2.1762, "step": 4228 }, { "epoch": 0.4449237243556023, "grad_norm": 0.9348226189613342, "learning_rate": 0.00012251779420262312, "loss": 1.6779, "step": 4229 }, { "epoch": 0.44502893214097844, "grad_norm": 1.528698205947876, "learning_rate": 0.0001224845907134689, "loss": 1.4624, "step": 4230 }, { "epoch": 0.44513413992635453, "grad_norm": 2.389446973800659, "learning_rate": 0.00012245138461325318, "loss": 1.4935, "step": 4231 }, { "epoch": 0.4452393477117307, "grad_norm": 1.2253481149673462, "learning_rate": 0.0001224181759058321, "loss": 2.4449, "step": 4232 }, { "epoch": 0.4453445554971068, "grad_norm": 1.3102329969406128, "learning_rate": 0.00012238496459506207, "loss": 1.9277, "step": 4233 }, { "epoch": 0.4454497632824829, "grad_norm": 1.1610690355300903, "learning_rate": 0.00012235175068479984, "loss": 2.1319, "step": 4234 }, { "epoch": 0.445554971067859, "grad_norm": 2.348677635192871, "learning_rate": 0.00012231853417890237, "loss": 2.2125, "step": 4235 }, { "epoch": 0.44566017885323517, "grad_norm": 1.5758837461471558, "learning_rate": 0.00012228531508122703, "loss": 1.6423, "step": 4236 }, { "epoch": 0.44576538663861126, "grad_norm": 1.0791726112365723, "learning_rate": 0.00012225209339563145, "loss": 1.7604, "step": 4237 }, { "epoch": 0.44587059442398735, "grad_norm": 1.3658674955368042, "learning_rate": 0.00012221886912597353, "loss": 1.8358, "step": 4238 }, { "epoch": 0.4459758022093635, "grad_norm": 2.2098424434661865, "learning_rate": 0.00012218564227611152, "loss": 2.1306, "step": 4239 }, { "epoch": 0.4460810099947396, "grad_norm": 1.6169191598892212, "learning_rate": 0.0001221524128499039, "loss": 2.0847, "step": 4240 }, { "epoch": 0.44618621778011575, "grad_norm": 1.3867640495300293, "learning_rate": 0.00012211918085120954, "loss": 1.8926, "step": 4241 }, { "epoch": 0.44629142556549184, "grad_norm": 1.7240827083587646, "learning_rate": 0.00012208594628388753, "loss": 2.1408, "step": 4242 }, { "epoch": 0.446396633350868, "grad_norm": 1.132155179977417, "learning_rate": 0.00012205270915179729, "loss": 2.1424, "step": 4243 }, { "epoch": 0.4465018411362441, "grad_norm": 1.2491706609725952, "learning_rate": 0.00012201946945879856, "loss": 2.039, "step": 4244 }, { "epoch": 0.4466070489216202, "grad_norm": 1.3799735307693481, "learning_rate": 0.00012198622720875139, "loss": 1.7383, "step": 4245 }, { "epoch": 0.4467122567069963, "grad_norm": 1.3152798414230347, "learning_rate": 0.000121952982405516, "loss": 2.1134, "step": 4246 }, { "epoch": 0.4468174644923724, "grad_norm": 1.6262292861938477, "learning_rate": 0.00012191973505295311, "loss": 1.8854, "step": 4247 }, { "epoch": 0.44692267227774857, "grad_norm": 1.3993144035339355, "learning_rate": 0.00012188648515492355, "loss": 1.8349, "step": 4248 }, { "epoch": 0.44702788006312466, "grad_norm": 1.2754366397857666, "learning_rate": 0.00012185323271528853, "loss": 1.2813, "step": 4249 }, { "epoch": 0.4471330878485008, "grad_norm": 1.437742829322815, "learning_rate": 0.00012181997773790954, "loss": 1.79, "step": 4250 }, { "epoch": 0.4472382956338769, "grad_norm": 1.1852113008499146, "learning_rate": 0.00012178672022664838, "loss": 2.2712, "step": 4251 }, { "epoch": 0.44734350341925305, "grad_norm": 1.1398671865463257, "learning_rate": 0.00012175346018536717, "loss": 2.3377, "step": 4252 }, { "epoch": 0.44744871120462915, "grad_norm": 0.8755077123641968, "learning_rate": 0.00012172019761792825, "loss": 2.2223, "step": 4253 }, { "epoch": 0.44755391899000524, "grad_norm": 0.9048967957496643, "learning_rate": 0.00012168693252819433, "loss": 1.8152, "step": 4254 }, { "epoch": 0.4476591267753814, "grad_norm": 1.133655071258545, "learning_rate": 0.00012165366492002832, "loss": 1.8637, "step": 4255 }, { "epoch": 0.4477643345607575, "grad_norm": 2.401837110519409, "learning_rate": 0.00012162039479729351, "loss": 2.07, "step": 4256 }, { "epoch": 0.44786954234613363, "grad_norm": 1.3808231353759766, "learning_rate": 0.00012158712216385344, "loss": 1.9546, "step": 4257 }, { "epoch": 0.4479747501315097, "grad_norm": 1.5541157722473145, "learning_rate": 0.00012155384702357198, "loss": 1.6161, "step": 4258 }, { "epoch": 0.4480799579168859, "grad_norm": 1.5308620929718018, "learning_rate": 0.00012152056938031324, "loss": 2.0226, "step": 4259 }, { "epoch": 0.44818516570226197, "grad_norm": 1.0472149848937988, "learning_rate": 0.00012148728923794162, "loss": 2.0012, "step": 4260 }, { "epoch": 0.44829037348763806, "grad_norm": 1.0326931476593018, "learning_rate": 0.00012145400660032187, "loss": 1.6771, "step": 4261 }, { "epoch": 0.4483955812730142, "grad_norm": 1.4375073909759521, "learning_rate": 0.00012142072147131898, "loss": 2.0649, "step": 4262 }, { "epoch": 0.4485007890583903, "grad_norm": 0.880775511264801, "learning_rate": 0.00012138743385479823, "loss": 1.7757, "step": 4263 }, { "epoch": 0.44860599684376645, "grad_norm": 1.205504298210144, "learning_rate": 0.00012135414375462522, "loss": 1.7152, "step": 4264 }, { "epoch": 0.44871120462914255, "grad_norm": 0.8612405061721802, "learning_rate": 0.00012132085117466582, "loss": 1.7546, "step": 4265 }, { "epoch": 0.4488164124145187, "grad_norm": 1.592641830444336, "learning_rate": 0.00012128755611878617, "loss": 1.8245, "step": 4266 }, { "epoch": 0.4489216201998948, "grad_norm": 1.2067207098007202, "learning_rate": 0.00012125425859085273, "loss": 1.6449, "step": 4267 }, { "epoch": 0.44902682798527094, "grad_norm": 1.0257954597473145, "learning_rate": 0.00012122095859473223, "loss": 1.13, "step": 4268 }, { "epoch": 0.44913203577064703, "grad_norm": 2.4131839275360107, "learning_rate": 0.00012118765613429173, "loss": 1.3638, "step": 4269 }, { "epoch": 0.4492372435560231, "grad_norm": 1.0637006759643555, "learning_rate": 0.00012115435121339844, "loss": 1.5358, "step": 4270 }, { "epoch": 0.4493424513413993, "grad_norm": 1.3625959157943726, "learning_rate": 0.00012112104383592, "loss": 1.4193, "step": 4271 }, { "epoch": 0.44944765912677537, "grad_norm": 1.6006038188934326, "learning_rate": 0.00012108773400572431, "loss": 1.8992, "step": 4272 }, { "epoch": 0.4495528669121515, "grad_norm": 1.9256750345230103, "learning_rate": 0.00012105442172667951, "loss": 2.2142, "step": 4273 }, { "epoch": 0.4496580746975276, "grad_norm": 1.241640567779541, "learning_rate": 0.00012102110700265403, "loss": 1.4468, "step": 4274 }, { "epoch": 0.44976328248290376, "grad_norm": 1.4494163990020752, "learning_rate": 0.00012098778983751662, "loss": 1.8657, "step": 4275 }, { "epoch": 0.44986849026827985, "grad_norm": 0.9596696496009827, "learning_rate": 0.0001209544702351363, "loss": 1.9698, "step": 4276 }, { "epoch": 0.44997369805365595, "grad_norm": 0.9092001914978027, "learning_rate": 0.00012092114819938233, "loss": 1.3448, "step": 4277 }, { "epoch": 0.4500789058390321, "grad_norm": 1.8261799812316895, "learning_rate": 0.00012088782373412432, "loss": 2.2352, "step": 4278 }, { "epoch": 0.4501841136244082, "grad_norm": 1.3973294496536255, "learning_rate": 0.00012085449684323216, "loss": 1.4725, "step": 4279 }, { "epoch": 0.45028932140978434, "grad_norm": 1.5682103633880615, "learning_rate": 0.00012082116753057593, "loss": 1.8422, "step": 4280 }, { "epoch": 0.45039452919516043, "grad_norm": 1.5455161333084106, "learning_rate": 0.00012078783580002607, "loss": 1.604, "step": 4281 }, { "epoch": 0.4504997369805366, "grad_norm": 1.6469101905822754, "learning_rate": 0.00012075450165545328, "loss": 2.1398, "step": 4282 }, { "epoch": 0.4506049447659127, "grad_norm": 1.5728659629821777, "learning_rate": 0.00012072116510072858, "loss": 1.9734, "step": 4283 }, { "epoch": 0.4507101525512888, "grad_norm": 1.4864883422851562, "learning_rate": 0.00012068782613972318, "loss": 1.7709, "step": 4284 }, { "epoch": 0.4508153603366649, "grad_norm": 0.9983920454978943, "learning_rate": 0.00012065448477630867, "loss": 1.5441, "step": 4285 }, { "epoch": 0.450920568122041, "grad_norm": 1.9383924007415771, "learning_rate": 0.00012062114101435686, "loss": 1.9526, "step": 4286 }, { "epoch": 0.45102577590741716, "grad_norm": 1.959188461303711, "learning_rate": 0.00012058779485773985, "loss": 1.9704, "step": 4287 }, { "epoch": 0.45113098369279325, "grad_norm": 1.8748353719711304, "learning_rate": 0.00012055444631033, "loss": 1.5454, "step": 4288 }, { "epoch": 0.4512361914781694, "grad_norm": 1.4259321689605713, "learning_rate": 0.000120521095376, "loss": 1.6818, "step": 4289 }, { "epoch": 0.4513413992635455, "grad_norm": 1.2353706359863281, "learning_rate": 0.00012048774205862279, "loss": 2.2079, "step": 4290 }, { "epoch": 0.45144660704892164, "grad_norm": 1.5690447092056274, "learning_rate": 0.00012045438636207151, "loss": 1.7757, "step": 4291 }, { "epoch": 0.45155181483429774, "grad_norm": 1.6075197458267212, "learning_rate": 0.00012042102829021973, "loss": 2.0568, "step": 4292 }, { "epoch": 0.45165702261967383, "grad_norm": 1.3543256521224976, "learning_rate": 0.00012038766784694117, "loss": 1.6417, "step": 4293 }, { "epoch": 0.45176223040505, "grad_norm": 1.3820946216583252, "learning_rate": 0.00012035430503610988, "loss": 2.0945, "step": 4294 }, { "epoch": 0.4518674381904261, "grad_norm": 1.327938437461853, "learning_rate": 0.00012032093986160015, "loss": 1.7868, "step": 4295 }, { "epoch": 0.4519726459758022, "grad_norm": 1.0907479524612427, "learning_rate": 0.0001202875723272866, "loss": 1.7495, "step": 4296 }, { "epoch": 0.4520778537611783, "grad_norm": 1.2229701280593872, "learning_rate": 0.0001202542024370441, "loss": 2.1307, "step": 4297 }, { "epoch": 0.45218306154655447, "grad_norm": 0.9999223947525024, "learning_rate": 0.00012022083019474774, "loss": 2.0101, "step": 4298 }, { "epoch": 0.45228826933193056, "grad_norm": 1.5254671573638916, "learning_rate": 0.00012018745560427298, "loss": 1.6449, "step": 4299 }, { "epoch": 0.4523934771173067, "grad_norm": 1.047942042350769, "learning_rate": 0.00012015407866949548, "loss": 1.5853, "step": 4300 }, { "epoch": 0.4524986849026828, "grad_norm": 1.6619633436203003, "learning_rate": 0.0001201206993942912, "loss": 2.1214, "step": 4301 }, { "epoch": 0.4526038926880589, "grad_norm": 1.228927731513977, "learning_rate": 0.00012008731778253632, "loss": 1.7196, "step": 4302 }, { "epoch": 0.45270910047343504, "grad_norm": 1.4575189352035522, "learning_rate": 0.00012005393383810737, "loss": 2.0006, "step": 4303 }, { "epoch": 0.45281430825881114, "grad_norm": 1.3251969814300537, "learning_rate": 0.00012002054756488115, "loss": 1.7704, "step": 4304 }, { "epoch": 0.4529195160441873, "grad_norm": 1.6667139530181885, "learning_rate": 0.00011998715896673465, "loss": 1.2439, "step": 4305 }, { "epoch": 0.4530247238295634, "grad_norm": 1.0954786539077759, "learning_rate": 0.0001199537680475452, "loss": 1.827, "step": 4306 }, { "epoch": 0.45312993161493953, "grad_norm": 1.4709742069244385, "learning_rate": 0.00011992037481119036, "loss": 2.3623, "step": 4307 }, { "epoch": 0.4532351394003156, "grad_norm": 1.467758059501648, "learning_rate": 0.00011988697926154799, "loss": 1.6894, "step": 4308 }, { "epoch": 0.4533403471856917, "grad_norm": 1.011841058731079, "learning_rate": 0.00011985358140249621, "loss": 1.4649, "step": 4309 }, { "epoch": 0.45344555497106787, "grad_norm": 1.7795685529708862, "learning_rate": 0.0001198201812379134, "loss": 1.4143, "step": 4310 }, { "epoch": 0.45355076275644396, "grad_norm": 1.545654058456421, "learning_rate": 0.00011978677877167822, "loss": 2.1281, "step": 4311 }, { "epoch": 0.4536559705418201, "grad_norm": 1.2907682657241821, "learning_rate": 0.00011975337400766958, "loss": 1.4049, "step": 4312 }, { "epoch": 0.4537611783271962, "grad_norm": 1.0812395811080933, "learning_rate": 0.00011971996694976663, "loss": 2.2, "step": 4313 }, { "epoch": 0.45386638611257235, "grad_norm": 1.4511226415634155, "learning_rate": 0.00011968655760184891, "loss": 2.0119, "step": 4314 }, { "epoch": 0.45397159389794844, "grad_norm": 1.6760962009429932, "learning_rate": 0.00011965314596779604, "loss": 1.6423, "step": 4315 }, { "epoch": 0.4540768016833246, "grad_norm": 1.0780059099197388, "learning_rate": 0.00011961973205148804, "loss": 1.5716, "step": 4316 }, { "epoch": 0.4541820094687007, "grad_norm": 1.6850388050079346, "learning_rate": 0.00011958631585680518, "loss": 1.7895, "step": 4317 }, { "epoch": 0.4542872172540768, "grad_norm": 1.061856746673584, "learning_rate": 0.00011955289738762796, "loss": 1.8926, "step": 4318 }, { "epoch": 0.45439242503945293, "grad_norm": 1.7766098976135254, "learning_rate": 0.00011951947664783713, "loss": 1.3322, "step": 4319 }, { "epoch": 0.454497632824829, "grad_norm": 1.695619821548462, "learning_rate": 0.00011948605364131375, "loss": 1.6927, "step": 4320 }, { "epoch": 0.45460284061020517, "grad_norm": 1.510094404220581, "learning_rate": 0.00011945262837193915, "loss": 1.9738, "step": 4321 }, { "epoch": 0.45470804839558127, "grad_norm": 1.4329527616500854, "learning_rate": 0.0001194192008435949, "loss": 1.2781, "step": 4322 }, { "epoch": 0.4548132561809574, "grad_norm": 1.3772903680801392, "learning_rate": 0.00011938577106016275, "loss": 2.2099, "step": 4323 }, { "epoch": 0.4549184639663335, "grad_norm": 1.3040528297424316, "learning_rate": 0.00011935233902552485, "loss": 1.9477, "step": 4324 }, { "epoch": 0.4550236717517096, "grad_norm": 1.0545731782913208, "learning_rate": 0.00011931890474356358, "loss": 1.987, "step": 4325 }, { "epoch": 0.45512887953708575, "grad_norm": 1.2383321523666382, "learning_rate": 0.00011928546821816149, "loss": 1.0446, "step": 4326 }, { "epoch": 0.45523408732246184, "grad_norm": 0.9607561230659485, "learning_rate": 0.00011925202945320146, "loss": 1.446, "step": 4327 }, { "epoch": 0.455339295107838, "grad_norm": 0.9800947308540344, "learning_rate": 0.00011921858845256669, "loss": 1.6101, "step": 4328 }, { "epoch": 0.4554445028932141, "grad_norm": 1.430310845375061, "learning_rate": 0.00011918514522014051, "loss": 1.9691, "step": 4329 }, { "epoch": 0.45554971067859024, "grad_norm": 1.495063304901123, "learning_rate": 0.00011915169975980658, "loss": 1.7227, "step": 4330 }, { "epoch": 0.45565491846396633, "grad_norm": 1.0809447765350342, "learning_rate": 0.00011911825207544885, "loss": 2.2602, "step": 4331 }, { "epoch": 0.4557601262493425, "grad_norm": 1.0745564699172974, "learning_rate": 0.00011908480217095141, "loss": 1.8813, "step": 4332 }, { "epoch": 0.4558653340347186, "grad_norm": 1.3847942352294922, "learning_rate": 0.00011905135005019881, "loss": 2.1904, "step": 4333 }, { "epoch": 0.45597054182009467, "grad_norm": 1.2953932285308838, "learning_rate": 0.0001190178957170756, "loss": 1.7487, "step": 4334 }, { "epoch": 0.4560757496054708, "grad_norm": 1.1227591037750244, "learning_rate": 0.00011898443917546682, "loss": 1.7218, "step": 4335 }, { "epoch": 0.4561809573908469, "grad_norm": 2.098883867263794, "learning_rate": 0.00011895098042925763, "loss": 2.1559, "step": 4336 }, { "epoch": 0.45628616517622306, "grad_norm": 1.4620628356933594, "learning_rate": 0.00011891751948233348, "loss": 1.692, "step": 4337 }, { "epoch": 0.45639137296159915, "grad_norm": 2.7015371322631836, "learning_rate": 0.00011888405633858009, "loss": 2.0226, "step": 4338 }, { "epoch": 0.4564965807469753, "grad_norm": 1.333720326423645, "learning_rate": 0.00011885059100188341, "loss": 2.1821, "step": 4339 }, { "epoch": 0.4566017885323514, "grad_norm": 1.3061823844909668, "learning_rate": 0.00011881712347612965, "loss": 1.7872, "step": 4340 }, { "epoch": 0.4567069963177275, "grad_norm": 2.075654983520508, "learning_rate": 0.00011878365376520535, "loss": 1.7089, "step": 4341 }, { "epoch": 0.45681220410310364, "grad_norm": 1.908001184463501, "learning_rate": 0.00011875018187299719, "loss": 1.7904, "step": 4342 }, { "epoch": 0.45691741188847973, "grad_norm": 1.1886532306671143, "learning_rate": 0.0001187167078033921, "loss": 1.8649, "step": 4343 }, { "epoch": 0.4570226196738559, "grad_norm": 1.1382215023040771, "learning_rate": 0.00011868323156027742, "loss": 1.6879, "step": 4344 }, { "epoch": 0.457127827459232, "grad_norm": 1.0881881713867188, "learning_rate": 0.00011864975314754058, "loss": 1.816, "step": 4345 }, { "epoch": 0.4572330352446081, "grad_norm": 1.6298319101333618, "learning_rate": 0.00011861627256906929, "loss": 1.1575, "step": 4346 }, { "epoch": 0.4573382430299842, "grad_norm": 1.241731882095337, "learning_rate": 0.00011858278982875157, "loss": 1.7019, "step": 4347 }, { "epoch": 0.45744345081536036, "grad_norm": 0.9705410599708557, "learning_rate": 0.00011854930493047566, "loss": 1.9562, "step": 4348 }, { "epoch": 0.45754865860073646, "grad_norm": 1.3326573371887207, "learning_rate": 0.00011851581787813006, "loss": 2.3098, "step": 4349 }, { "epoch": 0.45765386638611255, "grad_norm": 1.1833465099334717, "learning_rate": 0.00011848232867560352, "loss": 2.0325, "step": 4350 }, { "epoch": 0.4577590741714887, "grad_norm": 1.501226544380188, "learning_rate": 0.00011844883732678495, "loss": 1.9941, "step": 4351 }, { "epoch": 0.4578642819568648, "grad_norm": 1.8644059896469116, "learning_rate": 0.00011841534383556372, "loss": 1.6826, "step": 4352 }, { "epoch": 0.45796948974224094, "grad_norm": 1.2248753309249878, "learning_rate": 0.00011838184820582923, "loss": 1.382, "step": 4353 }, { "epoch": 0.45807469752761704, "grad_norm": 2.008018970489502, "learning_rate": 0.00011834835044147121, "loss": 1.5946, "step": 4354 }, { "epoch": 0.4581799053129932, "grad_norm": 1.5521501302719116, "learning_rate": 0.00011831485054637973, "loss": 1.5836, "step": 4355 }, { "epoch": 0.4582851130983693, "grad_norm": 1.707040548324585, "learning_rate": 0.00011828134852444493, "loss": 1.7551, "step": 4356 }, { "epoch": 0.4583903208837454, "grad_norm": 1.7729437351226807, "learning_rate": 0.00011824784437955732, "loss": 2.3166, "step": 4357 }, { "epoch": 0.4584955286691215, "grad_norm": 1.4290671348571777, "learning_rate": 0.0001182143381156076, "loss": 2.0124, "step": 4358 }, { "epoch": 0.4586007364544976, "grad_norm": 2.462273359298706, "learning_rate": 0.00011818082973648683, "loss": 2.1217, "step": 4359 }, { "epoch": 0.45870594423987376, "grad_norm": 1.7780591249465942, "learning_rate": 0.00011814731924608616, "loss": 1.4172, "step": 4360 }, { "epoch": 0.45881115202524986, "grad_norm": 1.9604672193527222, "learning_rate": 0.00011811380664829703, "loss": 1.3744, "step": 4361 }, { "epoch": 0.458916359810626, "grad_norm": 1.0941351652145386, "learning_rate": 0.00011808029194701122, "loss": 1.9376, "step": 4362 }, { "epoch": 0.4590215675960021, "grad_norm": 1.587456226348877, "learning_rate": 0.00011804677514612062, "loss": 1.9933, "step": 4363 }, { "epoch": 0.45912677538137825, "grad_norm": 1.38690984249115, "learning_rate": 0.00011801325624951745, "loss": 2.1653, "step": 4364 }, { "epoch": 0.45923198316675434, "grad_norm": 1.6729885339736938, "learning_rate": 0.00011797973526109416, "loss": 1.7463, "step": 4365 }, { "epoch": 0.45933719095213044, "grad_norm": 1.2510261535644531, "learning_rate": 0.00011794621218474345, "loss": 1.6988, "step": 4366 }, { "epoch": 0.4594423987375066, "grad_norm": 1.0695215463638306, "learning_rate": 0.00011791268702435816, "loss": 2.1912, "step": 4367 }, { "epoch": 0.4595476065228827, "grad_norm": 1.849915862083435, "learning_rate": 0.00011787915978383151, "loss": 1.7784, "step": 4368 }, { "epoch": 0.45965281430825883, "grad_norm": 1.144481897354126, "learning_rate": 0.00011784563046705695, "loss": 1.928, "step": 4369 }, { "epoch": 0.4597580220936349, "grad_norm": 1.9032231569290161, "learning_rate": 0.00011781209907792805, "loss": 1.3965, "step": 4370 }, { "epoch": 0.45986322987901107, "grad_norm": 1.352990746498108, "learning_rate": 0.00011777856562033876, "loss": 1.987, "step": 4371 }, { "epoch": 0.45996843766438716, "grad_norm": 1.394627571105957, "learning_rate": 0.00011774503009818316, "loss": 2.3461, "step": 4372 }, { "epoch": 0.46007364544976326, "grad_norm": 0.8705309629440308, "learning_rate": 0.00011771149251535569, "loss": 2.7158, "step": 4373 }, { "epoch": 0.4601788532351394, "grad_norm": 1.0310938358306885, "learning_rate": 0.0001176779528757509, "loss": 1.9135, "step": 4374 }, { "epoch": 0.4602840610205155, "grad_norm": 1.4547808170318604, "learning_rate": 0.00011764441118326364, "loss": 1.8487, "step": 4375 }, { "epoch": 0.46038926880589165, "grad_norm": 2.0553081035614014, "learning_rate": 0.00011761086744178902, "loss": 1.698, "step": 4376 }, { "epoch": 0.46049447659126774, "grad_norm": 1.7115883827209473, "learning_rate": 0.00011757732165522237, "loss": 2.4097, "step": 4377 }, { "epoch": 0.4605996843766439, "grad_norm": 1.6055662631988525, "learning_rate": 0.00011754377382745922, "loss": 1.6749, "step": 4378 }, { "epoch": 0.46070489216202, "grad_norm": 1.6005985736846924, "learning_rate": 0.00011751022396239539, "loss": 2.0828, "step": 4379 }, { "epoch": 0.46081009994739613, "grad_norm": 1.4807254076004028, "learning_rate": 0.00011747667206392691, "loss": 1.7367, "step": 4380 }, { "epoch": 0.46091530773277223, "grad_norm": 1.8019040822982788, "learning_rate": 0.00011744311813595006, "loss": 2.1629, "step": 4381 }, { "epoch": 0.4610205155181483, "grad_norm": 1.1676995754241943, "learning_rate": 0.00011740956218236132, "loss": 1.6825, "step": 4382 }, { "epoch": 0.46112572330352447, "grad_norm": 1.9756308794021606, "learning_rate": 0.00011737600420705748, "loss": 2.0505, "step": 4383 }, { "epoch": 0.46123093108890056, "grad_norm": 1.3423806428909302, "learning_rate": 0.00011734244421393548, "loss": 2.0603, "step": 4384 }, { "epoch": 0.4613361388742767, "grad_norm": 1.3737515211105347, "learning_rate": 0.00011730888220689251, "loss": 1.5806, "step": 4385 }, { "epoch": 0.4614413466596528, "grad_norm": 1.299750804901123, "learning_rate": 0.0001172753181898261, "loss": 1.4868, "step": 4386 }, { "epoch": 0.46154655444502896, "grad_norm": 1.3489303588867188, "learning_rate": 0.00011724175216663384, "loss": 1.6957, "step": 4387 }, { "epoch": 0.46165176223040505, "grad_norm": 1.3841158151626587, "learning_rate": 0.00011720818414121368, "loss": 1.5918, "step": 4388 }, { "epoch": 0.46175697001578114, "grad_norm": 1.4252556562423706, "learning_rate": 0.00011717461411746378, "loss": 1.7677, "step": 4389 }, { "epoch": 0.4618621778011573, "grad_norm": 1.7873703241348267, "learning_rate": 0.0001171410420992825, "loss": 1.6604, "step": 4390 }, { "epoch": 0.4619673855865334, "grad_norm": 1.825914978981018, "learning_rate": 0.00011710746809056841, "loss": 1.7022, "step": 4391 }, { "epoch": 0.46207259337190953, "grad_norm": 1.4188059568405151, "learning_rate": 0.00011707389209522039, "loss": 2.1184, "step": 4392 }, { "epoch": 0.46217780115728563, "grad_norm": 1.4496338367462158, "learning_rate": 0.0001170403141171375, "loss": 2.0205, "step": 4393 }, { "epoch": 0.4622830089426618, "grad_norm": 2.31512713432312, "learning_rate": 0.00011700673416021908, "loss": 1.822, "step": 4394 }, { "epoch": 0.46238821672803787, "grad_norm": 1.1979174613952637, "learning_rate": 0.00011697315222836458, "loss": 2.3128, "step": 4395 }, { "epoch": 0.462493424513414, "grad_norm": 1.3453646898269653, "learning_rate": 0.00011693956832547384, "loss": 2.0374, "step": 4396 }, { "epoch": 0.4625986322987901, "grad_norm": 1.1278789043426514, "learning_rate": 0.0001169059824554468, "loss": 1.5538, "step": 4397 }, { "epoch": 0.4627038400841662, "grad_norm": 1.6684142351150513, "learning_rate": 0.00011687239462218369, "loss": 2.0152, "step": 4398 }, { "epoch": 0.46280904786954236, "grad_norm": 1.2076631784439087, "learning_rate": 0.00011683880482958493, "loss": 1.9577, "step": 4399 }, { "epoch": 0.46291425565491845, "grad_norm": 1.3028810024261475, "learning_rate": 0.00011680521308155124, "loss": 1.413, "step": 4400 }, { "epoch": 0.4630194634402946, "grad_norm": 1.2423094511032104, "learning_rate": 0.00011677161938198348, "loss": 1.7865, "step": 4401 }, { "epoch": 0.4631246712256707, "grad_norm": 1.2606873512268066, "learning_rate": 0.0001167380237347828, "loss": 1.9457, "step": 4402 }, { "epoch": 0.46322987901104684, "grad_norm": 1.7899268865585327, "learning_rate": 0.00011670442614385053, "loss": 2.2358, "step": 4403 }, { "epoch": 0.46333508679642293, "grad_norm": 1.2076514959335327, "learning_rate": 0.00011667082661308826, "loss": 1.7803, "step": 4404 }, { "epoch": 0.46344029458179903, "grad_norm": 1.6006786823272705, "learning_rate": 0.00011663722514639778, "loss": 1.696, "step": 4405 }, { "epoch": 0.4635455023671752, "grad_norm": 1.0841476917266846, "learning_rate": 0.00011660362174768114, "loss": 1.8837, "step": 4406 }, { "epoch": 0.46365071015255127, "grad_norm": 0.9759506583213806, "learning_rate": 0.0001165700164208406, "loss": 1.4346, "step": 4407 }, { "epoch": 0.4637559179379274, "grad_norm": 1.5411173105239868, "learning_rate": 0.00011653640916977861, "loss": 1.925, "step": 4408 }, { "epoch": 0.4638611257233035, "grad_norm": 1.1252624988555908, "learning_rate": 0.00011650279999839787, "loss": 1.8466, "step": 4409 }, { "epoch": 0.46396633350867966, "grad_norm": 1.3989907503128052, "learning_rate": 0.00011646918891060127, "loss": 1.8891, "step": 4410 }, { "epoch": 0.46407154129405576, "grad_norm": 1.7185852527618408, "learning_rate": 0.00011643557591029206, "loss": 1.7989, "step": 4411 }, { "epoch": 0.4641767490794319, "grad_norm": 1.037192702293396, "learning_rate": 0.0001164019610013735, "loss": 1.3982, "step": 4412 }, { "epoch": 0.464281956864808, "grad_norm": 1.2058227062225342, "learning_rate": 0.00011636834418774922, "loss": 1.605, "step": 4413 }, { "epoch": 0.4643871646501841, "grad_norm": 1.8014076948165894, "learning_rate": 0.00011633472547332305, "loss": 1.3505, "step": 4414 }, { "epoch": 0.46449237243556024, "grad_norm": 1.513741135597229, "learning_rate": 0.00011630110486199899, "loss": 1.6722, "step": 4415 }, { "epoch": 0.46459758022093633, "grad_norm": 1.3467214107513428, "learning_rate": 0.00011626748235768128, "loss": 1.9053, "step": 4416 }, { "epoch": 0.4647027880063125, "grad_norm": 1.5441854000091553, "learning_rate": 0.00011623385796427442, "loss": 1.7715, "step": 4417 }, { "epoch": 0.4648079957916886, "grad_norm": 1.012117624282837, "learning_rate": 0.00011620023168568311, "loss": 1.9828, "step": 4418 }, { "epoch": 0.4649132035770647, "grad_norm": 1.2613452672958374, "learning_rate": 0.00011616660352581225, "loss": 1.996, "step": 4419 }, { "epoch": 0.4650184113624408, "grad_norm": 2.198415756225586, "learning_rate": 0.00011613297348856693, "loss": 2.229, "step": 4420 }, { "epoch": 0.4651236191478169, "grad_norm": 1.369099736213684, "learning_rate": 0.00011609934157785251, "loss": 1.7323, "step": 4421 }, { "epoch": 0.46522882693319306, "grad_norm": 1.4710428714752197, "learning_rate": 0.00011606570779757461, "loss": 1.7945, "step": 4422 }, { "epoch": 0.46533403471856916, "grad_norm": 2.027179479598999, "learning_rate": 0.00011603207215163894, "loss": 1.0333, "step": 4423 }, { "epoch": 0.4654392425039453, "grad_norm": 1.1320077180862427, "learning_rate": 0.00011599843464395151, "loss": 1.8657, "step": 4424 }, { "epoch": 0.4655444502893214, "grad_norm": 1.3532216548919678, "learning_rate": 0.00011596479527841859, "loss": 2.1035, "step": 4425 }, { "epoch": 0.46564965807469755, "grad_norm": 1.3773186206817627, "learning_rate": 0.00011593115405894652, "loss": 1.9023, "step": 4426 }, { "epoch": 0.46575486586007364, "grad_norm": 1.1166491508483887, "learning_rate": 0.00011589751098944202, "loss": 1.5557, "step": 4427 }, { "epoch": 0.4658600736454498, "grad_norm": 1.6609245538711548, "learning_rate": 0.0001158638660738119, "loss": 2.003, "step": 4428 }, { "epoch": 0.4659652814308259, "grad_norm": 1.4807888269424438, "learning_rate": 0.00011583021931596325, "loss": 2.0677, "step": 4429 }, { "epoch": 0.466070489216202, "grad_norm": 1.3660595417022705, "learning_rate": 0.0001157965707198034, "loss": 1.838, "step": 4430 }, { "epoch": 0.4661756970015781, "grad_norm": 1.6003875732421875, "learning_rate": 0.00011576292028923976, "loss": 1.789, "step": 4431 }, { "epoch": 0.4662809047869542, "grad_norm": 1.518566370010376, "learning_rate": 0.00011572926802818011, "loss": 2.4484, "step": 4432 }, { "epoch": 0.46638611257233037, "grad_norm": 2.0211031436920166, "learning_rate": 0.00011569561394053236, "loss": 1.7331, "step": 4433 }, { "epoch": 0.46649132035770646, "grad_norm": 1.0467982292175293, "learning_rate": 0.00011566195803020464, "loss": 1.5526, "step": 4434 }, { "epoch": 0.4665965281430826, "grad_norm": 1.9077860116958618, "learning_rate": 0.00011562830030110532, "loss": 2.1115, "step": 4435 }, { "epoch": 0.4667017359284587, "grad_norm": 1.1763733625411987, "learning_rate": 0.00011559464075714292, "loss": 1.5898, "step": 4436 }, { "epoch": 0.4668069437138348, "grad_norm": 1.084130048751831, "learning_rate": 0.00011556097940222628, "loss": 2.0153, "step": 4437 }, { "epoch": 0.46691215149921095, "grad_norm": 0.8493435978889465, "learning_rate": 0.00011552731624026432, "loss": 1.9469, "step": 4438 }, { "epoch": 0.46701735928458704, "grad_norm": 2.182161331176758, "learning_rate": 0.00011549365127516627, "loss": 2.1621, "step": 4439 }, { "epoch": 0.4671225670699632, "grad_norm": 1.1532615423202515, "learning_rate": 0.0001154599845108415, "loss": 2.1096, "step": 4440 }, { "epoch": 0.4672277748553393, "grad_norm": 0.9221954345703125, "learning_rate": 0.00011542631595119965, "loss": 1.8867, "step": 4441 }, { "epoch": 0.46733298264071543, "grad_norm": 1.4042004346847534, "learning_rate": 0.00011539264560015052, "loss": 1.7898, "step": 4442 }, { "epoch": 0.4674381904260915, "grad_norm": 1.2298986911773682, "learning_rate": 0.00011535897346160416, "loss": 1.8017, "step": 4443 }, { "epoch": 0.4675433982114677, "grad_norm": 0.9590116739273071, "learning_rate": 0.00011532529953947075, "loss": 1.823, "step": 4444 }, { "epoch": 0.46764860599684377, "grad_norm": 1.2512177228927612, "learning_rate": 0.00011529162383766079, "loss": 1.6104, "step": 4445 }, { "epoch": 0.46775381378221986, "grad_norm": 1.2496157884597778, "learning_rate": 0.00011525794636008491, "loss": 1.4588, "step": 4446 }, { "epoch": 0.467859021567596, "grad_norm": 1.6144243478775024, "learning_rate": 0.00011522426711065397, "loss": 1.9968, "step": 4447 }, { "epoch": 0.4679642293529721, "grad_norm": 1.1202630996704102, "learning_rate": 0.000115190586093279, "loss": 2.2607, "step": 4448 }, { "epoch": 0.46806943713834825, "grad_norm": 1.414974570274353, "learning_rate": 0.00011515690331187133, "loss": 1.7647, "step": 4449 }, { "epoch": 0.46817464492372435, "grad_norm": 1.3937184810638428, "learning_rate": 0.00011512321877034234, "loss": 1.5259, "step": 4450 }, { "epoch": 0.4682798527091005, "grad_norm": 1.7348077297210693, "learning_rate": 0.00011508953247260379, "loss": 1.8668, "step": 4451 }, { "epoch": 0.4683850604944766, "grad_norm": 1.4032397270202637, "learning_rate": 0.00011505584442256752, "loss": 1.1793, "step": 4452 }, { "epoch": 0.4684902682798527, "grad_norm": 1.652096152305603, "learning_rate": 0.00011502215462414561, "loss": 1.5654, "step": 4453 }, { "epoch": 0.46859547606522883, "grad_norm": 1.547971487045288, "learning_rate": 0.00011498846308125033, "loss": 1.9391, "step": 4454 }, { "epoch": 0.4687006838506049, "grad_norm": 1.1444284915924072, "learning_rate": 0.00011495476979779418, "loss": 1.7784, "step": 4455 }, { "epoch": 0.4688058916359811, "grad_norm": 1.4032714366912842, "learning_rate": 0.00011492107477768992, "loss": 1.7778, "step": 4456 }, { "epoch": 0.46891109942135717, "grad_norm": 1.3546231985092163, "learning_rate": 0.00011488737802485033, "loss": 1.4267, "step": 4457 }, { "epoch": 0.4690163072067333, "grad_norm": 1.4766918420791626, "learning_rate": 0.00011485367954318856, "loss": 1.6944, "step": 4458 }, { "epoch": 0.4691215149921094, "grad_norm": 1.9072582721710205, "learning_rate": 0.0001148199793366179, "loss": 2.0898, "step": 4459 }, { "epoch": 0.46922672277748556, "grad_norm": 1.6005427837371826, "learning_rate": 0.00011478627740905183, "loss": 1.5538, "step": 4460 }, { "epoch": 0.46933193056286165, "grad_norm": 1.1679853200912476, "learning_rate": 0.00011475257376440405, "loss": 1.6107, "step": 4461 }, { "epoch": 0.46943713834823775, "grad_norm": 1.3431743383407593, "learning_rate": 0.0001147188684065885, "loss": 1.99, "step": 4462 }, { "epoch": 0.4695423461336139, "grad_norm": 1.37478768825531, "learning_rate": 0.00011468516133951921, "loss": 1.9783, "step": 4463 }, { "epoch": 0.46964755391899, "grad_norm": 1.1095014810562134, "learning_rate": 0.00011465145256711048, "loss": 1.9507, "step": 4464 }, { "epoch": 0.46975276170436614, "grad_norm": 1.8217538595199585, "learning_rate": 0.0001146177420932768, "loss": 2.4746, "step": 4465 }, { "epoch": 0.46985796948974223, "grad_norm": 1.3353768587112427, "learning_rate": 0.00011458402992193289, "loss": 1.7805, "step": 4466 }, { "epoch": 0.4699631772751184, "grad_norm": 1.624212384223938, "learning_rate": 0.0001145503160569936, "loss": 1.928, "step": 4467 }, { "epoch": 0.4700683850604945, "grad_norm": 1.4001796245574951, "learning_rate": 0.00011451660050237401, "loss": 1.7054, "step": 4468 }, { "epoch": 0.47017359284587057, "grad_norm": 1.3089070320129395, "learning_rate": 0.00011448288326198939, "loss": 2.2208, "step": 4469 }, { "epoch": 0.4702788006312467, "grad_norm": 1.7245391607284546, "learning_rate": 0.00011444916433975528, "loss": 1.6884, "step": 4470 }, { "epoch": 0.4703840084166228, "grad_norm": 1.450230360031128, "learning_rate": 0.00011441544373958725, "loss": 1.5062, "step": 4471 }, { "epoch": 0.47048921620199896, "grad_norm": 1.8866242170333862, "learning_rate": 0.00011438172146540123, "loss": 1.6767, "step": 4472 }, { "epoch": 0.47059442398737505, "grad_norm": 1.5962532758712769, "learning_rate": 0.00011434799752111324, "loss": 1.8297, "step": 4473 }, { "epoch": 0.4706996317727512, "grad_norm": 1.6113159656524658, "learning_rate": 0.00011431427191063957, "loss": 1.84, "step": 4474 }, { "epoch": 0.4708048395581273, "grad_norm": 1.374487042427063, "learning_rate": 0.00011428054463789661, "loss": 1.865, "step": 4475 }, { "epoch": 0.47091004734350345, "grad_norm": 2.844097137451172, "learning_rate": 0.00011424681570680105, "loss": 1.3199, "step": 4476 }, { "epoch": 0.47101525512887954, "grad_norm": 1.1103769540786743, "learning_rate": 0.00011421308512126969, "loss": 2.3692, "step": 4477 }, { "epoch": 0.47112046291425563, "grad_norm": 1.4509694576263428, "learning_rate": 0.00011417935288521955, "loss": 2.0409, "step": 4478 }, { "epoch": 0.4712256706996318, "grad_norm": 1.3323218822479248, "learning_rate": 0.00011414561900256784, "loss": 2.2169, "step": 4479 }, { "epoch": 0.4713308784850079, "grad_norm": 1.0157517194747925, "learning_rate": 0.00011411188347723198, "loss": 2.1209, "step": 4480 }, { "epoch": 0.471436086270384, "grad_norm": 1.2026599645614624, "learning_rate": 0.00011407814631312957, "loss": 1.934, "step": 4481 }, { "epoch": 0.4715412940557601, "grad_norm": 1.1034396886825562, "learning_rate": 0.00011404440751417838, "loss": 1.7275, "step": 4482 }, { "epoch": 0.47164650184113627, "grad_norm": 0.9263768196105957, "learning_rate": 0.00011401066708429641, "loss": 2.0278, "step": 4483 }, { "epoch": 0.47175170962651236, "grad_norm": 1.4000307321548462, "learning_rate": 0.0001139769250274018, "loss": 2.0002, "step": 4484 }, { "epoch": 0.47185691741188845, "grad_norm": 1.6272485256195068, "learning_rate": 0.0001139431813474129, "loss": 1.8365, "step": 4485 }, { "epoch": 0.4719621251972646, "grad_norm": 1.3752802610397339, "learning_rate": 0.00011390943604824826, "loss": 2.1863, "step": 4486 }, { "epoch": 0.4720673329826407, "grad_norm": 1.1118308305740356, "learning_rate": 0.00011387568913382664, "loss": 2.1644, "step": 4487 }, { "epoch": 0.47217254076801685, "grad_norm": 1.0211619138717651, "learning_rate": 0.00011384194060806692, "loss": 1.9979, "step": 4488 }, { "epoch": 0.47227774855339294, "grad_norm": 1.477390170097351, "learning_rate": 0.0001138081904748882, "loss": 1.9993, "step": 4489 }, { "epoch": 0.4723829563387691, "grad_norm": 0.9481652975082397, "learning_rate": 0.00011377443873820981, "loss": 1.886, "step": 4490 }, { "epoch": 0.4724881641241452, "grad_norm": 1.2719368934631348, "learning_rate": 0.00011374068540195122, "loss": 1.6373, "step": 4491 }, { "epoch": 0.47259337190952133, "grad_norm": 1.15910005569458, "learning_rate": 0.00011370693047003205, "loss": 2.0742, "step": 4492 }, { "epoch": 0.4726985796948974, "grad_norm": 1.1939606666564941, "learning_rate": 0.00011367317394637218, "loss": 1.4249, "step": 4493 }, { "epoch": 0.4728037874802735, "grad_norm": 1.500286340713501, "learning_rate": 0.00011363941583489171, "loss": 2.1489, "step": 4494 }, { "epoch": 0.47290899526564967, "grad_norm": 1.2122026681900024, "learning_rate": 0.00011360565613951073, "loss": 2.0753, "step": 4495 }, { "epoch": 0.47301420305102576, "grad_norm": 1.3212809562683105, "learning_rate": 0.0001135718948641497, "loss": 1.4172, "step": 4496 }, { "epoch": 0.4731194108364019, "grad_norm": 1.0483636856079102, "learning_rate": 0.00011353813201272921, "loss": 1.5167, "step": 4497 }, { "epoch": 0.473224618621778, "grad_norm": 0.8720149993896484, "learning_rate": 0.00011350436758917007, "loss": 1.6892, "step": 4498 }, { "epoch": 0.47332982640715415, "grad_norm": 1.857450008392334, "learning_rate": 0.00011347060159739315, "loss": 2.0008, "step": 4499 }, { "epoch": 0.47343503419253025, "grad_norm": 1.0890053510665894, "learning_rate": 0.00011343683404131964, "loss": 1.5298, "step": 4500 }, { "epoch": 0.47354024197790634, "grad_norm": 1.181207299232483, "learning_rate": 0.00011340306492487084, "loss": 1.6269, "step": 4501 }, { "epoch": 0.4736454497632825, "grad_norm": 1.423409104347229, "learning_rate": 0.00011336929425196826, "loss": 2.2304, "step": 4502 }, { "epoch": 0.4737506575486586, "grad_norm": 1.1393976211547852, "learning_rate": 0.00011333552202653353, "loss": 1.6851, "step": 4503 }, { "epoch": 0.47385586533403473, "grad_norm": 1.761922001838684, "learning_rate": 0.00011330174825248857, "loss": 1.5886, "step": 4504 }, { "epoch": 0.4739610731194108, "grad_norm": 2.4872066974639893, "learning_rate": 0.0001132679729337554, "loss": 1.8363, "step": 4505 }, { "epoch": 0.474066280904787, "grad_norm": 2.0420877933502197, "learning_rate": 0.00011323419607425618, "loss": 1.7054, "step": 4506 }, { "epoch": 0.47417148869016307, "grad_norm": 1.310332179069519, "learning_rate": 0.00011320041767791336, "loss": 2.296, "step": 4507 }, { "epoch": 0.4742766964755392, "grad_norm": 1.570314884185791, "learning_rate": 0.00011316663774864951, "loss": 1.3577, "step": 4508 }, { "epoch": 0.4743819042609153, "grad_norm": 1.4115073680877686, "learning_rate": 0.00011313285629038737, "loss": 1.744, "step": 4509 }, { "epoch": 0.4744871120462914, "grad_norm": 1.366669774055481, "learning_rate": 0.00011309907330704988, "loss": 1.6847, "step": 4510 }, { "epoch": 0.47459231983166755, "grad_norm": 1.2731913328170776, "learning_rate": 0.00011306528880256016, "loss": 2.1791, "step": 4511 }, { "epoch": 0.47469752761704365, "grad_norm": 0.9359650015830994, "learning_rate": 0.00011303150278084145, "loss": 1.6904, "step": 4512 }, { "epoch": 0.4748027354024198, "grad_norm": 1.9053103923797607, "learning_rate": 0.00011299771524581722, "loss": 1.1146, "step": 4513 }, { "epoch": 0.4749079431877959, "grad_norm": 1.60822331905365, "learning_rate": 0.00011296392620141114, "loss": 1.47, "step": 4514 }, { "epoch": 0.47501315097317204, "grad_norm": 1.3728163242340088, "learning_rate": 0.00011293013565154702, "loss": 2.0799, "step": 4515 }, { "epoch": 0.47511835875854813, "grad_norm": 1.5891927480697632, "learning_rate": 0.0001128963436001488, "loss": 2.0376, "step": 4516 }, { "epoch": 0.4752235665439242, "grad_norm": 1.4155621528625488, "learning_rate": 0.00011286255005114065, "loss": 1.6403, "step": 4517 }, { "epoch": 0.4753287743293004, "grad_norm": 1.1898889541625977, "learning_rate": 0.00011282875500844694, "loss": 2.0779, "step": 4518 }, { "epoch": 0.47543398211467647, "grad_norm": 1.205228328704834, "learning_rate": 0.00011279495847599216, "loss": 2.0405, "step": 4519 }, { "epoch": 0.4755391899000526, "grad_norm": 1.6402561664581299, "learning_rate": 0.00011276116045770096, "loss": 1.8382, "step": 4520 }, { "epoch": 0.4756443976854287, "grad_norm": 1.9118852615356445, "learning_rate": 0.00011272736095749823, "loss": 2.0503, "step": 4521 }, { "epoch": 0.47574960547080486, "grad_norm": 1.6586589813232422, "learning_rate": 0.00011269355997930899, "loss": 1.7323, "step": 4522 }, { "epoch": 0.47585481325618095, "grad_norm": 1.5802953243255615, "learning_rate": 0.00011265975752705842, "loss": 2.3248, "step": 4523 }, { "epoch": 0.4759600210415571, "grad_norm": 1.256369709968567, "learning_rate": 0.0001126259536046719, "loss": 1.5487, "step": 4524 }, { "epoch": 0.4760652288269332, "grad_norm": 1.2171026468276978, "learning_rate": 0.00011259214821607496, "loss": 1.3623, "step": 4525 }, { "epoch": 0.4761704366123093, "grad_norm": 1.427194595336914, "learning_rate": 0.00011255834136519334, "loss": 1.1506, "step": 4526 }, { "epoch": 0.47627564439768544, "grad_norm": 1.2046290636062622, "learning_rate": 0.00011252453305595285, "loss": 1.3518, "step": 4527 }, { "epoch": 0.47638085218306153, "grad_norm": 1.498401403427124, "learning_rate": 0.00011249072329227959, "loss": 2.0285, "step": 4528 }, { "epoch": 0.4764860599684377, "grad_norm": 1.4994467496871948, "learning_rate": 0.00011245691207809978, "loss": 1.9723, "step": 4529 }, { "epoch": 0.4765912677538138, "grad_norm": 0.8767755627632141, "learning_rate": 0.00011242309941733978, "loss": 1.7982, "step": 4530 }, { "epoch": 0.4766964755391899, "grad_norm": 1.7396360635757446, "learning_rate": 0.00011238928531392614, "loss": 2.3044, "step": 4531 }, { "epoch": 0.476801683324566, "grad_norm": 1.3658801317214966, "learning_rate": 0.00011235546977178562, "loss": 1.4995, "step": 4532 }, { "epoch": 0.4769068911099421, "grad_norm": 1.1424037218093872, "learning_rate": 0.00011232165279484506, "loss": 1.6863, "step": 4533 }, { "epoch": 0.47701209889531826, "grad_norm": 1.8249849081039429, "learning_rate": 0.00011228783438703154, "loss": 1.6042, "step": 4534 }, { "epoch": 0.47711730668069435, "grad_norm": 1.236474871635437, "learning_rate": 0.0001122540145522723, "loss": 2.4466, "step": 4535 }, { "epoch": 0.4772225144660705, "grad_norm": 1.7508653402328491, "learning_rate": 0.00011222019329449467, "loss": 1.9367, "step": 4536 }, { "epoch": 0.4773277222514466, "grad_norm": 1.29315185546875, "learning_rate": 0.00011218637061762624, "loss": 1.9788, "step": 4537 }, { "epoch": 0.47743293003682274, "grad_norm": 1.5131714344024658, "learning_rate": 0.00011215254652559472, "loss": 2.1485, "step": 4538 }, { "epoch": 0.47753813782219884, "grad_norm": 1.8211942911148071, "learning_rate": 0.00011211872102232801, "loss": 1.3127, "step": 4539 }, { "epoch": 0.477643345607575, "grad_norm": 1.5046416521072388, "learning_rate": 0.0001120848941117541, "loss": 1.6588, "step": 4540 }, { "epoch": 0.4777485533929511, "grad_norm": 2.2012786865234375, "learning_rate": 0.00011205106579780125, "loss": 2.1727, "step": 4541 }, { "epoch": 0.4778537611783272, "grad_norm": 2.173950433731079, "learning_rate": 0.00011201723608439778, "loss": 2.1504, "step": 4542 }, { "epoch": 0.4779589689637033, "grad_norm": 1.459008812904358, "learning_rate": 0.00011198340497547231, "loss": 2.1339, "step": 4543 }, { "epoch": 0.4780641767490794, "grad_norm": 1.1652424335479736, "learning_rate": 0.00011194957247495344, "loss": 1.6663, "step": 4544 }, { "epoch": 0.47816938453445557, "grad_norm": 1.2521672248840332, "learning_rate": 0.00011191573858677007, "loss": 2.2594, "step": 4545 }, { "epoch": 0.47827459231983166, "grad_norm": 1.4206370115280151, "learning_rate": 0.00011188190331485125, "loss": 2.0058, "step": 4546 }, { "epoch": 0.4783798001052078, "grad_norm": 1.4350491762161255, "learning_rate": 0.00011184806666312609, "loss": 1.8621, "step": 4547 }, { "epoch": 0.4784850078905839, "grad_norm": 0.9594536423683167, "learning_rate": 0.00011181422863552398, "loss": 2.0049, "step": 4548 }, { "epoch": 0.47859021567596, "grad_norm": 1.3745486736297607, "learning_rate": 0.0001117803892359744, "loss": 2.1858, "step": 4549 }, { "epoch": 0.47869542346133614, "grad_norm": 1.600081443786621, "learning_rate": 0.00011174654846840701, "loss": 1.9469, "step": 4550 }, { "epoch": 0.47880063124671224, "grad_norm": 0.9910126328468323, "learning_rate": 0.00011171270633675161, "loss": 2.1031, "step": 4551 }, { "epoch": 0.4789058390320884, "grad_norm": 1.027575135231018, "learning_rate": 0.00011167886284493821, "loss": 1.5686, "step": 4552 }, { "epoch": 0.4790110468174645, "grad_norm": 1.3736275434494019, "learning_rate": 0.00011164501799689693, "loss": 1.8597, "step": 4553 }, { "epoch": 0.47911625460284063, "grad_norm": 1.4835340976715088, "learning_rate": 0.00011161117179655804, "loss": 2.0892, "step": 4554 }, { "epoch": 0.4792214623882167, "grad_norm": 1.1146955490112305, "learning_rate": 0.00011157732424785202, "loss": 1.7941, "step": 4555 }, { "epoch": 0.4793266701735929, "grad_norm": 1.7215523719787598, "learning_rate": 0.00011154347535470947, "loss": 1.6763, "step": 4556 }, { "epoch": 0.47943187795896897, "grad_norm": 1.5364283323287964, "learning_rate": 0.0001115096251210611, "loss": 2.0692, "step": 4557 }, { "epoch": 0.47953708574434506, "grad_norm": 1.7501965761184692, "learning_rate": 0.00011147577355083789, "loss": 2.2162, "step": 4558 }, { "epoch": 0.4796422935297212, "grad_norm": 1.8486822843551636, "learning_rate": 0.00011144192064797088, "loss": 1.9551, "step": 4559 }, { "epoch": 0.4797475013150973, "grad_norm": 2.2193543910980225, "learning_rate": 0.0001114080664163913, "loss": 1.7281, "step": 4560 }, { "epoch": 0.47985270910047345, "grad_norm": 1.7399812936782837, "learning_rate": 0.00011137421086003052, "loss": 1.8969, "step": 4561 }, { "epoch": 0.47995791688584954, "grad_norm": 1.6608930826187134, "learning_rate": 0.0001113403539828201, "loss": 1.8734, "step": 4562 }, { "epoch": 0.4800631246712257, "grad_norm": 1.4710301160812378, "learning_rate": 0.00011130649578869173, "loss": 1.8415, "step": 4563 }, { "epoch": 0.4801683324566018, "grad_norm": 1.3207552433013916, "learning_rate": 0.00011127263628157722, "loss": 1.8769, "step": 4564 }, { "epoch": 0.4802735402419779, "grad_norm": 1.6613874435424805, "learning_rate": 0.00011123877546540857, "loss": 1.778, "step": 4565 }, { "epoch": 0.48037874802735403, "grad_norm": 1.1604993343353271, "learning_rate": 0.00011120491334411793, "loss": 1.5905, "step": 4566 }, { "epoch": 0.4804839558127301, "grad_norm": 1.4732471704483032, "learning_rate": 0.00011117104992163762, "loss": 1.515, "step": 4567 }, { "epoch": 0.4805891635981063, "grad_norm": 1.1420209407806396, "learning_rate": 0.00011113718520190006, "loss": 1.6139, "step": 4568 }, { "epoch": 0.48069437138348237, "grad_norm": 1.2326006889343262, "learning_rate": 0.00011110331918883787, "loss": 1.5793, "step": 4569 }, { "epoch": 0.4807995791688585, "grad_norm": 1.0035511255264282, "learning_rate": 0.00011106945188638378, "loss": 2.121, "step": 4570 }, { "epoch": 0.4809047869542346, "grad_norm": 0.9064120054244995, "learning_rate": 0.0001110355832984707, "loss": 1.6258, "step": 4571 }, { "epoch": 0.48100999473961076, "grad_norm": 0.8680402040481567, "learning_rate": 0.00011100171342903165, "loss": 2.326, "step": 4572 }, { "epoch": 0.48111520252498685, "grad_norm": 1.4351173639297485, "learning_rate": 0.00011096784228199985, "loss": 1.8333, "step": 4573 }, { "epoch": 0.48122041031036294, "grad_norm": 1.4196946620941162, "learning_rate": 0.00011093396986130866, "loss": 1.5755, "step": 4574 }, { "epoch": 0.4813256180957391, "grad_norm": 1.547824501991272, "learning_rate": 0.00011090009617089155, "loss": 2.2177, "step": 4575 }, { "epoch": 0.4814308258811152, "grad_norm": 1.1497628688812256, "learning_rate": 0.00011086622121468213, "loss": 1.8107, "step": 4576 }, { "epoch": 0.48153603366649134, "grad_norm": 0.9802242517471313, "learning_rate": 0.00011083234499661426, "loss": 1.9389, "step": 4577 }, { "epoch": 0.48164124145186743, "grad_norm": 1.283870816230774, "learning_rate": 0.00011079846752062182, "loss": 2.4584, "step": 4578 }, { "epoch": 0.4817464492372436, "grad_norm": 1.0405322313308716, "learning_rate": 0.00011076458879063891, "loss": 1.7302, "step": 4579 }, { "epoch": 0.4818516570226197, "grad_norm": 1.3652944564819336, "learning_rate": 0.00011073070881059977, "loss": 1.8197, "step": 4580 }, { "epoch": 0.48195686480799577, "grad_norm": 0.9112363457679749, "learning_rate": 0.00011069682758443873, "loss": 2.3815, "step": 4581 }, { "epoch": 0.4820620725933719, "grad_norm": 2.2471213340759277, "learning_rate": 0.00011066294511609032, "loss": 1.918, "step": 4582 }, { "epoch": 0.482167280378748, "grad_norm": 1.5038715600967407, "learning_rate": 0.00011062906140948922, "loss": 1.571, "step": 4583 }, { "epoch": 0.48227248816412416, "grad_norm": 1.7240707874298096, "learning_rate": 0.00011059517646857023, "loss": 1.7805, "step": 4584 }, { "epoch": 0.48237769594950025, "grad_norm": 1.71571946144104, "learning_rate": 0.00011056129029726825, "loss": 2.1035, "step": 4585 }, { "epoch": 0.4824829037348764, "grad_norm": 1.455073356628418, "learning_rate": 0.00011052740289951842, "loss": 1.5254, "step": 4586 }, { "epoch": 0.4825881115202525, "grad_norm": 1.1514016389846802, "learning_rate": 0.00011049351427925598, "loss": 1.5474, "step": 4587 }, { "epoch": 0.48269331930562864, "grad_norm": 1.1613560914993286, "learning_rate": 0.00011045962444041624, "loss": 2.2161, "step": 4588 }, { "epoch": 0.48279852709100474, "grad_norm": 1.7991211414337158, "learning_rate": 0.00011042573338693479, "loss": 1.9893, "step": 4589 }, { "epoch": 0.48290373487638083, "grad_norm": 1.4048147201538086, "learning_rate": 0.00011039184112274725, "loss": 1.771, "step": 4590 }, { "epoch": 0.483008942661757, "grad_norm": 1.2304316759109497, "learning_rate": 0.00011035794765178941, "loss": 2.0752, "step": 4591 }, { "epoch": 0.4831141504471331, "grad_norm": 1.0293245315551758, "learning_rate": 0.00011032405297799722, "loss": 2.0021, "step": 4592 }, { "epoch": 0.4832193582325092, "grad_norm": 1.4348002672195435, "learning_rate": 0.00011029015710530674, "loss": 1.5027, "step": 4593 }, { "epoch": 0.4833245660178853, "grad_norm": 1.2140568494796753, "learning_rate": 0.0001102562600376542, "loss": 1.864, "step": 4594 }, { "epoch": 0.48342977380326146, "grad_norm": 1.5038398504257202, "learning_rate": 0.000110222361778976, "loss": 1.567, "step": 4595 }, { "epoch": 0.48353498158863756, "grad_norm": 2.2957375049591064, "learning_rate": 0.00011018846233320854, "loss": 1.1497, "step": 4596 }, { "epoch": 0.48364018937401365, "grad_norm": 1.7357734441757202, "learning_rate": 0.0001101545617042885, "loss": 2.1494, "step": 4597 }, { "epoch": 0.4837453971593898, "grad_norm": 1.3374382257461548, "learning_rate": 0.0001101206598961527, "loss": 2.0164, "step": 4598 }, { "epoch": 0.4838506049447659, "grad_norm": 1.0593690872192383, "learning_rate": 0.00011008675691273793, "loss": 1.7716, "step": 4599 }, { "epoch": 0.48395581273014204, "grad_norm": 1.28423273563385, "learning_rate": 0.00011005285275798132, "loss": 1.9312, "step": 4600 }, { "epoch": 0.48406102051551814, "grad_norm": 1.176078200340271, "learning_rate": 0.00011001894743582004, "loss": 2.0926, "step": 4601 }, { "epoch": 0.4841662283008943, "grad_norm": 1.3551251888275146, "learning_rate": 0.00010998504095019137, "loss": 1.9196, "step": 4602 }, { "epoch": 0.4842714360862704, "grad_norm": 1.554956316947937, "learning_rate": 0.00010995113330503278, "loss": 1.5871, "step": 4603 }, { "epoch": 0.48437664387164653, "grad_norm": 0.9341940879821777, "learning_rate": 0.00010991722450428184, "loss": 1.5597, "step": 4604 }, { "epoch": 0.4844818516570226, "grad_norm": 1.026902437210083, "learning_rate": 0.00010988331455187628, "loss": 2.1465, "step": 4605 }, { "epoch": 0.4845870594423987, "grad_norm": 1.683455228805542, "learning_rate": 0.00010984940345175392, "loss": 1.9747, "step": 4606 }, { "epoch": 0.48469226722777486, "grad_norm": 1.6722251176834106, "learning_rate": 0.0001098154912078528, "loss": 1.8076, "step": 4607 }, { "epoch": 0.48479747501315096, "grad_norm": 1.0394576787948608, "learning_rate": 0.000109781577824111, "loss": 1.9039, "step": 4608 }, { "epoch": 0.4849026827985271, "grad_norm": 1.5216197967529297, "learning_rate": 0.00010974766330446678, "loss": 2.1068, "step": 4609 }, { "epoch": 0.4850078905839032, "grad_norm": 2.0184504985809326, "learning_rate": 0.00010971374765285851, "loss": 1.9356, "step": 4610 }, { "epoch": 0.48511309836927935, "grad_norm": 2.1446685791015625, "learning_rate": 0.0001096798308732247, "loss": 1.0265, "step": 4611 }, { "epoch": 0.48521830615465544, "grad_norm": 1.6989480257034302, "learning_rate": 0.00010964591296950406, "loss": 1.8608, "step": 4612 }, { "epoch": 0.48532351394003154, "grad_norm": 1.0963361263275146, "learning_rate": 0.00010961199394563526, "loss": 1.5642, "step": 4613 }, { "epoch": 0.4854287217254077, "grad_norm": 1.4704118967056274, "learning_rate": 0.00010957807380555727, "loss": 1.5999, "step": 4614 }, { "epoch": 0.4855339295107838, "grad_norm": 1.1268386840820312, "learning_rate": 0.00010954415255320909, "loss": 1.4149, "step": 4615 }, { "epoch": 0.48563913729615993, "grad_norm": 1.2833198308944702, "learning_rate": 0.00010951023019252993, "loss": 1.3482, "step": 4616 }, { "epoch": 0.485744345081536, "grad_norm": 1.0503816604614258, "learning_rate": 0.00010947630672745906, "loss": 1.5722, "step": 4617 }, { "epoch": 0.48584955286691217, "grad_norm": 2.151472330093384, "learning_rate": 0.00010944238216193586, "loss": 2.0229, "step": 4618 }, { "epoch": 0.48595476065228826, "grad_norm": 1.2306005954742432, "learning_rate": 0.00010940845649989994, "loss": 1.4716, "step": 4619 }, { "epoch": 0.4860599684376644, "grad_norm": 1.4052913188934326, "learning_rate": 0.00010937452974529093, "loss": 1.6263, "step": 4620 }, { "epoch": 0.4861651762230405, "grad_norm": 1.0920840501785278, "learning_rate": 0.00010934060190204865, "loss": 2.1744, "step": 4621 }, { "epoch": 0.4862703840084166, "grad_norm": 2.252732276916504, "learning_rate": 0.00010930667297411305, "loss": 1.9662, "step": 4622 }, { "epoch": 0.48637559179379275, "grad_norm": 1.5878801345825195, "learning_rate": 0.00010927274296542416, "loss": 1.7275, "step": 4623 }, { "epoch": 0.48648079957916884, "grad_norm": 0.946306586265564, "learning_rate": 0.00010923881187992215, "loss": 1.5401, "step": 4624 }, { "epoch": 0.486586007364545, "grad_norm": 1.2825038433074951, "learning_rate": 0.00010920487972154734, "loss": 2.1301, "step": 4625 }, { "epoch": 0.4866912151499211, "grad_norm": 2.0224785804748535, "learning_rate": 0.00010917094649424018, "loss": 1.6339, "step": 4626 }, { "epoch": 0.48679642293529723, "grad_norm": 1.4419629573822021, "learning_rate": 0.00010913701220194117, "loss": 2.2219, "step": 4627 }, { "epoch": 0.48690163072067333, "grad_norm": 1.4137790203094482, "learning_rate": 0.00010910307684859102, "loss": 2.2587, "step": 4628 }, { "epoch": 0.4870068385060494, "grad_norm": 1.2425460815429688, "learning_rate": 0.00010906914043813056, "loss": 1.6606, "step": 4629 }, { "epoch": 0.48711204629142557, "grad_norm": 1.4972299337387085, "learning_rate": 0.00010903520297450067, "loss": 1.3636, "step": 4630 }, { "epoch": 0.48721725407680166, "grad_norm": 1.6092376708984375, "learning_rate": 0.0001090012644616424, "loss": 2.0573, "step": 4631 }, { "epoch": 0.4873224618621778, "grad_norm": 1.6330995559692383, "learning_rate": 0.00010896732490349697, "loss": 1.7161, "step": 4632 }, { "epoch": 0.4874276696475539, "grad_norm": 2.3415868282318115, "learning_rate": 0.00010893338430400562, "loss": 1.524, "step": 4633 }, { "epoch": 0.48753287743293006, "grad_norm": 1.2462005615234375, "learning_rate": 0.00010889944266710972, "loss": 1.5716, "step": 4634 }, { "epoch": 0.48763808521830615, "grad_norm": 1.564387321472168, "learning_rate": 0.00010886549999675088, "loss": 1.6856, "step": 4635 }, { "epoch": 0.4877432930036823, "grad_norm": 0.9912921786308289, "learning_rate": 0.00010883155629687071, "loss": 1.878, "step": 4636 }, { "epoch": 0.4878485007890584, "grad_norm": 1.348905086517334, "learning_rate": 0.000108797611571411, "loss": 2.4173, "step": 4637 }, { "epoch": 0.4879537085744345, "grad_norm": 1.316444754600525, "learning_rate": 0.00010876366582431361, "loss": 2.1035, "step": 4638 }, { "epoch": 0.48805891635981064, "grad_norm": 1.0654300451278687, "learning_rate": 0.00010872971905952057, "loss": 2.1269, "step": 4639 }, { "epoch": 0.48816412414518673, "grad_norm": 1.8568501472473145, "learning_rate": 0.00010869577128097404, "loss": 1.2797, "step": 4640 }, { "epoch": 0.4882693319305629, "grad_norm": 1.3833286762237549, "learning_rate": 0.00010866182249261617, "loss": 1.9619, "step": 4641 }, { "epoch": 0.48837453971593897, "grad_norm": 0.8749449253082275, "learning_rate": 0.00010862787269838939, "loss": 1.6768, "step": 4642 }, { "epoch": 0.4884797475013151, "grad_norm": 1.499631643295288, "learning_rate": 0.00010859392190223619, "loss": 1.9632, "step": 4643 }, { "epoch": 0.4885849552866912, "grad_norm": 1.0441616773605347, "learning_rate": 0.00010855997010809915, "loss": 2.0146, "step": 4644 }, { "epoch": 0.4886901630720673, "grad_norm": 1.8089901208877563, "learning_rate": 0.00010852601731992094, "loss": 1.8159, "step": 4645 }, { "epoch": 0.48879537085744346, "grad_norm": 1.6164319515228271, "learning_rate": 0.00010849206354164439, "loss": 1.3445, "step": 4646 }, { "epoch": 0.48890057864281955, "grad_norm": 1.5210686922073364, "learning_rate": 0.00010845810877721252, "loss": 1.6565, "step": 4647 }, { "epoch": 0.4890057864281957, "grad_norm": 1.2863743305206299, "learning_rate": 0.00010842415303056827, "loss": 1.5714, "step": 4648 }, { "epoch": 0.4891109942135718, "grad_norm": 1.657315731048584, "learning_rate": 0.0001083901963056549, "loss": 1.7163, "step": 4649 }, { "epoch": 0.48921620199894794, "grad_norm": 1.0181341171264648, "learning_rate": 0.00010835623860641569, "loss": 1.8581, "step": 4650 }, { "epoch": 0.48932140978432404, "grad_norm": 0.8646203279495239, "learning_rate": 0.00010832227993679396, "loss": 1.8136, "step": 4651 }, { "epoch": 0.4894266175697002, "grad_norm": 1.2895039319992065, "learning_rate": 0.00010828832030073329, "loss": 1.9504, "step": 4652 }, { "epoch": 0.4895318253550763, "grad_norm": 1.2667862176895142, "learning_rate": 0.00010825435970217728, "loss": 1.9774, "step": 4653 }, { "epoch": 0.48963703314045237, "grad_norm": 1.3460382223129272, "learning_rate": 0.00010822039814506964, "loss": 2.0969, "step": 4654 }, { "epoch": 0.4897422409258285, "grad_norm": 1.2763968706130981, "learning_rate": 0.00010818643563335424, "loss": 2.2074, "step": 4655 }, { "epoch": 0.4898474487112046, "grad_norm": 0.9694689512252808, "learning_rate": 0.00010815247217097504, "loss": 1.7114, "step": 4656 }, { "epoch": 0.48995265649658076, "grad_norm": 2.234217882156372, "learning_rate": 0.00010811850776187608, "loss": 2.196, "step": 4657 }, { "epoch": 0.49005786428195686, "grad_norm": 1.10568106174469, "learning_rate": 0.00010808454241000155, "loss": 1.66, "step": 4658 }, { "epoch": 0.490163072067333, "grad_norm": 1.4655194282531738, "learning_rate": 0.00010805057611929573, "loss": 1.9986, "step": 4659 }, { "epoch": 0.4902682798527091, "grad_norm": 1.9132064580917358, "learning_rate": 0.00010801660889370301, "loss": 1.7011, "step": 4660 }, { "epoch": 0.4903734876380852, "grad_norm": 1.7220205068588257, "learning_rate": 0.00010798264073716791, "loss": 1.6763, "step": 4661 }, { "epoch": 0.49047869542346134, "grad_norm": 0.8977935910224915, "learning_rate": 0.000107948671653635, "loss": 1.8618, "step": 4662 }, { "epoch": 0.49058390320883744, "grad_norm": 1.6346113681793213, "learning_rate": 0.00010791470164704904, "loss": 1.6551, "step": 4663 }, { "epoch": 0.4906891109942136, "grad_norm": 1.7571468353271484, "learning_rate": 0.00010788073072135485, "loss": 1.6048, "step": 4664 }, { "epoch": 0.4907943187795897, "grad_norm": 1.8179447650909424, "learning_rate": 0.00010784675888049735, "loss": 2.1345, "step": 4665 }, { "epoch": 0.4908995265649658, "grad_norm": 1.7753139734268188, "learning_rate": 0.00010781278612842159, "loss": 1.8888, "step": 4666 }, { "epoch": 0.4910047343503419, "grad_norm": 1.0005837678909302, "learning_rate": 0.00010777881246907269, "loss": 1.645, "step": 4667 }, { "epoch": 0.49110994213571807, "grad_norm": 1.0366028547286987, "learning_rate": 0.00010774483790639591, "loss": 1.5939, "step": 4668 }, { "epoch": 0.49121514992109416, "grad_norm": 0.8974822163581848, "learning_rate": 0.00010771086244433662, "loss": 1.9674, "step": 4669 }, { "epoch": 0.49132035770647026, "grad_norm": 1.6691381931304932, "learning_rate": 0.00010767688608684023, "loss": 1.32, "step": 4670 }, { "epoch": 0.4914255654918464, "grad_norm": 1.127601981163025, "learning_rate": 0.00010764290883785237, "loss": 1.8129, "step": 4671 }, { "epoch": 0.4915307732772225, "grad_norm": 1.6219996213912964, "learning_rate": 0.00010760893070131868, "loss": 1.8319, "step": 4672 }, { "epoch": 0.49163598106259865, "grad_norm": 1.6142001152038574, "learning_rate": 0.0001075749516811849, "loss": 1.3877, "step": 4673 }, { "epoch": 0.49174118884797474, "grad_norm": 1.2514292001724243, "learning_rate": 0.00010754097178139695, "loss": 1.4618, "step": 4674 }, { "epoch": 0.4918463966333509, "grad_norm": 2.3644227981567383, "learning_rate": 0.00010750699100590076, "loss": 1.4959, "step": 4675 }, { "epoch": 0.491951604418727, "grad_norm": 0.9489824175834656, "learning_rate": 0.00010747300935864243, "loss": 1.775, "step": 4676 }, { "epoch": 0.4920568122041031, "grad_norm": 1.1649144887924194, "learning_rate": 0.00010743902684356815, "loss": 1.8669, "step": 4677 }, { "epoch": 0.4921620199894792, "grad_norm": 1.477669596672058, "learning_rate": 0.00010740504346462417, "loss": 1.9316, "step": 4678 }, { "epoch": 0.4922672277748553, "grad_norm": 1.8862009048461914, "learning_rate": 0.00010737105922575685, "loss": 2.0482, "step": 4679 }, { "epoch": 0.49237243556023147, "grad_norm": 1.249657154083252, "learning_rate": 0.00010733707413091269, "loss": 1.5986, "step": 4680 }, { "epoch": 0.49247764334560756, "grad_norm": 1.274321436882019, "learning_rate": 0.00010730308818403832, "loss": 2.0053, "step": 4681 }, { "epoch": 0.4925828511309837, "grad_norm": 1.5534741878509521, "learning_rate": 0.00010726910138908032, "loss": 1.7589, "step": 4682 }, { "epoch": 0.4926880589163598, "grad_norm": 1.196625828742981, "learning_rate": 0.00010723511374998554, "loss": 1.6722, "step": 4683 }, { "epoch": 0.49279326670173595, "grad_norm": 1.5351232290267944, "learning_rate": 0.00010720112527070083, "loss": 1.7944, "step": 4684 }, { "epoch": 0.49289847448711205, "grad_norm": 1.5849000215530396, "learning_rate": 0.00010716713595517313, "loss": 1.7102, "step": 4685 }, { "epoch": 0.49300368227248814, "grad_norm": 1.8203864097595215, "learning_rate": 0.00010713314580734954, "loss": 2.2117, "step": 4686 }, { "epoch": 0.4931088900578643, "grad_norm": 1.308393955230713, "learning_rate": 0.00010709915483117723, "loss": 1.8919, "step": 4687 }, { "epoch": 0.4932140978432404, "grad_norm": 1.2589519023895264, "learning_rate": 0.00010706516303060345, "loss": 1.3221, "step": 4688 }, { "epoch": 0.49331930562861653, "grad_norm": 1.2415590286254883, "learning_rate": 0.00010703117040957553, "loss": 1.8339, "step": 4689 }, { "epoch": 0.4934245134139926, "grad_norm": 1.551783800125122, "learning_rate": 0.00010699717697204095, "loss": 1.8219, "step": 4690 }, { "epoch": 0.4935297211993688, "grad_norm": 1.280253291130066, "learning_rate": 0.00010696318272194726, "loss": 1.7465, "step": 4691 }, { "epoch": 0.49363492898474487, "grad_norm": 1.4209909439086914, "learning_rate": 0.00010692918766324209, "loss": 1.7862, "step": 4692 }, { "epoch": 0.49374013677012096, "grad_norm": 1.7612025737762451, "learning_rate": 0.00010689519179987316, "loss": 1.8797, "step": 4693 }, { "epoch": 0.4938453445554971, "grad_norm": 1.5453522205352783, "learning_rate": 0.00010686119513578831, "loss": 2.0076, "step": 4694 }, { "epoch": 0.4939505523408732, "grad_norm": 1.7672468423843384, "learning_rate": 0.00010682719767493547, "loss": 1.8416, "step": 4695 }, { "epoch": 0.49405576012624935, "grad_norm": 1.4348251819610596, "learning_rate": 0.00010679319942126264, "loss": 1.872, "step": 4696 }, { "epoch": 0.49416096791162545, "grad_norm": 1.9852848052978516, "learning_rate": 0.00010675920037871794, "loss": 1.921, "step": 4697 }, { "epoch": 0.4942661756970016, "grad_norm": 1.4216840267181396, "learning_rate": 0.00010672520055124958, "loss": 1.5686, "step": 4698 }, { "epoch": 0.4943713834823777, "grad_norm": 1.5400251150131226, "learning_rate": 0.00010669119994280581, "loss": 2.021, "step": 4699 }, { "epoch": 0.49447659126775384, "grad_norm": 1.3077402114868164, "learning_rate": 0.00010665719855733501, "loss": 2.1264, "step": 4700 }, { "epoch": 0.49458179905312993, "grad_norm": 1.2699223756790161, "learning_rate": 0.00010662319639878565, "loss": 1.7802, "step": 4701 }, { "epoch": 0.494687006838506, "grad_norm": 1.2943834066390991, "learning_rate": 0.00010658919347110634, "loss": 1.8573, "step": 4702 }, { "epoch": 0.4947922146238822, "grad_norm": 1.5511493682861328, "learning_rate": 0.00010655518977824566, "loss": 2.2023, "step": 4703 }, { "epoch": 0.49489742240925827, "grad_norm": 1.4400460720062256, "learning_rate": 0.00010652118532415236, "loss": 1.9261, "step": 4704 }, { "epoch": 0.4950026301946344, "grad_norm": 1.2507339715957642, "learning_rate": 0.00010648718011277535, "loss": 1.6863, "step": 4705 }, { "epoch": 0.4951078379800105, "grad_norm": 1.3244736194610596, "learning_rate": 0.00010645317414806342, "loss": 1.9085, "step": 4706 }, { "epoch": 0.49521304576538666, "grad_norm": 1.1200934648513794, "learning_rate": 0.00010641916743396563, "loss": 1.5047, "step": 4707 }, { "epoch": 0.49531825355076275, "grad_norm": 1.6609597206115723, "learning_rate": 0.00010638515997443109, "loss": 1.1343, "step": 4708 }, { "epoch": 0.49542346133613885, "grad_norm": 1.0850027799606323, "learning_rate": 0.00010635115177340893, "loss": 1.6411, "step": 4709 }, { "epoch": 0.495528669121515, "grad_norm": 1.537176489830017, "learning_rate": 0.00010631714283484842, "loss": 1.524, "step": 4710 }, { "epoch": 0.4956338769068911, "grad_norm": 2.0602495670318604, "learning_rate": 0.00010628313316269891, "loss": 1.9078, "step": 4711 }, { "epoch": 0.49573908469226724, "grad_norm": 1.759886622428894, "learning_rate": 0.00010624912276090988, "loss": 1.9751, "step": 4712 }, { "epoch": 0.49584429247764333, "grad_norm": 1.4797873497009277, "learning_rate": 0.00010621511163343077, "loss": 2.0188, "step": 4713 }, { "epoch": 0.4959495002630195, "grad_norm": 1.672033429145813, "learning_rate": 0.00010618109978421119, "loss": 1.5586, "step": 4714 }, { "epoch": 0.4960547080483956, "grad_norm": 1.9004497528076172, "learning_rate": 0.00010614708721720085, "loss": 1.4778, "step": 4715 }, { "epoch": 0.4961599158337717, "grad_norm": 1.600061058998108, "learning_rate": 0.00010611307393634955, "loss": 1.4879, "step": 4716 }, { "epoch": 0.4962651236191478, "grad_norm": 1.7574427127838135, "learning_rate": 0.0001060790599456071, "loss": 1.8051, "step": 4717 }, { "epoch": 0.4963703314045239, "grad_norm": 1.1964126825332642, "learning_rate": 0.0001060450452489234, "loss": 1.7907, "step": 4718 }, { "epoch": 0.49647553918990006, "grad_norm": 1.7190804481506348, "learning_rate": 0.00010601102985024853, "loss": 2.033, "step": 4719 }, { "epoch": 0.49658074697527615, "grad_norm": 1.3082791566848755, "learning_rate": 0.00010597701375353257, "loss": 1.7543, "step": 4720 }, { "epoch": 0.4966859547606523, "grad_norm": 1.5606935024261475, "learning_rate": 0.00010594299696272565, "loss": 1.6072, "step": 4721 }, { "epoch": 0.4967911625460284, "grad_norm": 1.4001544713974, "learning_rate": 0.00010590897948177806, "loss": 2.1012, "step": 4722 }, { "epoch": 0.49689637033140455, "grad_norm": 1.4540914297103882, "learning_rate": 0.00010587496131464019, "loss": 1.3696, "step": 4723 }, { "epoch": 0.49700157811678064, "grad_norm": 1.2410134077072144, "learning_rate": 0.00010584094246526237, "loss": 1.4659, "step": 4724 }, { "epoch": 0.49710678590215673, "grad_norm": 1.1496258974075317, "learning_rate": 0.00010580692293759513, "loss": 1.7615, "step": 4725 }, { "epoch": 0.4972119936875329, "grad_norm": 1.969294548034668, "learning_rate": 0.00010577290273558908, "loss": 1.6735, "step": 4726 }, { "epoch": 0.497317201472909, "grad_norm": 1.9359304904937744, "learning_rate": 0.00010573888186319482, "loss": 1.7351, "step": 4727 }, { "epoch": 0.4974224092582851, "grad_norm": 1.6216297149658203, "learning_rate": 0.0001057048603243631, "loss": 1.7732, "step": 4728 }, { "epoch": 0.4975276170436612, "grad_norm": 1.4571208953857422, "learning_rate": 0.00010567083812304477, "loss": 1.5308, "step": 4729 }, { "epoch": 0.49763282482903737, "grad_norm": 1.5610977411270142, "learning_rate": 0.00010563681526319069, "loss": 1.8377, "step": 4730 }, { "epoch": 0.49773803261441346, "grad_norm": 1.9585388898849487, "learning_rate": 0.00010560279174875179, "loss": 2.1389, "step": 4731 }, { "epoch": 0.4978432403997896, "grad_norm": 1.929347038269043, "learning_rate": 0.0001055687675836791, "loss": 0.9178, "step": 4732 }, { "epoch": 0.4979484481851657, "grad_norm": 1.9738050699234009, "learning_rate": 0.00010553474277192381, "loss": 1.9461, "step": 4733 }, { "epoch": 0.4980536559705418, "grad_norm": 1.227402925491333, "learning_rate": 0.00010550071731743707, "loss": 2.15, "step": 4734 }, { "epoch": 0.49815886375591795, "grad_norm": 1.7934569120407104, "learning_rate": 0.00010546669122417013, "loss": 1.7138, "step": 4735 }, { "epoch": 0.49826407154129404, "grad_norm": 1.155175805091858, "learning_rate": 0.00010543266449607432, "loss": 1.708, "step": 4736 }, { "epoch": 0.4983692793266702, "grad_norm": 1.2957451343536377, "learning_rate": 0.0001053986371371011, "loss": 1.8071, "step": 4737 }, { "epoch": 0.4984744871120463, "grad_norm": 1.2430212497711182, "learning_rate": 0.0001053646091512019, "loss": 1.568, "step": 4738 }, { "epoch": 0.49857969489742243, "grad_norm": 0.9808127880096436, "learning_rate": 0.00010533058054232832, "loss": 1.6683, "step": 4739 }, { "epoch": 0.4986849026827985, "grad_norm": 1.0529319047927856, "learning_rate": 0.00010529655131443199, "loss": 1.5132, "step": 4740 }, { "epoch": 0.4987901104681746, "grad_norm": 1.2187601327896118, "learning_rate": 0.0001052625214714646, "loss": 1.7863, "step": 4741 }, { "epoch": 0.49889531825355077, "grad_norm": 1.5013796091079712, "learning_rate": 0.00010522849101737788, "loss": 1.8348, "step": 4742 }, { "epoch": 0.49900052603892686, "grad_norm": 1.5242013931274414, "learning_rate": 0.00010519445995612374, "loss": 2.0386, "step": 4743 }, { "epoch": 0.499105733824303, "grad_norm": 1.6240057945251465, "learning_rate": 0.00010516042829165408, "loss": 1.6951, "step": 4744 }, { "epoch": 0.4992109416096791, "grad_norm": 1.4725539684295654, "learning_rate": 0.00010512639602792088, "loss": 1.9254, "step": 4745 }, { "epoch": 0.49931614939505525, "grad_norm": 1.0966986417770386, "learning_rate": 0.00010509236316887615, "loss": 1.7749, "step": 4746 }, { "epoch": 0.49942135718043135, "grad_norm": 1.0299161672592163, "learning_rate": 0.0001050583297184721, "loss": 1.4928, "step": 4747 }, { "epoch": 0.4995265649658075, "grad_norm": 1.2240405082702637, "learning_rate": 0.00010502429568066084, "loss": 1.4958, "step": 4748 }, { "epoch": 0.4996317727511836, "grad_norm": 1.3169293403625488, "learning_rate": 0.00010499026105939467, "loss": 1.3972, "step": 4749 }, { "epoch": 0.4997369805365597, "grad_norm": 1.836531400680542, "learning_rate": 0.00010495622585862594, "loss": 2.0759, "step": 4750 }, { "epoch": 0.49984218832193583, "grad_norm": 1.1490098237991333, "learning_rate": 0.00010492219008230704, "loss": 1.9707, "step": 4751 }, { "epoch": 0.4999473961073119, "grad_norm": 1.809786319732666, "learning_rate": 0.00010488815373439036, "loss": 2.212, "step": 4752 }, { "epoch": 0.500052603892688, "grad_norm": 1.3090404272079468, "learning_rate": 0.0001048541168188285, "loss": 1.7952, "step": 4753 }, { "epoch": 0.5001578116780642, "grad_norm": 1.8462897539138794, "learning_rate": 0.00010482007933957407, "loss": 1.837, "step": 4754 }, { "epoch": 0.5002630194634403, "grad_norm": 1.5334430932998657, "learning_rate": 0.00010478604130057965, "loss": 1.7316, "step": 4755 }, { "epoch": 0.5003682272488165, "grad_norm": 1.2265576124191284, "learning_rate": 0.00010475200270579803, "loss": 1.6373, "step": 4756 }, { "epoch": 0.5004734350341925, "grad_norm": 1.6965841054916382, "learning_rate": 0.00010471796355918202, "loss": 2.1889, "step": 4757 }, { "epoch": 0.5005786428195687, "grad_norm": 1.6729693412780762, "learning_rate": 0.0001046839238646844, "loss": 1.4878, "step": 4758 }, { "epoch": 0.5006838506049448, "grad_norm": 1.4285995960235596, "learning_rate": 0.00010464988362625812, "loss": 2.3533, "step": 4759 }, { "epoch": 0.5007890583903208, "grad_norm": 0.9867104291915894, "learning_rate": 0.00010461584284785617, "loss": 2.2433, "step": 4760 }, { "epoch": 0.500894266175697, "grad_norm": 1.198486566543579, "learning_rate": 0.00010458180153343162, "loss": 1.491, "step": 4761 }, { "epoch": 0.5009994739610731, "grad_norm": 2.1748099327087402, "learning_rate": 0.00010454775968693753, "loss": 1.4489, "step": 4762 }, { "epoch": 0.5011046817464493, "grad_norm": 1.7211601734161377, "learning_rate": 0.00010451371731232708, "loss": 1.4753, "step": 4763 }, { "epoch": 0.5012098895318253, "grad_norm": 1.0159982442855835, "learning_rate": 0.00010447967441355349, "loss": 1.9713, "step": 4764 }, { "epoch": 0.5013150973172015, "grad_norm": 1.20917546749115, "learning_rate": 0.00010444563099457008, "loss": 1.5139, "step": 4765 }, { "epoch": 0.5014203051025776, "grad_norm": 1.7025082111358643, "learning_rate": 0.00010441158705933016, "loss": 1.5271, "step": 4766 }, { "epoch": 0.5015255128879537, "grad_norm": 1.3307257890701294, "learning_rate": 0.00010437754261178719, "loss": 1.9943, "step": 4767 }, { "epoch": 0.5016307206733298, "grad_norm": 2.893484592437744, "learning_rate": 0.00010434349765589459, "loss": 1.4271, "step": 4768 }, { "epoch": 0.501735928458706, "grad_norm": 1.664506196975708, "learning_rate": 0.0001043094521956059, "loss": 1.5161, "step": 4769 }, { "epoch": 0.5018411362440821, "grad_norm": 1.219966173171997, "learning_rate": 0.00010427540623487475, "loss": 1.3841, "step": 4770 }, { "epoch": 0.5019463440294581, "grad_norm": 1.3663967847824097, "learning_rate": 0.00010424135977765475, "loss": 2.0504, "step": 4771 }, { "epoch": 0.5020515518148343, "grad_norm": 1.2188875675201416, "learning_rate": 0.00010420731282789957, "loss": 1.7724, "step": 4772 }, { "epoch": 0.5021567596002104, "grad_norm": 1.4278043508529663, "learning_rate": 0.00010417326538956305, "loss": 1.5938, "step": 4773 }, { "epoch": 0.5022619673855865, "grad_norm": 1.2825640439987183, "learning_rate": 0.00010413921746659894, "loss": 2.0405, "step": 4774 }, { "epoch": 0.5023671751709626, "grad_norm": 1.455748200416565, "learning_rate": 0.00010410516906296115, "loss": 1.3787, "step": 4775 }, { "epoch": 0.5024723829563388, "grad_norm": 0.9623004198074341, "learning_rate": 0.00010407112018260356, "loss": 1.4602, "step": 4776 }, { "epoch": 0.5025775907417149, "grad_norm": 1.078614592552185, "learning_rate": 0.0001040370708294802, "loss": 1.9882, "step": 4777 }, { "epoch": 0.502682798527091, "grad_norm": 1.2258964776992798, "learning_rate": 0.00010400302100754514, "loss": 1.5276, "step": 4778 }, { "epoch": 0.5027880063124671, "grad_norm": 1.2255494594573975, "learning_rate": 0.00010396897072075237, "loss": 1.2363, "step": 4779 }, { "epoch": 0.5028932140978433, "grad_norm": 1.2698360681533813, "learning_rate": 0.00010393491997305613, "loss": 1.7825, "step": 4780 }, { "epoch": 0.5029984218832193, "grad_norm": 1.997664451599121, "learning_rate": 0.00010390086876841061, "loss": 1.9237, "step": 4781 }, { "epoch": 0.5031036296685955, "grad_norm": 1.0741022825241089, "learning_rate": 0.00010386681711077002, "loss": 1.8118, "step": 4782 }, { "epoch": 0.5032088374539716, "grad_norm": 1.1076297760009766, "learning_rate": 0.0001038327650040887, "loss": 1.4896, "step": 4783 }, { "epoch": 0.5033140452393478, "grad_norm": 2.157230854034424, "learning_rate": 0.000103798712452321, "loss": 1.6452, "step": 4784 }, { "epoch": 0.5034192530247238, "grad_norm": 1.4255646467208862, "learning_rate": 0.00010376465945942133, "loss": 2.2098, "step": 4785 }, { "epoch": 0.5035244608100999, "grad_norm": 1.7662725448608398, "learning_rate": 0.00010373060602934415, "loss": 2.1151, "step": 4786 }, { "epoch": 0.5036296685954761, "grad_norm": 1.1463786363601685, "learning_rate": 0.00010369655216604397, "loss": 1.8305, "step": 4787 }, { "epoch": 0.5037348763808522, "grad_norm": 1.4723010063171387, "learning_rate": 0.00010366249787347537, "loss": 2.3292, "step": 4788 }, { "epoch": 0.5038400841662283, "grad_norm": 0.9463380575180054, "learning_rate": 0.00010362844315559297, "loss": 1.6957, "step": 4789 }, { "epoch": 0.5039452919516044, "grad_norm": 1.907961368560791, "learning_rate": 0.0001035943880163514, "loss": 2.1228, "step": 4790 }, { "epoch": 0.5040504997369806, "grad_norm": 1.6900379657745361, "learning_rate": 0.00010356033245970536, "loss": 2.1128, "step": 4791 }, { "epoch": 0.5041557075223566, "grad_norm": 1.4977108240127563, "learning_rate": 0.00010352627648960966, "loss": 1.584, "step": 4792 }, { "epoch": 0.5042609153077328, "grad_norm": 1.1327449083328247, "learning_rate": 0.00010349222011001908, "loss": 1.4022, "step": 4793 }, { "epoch": 0.5043661230931089, "grad_norm": 1.6916382312774658, "learning_rate": 0.0001034581633248885, "loss": 1.8529, "step": 4794 }, { "epoch": 0.5044713308784851, "grad_norm": 1.5584489107131958, "learning_rate": 0.00010342410613817277, "loss": 1.1992, "step": 4795 }, { "epoch": 0.5045765386638611, "grad_norm": 1.77381432056427, "learning_rate": 0.0001033900485538269, "loss": 2.2351, "step": 4796 }, { "epoch": 0.5046817464492372, "grad_norm": 1.6010788679122925, "learning_rate": 0.00010335599057580583, "loss": 1.8224, "step": 4797 }, { "epoch": 0.5047869542346134, "grad_norm": 1.5694942474365234, "learning_rate": 0.0001033219322080646, "loss": 2.1278, "step": 4798 }, { "epoch": 0.5048921620199894, "grad_norm": 1.6243647336959839, "learning_rate": 0.00010328787345455837, "loss": 1.913, "step": 4799 }, { "epoch": 0.5049973698053656, "grad_norm": 2.164302110671997, "learning_rate": 0.00010325381431924221, "loss": 1.8029, "step": 4800 }, { "epoch": 0.5051025775907417, "grad_norm": 0.8400641083717346, "learning_rate": 0.00010321975480607129, "loss": 1.3965, "step": 4801 }, { "epoch": 0.5052077853761179, "grad_norm": 1.4535051584243774, "learning_rate": 0.00010318569491900088, "loss": 2.4847, "step": 4802 }, { "epoch": 0.5053129931614939, "grad_norm": 1.4781876802444458, "learning_rate": 0.00010315163466198616, "loss": 1.9232, "step": 4803 }, { "epoch": 0.5054182009468701, "grad_norm": 1.4223634004592896, "learning_rate": 0.00010311757403898252, "loss": 1.3876, "step": 4804 }, { "epoch": 0.5055234087322462, "grad_norm": 2.039214849472046, "learning_rate": 0.00010308351305394528, "loss": 2.0856, "step": 4805 }, { "epoch": 0.5056286165176223, "grad_norm": 1.5365289449691772, "learning_rate": 0.0001030494517108298, "loss": 1.9099, "step": 4806 }, { "epoch": 0.5057338243029984, "grad_norm": 1.1576287746429443, "learning_rate": 0.00010301539001359155, "loss": 1.7402, "step": 4807 }, { "epoch": 0.5058390320883746, "grad_norm": 1.7206387519836426, "learning_rate": 0.00010298132796618596, "loss": 1.9806, "step": 4808 }, { "epoch": 0.5059442398737507, "grad_norm": 1.5616505146026611, "learning_rate": 0.00010294726557256862, "loss": 1.7111, "step": 4809 }, { "epoch": 0.5060494476591267, "grad_norm": 1.6971865892410278, "learning_rate": 0.00010291320283669499, "loss": 2.0612, "step": 4810 }, { "epoch": 0.5061546554445029, "grad_norm": 0.908016562461853, "learning_rate": 0.0001028791397625207, "loss": 2.0077, "step": 4811 }, { "epoch": 0.506259863229879, "grad_norm": 0.8206803798675537, "learning_rate": 0.00010284507635400142, "loss": 1.7445, "step": 4812 }, { "epoch": 0.5063650710152551, "grad_norm": 1.4862329959869385, "learning_rate": 0.00010281101261509278, "loss": 1.562, "step": 4813 }, { "epoch": 0.5064702788006312, "grad_norm": 1.458016276359558, "learning_rate": 0.00010277694854975051, "loss": 1.0668, "step": 4814 }, { "epoch": 0.5065754865860074, "grad_norm": 1.4686590433120728, "learning_rate": 0.00010274288416193034, "loss": 1.8496, "step": 4815 }, { "epoch": 0.5066806943713835, "grad_norm": 1.2068148851394653, "learning_rate": 0.00010270881945558808, "loss": 2.0571, "step": 4816 }, { "epoch": 0.5067859021567596, "grad_norm": 2.0828161239624023, "learning_rate": 0.00010267475443467954, "loss": 1.4989, "step": 4817 }, { "epoch": 0.5068911099421357, "grad_norm": 1.2611733675003052, "learning_rate": 0.00010264068910316055, "loss": 1.7526, "step": 4818 }, { "epoch": 0.5069963177275119, "grad_norm": 1.14738929271698, "learning_rate": 0.00010260662346498703, "loss": 2.0188, "step": 4819 }, { "epoch": 0.507101525512888, "grad_norm": 1.0972294807434082, "learning_rate": 0.00010257255752411495, "loss": 1.8969, "step": 4820 }, { "epoch": 0.507206733298264, "grad_norm": 1.5468823909759521, "learning_rate": 0.0001025384912845002, "loss": 1.5493, "step": 4821 }, { "epoch": 0.5073119410836402, "grad_norm": 1.338287353515625, "learning_rate": 0.0001025044247500988, "loss": 1.9397, "step": 4822 }, { "epoch": 0.5074171488690163, "grad_norm": 1.2431987524032593, "learning_rate": 0.00010247035792486683, "loss": 1.6526, "step": 4823 }, { "epoch": 0.5075223566543924, "grad_norm": 3.7089953422546387, "learning_rate": 0.00010243629081276031, "loss": 2.1808, "step": 4824 }, { "epoch": 0.5076275644397685, "grad_norm": 1.4418970346450806, "learning_rate": 0.00010240222341773538, "loss": 1.5014, "step": 4825 }, { "epoch": 0.5077327722251447, "grad_norm": 1.5097178220748901, "learning_rate": 0.00010236815574374816, "loss": 1.9034, "step": 4826 }, { "epoch": 0.5078379800105208, "grad_norm": 1.105833888053894, "learning_rate": 0.00010233408779475482, "loss": 1.6967, "step": 4827 }, { "epoch": 0.5079431877958969, "grad_norm": 1.0300766229629517, "learning_rate": 0.00010230001957471151, "loss": 1.9713, "step": 4828 }, { "epoch": 0.508048395581273, "grad_norm": 1.495307445526123, "learning_rate": 0.00010226595108757451, "loss": 1.7107, "step": 4829 }, { "epoch": 0.5081536033666492, "grad_norm": 1.6375658512115479, "learning_rate": 0.0001022318823373001, "loss": 1.7321, "step": 4830 }, { "epoch": 0.5082588111520252, "grad_norm": 1.7210208177566528, "learning_rate": 0.00010219781332784451, "loss": 1.7835, "step": 4831 }, { "epoch": 0.5083640189374014, "grad_norm": 1.3353241682052612, "learning_rate": 0.00010216374406316411, "loss": 1.8193, "step": 4832 }, { "epoch": 0.5084692267227775, "grad_norm": 1.2672992944717407, "learning_rate": 0.00010212967454721523, "loss": 2.0596, "step": 4833 }, { "epoch": 0.5085744345081536, "grad_norm": 1.5790518522262573, "learning_rate": 0.00010209560478395428, "loss": 1.3901, "step": 4834 }, { "epoch": 0.5086796422935297, "grad_norm": 1.039208173751831, "learning_rate": 0.00010206153477733762, "loss": 2.0043, "step": 4835 }, { "epoch": 0.5087848500789058, "grad_norm": 1.4386025667190552, "learning_rate": 0.00010202746453132172, "loss": 1.807, "step": 4836 }, { "epoch": 0.508890057864282, "grad_norm": 1.9724053144454956, "learning_rate": 0.00010199339404986308, "loss": 1.9082, "step": 4837 }, { "epoch": 0.508995265649658, "grad_norm": 1.3470215797424316, "learning_rate": 0.00010195932333691812, "loss": 2.0544, "step": 4838 }, { "epoch": 0.5091004734350342, "grad_norm": 2.0379178524017334, "learning_rate": 0.0001019252523964434, "loss": 1.5562, "step": 4839 }, { "epoch": 0.5092056812204103, "grad_norm": 1.6759377717971802, "learning_rate": 0.00010189118123239543, "loss": 2.1569, "step": 4840 }, { "epoch": 0.5093108890057865, "grad_norm": 1.4470930099487305, "learning_rate": 0.00010185710984873084, "loss": 2.3249, "step": 4841 }, { "epoch": 0.5094160967911625, "grad_norm": 1.781419038772583, "learning_rate": 0.0001018230382494062, "loss": 2.0459, "step": 4842 }, { "epoch": 0.5095213045765387, "grad_norm": 1.2743864059448242, "learning_rate": 0.00010178896643837809, "loss": 1.7856, "step": 4843 }, { "epoch": 0.5096265123619148, "grad_norm": 1.4296742677688599, "learning_rate": 0.00010175489441960327, "loss": 1.943, "step": 4844 }, { "epoch": 0.5097317201472908, "grad_norm": 2.150285005569458, "learning_rate": 0.00010172082219703829, "loss": 1.6565, "step": 4845 }, { "epoch": 0.509836927932667, "grad_norm": 1.225191593170166, "learning_rate": 0.0001016867497746399, "loss": 1.8036, "step": 4846 }, { "epoch": 0.5099421357180431, "grad_norm": 1.5737195014953613, "learning_rate": 0.00010165267715636482, "loss": 1.6792, "step": 4847 }, { "epoch": 0.5100473435034193, "grad_norm": 1.7750451564788818, "learning_rate": 0.00010161860434616982, "loss": 1.3811, "step": 4848 }, { "epoch": 0.5101525512887953, "grad_norm": 1.4361519813537598, "learning_rate": 0.00010158453134801155, "loss": 1.4103, "step": 4849 }, { "epoch": 0.5102577590741715, "grad_norm": 1.3288068771362305, "learning_rate": 0.00010155045816584691, "loss": 2.1853, "step": 4850 }, { "epoch": 0.5103629668595476, "grad_norm": 1.5870311260223389, "learning_rate": 0.0001015163848036327, "loss": 1.683, "step": 4851 }, { "epoch": 0.5104681746449238, "grad_norm": 1.2659305334091187, "learning_rate": 0.00010148231126532568, "loss": 1.7193, "step": 4852 }, { "epoch": 0.5105733824302998, "grad_norm": 1.333278775215149, "learning_rate": 0.00010144823755488273, "loss": 1.5757, "step": 4853 }, { "epoch": 0.510678590215676, "grad_norm": 0.9039688110351562, "learning_rate": 0.00010141416367626075, "loss": 1.8356, "step": 4854 }, { "epoch": 0.5107837980010521, "grad_norm": 0.9868873357772827, "learning_rate": 0.00010138008963341657, "loss": 1.6002, "step": 4855 }, { "epoch": 0.5108890057864282, "grad_norm": 1.448551893234253, "learning_rate": 0.00010134601543030713, "loss": 2.0055, "step": 4856 }, { "epoch": 0.5109942135718043, "grad_norm": 0.9456648230552673, "learning_rate": 0.00010131194107088935, "loss": 1.847, "step": 4857 }, { "epoch": 0.5110994213571805, "grad_norm": 1.293251633644104, "learning_rate": 0.00010127786655912021, "loss": 1.235, "step": 4858 }, { "epoch": 0.5112046291425566, "grad_norm": 1.0912578105926514, "learning_rate": 0.00010124379189895661, "loss": 2.0099, "step": 4859 }, { "epoch": 0.5113098369279326, "grad_norm": 1.4057592153549194, "learning_rate": 0.00010120971709435553, "loss": 2.2696, "step": 4860 }, { "epoch": 0.5114150447133088, "grad_norm": 0.9786197543144226, "learning_rate": 0.000101175642149274, "loss": 1.313, "step": 4861 }, { "epoch": 0.5115202524986849, "grad_norm": 1.2093125581741333, "learning_rate": 0.00010114156706766904, "loss": 1.5894, "step": 4862 }, { "epoch": 0.511625460284061, "grad_norm": 1.1187880039215088, "learning_rate": 0.00010110749185349763, "loss": 1.8596, "step": 4863 }, { "epoch": 0.5117306680694371, "grad_norm": 1.3426660299301147, "learning_rate": 0.00010107341651071684, "loss": 1.6574, "step": 4864 }, { "epoch": 0.5118358758548133, "grad_norm": 1.5116214752197266, "learning_rate": 0.00010103934104328375, "loss": 1.551, "step": 4865 }, { "epoch": 0.5119410836401894, "grad_norm": 1.235568642616272, "learning_rate": 0.00010100526545515539, "loss": 1.8606, "step": 4866 }, { "epoch": 0.5120462914255655, "grad_norm": 1.1877378225326538, "learning_rate": 0.00010097118975028885, "loss": 1.8721, "step": 4867 }, { "epoch": 0.5121514992109416, "grad_norm": 1.012769103050232, "learning_rate": 0.00010093711393264127, "loss": 1.5946, "step": 4868 }, { "epoch": 0.5122567069963178, "grad_norm": 1.3246477842330933, "learning_rate": 0.00010090303800616974, "loss": 1.3306, "step": 4869 }, { "epoch": 0.5123619147816938, "grad_norm": 1.2476806640625, "learning_rate": 0.00010086896197483136, "loss": 1.8805, "step": 4870 }, { "epoch": 0.5124671225670699, "grad_norm": 1.3036119937896729, "learning_rate": 0.00010083488584258326, "loss": 1.6369, "step": 4871 }, { "epoch": 0.5125723303524461, "grad_norm": 1.2664161920547485, "learning_rate": 0.00010080080961338265, "loss": 1.5136, "step": 4872 }, { "epoch": 0.5126775381378222, "grad_norm": 1.7038179636001587, "learning_rate": 0.00010076673329118665, "loss": 2.3886, "step": 4873 }, { "epoch": 0.5127827459231983, "grad_norm": 1.4440287351608276, "learning_rate": 0.00010073265687995243, "loss": 1.9345, "step": 4874 }, { "epoch": 0.5128879537085744, "grad_norm": 2.526855707168579, "learning_rate": 0.0001006985803836372, "loss": 1.3719, "step": 4875 }, { "epoch": 0.5129931614939506, "grad_norm": 1.2801002264022827, "learning_rate": 0.00010066450380619812, "loss": 2.1389, "step": 4876 }, { "epoch": 0.5130983692793266, "grad_norm": 1.681620478630066, "learning_rate": 0.0001006304271515924, "loss": 1.7829, "step": 4877 }, { "epoch": 0.5132035770647028, "grad_norm": 1.5121160745620728, "learning_rate": 0.00010059635042377725, "loss": 2.1326, "step": 4878 }, { "epoch": 0.5133087848500789, "grad_norm": 1.5406997203826904, "learning_rate": 0.00010056227362670989, "loss": 2.1427, "step": 4879 }, { "epoch": 0.5134139926354551, "grad_norm": 1.1765246391296387, "learning_rate": 0.00010052819676434754, "loss": 1.8298, "step": 4880 }, { "epoch": 0.5135192004208311, "grad_norm": 1.1417006254196167, "learning_rate": 0.00010049411984064745, "loss": 1.2332, "step": 4881 }, { "epoch": 0.5136244082062073, "grad_norm": 0.9688271880149841, "learning_rate": 0.00010046004285956684, "loss": 1.8887, "step": 4882 }, { "epoch": 0.5137296159915834, "grad_norm": 1.804365634918213, "learning_rate": 0.00010042596582506298, "loss": 1.6797, "step": 4883 }, { "epoch": 0.5138348237769595, "grad_norm": 1.8476359844207764, "learning_rate": 0.00010039188874109308, "loss": 1.5778, "step": 4884 }, { "epoch": 0.5139400315623356, "grad_norm": 1.6497973203659058, "learning_rate": 0.00010035781161161446, "loss": 1.8296, "step": 4885 }, { "epoch": 0.5140452393477117, "grad_norm": 1.3590152263641357, "learning_rate": 0.00010032373444058437, "loss": 2.2775, "step": 4886 }, { "epoch": 0.5141504471330879, "grad_norm": 1.5917162895202637, "learning_rate": 0.00010028965723196002, "loss": 2.0477, "step": 4887 }, { "epoch": 0.5142556549184639, "grad_norm": 2.074033737182617, "learning_rate": 0.00010025557998969875, "loss": 1.708, "step": 4888 }, { "epoch": 0.5143608627038401, "grad_norm": 1.4734264612197876, "learning_rate": 0.00010022150271775783, "loss": 1.8895, "step": 4889 }, { "epoch": 0.5144660704892162, "grad_norm": 0.9418952465057373, "learning_rate": 0.00010018742542009452, "loss": 1.4237, "step": 4890 }, { "epoch": 0.5145712782745924, "grad_norm": 1.5608463287353516, "learning_rate": 0.00010015334810066612, "loss": 1.7448, "step": 4891 }, { "epoch": 0.5146764860599684, "grad_norm": 1.5609275102615356, "learning_rate": 0.0001001192707634299, "loss": 1.8851, "step": 4892 }, { "epoch": 0.5147816938453446, "grad_norm": 1.515660047531128, "learning_rate": 0.00010008519341234318, "loss": 2.3635, "step": 4893 }, { "epoch": 0.5148869016307207, "grad_norm": 1.6261802911758423, "learning_rate": 0.00010005111605136319, "loss": 1.8002, "step": 4894 }, { "epoch": 0.5149921094160967, "grad_norm": 1.6840312480926514, "learning_rate": 0.00010001703868444728, "loss": 1.8234, "step": 4895 }, { "epoch": 0.5150973172014729, "grad_norm": 1.0442866086959839, "learning_rate": 9.998296131555273e-05, "loss": 1.7835, "step": 4896 }, { "epoch": 0.515202524986849, "grad_norm": 1.02167546749115, "learning_rate": 9.994888394863683e-05, "loss": 1.5165, "step": 4897 }, { "epoch": 0.5153077327722252, "grad_norm": 1.9780964851379395, "learning_rate": 9.991480658765685e-05, "loss": 1.7533, "step": 4898 }, { "epoch": 0.5154129405576012, "grad_norm": 2.685133695602417, "learning_rate": 9.988072923657012e-05, "loss": 2.181, "step": 4899 }, { "epoch": 0.5155181483429774, "grad_norm": 1.0838717222213745, "learning_rate": 9.98466518993339e-05, "loss": 1.7872, "step": 4900 }, { "epoch": 0.5156233561283535, "grad_norm": 2.0336694717407227, "learning_rate": 9.981257457990548e-05, "loss": 1.6419, "step": 4901 }, { "epoch": 0.5157285639137296, "grad_norm": 1.6287788152694702, "learning_rate": 9.977849728224219e-05, "loss": 2.189, "step": 4902 }, { "epoch": 0.5158337716991057, "grad_norm": 1.2882606983184814, "learning_rate": 9.974442001030125e-05, "loss": 1.7009, "step": 4903 }, { "epoch": 0.5159389794844819, "grad_norm": 1.8696643114089966, "learning_rate": 9.971034276803998e-05, "loss": 2.0965, "step": 4904 }, { "epoch": 0.516044187269858, "grad_norm": 1.5908252000808716, "learning_rate": 9.967626555941564e-05, "loss": 1.727, "step": 4905 }, { "epoch": 0.516149395055234, "grad_norm": 1.1613487005233765, "learning_rate": 9.964218838838554e-05, "loss": 2.1126, "step": 4906 }, { "epoch": 0.5162546028406102, "grad_norm": 1.0229045152664185, "learning_rate": 9.960811125890695e-05, "loss": 2.3455, "step": 4907 }, { "epoch": 0.5163598106259863, "grad_norm": 1.273767113685608, "learning_rate": 9.957403417493707e-05, "loss": 1.6472, "step": 4908 }, { "epoch": 0.5164650184113624, "grad_norm": 1.6049546003341675, "learning_rate": 9.953995714043319e-05, "loss": 1.6844, "step": 4909 }, { "epoch": 0.5165702261967385, "grad_norm": 1.1211811304092407, "learning_rate": 9.95058801593526e-05, "loss": 2.0708, "step": 4910 }, { "epoch": 0.5166754339821147, "grad_norm": 1.2839888334274292, "learning_rate": 9.94718032356525e-05, "loss": 1.8021, "step": 4911 }, { "epoch": 0.5167806417674908, "grad_norm": 1.3963242769241333, "learning_rate": 9.943772637329015e-05, "loss": 2.2216, "step": 4912 }, { "epoch": 0.5168858495528669, "grad_norm": 1.2890313863754272, "learning_rate": 9.940364957622276e-05, "loss": 1.7626, "step": 4913 }, { "epoch": 0.516991057338243, "grad_norm": 1.017500400543213, "learning_rate": 9.936957284840763e-05, "loss": 1.8122, "step": 4914 }, { "epoch": 0.5170962651236192, "grad_norm": 1.0217024087905884, "learning_rate": 9.93354961938019e-05, "loss": 2.2297, "step": 4915 }, { "epoch": 0.5172014729089953, "grad_norm": 1.5468822717666626, "learning_rate": 9.93014196163628e-05, "loss": 2.0883, "step": 4916 }, { "epoch": 0.5173066806943714, "grad_norm": 1.8743644952774048, "learning_rate": 9.926734312004759e-05, "loss": 1.6563, "step": 4917 }, { "epoch": 0.5174118884797475, "grad_norm": 1.372307538986206, "learning_rate": 9.923326670881336e-05, "loss": 1.7315, "step": 4918 }, { "epoch": 0.5175170962651237, "grad_norm": 1.082401156425476, "learning_rate": 9.919919038661736e-05, "loss": 1.8861, "step": 4919 }, { "epoch": 0.5176223040504997, "grad_norm": 1.2021406888961792, "learning_rate": 9.916511415741676e-05, "loss": 2.2311, "step": 4920 }, { "epoch": 0.5177275118358758, "grad_norm": 1.4951965808868408, "learning_rate": 9.913103802516868e-05, "loss": 2.1998, "step": 4921 }, { "epoch": 0.517832719621252, "grad_norm": 1.098451852798462, "learning_rate": 9.90969619938303e-05, "loss": 2.01, "step": 4922 }, { "epoch": 0.5179379274066281, "grad_norm": 1.2300734519958496, "learning_rate": 9.906288606735875e-05, "loss": 1.8119, "step": 4923 }, { "epoch": 0.5180431351920042, "grad_norm": 1.2791273593902588, "learning_rate": 9.902881024971116e-05, "loss": 1.8411, "step": 4924 }, { "epoch": 0.5181483429773803, "grad_norm": 1.747589111328125, "learning_rate": 9.899473454484461e-05, "loss": 1.8983, "step": 4925 }, { "epoch": 0.5182535507627565, "grad_norm": 1.192456841468811, "learning_rate": 9.896065895671625e-05, "loss": 1.4597, "step": 4926 }, { "epoch": 0.5183587585481325, "grad_norm": 1.400451898574829, "learning_rate": 9.892658348928316e-05, "loss": 2.15, "step": 4927 }, { "epoch": 0.5184639663335087, "grad_norm": 1.4197369813919067, "learning_rate": 9.88925081465024e-05, "loss": 1.6032, "step": 4928 }, { "epoch": 0.5185691741188848, "grad_norm": 1.509582757949829, "learning_rate": 9.8858432932331e-05, "loss": 1.9569, "step": 4929 }, { "epoch": 0.518674381904261, "grad_norm": 0.9736523628234863, "learning_rate": 9.882435785072601e-05, "loss": 1.6721, "step": 4930 }, { "epoch": 0.518779589689637, "grad_norm": 1.5283832550048828, "learning_rate": 9.87902829056445e-05, "loss": 2.0695, "step": 4931 }, { "epoch": 0.5188847974750131, "grad_norm": 1.4057093858718872, "learning_rate": 9.875620810104344e-05, "loss": 1.4839, "step": 4932 }, { "epoch": 0.5189900052603893, "grad_norm": 1.614888310432434, "learning_rate": 9.872213344087983e-05, "loss": 1.7947, "step": 4933 }, { "epoch": 0.5190952130457653, "grad_norm": 1.5216361284255981, "learning_rate": 9.868805892911067e-05, "loss": 1.3341, "step": 4934 }, { "epoch": 0.5192004208311415, "grad_norm": 1.41942298412323, "learning_rate": 9.86539845696929e-05, "loss": 1.8061, "step": 4935 }, { "epoch": 0.5193056286165176, "grad_norm": 1.3126589059829712, "learning_rate": 9.861991036658345e-05, "loss": 2.092, "step": 4936 }, { "epoch": 0.5194108364018938, "grad_norm": 2.1999170780181885, "learning_rate": 9.858583632373927e-05, "loss": 1.9388, "step": 4937 }, { "epoch": 0.5195160441872698, "grad_norm": 1.6807349920272827, "learning_rate": 9.85517624451173e-05, "loss": 1.3968, "step": 4938 }, { "epoch": 0.519621251972646, "grad_norm": 1.121988296508789, "learning_rate": 9.851768873467435e-05, "loss": 1.8882, "step": 4939 }, { "epoch": 0.5197264597580221, "grad_norm": 1.7207212448120117, "learning_rate": 9.848361519636733e-05, "loss": 1.5875, "step": 4940 }, { "epoch": 0.5198316675433982, "grad_norm": 1.8505074977874756, "learning_rate": 9.84495418341531e-05, "loss": 2.0285, "step": 4941 }, { "epoch": 0.5199368753287743, "grad_norm": 1.3527370691299438, "learning_rate": 9.841546865198846e-05, "loss": 1.8778, "step": 4942 }, { "epoch": 0.5200420831141505, "grad_norm": 1.1984349489212036, "learning_rate": 9.838139565383022e-05, "loss": 1.9808, "step": 4943 }, { "epoch": 0.5201472908995266, "grad_norm": 1.9260704517364502, "learning_rate": 9.834732284363519e-05, "loss": 1.8141, "step": 4944 }, { "epoch": 0.5202524986849026, "grad_norm": 1.8520371913909912, "learning_rate": 9.83132502253601e-05, "loss": 2.1056, "step": 4945 }, { "epoch": 0.5203577064702788, "grad_norm": 1.291551113128662, "learning_rate": 9.827917780296172e-05, "loss": 1.7853, "step": 4946 }, { "epoch": 0.5204629142556549, "grad_norm": 1.5085182189941406, "learning_rate": 9.824510558039675e-05, "loss": 1.8278, "step": 4947 }, { "epoch": 0.5205681220410311, "grad_norm": 1.3979451656341553, "learning_rate": 9.821103356162189e-05, "loss": 1.2452, "step": 4948 }, { "epoch": 0.5206733298264071, "grad_norm": 1.249040961265564, "learning_rate": 9.817696175059381e-05, "loss": 1.2894, "step": 4949 }, { "epoch": 0.5207785376117833, "grad_norm": 1.443249225616455, "learning_rate": 9.814289015126919e-05, "loss": 1.8148, "step": 4950 }, { "epoch": 0.5208837453971594, "grad_norm": 1.2137818336486816, "learning_rate": 9.81088187676046e-05, "loss": 2.0707, "step": 4951 }, { "epoch": 0.5209889531825355, "grad_norm": 1.0801608562469482, "learning_rate": 9.807474760355665e-05, "loss": 1.5743, "step": 4952 }, { "epoch": 0.5210941609679116, "grad_norm": 1.206287145614624, "learning_rate": 9.804067666308192e-05, "loss": 2.013, "step": 4953 }, { "epoch": 0.5211993687532878, "grad_norm": 1.502138376235962, "learning_rate": 9.800660595013696e-05, "loss": 1.8673, "step": 4954 }, { "epoch": 0.5213045765386639, "grad_norm": 1.6293638944625854, "learning_rate": 9.797253546867831e-05, "loss": 2.0488, "step": 4955 }, { "epoch": 0.52140978432404, "grad_norm": 1.6563540697097778, "learning_rate": 9.79384652226624e-05, "loss": 1.6133, "step": 4956 }, { "epoch": 0.5215149921094161, "grad_norm": 1.4221042394638062, "learning_rate": 9.790439521604574e-05, "loss": 1.7739, "step": 4957 }, { "epoch": 0.5216201998947922, "grad_norm": 1.3993518352508545, "learning_rate": 9.78703254527848e-05, "loss": 1.2957, "step": 4958 }, { "epoch": 0.5217254076801683, "grad_norm": 1.2816994190216064, "learning_rate": 9.783625593683592e-05, "loss": 1.9906, "step": 4959 }, { "epoch": 0.5218306154655444, "grad_norm": 1.2781740427017212, "learning_rate": 9.78021866721555e-05, "loss": 1.5746, "step": 4960 }, { "epoch": 0.5219358232509206, "grad_norm": 1.2438249588012695, "learning_rate": 9.776811766269993e-05, "loss": 1.978, "step": 4961 }, { "epoch": 0.5220410310362967, "grad_norm": 2.31378436088562, "learning_rate": 9.773404891242551e-05, "loss": 1.8975, "step": 4962 }, { "epoch": 0.5221462388216728, "grad_norm": 1.96122407913208, "learning_rate": 9.769998042528852e-05, "loss": 2.2517, "step": 4963 }, { "epoch": 0.5222514466070489, "grad_norm": 1.7919749021530151, "learning_rate": 9.766591220524521e-05, "loss": 1.7129, "step": 4964 }, { "epoch": 0.5223566543924251, "grad_norm": 1.509630799293518, "learning_rate": 9.763184425625186e-05, "loss": 1.589, "step": 4965 }, { "epoch": 0.5224618621778011, "grad_norm": 1.3050771951675415, "learning_rate": 9.759777658226462e-05, "loss": 1.8745, "step": 4966 }, { "epoch": 0.5225670699631773, "grad_norm": 2.0204238891601562, "learning_rate": 9.756370918723968e-05, "loss": 2.0122, "step": 4967 }, { "epoch": 0.5226722777485534, "grad_norm": 1.1685175895690918, "learning_rate": 9.752964207513318e-05, "loss": 1.5934, "step": 4968 }, { "epoch": 0.5227774855339296, "grad_norm": 2.0005977153778076, "learning_rate": 9.749557524990121e-05, "loss": 1.8383, "step": 4969 }, { "epoch": 0.5228826933193056, "grad_norm": 1.3754639625549316, "learning_rate": 9.746150871549981e-05, "loss": 2.2025, "step": 4970 }, { "epoch": 0.5229879011046817, "grad_norm": 1.4466034173965454, "learning_rate": 9.742744247588512e-05, "loss": 1.8922, "step": 4971 }, { "epoch": 0.5230931088900579, "grad_norm": 1.596903920173645, "learning_rate": 9.739337653501299e-05, "loss": 1.8912, "step": 4972 }, { "epoch": 0.5231983166754339, "grad_norm": 1.1891504526138306, "learning_rate": 9.73593108968395e-05, "loss": 1.5827, "step": 4973 }, { "epoch": 0.5233035244608101, "grad_norm": 1.6135132312774658, "learning_rate": 9.732524556532051e-05, "loss": 2.2273, "step": 4974 }, { "epoch": 0.5234087322461862, "grad_norm": 2.420681953430176, "learning_rate": 9.729118054441194e-05, "loss": 1.9405, "step": 4975 }, { "epoch": 0.5235139400315624, "grad_norm": 1.0372477769851685, "learning_rate": 9.72571158380697e-05, "loss": 2.0549, "step": 4976 }, { "epoch": 0.5236191478169384, "grad_norm": 1.2813150882720947, "learning_rate": 9.722305145024951e-05, "loss": 1.3903, "step": 4977 }, { "epoch": 0.5237243556023146, "grad_norm": 1.1346882581710815, "learning_rate": 9.718898738490723e-05, "loss": 2.2729, "step": 4978 }, { "epoch": 0.5238295633876907, "grad_norm": 1.6237995624542236, "learning_rate": 9.71549236459986e-05, "loss": 1.7977, "step": 4979 }, { "epoch": 0.5239347711730669, "grad_norm": 1.280985713005066, "learning_rate": 9.71208602374793e-05, "loss": 1.543, "step": 4980 }, { "epoch": 0.5240399789584429, "grad_norm": 1.655838131904602, "learning_rate": 9.708679716330504e-05, "loss": 2.5013, "step": 4981 }, { "epoch": 0.524145186743819, "grad_norm": 1.0946263074874878, "learning_rate": 9.705273442743142e-05, "loss": 1.923, "step": 4982 }, { "epoch": 0.5242503945291952, "grad_norm": 1.1894264221191406, "learning_rate": 9.701867203381405e-05, "loss": 1.7739, "step": 4983 }, { "epoch": 0.5243556023145712, "grad_norm": 1.1565569639205933, "learning_rate": 9.698460998640848e-05, "loss": 1.5141, "step": 4984 }, { "epoch": 0.5244608100999474, "grad_norm": 1.2816983461380005, "learning_rate": 9.695054828917021e-05, "loss": 1.6513, "step": 4985 }, { "epoch": 0.5245660178853235, "grad_norm": 1.1288788318634033, "learning_rate": 9.691648694605475e-05, "loss": 1.9599, "step": 4986 }, { "epoch": 0.5246712256706997, "grad_norm": 2.1675658226013184, "learning_rate": 9.688242596101749e-05, "loss": 1.8789, "step": 4987 }, { "epoch": 0.5247764334560757, "grad_norm": 1.5547009706497192, "learning_rate": 9.684836533801383e-05, "loss": 1.6302, "step": 4988 }, { "epoch": 0.5248816412414519, "grad_norm": 1.4101799726486206, "learning_rate": 9.681430508099916e-05, "loss": 1.6675, "step": 4989 }, { "epoch": 0.524986849026828, "grad_norm": 1.8517366647720337, "learning_rate": 9.678024519392871e-05, "loss": 1.555, "step": 4990 }, { "epoch": 0.525092056812204, "grad_norm": 1.7405954599380493, "learning_rate": 9.67461856807578e-05, "loss": 2.156, "step": 4991 }, { "epoch": 0.5251972645975802, "grad_norm": 1.4130207300186157, "learning_rate": 9.671212654544167e-05, "loss": 1.2812, "step": 4992 }, { "epoch": 0.5253024723829564, "grad_norm": 0.9680867791175842, "learning_rate": 9.667806779193541e-05, "loss": 1.6768, "step": 4993 }, { "epoch": 0.5254076801683325, "grad_norm": 1.184627890586853, "learning_rate": 9.664400942419423e-05, "loss": 1.4777, "step": 4994 }, { "epoch": 0.5255128879537085, "grad_norm": 1.430696964263916, "learning_rate": 9.660995144617316e-05, "loss": 1.8958, "step": 4995 }, { "epoch": 0.5256180957390847, "grad_norm": 1.450882911682129, "learning_rate": 9.657589386182725e-05, "loss": 2.0249, "step": 4996 }, { "epoch": 0.5257233035244608, "grad_norm": 1.3977291584014893, "learning_rate": 9.654183667511154e-05, "loss": 1.873, "step": 4997 }, { "epoch": 0.5258285113098369, "grad_norm": 1.272571325302124, "learning_rate": 9.650777988998093e-05, "loss": 2.1763, "step": 4998 }, { "epoch": 0.525933719095213, "grad_norm": 1.565454125404358, "learning_rate": 9.647372351039035e-05, "loss": 2.0786, "step": 4999 }, { "epoch": 0.5260389268805892, "grad_norm": 1.4186714887619019, "learning_rate": 9.643966754029466e-05, "loss": 1.9929, "step": 5000 }, { "epoch": 0.5261441346659653, "grad_norm": 1.9095489978790283, "learning_rate": 9.640561198364864e-05, "loss": 1.466, "step": 5001 }, { "epoch": 0.5262493424513414, "grad_norm": 1.268289566040039, "learning_rate": 9.637155684440705e-05, "loss": 1.3982, "step": 5002 }, { "epoch": 0.5263545502367175, "grad_norm": 1.090356469154358, "learning_rate": 9.633750212652465e-05, "loss": 2.1568, "step": 5003 }, { "epoch": 0.5264597580220937, "grad_norm": 2.0192060470581055, "learning_rate": 9.630344783395604e-05, "loss": 1.8322, "step": 5004 }, { "epoch": 0.5265649658074697, "grad_norm": 0.9348209500312805, "learning_rate": 9.626939397065586e-05, "loss": 1.8744, "step": 5005 }, { "epoch": 0.5266701735928458, "grad_norm": 1.5496996641159058, "learning_rate": 9.623534054057868e-05, "loss": 1.703, "step": 5006 }, { "epoch": 0.526775381378222, "grad_norm": 1.0677669048309326, "learning_rate": 9.620128754767904e-05, "loss": 1.8331, "step": 5007 }, { "epoch": 0.5268805891635981, "grad_norm": 1.6514785289764404, "learning_rate": 9.616723499591131e-05, "loss": 1.8713, "step": 5008 }, { "epoch": 0.5269857969489742, "grad_norm": 1.1393558979034424, "learning_rate": 9.613318288922999e-05, "loss": 1.6698, "step": 5009 }, { "epoch": 0.5270910047343503, "grad_norm": 1.6217390298843384, "learning_rate": 9.609913123158941e-05, "loss": 1.7692, "step": 5010 }, { "epoch": 0.5271962125197265, "grad_norm": 1.725000023841858, "learning_rate": 9.606508002694386e-05, "loss": 1.7788, "step": 5011 }, { "epoch": 0.5273014203051026, "grad_norm": 1.360186219215393, "learning_rate": 9.603102927924762e-05, "loss": 1.9891, "step": 5012 }, { "epoch": 0.5274066280904787, "grad_norm": 1.6628353595733643, "learning_rate": 9.59969789924549e-05, "loss": 1.7478, "step": 5013 }, { "epoch": 0.5275118358758548, "grad_norm": 1.2454009056091309, "learning_rate": 9.596292917051985e-05, "loss": 1.744, "step": 5014 }, { "epoch": 0.527617043661231, "grad_norm": 1.4857524633407593, "learning_rate": 9.592887981739648e-05, "loss": 1.5022, "step": 5015 }, { "epoch": 0.527722251446607, "grad_norm": 1.3809852600097656, "learning_rate": 9.58948309370389e-05, "loss": 1.8917, "step": 5016 }, { "epoch": 0.5278274592319832, "grad_norm": 0.873028039932251, "learning_rate": 9.58607825334011e-05, "loss": 1.7126, "step": 5017 }, { "epoch": 0.5279326670173593, "grad_norm": 1.1375359296798706, "learning_rate": 9.5826734610437e-05, "loss": 2.3039, "step": 5018 }, { "epoch": 0.5280378748027355, "grad_norm": 1.4480020999908447, "learning_rate": 9.579268717210045e-05, "loss": 1.6948, "step": 5019 }, { "epoch": 0.5281430825881115, "grad_norm": 1.870962381362915, "learning_rate": 9.575864022234527e-05, "loss": 1.8897, "step": 5020 }, { "epoch": 0.5282482903734876, "grad_norm": 1.3602070808410645, "learning_rate": 9.572459376512528e-05, "loss": 2.2099, "step": 5021 }, { "epoch": 0.5283534981588638, "grad_norm": 1.2275303602218628, "learning_rate": 9.56905478043941e-05, "loss": 1.9035, "step": 5022 }, { "epoch": 0.5284587059442398, "grad_norm": 1.2830156087875366, "learning_rate": 9.565650234410542e-05, "loss": 1.6825, "step": 5023 }, { "epoch": 0.528563913729616, "grad_norm": 1.4303338527679443, "learning_rate": 9.562245738821285e-05, "loss": 2.0779, "step": 5024 }, { "epoch": 0.5286691215149921, "grad_norm": 1.9997981786727905, "learning_rate": 9.558841294066985e-05, "loss": 1.9707, "step": 5025 }, { "epoch": 0.5287743293003683, "grad_norm": 1.8480234146118164, "learning_rate": 9.555436900542993e-05, "loss": 1.656, "step": 5026 }, { "epoch": 0.5288795370857443, "grad_norm": 1.244927167892456, "learning_rate": 9.552032558644654e-05, "loss": 2.1726, "step": 5027 }, { "epoch": 0.5289847448711205, "grad_norm": 1.005563497543335, "learning_rate": 9.548628268767294e-05, "loss": 2.0138, "step": 5028 }, { "epoch": 0.5290899526564966, "grad_norm": 1.155867576599121, "learning_rate": 9.545224031306249e-05, "loss": 1.6304, "step": 5029 }, { "epoch": 0.5291951604418726, "grad_norm": 1.5326203107833862, "learning_rate": 9.541819846656839e-05, "loss": 1.7073, "step": 5030 }, { "epoch": 0.5293003682272488, "grad_norm": 1.6995837688446045, "learning_rate": 9.538415715214383e-05, "loss": 1.8962, "step": 5031 }, { "epoch": 0.529405576012625, "grad_norm": 1.8225377798080444, "learning_rate": 9.535011637374189e-05, "loss": 2.0861, "step": 5032 }, { "epoch": 0.5295107837980011, "grad_norm": 1.634724736213684, "learning_rate": 9.53160761353156e-05, "loss": 1.4762, "step": 5033 }, { "epoch": 0.5296159915833771, "grad_norm": 0.9659777879714966, "learning_rate": 9.528203644081801e-05, "loss": 1.0181, "step": 5034 }, { "epoch": 0.5297211993687533, "grad_norm": 1.0323597192764282, "learning_rate": 9.5247997294202e-05, "loss": 1.7988, "step": 5035 }, { "epoch": 0.5298264071541294, "grad_norm": 1.1700800657272339, "learning_rate": 9.521395869942039e-05, "loss": 1.8157, "step": 5036 }, { "epoch": 0.5299316149395055, "grad_norm": 2.2417099475860596, "learning_rate": 9.517992066042598e-05, "loss": 2.1183, "step": 5037 }, { "epoch": 0.5300368227248816, "grad_norm": 1.6578564643859863, "learning_rate": 9.514588318117152e-05, "loss": 1.5281, "step": 5038 }, { "epoch": 0.5301420305102578, "grad_norm": 1.3541558980941772, "learning_rate": 9.511184626560968e-05, "loss": 2.1024, "step": 5039 }, { "epoch": 0.5302472382956339, "grad_norm": 1.1925020217895508, "learning_rate": 9.507780991769302e-05, "loss": 1.6266, "step": 5040 }, { "epoch": 0.53035244608101, "grad_norm": 2.338503360748291, "learning_rate": 9.504377414137407e-05, "loss": 2.1032, "step": 5041 }, { "epoch": 0.5304576538663861, "grad_norm": 1.6318100690841675, "learning_rate": 9.500973894060534e-05, "loss": 1.9588, "step": 5042 }, { "epoch": 0.5305628616517623, "grad_norm": 1.629852533340454, "learning_rate": 9.497570431933917e-05, "loss": 1.8528, "step": 5043 }, { "epoch": 0.5306680694371384, "grad_norm": 1.3625423908233643, "learning_rate": 9.494167028152792e-05, "loss": 2.5084, "step": 5044 }, { "epoch": 0.5307732772225144, "grad_norm": 1.6486890316009521, "learning_rate": 9.490763683112386e-05, "loss": 1.3986, "step": 5045 }, { "epoch": 0.5308784850078906, "grad_norm": 1.887699007987976, "learning_rate": 9.487360397207916e-05, "loss": 1.9863, "step": 5046 }, { "epoch": 0.5309836927932667, "grad_norm": 1.5402462482452393, "learning_rate": 9.483957170834593e-05, "loss": 2.2311, "step": 5047 }, { "epoch": 0.5310889005786428, "grad_norm": 1.120913028717041, "learning_rate": 9.480554004387627e-05, "loss": 1.6302, "step": 5048 }, { "epoch": 0.5311941083640189, "grad_norm": 1.8108731508255005, "learning_rate": 9.477150898262213e-05, "loss": 1.8564, "step": 5049 }, { "epoch": 0.5312993161493951, "grad_norm": 0.9820263385772705, "learning_rate": 9.473747852853543e-05, "loss": 2.0829, "step": 5050 }, { "epoch": 0.5314045239347712, "grad_norm": 2.3030552864074707, "learning_rate": 9.4703448685568e-05, "loss": 1.5117, "step": 5051 }, { "epoch": 0.5315097317201473, "grad_norm": 1.2382352352142334, "learning_rate": 9.466941945767168e-05, "loss": 1.4252, "step": 5052 }, { "epoch": 0.5316149395055234, "grad_norm": 1.1163996458053589, "learning_rate": 9.463539084879809e-05, "loss": 1.8226, "step": 5053 }, { "epoch": 0.5317201472908996, "grad_norm": 1.8982148170471191, "learning_rate": 9.46013628628989e-05, "loss": 1.6211, "step": 5054 }, { "epoch": 0.5318253550762756, "grad_norm": 1.5627444982528687, "learning_rate": 9.456733550392568e-05, "loss": 1.846, "step": 5055 }, { "epoch": 0.5319305628616517, "grad_norm": 3.1989519596099854, "learning_rate": 9.453330877582988e-05, "loss": 1.5141, "step": 5056 }, { "epoch": 0.5320357706470279, "grad_norm": 1.2844103574752808, "learning_rate": 9.449928268256299e-05, "loss": 1.4024, "step": 5057 }, { "epoch": 0.532140978432404, "grad_norm": 2.4570162296295166, "learning_rate": 9.446525722807623e-05, "loss": 1.7243, "step": 5058 }, { "epoch": 0.5322461862177801, "grad_norm": 1.4320532083511353, "learning_rate": 9.443123241632093e-05, "loss": 1.5919, "step": 5059 }, { "epoch": 0.5323513940031562, "grad_norm": 1.297221302986145, "learning_rate": 9.439720825124827e-05, "loss": 1.6516, "step": 5060 }, { "epoch": 0.5324566017885324, "grad_norm": 1.2610849142074585, "learning_rate": 9.436318473680936e-05, "loss": 1.7887, "step": 5061 }, { "epoch": 0.5325618095739084, "grad_norm": 1.0165259838104248, "learning_rate": 9.432916187695525e-05, "loss": 1.8535, "step": 5062 }, { "epoch": 0.5326670173592846, "grad_norm": 1.0816254615783691, "learning_rate": 9.42951396756369e-05, "loss": 1.93, "step": 5063 }, { "epoch": 0.5327722251446607, "grad_norm": 1.453770637512207, "learning_rate": 9.42611181368052e-05, "loss": 1.601, "step": 5064 }, { "epoch": 0.5328774329300369, "grad_norm": 1.5351864099502563, "learning_rate": 9.422709726441094e-05, "loss": 2.3019, "step": 5065 }, { "epoch": 0.5329826407154129, "grad_norm": 1.867706537246704, "learning_rate": 9.419307706240489e-05, "loss": 2.1457, "step": 5066 }, { "epoch": 0.533087848500789, "grad_norm": 2.1947357654571533, "learning_rate": 9.415905753473765e-05, "loss": 1.4276, "step": 5067 }, { "epoch": 0.5331930562861652, "grad_norm": 1.1833242177963257, "learning_rate": 9.412503868535983e-05, "loss": 1.9144, "step": 5068 }, { "epoch": 0.5332982640715412, "grad_norm": 1.284825325012207, "learning_rate": 9.409102051822195e-05, "loss": 2.1141, "step": 5069 }, { "epoch": 0.5334034718569174, "grad_norm": 1.165414571762085, "learning_rate": 9.405700303727435e-05, "loss": 1.8646, "step": 5070 }, { "epoch": 0.5335086796422935, "grad_norm": 1.4519579410552979, "learning_rate": 9.402298624646744e-05, "loss": 2.0232, "step": 5071 }, { "epoch": 0.5336138874276697, "grad_norm": 1.0977115631103516, "learning_rate": 9.398897014975149e-05, "loss": 1.4565, "step": 5072 }, { "epoch": 0.5337190952130457, "grad_norm": 1.6109386682510376, "learning_rate": 9.39549547510766e-05, "loss": 2.0598, "step": 5073 }, { "epoch": 0.5338243029984219, "grad_norm": 1.9979568719863892, "learning_rate": 9.392094005439291e-05, "loss": 2.0561, "step": 5074 }, { "epoch": 0.533929510783798, "grad_norm": 1.9197112321853638, "learning_rate": 9.388692606365043e-05, "loss": 1.81, "step": 5075 }, { "epoch": 0.5340347185691742, "grad_norm": 1.5256714820861816, "learning_rate": 9.385291278279914e-05, "loss": 1.4039, "step": 5076 }, { "epoch": 0.5341399263545502, "grad_norm": 1.7796401977539062, "learning_rate": 9.381890021578881e-05, "loss": 2.1833, "step": 5077 }, { "epoch": 0.5342451341399264, "grad_norm": 1.145056962966919, "learning_rate": 9.37848883665693e-05, "loss": 1.7855, "step": 5078 }, { "epoch": 0.5343503419253025, "grad_norm": 2.008789300918579, "learning_rate": 9.375087723909017e-05, "loss": 1.6486, "step": 5079 }, { "epoch": 0.5344555497106785, "grad_norm": 1.488021731376648, "learning_rate": 9.371686683730113e-05, "loss": 1.9833, "step": 5080 }, { "epoch": 0.5345607574960547, "grad_norm": 1.6001816987991333, "learning_rate": 9.368285716515162e-05, "loss": 1.4451, "step": 5081 }, { "epoch": 0.5346659652814308, "grad_norm": 1.6554683446884155, "learning_rate": 9.36488482265911e-05, "loss": 2.2062, "step": 5082 }, { "epoch": 0.534771173066807, "grad_norm": 1.8193860054016113, "learning_rate": 9.361484002556898e-05, "loss": 1.4615, "step": 5083 }, { "epoch": 0.534876380852183, "grad_norm": 1.3823652267456055, "learning_rate": 9.35808325660344e-05, "loss": 1.9892, "step": 5084 }, { "epoch": 0.5349815886375592, "grad_norm": 1.5087515115737915, "learning_rate": 9.354682585193662e-05, "loss": 1.9344, "step": 5085 }, { "epoch": 0.5350867964229353, "grad_norm": 1.761411190032959, "learning_rate": 9.351281988722469e-05, "loss": 1.7704, "step": 5086 }, { "epoch": 0.5351920042083114, "grad_norm": 1.332608938217163, "learning_rate": 9.347881467584764e-05, "loss": 1.6883, "step": 5087 }, { "epoch": 0.5352972119936875, "grad_norm": 1.6421012878417969, "learning_rate": 9.344481022175436e-05, "loss": 1.832, "step": 5088 }, { "epoch": 0.5354024197790637, "grad_norm": 1.594934344291687, "learning_rate": 9.34108065288937e-05, "loss": 1.6753, "step": 5089 }, { "epoch": 0.5355076275644398, "grad_norm": 1.367395043373108, "learning_rate": 9.337680360121436e-05, "loss": 1.1994, "step": 5090 }, { "epoch": 0.5356128353498159, "grad_norm": 1.6289167404174805, "learning_rate": 9.334280144266501e-05, "loss": 1.8281, "step": 5091 }, { "epoch": 0.535718043135192, "grad_norm": 2.1067452430725098, "learning_rate": 9.330880005719422e-05, "loss": 1.4705, "step": 5092 }, { "epoch": 0.5358232509205682, "grad_norm": 1.5518765449523926, "learning_rate": 9.327479944875045e-05, "loss": 2.2224, "step": 5093 }, { "epoch": 0.5359284587059442, "grad_norm": 1.0815091133117676, "learning_rate": 9.324079962128207e-05, "loss": 1.8141, "step": 5094 }, { "epoch": 0.5360336664913203, "grad_norm": 1.461266279220581, "learning_rate": 9.320680057873735e-05, "loss": 2.0772, "step": 5095 }, { "epoch": 0.5361388742766965, "grad_norm": 1.2924128770828247, "learning_rate": 9.317280232506454e-05, "loss": 1.8573, "step": 5096 }, { "epoch": 0.5362440820620726, "grad_norm": 1.1167140007019043, "learning_rate": 9.31388048642117e-05, "loss": 2.0632, "step": 5097 }, { "epoch": 0.5363492898474487, "grad_norm": 1.1252926588058472, "learning_rate": 9.310480820012684e-05, "loss": 2.7284, "step": 5098 }, { "epoch": 0.5364544976328248, "grad_norm": 1.0790119171142578, "learning_rate": 9.307081233675791e-05, "loss": 1.9891, "step": 5099 }, { "epoch": 0.536559705418201, "grad_norm": 1.325523018836975, "learning_rate": 9.303681727805276e-05, "loss": 2.269, "step": 5100 }, { "epoch": 0.536664913203577, "grad_norm": 1.8128808736801147, "learning_rate": 9.300282302795909e-05, "loss": 1.2854, "step": 5101 }, { "epoch": 0.5367701209889532, "grad_norm": 2.0237619876861572, "learning_rate": 9.29688295904245e-05, "loss": 1.7467, "step": 5102 }, { "epoch": 0.5368753287743293, "grad_norm": 1.2668648958206177, "learning_rate": 9.293483696939658e-05, "loss": 1.72, "step": 5103 }, { "epoch": 0.5369805365597055, "grad_norm": 1.171208143234253, "learning_rate": 9.290084516882281e-05, "loss": 1.9828, "step": 5104 }, { "epoch": 0.5370857443450815, "grad_norm": 1.4353476762771606, "learning_rate": 9.286685419265048e-05, "loss": 1.9993, "step": 5105 }, { "epoch": 0.5371909521304576, "grad_norm": 1.5309464931488037, "learning_rate": 9.283286404482688e-05, "loss": 2.002, "step": 5106 }, { "epoch": 0.5372961599158338, "grad_norm": 1.001417636871338, "learning_rate": 9.27988747292992e-05, "loss": 2.2841, "step": 5107 }, { "epoch": 0.5374013677012099, "grad_norm": 1.408814549446106, "learning_rate": 9.276488625001448e-05, "loss": 1.5659, "step": 5108 }, { "epoch": 0.537506575486586, "grad_norm": 1.94085693359375, "learning_rate": 9.273089861091969e-05, "loss": 1.3672, "step": 5109 }, { "epoch": 0.5376117832719621, "grad_norm": 1.7884807586669922, "learning_rate": 9.269691181596169e-05, "loss": 1.5813, "step": 5110 }, { "epoch": 0.5377169910573383, "grad_norm": 1.3529471158981323, "learning_rate": 9.266292586908732e-05, "loss": 2.0667, "step": 5111 }, { "epoch": 0.5378221988427143, "grad_norm": 0.8547163009643555, "learning_rate": 9.262894077424317e-05, "loss": 1.8774, "step": 5112 }, { "epoch": 0.5379274066280905, "grad_norm": 1.3882497549057007, "learning_rate": 9.259495653537586e-05, "loss": 1.9437, "step": 5113 }, { "epoch": 0.5380326144134666, "grad_norm": 1.9616116285324097, "learning_rate": 9.256097315643188e-05, "loss": 1.5366, "step": 5114 }, { "epoch": 0.5381378221988428, "grad_norm": 0.89047771692276, "learning_rate": 9.252699064135758e-05, "loss": 2.124, "step": 5115 }, { "epoch": 0.5382430299842188, "grad_norm": 1.3593010902404785, "learning_rate": 9.249300899409924e-05, "loss": 1.9311, "step": 5116 }, { "epoch": 0.538348237769595, "grad_norm": 1.1994686126708984, "learning_rate": 9.245902821860308e-05, "loss": 1.8233, "step": 5117 }, { "epoch": 0.5384534455549711, "grad_norm": 1.4323614835739136, "learning_rate": 9.24250483188151e-05, "loss": 1.5137, "step": 5118 }, { "epoch": 0.5385586533403471, "grad_norm": 1.0706384181976318, "learning_rate": 9.239106929868133e-05, "loss": 1.4401, "step": 5119 }, { "epoch": 0.5386638611257233, "grad_norm": 1.6682357788085938, "learning_rate": 9.235709116214764e-05, "loss": 1.8276, "step": 5120 }, { "epoch": 0.5387690689110994, "grad_norm": 1.3352079391479492, "learning_rate": 9.232311391315979e-05, "loss": 2.0025, "step": 5121 }, { "epoch": 0.5388742766964756, "grad_norm": 1.127171516418457, "learning_rate": 9.228913755566344e-05, "loss": 1.959, "step": 5122 }, { "epoch": 0.5389794844818516, "grad_norm": 0.859086275100708, "learning_rate": 9.225516209360413e-05, "loss": 1.5351, "step": 5123 }, { "epoch": 0.5390846922672278, "grad_norm": 1.3062644004821777, "learning_rate": 9.222118753092735e-05, "loss": 1.8726, "step": 5124 }, { "epoch": 0.5391899000526039, "grad_norm": 1.8840057849884033, "learning_rate": 9.218721387157846e-05, "loss": 1.8806, "step": 5125 }, { "epoch": 0.53929510783798, "grad_norm": 1.518869400024414, "learning_rate": 9.215324111950267e-05, "loss": 1.5398, "step": 5126 }, { "epoch": 0.5394003156233561, "grad_norm": 1.1188104152679443, "learning_rate": 9.211926927864518e-05, "loss": 1.5614, "step": 5127 }, { "epoch": 0.5395055234087323, "grad_norm": 1.0320559740066528, "learning_rate": 9.208529835295098e-05, "loss": 1.6083, "step": 5128 }, { "epoch": 0.5396107311941084, "grad_norm": 2.2655322551727295, "learning_rate": 9.205132834636502e-05, "loss": 1.5238, "step": 5129 }, { "epoch": 0.5397159389794844, "grad_norm": 1.1405690908432007, "learning_rate": 9.201735926283213e-05, "loss": 2.1654, "step": 5130 }, { "epoch": 0.5398211467648606, "grad_norm": 1.9452557563781738, "learning_rate": 9.198339110629701e-05, "loss": 1.5046, "step": 5131 }, { "epoch": 0.5399263545502367, "grad_norm": 1.15932035446167, "learning_rate": 9.194942388070431e-05, "loss": 1.6429, "step": 5132 }, { "epoch": 0.5400315623356128, "grad_norm": 1.0874872207641602, "learning_rate": 9.191545758999848e-05, "loss": 1.3977, "step": 5133 }, { "epoch": 0.5401367701209889, "grad_norm": 1.428829550743103, "learning_rate": 9.188149223812393e-05, "loss": 1.8418, "step": 5134 }, { "epoch": 0.5402419779063651, "grad_norm": 1.535487174987793, "learning_rate": 9.1847527829025e-05, "loss": 1.6133, "step": 5135 }, { "epoch": 0.5403471856917412, "grad_norm": 1.295906662940979, "learning_rate": 9.181356436664578e-05, "loss": 1.9259, "step": 5136 }, { "epoch": 0.5404523934771173, "grad_norm": 0.8215249180793762, "learning_rate": 9.177960185493036e-05, "loss": 1.8883, "step": 5137 }, { "epoch": 0.5405576012624934, "grad_norm": 1.4424512386322021, "learning_rate": 9.174564029782275e-05, "loss": 1.9409, "step": 5138 }, { "epoch": 0.5406628090478696, "grad_norm": 1.4258966445922852, "learning_rate": 9.171167969926672e-05, "loss": 1.5655, "step": 5139 }, { "epoch": 0.5407680168332457, "grad_norm": 1.681448221206665, "learning_rate": 9.167772006320604e-05, "loss": 1.6488, "step": 5140 }, { "epoch": 0.5408732246186218, "grad_norm": 1.6626986265182495, "learning_rate": 9.164376139358433e-05, "loss": 1.7347, "step": 5141 }, { "epoch": 0.5409784324039979, "grad_norm": 1.1256407499313354, "learning_rate": 9.16098036943451e-05, "loss": 2.2423, "step": 5142 }, { "epoch": 0.541083640189374, "grad_norm": 2.0072197914123535, "learning_rate": 9.157584696943175e-05, "loss": 1.2106, "step": 5143 }, { "epoch": 0.5411888479747501, "grad_norm": 1.7380682229995728, "learning_rate": 9.154189122278754e-05, "loss": 1.436, "step": 5144 }, { "epoch": 0.5412940557601262, "grad_norm": 1.4622247219085693, "learning_rate": 9.150793645835562e-05, "loss": 1.0282, "step": 5145 }, { "epoch": 0.5413992635455024, "grad_norm": 1.5467033386230469, "learning_rate": 9.147398268007912e-05, "loss": 1.4557, "step": 5146 }, { "epoch": 0.5415044713308785, "grad_norm": 1.8523279428482056, "learning_rate": 9.14400298919009e-05, "loss": 1.7551, "step": 5147 }, { "epoch": 0.5416096791162546, "grad_norm": 2.7987358570098877, "learning_rate": 9.140607809776382e-05, "loss": 1.4688, "step": 5148 }, { "epoch": 0.5417148869016307, "grad_norm": 1.0487982034683228, "learning_rate": 9.137212730161062e-05, "loss": 2.1586, "step": 5149 }, { "epoch": 0.5418200946870069, "grad_norm": 1.263087272644043, "learning_rate": 9.133817750738384e-05, "loss": 1.4165, "step": 5150 }, { "epoch": 0.5419253024723829, "grad_norm": 1.5688707828521729, "learning_rate": 9.1304228719026e-05, "loss": 1.8595, "step": 5151 }, { "epoch": 0.5420305102577591, "grad_norm": 1.5955549478530884, "learning_rate": 9.127028094047944e-05, "loss": 1.722, "step": 5152 }, { "epoch": 0.5421357180431352, "grad_norm": 1.5457823276519775, "learning_rate": 9.123633417568641e-05, "loss": 1.8036, "step": 5153 }, { "epoch": 0.5422409258285114, "grad_norm": 2.240226984024048, "learning_rate": 9.120238842858903e-05, "loss": 1.8054, "step": 5154 }, { "epoch": 0.5423461336138874, "grad_norm": 1.7893513441085815, "learning_rate": 9.11684437031293e-05, "loss": 1.8167, "step": 5155 }, { "epoch": 0.5424513413992635, "grad_norm": 1.6553772687911987, "learning_rate": 9.113450000324914e-05, "loss": 1.9113, "step": 5156 }, { "epoch": 0.5425565491846397, "grad_norm": 1.1440198421478271, "learning_rate": 9.110055733289029e-05, "loss": 1.7322, "step": 5157 }, { "epoch": 0.5426617569700157, "grad_norm": 1.0650477409362793, "learning_rate": 9.106661569599442e-05, "loss": 1.9446, "step": 5158 }, { "epoch": 0.5427669647553919, "grad_norm": 1.3577461242675781, "learning_rate": 9.103267509650305e-05, "loss": 2.0171, "step": 5159 }, { "epoch": 0.542872172540768, "grad_norm": 1.0990657806396484, "learning_rate": 9.099873553835758e-05, "loss": 1.5223, "step": 5160 }, { "epoch": 0.5429773803261442, "grad_norm": 1.6959264278411865, "learning_rate": 9.096479702549933e-05, "loss": 2.3626, "step": 5161 }, { "epoch": 0.5430825881115202, "grad_norm": 1.6704100370407104, "learning_rate": 9.093085956186945e-05, "loss": 2.2676, "step": 5162 }, { "epoch": 0.5431877958968964, "grad_norm": 2.997234582901001, "learning_rate": 9.089692315140896e-05, "loss": 1.617, "step": 5163 }, { "epoch": 0.5432930036822725, "grad_norm": 1.61349356174469, "learning_rate": 9.086298779805887e-05, "loss": 1.7279, "step": 5164 }, { "epoch": 0.5433982114676486, "grad_norm": 1.6692132949829102, "learning_rate": 9.082905350575986e-05, "loss": 1.9036, "step": 5165 }, { "epoch": 0.5435034192530247, "grad_norm": 1.7874563932418823, "learning_rate": 9.079512027845268e-05, "loss": 2.4346, "step": 5166 }, { "epoch": 0.5436086270384008, "grad_norm": 1.3825801610946655, "learning_rate": 9.076118812007789e-05, "loss": 1.3331, "step": 5167 }, { "epoch": 0.543713834823777, "grad_norm": 1.6000703573226929, "learning_rate": 9.072725703457587e-05, "loss": 1.7898, "step": 5168 }, { "epoch": 0.543819042609153, "grad_norm": 1.600172996520996, "learning_rate": 9.069332702588698e-05, "loss": 1.4787, "step": 5169 }, { "epoch": 0.5439242503945292, "grad_norm": 1.7228039503097534, "learning_rate": 9.065939809795137e-05, "loss": 2.2927, "step": 5170 }, { "epoch": 0.5440294581799053, "grad_norm": 1.3640235662460327, "learning_rate": 9.062547025470908e-05, "loss": 1.8261, "step": 5171 }, { "epoch": 0.5441346659652815, "grad_norm": 1.3940327167510986, "learning_rate": 9.059154350010008e-05, "loss": 1.997, "step": 5172 }, { "epoch": 0.5442398737506575, "grad_norm": 1.7132809162139893, "learning_rate": 9.055761783806416e-05, "loss": 2.0858, "step": 5173 }, { "epoch": 0.5443450815360337, "grad_norm": 1.438615322113037, "learning_rate": 9.052369327254098e-05, "loss": 1.7468, "step": 5174 }, { "epoch": 0.5444502893214098, "grad_norm": 1.4938368797302246, "learning_rate": 9.048976980747008e-05, "loss": 1.7274, "step": 5175 }, { "epoch": 0.5445554971067859, "grad_norm": 1.2716962099075317, "learning_rate": 9.045584744679092e-05, "loss": 1.5497, "step": 5176 }, { "epoch": 0.544660704892162, "grad_norm": 1.7363189458847046, "learning_rate": 9.042192619444275e-05, "loss": 1.6291, "step": 5177 }, { "epoch": 0.5447659126775382, "grad_norm": 1.5554457902908325, "learning_rate": 9.038800605436475e-05, "loss": 1.8769, "step": 5178 }, { "epoch": 0.5448711204629143, "grad_norm": 1.222988247871399, "learning_rate": 9.035408703049596e-05, "loss": 1.9806, "step": 5179 }, { "epoch": 0.5449763282482903, "grad_norm": 1.5716575384140015, "learning_rate": 9.03201691267753e-05, "loss": 2.0254, "step": 5180 }, { "epoch": 0.5450815360336665, "grad_norm": 1.1745754480361938, "learning_rate": 9.02862523471415e-05, "loss": 2.1139, "step": 5181 }, { "epoch": 0.5451867438190426, "grad_norm": 0.8382359147071838, "learning_rate": 9.025233669553322e-05, "loss": 1.5266, "step": 5182 }, { "epoch": 0.5452919516044187, "grad_norm": 1.7974534034729004, "learning_rate": 9.021842217588901e-05, "loss": 1.7337, "step": 5183 }, { "epoch": 0.5453971593897948, "grad_norm": 1.5042651891708374, "learning_rate": 9.018450879214721e-05, "loss": 2.0298, "step": 5184 }, { "epoch": 0.545502367175171, "grad_norm": 1.2993311882019043, "learning_rate": 9.015059654824611e-05, "loss": 1.4338, "step": 5185 }, { "epoch": 0.5456075749605471, "grad_norm": 1.5049631595611572, "learning_rate": 9.011668544812377e-05, "loss": 1.6596, "step": 5186 }, { "epoch": 0.5457127827459232, "grad_norm": 1.4581938982009888, "learning_rate": 9.00827754957182e-05, "loss": 1.8009, "step": 5187 }, { "epoch": 0.5458179905312993, "grad_norm": 1.5323725938796997, "learning_rate": 9.004886669496728e-05, "loss": 1.6577, "step": 5188 }, { "epoch": 0.5459231983166755, "grad_norm": 1.0720787048339844, "learning_rate": 9.001495904980867e-05, "loss": 1.7775, "step": 5189 }, { "epoch": 0.5460284061020515, "grad_norm": 1.4327290058135986, "learning_rate": 8.998105256418e-05, "loss": 1.7773, "step": 5190 }, { "epoch": 0.5461336138874276, "grad_norm": 1.6901955604553223, "learning_rate": 8.99471472420187e-05, "loss": 1.8843, "step": 5191 }, { "epoch": 0.5462388216728038, "grad_norm": 2.1214852333068848, "learning_rate": 8.991324308726209e-05, "loss": 2.1451, "step": 5192 }, { "epoch": 0.54634402945818, "grad_norm": 1.875383734703064, "learning_rate": 8.987934010384733e-05, "loss": 1.6287, "step": 5193 }, { "epoch": 0.546449237243556, "grad_norm": 1.438472867012024, "learning_rate": 8.984543829571151e-05, "loss": 1.7141, "step": 5194 }, { "epoch": 0.5465544450289321, "grad_norm": 1.697603464126587, "learning_rate": 8.981153766679149e-05, "loss": 1.9385, "step": 5195 }, { "epoch": 0.5466596528143083, "grad_norm": 1.5435723066329956, "learning_rate": 8.977763822102404e-05, "loss": 1.7889, "step": 5196 }, { "epoch": 0.5467648605996843, "grad_norm": 1.3071941137313843, "learning_rate": 8.97437399623458e-05, "loss": 1.8384, "step": 5197 }, { "epoch": 0.5468700683850605, "grad_norm": 1.1430269479751587, "learning_rate": 8.970984289469327e-05, "loss": 1.4976, "step": 5198 }, { "epoch": 0.5469752761704366, "grad_norm": 2.691263198852539, "learning_rate": 8.96759470220028e-05, "loss": 1.39, "step": 5199 }, { "epoch": 0.5470804839558128, "grad_norm": 1.5296295881271362, "learning_rate": 8.96420523482106e-05, "loss": 1.8981, "step": 5200 }, { "epoch": 0.5471856917411888, "grad_norm": 2.23455810546875, "learning_rate": 8.960815887725278e-05, "loss": 1.573, "step": 5201 }, { "epoch": 0.547290899526565, "grad_norm": 2.9234347343444824, "learning_rate": 8.957426661306522e-05, "loss": 1.4707, "step": 5202 }, { "epoch": 0.5473961073119411, "grad_norm": 1.3259090185165405, "learning_rate": 8.954037555958376e-05, "loss": 1.2053, "step": 5203 }, { "epoch": 0.5475013150973173, "grad_norm": 1.4656827449798584, "learning_rate": 8.950648572074405e-05, "loss": 1.3973, "step": 5204 }, { "epoch": 0.5476065228826933, "grad_norm": 1.52262282371521, "learning_rate": 8.947259710048158e-05, "loss": 2.1761, "step": 5205 }, { "epoch": 0.5477117306680694, "grad_norm": 1.1430667638778687, "learning_rate": 8.943870970273174e-05, "loss": 1.4505, "step": 5206 }, { "epoch": 0.5478169384534456, "grad_norm": 1.6592637300491333, "learning_rate": 8.940482353142983e-05, "loss": 1.9043, "step": 5207 }, { "epoch": 0.5479221462388216, "grad_norm": 1.875982642173767, "learning_rate": 8.937093859051083e-05, "loss": 1.9452, "step": 5208 }, { "epoch": 0.5480273540241978, "grad_norm": 1.242350697517395, "learning_rate": 8.933705488390972e-05, "loss": 1.7839, "step": 5209 }, { "epoch": 0.5481325618095739, "grad_norm": 1.8692058324813843, "learning_rate": 8.930317241556132e-05, "loss": 1.6776, "step": 5210 }, { "epoch": 0.5482377695949501, "grad_norm": 1.910408854484558, "learning_rate": 8.926929118940026e-05, "loss": 1.588, "step": 5211 }, { "epoch": 0.5483429773803261, "grad_norm": 1.608375906944275, "learning_rate": 8.923541120936111e-05, "loss": 1.525, "step": 5212 }, { "epoch": 0.5484481851657023, "grad_norm": 1.786906123161316, "learning_rate": 8.92015324793782e-05, "loss": 1.5707, "step": 5213 }, { "epoch": 0.5485533929510784, "grad_norm": 1.4942151308059692, "learning_rate": 8.916765500338575e-05, "loss": 2.0173, "step": 5214 }, { "epoch": 0.5486586007364544, "grad_norm": 1.761673092842102, "learning_rate": 8.913377878531789e-05, "loss": 1.9173, "step": 5215 }, { "epoch": 0.5487638085218306, "grad_norm": 1.1698923110961914, "learning_rate": 8.909990382910849e-05, "loss": 1.9928, "step": 5216 }, { "epoch": 0.5488690163072067, "grad_norm": 1.3080923557281494, "learning_rate": 8.906603013869136e-05, "loss": 2.0938, "step": 5217 }, { "epoch": 0.5489742240925829, "grad_norm": 1.443345546722412, "learning_rate": 8.903215771800017e-05, "loss": 2.204, "step": 5218 }, { "epoch": 0.5490794318779589, "grad_norm": 1.6220980882644653, "learning_rate": 8.899828657096838e-05, "loss": 1.4677, "step": 5219 }, { "epoch": 0.5491846396633351, "grad_norm": 2.169625759124756, "learning_rate": 8.896441670152932e-05, "loss": 1.6339, "step": 5220 }, { "epoch": 0.5492898474487112, "grad_norm": 0.9750475287437439, "learning_rate": 8.893054811361624e-05, "loss": 1.76, "step": 5221 }, { "epoch": 0.5493950552340873, "grad_norm": 1.8625184297561646, "learning_rate": 8.889668081116214e-05, "loss": 1.6533, "step": 5222 }, { "epoch": 0.5495002630194634, "grad_norm": 1.3964723348617554, "learning_rate": 8.886281479809993e-05, "loss": 2.0727, "step": 5223 }, { "epoch": 0.5496054708048396, "grad_norm": 1.3396962881088257, "learning_rate": 8.882895007836236e-05, "loss": 1.5633, "step": 5224 }, { "epoch": 0.5497106785902157, "grad_norm": 1.373002290725708, "learning_rate": 8.879508665588206e-05, "loss": 1.6469, "step": 5225 }, { "epoch": 0.5498158863755918, "grad_norm": 0.9774206876754761, "learning_rate": 8.876122453459143e-05, "loss": 1.7567, "step": 5226 }, { "epoch": 0.5499210941609679, "grad_norm": 1.5650739669799805, "learning_rate": 8.872736371842279e-05, "loss": 1.8958, "step": 5227 }, { "epoch": 0.550026301946344, "grad_norm": 1.5855236053466797, "learning_rate": 8.869350421130831e-05, "loss": 2.1006, "step": 5228 }, { "epoch": 0.5501315097317201, "grad_norm": 1.8107531070709229, "learning_rate": 8.865964601717994e-05, "loss": 1.9863, "step": 5229 }, { "epoch": 0.5502367175170962, "grad_norm": 1.2894846200942993, "learning_rate": 8.862578913996952e-05, "loss": 1.749, "step": 5230 }, { "epoch": 0.5503419253024724, "grad_norm": 1.1580836772918701, "learning_rate": 8.859193358360874e-05, "loss": 1.3304, "step": 5231 }, { "epoch": 0.5504471330878485, "grad_norm": 1.6148262023925781, "learning_rate": 8.855807935202915e-05, "loss": 1.5428, "step": 5232 }, { "epoch": 0.5505523408732246, "grad_norm": 1.7478631734848022, "learning_rate": 8.852422644916216e-05, "loss": 1.6134, "step": 5233 }, { "epoch": 0.5506575486586007, "grad_norm": 1.809343934059143, "learning_rate": 8.849037487893893e-05, "loss": 2.3462, "step": 5234 }, { "epoch": 0.5507627564439769, "grad_norm": 1.5731574296951294, "learning_rate": 8.845652464529057e-05, "loss": 1.7138, "step": 5235 }, { "epoch": 0.550867964229353, "grad_norm": 1.3270047903060913, "learning_rate": 8.842267575214802e-05, "loss": 2.0084, "step": 5236 }, { "epoch": 0.5509731720147291, "grad_norm": 1.761651873588562, "learning_rate": 8.838882820344198e-05, "loss": 1.4156, "step": 5237 }, { "epoch": 0.5510783798001052, "grad_norm": 1.6372829675674438, "learning_rate": 8.835498200310309e-05, "loss": 2.1276, "step": 5238 }, { "epoch": 0.5511835875854814, "grad_norm": 1.144127368927002, "learning_rate": 8.832113715506181e-05, "loss": 1.574, "step": 5239 }, { "epoch": 0.5512887953708574, "grad_norm": 1.8449068069458008, "learning_rate": 8.82872936632484e-05, "loss": 1.67, "step": 5240 }, { "epoch": 0.5513940031562335, "grad_norm": 1.1163779497146606, "learning_rate": 8.825345153159301e-05, "loss": 1.6902, "step": 5241 }, { "epoch": 0.5514992109416097, "grad_norm": 1.2700437307357788, "learning_rate": 8.821961076402563e-05, "loss": 2.3529, "step": 5242 }, { "epoch": 0.5516044187269858, "grad_norm": 1.2813799381256104, "learning_rate": 8.818577136447603e-05, "loss": 1.6812, "step": 5243 }, { "epoch": 0.5517096265123619, "grad_norm": 1.321010947227478, "learning_rate": 8.815193333687391e-05, "loss": 1.7545, "step": 5244 }, { "epoch": 0.551814834297738, "grad_norm": 1.3672521114349365, "learning_rate": 8.811809668514878e-05, "loss": 1.5022, "step": 5245 }, { "epoch": 0.5519200420831142, "grad_norm": 1.245273470878601, "learning_rate": 8.808426141322994e-05, "loss": 1.6858, "step": 5246 }, { "epoch": 0.5520252498684902, "grad_norm": 2.0327329635620117, "learning_rate": 8.805042752504656e-05, "loss": 1.8182, "step": 5247 }, { "epoch": 0.5521304576538664, "grad_norm": 1.9399306774139404, "learning_rate": 8.801659502452769e-05, "loss": 1.8135, "step": 5248 }, { "epoch": 0.5522356654392425, "grad_norm": 1.4941593408584595, "learning_rate": 8.79827639156022e-05, "loss": 2.0688, "step": 5249 }, { "epoch": 0.5523408732246187, "grad_norm": 1.5331664085388184, "learning_rate": 8.794893420219881e-05, "loss": 2.1115, "step": 5250 }, { "epoch": 0.5524460810099947, "grad_norm": 2.205839157104492, "learning_rate": 8.791510588824594e-05, "loss": 1.3747, "step": 5251 }, { "epoch": 0.5525512887953709, "grad_norm": 1.1780728101730347, "learning_rate": 8.788127897767204e-05, "loss": 1.7812, "step": 5252 }, { "epoch": 0.552656496580747, "grad_norm": 1.1612539291381836, "learning_rate": 8.784745347440533e-05, "loss": 2.0561, "step": 5253 }, { "epoch": 0.552761704366123, "grad_norm": 1.275850534439087, "learning_rate": 8.78136293823738e-05, "loss": 1.6603, "step": 5254 }, { "epoch": 0.5528669121514992, "grad_norm": 1.7113317251205444, "learning_rate": 8.777980670550536e-05, "loss": 1.4027, "step": 5255 }, { "epoch": 0.5529721199368753, "grad_norm": 1.0461790561676025, "learning_rate": 8.774598544772774e-05, "loss": 2.2595, "step": 5256 }, { "epoch": 0.5530773277222515, "grad_norm": 1.1092756986618042, "learning_rate": 8.771216561296849e-05, "loss": 1.6448, "step": 5257 }, { "epoch": 0.5531825355076275, "grad_norm": 1.1810952425003052, "learning_rate": 8.767834720515496e-05, "loss": 1.8668, "step": 5258 }, { "epoch": 0.5532877432930037, "grad_norm": 1.1021080017089844, "learning_rate": 8.76445302282144e-05, "loss": 2.0023, "step": 5259 }, { "epoch": 0.5533929510783798, "grad_norm": 1.4798521995544434, "learning_rate": 8.761071468607388e-05, "loss": 1.8638, "step": 5260 }, { "epoch": 0.5534981588637559, "grad_norm": 1.3366169929504395, "learning_rate": 8.757690058266025e-05, "loss": 1.458, "step": 5261 }, { "epoch": 0.553603366649132, "grad_norm": 1.5138647556304932, "learning_rate": 8.754308792190024e-05, "loss": 1.6667, "step": 5262 }, { "epoch": 0.5537085744345082, "grad_norm": 1.3789781332015991, "learning_rate": 8.750927670772044e-05, "loss": 2.1057, "step": 5263 }, { "epoch": 0.5538137822198843, "grad_norm": 1.4035643339157104, "learning_rate": 8.747546694404717e-05, "loss": 1.6416, "step": 5264 }, { "epoch": 0.5539189900052603, "grad_norm": 1.30898916721344, "learning_rate": 8.744165863480669e-05, "loss": 1.9435, "step": 5265 }, { "epoch": 0.5540241977906365, "grad_norm": 1.4918098449707031, "learning_rate": 8.740785178392505e-05, "loss": 1.9183, "step": 5266 }, { "epoch": 0.5541294055760126, "grad_norm": 1.285561203956604, "learning_rate": 8.737404639532811e-05, "loss": 1.82, "step": 5267 }, { "epoch": 0.5542346133613888, "grad_norm": 1.183773159980774, "learning_rate": 8.734024247294157e-05, "loss": 1.9191, "step": 5268 }, { "epoch": 0.5543398211467648, "grad_norm": 1.6294718980789185, "learning_rate": 8.7306440020691e-05, "loss": 1.9698, "step": 5269 }, { "epoch": 0.554445028932141, "grad_norm": 1.5693155527114868, "learning_rate": 8.727263904250178e-05, "loss": 1.8887, "step": 5270 }, { "epoch": 0.5545502367175171, "grad_norm": 1.6532758474349976, "learning_rate": 8.723883954229908e-05, "loss": 1.8751, "step": 5271 }, { "epoch": 0.5546554445028932, "grad_norm": 1.02011239528656, "learning_rate": 8.72050415240079e-05, "loss": 2.3712, "step": 5272 }, { "epoch": 0.5547606522882693, "grad_norm": 1.6323237419128418, "learning_rate": 8.71712449915531e-05, "loss": 1.7668, "step": 5273 }, { "epoch": 0.5548658600736455, "grad_norm": 1.436021327972412, "learning_rate": 8.713744994885938e-05, "loss": 1.8613, "step": 5274 }, { "epoch": 0.5549710678590216, "grad_norm": 1.5757768154144287, "learning_rate": 8.710365639985126e-05, "loss": 2.0038, "step": 5275 }, { "epoch": 0.5550762756443977, "grad_norm": 2.4132494926452637, "learning_rate": 8.706986434845302e-05, "loss": 1.5683, "step": 5276 }, { "epoch": 0.5551814834297738, "grad_norm": 1.5619103908538818, "learning_rate": 8.703607379858889e-05, "loss": 1.2968, "step": 5277 }, { "epoch": 0.55528669121515, "grad_norm": 1.2575358152389526, "learning_rate": 8.70022847541828e-05, "loss": 1.5361, "step": 5278 }, { "epoch": 0.555391899000526, "grad_norm": 0.924577534198761, "learning_rate": 8.696849721915859e-05, "loss": 1.6288, "step": 5279 }, { "epoch": 0.5554971067859021, "grad_norm": 1.1372270584106445, "learning_rate": 8.693471119743987e-05, "loss": 2.1464, "step": 5280 }, { "epoch": 0.5556023145712783, "grad_norm": 1.3003607988357544, "learning_rate": 8.690092669295014e-05, "loss": 2.0537, "step": 5281 }, { "epoch": 0.5557075223566544, "grad_norm": 1.147006869316101, "learning_rate": 8.686714370961264e-05, "loss": 2.0348, "step": 5282 }, { "epoch": 0.5558127301420305, "grad_norm": 1.6022502183914185, "learning_rate": 8.68333622513505e-05, "loss": 1.6282, "step": 5283 }, { "epoch": 0.5559179379274066, "grad_norm": 1.5122016668319702, "learning_rate": 8.679958232208668e-05, "loss": 1.2036, "step": 5284 }, { "epoch": 0.5560231457127828, "grad_norm": 1.583200216293335, "learning_rate": 8.676580392574385e-05, "loss": 1.5517, "step": 5285 }, { "epoch": 0.5561283534981588, "grad_norm": 1.0503642559051514, "learning_rate": 8.673202706624464e-05, "loss": 1.9161, "step": 5286 }, { "epoch": 0.556233561283535, "grad_norm": 1.8765935897827148, "learning_rate": 8.669825174751144e-05, "loss": 2.1381, "step": 5287 }, { "epoch": 0.5563387690689111, "grad_norm": 0.8225523233413696, "learning_rate": 8.666447797346648e-05, "loss": 1.781, "step": 5288 }, { "epoch": 0.5564439768542873, "grad_norm": 1.6632425785064697, "learning_rate": 8.663070574803175e-05, "loss": 1.636, "step": 5289 }, { "epoch": 0.5565491846396633, "grad_norm": 1.561035394668579, "learning_rate": 8.659693507512917e-05, "loss": 1.7061, "step": 5290 }, { "epoch": 0.5566543924250394, "grad_norm": 1.1021442413330078, "learning_rate": 8.656316595868037e-05, "loss": 1.7368, "step": 5291 }, { "epoch": 0.5567596002104156, "grad_norm": 1.2675588130950928, "learning_rate": 8.652939840260686e-05, "loss": 1.6662, "step": 5292 }, { "epoch": 0.5568648079957917, "grad_norm": 1.4895219802856445, "learning_rate": 8.649563241082998e-05, "loss": 1.3631, "step": 5293 }, { "epoch": 0.5569700157811678, "grad_norm": 1.1941972970962524, "learning_rate": 8.64618679872708e-05, "loss": 1.2894, "step": 5294 }, { "epoch": 0.5570752235665439, "grad_norm": 1.6961616277694702, "learning_rate": 8.642810513585035e-05, "loss": 2.1377, "step": 5295 }, { "epoch": 0.5571804313519201, "grad_norm": 1.7371389865875244, "learning_rate": 8.639434386048932e-05, "loss": 2.2303, "step": 5296 }, { "epoch": 0.5572856391372961, "grad_norm": 1.3116867542266846, "learning_rate": 8.636058416510836e-05, "loss": 1.7173, "step": 5297 }, { "epoch": 0.5573908469226723, "grad_norm": 1.2672263383865356, "learning_rate": 8.632682605362784e-05, "loss": 1.8597, "step": 5298 }, { "epoch": 0.5574960547080484, "grad_norm": 1.0156499147415161, "learning_rate": 8.629306952996797e-05, "loss": 2.0679, "step": 5299 }, { "epoch": 0.5576012624934246, "grad_norm": 1.6344144344329834, "learning_rate": 8.625931459804881e-05, "loss": 1.8525, "step": 5300 }, { "epoch": 0.5577064702788006, "grad_norm": 1.6262001991271973, "learning_rate": 8.622556126179023e-05, "loss": 1.9396, "step": 5301 }, { "epoch": 0.5578116780641768, "grad_norm": 1.4433337450027466, "learning_rate": 8.619180952511181e-05, "loss": 2.3541, "step": 5302 }, { "epoch": 0.5579168858495529, "grad_norm": 1.4532362222671509, "learning_rate": 8.61580593919331e-05, "loss": 1.6864, "step": 5303 }, { "epoch": 0.5580220936349289, "grad_norm": 1.5361088514328003, "learning_rate": 8.612431086617337e-05, "loss": 1.7575, "step": 5304 }, { "epoch": 0.5581273014203051, "grad_norm": 1.3419499397277832, "learning_rate": 8.609056395175175e-05, "loss": 1.6097, "step": 5305 }, { "epoch": 0.5582325092056812, "grad_norm": 1.3803333044052124, "learning_rate": 8.605681865258712e-05, "loss": 1.5305, "step": 5306 }, { "epoch": 0.5583377169910574, "grad_norm": 1.5263311862945557, "learning_rate": 8.602307497259821e-05, "loss": 1.6276, "step": 5307 }, { "epoch": 0.5584429247764334, "grad_norm": 1.4065600633621216, "learning_rate": 8.598933291570361e-05, "loss": 1.5332, "step": 5308 }, { "epoch": 0.5585481325618096, "grad_norm": 1.0995397567749023, "learning_rate": 8.595559248582161e-05, "loss": 2.0225, "step": 5309 }, { "epoch": 0.5586533403471857, "grad_norm": 1.0431246757507324, "learning_rate": 8.592185368687043e-05, "loss": 1.662, "step": 5310 }, { "epoch": 0.5587585481325618, "grad_norm": 1.5991910696029663, "learning_rate": 8.588811652276803e-05, "loss": 1.2915, "step": 5311 }, { "epoch": 0.5588637559179379, "grad_norm": 1.2698297500610352, "learning_rate": 8.585438099743217e-05, "loss": 2.1424, "step": 5312 }, { "epoch": 0.5589689637033141, "grad_norm": 1.9178346395492554, "learning_rate": 8.582064711478046e-05, "loss": 1.7453, "step": 5313 }, { "epoch": 0.5590741714886902, "grad_norm": 1.5896426439285278, "learning_rate": 8.578691487873036e-05, "loss": 1.0764, "step": 5314 }, { "epoch": 0.5591793792740662, "grad_norm": 1.0997956991195679, "learning_rate": 8.575318429319899e-05, "loss": 1.588, "step": 5315 }, { "epoch": 0.5592845870594424, "grad_norm": 1.4343537092208862, "learning_rate": 8.571945536210342e-05, "loss": 1.578, "step": 5316 }, { "epoch": 0.5593897948448185, "grad_norm": 1.9470630884170532, "learning_rate": 8.568572808936047e-05, "loss": 1.905, "step": 5317 }, { "epoch": 0.5594950026301946, "grad_norm": 1.999913215637207, "learning_rate": 8.565200247888678e-05, "loss": 2.0373, "step": 5318 }, { "epoch": 0.5596002104155707, "grad_norm": 1.2631433010101318, "learning_rate": 8.56182785345988e-05, "loss": 2.4557, "step": 5319 }, { "epoch": 0.5597054182009469, "grad_norm": 0.9738912582397461, "learning_rate": 8.558455626041277e-05, "loss": 1.4726, "step": 5320 }, { "epoch": 0.559810625986323, "grad_norm": 1.5372016429901123, "learning_rate": 8.555083566024474e-05, "loss": 2.1864, "step": 5321 }, { "epoch": 0.5599158337716991, "grad_norm": 1.7038294076919556, "learning_rate": 8.551711673801062e-05, "loss": 1.8453, "step": 5322 }, { "epoch": 0.5600210415570752, "grad_norm": 1.2219566106796265, "learning_rate": 8.548339949762601e-05, "loss": 1.3113, "step": 5323 }, { "epoch": 0.5601262493424514, "grad_norm": 2.9071662425994873, "learning_rate": 8.544968394300642e-05, "loss": 1.8082, "step": 5324 }, { "epoch": 0.5602314571278275, "grad_norm": 2.0915303230285645, "learning_rate": 8.541597007806712e-05, "loss": 1.5558, "step": 5325 }, { "epoch": 0.5603366649132036, "grad_norm": 1.6373728513717651, "learning_rate": 8.538225790672322e-05, "loss": 1.5953, "step": 5326 }, { "epoch": 0.5604418726985797, "grad_norm": 1.2306228876113892, "learning_rate": 8.534854743288954e-05, "loss": 1.5297, "step": 5327 }, { "epoch": 0.5605470804839559, "grad_norm": 1.990144968032837, "learning_rate": 8.531483866048081e-05, "loss": 1.6063, "step": 5328 }, { "epoch": 0.5606522882693319, "grad_norm": 1.5562758445739746, "learning_rate": 8.528113159341153e-05, "loss": 1.6421, "step": 5329 }, { "epoch": 0.560757496054708, "grad_norm": 1.1921128034591675, "learning_rate": 8.524742623559594e-05, "loss": 1.4861, "step": 5330 }, { "epoch": 0.5608627038400842, "grad_norm": 1.379715085029602, "learning_rate": 8.521372259094818e-05, "loss": 1.4598, "step": 5331 }, { "epoch": 0.5609679116254603, "grad_norm": 2.179091453552246, "learning_rate": 8.518002066338212e-05, "loss": 1.9094, "step": 5332 }, { "epoch": 0.5610731194108364, "grad_norm": 1.716064214706421, "learning_rate": 8.514632045681145e-05, "loss": 1.6363, "step": 5333 }, { "epoch": 0.5611783271962125, "grad_norm": 1.1381585597991943, "learning_rate": 8.511262197514968e-05, "loss": 1.5435, "step": 5334 }, { "epoch": 0.5612835349815887, "grad_norm": 2.24033522605896, "learning_rate": 8.507892522231012e-05, "loss": 1.8934, "step": 5335 }, { "epoch": 0.5613887427669647, "grad_norm": 1.3724555969238281, "learning_rate": 8.504523020220583e-05, "loss": 2.2629, "step": 5336 }, { "epoch": 0.5614939505523409, "grad_norm": 1.862369418144226, "learning_rate": 8.501153691874971e-05, "loss": 2.0515, "step": 5337 }, { "epoch": 0.561599158337717, "grad_norm": 2.476644992828369, "learning_rate": 8.497784537585444e-05, "loss": 1.9697, "step": 5338 }, { "epoch": 0.5617043661230932, "grad_norm": 1.3707612752914429, "learning_rate": 8.494415557743252e-05, "loss": 2.1986, "step": 5339 }, { "epoch": 0.5618095739084692, "grad_norm": 2.085458993911743, "learning_rate": 8.491046752739624e-05, "loss": 1.9783, "step": 5340 }, { "epoch": 0.5619147816938453, "grad_norm": 1.2586299180984497, "learning_rate": 8.487678122965767e-05, "loss": 2.0315, "step": 5341 }, { "epoch": 0.5620199894792215, "grad_norm": 1.0489764213562012, "learning_rate": 8.48430966881287e-05, "loss": 1.7353, "step": 5342 }, { "epoch": 0.5621251972645975, "grad_norm": 1.1171035766601562, "learning_rate": 8.480941390672101e-05, "loss": 1.8519, "step": 5343 }, { "epoch": 0.5622304050499737, "grad_norm": 1.3247672319412231, "learning_rate": 8.477573288934605e-05, "loss": 1.7206, "step": 5344 }, { "epoch": 0.5623356128353498, "grad_norm": 2.1845767498016357, "learning_rate": 8.47420536399151e-05, "loss": 2.2207, "step": 5345 }, { "epoch": 0.562440820620726, "grad_norm": 1.5519604682922363, "learning_rate": 8.470837616233924e-05, "loss": 1.7987, "step": 5346 }, { "epoch": 0.562546028406102, "grad_norm": 1.6114767789840698, "learning_rate": 8.467470046052927e-05, "loss": 1.608, "step": 5347 }, { "epoch": 0.5626512361914782, "grad_norm": 1.703809142112732, "learning_rate": 8.464102653839588e-05, "loss": 1.2808, "step": 5348 }, { "epoch": 0.5627564439768543, "grad_norm": 1.431298851966858, "learning_rate": 8.460735439984949e-05, "loss": 2.1355, "step": 5349 }, { "epoch": 0.5628616517622304, "grad_norm": 1.7516570091247559, "learning_rate": 8.457368404880037e-05, "loss": 2.1989, "step": 5350 }, { "epoch": 0.5629668595476065, "grad_norm": 1.4657411575317383, "learning_rate": 8.454001548915851e-05, "loss": 1.5914, "step": 5351 }, { "epoch": 0.5630720673329827, "grad_norm": 1.8436520099639893, "learning_rate": 8.450634872483374e-05, "loss": 2.0691, "step": 5352 }, { "epoch": 0.5631772751183588, "grad_norm": 1.4249285459518433, "learning_rate": 8.44726837597357e-05, "loss": 1.4602, "step": 5353 }, { "epoch": 0.5632824829037348, "grad_norm": 1.0019652843475342, "learning_rate": 8.443902059777373e-05, "loss": 2.0361, "step": 5354 }, { "epoch": 0.563387690689111, "grad_norm": 2.051858901977539, "learning_rate": 8.440535924285706e-05, "loss": 1.5908, "step": 5355 }, { "epoch": 0.5634928984744871, "grad_norm": 1.3849743604660034, "learning_rate": 8.43716996988947e-05, "loss": 1.8964, "step": 5356 }, { "epoch": 0.5635981062598633, "grad_norm": 1.2312546968460083, "learning_rate": 8.433804196979541e-05, "loss": 1.8502, "step": 5357 }, { "epoch": 0.5637033140452393, "grad_norm": 1.2379924058914185, "learning_rate": 8.430438605946769e-05, "loss": 2.0632, "step": 5358 }, { "epoch": 0.5638085218306155, "grad_norm": 0.9644449353218079, "learning_rate": 8.427073197181993e-05, "loss": 1.6485, "step": 5359 }, { "epoch": 0.5639137296159916, "grad_norm": 1.0192735195159912, "learning_rate": 8.423707971076026e-05, "loss": 1.4893, "step": 5360 }, { "epoch": 0.5640189374013677, "grad_norm": 1.3611501455307007, "learning_rate": 8.420342928019666e-05, "loss": 1.7624, "step": 5361 }, { "epoch": 0.5641241451867438, "grad_norm": 1.648772120475769, "learning_rate": 8.416978068403676e-05, "loss": 1.7394, "step": 5362 }, { "epoch": 0.56422935297212, "grad_norm": 1.2542641162872314, "learning_rate": 8.413613392618811e-05, "loss": 2.0594, "step": 5363 }, { "epoch": 0.5643345607574961, "grad_norm": 1.6775392293930054, "learning_rate": 8.410248901055801e-05, "loss": 2.0371, "step": 5364 }, { "epoch": 0.5644397685428721, "grad_norm": 1.1697038412094116, "learning_rate": 8.40688459410535e-05, "loss": 1.8887, "step": 5365 }, { "epoch": 0.5645449763282483, "grad_norm": 1.5925973653793335, "learning_rate": 8.403520472158143e-05, "loss": 1.8341, "step": 5366 }, { "epoch": 0.5646501841136244, "grad_norm": 1.3041836023330688, "learning_rate": 8.40015653560485e-05, "loss": 1.5598, "step": 5367 }, { "epoch": 0.5647553918990005, "grad_norm": 1.0811697244644165, "learning_rate": 8.396792784836108e-05, "loss": 1.3874, "step": 5368 }, { "epoch": 0.5648605996843766, "grad_norm": 1.3267872333526611, "learning_rate": 8.393429220242541e-05, "loss": 2.1926, "step": 5369 }, { "epoch": 0.5649658074697528, "grad_norm": 1.133404016494751, "learning_rate": 8.39006584221475e-05, "loss": 1.7818, "step": 5370 }, { "epoch": 0.5650710152551289, "grad_norm": 2.1951334476470947, "learning_rate": 8.38670265114331e-05, "loss": 2.0216, "step": 5371 }, { "epoch": 0.565176223040505, "grad_norm": 1.3310232162475586, "learning_rate": 8.383339647418777e-05, "loss": 1.5981, "step": 5372 }, { "epoch": 0.5652814308258811, "grad_norm": 0.9504427313804626, "learning_rate": 8.379976831431689e-05, "loss": 1.7203, "step": 5373 }, { "epoch": 0.5653866386112573, "grad_norm": 1.273130178451538, "learning_rate": 8.376614203572559e-05, "loss": 1.5829, "step": 5374 }, { "epoch": 0.5654918463966333, "grad_norm": 1.1506158113479614, "learning_rate": 8.373251764231872e-05, "loss": 1.3818, "step": 5375 }, { "epoch": 0.5655970541820095, "grad_norm": 1.0576092004776, "learning_rate": 8.369889513800102e-05, "loss": 1.9378, "step": 5376 }, { "epoch": 0.5657022619673856, "grad_norm": 1.4205738306045532, "learning_rate": 8.366527452667698e-05, "loss": 1.9037, "step": 5377 }, { "epoch": 0.5658074697527617, "grad_norm": 1.1215096712112427, "learning_rate": 8.363165581225083e-05, "loss": 1.6023, "step": 5378 }, { "epoch": 0.5659126775381378, "grad_norm": 1.6068058013916016, "learning_rate": 8.359803899862655e-05, "loss": 2.4349, "step": 5379 }, { "epoch": 0.5660178853235139, "grad_norm": 1.7272415161132812, "learning_rate": 8.356442408970799e-05, "loss": 1.3773, "step": 5380 }, { "epoch": 0.5661230931088901, "grad_norm": 1.9546716213226318, "learning_rate": 8.353081108939874e-05, "loss": 1.6571, "step": 5381 }, { "epoch": 0.5662283008942661, "grad_norm": 1.531908392906189, "learning_rate": 8.349720000160218e-05, "loss": 1.9016, "step": 5382 }, { "epoch": 0.5663335086796423, "grad_norm": 1.5750274658203125, "learning_rate": 8.346359083022143e-05, "loss": 2.044, "step": 5383 }, { "epoch": 0.5664387164650184, "grad_norm": 1.3168236017227173, "learning_rate": 8.342998357915942e-05, "loss": 1.6516, "step": 5384 }, { "epoch": 0.5665439242503946, "grad_norm": 1.2597466707229614, "learning_rate": 8.339637825231887e-05, "loss": 1.4705, "step": 5385 }, { "epoch": 0.5666491320357706, "grad_norm": 1.7140848636627197, "learning_rate": 8.336277485360223e-05, "loss": 1.9122, "step": 5386 }, { "epoch": 0.5667543398211468, "grad_norm": 1.403868556022644, "learning_rate": 8.332917338691175e-05, "loss": 1.7833, "step": 5387 }, { "epoch": 0.5668595476065229, "grad_norm": 1.871900200843811, "learning_rate": 8.32955738561495e-05, "loss": 1.7798, "step": 5388 }, { "epoch": 0.5669647553918991, "grad_norm": 1.4764777421951294, "learning_rate": 8.326197626521723e-05, "loss": 1.9589, "step": 5389 }, { "epoch": 0.5670699631772751, "grad_norm": 1.7492798566818237, "learning_rate": 8.322838061801653e-05, "loss": 2.1874, "step": 5390 }, { "epoch": 0.5671751709626512, "grad_norm": 1.9016979932785034, "learning_rate": 8.319478691844878e-05, "loss": 1.4618, "step": 5391 }, { "epoch": 0.5672803787480274, "grad_norm": 1.8420443534851074, "learning_rate": 8.316119517041508e-05, "loss": 1.305, "step": 5392 }, { "epoch": 0.5673855865334034, "grad_norm": 1.3935943841934204, "learning_rate": 8.312760537781632e-05, "loss": 1.9563, "step": 5393 }, { "epoch": 0.5674907943187796, "grad_norm": 1.9522449970245361, "learning_rate": 8.30940175445532e-05, "loss": 1.5606, "step": 5394 }, { "epoch": 0.5675960021041557, "grad_norm": 1.159475326538086, "learning_rate": 8.306043167452617e-05, "loss": 1.5782, "step": 5395 }, { "epoch": 0.5677012098895319, "grad_norm": 1.9530138969421387, "learning_rate": 8.30268477716354e-05, "loss": 1.8486, "step": 5396 }, { "epoch": 0.5678064176749079, "grad_norm": 1.253212571144104, "learning_rate": 8.299326583978092e-05, "loss": 1.7982, "step": 5397 }, { "epoch": 0.5679116254602841, "grad_norm": 1.5111079216003418, "learning_rate": 8.29596858828625e-05, "loss": 1.5008, "step": 5398 }, { "epoch": 0.5680168332456602, "grad_norm": 1.4527966976165771, "learning_rate": 8.292610790477962e-05, "loss": 2.2226, "step": 5399 }, { "epoch": 0.5681220410310363, "grad_norm": 1.5884594917297363, "learning_rate": 8.289253190943164e-05, "loss": 1.9317, "step": 5400 }, { "epoch": 0.5682272488164124, "grad_norm": 1.4073702096939087, "learning_rate": 8.285895790071757e-05, "loss": 1.4504, "step": 5401 }, { "epoch": 0.5683324566017885, "grad_norm": 1.079156756401062, "learning_rate": 8.282538588253627e-05, "loss": 1.2555, "step": 5402 }, { "epoch": 0.5684376643871647, "grad_norm": 1.4192665815353394, "learning_rate": 8.279181585878635e-05, "loss": 1.947, "step": 5403 }, { "epoch": 0.5685428721725407, "grad_norm": 1.4388915300369263, "learning_rate": 8.275824783336618e-05, "loss": 1.5275, "step": 5404 }, { "epoch": 0.5686480799579169, "grad_norm": 0.9725925326347351, "learning_rate": 8.272468181017391e-05, "loss": 1.8777, "step": 5405 }, { "epoch": 0.568753287743293, "grad_norm": 2.05253005027771, "learning_rate": 8.26911177931075e-05, "loss": 1.1397, "step": 5406 }, { "epoch": 0.5688584955286691, "grad_norm": 1.8297544717788696, "learning_rate": 8.265755578606456e-05, "loss": 1.6646, "step": 5407 }, { "epoch": 0.5689637033140452, "grad_norm": 1.0543756484985352, "learning_rate": 8.262399579294253e-05, "loss": 2.0218, "step": 5408 }, { "epoch": 0.5690689110994214, "grad_norm": 1.6449626684188843, "learning_rate": 8.259043781763869e-05, "loss": 1.4181, "step": 5409 }, { "epoch": 0.5691741188847975, "grad_norm": 1.7541320323944092, "learning_rate": 8.255688186404996e-05, "loss": 1.8855, "step": 5410 }, { "epoch": 0.5692793266701736, "grad_norm": 2.0391685962677, "learning_rate": 8.25233279360731e-05, "loss": 1.7674, "step": 5411 }, { "epoch": 0.5693845344555497, "grad_norm": 1.9726836681365967, "learning_rate": 8.248977603760464e-05, "loss": 1.7674, "step": 5412 }, { "epoch": 0.5694897422409259, "grad_norm": 1.3250395059585571, "learning_rate": 8.245622617254079e-05, "loss": 1.7336, "step": 5413 }, { "epoch": 0.5695949500263019, "grad_norm": 1.848076581954956, "learning_rate": 8.242267834477764e-05, "loss": 1.9363, "step": 5414 }, { "epoch": 0.569700157811678, "grad_norm": 1.8783268928527832, "learning_rate": 8.238913255821099e-05, "loss": 1.3876, "step": 5415 }, { "epoch": 0.5698053655970542, "grad_norm": 1.4165843725204468, "learning_rate": 8.235558881673637e-05, "loss": 2.0615, "step": 5416 }, { "epoch": 0.5699105733824303, "grad_norm": 1.4009727239608765, "learning_rate": 8.232204712424911e-05, "loss": 1.9336, "step": 5417 }, { "epoch": 0.5700157811678064, "grad_norm": 1.3084132671356201, "learning_rate": 8.228850748464431e-05, "loss": 1.7258, "step": 5418 }, { "epoch": 0.5701209889531825, "grad_norm": 1.5547186136245728, "learning_rate": 8.225496990181684e-05, "loss": 1.4471, "step": 5419 }, { "epoch": 0.5702261967385587, "grad_norm": 1.5895652770996094, "learning_rate": 8.222143437966124e-05, "loss": 1.815, "step": 5420 }, { "epoch": 0.5703314045239348, "grad_norm": 1.7180938720703125, "learning_rate": 8.218790092207199e-05, "loss": 1.4393, "step": 5421 }, { "epoch": 0.5704366123093109, "grad_norm": 1.1043965816497803, "learning_rate": 8.21543695329431e-05, "loss": 1.5099, "step": 5422 }, { "epoch": 0.570541820094687, "grad_norm": 1.113439917564392, "learning_rate": 8.212084021616852e-05, "loss": 1.9908, "step": 5423 }, { "epoch": 0.5706470278800632, "grad_norm": 1.640676736831665, "learning_rate": 8.208731297564189e-05, "loss": 1.6397, "step": 5424 }, { "epoch": 0.5707522356654392, "grad_norm": 1.0427812337875366, "learning_rate": 8.205378781525662e-05, "loss": 1.6538, "step": 5425 }, { "epoch": 0.5708574434508153, "grad_norm": 1.614121437072754, "learning_rate": 8.202026473890588e-05, "loss": 1.6651, "step": 5426 }, { "epoch": 0.5709626512361915, "grad_norm": 1.6778579950332642, "learning_rate": 8.198674375048257e-05, "loss": 1.6189, "step": 5427 }, { "epoch": 0.5710678590215676, "grad_norm": 1.0318078994750977, "learning_rate": 8.195322485387939e-05, "loss": 1.7629, "step": 5428 }, { "epoch": 0.5711730668069437, "grad_norm": 1.1207058429718018, "learning_rate": 8.191970805298881e-05, "loss": 1.6471, "step": 5429 }, { "epoch": 0.5712782745923198, "grad_norm": 1.0314679145812988, "learning_rate": 8.188619335170298e-05, "loss": 1.5387, "step": 5430 }, { "epoch": 0.571383482377696, "grad_norm": 1.1926995515823364, "learning_rate": 8.185268075391388e-05, "loss": 1.5988, "step": 5431 }, { "epoch": 0.571488690163072, "grad_norm": 1.2411372661590576, "learning_rate": 8.181917026351318e-05, "loss": 1.2326, "step": 5432 }, { "epoch": 0.5715938979484482, "grad_norm": 1.1884357929229736, "learning_rate": 8.17856618843924e-05, "loss": 1.7397, "step": 5433 }, { "epoch": 0.5716991057338243, "grad_norm": 1.4349546432495117, "learning_rate": 8.175215562044272e-05, "loss": 1.6069, "step": 5434 }, { "epoch": 0.5718043135192005, "grad_norm": 2.4111011028289795, "learning_rate": 8.17186514755551e-05, "loss": 1.669, "step": 5435 }, { "epoch": 0.5719095213045765, "grad_norm": 1.5937304496765137, "learning_rate": 8.168514945362031e-05, "loss": 1.7242, "step": 5436 }, { "epoch": 0.5720147290899527, "grad_norm": 1.468326210975647, "learning_rate": 8.165164955852879e-05, "loss": 1.5671, "step": 5437 }, { "epoch": 0.5721199368753288, "grad_norm": 1.6039079427719116, "learning_rate": 8.161815179417078e-05, "loss": 1.2596, "step": 5438 }, { "epoch": 0.5722251446607048, "grad_norm": 1.7768357992172241, "learning_rate": 8.15846561644363e-05, "loss": 2.1289, "step": 5439 }, { "epoch": 0.572330352446081, "grad_norm": 1.709168553352356, "learning_rate": 8.155116267321503e-05, "loss": 1.5969, "step": 5440 }, { "epoch": 0.5724355602314571, "grad_norm": 2.2013728618621826, "learning_rate": 8.151767132439649e-05, "loss": 2.0476, "step": 5441 }, { "epoch": 0.5725407680168333, "grad_norm": 1.7853853702545166, "learning_rate": 8.148418212186992e-05, "loss": 1.9721, "step": 5442 }, { "epoch": 0.5726459758022093, "grad_norm": 1.9058358669281006, "learning_rate": 8.145069506952436e-05, "loss": 2.2435, "step": 5443 }, { "epoch": 0.5727511835875855, "grad_norm": 2.1775949001312256, "learning_rate": 8.141721017124847e-05, "loss": 1.8347, "step": 5444 }, { "epoch": 0.5728563913729616, "grad_norm": 1.0717917680740356, "learning_rate": 8.138372743093076e-05, "loss": 1.486, "step": 5445 }, { "epoch": 0.5729615991583377, "grad_norm": 1.1905382871627808, "learning_rate": 8.135024685245947e-05, "loss": 1.5313, "step": 5446 }, { "epoch": 0.5730668069437138, "grad_norm": 1.8441627025604248, "learning_rate": 8.131676843972263e-05, "loss": 1.5609, "step": 5447 }, { "epoch": 0.57317201472909, "grad_norm": 1.70012366771698, "learning_rate": 8.128329219660791e-05, "loss": 1.4996, "step": 5448 }, { "epoch": 0.5732772225144661, "grad_norm": 2.055849313735962, "learning_rate": 8.124981812700285e-05, "loss": 1.6694, "step": 5449 }, { "epoch": 0.5733824302998421, "grad_norm": 1.5213593244552612, "learning_rate": 8.121634623479466e-05, "loss": 1.7859, "step": 5450 }, { "epoch": 0.5734876380852183, "grad_norm": 1.240997314453125, "learning_rate": 8.118287652387035e-05, "loss": 1.9417, "step": 5451 }, { "epoch": 0.5735928458705944, "grad_norm": 1.187052607536316, "learning_rate": 8.114940899811662e-05, "loss": 1.7307, "step": 5452 }, { "epoch": 0.5736980536559706, "grad_norm": 2.3511910438537598, "learning_rate": 8.111594366141993e-05, "loss": 2.0432, "step": 5453 }, { "epoch": 0.5738032614413466, "grad_norm": 1.187315583229065, "learning_rate": 8.108248051766656e-05, "loss": 1.6997, "step": 5454 }, { "epoch": 0.5739084692267228, "grad_norm": 1.4285547733306885, "learning_rate": 8.10490195707424e-05, "loss": 2.0346, "step": 5455 }, { "epoch": 0.5740136770120989, "grad_norm": 1.6566582918167114, "learning_rate": 8.101556082453319e-05, "loss": 2.0706, "step": 5456 }, { "epoch": 0.574118884797475, "grad_norm": 1.2293633222579956, "learning_rate": 8.098210428292441e-05, "loss": 2.2512, "step": 5457 }, { "epoch": 0.5742240925828511, "grad_norm": 1.1286085844039917, "learning_rate": 8.094864994980123e-05, "loss": 1.6408, "step": 5458 }, { "epoch": 0.5743293003682273, "grad_norm": 1.2557892799377441, "learning_rate": 8.091519782904857e-05, "loss": 1.9494, "step": 5459 }, { "epoch": 0.5744345081536034, "grad_norm": 1.3432778120040894, "learning_rate": 8.088174792455119e-05, "loss": 2.1635, "step": 5460 }, { "epoch": 0.5745397159389795, "grad_norm": 2.0232627391815186, "learning_rate": 8.084830024019343e-05, "loss": 1.565, "step": 5461 }, { "epoch": 0.5746449237243556, "grad_norm": 1.3621368408203125, "learning_rate": 8.08148547798595e-05, "loss": 1.755, "step": 5462 }, { "epoch": 0.5747501315097318, "grad_norm": 1.1986950635910034, "learning_rate": 8.078141154743332e-05, "loss": 1.8158, "step": 5463 }, { "epoch": 0.5748553392951078, "grad_norm": 1.457479476928711, "learning_rate": 8.074797054679855e-05, "loss": 1.6491, "step": 5464 }, { "epoch": 0.5749605470804839, "grad_norm": 1.3137367963790894, "learning_rate": 8.071453178183856e-05, "loss": 1.7712, "step": 5465 }, { "epoch": 0.5750657548658601, "grad_norm": 1.0634950399398804, "learning_rate": 8.068109525643647e-05, "loss": 1.9505, "step": 5466 }, { "epoch": 0.5751709626512362, "grad_norm": 0.9608885645866394, "learning_rate": 8.064766097447516e-05, "loss": 1.57, "step": 5467 }, { "epoch": 0.5752761704366123, "grad_norm": 1.5873912572860718, "learning_rate": 8.061422893983729e-05, "loss": 1.6664, "step": 5468 }, { "epoch": 0.5753813782219884, "grad_norm": 1.4330425262451172, "learning_rate": 8.058079915640515e-05, "loss": 1.8861, "step": 5469 }, { "epoch": 0.5754865860073646, "grad_norm": 1.3749375343322754, "learning_rate": 8.054737162806086e-05, "loss": 1.9692, "step": 5470 }, { "epoch": 0.5755917937927406, "grad_norm": 1.9150141477584839, "learning_rate": 8.051394635868626e-05, "loss": 1.6297, "step": 5471 }, { "epoch": 0.5756970015781168, "grad_norm": 1.4533182382583618, "learning_rate": 8.048052335216289e-05, "loss": 1.2963, "step": 5472 }, { "epoch": 0.5758022093634929, "grad_norm": 2.1088831424713135, "learning_rate": 8.044710261237207e-05, "loss": 1.4709, "step": 5473 }, { "epoch": 0.5759074171488691, "grad_norm": 1.277236819267273, "learning_rate": 8.041368414319483e-05, "loss": 1.8557, "step": 5474 }, { "epoch": 0.5760126249342451, "grad_norm": 1.2016223669052124, "learning_rate": 8.038026794851198e-05, "loss": 1.6482, "step": 5475 }, { "epoch": 0.5761178327196212, "grad_norm": 1.476108431816101, "learning_rate": 8.034685403220398e-05, "loss": 1.7097, "step": 5476 }, { "epoch": 0.5762230405049974, "grad_norm": 2.2213351726531982, "learning_rate": 8.031344239815111e-05, "loss": 1.9822, "step": 5477 }, { "epoch": 0.5763282482903734, "grad_norm": 1.5243347883224487, "learning_rate": 8.028003305023338e-05, "loss": 2.1552, "step": 5478 }, { "epoch": 0.5764334560757496, "grad_norm": 2.157576322555542, "learning_rate": 8.024662599233043e-05, "loss": 2.0237, "step": 5479 }, { "epoch": 0.5765386638611257, "grad_norm": 1.6475622653961182, "learning_rate": 8.021322122832178e-05, "loss": 2.1017, "step": 5480 }, { "epoch": 0.5766438716465019, "grad_norm": 1.4928076267242432, "learning_rate": 8.01798187620866e-05, "loss": 1.6096, "step": 5481 }, { "epoch": 0.5767490794318779, "grad_norm": 1.6426701545715332, "learning_rate": 8.014641859750379e-05, "loss": 1.8119, "step": 5482 }, { "epoch": 0.5768542872172541, "grad_norm": 1.8036248683929443, "learning_rate": 8.011302073845201e-05, "loss": 1.9572, "step": 5483 }, { "epoch": 0.5769594950026302, "grad_norm": 1.9108166694641113, "learning_rate": 8.007962518880966e-05, "loss": 2.202, "step": 5484 }, { "epoch": 0.5770647027880064, "grad_norm": 1.3631569147109985, "learning_rate": 8.004623195245481e-05, "loss": 1.9852, "step": 5485 }, { "epoch": 0.5771699105733824, "grad_norm": 1.4796253442764282, "learning_rate": 8.001284103326539e-05, "loss": 1.9356, "step": 5486 }, { "epoch": 0.5772751183587586, "grad_norm": 1.3997398614883423, "learning_rate": 7.99794524351189e-05, "loss": 1.9229, "step": 5487 }, { "epoch": 0.5773803261441347, "grad_norm": 0.9555890560150146, "learning_rate": 7.994606616189264e-05, "loss": 1.6695, "step": 5488 }, { "epoch": 0.5774855339295107, "grad_norm": 3.1957201957702637, "learning_rate": 7.991268221746373e-05, "loss": 2.0273, "step": 5489 }, { "epoch": 0.5775907417148869, "grad_norm": 1.504439115524292, "learning_rate": 7.987930060570885e-05, "loss": 2.1869, "step": 5490 }, { "epoch": 0.577695949500263, "grad_norm": 1.781453251838684, "learning_rate": 7.984592133050454e-05, "loss": 1.8198, "step": 5491 }, { "epoch": 0.5778011572856392, "grad_norm": 1.4022020101547241, "learning_rate": 7.981254439572704e-05, "loss": 1.6377, "step": 5492 }, { "epoch": 0.5779063650710152, "grad_norm": 2.5264246463775635, "learning_rate": 7.977916980525227e-05, "loss": 1.8286, "step": 5493 }, { "epoch": 0.5780115728563914, "grad_norm": 1.4550338983535767, "learning_rate": 7.974579756295591e-05, "loss": 1.9897, "step": 5494 }, { "epoch": 0.5781167806417675, "grad_norm": 1.6223548650741577, "learning_rate": 7.97124276727134e-05, "loss": 1.7706, "step": 5495 }, { "epoch": 0.5782219884271436, "grad_norm": 1.2290290594100952, "learning_rate": 7.967906013839987e-05, "loss": 1.8209, "step": 5496 }, { "epoch": 0.5783271962125197, "grad_norm": 1.5976051092147827, "learning_rate": 7.964569496389013e-05, "loss": 2.1965, "step": 5497 }, { "epoch": 0.5784324039978959, "grad_norm": 1.4536248445510864, "learning_rate": 7.961233215305884e-05, "loss": 1.8925, "step": 5498 }, { "epoch": 0.578537611783272, "grad_norm": 0.8368136286735535, "learning_rate": 7.957897170978031e-05, "loss": 1.5314, "step": 5499 }, { "epoch": 0.578642819568648, "grad_norm": 1.7957181930541992, "learning_rate": 7.95456136379285e-05, "loss": 2.1585, "step": 5500 }, { "epoch": 0.5787480273540242, "grad_norm": 1.1656877994537354, "learning_rate": 7.951225794137724e-05, "loss": 1.6489, "step": 5501 }, { "epoch": 0.5788532351394003, "grad_norm": 1.2200168371200562, "learning_rate": 7.947890462400002e-05, "loss": 1.6195, "step": 5502 }, { "epoch": 0.5789584429247764, "grad_norm": 1.2942944765090942, "learning_rate": 7.944555368967001e-05, "loss": 1.8944, "step": 5503 }, { "epoch": 0.5790636507101525, "grad_norm": 1.5925309658050537, "learning_rate": 7.941220514226016e-05, "loss": 1.5092, "step": 5504 }, { "epoch": 0.5791688584955287, "grad_norm": 1.2316579818725586, "learning_rate": 7.937885898564315e-05, "loss": 1.7135, "step": 5505 }, { "epoch": 0.5792740662809048, "grad_norm": 1.9800273180007935, "learning_rate": 7.934551522369134e-05, "loss": 1.6139, "step": 5506 }, { "epoch": 0.5793792740662809, "grad_norm": 1.2461237907409668, "learning_rate": 7.931217386027686e-05, "loss": 1.6989, "step": 5507 }, { "epoch": 0.579484481851657, "grad_norm": 1.102742075920105, "learning_rate": 7.927883489927147e-05, "loss": 1.9475, "step": 5508 }, { "epoch": 0.5795896896370332, "grad_norm": 1.5722696781158447, "learning_rate": 7.924549834454674e-05, "loss": 1.6444, "step": 5509 }, { "epoch": 0.5796948974224092, "grad_norm": 1.331183671951294, "learning_rate": 7.921216419997398e-05, "loss": 1.3104, "step": 5510 }, { "epoch": 0.5798001052077854, "grad_norm": 1.825430989265442, "learning_rate": 7.917883246942412e-05, "loss": 2.1806, "step": 5511 }, { "epoch": 0.5799053129931615, "grad_norm": 1.2514722347259521, "learning_rate": 7.914550315676787e-05, "loss": 1.7759, "step": 5512 }, { "epoch": 0.5800105207785377, "grad_norm": 1.1857120990753174, "learning_rate": 7.91121762658757e-05, "loss": 1.59, "step": 5513 }, { "epoch": 0.5801157285639137, "grad_norm": 1.2514647245407104, "learning_rate": 7.907885180061767e-05, "loss": 2.1173, "step": 5514 }, { "epoch": 0.5802209363492898, "grad_norm": 2.3011891841888428, "learning_rate": 7.904552976486372e-05, "loss": 1.8243, "step": 5515 }, { "epoch": 0.580326144134666, "grad_norm": 1.686474084854126, "learning_rate": 7.90122101624834e-05, "loss": 1.6543, "step": 5516 }, { "epoch": 0.5804313519200421, "grad_norm": 1.496381163597107, "learning_rate": 7.897889299734599e-05, "loss": 1.6579, "step": 5517 }, { "epoch": 0.5805365597054182, "grad_norm": 2.0382790565490723, "learning_rate": 7.894557827332052e-05, "loss": 1.0179, "step": 5518 }, { "epoch": 0.5806417674907943, "grad_norm": 1.186845064163208, "learning_rate": 7.891226599427572e-05, "loss": 1.7989, "step": 5519 }, { "epoch": 0.5807469752761705, "grad_norm": 1.6111037731170654, "learning_rate": 7.887895616408001e-05, "loss": 1.6084, "step": 5520 }, { "epoch": 0.5808521830615465, "grad_norm": 1.4457639455795288, "learning_rate": 7.884564878660159e-05, "loss": 1.784, "step": 5521 }, { "epoch": 0.5809573908469227, "grad_norm": 1.4394114017486572, "learning_rate": 7.88123438657083e-05, "loss": 1.555, "step": 5522 }, { "epoch": 0.5810625986322988, "grad_norm": 2.1034014225006104, "learning_rate": 7.877904140526778e-05, "loss": 1.7858, "step": 5523 }, { "epoch": 0.581167806417675, "grad_norm": 1.5533802509307861, "learning_rate": 7.874574140914727e-05, "loss": 1.9843, "step": 5524 }, { "epoch": 0.581273014203051, "grad_norm": 1.3675698041915894, "learning_rate": 7.871244388121381e-05, "loss": 2.3865, "step": 5525 }, { "epoch": 0.5813782219884271, "grad_norm": 1.3307299613952637, "learning_rate": 7.867914882533419e-05, "loss": 1.7025, "step": 5526 }, { "epoch": 0.5814834297738033, "grad_norm": 1.7351833581924438, "learning_rate": 7.864585624537478e-05, "loss": 1.679, "step": 5527 }, { "epoch": 0.5815886375591793, "grad_norm": 1.8544718027114868, "learning_rate": 7.86125661452018e-05, "loss": 2.0512, "step": 5528 }, { "epoch": 0.5816938453445555, "grad_norm": 1.8268216848373413, "learning_rate": 7.857927852868107e-05, "loss": 1.7498, "step": 5529 }, { "epoch": 0.5817990531299316, "grad_norm": 1.4137629270553589, "learning_rate": 7.854599339967817e-05, "loss": 1.8924, "step": 5530 }, { "epoch": 0.5819042609153078, "grad_norm": 1.431031584739685, "learning_rate": 7.851271076205843e-05, "loss": 1.6172, "step": 5531 }, { "epoch": 0.5820094687006838, "grad_norm": 1.0963670015335083, "learning_rate": 7.84794306196868e-05, "loss": 1.6189, "step": 5532 }, { "epoch": 0.58211467648606, "grad_norm": 1.4139877557754517, "learning_rate": 7.844615297642805e-05, "loss": 1.8776, "step": 5533 }, { "epoch": 0.5822198842714361, "grad_norm": 1.4871132373809814, "learning_rate": 7.84128778361466e-05, "loss": 1.6445, "step": 5534 }, { "epoch": 0.5823250920568122, "grad_norm": 1.2888953685760498, "learning_rate": 7.837960520270652e-05, "loss": 1.4948, "step": 5535 }, { "epoch": 0.5824302998421883, "grad_norm": 1.376495599746704, "learning_rate": 7.83463350799717e-05, "loss": 1.8704, "step": 5536 }, { "epoch": 0.5825355076275645, "grad_norm": 1.1731458902359009, "learning_rate": 7.831306747180571e-05, "loss": 1.5219, "step": 5537 }, { "epoch": 0.5826407154129406, "grad_norm": 1.5646111965179443, "learning_rate": 7.827980238207177e-05, "loss": 1.3992, "step": 5538 }, { "epoch": 0.5827459231983166, "grad_norm": 1.2269337177276611, "learning_rate": 7.824653981463284e-05, "loss": 2.2809, "step": 5539 }, { "epoch": 0.5828511309836928, "grad_norm": 1.4412834644317627, "learning_rate": 7.821327977335164e-05, "loss": 1.2724, "step": 5540 }, { "epoch": 0.5829563387690689, "grad_norm": 2.3292593955993652, "learning_rate": 7.818002226209049e-05, "loss": 1.4849, "step": 5541 }, { "epoch": 0.583061546554445, "grad_norm": 1.5618584156036377, "learning_rate": 7.814676728471151e-05, "loss": 2.0205, "step": 5542 }, { "epoch": 0.5831667543398211, "grad_norm": 1.4830820560455322, "learning_rate": 7.811351484507647e-05, "loss": 1.7858, "step": 5543 }, { "epoch": 0.5832719621251973, "grad_norm": 1.1995006799697876, "learning_rate": 7.808026494704692e-05, "loss": 2.016, "step": 5544 }, { "epoch": 0.5833771699105734, "grad_norm": 1.5571500062942505, "learning_rate": 7.804701759448398e-05, "loss": 1.7179, "step": 5545 }, { "epoch": 0.5834823776959495, "grad_norm": 1.301533579826355, "learning_rate": 7.801377279124862e-05, "loss": 2.0771, "step": 5546 }, { "epoch": 0.5835875854813256, "grad_norm": 1.841912031173706, "learning_rate": 7.798053054120143e-05, "loss": 1.9201, "step": 5547 }, { "epoch": 0.5836927932667018, "grad_norm": 2.079071044921875, "learning_rate": 7.794729084820272e-05, "loss": 2.0332, "step": 5548 }, { "epoch": 0.5837980010520779, "grad_norm": 1.3038408756256104, "learning_rate": 7.791405371611249e-05, "loss": 1.6136, "step": 5549 }, { "epoch": 0.583903208837454, "grad_norm": 1.194056510925293, "learning_rate": 7.788081914879051e-05, "loss": 1.6015, "step": 5550 }, { "epoch": 0.5840084166228301, "grad_norm": 1.3977482318878174, "learning_rate": 7.784758715009616e-05, "loss": 1.8922, "step": 5551 }, { "epoch": 0.5841136244082062, "grad_norm": 1.124822735786438, "learning_rate": 7.781435772388854e-05, "loss": 1.4993, "step": 5552 }, { "epoch": 0.5842188321935823, "grad_norm": 1.9948351383209229, "learning_rate": 7.778113087402649e-05, "loss": 1.7896, "step": 5553 }, { "epoch": 0.5843240399789584, "grad_norm": 1.693495273590088, "learning_rate": 7.774790660436858e-05, "loss": 1.8484, "step": 5554 }, { "epoch": 0.5844292477643346, "grad_norm": 1.079588532447815, "learning_rate": 7.771468491877299e-05, "loss": 2.1711, "step": 5555 }, { "epoch": 0.5845344555497107, "grad_norm": 2.974010467529297, "learning_rate": 7.768146582109765e-05, "loss": 1.8077, "step": 5556 }, { "epoch": 0.5846396633350868, "grad_norm": 2.0303964614868164, "learning_rate": 7.764824931520018e-05, "loss": 1.8446, "step": 5557 }, { "epoch": 0.5847448711204629, "grad_norm": 1.5578415393829346, "learning_rate": 7.761503540493795e-05, "loss": 1.1523, "step": 5558 }, { "epoch": 0.5848500789058391, "grad_norm": 1.480180025100708, "learning_rate": 7.758182409416792e-05, "loss": 1.5651, "step": 5559 }, { "epoch": 0.5849552866912151, "grad_norm": 1.6015815734863281, "learning_rate": 7.754861538674683e-05, "loss": 1.8731, "step": 5560 }, { "epoch": 0.5850604944765913, "grad_norm": 2.9025611877441406, "learning_rate": 7.751540928653113e-05, "loss": 0.9335, "step": 5561 }, { "epoch": 0.5851657022619674, "grad_norm": 1.393959403038025, "learning_rate": 7.748220579737689e-05, "loss": 2.0774, "step": 5562 }, { "epoch": 0.5852709100473436, "grad_norm": 1.700785756111145, "learning_rate": 7.744900492313995e-05, "loss": 1.7171, "step": 5563 }, { "epoch": 0.5853761178327196, "grad_norm": 1.6016006469726562, "learning_rate": 7.741580666767583e-05, "loss": 1.7215, "step": 5564 }, { "epoch": 0.5854813256180957, "grad_norm": 1.1057853698730469, "learning_rate": 7.738261103483968e-05, "loss": 1.1364, "step": 5565 }, { "epoch": 0.5855865334034719, "grad_norm": 1.9537837505340576, "learning_rate": 7.734941802848643e-05, "loss": 1.5892, "step": 5566 }, { "epoch": 0.5856917411888479, "grad_norm": 1.664334774017334, "learning_rate": 7.731622765247069e-05, "loss": 1.7183, "step": 5567 }, { "epoch": 0.5857969489742241, "grad_norm": 1.9867991209030151, "learning_rate": 7.728303991064675e-05, "loss": 1.5369, "step": 5568 }, { "epoch": 0.5859021567596002, "grad_norm": 1.713571310043335, "learning_rate": 7.724985480686856e-05, "loss": 2.0291, "step": 5569 }, { "epoch": 0.5860073645449764, "grad_norm": 1.1955255270004272, "learning_rate": 7.721667234498982e-05, "loss": 2.378, "step": 5570 }, { "epoch": 0.5861125723303524, "grad_norm": 1.5413669347763062, "learning_rate": 7.718349252886395e-05, "loss": 2.0389, "step": 5571 }, { "epoch": 0.5862177801157286, "grad_norm": 1.7773537635803223, "learning_rate": 7.715031536234392e-05, "loss": 1.7247, "step": 5572 }, { "epoch": 0.5863229879011047, "grad_norm": 1.7139322757720947, "learning_rate": 7.711714084928251e-05, "loss": 1.7779, "step": 5573 }, { "epoch": 0.5864281956864807, "grad_norm": 1.3501460552215576, "learning_rate": 7.708396899353219e-05, "loss": 1.6001, "step": 5574 }, { "epoch": 0.5865334034718569, "grad_norm": 3.4829583168029785, "learning_rate": 7.705079979894509e-05, "loss": 1.5646, "step": 5575 }, { "epoch": 0.586638611257233, "grad_norm": 1.7532110214233398, "learning_rate": 7.701763326937304e-05, "loss": 1.5209, "step": 5576 }, { "epoch": 0.5867438190426092, "grad_norm": 1.371377944946289, "learning_rate": 7.698446940866754e-05, "loss": 1.5853, "step": 5577 }, { "epoch": 0.5868490268279852, "grad_norm": 1.3144980669021606, "learning_rate": 7.695130822067984e-05, "loss": 1.5741, "step": 5578 }, { "epoch": 0.5869542346133614, "grad_norm": 1.3463817834854126, "learning_rate": 7.691814970926083e-05, "loss": 1.7503, "step": 5579 }, { "epoch": 0.5870594423987375, "grad_norm": 1.7925662994384766, "learning_rate": 7.688499387826107e-05, "loss": 1.3115, "step": 5580 }, { "epoch": 0.5871646501841137, "grad_norm": 4.266043186187744, "learning_rate": 7.685184073153085e-05, "loss": 1.97, "step": 5581 }, { "epoch": 0.5872698579694897, "grad_norm": 1.344207525253296, "learning_rate": 7.681869027292016e-05, "loss": 1.6598, "step": 5582 }, { "epoch": 0.5873750657548659, "grad_norm": 1.2654930353164673, "learning_rate": 7.67855425062786e-05, "loss": 1.6116, "step": 5583 }, { "epoch": 0.587480273540242, "grad_norm": 1.6905845403671265, "learning_rate": 7.675239743545557e-05, "loss": 2.0407, "step": 5584 }, { "epoch": 0.587585481325618, "grad_norm": 1.8982789516448975, "learning_rate": 7.67192550643001e-05, "loss": 1.9079, "step": 5585 }, { "epoch": 0.5876906891109942, "grad_norm": 1.407412052154541, "learning_rate": 7.668611539666085e-05, "loss": 1.8989, "step": 5586 }, { "epoch": 0.5877958968963704, "grad_norm": 1.6381629705429077, "learning_rate": 7.665297843638623e-05, "loss": 1.3814, "step": 5587 }, { "epoch": 0.5879011046817465, "grad_norm": 2.0129973888397217, "learning_rate": 7.661984418732438e-05, "loss": 2.0981, "step": 5588 }, { "epoch": 0.5880063124671225, "grad_norm": 1.516152024269104, "learning_rate": 7.6586712653323e-05, "loss": 1.6191, "step": 5589 }, { "epoch": 0.5881115202524987, "grad_norm": 1.2309309244155884, "learning_rate": 7.655358383822959e-05, "loss": 1.4041, "step": 5590 }, { "epoch": 0.5882167280378748, "grad_norm": 1.7488607168197632, "learning_rate": 7.652045774589129e-05, "loss": 2.1543, "step": 5591 }, { "epoch": 0.5883219358232509, "grad_norm": 1.7684071063995361, "learning_rate": 7.648733438015493e-05, "loss": 2.056, "step": 5592 }, { "epoch": 0.588427143608627, "grad_norm": 1.56985342502594, "learning_rate": 7.645421374486702e-05, "loss": 1.8957, "step": 5593 }, { "epoch": 0.5885323513940032, "grad_norm": 2.020207166671753, "learning_rate": 7.64210958438737e-05, "loss": 1.7548, "step": 5594 }, { "epoch": 0.5886375591793793, "grad_norm": 1.3547688722610474, "learning_rate": 7.638798068102086e-05, "loss": 1.8783, "step": 5595 }, { "epoch": 0.5887427669647554, "grad_norm": 1.2740882635116577, "learning_rate": 7.635486826015412e-05, "loss": 2.1481, "step": 5596 }, { "epoch": 0.5888479747501315, "grad_norm": 1.9236841201782227, "learning_rate": 7.632175858511863e-05, "loss": 1.3462, "step": 5597 }, { "epoch": 0.5889531825355077, "grad_norm": 1.3053388595581055, "learning_rate": 7.628865165975934e-05, "loss": 1.6169, "step": 5598 }, { "epoch": 0.5890583903208837, "grad_norm": 1.8421201705932617, "learning_rate": 7.625554748792085e-05, "loss": 1.6345, "step": 5599 }, { "epoch": 0.5891635981062598, "grad_norm": 1.4617395401000977, "learning_rate": 7.622244607344748e-05, "loss": 1.9279, "step": 5600 }, { "epoch": 0.589268805891636, "grad_norm": 1.3376179933547974, "learning_rate": 7.618934742018312e-05, "loss": 1.5823, "step": 5601 }, { "epoch": 0.5893740136770121, "grad_norm": 1.5780961513519287, "learning_rate": 7.615625153197143e-05, "loss": 2.1328, "step": 5602 }, { "epoch": 0.5894792214623882, "grad_norm": 1.1965007781982422, "learning_rate": 7.612315841265577e-05, "loss": 1.136, "step": 5603 }, { "epoch": 0.5895844292477643, "grad_norm": 1.290488600730896, "learning_rate": 7.609006806607907e-05, "loss": 1.5277, "step": 5604 }, { "epoch": 0.5896896370331405, "grad_norm": 1.6050409078598022, "learning_rate": 7.605698049608403e-05, "loss": 1.8864, "step": 5605 }, { "epoch": 0.5897948448185165, "grad_norm": 1.6614772081375122, "learning_rate": 7.602389570651303e-05, "loss": 1.9447, "step": 5606 }, { "epoch": 0.5899000526038927, "grad_norm": 1.3600127696990967, "learning_rate": 7.599081370120804e-05, "loss": 1.6589, "step": 5607 }, { "epoch": 0.5900052603892688, "grad_norm": 1.4860687255859375, "learning_rate": 7.595773448401081e-05, "loss": 1.5742, "step": 5608 }, { "epoch": 0.590110468174645, "grad_norm": 1.154513955116272, "learning_rate": 7.59246580587627e-05, "loss": 2.1251, "step": 5609 }, { "epoch": 0.590215675960021, "grad_norm": 2.0505974292755127, "learning_rate": 7.589158442930478e-05, "loss": 1.799, "step": 5610 }, { "epoch": 0.5903208837453972, "grad_norm": 2.0056207180023193, "learning_rate": 7.585851359947776e-05, "loss": 1.6373, "step": 5611 }, { "epoch": 0.5904260915307733, "grad_norm": 1.5547747611999512, "learning_rate": 7.582544557312205e-05, "loss": 1.4256, "step": 5612 }, { "epoch": 0.5905312993161494, "grad_norm": 1.8376978635787964, "learning_rate": 7.579238035407776e-05, "loss": 1.3878, "step": 5613 }, { "epoch": 0.5906365071015255, "grad_norm": 1.0613032579421997, "learning_rate": 7.575931794618466e-05, "loss": 1.9392, "step": 5614 }, { "epoch": 0.5907417148869016, "grad_norm": 1.1583870649337769, "learning_rate": 7.572625835328211e-05, "loss": 1.7329, "step": 5615 }, { "epoch": 0.5908469226722778, "grad_norm": 1.783522367477417, "learning_rate": 7.569320157920923e-05, "loss": 1.6231, "step": 5616 }, { "epoch": 0.5909521304576538, "grad_norm": 1.3971893787384033, "learning_rate": 7.566014762780483e-05, "loss": 1.6672, "step": 5617 }, { "epoch": 0.59105733824303, "grad_norm": 1.681029200553894, "learning_rate": 7.562709650290732e-05, "loss": 1.5265, "step": 5618 }, { "epoch": 0.5911625460284061, "grad_norm": 1.8332031965255737, "learning_rate": 7.559404820835484e-05, "loss": 1.695, "step": 5619 }, { "epoch": 0.5912677538137823, "grad_norm": 1.792855143547058, "learning_rate": 7.556100274798519e-05, "loss": 1.7872, "step": 5620 }, { "epoch": 0.5913729615991583, "grad_norm": 1.1729624271392822, "learning_rate": 7.55279601256358e-05, "loss": 2.0353, "step": 5621 }, { "epoch": 0.5914781693845345, "grad_norm": 1.0647164583206177, "learning_rate": 7.549492034514381e-05, "loss": 1.7371, "step": 5622 }, { "epoch": 0.5915833771699106, "grad_norm": 1.6151602268218994, "learning_rate": 7.546188341034603e-05, "loss": 1.0778, "step": 5623 }, { "epoch": 0.5916885849552866, "grad_norm": 1.2735862731933594, "learning_rate": 7.542884932507896e-05, "loss": 1.7771, "step": 5624 }, { "epoch": 0.5917937927406628, "grad_norm": 1.3248671293258667, "learning_rate": 7.539581809317866e-05, "loss": 1.3799, "step": 5625 }, { "epoch": 0.5918990005260389, "grad_norm": 1.618085265159607, "learning_rate": 7.536278971848101e-05, "loss": 1.9688, "step": 5626 }, { "epoch": 0.5920042083114151, "grad_norm": 1.132401704788208, "learning_rate": 7.532976420482146e-05, "loss": 1.8808, "step": 5627 }, { "epoch": 0.5921094160967911, "grad_norm": 1.2111188173294067, "learning_rate": 7.529674155603516e-05, "loss": 1.8875, "step": 5628 }, { "epoch": 0.5922146238821673, "grad_norm": 1.4924447536468506, "learning_rate": 7.52637217759569e-05, "loss": 1.5021, "step": 5629 }, { "epoch": 0.5923198316675434, "grad_norm": 1.672135353088379, "learning_rate": 7.52307048684212e-05, "loss": 1.4673, "step": 5630 }, { "epoch": 0.5924250394529195, "grad_norm": 1.491942048072815, "learning_rate": 7.519769083726216e-05, "loss": 1.5303, "step": 5631 }, { "epoch": 0.5925302472382956, "grad_norm": 1.7676005363464355, "learning_rate": 7.51646796863136e-05, "loss": 1.7333, "step": 5632 }, { "epoch": 0.5926354550236718, "grad_norm": 0.917346179485321, "learning_rate": 7.513167141940904e-05, "loss": 1.8271, "step": 5633 }, { "epoch": 0.5927406628090479, "grad_norm": 1.5192989110946655, "learning_rate": 7.509866604038157e-05, "loss": 1.8536, "step": 5634 }, { "epoch": 0.592845870594424, "grad_norm": 0.7993506789207458, "learning_rate": 7.506566355306402e-05, "loss": 1.6131, "step": 5635 }, { "epoch": 0.5929510783798001, "grad_norm": 1.2340017557144165, "learning_rate": 7.503266396128887e-05, "loss": 1.6488, "step": 5636 }, { "epoch": 0.5930562861651762, "grad_norm": 1.8110322952270508, "learning_rate": 7.499966726888823e-05, "loss": 2.1135, "step": 5637 }, { "epoch": 0.5931614939505523, "grad_norm": 1.2862292528152466, "learning_rate": 7.49666734796939e-05, "loss": 2.2786, "step": 5638 }, { "epoch": 0.5932667017359284, "grad_norm": 2.9395089149475098, "learning_rate": 7.493368259753734e-05, "loss": 1.8967, "step": 5639 }, { "epoch": 0.5933719095213046, "grad_norm": 2.132434844970703, "learning_rate": 7.490069462624967e-05, "loss": 1.3319, "step": 5640 }, { "epoch": 0.5934771173066807, "grad_norm": 1.9779353141784668, "learning_rate": 7.486770956966171e-05, "loss": 2.0619, "step": 5641 }, { "epoch": 0.5935823250920568, "grad_norm": 1.540423035621643, "learning_rate": 7.483472743160387e-05, "loss": 1.6819, "step": 5642 }, { "epoch": 0.5936875328774329, "grad_norm": 1.2363083362579346, "learning_rate": 7.480174821590624e-05, "loss": 1.7712, "step": 5643 }, { "epoch": 0.5937927406628091, "grad_norm": 1.7649072408676147, "learning_rate": 7.476877192639866e-05, "loss": 1.4488, "step": 5644 }, { "epoch": 0.5938979484481852, "grad_norm": 1.634068489074707, "learning_rate": 7.473579856691047e-05, "loss": 2.044, "step": 5645 }, { "epoch": 0.5940031562335613, "grad_norm": 1.0379782915115356, "learning_rate": 7.470282814127081e-05, "loss": 1.8553, "step": 5646 }, { "epoch": 0.5941083640189374, "grad_norm": 1.697240948677063, "learning_rate": 7.466986065330841e-05, "loss": 1.4889, "step": 5647 }, { "epoch": 0.5942135718043136, "grad_norm": 1.2661585807800293, "learning_rate": 7.463689610685171e-05, "loss": 2.0126, "step": 5648 }, { "epoch": 0.5943187795896896, "grad_norm": 1.558271884918213, "learning_rate": 7.460393450572872e-05, "loss": 1.5544, "step": 5649 }, { "epoch": 0.5944239873750657, "grad_norm": 1.657724142074585, "learning_rate": 7.457097585376719e-05, "loss": 2.0922, "step": 5650 }, { "epoch": 0.5945291951604419, "grad_norm": 1.5575658082962036, "learning_rate": 7.453802015479452e-05, "loss": 2.0446, "step": 5651 }, { "epoch": 0.594634402945818, "grad_norm": 1.7816355228424072, "learning_rate": 7.45050674126377e-05, "loss": 1.5467, "step": 5652 }, { "epoch": 0.5947396107311941, "grad_norm": 1.14240562915802, "learning_rate": 7.447211763112346e-05, "loss": 1.4401, "step": 5653 }, { "epoch": 0.5948448185165702, "grad_norm": 1.691174030303955, "learning_rate": 7.443917081407816e-05, "loss": 1.5377, "step": 5654 }, { "epoch": 0.5949500263019464, "grad_norm": 2.1762633323669434, "learning_rate": 7.440622696532775e-05, "loss": 1.6951, "step": 5655 }, { "epoch": 0.5950552340873224, "grad_norm": 1.880930781364441, "learning_rate": 7.437328608869793e-05, "loss": 1.3951, "step": 5656 }, { "epoch": 0.5951604418726986, "grad_norm": 1.4270281791687012, "learning_rate": 7.434034818801405e-05, "loss": 2.372, "step": 5657 }, { "epoch": 0.5952656496580747, "grad_norm": 1.9598197937011719, "learning_rate": 7.4307413267101e-05, "loss": 1.3984, "step": 5658 }, { "epoch": 0.5953708574434509, "grad_norm": 1.6525239944458008, "learning_rate": 7.427448132978346e-05, "loss": 1.699, "step": 5659 }, { "epoch": 0.5954760652288269, "grad_norm": 1.7269477844238281, "learning_rate": 7.424155237988567e-05, "loss": 1.6827, "step": 5660 }, { "epoch": 0.595581273014203, "grad_norm": 1.3311235904693604, "learning_rate": 7.420862642123158e-05, "loss": 1.6645, "step": 5661 }, { "epoch": 0.5956864807995792, "grad_norm": 1.446498990058899, "learning_rate": 7.417570345764481e-05, "loss": 1.3863, "step": 5662 }, { "epoch": 0.5957916885849552, "grad_norm": 1.93930184841156, "learning_rate": 7.414278349294852e-05, "loss": 1.7474, "step": 5663 }, { "epoch": 0.5958968963703314, "grad_norm": 1.7791824340820312, "learning_rate": 7.410986653096565e-05, "loss": 1.564, "step": 5664 }, { "epoch": 0.5960021041557075, "grad_norm": 1.487729549407959, "learning_rate": 7.407695257551875e-05, "loss": 1.54, "step": 5665 }, { "epoch": 0.5961073119410837, "grad_norm": 1.072798490524292, "learning_rate": 7.404404163042995e-05, "loss": 1.4969, "step": 5666 }, { "epoch": 0.5962125197264597, "grad_norm": 1.5234678983688354, "learning_rate": 7.401113369952113e-05, "loss": 2.2265, "step": 5667 }, { "epoch": 0.5963177275118359, "grad_norm": 1.3278653621673584, "learning_rate": 7.397822878661377e-05, "loss": 2.0208, "step": 5668 }, { "epoch": 0.596422935297212, "grad_norm": 1.424271583557129, "learning_rate": 7.394532689552905e-05, "loss": 2.034, "step": 5669 }, { "epoch": 0.5965281430825881, "grad_norm": 1.7164738178253174, "learning_rate": 7.391242803008768e-05, "loss": 2.2689, "step": 5670 }, { "epoch": 0.5966333508679642, "grad_norm": 3.884221315383911, "learning_rate": 7.387953219411015e-05, "loss": 1.8426, "step": 5671 }, { "epoch": 0.5967385586533404, "grad_norm": 1.339316964149475, "learning_rate": 7.384663939141656e-05, "loss": 1.9429, "step": 5672 }, { "epoch": 0.5968437664387165, "grad_norm": 1.4739105701446533, "learning_rate": 7.381374962582659e-05, "loss": 1.3312, "step": 5673 }, { "epoch": 0.5969489742240925, "grad_norm": 1.0755393505096436, "learning_rate": 7.378086290115964e-05, "loss": 1.7607, "step": 5674 }, { "epoch": 0.5970541820094687, "grad_norm": 1.6911028623580933, "learning_rate": 7.374797922123478e-05, "loss": 1.4523, "step": 5675 }, { "epoch": 0.5971593897948448, "grad_norm": 1.7561362981796265, "learning_rate": 7.371509858987061e-05, "loss": 2.0099, "step": 5676 }, { "epoch": 0.597264597580221, "grad_norm": 1.560784935951233, "learning_rate": 7.368222101088549e-05, "loss": 1.5806, "step": 5677 }, { "epoch": 0.597369805365597, "grad_norm": 1.1424559354782104, "learning_rate": 7.364934648809741e-05, "loss": 2.1778, "step": 5678 }, { "epoch": 0.5974750131509732, "grad_norm": 1.393723726272583, "learning_rate": 7.361647502532395e-05, "loss": 2.0167, "step": 5679 }, { "epoch": 0.5975802209363493, "grad_norm": 0.8226059079170227, "learning_rate": 7.358360662638236e-05, "loss": 1.9121, "step": 5680 }, { "epoch": 0.5976854287217254, "grad_norm": 1.6506311893463135, "learning_rate": 7.355074129508953e-05, "loss": 2.2151, "step": 5681 }, { "epoch": 0.5977906365071015, "grad_norm": 1.2521584033966064, "learning_rate": 7.351787903526201e-05, "loss": 2.056, "step": 5682 }, { "epoch": 0.5978958442924777, "grad_norm": 1.4110209941864014, "learning_rate": 7.348501985071603e-05, "loss": 1.7132, "step": 5683 }, { "epoch": 0.5980010520778538, "grad_norm": 1.7280937433242798, "learning_rate": 7.345216374526736e-05, "loss": 1.8772, "step": 5684 }, { "epoch": 0.5981062598632298, "grad_norm": 1.448601484298706, "learning_rate": 7.341931072273148e-05, "loss": 1.9043, "step": 5685 }, { "epoch": 0.598211467648606, "grad_norm": 1.5108412504196167, "learning_rate": 7.338646078692356e-05, "loss": 1.0912, "step": 5686 }, { "epoch": 0.5983166754339821, "grad_norm": 1.2896003723144531, "learning_rate": 7.335361394165825e-05, "loss": 1.8589, "step": 5687 }, { "epoch": 0.5984218832193582, "grad_norm": 1.537742257118225, "learning_rate": 7.332077019075005e-05, "loss": 1.4406, "step": 5688 }, { "epoch": 0.5985270910047343, "grad_norm": 1.5651648044586182, "learning_rate": 7.328792953801296e-05, "loss": 1.7907, "step": 5689 }, { "epoch": 0.5986322987901105, "grad_norm": 1.8168197870254517, "learning_rate": 7.325509198726064e-05, "loss": 2.1356, "step": 5690 }, { "epoch": 0.5987375065754866, "grad_norm": 1.6306514739990234, "learning_rate": 7.322225754230641e-05, "loss": 2.0181, "step": 5691 }, { "epoch": 0.5988427143608627, "grad_norm": 1.733379602432251, "learning_rate": 7.318942620696323e-05, "loss": 1.3749, "step": 5692 }, { "epoch": 0.5989479221462388, "grad_norm": 2.751979351043701, "learning_rate": 7.315659798504373e-05, "loss": 1.2602, "step": 5693 }, { "epoch": 0.599053129931615, "grad_norm": 1.8571940660476685, "learning_rate": 7.312377288036009e-05, "loss": 1.5986, "step": 5694 }, { "epoch": 0.599158337716991, "grad_norm": 2.700819253921509, "learning_rate": 7.30909508967242e-05, "loss": 2.2613, "step": 5695 }, { "epoch": 0.5992635455023672, "grad_norm": 1.9650945663452148, "learning_rate": 7.30581320379476e-05, "loss": 1.4637, "step": 5696 }, { "epoch": 0.5993687532877433, "grad_norm": 2.2203495502471924, "learning_rate": 7.302531630784137e-05, "loss": 1.7075, "step": 5697 }, { "epoch": 0.5994739610731195, "grad_norm": 1.5563139915466309, "learning_rate": 7.299250371021635e-05, "loss": 1.9756, "step": 5698 }, { "epoch": 0.5995791688584955, "grad_norm": 1.332543969154358, "learning_rate": 7.295969424888295e-05, "loss": 1.4028, "step": 5699 }, { "epoch": 0.5996843766438716, "grad_norm": 1.5485048294067383, "learning_rate": 7.292688792765126e-05, "loss": 1.5521, "step": 5700 }, { "epoch": 0.5997895844292478, "grad_norm": 1.584691047668457, "learning_rate": 7.289408475033086e-05, "loss": 1.5764, "step": 5701 }, { "epoch": 0.5998947922146238, "grad_norm": 1.7013684511184692, "learning_rate": 7.286128472073114e-05, "loss": 2.1275, "step": 5702 }, { "epoch": 0.6, "grad_norm": 1.437490701675415, "learning_rate": 7.282848784266107e-05, "loss": 2.214, "step": 5703 }, { "epoch": 0.6001052077853761, "grad_norm": 1.080392837524414, "learning_rate": 7.279569411992926e-05, "loss": 1.4222, "step": 5704 }, { "epoch": 0.6002104155707523, "grad_norm": 1.6214826107025146, "learning_rate": 7.276290355634387e-05, "loss": 2.1349, "step": 5705 }, { "epoch": 0.6003156233561283, "grad_norm": 1.7013198137283325, "learning_rate": 7.273011615571282e-05, "loss": 1.5759, "step": 5706 }, { "epoch": 0.6004208311415045, "grad_norm": 1.5219919681549072, "learning_rate": 7.26973319218436e-05, "loss": 2.1324, "step": 5707 }, { "epoch": 0.6005260389268806, "grad_norm": 1.9878424406051636, "learning_rate": 7.266455085854329e-05, "loss": 2.4534, "step": 5708 }, { "epoch": 0.6006312467122568, "grad_norm": 1.9398351907730103, "learning_rate": 7.263177296961867e-05, "loss": 1.2152, "step": 5709 }, { "epoch": 0.6007364544976328, "grad_norm": 1.528769850730896, "learning_rate": 7.259899825887617e-05, "loss": 2.2585, "step": 5710 }, { "epoch": 0.600841662283009, "grad_norm": 1.6075571775436401, "learning_rate": 7.256622673012175e-05, "loss": 2.076, "step": 5711 }, { "epoch": 0.6009468700683851, "grad_norm": 1.6357545852661133, "learning_rate": 7.253345838716108e-05, "loss": 1.9542, "step": 5712 }, { "epoch": 0.6010520778537611, "grad_norm": 1.8700578212738037, "learning_rate": 7.250069323379945e-05, "loss": 1.5798, "step": 5713 }, { "epoch": 0.6011572856391373, "grad_norm": 2.4654650688171387, "learning_rate": 7.246793127384174e-05, "loss": 1.8467, "step": 5714 }, { "epoch": 0.6012624934245134, "grad_norm": 2.0539462566375732, "learning_rate": 7.243517251109254e-05, "loss": 1.6161, "step": 5715 }, { "epoch": 0.6013677012098896, "grad_norm": 2.258213520050049, "learning_rate": 7.240241694935597e-05, "loss": 1.9685, "step": 5716 }, { "epoch": 0.6014729089952656, "grad_norm": 1.7232213020324707, "learning_rate": 7.236966459243586e-05, "loss": 2.4748, "step": 5717 }, { "epoch": 0.6015781167806418, "grad_norm": 1.12027907371521, "learning_rate": 7.233691544413558e-05, "loss": 1.8557, "step": 5718 }, { "epoch": 0.6016833245660179, "grad_norm": 1.5948566198349, "learning_rate": 7.230416950825825e-05, "loss": 1.2433, "step": 5719 }, { "epoch": 0.601788532351394, "grad_norm": 1.1951110363006592, "learning_rate": 7.227142678860652e-05, "loss": 1.9291, "step": 5720 }, { "epoch": 0.6018937401367701, "grad_norm": 1.3234333992004395, "learning_rate": 7.22386872889827e-05, "loss": 2.0288, "step": 5721 }, { "epoch": 0.6019989479221463, "grad_norm": 1.8448857069015503, "learning_rate": 7.22059510131887e-05, "loss": 2.0855, "step": 5722 }, { "epoch": 0.6021041557075224, "grad_norm": 1.6373887062072754, "learning_rate": 7.217321796502605e-05, "loss": 1.0645, "step": 5723 }, { "epoch": 0.6022093634928984, "grad_norm": 2.3060243129730225, "learning_rate": 7.214048814829598e-05, "loss": 1.3767, "step": 5724 }, { "epoch": 0.6023145712782746, "grad_norm": 1.55854332447052, "learning_rate": 7.210776156679931e-05, "loss": 1.5182, "step": 5725 }, { "epoch": 0.6024197790636507, "grad_norm": 1.64441978931427, "learning_rate": 7.20750382243364e-05, "loss": 2.3997, "step": 5726 }, { "epoch": 0.6025249868490268, "grad_norm": 0.9315603375434875, "learning_rate": 7.204231812470736e-05, "loss": 2.0199, "step": 5727 }, { "epoch": 0.6026301946344029, "grad_norm": 1.946433663368225, "learning_rate": 7.200960127171188e-05, "loss": 2.2558, "step": 5728 }, { "epoch": 0.6027354024197791, "grad_norm": 1.7605088949203491, "learning_rate": 7.197688766914921e-05, "loss": 1.8627, "step": 5729 }, { "epoch": 0.6028406102051552, "grad_norm": 1.548949956893921, "learning_rate": 7.19441773208183e-05, "loss": 1.4795, "step": 5730 }, { "epoch": 0.6029458179905313, "grad_norm": 2.702446222305298, "learning_rate": 7.19114702305177e-05, "loss": 1.6094, "step": 5731 }, { "epoch": 0.6030510257759074, "grad_norm": 2.4522550106048584, "learning_rate": 7.187876640204556e-05, "loss": 1.7925, "step": 5732 }, { "epoch": 0.6031562335612836, "grad_norm": 1.2998313903808594, "learning_rate": 7.184606583919966e-05, "loss": 1.4222, "step": 5733 }, { "epoch": 0.6032614413466596, "grad_norm": 2.228344678878784, "learning_rate": 7.181336854577747e-05, "loss": 1.7094, "step": 5734 }, { "epoch": 0.6033666491320357, "grad_norm": 1.8566731214523315, "learning_rate": 7.178067452557595e-05, "loss": 1.943, "step": 5735 }, { "epoch": 0.6034718569174119, "grad_norm": 1.346657395362854, "learning_rate": 7.174798378239176e-05, "loss": 1.7142, "step": 5736 }, { "epoch": 0.603577064702788, "grad_norm": 1.3058844804763794, "learning_rate": 7.171529632002121e-05, "loss": 1.9819, "step": 5737 }, { "epoch": 0.6036822724881641, "grad_norm": 1.9217864274978638, "learning_rate": 7.168261214226014e-05, "loss": 1.6599, "step": 5738 }, { "epoch": 0.6037874802735402, "grad_norm": 1.5166659355163574, "learning_rate": 7.164993125290407e-05, "loss": 1.9654, "step": 5739 }, { "epoch": 0.6038926880589164, "grad_norm": 1.7919169664382935, "learning_rate": 7.161725365574811e-05, "loss": 1.7845, "step": 5740 }, { "epoch": 0.6039978958442925, "grad_norm": 1.7721195220947266, "learning_rate": 7.158457935458706e-05, "loss": 1.7366, "step": 5741 }, { "epoch": 0.6041031036296686, "grad_norm": 3.324281930923462, "learning_rate": 7.155190835321523e-05, "loss": 1.8447, "step": 5742 }, { "epoch": 0.6042083114150447, "grad_norm": 1.3101855516433716, "learning_rate": 7.151924065542665e-05, "loss": 1.9632, "step": 5743 }, { "epoch": 0.6043135192004209, "grad_norm": 1.5195070505142212, "learning_rate": 7.14865762650148e-05, "loss": 1.4127, "step": 5744 }, { "epoch": 0.6044187269857969, "grad_norm": 1.4836463928222656, "learning_rate": 7.1453915185773e-05, "loss": 2.0167, "step": 5745 }, { "epoch": 0.604523934771173, "grad_norm": 2.11446475982666, "learning_rate": 7.1421257421494e-05, "loss": 1.4259, "step": 5746 }, { "epoch": 0.6046291425565492, "grad_norm": 1.1224156618118286, "learning_rate": 7.138860297597026e-05, "loss": 1.6661, "step": 5747 }, { "epoch": 0.6047343503419254, "grad_norm": 0.8229581713676453, "learning_rate": 7.135595185299386e-05, "loss": 1.5664, "step": 5748 }, { "epoch": 0.6048395581273014, "grad_norm": 1.6141940355300903, "learning_rate": 7.132330405635645e-05, "loss": 1.6177, "step": 5749 }, { "epoch": 0.6049447659126775, "grad_norm": 1.415408968925476, "learning_rate": 7.12906595898493e-05, "loss": 1.1493, "step": 5750 }, { "epoch": 0.6050499736980537, "grad_norm": 2.1405954360961914, "learning_rate": 7.12580184572633e-05, "loss": 2.1279, "step": 5751 }, { "epoch": 0.6051551814834297, "grad_norm": 1.3183344602584839, "learning_rate": 7.122538066238902e-05, "loss": 1.8046, "step": 5752 }, { "epoch": 0.6052603892688059, "grad_norm": 2.0879404544830322, "learning_rate": 7.119274620901649e-05, "loss": 1.4804, "step": 5753 }, { "epoch": 0.605365597054182, "grad_norm": 1.4626708030700684, "learning_rate": 7.116011510093547e-05, "loss": 1.9303, "step": 5754 }, { "epoch": 0.6054708048395582, "grad_norm": 1.3531354665756226, "learning_rate": 7.112748734193537e-05, "loss": 1.9332, "step": 5755 }, { "epoch": 0.6055760126249342, "grad_norm": 2.5419464111328125, "learning_rate": 7.109486293580505e-05, "loss": 2.2981, "step": 5756 }, { "epoch": 0.6056812204103104, "grad_norm": 1.8105266094207764, "learning_rate": 7.106224188633311e-05, "loss": 2.1267, "step": 5757 }, { "epoch": 0.6057864281956865, "grad_norm": 1.5312819480895996, "learning_rate": 7.102962419730776e-05, "loss": 1.9916, "step": 5758 }, { "epoch": 0.6058916359810625, "grad_norm": 2.3092429637908936, "learning_rate": 7.099700987251674e-05, "loss": 2.2213, "step": 5759 }, { "epoch": 0.6059968437664387, "grad_norm": 1.4174106121063232, "learning_rate": 7.096439891574745e-05, "loss": 1.9643, "step": 5760 }, { "epoch": 0.6061020515518148, "grad_norm": 2.2073097229003906, "learning_rate": 7.09317913307869e-05, "loss": 2.1425, "step": 5761 }, { "epoch": 0.606207259337191, "grad_norm": 1.472976803779602, "learning_rate": 7.089918712142172e-05, "loss": 1.3008, "step": 5762 }, { "epoch": 0.606312467122567, "grad_norm": 27.837261199951172, "learning_rate": 7.086658629143811e-05, "loss": 1.9546, "step": 5763 }, { "epoch": 0.6064176749079432, "grad_norm": 1.4600732326507568, "learning_rate": 7.083398884462194e-05, "loss": 1.7937, "step": 5764 }, { "epoch": 0.6065228826933193, "grad_norm": 1.1610373258590698, "learning_rate": 7.080139478475853e-05, "loss": 2.1027, "step": 5765 }, { "epoch": 0.6066280904786954, "grad_norm": 1.7224698066711426, "learning_rate": 7.076880411563305e-05, "loss": 1.8358, "step": 5766 }, { "epoch": 0.6067332982640715, "grad_norm": 1.7691140174865723, "learning_rate": 7.073621684103007e-05, "loss": 1.7801, "step": 5767 }, { "epoch": 0.6068385060494477, "grad_norm": 1.6140860319137573, "learning_rate": 7.070363296473384e-05, "loss": 1.1727, "step": 5768 }, { "epoch": 0.6069437138348238, "grad_norm": 1.1361531019210815, "learning_rate": 7.067105249052828e-05, "loss": 1.7667, "step": 5769 }, { "epoch": 0.6070489216201999, "grad_norm": 1.355162501335144, "learning_rate": 7.063847542219679e-05, "loss": 1.0353, "step": 5770 }, { "epoch": 0.607154129405576, "grad_norm": 0.9227995872497559, "learning_rate": 7.060590176352248e-05, "loss": 1.9074, "step": 5771 }, { "epoch": 0.6072593371909522, "grad_norm": 1.033094882965088, "learning_rate": 7.057333151828799e-05, "loss": 2.163, "step": 5772 }, { "epoch": 0.6073645449763283, "grad_norm": 1.4324654340744019, "learning_rate": 7.054076469027565e-05, "loss": 1.7172, "step": 5773 }, { "epoch": 0.6074697527617043, "grad_norm": 2.334664821624756, "learning_rate": 7.050820128326724e-05, "loss": 2.092, "step": 5774 }, { "epoch": 0.6075749605470805, "grad_norm": 2.054091453552246, "learning_rate": 7.047564130104434e-05, "loss": 1.0949, "step": 5775 }, { "epoch": 0.6076801683324566, "grad_norm": 2.1884820461273193, "learning_rate": 7.044308474738798e-05, "loss": 1.7727, "step": 5776 }, { "epoch": 0.6077853761178327, "grad_norm": 1.3433359861373901, "learning_rate": 7.041053162607886e-05, "loss": 1.8029, "step": 5777 }, { "epoch": 0.6078905839032088, "grad_norm": 1.5086860656738281, "learning_rate": 7.037798194089728e-05, "loss": 1.7065, "step": 5778 }, { "epoch": 0.607995791688585, "grad_norm": 1.6031910181045532, "learning_rate": 7.034543569562313e-05, "loss": 1.7895, "step": 5779 }, { "epoch": 0.6081009994739611, "grad_norm": 1.8059049844741821, "learning_rate": 7.031289289403584e-05, "loss": 1.7179, "step": 5780 }, { "epoch": 0.6082062072593372, "grad_norm": 1.4660775661468506, "learning_rate": 7.028035353991456e-05, "loss": 2.0142, "step": 5781 }, { "epoch": 0.6083114150447133, "grad_norm": 1.5748193264007568, "learning_rate": 7.024781763703797e-05, "loss": 1.9199, "step": 5782 }, { "epoch": 0.6084166228300895, "grad_norm": 1.2944796085357666, "learning_rate": 7.021528518918433e-05, "loss": 1.1247, "step": 5783 }, { "epoch": 0.6085218306154655, "grad_norm": 1.12498140335083, "learning_rate": 7.018275620013154e-05, "loss": 1.3863, "step": 5784 }, { "epoch": 0.6086270384008416, "grad_norm": 1.4992774724960327, "learning_rate": 7.01502306736571e-05, "loss": 1.9758, "step": 5785 }, { "epoch": 0.6087322461862178, "grad_norm": 1.8976898193359375, "learning_rate": 7.01177086135381e-05, "loss": 1.6387, "step": 5786 }, { "epoch": 0.608837453971594, "grad_norm": 1.2787110805511475, "learning_rate": 7.008519002355118e-05, "loss": 1.9901, "step": 5787 }, { "epoch": 0.60894266175697, "grad_norm": 1.5045870542526245, "learning_rate": 7.005267490747263e-05, "loss": 1.5003, "step": 5788 }, { "epoch": 0.6090478695423461, "grad_norm": 1.5272982120513916, "learning_rate": 7.002016326907831e-05, "loss": 2.1544, "step": 5789 }, { "epoch": 0.6091530773277223, "grad_norm": 1.7053319215774536, "learning_rate": 6.998765511214374e-05, "loss": 1.8607, "step": 5790 }, { "epoch": 0.6092582851130983, "grad_norm": 1.7263697385787964, "learning_rate": 6.995515044044393e-05, "loss": 2.009, "step": 5791 }, { "epoch": 0.6093634928984745, "grad_norm": 1.4831238985061646, "learning_rate": 6.992264925775356e-05, "loss": 1.7212, "step": 5792 }, { "epoch": 0.6094687006838506, "grad_norm": 1.557859182357788, "learning_rate": 6.989015156784689e-05, "loss": 1.6485, "step": 5793 }, { "epoch": 0.6095739084692268, "grad_norm": 1.1065948009490967, "learning_rate": 6.98576573744978e-05, "loss": 1.7752, "step": 5794 }, { "epoch": 0.6096791162546028, "grad_norm": 1.3376137018203735, "learning_rate": 6.982516668147967e-05, "loss": 1.8011, "step": 5795 }, { "epoch": 0.609784324039979, "grad_norm": 1.223147988319397, "learning_rate": 6.979267949256558e-05, "loss": 1.2879, "step": 5796 }, { "epoch": 0.6098895318253551, "grad_norm": 1.0986629724502563, "learning_rate": 6.976019581152818e-05, "loss": 2.0554, "step": 5797 }, { "epoch": 0.6099947396107311, "grad_norm": 1.3327361345291138, "learning_rate": 6.972771564213963e-05, "loss": 1.5186, "step": 5798 }, { "epoch": 0.6100999473961073, "grad_norm": 1.3183317184448242, "learning_rate": 6.969523898817176e-05, "loss": 1.9726, "step": 5799 }, { "epoch": 0.6102051551814834, "grad_norm": 1.7785512208938599, "learning_rate": 6.966276585339604e-05, "loss": 2.1342, "step": 5800 }, { "epoch": 0.6103103629668596, "grad_norm": 1.0888605117797852, "learning_rate": 6.96302962415834e-05, "loss": 1.4301, "step": 5801 }, { "epoch": 0.6104155707522356, "grad_norm": 1.8897411823272705, "learning_rate": 6.959783015650446e-05, "loss": 2.3269, "step": 5802 }, { "epoch": 0.6105207785376118, "grad_norm": 1.019864559173584, "learning_rate": 6.956536760192938e-05, "loss": 1.5989, "step": 5803 }, { "epoch": 0.6106259863229879, "grad_norm": 1.5215952396392822, "learning_rate": 6.953290858162794e-05, "loss": 2.14, "step": 5804 }, { "epoch": 0.6107311941083641, "grad_norm": 1.6711841821670532, "learning_rate": 6.95004530993695e-05, "loss": 2.1474, "step": 5805 }, { "epoch": 0.6108364018937401, "grad_norm": 1.50629460811615, "learning_rate": 6.946800115892305e-05, "loss": 2.319, "step": 5806 }, { "epoch": 0.6109416096791163, "grad_norm": 1.2559032440185547, "learning_rate": 6.943555276405705e-05, "loss": 1.4975, "step": 5807 }, { "epoch": 0.6110468174644924, "grad_norm": 0.9776951670646667, "learning_rate": 6.940310791853968e-05, "loss": 2.0983, "step": 5808 }, { "epoch": 0.6111520252498684, "grad_norm": 1.521141767501831, "learning_rate": 6.937066662613863e-05, "loss": 1.5946, "step": 5809 }, { "epoch": 0.6112572330352446, "grad_norm": 1.377365231513977, "learning_rate": 6.933822889062118e-05, "loss": 1.4309, "step": 5810 }, { "epoch": 0.6113624408206207, "grad_norm": 1.1052137613296509, "learning_rate": 6.930579471575427e-05, "loss": 2.1511, "step": 5811 }, { "epoch": 0.6114676486059969, "grad_norm": 1.3557707071304321, "learning_rate": 6.927336410530432e-05, "loss": 1.6768, "step": 5812 }, { "epoch": 0.6115728563913729, "grad_norm": 2.0096118450164795, "learning_rate": 6.924093706303743e-05, "loss": 1.9034, "step": 5813 }, { "epoch": 0.6116780641767491, "grad_norm": 2.108721971511841, "learning_rate": 6.920851359271922e-05, "loss": 1.6447, "step": 5814 }, { "epoch": 0.6117832719621252, "grad_norm": 1.1239184141159058, "learning_rate": 6.917609369811496e-05, "loss": 1.9551, "step": 5815 }, { "epoch": 0.6118884797475013, "grad_norm": 1.2308555841445923, "learning_rate": 6.914367738298941e-05, "loss": 2.1679, "step": 5816 }, { "epoch": 0.6119936875328774, "grad_norm": 1.2711541652679443, "learning_rate": 6.9111264651107e-05, "loss": 1.9665, "step": 5817 }, { "epoch": 0.6120988953182536, "grad_norm": 1.186928629875183, "learning_rate": 6.907885550623172e-05, "loss": 1.644, "step": 5818 }, { "epoch": 0.6122041031036297, "grad_norm": 1.484810709953308, "learning_rate": 6.904644995212713e-05, "loss": 1.3018, "step": 5819 }, { "epoch": 0.6123093108890058, "grad_norm": 1.1081911325454712, "learning_rate": 6.901404799255638e-05, "loss": 1.6068, "step": 5820 }, { "epoch": 0.6124145186743819, "grad_norm": 1.3067530393600464, "learning_rate": 6.898164963128221e-05, "loss": 1.6391, "step": 5821 }, { "epoch": 0.612519726459758, "grad_norm": 0.9942488670349121, "learning_rate": 6.894925487206691e-05, "loss": 1.9811, "step": 5822 }, { "epoch": 0.6126249342451341, "grad_norm": 1.2306791543960571, "learning_rate": 6.891686371867239e-05, "loss": 1.9109, "step": 5823 }, { "epoch": 0.6127301420305102, "grad_norm": 1.1397932767868042, "learning_rate": 6.888447617486016e-05, "loss": 1.5098, "step": 5824 }, { "epoch": 0.6128353498158864, "grad_norm": 1.0217232704162598, "learning_rate": 6.885209224439123e-05, "loss": 1.4489, "step": 5825 }, { "epoch": 0.6129405576012625, "grad_norm": 2.21636700630188, "learning_rate": 6.881971193102625e-05, "loss": 1.5184, "step": 5826 }, { "epoch": 0.6130457653866386, "grad_norm": 1.0205903053283691, "learning_rate": 6.878733523852549e-05, "loss": 1.8485, "step": 5827 }, { "epoch": 0.6131509731720147, "grad_norm": 1.5595358610153198, "learning_rate": 6.875496217064867e-05, "loss": 1.5117, "step": 5828 }, { "epoch": 0.6132561809573909, "grad_norm": 1.1299363374710083, "learning_rate": 6.872259273115525e-05, "loss": 1.9574, "step": 5829 }, { "epoch": 0.6133613887427669, "grad_norm": 1.109512448310852, "learning_rate": 6.869022692380411e-05, "loss": 2.0785, "step": 5830 }, { "epoch": 0.6134665965281431, "grad_norm": 1.951183795928955, "learning_rate": 6.865786475235381e-05, "loss": 2.1442, "step": 5831 }, { "epoch": 0.6135718043135192, "grad_norm": 1.3576364517211914, "learning_rate": 6.862550622056249e-05, "loss": 1.6519, "step": 5832 }, { "epoch": 0.6136770120988954, "grad_norm": 1.2096575498580933, "learning_rate": 6.85931513321878e-05, "loss": 1.4433, "step": 5833 }, { "epoch": 0.6137822198842714, "grad_norm": 2.158343553543091, "learning_rate": 6.856080009098701e-05, "loss": 1.4268, "step": 5834 }, { "epoch": 0.6138874276696475, "grad_norm": 1.6646260023117065, "learning_rate": 6.852845250071702e-05, "loss": 1.9002, "step": 5835 }, { "epoch": 0.6139926354550237, "grad_norm": 1.3010480403900146, "learning_rate": 6.849610856513418e-05, "loss": 1.8029, "step": 5836 }, { "epoch": 0.6140978432403998, "grad_norm": 1.888411283493042, "learning_rate": 6.846376828799451e-05, "loss": 1.7934, "step": 5837 }, { "epoch": 0.6142030510257759, "grad_norm": 1.5725120306015015, "learning_rate": 6.843143167305361e-05, "loss": 1.2792, "step": 5838 }, { "epoch": 0.614308258811152, "grad_norm": 1.0562975406646729, "learning_rate": 6.839909872406657e-05, "loss": 1.8781, "step": 5839 }, { "epoch": 0.6144134665965282, "grad_norm": 1.287243127822876, "learning_rate": 6.836676944478812e-05, "loss": 1.6942, "step": 5840 }, { "epoch": 0.6145186743819042, "grad_norm": 1.386404275894165, "learning_rate": 6.83344438389726e-05, "loss": 1.4059, "step": 5841 }, { "epoch": 0.6146238821672804, "grad_norm": 1.312274694442749, "learning_rate": 6.830212191037386e-05, "loss": 1.6319, "step": 5842 }, { "epoch": 0.6147290899526565, "grad_norm": 1.408747673034668, "learning_rate": 6.826980366274529e-05, "loss": 1.0995, "step": 5843 }, { "epoch": 0.6148342977380327, "grad_norm": 1.132150650024414, "learning_rate": 6.823748909983994e-05, "loss": 1.7678, "step": 5844 }, { "epoch": 0.6149395055234087, "grad_norm": 1.0861999988555908, "learning_rate": 6.820517822541041e-05, "loss": 1.2332, "step": 5845 }, { "epoch": 0.6150447133087849, "grad_norm": 1.2827601432800293, "learning_rate": 6.81728710432088e-05, "loss": 2.0137, "step": 5846 }, { "epoch": 0.615149921094161, "grad_norm": 1.5106278657913208, "learning_rate": 6.81405675569869e-05, "loss": 1.7959, "step": 5847 }, { "epoch": 0.615255128879537, "grad_norm": 1.3239543437957764, "learning_rate": 6.810826777049597e-05, "loss": 1.6723, "step": 5848 }, { "epoch": 0.6153603366649132, "grad_norm": 1.4004448652267456, "learning_rate": 6.807597168748689e-05, "loss": 1.9686, "step": 5849 }, { "epoch": 0.6154655444502893, "grad_norm": 0.9085174798965454, "learning_rate": 6.804367931171013e-05, "loss": 1.5498, "step": 5850 }, { "epoch": 0.6155707522356655, "grad_norm": 1.3527324199676514, "learning_rate": 6.801139064691562e-05, "loss": 1.4251, "step": 5851 }, { "epoch": 0.6156759600210415, "grad_norm": 1.9584040641784668, "learning_rate": 6.797910569685297e-05, "loss": 1.6893, "step": 5852 }, { "epoch": 0.6157811678064177, "grad_norm": 1.093604326248169, "learning_rate": 6.794682446527137e-05, "loss": 1.369, "step": 5853 }, { "epoch": 0.6158863755917938, "grad_norm": 1.4752413034439087, "learning_rate": 6.791454695591945e-05, "loss": 1.6492, "step": 5854 }, { "epoch": 0.6159915833771699, "grad_norm": 0.9944615960121155, "learning_rate": 6.788227317254556e-05, "loss": 1.9943, "step": 5855 }, { "epoch": 0.616096791162546, "grad_norm": 1.6758373975753784, "learning_rate": 6.785000311889754e-05, "loss": 1.9175, "step": 5856 }, { "epoch": 0.6162019989479222, "grad_norm": 2.0718863010406494, "learning_rate": 6.781773679872276e-05, "loss": 1.7659, "step": 5857 }, { "epoch": 0.6163072067332983, "grad_norm": 1.6973742246627808, "learning_rate": 6.778547421576825e-05, "loss": 1.8969, "step": 5858 }, { "epoch": 0.6164124145186743, "grad_norm": 1.7583503723144531, "learning_rate": 6.775321537378054e-05, "loss": 1.8226, "step": 5859 }, { "epoch": 0.6165176223040505, "grad_norm": 1.5600526332855225, "learning_rate": 6.772096027650574e-05, "loss": 1.6122, "step": 5860 }, { "epoch": 0.6166228300894266, "grad_norm": 1.8818323612213135, "learning_rate": 6.768870892768952e-05, "loss": 1.4151, "step": 5861 }, { "epoch": 0.6167280378748027, "grad_norm": 1.4384057521820068, "learning_rate": 6.765646133107714e-05, "loss": 2.2372, "step": 5862 }, { "epoch": 0.6168332456601788, "grad_norm": 1.1038570404052734, "learning_rate": 6.762421749041342e-05, "loss": 2.0336, "step": 5863 }, { "epoch": 0.616938453445555, "grad_norm": 1.2065256834030151, "learning_rate": 6.759197740944267e-05, "loss": 1.1006, "step": 5864 }, { "epoch": 0.6170436612309311, "grad_norm": 2.1591498851776123, "learning_rate": 6.75597410919089e-05, "loss": 1.3848, "step": 5865 }, { "epoch": 0.6171488690163072, "grad_norm": 1.131312608718872, "learning_rate": 6.752750854155558e-05, "loss": 1.5338, "step": 5866 }, { "epoch": 0.6172540768016833, "grad_norm": 2.397049903869629, "learning_rate": 6.749527976212573e-05, "loss": 1.3482, "step": 5867 }, { "epoch": 0.6173592845870595, "grad_norm": 1.6215959787368774, "learning_rate": 6.746305475736202e-05, "loss": 1.6116, "step": 5868 }, { "epoch": 0.6174644923724356, "grad_norm": 1.2333688735961914, "learning_rate": 6.743083353100664e-05, "loss": 1.7272, "step": 5869 }, { "epoch": 0.6175697001578117, "grad_norm": 2.3157310485839844, "learning_rate": 6.739861608680129e-05, "loss": 1.7432, "step": 5870 }, { "epoch": 0.6176749079431878, "grad_norm": 1.532241940498352, "learning_rate": 6.736640242848735e-05, "loss": 1.1707, "step": 5871 }, { "epoch": 0.617780115728564, "grad_norm": 1.3878227472305298, "learning_rate": 6.733419255980559e-05, "loss": 2.2097, "step": 5872 }, { "epoch": 0.61788532351394, "grad_norm": 1.6810948848724365, "learning_rate": 6.730198648449648e-05, "loss": 1.7966, "step": 5873 }, { "epoch": 0.6179905312993161, "grad_norm": 1.3429570198059082, "learning_rate": 6.726978420630002e-05, "loss": 1.7373, "step": 5874 }, { "epoch": 0.6180957390846923, "grad_norm": 1.960030436515808, "learning_rate": 6.723758572895573e-05, "loss": 2.0929, "step": 5875 }, { "epoch": 0.6182009468700684, "grad_norm": 2.1462693214416504, "learning_rate": 6.720539105620272e-05, "loss": 1.7261, "step": 5876 }, { "epoch": 0.6183061546554445, "grad_norm": 1.1600507497787476, "learning_rate": 6.717320019177969e-05, "loss": 1.5239, "step": 5877 }, { "epoch": 0.6184113624408206, "grad_norm": 1.3913511037826538, "learning_rate": 6.71410131394248e-05, "loss": 1.8831, "step": 5878 }, { "epoch": 0.6185165702261968, "grad_norm": 1.2141426801681519, "learning_rate": 6.710882990287585e-05, "loss": 2.0194, "step": 5879 }, { "epoch": 0.6186217780115728, "grad_norm": 1.3260263204574585, "learning_rate": 6.70766504858702e-05, "loss": 1.4936, "step": 5880 }, { "epoch": 0.618726985796949, "grad_norm": 1.1059815883636475, "learning_rate": 6.704447489214468e-05, "loss": 2.0024, "step": 5881 }, { "epoch": 0.6188321935823251, "grad_norm": 1.362945795059204, "learning_rate": 6.701230312543578e-05, "loss": 1.6022, "step": 5882 }, { "epoch": 0.6189374013677013, "grad_norm": 1.131805658340454, "learning_rate": 6.698013518947952e-05, "loss": 2.0098, "step": 5883 }, { "epoch": 0.6190426091530773, "grad_norm": 1.8835530281066895, "learning_rate": 6.69479710880114e-05, "loss": 1.5957, "step": 5884 }, { "epoch": 0.6191478169384534, "grad_norm": 1.4850059747695923, "learning_rate": 6.691581082476656e-05, "loss": 1.617, "step": 5885 }, { "epoch": 0.6192530247238296, "grad_norm": 1.5727633237838745, "learning_rate": 6.688365440347965e-05, "loss": 1.9237, "step": 5886 }, { "epoch": 0.6193582325092056, "grad_norm": 1.0962116718292236, "learning_rate": 6.685150182788495e-05, "loss": 1.6268, "step": 5887 }, { "epoch": 0.6194634402945818, "grad_norm": 2.0313258171081543, "learning_rate": 6.681935310171616e-05, "loss": 1.8827, "step": 5888 }, { "epoch": 0.6195686480799579, "grad_norm": 1.543372631072998, "learning_rate": 6.678720822870663e-05, "loss": 1.8296, "step": 5889 }, { "epoch": 0.6196738558653341, "grad_norm": 1.0897983312606812, "learning_rate": 6.675506721258926e-05, "loss": 1.9225, "step": 5890 }, { "epoch": 0.6197790636507101, "grad_norm": 1.2541539669036865, "learning_rate": 6.672293005709644e-05, "loss": 1.6706, "step": 5891 }, { "epoch": 0.6198842714360863, "grad_norm": 1.780050277709961, "learning_rate": 6.669079676596019e-05, "loss": 1.6821, "step": 5892 }, { "epoch": 0.6199894792214624, "grad_norm": 1.5210779905319214, "learning_rate": 6.665866734291205e-05, "loss": 1.7069, "step": 5893 }, { "epoch": 0.6200946870068385, "grad_norm": 1.2912250757217407, "learning_rate": 6.662654179168306e-05, "loss": 1.7669, "step": 5894 }, { "epoch": 0.6201998947922146, "grad_norm": 1.736703634262085, "learning_rate": 6.659442011600387e-05, "loss": 1.457, "step": 5895 }, { "epoch": 0.6203051025775908, "grad_norm": 1.1300891637802124, "learning_rate": 6.656230231960466e-05, "loss": 1.8526, "step": 5896 }, { "epoch": 0.6204103103629669, "grad_norm": 1.8092447519302368, "learning_rate": 6.653018840621516e-05, "loss": 2.3459, "step": 5897 }, { "epoch": 0.6205155181483429, "grad_norm": 1.1894848346710205, "learning_rate": 6.649807837956472e-05, "loss": 1.5572, "step": 5898 }, { "epoch": 0.6206207259337191, "grad_norm": 1.3011435270309448, "learning_rate": 6.646597224338207e-05, "loss": 1.6887, "step": 5899 }, { "epoch": 0.6207259337190952, "grad_norm": 2.1688480377197266, "learning_rate": 6.643387000139565e-05, "loss": 1.3811, "step": 5900 }, { "epoch": 0.6208311415044714, "grad_norm": 1.4247039556503296, "learning_rate": 6.640177165733339e-05, "loss": 1.7982, "step": 5901 }, { "epoch": 0.6209363492898474, "grad_norm": 0.9942802786827087, "learning_rate": 6.636967721492274e-05, "loss": 1.3522, "step": 5902 }, { "epoch": 0.6210415570752236, "grad_norm": 1.3744851350784302, "learning_rate": 6.633758667789074e-05, "loss": 1.7667, "step": 5903 }, { "epoch": 0.6211467648605997, "grad_norm": 1.2810285091400146, "learning_rate": 6.630550004996396e-05, "loss": 2.0339, "step": 5904 }, { "epoch": 0.6212519726459758, "grad_norm": 1.1830532550811768, "learning_rate": 6.627341733486847e-05, "loss": 1.8695, "step": 5905 }, { "epoch": 0.6213571804313519, "grad_norm": 1.8294991254806519, "learning_rate": 6.624133853632998e-05, "loss": 1.95, "step": 5906 }, { "epoch": 0.6214623882167281, "grad_norm": 2.3274435997009277, "learning_rate": 6.620926365807372e-05, "loss": 1.9036, "step": 5907 }, { "epoch": 0.6215675960021042, "grad_norm": 1.6169378757476807, "learning_rate": 6.617719270382436e-05, "loss": 2.0314, "step": 5908 }, { "epoch": 0.6216728037874802, "grad_norm": 1.2674856185913086, "learning_rate": 6.614512567730625e-05, "loss": 1.1626, "step": 5909 }, { "epoch": 0.6217780115728564, "grad_norm": 1.3875672817230225, "learning_rate": 6.611306258224319e-05, "loss": 1.5651, "step": 5910 }, { "epoch": 0.6218832193582325, "grad_norm": 2.156421422958374, "learning_rate": 6.608100342235861e-05, "loss": 2.0087, "step": 5911 }, { "epoch": 0.6219884271436086, "grad_norm": 1.7543668746948242, "learning_rate": 6.604894820137541e-05, "loss": 1.9096, "step": 5912 }, { "epoch": 0.6220936349289847, "grad_norm": 1.749904990196228, "learning_rate": 6.601689692301604e-05, "loss": 1.5697, "step": 5913 }, { "epoch": 0.6221988427143609, "grad_norm": 1.3653181791305542, "learning_rate": 6.598484959100257e-05, "loss": 1.8171, "step": 5914 }, { "epoch": 0.622304050499737, "grad_norm": 1.5228540897369385, "learning_rate": 6.59528062090565e-05, "loss": 1.6228, "step": 5915 }, { "epoch": 0.6224092582851131, "grad_norm": 1.0161175727844238, "learning_rate": 6.592076678089889e-05, "loss": 1.3673, "step": 5916 }, { "epoch": 0.6225144660704892, "grad_norm": 1.3234752416610718, "learning_rate": 6.588873131025042e-05, "loss": 1.1581, "step": 5917 }, { "epoch": 0.6226196738558654, "grad_norm": 1.3997480869293213, "learning_rate": 6.585669980083128e-05, "loss": 1.6416, "step": 5918 }, { "epoch": 0.6227248816412414, "grad_norm": 2.785766124725342, "learning_rate": 6.582467225636116e-05, "loss": 1.6409, "step": 5919 }, { "epoch": 0.6228300894266176, "grad_norm": 1.0819607973098755, "learning_rate": 6.579264868055928e-05, "loss": 1.4652, "step": 5920 }, { "epoch": 0.6229352972119937, "grad_norm": 1.7267218828201294, "learning_rate": 6.576062907714448e-05, "loss": 2.1395, "step": 5921 }, { "epoch": 0.6230405049973698, "grad_norm": 1.7600226402282715, "learning_rate": 6.572861344983511e-05, "loss": 2.0258, "step": 5922 }, { "epoch": 0.6231457127827459, "grad_norm": 1.3963185548782349, "learning_rate": 6.569660180234898e-05, "loss": 1.7034, "step": 5923 }, { "epoch": 0.623250920568122, "grad_norm": 1.8364591598510742, "learning_rate": 6.566459413840351e-05, "loss": 1.9706, "step": 5924 }, { "epoch": 0.6233561283534982, "grad_norm": 1.4244076013565063, "learning_rate": 6.56325904617157e-05, "loss": 2.1374, "step": 5925 }, { "epoch": 0.6234613361388742, "grad_norm": 1.3648873567581177, "learning_rate": 6.560059077600195e-05, "loss": 1.6418, "step": 5926 }, { "epoch": 0.6235665439242504, "grad_norm": 1.695998191833496, "learning_rate": 6.556859508497834e-05, "loss": 1.712, "step": 5927 }, { "epoch": 0.6236717517096265, "grad_norm": 2.2389862537384033, "learning_rate": 6.553660339236041e-05, "loss": 1.6126, "step": 5928 }, { "epoch": 0.6237769594950027, "grad_norm": 1.46173095703125, "learning_rate": 6.550461570186322e-05, "loss": 2.1347, "step": 5929 }, { "epoch": 0.6238821672803787, "grad_norm": 1.7502105236053467, "learning_rate": 6.547263201720143e-05, "loss": 1.4319, "step": 5930 }, { "epoch": 0.6239873750657549, "grad_norm": 1.4961512088775635, "learning_rate": 6.54406523420892e-05, "loss": 2.2034, "step": 5931 }, { "epoch": 0.624092582851131, "grad_norm": 1.5784069299697876, "learning_rate": 6.54086766802402e-05, "loss": 2.0528, "step": 5932 }, { "epoch": 0.6241977906365072, "grad_norm": 1.485357642173767, "learning_rate": 6.537670503536766e-05, "loss": 2.0438, "step": 5933 }, { "epoch": 0.6243029984218832, "grad_norm": 1.615233063697815, "learning_rate": 6.534473741118434e-05, "loss": 1.5522, "step": 5934 }, { "epoch": 0.6244082062072593, "grad_norm": 1.7099108695983887, "learning_rate": 6.53127738114026e-05, "loss": 1.8988, "step": 5935 }, { "epoch": 0.6245134139926355, "grad_norm": 1.4433890581130981, "learning_rate": 6.528081423973422e-05, "loss": 2.0022, "step": 5936 }, { "epoch": 0.6246186217780115, "grad_norm": 1.8532782793045044, "learning_rate": 6.52488586998905e-05, "loss": 1.748, "step": 5937 }, { "epoch": 0.6247238295633877, "grad_norm": 1.721971869468689, "learning_rate": 6.52169071955824e-05, "loss": 1.4468, "step": 5938 }, { "epoch": 0.6248290373487638, "grad_norm": 1.340277910232544, "learning_rate": 6.518495973052036e-05, "loss": 1.4, "step": 5939 }, { "epoch": 0.62493424513414, "grad_norm": 1.9042940139770508, "learning_rate": 6.515301630841426e-05, "loss": 1.6993, "step": 5940 }, { "epoch": 0.625039452919516, "grad_norm": 2.6405863761901855, "learning_rate": 6.512107693297365e-05, "loss": 1.4162, "step": 5941 }, { "epoch": 0.6251446607048922, "grad_norm": 1.6686121225357056, "learning_rate": 6.508914160790752e-05, "loss": 1.3212, "step": 5942 }, { "epoch": 0.6252498684902683, "grad_norm": 1.4782112836837769, "learning_rate": 6.505721033692443e-05, "loss": 1.7841, "step": 5943 }, { "epoch": 0.6253550762756444, "grad_norm": 1.8376623392105103, "learning_rate": 6.502528312373241e-05, "loss": 1.5778, "step": 5944 }, { "epoch": 0.6254602840610205, "grad_norm": 1.424533724784851, "learning_rate": 6.49933599720391e-05, "loss": 1.4852, "step": 5945 }, { "epoch": 0.6255654918463966, "grad_norm": 1.5104315280914307, "learning_rate": 6.496144088555162e-05, "loss": 1.6104, "step": 5946 }, { "epoch": 0.6256706996317728, "grad_norm": 1.5233991146087646, "learning_rate": 6.492952586797665e-05, "loss": 1.2861, "step": 5947 }, { "epoch": 0.6257759074171488, "grad_norm": 1.3382761478424072, "learning_rate": 6.489761492302034e-05, "loss": 2.0656, "step": 5948 }, { "epoch": 0.625881115202525, "grad_norm": 1.0290378332138062, "learning_rate": 6.486570805438843e-05, "loss": 1.7404, "step": 5949 }, { "epoch": 0.6259863229879011, "grad_norm": 1.8992608785629272, "learning_rate": 6.483380526578615e-05, "loss": 1.3986, "step": 5950 }, { "epoch": 0.6260915307732772, "grad_norm": 1.5879778861999512, "learning_rate": 6.480190656091825e-05, "loss": 1.7387, "step": 5951 }, { "epoch": 0.6261967385586533, "grad_norm": 1.1449265480041504, "learning_rate": 6.477001194348906e-05, "loss": 1.3209, "step": 5952 }, { "epoch": 0.6263019463440295, "grad_norm": 1.9107697010040283, "learning_rate": 6.473812141720234e-05, "loss": 1.6479, "step": 5953 }, { "epoch": 0.6264071541294056, "grad_norm": 2.4911386966705322, "learning_rate": 6.47062349857615e-05, "loss": 1.9478, "step": 5954 }, { "epoch": 0.6265123619147817, "grad_norm": 1.8238067626953125, "learning_rate": 6.467435265286935e-05, "loss": 1.6787, "step": 5955 }, { "epoch": 0.6266175697001578, "grad_norm": 1.4120367765426636, "learning_rate": 6.46424744222283e-05, "loss": 1.6102, "step": 5956 }, { "epoch": 0.626722777485534, "grad_norm": 0.9109852910041809, "learning_rate": 6.461060029754031e-05, "loss": 1.982, "step": 5957 }, { "epoch": 0.62682798527091, "grad_norm": 1.119532823562622, "learning_rate": 6.457873028250674e-05, "loss": 1.7647, "step": 5958 }, { "epoch": 0.6269331930562861, "grad_norm": 1.667096734046936, "learning_rate": 6.454686438082858e-05, "loss": 1.2471, "step": 5959 }, { "epoch": 0.6270384008416623, "grad_norm": 1.9557980298995972, "learning_rate": 6.45150025962063e-05, "loss": 1.553, "step": 5960 }, { "epoch": 0.6271436086270384, "grad_norm": 1.4863619804382324, "learning_rate": 6.448314493233995e-05, "loss": 1.706, "step": 5961 }, { "epoch": 0.6272488164124145, "grad_norm": 1.5268634557724, "learning_rate": 6.445129139292899e-05, "loss": 1.7509, "step": 5962 }, { "epoch": 0.6273540241977906, "grad_norm": 1.5882983207702637, "learning_rate": 6.441944198167253e-05, "loss": 1.9265, "step": 5963 }, { "epoch": 0.6274592319831668, "grad_norm": 1.2085886001586914, "learning_rate": 6.43875967022691e-05, "loss": 1.6107, "step": 5964 }, { "epoch": 0.6275644397685429, "grad_norm": 2.4494829177856445, "learning_rate": 6.435575555841679e-05, "loss": 1.9172, "step": 5965 }, { "epoch": 0.627669647553919, "grad_norm": 0.8796269297599792, "learning_rate": 6.432391855381321e-05, "loss": 0.9647, "step": 5966 }, { "epoch": 0.6277748553392951, "grad_norm": 1.2112059593200684, "learning_rate": 6.42920856921555e-05, "loss": 1.734, "step": 5967 }, { "epoch": 0.6278800631246713, "grad_norm": 1.265119194984436, "learning_rate": 6.426025697714029e-05, "loss": 1.9329, "step": 5968 }, { "epoch": 0.6279852709100473, "grad_norm": 1.5609668493270874, "learning_rate": 6.422843241246374e-05, "loss": 1.5995, "step": 5969 }, { "epoch": 0.6280904786954234, "grad_norm": 1.1384879350662231, "learning_rate": 6.419661200182158e-05, "loss": 2.1466, "step": 5970 }, { "epoch": 0.6281956864807996, "grad_norm": 1.4794455766677856, "learning_rate": 6.416479574890894e-05, "loss": 2.2411, "step": 5971 }, { "epoch": 0.6283008942661757, "grad_norm": 1.0125900506973267, "learning_rate": 6.413298365742055e-05, "loss": 1.6002, "step": 5972 }, { "epoch": 0.6284061020515518, "grad_norm": 1.3207452297210693, "learning_rate": 6.41011757310507e-05, "loss": 1.6828, "step": 5973 }, { "epoch": 0.6285113098369279, "grad_norm": 1.2414960861206055, "learning_rate": 6.406937197349308e-05, "loss": 1.6645, "step": 5974 }, { "epoch": 0.6286165176223041, "grad_norm": 1.6514127254486084, "learning_rate": 6.403757238844096e-05, "loss": 1.9715, "step": 5975 }, { "epoch": 0.6287217254076801, "grad_norm": 1.0575364828109741, "learning_rate": 6.400577697958718e-05, "loss": 2.2627, "step": 5976 }, { "epoch": 0.6288269331930563, "grad_norm": 1.608067512512207, "learning_rate": 6.397398575062396e-05, "loss": 1.871, "step": 5977 }, { "epoch": 0.6289321409784324, "grad_norm": 1.4367784261703491, "learning_rate": 6.394219870524314e-05, "loss": 1.8095, "step": 5978 }, { "epoch": 0.6290373487638086, "grad_norm": 1.3168582916259766, "learning_rate": 6.391041584713608e-05, "loss": 1.6412, "step": 5979 }, { "epoch": 0.6291425565491846, "grad_norm": 1.5024646520614624, "learning_rate": 6.387863717999357e-05, "loss": 2.0073, "step": 5980 }, { "epoch": 0.6292477643345608, "grad_norm": 1.1878137588500977, "learning_rate": 6.384686270750599e-05, "loss": 1.0912, "step": 5981 }, { "epoch": 0.6293529721199369, "grad_norm": 1.134263515472412, "learning_rate": 6.381509243336318e-05, "loss": 1.9806, "step": 5982 }, { "epoch": 0.6294581799053129, "grad_norm": 1.093222737312317, "learning_rate": 6.378332636125453e-05, "loss": 1.8243, "step": 5983 }, { "epoch": 0.6295633876906891, "grad_norm": 1.2039942741394043, "learning_rate": 6.375156449486895e-05, "loss": 1.6477, "step": 5984 }, { "epoch": 0.6296685954760652, "grad_norm": 1.28606116771698, "learning_rate": 6.371980683789479e-05, "loss": 1.6914, "step": 5985 }, { "epoch": 0.6297738032614414, "grad_norm": 1.0567982196807861, "learning_rate": 6.368805339402e-05, "loss": 1.9269, "step": 5986 }, { "epoch": 0.6298790110468174, "grad_norm": 1.3032525777816772, "learning_rate": 6.365630416693203e-05, "loss": 1.9851, "step": 5987 }, { "epoch": 0.6299842188321936, "grad_norm": 2.6468424797058105, "learning_rate": 6.362455916031774e-05, "loss": 1.8291, "step": 5988 }, { "epoch": 0.6300894266175697, "grad_norm": 1.2577786445617676, "learning_rate": 6.359281837786363e-05, "loss": 1.6674, "step": 5989 }, { "epoch": 0.6301946344029458, "grad_norm": 1.3022758960723877, "learning_rate": 6.356108182325562e-05, "loss": 1.4832, "step": 5990 }, { "epoch": 0.6302998421883219, "grad_norm": 1.515007495880127, "learning_rate": 6.352934950017921e-05, "loss": 2.0965, "step": 5991 }, { "epoch": 0.6304050499736981, "grad_norm": 1.462588906288147, "learning_rate": 6.349762141231934e-05, "loss": 1.5093, "step": 5992 }, { "epoch": 0.6305102577590742, "grad_norm": 1.503005862236023, "learning_rate": 6.34658975633605e-05, "loss": 1.4638, "step": 5993 }, { "epoch": 0.6306154655444502, "grad_norm": 1.4400442838668823, "learning_rate": 6.343417795698667e-05, "loss": 1.2181, "step": 5994 }, { "epoch": 0.6307206733298264, "grad_norm": 1.6780505180358887, "learning_rate": 6.340246259688133e-05, "loss": 1.6997, "step": 5995 }, { "epoch": 0.6308258811152025, "grad_norm": 1.3209447860717773, "learning_rate": 6.337075148672751e-05, "loss": 1.5113, "step": 5996 }, { "epoch": 0.6309310889005787, "grad_norm": 1.7085509300231934, "learning_rate": 6.333904463020772e-05, "loss": 1.8627, "step": 5997 }, { "epoch": 0.6310362966859547, "grad_norm": 1.7101259231567383, "learning_rate": 6.330734203100394e-05, "loss": 2.1645, "step": 5998 }, { "epoch": 0.6311415044713309, "grad_norm": 1.1601276397705078, "learning_rate": 6.327564369279768e-05, "loss": 2.0211, "step": 5999 }, { "epoch": 0.631246712256707, "grad_norm": 1.344951868057251, "learning_rate": 6.324394961927005e-05, "loss": 1.6263, "step": 6000 }, { "epoch": 0.6313519200420831, "grad_norm": 1.1966255903244019, "learning_rate": 6.321225981410147e-05, "loss": 1.8129, "step": 6001 }, { "epoch": 0.6314571278274592, "grad_norm": 1.8970527648925781, "learning_rate": 6.318057428097203e-05, "loss": 1.8969, "step": 6002 }, { "epoch": 0.6315623356128354, "grad_norm": 1.4465181827545166, "learning_rate": 6.314889302356125e-05, "loss": 2.0013, "step": 6003 }, { "epoch": 0.6316675433982115, "grad_norm": 1.2381423711776733, "learning_rate": 6.311721604554816e-05, "loss": 1.6398, "step": 6004 }, { "epoch": 0.6317727511835876, "grad_norm": 1.656591773033142, "learning_rate": 6.308554335061135e-05, "loss": 1.757, "step": 6005 }, { "epoch": 0.6318779589689637, "grad_norm": 1.8384569883346558, "learning_rate": 6.305387494242882e-05, "loss": 1.7847, "step": 6006 }, { "epoch": 0.6319831667543399, "grad_norm": 1.5828437805175781, "learning_rate": 6.302221082467812e-05, "loss": 1.5665, "step": 6007 }, { "epoch": 0.6320883745397159, "grad_norm": 1.4291951656341553, "learning_rate": 6.299055100103632e-05, "loss": 1.9382, "step": 6008 }, { "epoch": 0.632193582325092, "grad_norm": 1.4509108066558838, "learning_rate": 6.295889547517997e-05, "loss": 2.2804, "step": 6009 }, { "epoch": 0.6322987901104682, "grad_norm": 1.5382047891616821, "learning_rate": 6.29272442507851e-05, "loss": 0.9914, "step": 6010 }, { "epoch": 0.6324039978958443, "grad_norm": 0.95433109998703, "learning_rate": 6.289559733152727e-05, "loss": 1.648, "step": 6011 }, { "epoch": 0.6325092056812204, "grad_norm": 0.9161847829818726, "learning_rate": 6.286395472108158e-05, "loss": 1.5227, "step": 6012 }, { "epoch": 0.6326144134665965, "grad_norm": 2.2820706367492676, "learning_rate": 6.283231642312251e-05, "loss": 1.4385, "step": 6013 }, { "epoch": 0.6327196212519727, "grad_norm": 1.1651874780654907, "learning_rate": 6.280068244132415e-05, "loss": 1.3021, "step": 6014 }, { "epoch": 0.6328248290373487, "grad_norm": 1.7624272108078003, "learning_rate": 6.276905277936005e-05, "loss": 1.7141, "step": 6015 }, { "epoch": 0.6329300368227249, "grad_norm": 1.4675081968307495, "learning_rate": 6.273742744090325e-05, "loss": 1.8335, "step": 6016 }, { "epoch": 0.633035244608101, "grad_norm": 1.489524245262146, "learning_rate": 6.270580642962629e-05, "loss": 1.8555, "step": 6017 }, { "epoch": 0.6331404523934772, "grad_norm": 1.2529215812683105, "learning_rate": 6.267418974920125e-05, "loss": 1.7023, "step": 6018 }, { "epoch": 0.6332456601788532, "grad_norm": 1.7118120193481445, "learning_rate": 6.26425774032996e-05, "loss": 1.7771, "step": 6019 }, { "epoch": 0.6333508679642293, "grad_norm": 1.1630779504776, "learning_rate": 6.261096939559243e-05, "loss": 1.6504, "step": 6020 }, { "epoch": 0.6334560757496055, "grad_norm": 1.5538249015808105, "learning_rate": 6.257936572975029e-05, "loss": 1.967, "step": 6021 }, { "epoch": 0.6335612835349815, "grad_norm": 1.3640360832214355, "learning_rate": 6.25477664094432e-05, "loss": 1.583, "step": 6022 }, { "epoch": 0.6336664913203577, "grad_norm": 1.6162277460098267, "learning_rate": 6.251617143834065e-05, "loss": 1.8734, "step": 6023 }, { "epoch": 0.6337716991057338, "grad_norm": 1.6816718578338623, "learning_rate": 6.248458082011167e-05, "loss": 1.6501, "step": 6024 }, { "epoch": 0.63387690689111, "grad_norm": 1.3452551364898682, "learning_rate": 6.245299455842477e-05, "loss": 1.609, "step": 6025 }, { "epoch": 0.633982114676486, "grad_norm": 1.8827195167541504, "learning_rate": 6.2421412656948e-05, "loss": 1.2681, "step": 6026 }, { "epoch": 0.6340873224618622, "grad_norm": 1.4761601686477661, "learning_rate": 6.238983511934883e-05, "loss": 1.4392, "step": 6027 }, { "epoch": 0.6341925302472383, "grad_norm": 1.6614230871200562, "learning_rate": 6.235826194929423e-05, "loss": 1.8507, "step": 6028 }, { "epoch": 0.6342977380326145, "grad_norm": 1.2700610160827637, "learning_rate": 6.232669315045076e-05, "loss": 1.9591, "step": 6029 }, { "epoch": 0.6344029458179905, "grad_norm": 1.129776120185852, "learning_rate": 6.229512872648435e-05, "loss": 1.9298, "step": 6030 }, { "epoch": 0.6345081536033667, "grad_norm": 1.5436091423034668, "learning_rate": 6.226356868106046e-05, "loss": 1.6272, "step": 6031 }, { "epoch": 0.6346133613887428, "grad_norm": 1.54899001121521, "learning_rate": 6.22320130178441e-05, "loss": 1.6734, "step": 6032 }, { "epoch": 0.6347185691741188, "grad_norm": 2.6774120330810547, "learning_rate": 6.220046174049968e-05, "loss": 1.5251, "step": 6033 }, { "epoch": 0.634823776959495, "grad_norm": 1.2315189838409424, "learning_rate": 6.216891485269118e-05, "loss": 1.4497, "step": 6034 }, { "epoch": 0.6349289847448711, "grad_norm": 0.9964236617088318, "learning_rate": 6.213737235808201e-05, "loss": 1.695, "step": 6035 }, { "epoch": 0.6350341925302473, "grad_norm": 1.0956590175628662, "learning_rate": 6.210583426033513e-05, "loss": 2.1008, "step": 6036 }, { "epoch": 0.6351394003156233, "grad_norm": 1.8140305280685425, "learning_rate": 6.207430056311292e-05, "loss": 1.6233, "step": 6037 }, { "epoch": 0.6352446081009995, "grad_norm": 2.13716721534729, "learning_rate": 6.204277127007729e-05, "loss": 2.0819, "step": 6038 }, { "epoch": 0.6353498158863756, "grad_norm": 1.4235343933105469, "learning_rate": 6.201124638488968e-05, "loss": 1.3355, "step": 6039 }, { "epoch": 0.6354550236717517, "grad_norm": 1.6728320121765137, "learning_rate": 6.19797259112109e-05, "loss": 1.7395, "step": 6040 }, { "epoch": 0.6355602314571278, "grad_norm": 1.455208659172058, "learning_rate": 6.194820985270136e-05, "loss": 1.9213, "step": 6041 }, { "epoch": 0.635665439242504, "grad_norm": 1.577768325805664, "learning_rate": 6.191669821302091e-05, "loss": 1.9931, "step": 6042 }, { "epoch": 0.6357706470278801, "grad_norm": 1.7181400060653687, "learning_rate": 6.188519099582893e-05, "loss": 1.69, "step": 6043 }, { "epoch": 0.6358758548132561, "grad_norm": 1.1263937950134277, "learning_rate": 6.185368820478417e-05, "loss": 1.3001, "step": 6044 }, { "epoch": 0.6359810625986323, "grad_norm": 1.8166402578353882, "learning_rate": 6.182218984354497e-05, "loss": 1.9147, "step": 6045 }, { "epoch": 0.6360862703840084, "grad_norm": 1.7598438262939453, "learning_rate": 6.179069591576916e-05, "loss": 1.9621, "step": 6046 }, { "epoch": 0.6361914781693845, "grad_norm": 1.1624400615692139, "learning_rate": 6.175920642511404e-05, "loss": 2.1653, "step": 6047 }, { "epoch": 0.6362966859547606, "grad_norm": 2.103053092956543, "learning_rate": 6.172772137523632e-05, "loss": 1.9937, "step": 6048 }, { "epoch": 0.6364018937401368, "grad_norm": 0.8931379914283752, "learning_rate": 6.169624076979229e-05, "loss": 1.8209, "step": 6049 }, { "epoch": 0.6365071015255129, "grad_norm": 1.4275034666061401, "learning_rate": 6.166476461243771e-05, "loss": 1.3759, "step": 6050 }, { "epoch": 0.636612309310889, "grad_norm": 1.7524127960205078, "learning_rate": 6.163329290682775e-05, "loss": 1.8575, "step": 6051 }, { "epoch": 0.6367175170962651, "grad_norm": 1.2306782007217407, "learning_rate": 6.160182565661717e-05, "loss": 1.3555, "step": 6052 }, { "epoch": 0.6368227248816413, "grad_norm": 1.199044942855835, "learning_rate": 6.157036286546014e-05, "loss": 1.6082, "step": 6053 }, { "epoch": 0.6369279326670173, "grad_norm": 0.8733108043670654, "learning_rate": 6.153890453701031e-05, "loss": 1.8242, "step": 6054 }, { "epoch": 0.6370331404523935, "grad_norm": 1.531132698059082, "learning_rate": 6.150745067492085e-05, "loss": 1.5927, "step": 6055 }, { "epoch": 0.6371383482377696, "grad_norm": 2.1788547039031982, "learning_rate": 6.14760012828444e-05, "loss": 2.3346, "step": 6056 }, { "epoch": 0.6372435560231458, "grad_norm": 1.90433931350708, "learning_rate": 6.144455636443304e-05, "loss": 1.5207, "step": 6057 }, { "epoch": 0.6373487638085218, "grad_norm": 2.0594825744628906, "learning_rate": 6.141311592333841e-05, "loss": 1.23, "step": 6058 }, { "epoch": 0.6374539715938979, "grad_norm": 1.3014582395553589, "learning_rate": 6.138167996321155e-05, "loss": 1.8238, "step": 6059 }, { "epoch": 0.6375591793792741, "grad_norm": 1.347284197807312, "learning_rate": 6.135024848770306e-05, "loss": 1.1295, "step": 6060 }, { "epoch": 0.6376643871646502, "grad_norm": 1.683262825012207, "learning_rate": 6.131882150046291e-05, "loss": 1.449, "step": 6061 }, { "epoch": 0.6377695949500263, "grad_norm": 1.515728235244751, "learning_rate": 6.128739900514064e-05, "loss": 1.3518, "step": 6062 }, { "epoch": 0.6378748027354024, "grad_norm": 1.24699068069458, "learning_rate": 6.125598100538528e-05, "loss": 1.9356, "step": 6063 }, { "epoch": 0.6379800105207786, "grad_norm": 2.0184340476989746, "learning_rate": 6.122456750484528e-05, "loss": 1.4496, "step": 6064 }, { "epoch": 0.6380852183061546, "grad_norm": 1.378085970878601, "learning_rate": 6.119315850716853e-05, "loss": 1.6465, "step": 6065 }, { "epoch": 0.6381904260915308, "grad_norm": 1.6989691257476807, "learning_rate": 6.116175401600249e-05, "loss": 1.8344, "step": 6066 }, { "epoch": 0.6382956338769069, "grad_norm": 1.1261394023895264, "learning_rate": 6.113035403499408e-05, "loss": 1.4539, "step": 6067 }, { "epoch": 0.6384008416622831, "grad_norm": 1.6350476741790771, "learning_rate": 6.109895856778967e-05, "loss": 1.5789, "step": 6068 }, { "epoch": 0.6385060494476591, "grad_norm": 1.9975515604019165, "learning_rate": 6.106756761803507e-05, "loss": 1.6606, "step": 6069 }, { "epoch": 0.6386112572330352, "grad_norm": 1.355108618736267, "learning_rate": 6.103618118937567e-05, "loss": 1.4639, "step": 6070 }, { "epoch": 0.6387164650184114, "grad_norm": 1.5188654661178589, "learning_rate": 6.1004799285456235e-05, "loss": 2.2806, "step": 6071 }, { "epoch": 0.6388216728037874, "grad_norm": 1.9493741989135742, "learning_rate": 6.097342190992105e-05, "loss": 2.7309, "step": 6072 }, { "epoch": 0.6389268805891636, "grad_norm": 1.889501690864563, "learning_rate": 6.0942049066413855e-05, "loss": 2.1665, "step": 6073 }, { "epoch": 0.6390320883745397, "grad_norm": 1.2038894891738892, "learning_rate": 6.091068075857791e-05, "loss": 1.5338, "step": 6074 }, { "epoch": 0.6391372961599159, "grad_norm": 1.072657823562622, "learning_rate": 6.087931699005588e-05, "loss": 1.9299, "step": 6075 }, { "epoch": 0.6392425039452919, "grad_norm": 1.8095741271972656, "learning_rate": 6.084795776448995e-05, "loss": 1.8311, "step": 6076 }, { "epoch": 0.6393477117306681, "grad_norm": 1.5067007541656494, "learning_rate": 6.0816603085521764e-05, "loss": 1.3399, "step": 6077 }, { "epoch": 0.6394529195160442, "grad_norm": 1.420461654663086, "learning_rate": 6.078525295679243e-05, "loss": 2.0262, "step": 6078 }, { "epoch": 0.6395581273014203, "grad_norm": 1.3096047639846802, "learning_rate": 6.075390738194252e-05, "loss": 1.7605, "step": 6079 }, { "epoch": 0.6396633350867964, "grad_norm": 1.2972394227981567, "learning_rate": 6.072256636461214e-05, "loss": 1.6696, "step": 6080 }, { "epoch": 0.6397685428721726, "grad_norm": 1.2217929363250732, "learning_rate": 6.0691229908440775e-05, "loss": 2.151, "step": 6081 }, { "epoch": 0.6398737506575487, "grad_norm": 1.5966821908950806, "learning_rate": 6.065989801706744e-05, "loss": 1.2176, "step": 6082 }, { "epoch": 0.6399789584429247, "grad_norm": 1.4299871921539307, "learning_rate": 6.0628570694130594e-05, "loss": 1.6411, "step": 6083 }, { "epoch": 0.6400841662283009, "grad_norm": 1.8349710702896118, "learning_rate": 6.059724794326822e-05, "loss": 1.697, "step": 6084 }, { "epoch": 0.640189374013677, "grad_norm": 0.9039126038551331, "learning_rate": 6.056592976811766e-05, "loss": 1.509, "step": 6085 }, { "epoch": 0.6402945817990531, "grad_norm": 1.0073765516281128, "learning_rate": 6.053461617231586e-05, "loss": 1.7619, "step": 6086 }, { "epoch": 0.6403997895844292, "grad_norm": 1.246187448501587, "learning_rate": 6.05033071594991e-05, "loss": 2.2623, "step": 6087 }, { "epoch": 0.6405049973698054, "grad_norm": 1.0114855766296387, "learning_rate": 6.047200273330325e-05, "loss": 1.8475, "step": 6088 }, { "epoch": 0.6406102051551815, "grad_norm": 1.7710341215133667, "learning_rate": 6.044070289736352e-05, "loss": 1.2952, "step": 6089 }, { "epoch": 0.6407154129405576, "grad_norm": 1.3931844234466553, "learning_rate": 6.04094076553147e-05, "loss": 1.7707, "step": 6090 }, { "epoch": 0.6408206207259337, "grad_norm": 1.1191129684448242, "learning_rate": 6.0378117010791e-05, "loss": 1.9957, "step": 6091 }, { "epoch": 0.6409258285113099, "grad_norm": 1.1291834115982056, "learning_rate": 6.034683096742613e-05, "loss": 1.8528, "step": 6092 }, { "epoch": 0.641031036296686, "grad_norm": 1.390347957611084, "learning_rate": 6.031554952885317e-05, "loss": 1.6936, "step": 6093 }, { "epoch": 0.641136244082062, "grad_norm": 1.2321089506149292, "learning_rate": 6.028427269870478e-05, "loss": 1.4666, "step": 6094 }, { "epoch": 0.6412414518674382, "grad_norm": 1.7802302837371826, "learning_rate": 6.025300048061302e-05, "loss": 2.4148, "step": 6095 }, { "epoch": 0.6413466596528143, "grad_norm": 1.3233754634857178, "learning_rate": 6.0221732878209425e-05, "loss": 1.2631, "step": 6096 }, { "epoch": 0.6414518674381904, "grad_norm": 1.9571051597595215, "learning_rate": 6.0190469895125e-05, "loss": 1.8066, "step": 6097 }, { "epoch": 0.6415570752235665, "grad_norm": 1.236015796661377, "learning_rate": 6.0159211534990246e-05, "loss": 1.7285, "step": 6098 }, { "epoch": 0.6416622830089427, "grad_norm": 2.017487049102783, "learning_rate": 6.012795780143503e-05, "loss": 2.0849, "step": 6099 }, { "epoch": 0.6417674907943188, "grad_norm": 1.846824049949646, "learning_rate": 6.009670869808879e-05, "loss": 1.7874, "step": 6100 }, { "epoch": 0.6418726985796949, "grad_norm": 2.1392126083374023, "learning_rate": 6.006546422858039e-05, "loss": 1.5903, "step": 6101 }, { "epoch": 0.641977906365071, "grad_norm": 2.1033949851989746, "learning_rate": 6.003422439653811e-05, "loss": 1.7481, "step": 6102 }, { "epoch": 0.6420831141504472, "grad_norm": 1.5869107246398926, "learning_rate": 6.0002989205589734e-05, "loss": 1.3625, "step": 6103 }, { "epoch": 0.6421883219358232, "grad_norm": 1.291629433631897, "learning_rate": 5.997175865936253e-05, "loss": 1.7495, "step": 6104 }, { "epoch": 0.6422935297211994, "grad_norm": 1.8969913721084595, "learning_rate": 5.994053276148319e-05, "loss": 1.7404, "step": 6105 }, { "epoch": 0.6423987375065755, "grad_norm": 1.4604777097702026, "learning_rate": 5.990931151557786e-05, "loss": 1.749, "step": 6106 }, { "epoch": 0.6425039452919517, "grad_norm": 1.6946632862091064, "learning_rate": 5.987809492527219e-05, "loss": 1.9089, "step": 6107 }, { "epoch": 0.6426091530773277, "grad_norm": 1.3629951477050781, "learning_rate": 5.984688299419121e-05, "loss": 1.7441, "step": 6108 }, { "epoch": 0.6427143608627038, "grad_norm": 1.3938630819320679, "learning_rate": 5.981567572595951e-05, "loss": 1.412, "step": 6109 }, { "epoch": 0.64281956864808, "grad_norm": 1.3842976093292236, "learning_rate": 5.978447312420103e-05, "loss": 1.7809, "step": 6110 }, { "epoch": 0.642924776433456, "grad_norm": 1.5348026752471924, "learning_rate": 5.9753275192539284e-05, "loss": 1.398, "step": 6111 }, { "epoch": 0.6430299842188322, "grad_norm": 1.968120813369751, "learning_rate": 5.9722081934597164e-05, "loss": 1.6602, "step": 6112 }, { "epoch": 0.6431351920042083, "grad_norm": 1.070837378501892, "learning_rate": 5.9690893353997e-05, "loss": 1.6361, "step": 6113 }, { "epoch": 0.6432403997895845, "grad_norm": 2.2223477363586426, "learning_rate": 5.965970945436068e-05, "loss": 1.5202, "step": 6114 }, { "epoch": 0.6433456075749605, "grad_norm": 1.0596355199813843, "learning_rate": 5.9628530239309456e-05, "loss": 1.4806, "step": 6115 }, { "epoch": 0.6434508153603367, "grad_norm": 1.7896836996078491, "learning_rate": 5.95973557124641e-05, "loss": 1.3566, "step": 6116 }, { "epoch": 0.6435560231457128, "grad_norm": 1.657974362373352, "learning_rate": 5.9566185877444755e-05, "loss": 1.7571, "step": 6117 }, { "epoch": 0.6436612309310888, "grad_norm": 1.8864045143127441, "learning_rate": 5.9535020737871115e-05, "loss": 1.9383, "step": 6118 }, { "epoch": 0.643766438716465, "grad_norm": 1.6928189992904663, "learning_rate": 5.950386029736228e-05, "loss": 1.394, "step": 6119 }, { "epoch": 0.6438716465018411, "grad_norm": 1.4859063625335693, "learning_rate": 5.947270455953677e-05, "loss": 1.7629, "step": 6120 }, { "epoch": 0.6439768542872173, "grad_norm": 2.7389304637908936, "learning_rate": 5.9441553528012626e-05, "loss": 1.9255, "step": 6121 }, { "epoch": 0.6440820620725933, "grad_norm": 2.5364770889282227, "learning_rate": 5.9410407206407335e-05, "loss": 1.8894, "step": 6122 }, { "epoch": 0.6441872698579695, "grad_norm": 1.21177077293396, "learning_rate": 5.9379265598337786e-05, "loss": 1.7663, "step": 6123 }, { "epoch": 0.6442924776433456, "grad_norm": 1.6818538904190063, "learning_rate": 5.934812870742036e-05, "loss": 2.2091, "step": 6124 }, { "epoch": 0.6443976854287218, "grad_norm": 1.6835134029388428, "learning_rate": 5.9316996537270895e-05, "loss": 1.6155, "step": 6125 }, { "epoch": 0.6445028932140978, "grad_norm": 1.7322865724563599, "learning_rate": 5.928586909150464e-05, "loss": 1.8218, "step": 6126 }, { "epoch": 0.644608100999474, "grad_norm": 2.4374451637268066, "learning_rate": 5.925474637373635e-05, "loss": 1.2015, "step": 6127 }, { "epoch": 0.6447133087848501, "grad_norm": 1.3041414022445679, "learning_rate": 5.9223628387580176e-05, "loss": 2.0628, "step": 6128 }, { "epoch": 0.6448185165702262, "grad_norm": 1.4549946784973145, "learning_rate": 5.919251513664982e-05, "loss": 1.3928, "step": 6129 }, { "epoch": 0.6449237243556023, "grad_norm": 2.7458503246307373, "learning_rate": 5.916140662455828e-05, "loss": 1.7509, "step": 6130 }, { "epoch": 0.6450289321409785, "grad_norm": 1.7431880235671997, "learning_rate": 5.913030285491808e-05, "loss": 1.6354, "step": 6131 }, { "epoch": 0.6451341399263546, "grad_norm": 1.597713828086853, "learning_rate": 5.909920383134124e-05, "loss": 1.6892, "step": 6132 }, { "epoch": 0.6452393477117306, "grad_norm": 1.7393560409545898, "learning_rate": 5.90681095574392e-05, "loss": 1.5215, "step": 6133 }, { "epoch": 0.6453445554971068, "grad_norm": 1.863715648651123, "learning_rate": 5.903702003682278e-05, "loss": 2.0865, "step": 6134 }, { "epoch": 0.6454497632824829, "grad_norm": 1.6684576272964478, "learning_rate": 5.900593527310234e-05, "loss": 2.2619, "step": 6135 }, { "epoch": 0.645554971067859, "grad_norm": 1.9676207304000854, "learning_rate": 5.897485526988766e-05, "loss": 1.9868, "step": 6136 }, { "epoch": 0.6456601788532351, "grad_norm": 2.0112955570220947, "learning_rate": 5.8943780030787935e-05, "loss": 1.1787, "step": 6137 }, { "epoch": 0.6457653866386113, "grad_norm": 1.3841115236282349, "learning_rate": 5.891270955941184e-05, "loss": 1.5988, "step": 6138 }, { "epoch": 0.6458705944239874, "grad_norm": 2.0346131324768066, "learning_rate": 5.8881643859367475e-05, "loss": 1.7077, "step": 6139 }, { "epoch": 0.6459758022093635, "grad_norm": 1.9194620847702026, "learning_rate": 5.8850582934262446e-05, "loss": 1.7822, "step": 6140 }, { "epoch": 0.6460810099947396, "grad_norm": 1.3288987874984741, "learning_rate": 5.881952678770368e-05, "loss": 1.7559, "step": 6141 }, { "epoch": 0.6461862177801158, "grad_norm": 1.0419418811798096, "learning_rate": 5.8788475423297674e-05, "loss": 2.037, "step": 6142 }, { "epoch": 0.6462914255654918, "grad_norm": 1.2322132587432861, "learning_rate": 5.875742884465033e-05, "loss": 2.2489, "step": 6143 }, { "epoch": 0.6463966333508679, "grad_norm": 1.2339824438095093, "learning_rate": 5.872638705536695e-05, "loss": 1.3035, "step": 6144 }, { "epoch": 0.6465018411362441, "grad_norm": 1.2765202522277832, "learning_rate": 5.869535005905232e-05, "loss": 1.6457, "step": 6145 }, { "epoch": 0.6466070489216202, "grad_norm": 1.7985990047454834, "learning_rate": 5.86643178593107e-05, "loss": 1.2573, "step": 6146 }, { "epoch": 0.6467122567069963, "grad_norm": 1.2002838850021362, "learning_rate": 5.863329045974572e-05, "loss": 1.7499, "step": 6147 }, { "epoch": 0.6468174644923724, "grad_norm": 1.2847400903701782, "learning_rate": 5.860226786396049e-05, "loss": 2.0853, "step": 6148 }, { "epoch": 0.6469226722777486, "grad_norm": 1.2351857423782349, "learning_rate": 5.857125007555759e-05, "loss": 1.9727, "step": 6149 }, { "epoch": 0.6470278800631246, "grad_norm": 1.335922360420227, "learning_rate": 5.8540237098139006e-05, "loss": 1.3194, "step": 6150 }, { "epoch": 0.6471330878485008, "grad_norm": 2.39359188079834, "learning_rate": 5.850922893530617e-05, "loss": 1.9385, "step": 6151 }, { "epoch": 0.6472382956338769, "grad_norm": 1.5656342506408691, "learning_rate": 5.847822559065992e-05, "loss": 2.1779, "step": 6152 }, { "epoch": 0.6473435034192531, "grad_norm": 1.4513521194458008, "learning_rate": 5.8447227067800616e-05, "loss": 1.644, "step": 6153 }, { "epoch": 0.6474487112046291, "grad_norm": 1.374786138534546, "learning_rate": 5.841623337032801e-05, "loss": 1.4539, "step": 6154 }, { "epoch": 0.6475539189900053, "grad_norm": 1.5855339765548706, "learning_rate": 5.838524450184126e-05, "loss": 1.6174, "step": 6155 }, { "epoch": 0.6476591267753814, "grad_norm": 1.3137321472167969, "learning_rate": 5.835426046593906e-05, "loss": 1.5084, "step": 6156 }, { "epoch": 0.6477643345607575, "grad_norm": 1.2792092561721802, "learning_rate": 5.8323281266219466e-05, "loss": 1.8137, "step": 6157 }, { "epoch": 0.6478695423461336, "grad_norm": 1.172930121421814, "learning_rate": 5.8292306906279935e-05, "loss": 1.7933, "step": 6158 }, { "epoch": 0.6479747501315097, "grad_norm": 1.8277498483657837, "learning_rate": 5.8261337389717506e-05, "loss": 2.0978, "step": 6159 }, { "epoch": 0.6480799579168859, "grad_norm": 1.2689887285232544, "learning_rate": 5.823037272012852e-05, "loss": 1.3535, "step": 6160 }, { "epoch": 0.6481851657022619, "grad_norm": 1.2342352867126465, "learning_rate": 5.8199412901108774e-05, "loss": 2.1196, "step": 6161 }, { "epoch": 0.6482903734876381, "grad_norm": 1.4118258953094482, "learning_rate": 5.8168457936253604e-05, "loss": 2.2767, "step": 6162 }, { "epoch": 0.6483955812730142, "grad_norm": 2.2059452533721924, "learning_rate": 5.8137507829157655e-05, "loss": 1.887, "step": 6163 }, { "epoch": 0.6485007890583904, "grad_norm": 1.9310505390167236, "learning_rate": 5.8106562583415037e-05, "loss": 1.9649, "step": 6164 }, { "epoch": 0.6486059968437664, "grad_norm": 2.263485908508301, "learning_rate": 5.807562220261939e-05, "loss": 2.1794, "step": 6165 }, { "epoch": 0.6487112046291426, "grad_norm": 1.5354760885238647, "learning_rate": 5.804468669036369e-05, "loss": 1.8551, "step": 6166 }, { "epoch": 0.6488164124145187, "grad_norm": 2.3956634998321533, "learning_rate": 5.801375605024033e-05, "loss": 1.8773, "step": 6167 }, { "epoch": 0.6489216201998947, "grad_norm": 1.603357195854187, "learning_rate": 5.798283028584126e-05, "loss": 1.9813, "step": 6168 }, { "epoch": 0.6490268279852709, "grad_norm": 1.1227657794952393, "learning_rate": 5.795190940075774e-05, "loss": 1.279, "step": 6169 }, { "epoch": 0.649132035770647, "grad_norm": 1.2054638862609863, "learning_rate": 5.792099339858048e-05, "loss": 1.933, "step": 6170 }, { "epoch": 0.6492372435560232, "grad_norm": 1.7264693975448608, "learning_rate": 5.789008228289975e-05, "loss": 1.4221, "step": 6171 }, { "epoch": 0.6493424513413992, "grad_norm": 1.8171310424804688, "learning_rate": 5.785917605730509e-05, "loss": 2.2143, "step": 6172 }, { "epoch": 0.6494476591267754, "grad_norm": 1.2820940017700195, "learning_rate": 5.7828274725385544e-05, "loss": 1.1761, "step": 6173 }, { "epoch": 0.6495528669121515, "grad_norm": 2.7752187252044678, "learning_rate": 5.7797378290729595e-05, "loss": 1.6048, "step": 6174 }, { "epoch": 0.6496580746975276, "grad_norm": 2.0099592208862305, "learning_rate": 5.7766486756925086e-05, "loss": 2.1283, "step": 6175 }, { "epoch": 0.6497632824829037, "grad_norm": 1.9316635131835938, "learning_rate": 5.773560012755945e-05, "loss": 2.0693, "step": 6176 }, { "epoch": 0.6498684902682799, "grad_norm": 1.1823091506958008, "learning_rate": 5.770471840621938e-05, "loss": 2.015, "step": 6177 }, { "epoch": 0.649973698053656, "grad_norm": 1.3414077758789062, "learning_rate": 5.767384159649107e-05, "loss": 1.5643, "step": 6178 }, { "epoch": 0.650078905839032, "grad_norm": 1.3535269498825073, "learning_rate": 5.764296970196018e-05, "loss": 1.6854, "step": 6179 }, { "epoch": 0.6501841136244082, "grad_norm": 1.28633451461792, "learning_rate": 5.761210272621175e-05, "loss": 1.9889, "step": 6180 }, { "epoch": 0.6502893214097843, "grad_norm": 1.7541005611419678, "learning_rate": 5.7581240672830206e-05, "loss": 2.1746, "step": 6181 }, { "epoch": 0.6503945291951604, "grad_norm": 1.0183143615722656, "learning_rate": 5.7550383545399545e-05, "loss": 1.1908, "step": 6182 }, { "epoch": 0.6504997369805365, "grad_norm": 1.4759997129440308, "learning_rate": 5.7519531347503054e-05, "loss": 1.4782, "step": 6183 }, { "epoch": 0.6506049447659127, "grad_norm": 1.615997076034546, "learning_rate": 5.7488684082723454e-05, "loss": 1.966, "step": 6184 }, { "epoch": 0.6507101525512888, "grad_norm": 1.790147304534912, "learning_rate": 5.745784175464304e-05, "loss": 1.5509, "step": 6185 }, { "epoch": 0.6508153603366649, "grad_norm": 1.5848060846328735, "learning_rate": 5.742700436684337e-05, "loss": 1.7556, "step": 6186 }, { "epoch": 0.650920568122041, "grad_norm": 1.6461782455444336, "learning_rate": 5.739617192290545e-05, "loss": 1.6253, "step": 6187 }, { "epoch": 0.6510257759074172, "grad_norm": 1.7758451700210571, "learning_rate": 5.736534442640984e-05, "loss": 1.7024, "step": 6188 }, { "epoch": 0.6511309836927933, "grad_norm": 1.5912493467330933, "learning_rate": 5.73345218809364e-05, "loss": 1.5275, "step": 6189 }, { "epoch": 0.6512361914781694, "grad_norm": 0.911089301109314, "learning_rate": 5.7303704290064375e-05, "loss": 1.5513, "step": 6190 }, { "epoch": 0.6513413992635455, "grad_norm": 1.3374494314193726, "learning_rate": 5.727289165737263e-05, "loss": 1.6507, "step": 6191 }, { "epoch": 0.6514466070489217, "grad_norm": 1.4440089464187622, "learning_rate": 5.724208398643924e-05, "loss": 1.9977, "step": 6192 }, { "epoch": 0.6515518148342977, "grad_norm": 2.0308356285095215, "learning_rate": 5.721128128084191e-05, "loss": 1.7098, "step": 6193 }, { "epoch": 0.6516570226196738, "grad_norm": 1.5039621591567993, "learning_rate": 5.7180483544157546e-05, "loss": 1.539, "step": 6194 }, { "epoch": 0.65176223040505, "grad_norm": 1.9608867168426514, "learning_rate": 5.7149690779962594e-05, "loss": 1.5212, "step": 6195 }, { "epoch": 0.6518674381904261, "grad_norm": 2.1692678928375244, "learning_rate": 5.711890299183298e-05, "loss": 1.9479, "step": 6196 }, { "epoch": 0.6519726459758022, "grad_norm": 1.2889890670776367, "learning_rate": 5.7088120183343976e-05, "loss": 1.9426, "step": 6197 }, { "epoch": 0.6520778537611783, "grad_norm": 1.587933897972107, "learning_rate": 5.705734235807021e-05, "loss": 1.6135, "step": 6198 }, { "epoch": 0.6521830615465545, "grad_norm": 2.34407639503479, "learning_rate": 5.7026569519585916e-05, "loss": 1.769, "step": 6199 }, { "epoch": 0.6522882693319305, "grad_norm": 1.3627784252166748, "learning_rate": 5.6995801671464556e-05, "loss": 1.8221, "step": 6200 }, { "epoch": 0.6523934771173067, "grad_norm": 1.99224054813385, "learning_rate": 5.696503881727917e-05, "loss": 1.7675, "step": 6201 }, { "epoch": 0.6524986849026828, "grad_norm": 1.4435157775878906, "learning_rate": 5.693428096060213e-05, "loss": 1.2552, "step": 6202 }, { "epoch": 0.652603892688059, "grad_norm": 1.6768929958343506, "learning_rate": 5.6903528105005175e-05, "loss": 2.1129, "step": 6203 }, { "epoch": 0.652709100473435, "grad_norm": 1.7496428489685059, "learning_rate": 5.6872780254059646e-05, "loss": 1.8461, "step": 6204 }, { "epoch": 0.6528143082588111, "grad_norm": 2.0133605003356934, "learning_rate": 5.6842037411336116e-05, "loss": 1.4858, "step": 6205 }, { "epoch": 0.6529195160441873, "grad_norm": 1.2797411680221558, "learning_rate": 5.6811299580404634e-05, "loss": 1.4251, "step": 6206 }, { "epoch": 0.6530247238295633, "grad_norm": 1.3031213283538818, "learning_rate": 5.678056676483474e-05, "loss": 1.9861, "step": 6207 }, { "epoch": 0.6531299316149395, "grad_norm": 0.9973400831222534, "learning_rate": 5.6749838968195326e-05, "loss": 1.543, "step": 6208 }, { "epoch": 0.6532351394003156, "grad_norm": 1.331733226776123, "learning_rate": 5.671911619405465e-05, "loss": 1.9739, "step": 6209 }, { "epoch": 0.6533403471856918, "grad_norm": 2.5411765575408936, "learning_rate": 5.668839844598053e-05, "loss": 2.0505, "step": 6210 }, { "epoch": 0.6534455549710678, "grad_norm": 1.2405493259429932, "learning_rate": 5.665768572754007e-05, "loss": 1.9774, "step": 6211 }, { "epoch": 0.653550762756444, "grad_norm": 1.3992949724197388, "learning_rate": 5.6626978042299814e-05, "loss": 1.735, "step": 6212 }, { "epoch": 0.6536559705418201, "grad_norm": 1.305866003036499, "learning_rate": 5.6596275393825804e-05, "loss": 2.2097, "step": 6213 }, { "epoch": 0.6537611783271962, "grad_norm": 1.8051230907440186, "learning_rate": 5.656557778568341e-05, "loss": 0.9921, "step": 6214 }, { "epoch": 0.6538663861125723, "grad_norm": 1.1146427392959595, "learning_rate": 5.653488522143744e-05, "loss": 1.6378, "step": 6215 }, { "epoch": 0.6539715938979485, "grad_norm": 1.5699180364608765, "learning_rate": 5.650419770465213e-05, "loss": 1.5805, "step": 6216 }, { "epoch": 0.6540768016833246, "grad_norm": 1.5119853019714355, "learning_rate": 5.647351523889106e-05, "loss": 1.5158, "step": 6217 }, { "epoch": 0.6541820094687006, "grad_norm": 1.7245757579803467, "learning_rate": 5.6442837827717386e-05, "loss": 1.2592, "step": 6218 }, { "epoch": 0.6542872172540768, "grad_norm": 1.8998960256576538, "learning_rate": 5.6412165474693526e-05, "loss": 1.7007, "step": 6219 }, { "epoch": 0.6543924250394529, "grad_norm": 1.3357900381088257, "learning_rate": 5.638149818338131e-05, "loss": 1.5266, "step": 6220 }, { "epoch": 0.6544976328248291, "grad_norm": 1.37607741355896, "learning_rate": 5.635083595734212e-05, "loss": 1.6224, "step": 6221 }, { "epoch": 0.6546028406102051, "grad_norm": 1.2907686233520508, "learning_rate": 5.6320178800136626e-05, "loss": 1.5942, "step": 6222 }, { "epoch": 0.6547080483955813, "grad_norm": 1.0506342649459839, "learning_rate": 5.628952671532488e-05, "loss": 1.6633, "step": 6223 }, { "epoch": 0.6548132561809574, "grad_norm": 1.3889976739883423, "learning_rate": 5.625887970646651e-05, "loss": 1.5176, "step": 6224 }, { "epoch": 0.6549184639663335, "grad_norm": 1.7190347909927368, "learning_rate": 5.6228237777120406e-05, "loss": 1.4025, "step": 6225 }, { "epoch": 0.6550236717517096, "grad_norm": 2.0763463973999023, "learning_rate": 5.6197600930844864e-05, "loss": 1.6678, "step": 6226 }, { "epoch": 0.6551288795370858, "grad_norm": 1.7063326835632324, "learning_rate": 5.616696917119773e-05, "loss": 1.6968, "step": 6227 }, { "epoch": 0.6552340873224619, "grad_norm": 1.3571560382843018, "learning_rate": 5.6136342501736126e-05, "loss": 1.7551, "step": 6228 }, { "epoch": 0.655339295107838, "grad_norm": 1.795301079750061, "learning_rate": 5.610572092601659e-05, "loss": 1.7408, "step": 6229 }, { "epoch": 0.6554445028932141, "grad_norm": 1.3519048690795898, "learning_rate": 5.6075104447595186e-05, "loss": 1.8361, "step": 6230 }, { "epoch": 0.6555497106785902, "grad_norm": 1.5597832202911377, "learning_rate": 5.604449307002726e-05, "loss": 2.2093, "step": 6231 }, { "epoch": 0.6556549184639663, "grad_norm": 1.250503420829773, "learning_rate": 5.601388679686757e-05, "loss": 1.7934, "step": 6232 }, { "epoch": 0.6557601262493424, "grad_norm": 1.4893306493759155, "learning_rate": 5.598328563167039e-05, "loss": 1.7416, "step": 6233 }, { "epoch": 0.6558653340347186, "grad_norm": 2.179457902908325, "learning_rate": 5.5952689577989324e-05, "loss": 1.2715, "step": 6234 }, { "epoch": 0.6559705418200947, "grad_norm": 2.1621286869049072, "learning_rate": 5.592209863937733e-05, "loss": 1.8506, "step": 6235 }, { "epoch": 0.6560757496054708, "grad_norm": 1.5253031253814697, "learning_rate": 5.589151281938695e-05, "loss": 1.5244, "step": 6236 }, { "epoch": 0.6561809573908469, "grad_norm": 2.098914384841919, "learning_rate": 5.586093212156986e-05, "loss": 1.7779, "step": 6237 }, { "epoch": 0.6562861651762231, "grad_norm": 1.6869080066680908, "learning_rate": 5.583035654947743e-05, "loss": 1.5032, "step": 6238 }, { "epoch": 0.6563913729615991, "grad_norm": 1.3635993003845215, "learning_rate": 5.5799786106660234e-05, "loss": 1.3241, "step": 6239 }, { "epoch": 0.6564965807469753, "grad_norm": 1.3757116794586182, "learning_rate": 5.576922079666829e-05, "loss": 1.5449, "step": 6240 }, { "epoch": 0.6566017885323514, "grad_norm": 1.2479023933410645, "learning_rate": 5.573866062305113e-05, "loss": 1.9185, "step": 6241 }, { "epoch": 0.6567069963177276, "grad_norm": 1.2713433504104614, "learning_rate": 5.570810558935756e-05, "loss": 1.955, "step": 6242 }, { "epoch": 0.6568122041031036, "grad_norm": 1.468234658241272, "learning_rate": 5.56775556991358e-05, "loss": 1.5705, "step": 6243 }, { "epoch": 0.6569174118884797, "grad_norm": 1.4933536052703857, "learning_rate": 5.5647010955933586e-05, "loss": 1.5719, "step": 6244 }, { "epoch": 0.6570226196738559, "grad_norm": 1.548898458480835, "learning_rate": 5.561647136329789e-05, "loss": 1.3829, "step": 6245 }, { "epoch": 0.6571278274592319, "grad_norm": 1.346425175666809, "learning_rate": 5.5585936924775275e-05, "loss": 2.2525, "step": 6246 }, { "epoch": 0.6572330352446081, "grad_norm": 1.4224555492401123, "learning_rate": 5.555540764391156e-05, "loss": 1.5195, "step": 6247 }, { "epoch": 0.6573382430299842, "grad_norm": 1.1593234539031982, "learning_rate": 5.552488352425195e-05, "loss": 1.8726, "step": 6248 }, { "epoch": 0.6574434508153604, "grad_norm": 1.3007352352142334, "learning_rate": 5.549436456934121e-05, "loss": 1.6618, "step": 6249 }, { "epoch": 0.6575486586007364, "grad_norm": 1.5232993364334106, "learning_rate": 5.5463850782723346e-05, "loss": 1.1512, "step": 6250 }, { "epoch": 0.6576538663861126, "grad_norm": 1.0250799655914307, "learning_rate": 5.5433342167941803e-05, "loss": 1.8019, "step": 6251 }, { "epoch": 0.6577590741714887, "grad_norm": 1.5869766473770142, "learning_rate": 5.540283872853953e-05, "loss": 1.0935, "step": 6252 }, { "epoch": 0.6578642819568649, "grad_norm": 1.3457857370376587, "learning_rate": 5.5372340468058726e-05, "loss": 1.8767, "step": 6253 }, { "epoch": 0.6579694897422409, "grad_norm": 1.3926914930343628, "learning_rate": 5.5341847390041035e-05, "loss": 1.7272, "step": 6254 }, { "epoch": 0.658074697527617, "grad_norm": 1.3577390909194946, "learning_rate": 5.531135949802759e-05, "loss": 1.9582, "step": 6255 }, { "epoch": 0.6581799053129932, "grad_norm": 1.4779307842254639, "learning_rate": 5.52808767955588e-05, "loss": 1.578, "step": 6256 }, { "epoch": 0.6582851130983692, "grad_norm": 1.3357998132705688, "learning_rate": 5.5250399286174546e-05, "loss": 1.6586, "step": 6257 }, { "epoch": 0.6583903208837454, "grad_norm": 1.462868094444275, "learning_rate": 5.521992697341407e-05, "loss": 1.3614, "step": 6258 }, { "epoch": 0.6584955286691215, "grad_norm": 1.4444942474365234, "learning_rate": 5.518945986081596e-05, "loss": 1.1518, "step": 6259 }, { "epoch": 0.6586007364544977, "grad_norm": 1.828895092010498, "learning_rate": 5.515899795191837e-05, "loss": 1.2489, "step": 6260 }, { "epoch": 0.6587059442398737, "grad_norm": 1.7447843551635742, "learning_rate": 5.512854125025868e-05, "loss": 1.7433, "step": 6261 }, { "epoch": 0.6588111520252499, "grad_norm": 1.3825078010559082, "learning_rate": 5.5098089759373714e-05, "loss": 2.2669, "step": 6262 }, { "epoch": 0.658916359810626, "grad_norm": 1.5063486099243164, "learning_rate": 5.5067643482799746e-05, "loss": 1.814, "step": 6263 }, { "epoch": 0.6590215675960021, "grad_norm": 1.3307785987854004, "learning_rate": 5.50372024240724e-05, "loss": 1.5017, "step": 6264 }, { "epoch": 0.6591267753813782, "grad_norm": 1.2832993268966675, "learning_rate": 5.500676658672662e-05, "loss": 1.5099, "step": 6265 }, { "epoch": 0.6592319831667544, "grad_norm": 1.9960840940475464, "learning_rate": 5.4976335974296923e-05, "loss": 1.546, "step": 6266 }, { "epoch": 0.6593371909521305, "grad_norm": 2.2886388301849365, "learning_rate": 5.4945910590317074e-05, "loss": 1.4721, "step": 6267 }, { "epoch": 0.6594423987375065, "grad_norm": 1.4624122381210327, "learning_rate": 5.491549043832023e-05, "loss": 1.8469, "step": 6268 }, { "epoch": 0.6595476065228827, "grad_norm": 1.8550081253051758, "learning_rate": 5.488507552183906e-05, "loss": 1.6482, "step": 6269 }, { "epoch": 0.6596528143082588, "grad_norm": 1.4133846759796143, "learning_rate": 5.4854665844405505e-05, "loss": 1.7455, "step": 6270 }, { "epoch": 0.6597580220936349, "grad_norm": 1.177406668663025, "learning_rate": 5.48242614095509e-05, "loss": 1.6862, "step": 6271 }, { "epoch": 0.659863229879011, "grad_norm": 1.473006010055542, "learning_rate": 5.4793862220806114e-05, "loss": 1.888, "step": 6272 }, { "epoch": 0.6599684376643872, "grad_norm": 1.623795986175537, "learning_rate": 5.4763468281701235e-05, "loss": 1.6365, "step": 6273 }, { "epoch": 0.6600736454497633, "grad_norm": 1.2633938789367676, "learning_rate": 5.473307959576579e-05, "loss": 1.6566, "step": 6274 }, { "epoch": 0.6601788532351394, "grad_norm": 1.473612904548645, "learning_rate": 5.470269616652879e-05, "loss": 2.0853, "step": 6275 }, { "epoch": 0.6602840610205155, "grad_norm": 1.1714433431625366, "learning_rate": 5.467231799751853e-05, "loss": 1.3471, "step": 6276 }, { "epoch": 0.6603892688058917, "grad_norm": 1.7451331615447998, "learning_rate": 5.464194509226267e-05, "loss": 1.5852, "step": 6277 }, { "epoch": 0.6604944765912677, "grad_norm": 1.2476874589920044, "learning_rate": 5.461157745428841e-05, "loss": 2.072, "step": 6278 }, { "epoch": 0.6605996843766438, "grad_norm": 1.997950553894043, "learning_rate": 5.45812150871222e-05, "loss": 1.5509, "step": 6279 }, { "epoch": 0.66070489216202, "grad_norm": 1.408400058746338, "learning_rate": 5.455085799428992e-05, "loss": 1.9504, "step": 6280 }, { "epoch": 0.6608100999473961, "grad_norm": 1.8198645114898682, "learning_rate": 5.452050617931683e-05, "loss": 2.1562, "step": 6281 }, { "epoch": 0.6609153077327722, "grad_norm": 1.5449212789535522, "learning_rate": 5.449015964572758e-05, "loss": 1.5864, "step": 6282 }, { "epoch": 0.6610205155181483, "grad_norm": 1.971395492553711, "learning_rate": 5.445981839704626e-05, "loss": 1.6987, "step": 6283 }, { "epoch": 0.6611257233035245, "grad_norm": 1.50814950466156, "learning_rate": 5.4429482436796265e-05, "loss": 1.385, "step": 6284 }, { "epoch": 0.6612309310889006, "grad_norm": 0.9933773279190063, "learning_rate": 5.439915176850037e-05, "loss": 1.5161, "step": 6285 }, { "epoch": 0.6613361388742767, "grad_norm": 1.3976936340332031, "learning_rate": 5.4368826395680875e-05, "loss": 1.037, "step": 6286 }, { "epoch": 0.6614413466596528, "grad_norm": 1.625756859779358, "learning_rate": 5.4338506321859304e-05, "loss": 1.6669, "step": 6287 }, { "epoch": 0.661546554445029, "grad_norm": 1.8389314413070679, "learning_rate": 5.430819155055659e-05, "loss": 2.227, "step": 6288 }, { "epoch": 0.661651762230405, "grad_norm": 1.3766262531280518, "learning_rate": 5.427788208529318e-05, "loss": 2.0434, "step": 6289 }, { "epoch": 0.6617569700157812, "grad_norm": 1.752624750137329, "learning_rate": 5.4247577929588745e-05, "loss": 1.7892, "step": 6290 }, { "epoch": 0.6618621778011573, "grad_norm": 1.2732161283493042, "learning_rate": 5.4217279086962416e-05, "loss": 1.6846, "step": 6291 }, { "epoch": 0.6619673855865335, "grad_norm": 1.762704849243164, "learning_rate": 5.418698556093271e-05, "loss": 1.3161, "step": 6292 }, { "epoch": 0.6620725933719095, "grad_norm": 1.4395411014556885, "learning_rate": 5.41566973550175e-05, "loss": 2.1022, "step": 6293 }, { "epoch": 0.6621778011572856, "grad_norm": 1.3859562873840332, "learning_rate": 5.41264144727341e-05, "loss": 1.3643, "step": 6294 }, { "epoch": 0.6622830089426618, "grad_norm": 1.1399462223052979, "learning_rate": 5.409613691759914e-05, "loss": 2.0001, "step": 6295 }, { "epoch": 0.6623882167280378, "grad_norm": 2.5576746463775635, "learning_rate": 5.406586469312859e-05, "loss": 1.2268, "step": 6296 }, { "epoch": 0.662493424513414, "grad_norm": 1.959189534187317, "learning_rate": 5.403559780283795e-05, "loss": 0.9835, "step": 6297 }, { "epoch": 0.6625986322987901, "grad_norm": 1.6175181865692139, "learning_rate": 5.400533625024199e-05, "loss": 1.8782, "step": 6298 }, { "epoch": 0.6627038400841663, "grad_norm": 1.2457187175750732, "learning_rate": 5.397508003885483e-05, "loss": 1.7866, "step": 6299 }, { "epoch": 0.6628090478695423, "grad_norm": 1.5392271280288696, "learning_rate": 5.394482917219015e-05, "loss": 1.664, "step": 6300 }, { "epoch": 0.6629142556549185, "grad_norm": 2.1098384857177734, "learning_rate": 5.391458365376072e-05, "loss": 2.0587, "step": 6301 }, { "epoch": 0.6630194634402946, "grad_norm": 1.1697083711624146, "learning_rate": 5.3884343487078984e-05, "loss": 2.0644, "step": 6302 }, { "epoch": 0.6631246712256706, "grad_norm": 1.4275423288345337, "learning_rate": 5.385410867565658e-05, "loss": 2.0324, "step": 6303 }, { "epoch": 0.6632298790110468, "grad_norm": 1.6790199279785156, "learning_rate": 5.382387922300454e-05, "loss": 1.5068, "step": 6304 }, { "epoch": 0.663335086796423, "grad_norm": 1.8416249752044678, "learning_rate": 5.379365513263338e-05, "loss": 1.9226, "step": 6305 }, { "epoch": 0.6634402945817991, "grad_norm": 1.2681195735931396, "learning_rate": 5.3763436408052904e-05, "loss": 2.0517, "step": 6306 }, { "epoch": 0.6635455023671751, "grad_norm": 1.1384334564208984, "learning_rate": 5.3733223052772265e-05, "loss": 1.3946, "step": 6307 }, { "epoch": 0.6636507101525513, "grad_norm": 0.976108968257904, "learning_rate": 5.370301507030012e-05, "loss": 1.823, "step": 6308 }, { "epoch": 0.6637559179379274, "grad_norm": 1.8622645139694214, "learning_rate": 5.367281246414439e-05, "loss": 1.3448, "step": 6309 }, { "epoch": 0.6638611257233035, "grad_norm": 1.617946982383728, "learning_rate": 5.364261523781234e-05, "loss": 1.6499, "step": 6310 }, { "epoch": 0.6639663335086796, "grad_norm": 1.8407984972000122, "learning_rate": 5.361242339481078e-05, "loss": 2.258, "step": 6311 }, { "epoch": 0.6640715412940558, "grad_norm": 1.4248191118240356, "learning_rate": 5.358223693864575e-05, "loss": 1.4035, "step": 6312 }, { "epoch": 0.6641767490794319, "grad_norm": 1.5487806797027588, "learning_rate": 5.3552055872822636e-05, "loss": 1.6018, "step": 6313 }, { "epoch": 0.664281956864808, "grad_norm": 1.2339686155319214, "learning_rate": 5.352188020084638e-05, "loss": 2.0545, "step": 6314 }, { "epoch": 0.6643871646501841, "grad_norm": 1.4714442491531372, "learning_rate": 5.349170992622112e-05, "loss": 1.9142, "step": 6315 }, { "epoch": 0.6644923724355603, "grad_norm": 1.6540426015853882, "learning_rate": 5.34615450524504e-05, "loss": 1.6512, "step": 6316 }, { "epoch": 0.6645975802209364, "grad_norm": 1.5326392650604248, "learning_rate": 5.3431385583037244e-05, "loss": 1.5426, "step": 6317 }, { "epoch": 0.6647027880063124, "grad_norm": 1.2688010931015015, "learning_rate": 5.340123152148393e-05, "loss": 1.5912, "step": 6318 }, { "epoch": 0.6648079957916886, "grad_norm": 1.2774553298950195, "learning_rate": 5.337108287129211e-05, "loss": 1.7242, "step": 6319 }, { "epoch": 0.6649132035770647, "grad_norm": 1.8071551322937012, "learning_rate": 5.334093963596294e-05, "loss": 1.8998, "step": 6320 }, { "epoch": 0.6650184113624408, "grad_norm": 1.0663738250732422, "learning_rate": 5.33108018189968e-05, "loss": 1.7188, "step": 6321 }, { "epoch": 0.6651236191478169, "grad_norm": 1.3195592164993286, "learning_rate": 5.328066942389351e-05, "loss": 1.8181, "step": 6322 }, { "epoch": 0.6652288269331931, "grad_norm": 1.5998344421386719, "learning_rate": 5.325054245415223e-05, "loss": 1.454, "step": 6323 }, { "epoch": 0.6653340347185692, "grad_norm": 1.0443423986434937, "learning_rate": 5.322042091327148e-05, "loss": 1.8442, "step": 6324 }, { "epoch": 0.6654392425039453, "grad_norm": 1.2177835702896118, "learning_rate": 5.319030480474923e-05, "loss": 1.6729, "step": 6325 }, { "epoch": 0.6655444502893214, "grad_norm": 2.082824945449829, "learning_rate": 5.316019413208275e-05, "loss": 1.8512, "step": 6326 }, { "epoch": 0.6656496580746976, "grad_norm": 1.6515694856643677, "learning_rate": 5.313008889876865e-05, "loss": 1.9645, "step": 6327 }, { "epoch": 0.6657548658600736, "grad_norm": 1.6280758380889893, "learning_rate": 5.309998910830303e-05, "loss": 2.4568, "step": 6328 }, { "epoch": 0.6658600736454497, "grad_norm": 1.4808907508850098, "learning_rate": 5.306989476418123e-05, "loss": 1.864, "step": 6329 }, { "epoch": 0.6659652814308259, "grad_norm": 1.735153079032898, "learning_rate": 5.3039805869897985e-05, "loss": 2.0455, "step": 6330 }, { "epoch": 0.666070489216202, "grad_norm": 1.4877818822860718, "learning_rate": 5.3009722428947475e-05, "loss": 1.5956, "step": 6331 }, { "epoch": 0.6661756970015781, "grad_norm": 1.3403220176696777, "learning_rate": 5.297964444482317e-05, "loss": 1.806, "step": 6332 }, { "epoch": 0.6662809047869542, "grad_norm": 1.4084241390228271, "learning_rate": 5.294957192101788e-05, "loss": 1.4097, "step": 6333 }, { "epoch": 0.6663861125723304, "grad_norm": 1.148952603340149, "learning_rate": 5.2919504861023903e-05, "loss": 2.0219, "step": 6334 }, { "epoch": 0.6664913203577064, "grad_norm": 1.5325878858566284, "learning_rate": 5.288944326833281e-05, "loss": 2.2054, "step": 6335 }, { "epoch": 0.6665965281430826, "grad_norm": 1.375962495803833, "learning_rate": 5.285938714643548e-05, "loss": 1.1267, "step": 6336 }, { "epoch": 0.6667017359284587, "grad_norm": 1.8014800548553467, "learning_rate": 5.2829336498822335e-05, "loss": 1.6149, "step": 6337 }, { "epoch": 0.6668069437138349, "grad_norm": 1.3949754238128662, "learning_rate": 5.279929132898298e-05, "loss": 1.2513, "step": 6338 }, { "epoch": 0.6669121514992109, "grad_norm": 1.4356800317764282, "learning_rate": 5.276925164040653e-05, "loss": 1.763, "step": 6339 }, { "epoch": 0.667017359284587, "grad_norm": 1.1746344566345215, "learning_rate": 5.2739217436581365e-05, "loss": 1.503, "step": 6340 }, { "epoch": 0.6671225670699632, "grad_norm": 2.2012033462524414, "learning_rate": 5.270918872099522e-05, "loss": 2.013, "step": 6341 }, { "epoch": 0.6672277748553394, "grad_norm": 2.013820171356201, "learning_rate": 5.2679165497135285e-05, "loss": 1.8092, "step": 6342 }, { "epoch": 0.6673329826407154, "grad_norm": 1.2771358489990234, "learning_rate": 5.264914776848808e-05, "loss": 1.6848, "step": 6343 }, { "epoch": 0.6674381904260915, "grad_norm": 1.3562238216400146, "learning_rate": 5.2619135538539355e-05, "loss": 1.7494, "step": 6344 }, { "epoch": 0.6675433982114677, "grad_norm": 2.059318780899048, "learning_rate": 5.2589128810774426e-05, "loss": 1.9948, "step": 6345 }, { "epoch": 0.6676486059968437, "grad_norm": 1.2015169858932495, "learning_rate": 5.2559127588677846e-05, "loss": 1.5834, "step": 6346 }, { "epoch": 0.6677538137822199, "grad_norm": 1.4178433418273926, "learning_rate": 5.252913187573354e-05, "loss": 1.76, "step": 6347 }, { "epoch": 0.667859021567596, "grad_norm": 1.4683200120925903, "learning_rate": 5.249914167542486e-05, "loss": 1.7822, "step": 6348 }, { "epoch": 0.6679642293529722, "grad_norm": 1.260953664779663, "learning_rate": 5.246915699123439e-05, "loss": 0.8526, "step": 6349 }, { "epoch": 0.6680694371383482, "grad_norm": 1.269636869430542, "learning_rate": 5.243917782664425e-05, "loss": 1.7709, "step": 6350 }, { "epoch": 0.6681746449237244, "grad_norm": 1.585425615310669, "learning_rate": 5.240920418513577e-05, "loss": 1.542, "step": 6351 }, { "epoch": 0.6682798527091005, "grad_norm": 2.510784149169922, "learning_rate": 5.2379236070189677e-05, "loss": 1.4086, "step": 6352 }, { "epoch": 0.6683850604944765, "grad_norm": 1.2472009658813477, "learning_rate": 5.234927348528611e-05, "loss": 1.7158, "step": 6353 }, { "epoch": 0.6684902682798527, "grad_norm": 1.3211387395858765, "learning_rate": 5.231931643390451e-05, "loss": 1.8004, "step": 6354 }, { "epoch": 0.6685954760652288, "grad_norm": 1.3025023937225342, "learning_rate": 5.228936491952363e-05, "loss": 2.2033, "step": 6355 }, { "epoch": 0.668700683850605, "grad_norm": 2.7773120403289795, "learning_rate": 5.2259418945621754e-05, "loss": 2.0115, "step": 6356 }, { "epoch": 0.668805891635981, "grad_norm": 1.2266188859939575, "learning_rate": 5.222947851567633e-05, "loss": 1.3392, "step": 6357 }, { "epoch": 0.6689110994213572, "grad_norm": 1.2715981006622314, "learning_rate": 5.219954363316424e-05, "loss": 1.6663, "step": 6358 }, { "epoch": 0.6690163072067333, "grad_norm": 2.1951401233673096, "learning_rate": 5.2169614301561775e-05, "loss": 1.6716, "step": 6359 }, { "epoch": 0.6691215149921094, "grad_norm": 1.2387498617172241, "learning_rate": 5.2139690524344495e-05, "loss": 1.6217, "step": 6360 }, { "epoch": 0.6692267227774855, "grad_norm": 1.4151759147644043, "learning_rate": 5.210977230498733e-05, "loss": 2.2641, "step": 6361 }, { "epoch": 0.6693319305628617, "grad_norm": 1.9566744565963745, "learning_rate": 5.207985964696462e-05, "loss": 1.018, "step": 6362 }, { "epoch": 0.6694371383482378, "grad_norm": 1.473006010055542, "learning_rate": 5.2049952553750046e-05, "loss": 1.7412, "step": 6363 }, { "epoch": 0.6695423461336139, "grad_norm": 1.857047438621521, "learning_rate": 5.202005102881653e-05, "loss": 1.3064, "step": 6364 }, { "epoch": 0.66964755391899, "grad_norm": 1.2658127546310425, "learning_rate": 5.199015507563656e-05, "loss": 1.7702, "step": 6365 }, { "epoch": 0.6697527617043662, "grad_norm": 1.1785234212875366, "learning_rate": 5.1960264697681726e-05, "loss": 2.3375, "step": 6366 }, { "epoch": 0.6698579694897422, "grad_norm": 1.1640081405639648, "learning_rate": 5.19303798984232e-05, "loss": 2.1538, "step": 6367 }, { "epoch": 0.6699631772751183, "grad_norm": 1.3098970651626587, "learning_rate": 5.1900500681331363e-05, "loss": 2.0698, "step": 6368 }, { "epoch": 0.6700683850604945, "grad_norm": 1.9889769554138184, "learning_rate": 5.1870627049875954e-05, "loss": 1.8563, "step": 6369 }, { "epoch": 0.6701735928458706, "grad_norm": 1.559300184249878, "learning_rate": 5.184075900752619e-05, "loss": 1.4302, "step": 6370 }, { "epoch": 0.6702788006312467, "grad_norm": 1.8356385231018066, "learning_rate": 5.1810896557750485e-05, "loss": 1.4985, "step": 6371 }, { "epoch": 0.6703840084166228, "grad_norm": 1.3510042428970337, "learning_rate": 5.178103970401664e-05, "loss": 1.2977, "step": 6372 }, { "epoch": 0.670489216201999, "grad_norm": 1.291181206703186, "learning_rate": 5.1751188449791924e-05, "loss": 1.4883, "step": 6373 }, { "epoch": 0.6705944239873751, "grad_norm": 1.7430511713027954, "learning_rate": 5.1721342798542795e-05, "loss": 1.609, "step": 6374 }, { "epoch": 0.6706996317727512, "grad_norm": 1.0107218027114868, "learning_rate": 5.169150275373513e-05, "loss": 1.5741, "step": 6375 }, { "epoch": 0.6708048395581273, "grad_norm": 1.1406487226486206, "learning_rate": 5.16616683188342e-05, "loss": 1.1794, "step": 6376 }, { "epoch": 0.6709100473435035, "grad_norm": 1.4461215734481812, "learning_rate": 5.163183949730456e-05, "loss": 1.4193, "step": 6377 }, { "epoch": 0.6710152551288795, "grad_norm": 1.2850546836853027, "learning_rate": 5.1602016292610075e-05, "loss": 1.5627, "step": 6378 }, { "epoch": 0.6711204629142556, "grad_norm": 1.6368341445922852, "learning_rate": 5.157219870821413e-05, "loss": 1.6965, "step": 6379 }, { "epoch": 0.6712256706996318, "grad_norm": 1.221516489982605, "learning_rate": 5.154238674757925e-05, "loss": 1.3989, "step": 6380 }, { "epoch": 0.6713308784850079, "grad_norm": 1.8983935117721558, "learning_rate": 5.151258041416742e-05, "loss": 2.2616, "step": 6381 }, { "epoch": 0.671436086270384, "grad_norm": 1.487594485282898, "learning_rate": 5.148277971143998e-05, "loss": 2.0648, "step": 6382 }, { "epoch": 0.6715412940557601, "grad_norm": 1.1469671726226807, "learning_rate": 5.145298464285757e-05, "loss": 1.7304, "step": 6383 }, { "epoch": 0.6716465018411363, "grad_norm": 1.3301132917404175, "learning_rate": 5.142319521188017e-05, "loss": 1.673, "step": 6384 }, { "epoch": 0.6717517096265123, "grad_norm": 1.2145934104919434, "learning_rate": 5.1393411421967174e-05, "loss": 1.6685, "step": 6385 }, { "epoch": 0.6718569174118885, "grad_norm": 1.669686198234558, "learning_rate": 5.136363327657725e-05, "loss": 1.9549, "step": 6386 }, { "epoch": 0.6719621251972646, "grad_norm": 1.2095863819122314, "learning_rate": 5.1333860779168455e-05, "loss": 1.6674, "step": 6387 }, { "epoch": 0.6720673329826408, "grad_norm": 1.208296537399292, "learning_rate": 5.1304093933198136e-05, "loss": 1.8812, "step": 6388 }, { "epoch": 0.6721725407680168, "grad_norm": 1.2008256912231445, "learning_rate": 5.127433274212301e-05, "loss": 1.8069, "step": 6389 }, { "epoch": 0.672277748553393, "grad_norm": 1.6719691753387451, "learning_rate": 5.12445772093992e-05, "loss": 1.8409, "step": 6390 }, { "epoch": 0.6723829563387691, "grad_norm": 1.1335160732269287, "learning_rate": 5.1214827338482094e-05, "loss": 1.9089, "step": 6391 }, { "epoch": 0.6724881641241451, "grad_norm": 1.1994752883911133, "learning_rate": 5.1185083132826414e-05, "loss": 1.6759, "step": 6392 }, { "epoch": 0.6725933719095213, "grad_norm": 1.675121784210205, "learning_rate": 5.115534459588631e-05, "loss": 1.1881, "step": 6393 }, { "epoch": 0.6726985796948974, "grad_norm": 2.1219699382781982, "learning_rate": 5.1125611731115174e-05, "loss": 1.4658, "step": 6394 }, { "epoch": 0.6728037874802736, "grad_norm": 1.6067357063293457, "learning_rate": 5.1095884541965835e-05, "loss": 1.4778, "step": 6395 }, { "epoch": 0.6729089952656496, "grad_norm": 1.3906060457229614, "learning_rate": 5.106616303189039e-05, "loss": 1.727, "step": 6396 }, { "epoch": 0.6730142030510258, "grad_norm": 1.3164790868759155, "learning_rate": 5.103644720434027e-05, "loss": 1.8831, "step": 6397 }, { "epoch": 0.6731194108364019, "grad_norm": 2.2263622283935547, "learning_rate": 5.100673706276633e-05, "loss": 1.7309, "step": 6398 }, { "epoch": 0.673224618621778, "grad_norm": 2.4222521781921387, "learning_rate": 5.097703261061868e-05, "loss": 1.2371, "step": 6399 }, { "epoch": 0.6733298264071541, "grad_norm": 1.8905748128890991, "learning_rate": 5.094733385134677e-05, "loss": 1.5675, "step": 6400 }, { "epoch": 0.6734350341925303, "grad_norm": 1.5610456466674805, "learning_rate": 5.091764078839949e-05, "loss": 1.4919, "step": 6401 }, { "epoch": 0.6735402419779064, "grad_norm": 1.896572232246399, "learning_rate": 5.088795342522497e-05, "loss": 1.4104, "step": 6402 }, { "epoch": 0.6736454497632824, "grad_norm": 1.2327032089233398, "learning_rate": 5.085827176527064e-05, "loss": 1.3777, "step": 6403 }, { "epoch": 0.6737506575486586, "grad_norm": 1.7423434257507324, "learning_rate": 5.082859581198344e-05, "loss": 1.6206, "step": 6404 }, { "epoch": 0.6738558653340347, "grad_norm": 1.2722043991088867, "learning_rate": 5.0798925568809486e-05, "loss": 1.4621, "step": 6405 }, { "epoch": 0.6739610731194109, "grad_norm": 1.320483922958374, "learning_rate": 5.076926103919426e-05, "loss": 2.0733, "step": 6406 }, { "epoch": 0.6740662809047869, "grad_norm": 1.2347058057785034, "learning_rate": 5.0739602226582706e-05, "loss": 1.6421, "step": 6407 }, { "epoch": 0.6741714886901631, "grad_norm": 1.6619998216629028, "learning_rate": 5.0709949134418865e-05, "loss": 2.2148, "step": 6408 }, { "epoch": 0.6742766964755392, "grad_norm": 2.012146234512329, "learning_rate": 5.0680301766146355e-05, "loss": 1.6082, "step": 6409 }, { "epoch": 0.6743819042609153, "grad_norm": 1.2811518907546997, "learning_rate": 5.0650660125207994e-05, "loss": 1.6369, "step": 6410 }, { "epoch": 0.6744871120462914, "grad_norm": 1.504457712173462, "learning_rate": 5.062102421504593e-05, "loss": 1.9906, "step": 6411 }, { "epoch": 0.6745923198316676, "grad_norm": 1.718910574913025, "learning_rate": 5.059139403910177e-05, "loss": 1.5476, "step": 6412 }, { "epoch": 0.6746975276170437, "grad_norm": 1.8171217441558838, "learning_rate": 5.056176960081631e-05, "loss": 1.605, "step": 6413 }, { "epoch": 0.6748027354024198, "grad_norm": 1.2599389553070068, "learning_rate": 5.0532150903629724e-05, "loss": 1.892, "step": 6414 }, { "epoch": 0.6749079431877959, "grad_norm": 1.823708415031433, "learning_rate": 5.050253795098159e-05, "loss": 1.8375, "step": 6415 }, { "epoch": 0.675013150973172, "grad_norm": 1.2546007633209229, "learning_rate": 5.047293074631074e-05, "loss": 1.4461, "step": 6416 }, { "epoch": 0.6751183587585481, "grad_norm": 1.1789088249206543, "learning_rate": 5.0443329293055305e-05, "loss": 1.7132, "step": 6417 }, { "epoch": 0.6752235665439242, "grad_norm": 1.3993732929229736, "learning_rate": 5.041373359465289e-05, "loss": 1.5287, "step": 6418 }, { "epoch": 0.6753287743293004, "grad_norm": 1.6638224124908447, "learning_rate": 5.0384143654540314e-05, "loss": 1.1025, "step": 6419 }, { "epoch": 0.6754339821146765, "grad_norm": 1.2212295532226562, "learning_rate": 5.035455947615373e-05, "loss": 2.0747, "step": 6420 }, { "epoch": 0.6755391899000526, "grad_norm": 1.5106666088104248, "learning_rate": 5.032498106292869e-05, "loss": 1.3987, "step": 6421 }, { "epoch": 0.6756443976854287, "grad_norm": 1.0953783988952637, "learning_rate": 5.029540841830004e-05, "loss": 1.4668, "step": 6422 }, { "epoch": 0.6757496054708049, "grad_norm": 1.4428025484085083, "learning_rate": 5.0265841545701886e-05, "loss": 1.8213, "step": 6423 }, { "epoch": 0.6758548132561809, "grad_norm": 1.1565446853637695, "learning_rate": 5.023628044856783e-05, "loss": 1.9793, "step": 6424 }, { "epoch": 0.6759600210415571, "grad_norm": 1.158007025718689, "learning_rate": 5.020672513033066e-05, "loss": 1.5613, "step": 6425 }, { "epoch": 0.6760652288269332, "grad_norm": 1.8131704330444336, "learning_rate": 5.017717559442249e-05, "loss": 2.1706, "step": 6426 }, { "epoch": 0.6761704366123094, "grad_norm": 1.257173776626587, "learning_rate": 5.014763184427489e-05, "loss": 1.7876, "step": 6427 }, { "epoch": 0.6762756443976854, "grad_norm": 1.4548490047454834, "learning_rate": 5.011809388331865e-05, "loss": 2.1786, "step": 6428 }, { "epoch": 0.6763808521830615, "grad_norm": 1.3961397409439087, "learning_rate": 5.0088561714983906e-05, "loss": 1.6834, "step": 6429 }, { "epoch": 0.6764860599684377, "grad_norm": 1.5321639776229858, "learning_rate": 5.0059035342700144e-05, "loss": 1.0357, "step": 6430 }, { "epoch": 0.6765912677538137, "grad_norm": 1.6330370903015137, "learning_rate": 5.0029514769896114e-05, "loss": 1.6928, "step": 6431 }, { "epoch": 0.6766964755391899, "grad_norm": 1.063062310218811, "learning_rate": 5.000000000000002e-05, "loss": 1.5404, "step": 6432 }, { "epoch": 0.676801683324566, "grad_norm": 0.9866712093353271, "learning_rate": 4.9970491036439284e-05, "loss": 1.7373, "step": 6433 }, { "epoch": 0.6769068911099422, "grad_norm": 1.9362293481826782, "learning_rate": 4.9940987882640647e-05, "loss": 2.12, "step": 6434 }, { "epoch": 0.6770120988953182, "grad_norm": 1.8418405055999756, "learning_rate": 4.991149054203027e-05, "loss": 1.6442, "step": 6435 }, { "epoch": 0.6771173066806944, "grad_norm": 1.7248798608779907, "learning_rate": 4.988199901803357e-05, "loss": 1.6048, "step": 6436 }, { "epoch": 0.6772225144660705, "grad_norm": 1.2888562679290771, "learning_rate": 4.985251331407524e-05, "loss": 1.7667, "step": 6437 }, { "epoch": 0.6773277222514467, "grad_norm": 1.4221709966659546, "learning_rate": 4.982303343357946e-05, "loss": 1.7982, "step": 6438 }, { "epoch": 0.6774329300368227, "grad_norm": 1.8176785707473755, "learning_rate": 4.9793559379969566e-05, "loss": 1.9093, "step": 6439 }, { "epoch": 0.6775381378221988, "grad_norm": 1.803604245185852, "learning_rate": 4.9764091156668266e-05, "loss": 1.9377, "step": 6440 }, { "epoch": 0.677643345607575, "grad_norm": 1.8046791553497314, "learning_rate": 4.973462876709767e-05, "loss": 1.357, "step": 6441 }, { "epoch": 0.677748553392951, "grad_norm": 2.0363752841949463, "learning_rate": 4.970517221467909e-05, "loss": 1.4152, "step": 6442 }, { "epoch": 0.6778537611783272, "grad_norm": 1.5641330480575562, "learning_rate": 4.967572150283326e-05, "loss": 1.8567, "step": 6443 }, { "epoch": 0.6779589689637033, "grad_norm": 1.910548448562622, "learning_rate": 4.9646276634980194e-05, "loss": 2.0569, "step": 6444 }, { "epoch": 0.6780641767490795, "grad_norm": 1.1518720388412476, "learning_rate": 4.961683761453917e-05, "loss": 1.8144, "step": 6445 }, { "epoch": 0.6781693845344555, "grad_norm": 2.0624051094055176, "learning_rate": 4.958740444492892e-05, "loss": 1.5392, "step": 6446 }, { "epoch": 0.6782745923198317, "grad_norm": 1.4468212127685547, "learning_rate": 4.955797712956739e-05, "loss": 1.4592, "step": 6447 }, { "epoch": 0.6783798001052078, "grad_norm": 1.4319233894348145, "learning_rate": 4.9528555671871835e-05, "loss": 1.6202, "step": 6448 }, { "epoch": 0.6784850078905839, "grad_norm": 2.070284605026245, "learning_rate": 4.9499140075258957e-05, "loss": 1.5695, "step": 6449 }, { "epoch": 0.67859021567596, "grad_norm": 1.7108412981033325, "learning_rate": 4.9469730343144635e-05, "loss": 1.3484, "step": 6450 }, { "epoch": 0.6786954234613362, "grad_norm": 1.4341391324996948, "learning_rate": 4.944032647894414e-05, "loss": 1.2341, "step": 6451 }, { "epoch": 0.6788006312467123, "grad_norm": 1.3790180683135986, "learning_rate": 4.941092848607204e-05, "loss": 1.4654, "step": 6452 }, { "epoch": 0.6789058390320883, "grad_norm": 2.0244486331939697, "learning_rate": 4.9381536367942195e-05, "loss": 1.7548, "step": 6453 }, { "epoch": 0.6790110468174645, "grad_norm": 1.3763808012008667, "learning_rate": 4.935215012796789e-05, "loss": 1.2737, "step": 6454 }, { "epoch": 0.6791162546028406, "grad_norm": 1.4411334991455078, "learning_rate": 4.93227697695616e-05, "loss": 1.1812, "step": 6455 }, { "epoch": 0.6792214623882167, "grad_norm": 1.3906333446502686, "learning_rate": 4.929339529613515e-05, "loss": 1.8517, "step": 6456 }, { "epoch": 0.6793266701735928, "grad_norm": 1.6477974653244019, "learning_rate": 4.9264026711099764e-05, "loss": 1.6151, "step": 6457 }, { "epoch": 0.679431877958969, "grad_norm": 1.5431394577026367, "learning_rate": 4.9234664017865896e-05, "loss": 2.044, "step": 6458 }, { "epoch": 0.6795370857443451, "grad_norm": 1.9720886945724487, "learning_rate": 4.920530721984329e-05, "loss": 1.6605, "step": 6459 }, { "epoch": 0.6796422935297212, "grad_norm": 1.2280595302581787, "learning_rate": 4.917595632044113e-05, "loss": 1.6364, "step": 6460 }, { "epoch": 0.6797475013150973, "grad_norm": 1.3215405941009521, "learning_rate": 4.914661132306779e-05, "loss": 1.275, "step": 6461 }, { "epoch": 0.6798527091004735, "grad_norm": 1.446702241897583, "learning_rate": 4.911727223113099e-05, "loss": 2.1152, "step": 6462 }, { "epoch": 0.6799579168858495, "grad_norm": 1.3523727655410767, "learning_rate": 4.908793904803787e-05, "loss": 1.2915, "step": 6463 }, { "epoch": 0.6800631246712256, "grad_norm": 1.8233401775360107, "learning_rate": 4.9058611777194716e-05, "loss": 1.0423, "step": 6464 }, { "epoch": 0.6801683324566018, "grad_norm": 1.9023778438568115, "learning_rate": 4.9029290422007204e-05, "loss": 1.5998, "step": 6465 }, { "epoch": 0.680273540241978, "grad_norm": 1.1877436637878418, "learning_rate": 4.8999974985880384e-05, "loss": 1.9294, "step": 6466 }, { "epoch": 0.680378748027354, "grad_norm": 1.5044838190078735, "learning_rate": 4.8970665472218537e-05, "loss": 1.5298, "step": 6467 }, { "epoch": 0.6804839558127301, "grad_norm": 1.9050568342208862, "learning_rate": 4.8941361884425215e-05, "loss": 1.5252, "step": 6468 }, { "epoch": 0.6805891635981063, "grad_norm": 1.1263233423233032, "learning_rate": 4.891206422590347e-05, "loss": 1.6088, "step": 6469 }, { "epoch": 0.6806943713834824, "grad_norm": 1.9254624843597412, "learning_rate": 4.8882772500055464e-05, "loss": 2.1593, "step": 6470 }, { "epoch": 0.6807995791688585, "grad_norm": 2.4042844772338867, "learning_rate": 4.885348671028273e-05, "loss": 1.6122, "step": 6471 }, { "epoch": 0.6809047869542346, "grad_norm": 1.4427109956741333, "learning_rate": 4.882420685998623e-05, "loss": 1.304, "step": 6472 }, { "epoch": 0.6810099947396108, "grad_norm": 1.020768404006958, "learning_rate": 4.8794932952566e-05, "loss": 1.9123, "step": 6473 }, { "epoch": 0.6811152025249868, "grad_norm": 1.6786848306655884, "learning_rate": 4.8765664991421634e-05, "loss": 1.9933, "step": 6474 }, { "epoch": 0.681220410310363, "grad_norm": 1.6853234767913818, "learning_rate": 4.8736402979951867e-05, "loss": 1.9755, "step": 6475 }, { "epoch": 0.6813256180957391, "grad_norm": 1.9556273221969604, "learning_rate": 4.870714692155479e-05, "loss": 1.5554, "step": 6476 }, { "epoch": 0.6814308258811153, "grad_norm": 1.1733824014663696, "learning_rate": 4.867789681962788e-05, "loss": 1.7569, "step": 6477 }, { "epoch": 0.6815360336664913, "grad_norm": 1.0935428142547607, "learning_rate": 4.864865267756779e-05, "loss": 1.993, "step": 6478 }, { "epoch": 0.6816412414518674, "grad_norm": 1.0769548416137695, "learning_rate": 4.8619414498770556e-05, "loss": 1.8029, "step": 6479 }, { "epoch": 0.6817464492372436, "grad_norm": 1.2517523765563965, "learning_rate": 4.859018228663155e-05, "loss": 2.1288, "step": 6480 }, { "epoch": 0.6818516570226196, "grad_norm": 1.2191598415374756, "learning_rate": 4.856095604454539e-05, "loss": 1.2368, "step": 6481 }, { "epoch": 0.6819568648079958, "grad_norm": 1.3996628522872925, "learning_rate": 4.8531735775905975e-05, "loss": 1.7399, "step": 6482 }, { "epoch": 0.6820620725933719, "grad_norm": 1.9159706830978394, "learning_rate": 4.850252148410665e-05, "loss": 1.5246, "step": 6483 }, { "epoch": 0.6821672803787481, "grad_norm": 1.7578186988830566, "learning_rate": 4.8473313172539925e-05, "loss": 1.8513, "step": 6484 }, { "epoch": 0.6822724881641241, "grad_norm": 1.3070460557937622, "learning_rate": 4.8444110844597626e-05, "loss": 1.6536, "step": 6485 }, { "epoch": 0.6823776959495003, "grad_norm": 2.009951591491699, "learning_rate": 4.8414914503671006e-05, "loss": 1.1448, "step": 6486 }, { "epoch": 0.6824829037348764, "grad_norm": 2.186450719833374, "learning_rate": 4.838572415315046e-05, "loss": 1.8157, "step": 6487 }, { "epoch": 0.6825881115202524, "grad_norm": 1.4713935852050781, "learning_rate": 4.835653979642585e-05, "loss": 1.0556, "step": 6488 }, { "epoch": 0.6826933193056286, "grad_norm": 1.4540444612503052, "learning_rate": 4.832736143688621e-05, "loss": 0.9736, "step": 6489 }, { "epoch": 0.6827985270910047, "grad_norm": 1.8345993757247925, "learning_rate": 4.829818907791988e-05, "loss": 1.4606, "step": 6490 }, { "epoch": 0.6829037348763809, "grad_norm": 1.7492965459823608, "learning_rate": 4.826902272291467e-05, "loss": 2.0597, "step": 6491 }, { "epoch": 0.6830089426617569, "grad_norm": 1.3600223064422607, "learning_rate": 4.8239862375257484e-05, "loss": 1.4011, "step": 6492 }, { "epoch": 0.6831141504471331, "grad_norm": 1.2309643030166626, "learning_rate": 4.821070803833464e-05, "loss": 1.3888, "step": 6493 }, { "epoch": 0.6832193582325092, "grad_norm": 1.445009708404541, "learning_rate": 4.818155971553174e-05, "loss": 2.1089, "step": 6494 }, { "epoch": 0.6833245660178853, "grad_norm": 1.5966815948486328, "learning_rate": 4.815241741023367e-05, "loss": 1.3903, "step": 6495 }, { "epoch": 0.6834297738032614, "grad_norm": 2.34873104095459, "learning_rate": 4.8123281125824605e-05, "loss": 1.5625, "step": 6496 }, { "epoch": 0.6835349815886376, "grad_norm": 1.5460422039031982, "learning_rate": 4.809415086568812e-05, "loss": 1.5584, "step": 6497 }, { "epoch": 0.6836401893740137, "grad_norm": 1.2038516998291016, "learning_rate": 4.806502663320692e-05, "loss": 1.5281, "step": 6498 }, { "epoch": 0.6837453971593898, "grad_norm": 1.3815281391143799, "learning_rate": 4.803590843176321e-05, "loss": 1.5034, "step": 6499 }, { "epoch": 0.6838506049447659, "grad_norm": 0.981395959854126, "learning_rate": 4.800679626473833e-05, "loss": 1.9422, "step": 6500 }, { "epoch": 0.683955812730142, "grad_norm": 1.7010056972503662, "learning_rate": 4.797769013551295e-05, "loss": 1.8847, "step": 6501 }, { "epoch": 0.6840610205155182, "grad_norm": 1.381854772567749, "learning_rate": 4.7948590047467153e-05, "loss": 1.5431, "step": 6502 }, { "epoch": 0.6841662283008942, "grad_norm": 1.5627236366271973, "learning_rate": 4.7919496003980204e-05, "loss": 2.1837, "step": 6503 }, { "epoch": 0.6842714360862704, "grad_norm": 1.6263660192489624, "learning_rate": 4.7890408008430634e-05, "loss": 1.7972, "step": 6504 }, { "epoch": 0.6843766438716465, "grad_norm": 1.3480515480041504, "learning_rate": 4.786132606419643e-05, "loss": 2.0057, "step": 6505 }, { "epoch": 0.6844818516570226, "grad_norm": 1.6764869689941406, "learning_rate": 4.783225017465475e-05, "loss": 1.7601, "step": 6506 }, { "epoch": 0.6845870594423987, "grad_norm": 1.2646028995513916, "learning_rate": 4.780318034318202e-05, "loss": 1.5116, "step": 6507 }, { "epoch": 0.6846922672277749, "grad_norm": 1.2866673469543457, "learning_rate": 4.7774116573154125e-05, "loss": 1.5891, "step": 6508 }, { "epoch": 0.684797475013151, "grad_norm": 1.5979012250900269, "learning_rate": 4.774505886794609e-05, "loss": 1.6371, "step": 6509 }, { "epoch": 0.6849026827985271, "grad_norm": 1.9245673418045044, "learning_rate": 4.771600723093227e-05, "loss": 1.6979, "step": 6510 }, { "epoch": 0.6850078905839032, "grad_norm": 1.8134851455688477, "learning_rate": 4.7686961665486396e-05, "loss": 1.9395, "step": 6511 }, { "epoch": 0.6851130983692794, "grad_norm": 1.7789303064346313, "learning_rate": 4.76579221749814e-05, "loss": 1.915, "step": 6512 }, { "epoch": 0.6852183061546554, "grad_norm": 1.5152759552001953, "learning_rate": 4.7628888762789504e-05, "loss": 1.4219, "step": 6513 }, { "epoch": 0.6853235139400315, "grad_norm": 1.3812377452850342, "learning_rate": 4.7599861432282334e-05, "loss": 1.9226, "step": 6514 }, { "epoch": 0.6854287217254077, "grad_norm": 0.8945170044898987, "learning_rate": 4.757084018683071e-05, "loss": 1.481, "step": 6515 }, { "epoch": 0.6855339295107838, "grad_norm": 1.3005067110061646, "learning_rate": 4.754182502980477e-05, "loss": 1.8098, "step": 6516 }, { "epoch": 0.6856391372961599, "grad_norm": 2.368222951889038, "learning_rate": 4.7512815964573966e-05, "loss": 1.1709, "step": 6517 }, { "epoch": 0.685744345081536, "grad_norm": 1.7931416034698486, "learning_rate": 4.748381299450695e-05, "loss": 1.9524, "step": 6518 }, { "epoch": 0.6858495528669122, "grad_norm": 1.7992273569107056, "learning_rate": 4.7454816122971846e-05, "loss": 0.7749, "step": 6519 }, { "epoch": 0.6859547606522882, "grad_norm": 1.1509588956832886, "learning_rate": 4.7425825353335915e-05, "loss": 1.5281, "step": 6520 }, { "epoch": 0.6860599684376644, "grad_norm": 2.1700100898742676, "learning_rate": 4.7396840688965726e-05, "loss": 1.7834, "step": 6521 }, { "epoch": 0.6861651762230405, "grad_norm": 1.983696699142456, "learning_rate": 4.7367862133227244e-05, "loss": 1.9279, "step": 6522 }, { "epoch": 0.6862703840084167, "grad_norm": 1.1107069253921509, "learning_rate": 4.7338889689485624e-05, "loss": 1.4591, "step": 6523 }, { "epoch": 0.6863755917937927, "grad_norm": 1.1690186262130737, "learning_rate": 4.730992336110529e-05, "loss": 1.857, "step": 6524 }, { "epoch": 0.6864807995791689, "grad_norm": 1.3441321849822998, "learning_rate": 4.7280963151450096e-05, "loss": 1.4621, "step": 6525 }, { "epoch": 0.686586007364545, "grad_norm": 2.181993246078491, "learning_rate": 4.7252009063883054e-05, "loss": 1.483, "step": 6526 }, { "epoch": 0.686691215149921, "grad_norm": 1.914829134941101, "learning_rate": 4.722306110176647e-05, "loss": 0.7554, "step": 6527 }, { "epoch": 0.6867964229352972, "grad_norm": 2.0463366508483887, "learning_rate": 4.719411926846203e-05, "loss": 1.722, "step": 6528 }, { "epoch": 0.6869016307206733, "grad_norm": 1.2413219213485718, "learning_rate": 4.716518356733064e-05, "loss": 1.53, "step": 6529 }, { "epoch": 0.6870068385060495, "grad_norm": 1.5359559059143066, "learning_rate": 4.713625400173247e-05, "loss": 1.1939, "step": 6530 }, { "epoch": 0.6871120462914255, "grad_norm": 1.2304325103759766, "learning_rate": 4.7107330575027084e-05, "loss": 1.6209, "step": 6531 }, { "epoch": 0.6872172540768017, "grad_norm": 1.448864221572876, "learning_rate": 4.707841329057322e-05, "loss": 1.6243, "step": 6532 }, { "epoch": 0.6873224618621778, "grad_norm": 1.8410067558288574, "learning_rate": 4.7049502151728933e-05, "loss": 1.5567, "step": 6533 }, { "epoch": 0.687427669647554, "grad_norm": 2.130892753601074, "learning_rate": 4.702059716185162e-05, "loss": 1.9438, "step": 6534 }, { "epoch": 0.68753287743293, "grad_norm": 1.4536875486373901, "learning_rate": 4.6991698324297874e-05, "loss": 1.7863, "step": 6535 }, { "epoch": 0.6876380852183062, "grad_norm": 1.5897191762924194, "learning_rate": 4.696280564242371e-05, "loss": 0.9989, "step": 6536 }, { "epoch": 0.6877432930036823, "grad_norm": 1.4431312084197998, "learning_rate": 4.693391911958426e-05, "loss": 1.7681, "step": 6537 }, { "epoch": 0.6878485007890583, "grad_norm": 1.8787363767623901, "learning_rate": 4.690503875913399e-05, "loss": 1.8113, "step": 6538 }, { "epoch": 0.6879537085744345, "grad_norm": 1.407496452331543, "learning_rate": 4.687616456442677e-05, "loss": 1.7591, "step": 6539 }, { "epoch": 0.6880589163598106, "grad_norm": 1.6579171419143677, "learning_rate": 4.684729653881563e-05, "loss": 1.686, "step": 6540 }, { "epoch": 0.6881641241451868, "grad_norm": 2.2351560592651367, "learning_rate": 4.681843468565288e-05, "loss": 1.7476, "step": 6541 }, { "epoch": 0.6882693319305628, "grad_norm": 2.1286520957946777, "learning_rate": 4.6789579008290216e-05, "loss": 1.9967, "step": 6542 }, { "epoch": 0.688374539715939, "grad_norm": 1.540056586265564, "learning_rate": 4.676072951007849e-05, "loss": 1.0093, "step": 6543 }, { "epoch": 0.6884797475013151, "grad_norm": 1.2352454662322998, "learning_rate": 4.673188619436798e-05, "loss": 1.7239, "step": 6544 }, { "epoch": 0.6885849552866912, "grad_norm": 1.0865057706832886, "learning_rate": 4.670304906450811e-05, "loss": 2.0791, "step": 6545 }, { "epoch": 0.6886901630720673, "grad_norm": 1.6150873899459839, "learning_rate": 4.667421812384761e-05, "loss": 1.813, "step": 6546 }, { "epoch": 0.6887953708574435, "grad_norm": 1.4798706769943237, "learning_rate": 4.66453933757346e-05, "loss": 1.634, "step": 6547 }, { "epoch": 0.6889005786428196, "grad_norm": 1.3703545331954956, "learning_rate": 4.661657482351637e-05, "loss": 1.7388, "step": 6548 }, { "epoch": 0.6890057864281957, "grad_norm": 1.6316715478897095, "learning_rate": 4.658776247053948e-05, "loss": 2.3265, "step": 6549 }, { "epoch": 0.6891109942135718, "grad_norm": 1.0946226119995117, "learning_rate": 4.6558956320149884e-05, "loss": 1.2866, "step": 6550 }, { "epoch": 0.689216201998948, "grad_norm": 1.6693997383117676, "learning_rate": 4.6530156375692726e-05, "loss": 1.583, "step": 6551 }, { "epoch": 0.689321409784324, "grad_norm": 1.315307378768921, "learning_rate": 4.650136264051238e-05, "loss": 2.1853, "step": 6552 }, { "epoch": 0.6894266175697001, "grad_norm": 1.1383665800094604, "learning_rate": 4.6472575117952676e-05, "loss": 1.7014, "step": 6553 }, { "epoch": 0.6895318253550763, "grad_norm": 1.3326300382614136, "learning_rate": 4.644379381135655e-05, "loss": 1.7168, "step": 6554 }, { "epoch": 0.6896370331404524, "grad_norm": 1.85318922996521, "learning_rate": 4.641501872406626e-05, "loss": 1.7368, "step": 6555 }, { "epoch": 0.6897422409258285, "grad_norm": 1.3762325048446655, "learning_rate": 4.6386249859423434e-05, "loss": 2.0301, "step": 6556 }, { "epoch": 0.6898474487112046, "grad_norm": 1.573012351989746, "learning_rate": 4.635748722076887e-05, "loss": 2.1703, "step": 6557 }, { "epoch": 0.6899526564965808, "grad_norm": 1.4833385944366455, "learning_rate": 4.632873081144267e-05, "loss": 1.4837, "step": 6558 }, { "epoch": 0.6900578642819568, "grad_norm": 1.669286847114563, "learning_rate": 4.629998063478422e-05, "loss": 1.6238, "step": 6559 }, { "epoch": 0.690163072067333, "grad_norm": 1.670414924621582, "learning_rate": 4.627123669413216e-05, "loss": 1.5925, "step": 6560 }, { "epoch": 0.6902682798527091, "grad_norm": 1.836533784866333, "learning_rate": 4.624249899282449e-05, "loss": 1.5332, "step": 6561 }, { "epoch": 0.6903734876380853, "grad_norm": 1.0955008268356323, "learning_rate": 4.6213767534198395e-05, "loss": 1.9474, "step": 6562 }, { "epoch": 0.6904786954234613, "grad_norm": 1.3523670434951782, "learning_rate": 4.618504232159032e-05, "loss": 1.3039, "step": 6563 }, { "epoch": 0.6905839032088374, "grad_norm": 1.6386244297027588, "learning_rate": 4.6156323358336116e-05, "loss": 1.7856, "step": 6564 }, { "epoch": 0.6906891109942136, "grad_norm": 1.7706154584884644, "learning_rate": 4.6127610647770767e-05, "loss": 2.0213, "step": 6565 }, { "epoch": 0.6907943187795897, "grad_norm": 1.2387391328811646, "learning_rate": 4.6098904193228576e-05, "loss": 1.5836, "step": 6566 }, { "epoch": 0.6908995265649658, "grad_norm": 1.4333043098449707, "learning_rate": 4.6070203998043173e-05, "loss": 1.6886, "step": 6567 }, { "epoch": 0.6910047343503419, "grad_norm": 1.220526933670044, "learning_rate": 4.60415100655474e-05, "loss": 1.6656, "step": 6568 }, { "epoch": 0.6911099421357181, "grad_norm": 1.2803022861480713, "learning_rate": 4.601282239907334e-05, "loss": 1.6475, "step": 6569 }, { "epoch": 0.6912151499210941, "grad_norm": 1.9146497249603271, "learning_rate": 4.5984141001952477e-05, "loss": 1.638, "step": 6570 }, { "epoch": 0.6913203577064703, "grad_norm": 0.9470534324645996, "learning_rate": 4.595546587751545e-05, "loss": 1.6093, "step": 6571 }, { "epoch": 0.6914255654918464, "grad_norm": 1.471522569656372, "learning_rate": 4.592679702909216e-05, "loss": 1.1011, "step": 6572 }, { "epoch": 0.6915307732772226, "grad_norm": 1.4972786903381348, "learning_rate": 4.589813446001192e-05, "loss": 1.4328, "step": 6573 }, { "epoch": 0.6916359810625986, "grad_norm": 1.2175990343093872, "learning_rate": 4.5869478173603175e-05, "loss": 1.8741, "step": 6574 }, { "epoch": 0.6917411888479748, "grad_norm": 1.4953633546829224, "learning_rate": 4.584082817319364e-05, "loss": 1.8677, "step": 6575 }, { "epoch": 0.6918463966333509, "grad_norm": 1.126177191734314, "learning_rate": 4.581218446211043e-05, "loss": 1.9968, "step": 6576 }, { "epoch": 0.6919516044187269, "grad_norm": 1.063721776008606, "learning_rate": 4.578354704367978e-05, "loss": 1.6822, "step": 6577 }, { "epoch": 0.6920568122041031, "grad_norm": 1.747952938079834, "learning_rate": 4.575491592122727e-05, "loss": 1.8211, "step": 6578 }, { "epoch": 0.6921620199894792, "grad_norm": 1.776321291923523, "learning_rate": 4.572629109807782e-05, "loss": 2.0429, "step": 6579 }, { "epoch": 0.6922672277748554, "grad_norm": 1.6665074825286865, "learning_rate": 4.569767257755538e-05, "loss": 1.7637, "step": 6580 }, { "epoch": 0.6923724355602314, "grad_norm": 1.534345030784607, "learning_rate": 4.566906036298345e-05, "loss": 1.4153, "step": 6581 }, { "epoch": 0.6924776433456076, "grad_norm": 1.4633084535598755, "learning_rate": 4.564045445768464e-05, "loss": 1.7596, "step": 6582 }, { "epoch": 0.6925828511309837, "grad_norm": 1.559431552886963, "learning_rate": 4.561185486498081e-05, "loss": 1.2046, "step": 6583 }, { "epoch": 0.6926880589163598, "grad_norm": 1.8445146083831787, "learning_rate": 4.558326158819322e-05, "loss": 1.4508, "step": 6584 }, { "epoch": 0.6927932667017359, "grad_norm": 1.142551064491272, "learning_rate": 4.555467463064227e-05, "loss": 1.0419, "step": 6585 }, { "epoch": 0.6928984744871121, "grad_norm": 2.6292953491210938, "learning_rate": 4.552609399564762e-05, "loss": 1.4199, "step": 6586 }, { "epoch": 0.6930036822724882, "grad_norm": 0.9221459627151489, "learning_rate": 4.549751968652836e-05, "loss": 1.4626, "step": 6587 }, { "epoch": 0.6931088900578642, "grad_norm": 1.3929451704025269, "learning_rate": 4.5468951706602644e-05, "loss": 2.0308, "step": 6588 }, { "epoch": 0.6932140978432404, "grad_norm": 1.388808012008667, "learning_rate": 4.5440390059187964e-05, "loss": 1.8868, "step": 6589 }, { "epoch": 0.6933193056286165, "grad_norm": 1.4524317979812622, "learning_rate": 4.541183474760118e-05, "loss": 1.747, "step": 6590 }, { "epoch": 0.6934245134139926, "grad_norm": 1.3403692245483398, "learning_rate": 4.538328577515821e-05, "loss": 1.5627, "step": 6591 }, { "epoch": 0.6935297211993687, "grad_norm": 1.3830885887145996, "learning_rate": 4.535474314517447e-05, "loss": 1.6002, "step": 6592 }, { "epoch": 0.6936349289847449, "grad_norm": 2.1278774738311768, "learning_rate": 4.532620686096446e-05, "loss": 1.9424, "step": 6593 }, { "epoch": 0.693740136770121, "grad_norm": 1.8062392473220825, "learning_rate": 4.529767692584198e-05, "loss": 2.012, "step": 6594 }, { "epoch": 0.6938453445554971, "grad_norm": 1.8589589595794678, "learning_rate": 4.5269153343120174e-05, "loss": 1.4928, "step": 6595 }, { "epoch": 0.6939505523408732, "grad_norm": 1.7464429140090942, "learning_rate": 4.524063611611138e-05, "loss": 1.1396, "step": 6596 }, { "epoch": 0.6940557601262494, "grad_norm": 1.3345447778701782, "learning_rate": 4.5212125248127143e-05, "loss": 1.5639, "step": 6597 }, { "epoch": 0.6941609679116255, "grad_norm": 1.2210328578948975, "learning_rate": 4.518362074247844e-05, "loss": 1.8796, "step": 6598 }, { "epoch": 0.6942661756970016, "grad_norm": 1.5386043787002563, "learning_rate": 4.515512260247534e-05, "loss": 1.1067, "step": 6599 }, { "epoch": 0.6943713834823777, "grad_norm": 1.575567364692688, "learning_rate": 4.5126630831427264e-05, "loss": 1.0896, "step": 6600 }, { "epoch": 0.6944765912677539, "grad_norm": 1.171311855316162, "learning_rate": 4.5098145432642845e-05, "loss": 1.6938, "step": 6601 }, { "epoch": 0.6945817990531299, "grad_norm": 1.322606086730957, "learning_rate": 4.506966640942999e-05, "loss": 1.9014, "step": 6602 }, { "epoch": 0.694687006838506, "grad_norm": 1.8430259227752686, "learning_rate": 4.504119376509591e-05, "loss": 1.916, "step": 6603 }, { "epoch": 0.6947922146238822, "grad_norm": 1.57040536403656, "learning_rate": 4.501272750294704e-05, "loss": 1.1643, "step": 6604 }, { "epoch": 0.6948974224092583, "grad_norm": 1.510570764541626, "learning_rate": 4.4984267626289e-05, "loss": 1.4288, "step": 6605 }, { "epoch": 0.6950026301946344, "grad_norm": 1.7676854133605957, "learning_rate": 4.495581413842685e-05, "loss": 1.0889, "step": 6606 }, { "epoch": 0.6951078379800105, "grad_norm": 1.2651560306549072, "learning_rate": 4.492736704266475e-05, "loss": 1.6327, "step": 6607 }, { "epoch": 0.6952130457653867, "grad_norm": 1.7821506261825562, "learning_rate": 4.4898926342306115e-05, "loss": 1.7645, "step": 6608 }, { "epoch": 0.6953182535507627, "grad_norm": 1.2521933317184448, "learning_rate": 4.487049204065377e-05, "loss": 2.0037, "step": 6609 }, { "epoch": 0.6954234613361389, "grad_norm": 1.3124496936798096, "learning_rate": 4.4842064141009644e-05, "loss": 1.5326, "step": 6610 }, { "epoch": 0.695528669121515, "grad_norm": 1.9574371576309204, "learning_rate": 4.4813642646674936e-05, "loss": 1.8915, "step": 6611 }, { "epoch": 0.6956338769068912, "grad_norm": 1.5282095670700073, "learning_rate": 4.4785227560950226e-05, "loss": 1.9569, "step": 6612 }, { "epoch": 0.6957390846922672, "grad_norm": 1.1745871305465698, "learning_rate": 4.475681888713522e-05, "loss": 1.8232, "step": 6613 }, { "epoch": 0.6958442924776433, "grad_norm": 1.0485895872116089, "learning_rate": 4.472841662852888e-05, "loss": 1.814, "step": 6614 }, { "epoch": 0.6959495002630195, "grad_norm": 1.5237230062484741, "learning_rate": 4.470002078842957e-05, "loss": 1.4868, "step": 6615 }, { "epoch": 0.6960547080483955, "grad_norm": 1.6121946573257446, "learning_rate": 4.467163137013473e-05, "loss": 1.727, "step": 6616 }, { "epoch": 0.6961599158337717, "grad_norm": 1.475019097328186, "learning_rate": 4.4643248376941104e-05, "loss": 1.5386, "step": 6617 }, { "epoch": 0.6962651236191478, "grad_norm": 1.4994443655014038, "learning_rate": 4.461487181214481e-05, "loss": 1.6624, "step": 6618 }, { "epoch": 0.696370331404524, "grad_norm": 2.619147300720215, "learning_rate": 4.458650167904106e-05, "loss": 1.5808, "step": 6619 }, { "epoch": 0.6964755391899, "grad_norm": 1.311523675918579, "learning_rate": 4.455813798092438e-05, "loss": 1.7372, "step": 6620 }, { "epoch": 0.6965807469752762, "grad_norm": 1.382138967514038, "learning_rate": 4.452978072108859e-05, "loss": 1.5582, "step": 6621 }, { "epoch": 0.6966859547606523, "grad_norm": 1.6938810348510742, "learning_rate": 4.450142990282671e-05, "loss": 1.7523, "step": 6622 }, { "epoch": 0.6967911625460284, "grad_norm": 1.1859630346298218, "learning_rate": 4.4473085529431024e-05, "loss": 1.7407, "step": 6623 }, { "epoch": 0.6968963703314045, "grad_norm": 1.2967774868011475, "learning_rate": 4.4444747604193074e-05, "loss": 1.9172, "step": 6624 }, { "epoch": 0.6970015781167807, "grad_norm": 1.2925198078155518, "learning_rate": 4.44164161304036e-05, "loss": 1.9331, "step": 6625 }, { "epoch": 0.6971067859021568, "grad_norm": 1.6732769012451172, "learning_rate": 4.438809111135274e-05, "loss": 1.8172, "step": 6626 }, { "epoch": 0.6972119936875328, "grad_norm": 1.5457383394241333, "learning_rate": 4.435977255032971e-05, "loss": 1.7216, "step": 6627 }, { "epoch": 0.697317201472909, "grad_norm": 1.5493268966674805, "learning_rate": 4.4331460450623064e-05, "loss": 1.2244, "step": 6628 }, { "epoch": 0.6974224092582851, "grad_norm": 1.3660433292388916, "learning_rate": 4.430315481552063e-05, "loss": 1.4947, "step": 6629 }, { "epoch": 0.6975276170436613, "grad_norm": 1.6390910148620605, "learning_rate": 4.427485564830942e-05, "loss": 1.8469, "step": 6630 }, { "epoch": 0.6976328248290373, "grad_norm": 2.015472173690796, "learning_rate": 4.424656295227568e-05, "loss": 1.7447, "step": 6631 }, { "epoch": 0.6977380326144135, "grad_norm": 1.0427132844924927, "learning_rate": 4.4218276730705045e-05, "loss": 1.5705, "step": 6632 }, { "epoch": 0.6978432403997896, "grad_norm": 1.1284888982772827, "learning_rate": 4.4189996986882245e-05, "loss": 1.869, "step": 6633 }, { "epoch": 0.6979484481851657, "grad_norm": 1.4589309692382812, "learning_rate": 4.4161723724091276e-05, "loss": 1.6884, "step": 6634 }, { "epoch": 0.6980536559705418, "grad_norm": 0.9497972726821899, "learning_rate": 4.413345694561549e-05, "loss": 1.9814, "step": 6635 }, { "epoch": 0.698158863755918, "grad_norm": 1.5925368070602417, "learning_rate": 4.410519665473736e-05, "loss": 1.5224, "step": 6636 }, { "epoch": 0.6982640715412941, "grad_norm": 2.054982900619507, "learning_rate": 4.407694285473871e-05, "loss": 1.4793, "step": 6637 }, { "epoch": 0.6983692793266701, "grad_norm": 1.2141071557998657, "learning_rate": 4.404869554890054e-05, "loss": 1.2192, "step": 6638 }, { "epoch": 0.6984744871120463, "grad_norm": 1.3335728645324707, "learning_rate": 4.402045474050308e-05, "loss": 1.9024, "step": 6639 }, { "epoch": 0.6985796948974224, "grad_norm": 1.548563838005066, "learning_rate": 4.399222043282591e-05, "loss": 2.0828, "step": 6640 }, { "epoch": 0.6986849026827985, "grad_norm": 2.5695066452026367, "learning_rate": 4.3963992629147755e-05, "loss": 1.768, "step": 6641 }, { "epoch": 0.6987901104681746, "grad_norm": 1.7723878622055054, "learning_rate": 4.393577133274658e-05, "loss": 1.7614, "step": 6642 }, { "epoch": 0.6988953182535508, "grad_norm": 1.082208275794983, "learning_rate": 4.390755654689973e-05, "loss": 2.1651, "step": 6643 }, { "epoch": 0.6990005260389269, "grad_norm": 1.8237231969833374, "learning_rate": 4.3879348274883594e-05, "loss": 1.8408, "step": 6644 }, { "epoch": 0.699105733824303, "grad_norm": 1.6238867044448853, "learning_rate": 4.3851146519973906e-05, "loss": 1.2499, "step": 6645 }, { "epoch": 0.6992109416096791, "grad_norm": 1.6902471780776978, "learning_rate": 4.382295128544572e-05, "loss": 1.916, "step": 6646 }, { "epoch": 0.6993161493950553, "grad_norm": 1.5541309118270874, "learning_rate": 4.379476257457318e-05, "loss": 1.6904, "step": 6647 }, { "epoch": 0.6994213571804313, "grad_norm": 1.7713851928710938, "learning_rate": 4.376658039062981e-05, "loss": 1.7081, "step": 6648 }, { "epoch": 0.6995265649658075, "grad_norm": 1.4880828857421875, "learning_rate": 4.373840473688829e-05, "loss": 1.865, "step": 6649 }, { "epoch": 0.6996317727511836, "grad_norm": 1.3452609777450562, "learning_rate": 4.371023561662052e-05, "loss": 2.0241, "step": 6650 }, { "epoch": 0.6997369805365597, "grad_norm": 1.048627495765686, "learning_rate": 4.3682073033097785e-05, "loss": 1.8618, "step": 6651 }, { "epoch": 0.6998421883219358, "grad_norm": 1.0086387395858765, "learning_rate": 4.365391698959044e-05, "loss": 1.8462, "step": 6652 }, { "epoch": 0.6999473961073119, "grad_norm": 1.9956762790679932, "learning_rate": 4.3625767489368143e-05, "loss": 1.7446, "step": 6653 }, { "epoch": 0.7000526038926881, "grad_norm": 1.3309284448623657, "learning_rate": 4.3597624535699865e-05, "loss": 1.8599, "step": 6654 }, { "epoch": 0.7001578116780641, "grad_norm": 1.1695083379745483, "learning_rate": 4.3569488131853733e-05, "loss": 1.3716, "step": 6655 }, { "epoch": 0.7002630194634403, "grad_norm": 1.7402067184448242, "learning_rate": 4.354135828109707e-05, "loss": 1.3841, "step": 6656 }, { "epoch": 0.7003682272488164, "grad_norm": 1.5015475749969482, "learning_rate": 4.351323498669659e-05, "loss": 1.2452, "step": 6657 }, { "epoch": 0.7004734350341926, "grad_norm": 1.4218254089355469, "learning_rate": 4.3485118251918146e-05, "loss": 1.9119, "step": 6658 }, { "epoch": 0.7005786428195686, "grad_norm": 1.1995227336883545, "learning_rate": 4.345700808002676e-05, "loss": 1.6662, "step": 6659 }, { "epoch": 0.7006838506049448, "grad_norm": 1.214152455329895, "learning_rate": 4.3428904474286894e-05, "loss": 1.6176, "step": 6660 }, { "epoch": 0.7007890583903209, "grad_norm": 1.6113765239715576, "learning_rate": 4.340080743796204e-05, "loss": 1.8252, "step": 6661 }, { "epoch": 0.7008942661756971, "grad_norm": 1.3512358665466309, "learning_rate": 4.337271697431503e-05, "loss": 1.5226, "step": 6662 }, { "epoch": 0.7009994739610731, "grad_norm": 1.1863796710968018, "learning_rate": 4.3344633086607955e-05, "loss": 1.6714, "step": 6663 }, { "epoch": 0.7011046817464492, "grad_norm": 1.354353904724121, "learning_rate": 4.331655577810207e-05, "loss": 1.7832, "step": 6664 }, { "epoch": 0.7012098895318254, "grad_norm": 1.0943442583084106, "learning_rate": 4.328848505205792e-05, "loss": 1.9705, "step": 6665 }, { "epoch": 0.7013150973172014, "grad_norm": 1.4298934936523438, "learning_rate": 4.326042091173526e-05, "loss": 1.2593, "step": 6666 }, { "epoch": 0.7014203051025776, "grad_norm": 2.2146830558776855, "learning_rate": 4.3232363360393026e-05, "loss": 1.7405, "step": 6667 }, { "epoch": 0.7015255128879537, "grad_norm": 1.1296732425689697, "learning_rate": 4.320431240128955e-05, "loss": 1.8832, "step": 6668 }, { "epoch": 0.7016307206733299, "grad_norm": 1.771040439605713, "learning_rate": 4.317626803768224e-05, "loss": 1.9983, "step": 6669 }, { "epoch": 0.7017359284587059, "grad_norm": 1.8517831563949585, "learning_rate": 4.3148230272827784e-05, "loss": 1.7378, "step": 6670 }, { "epoch": 0.7018411362440821, "grad_norm": 1.6702775955200195, "learning_rate": 4.312019910998217e-05, "loss": 1.7574, "step": 6671 }, { "epoch": 0.7019463440294582, "grad_norm": 1.3865050077438354, "learning_rate": 4.3092174552400535e-05, "loss": 1.5659, "step": 6672 }, { "epoch": 0.7020515518148343, "grad_norm": 1.095326542854309, "learning_rate": 4.306415660333724e-05, "loss": 1.0576, "step": 6673 }, { "epoch": 0.7021567596002104, "grad_norm": 1.3745871782302856, "learning_rate": 4.303614526604598e-05, "loss": 1.9348, "step": 6674 }, { "epoch": 0.7022619673855865, "grad_norm": 1.3735612630844116, "learning_rate": 4.300814054377961e-05, "loss": 1.3382, "step": 6675 }, { "epoch": 0.7023671751709627, "grad_norm": 1.8741841316223145, "learning_rate": 4.298014243979016e-05, "loss": 1.7592, "step": 6676 }, { "epoch": 0.7024723829563387, "grad_norm": 1.2442336082458496, "learning_rate": 4.295215095732904e-05, "loss": 1.4593, "step": 6677 }, { "epoch": 0.7025775907417149, "grad_norm": 1.6263591051101685, "learning_rate": 4.292416609964678e-05, "loss": 1.3543, "step": 6678 }, { "epoch": 0.702682798527091, "grad_norm": 1.3969215154647827, "learning_rate": 4.289618786999313e-05, "loss": 1.6856, "step": 6679 }, { "epoch": 0.7027880063124671, "grad_norm": 1.4358292818069458, "learning_rate": 4.2868216271617175e-05, "loss": 1.7041, "step": 6680 }, { "epoch": 0.7028932140978432, "grad_norm": 1.739772081375122, "learning_rate": 4.284025130776711e-05, "loss": 2.2992, "step": 6681 }, { "epoch": 0.7029984218832194, "grad_norm": 1.508034586906433, "learning_rate": 4.281229298169046e-05, "loss": 1.5951, "step": 6682 }, { "epoch": 0.7031036296685955, "grad_norm": 1.9585610628128052, "learning_rate": 4.278434129663392e-05, "loss": 1.8921, "step": 6683 }, { "epoch": 0.7032088374539716, "grad_norm": 1.0624831914901733, "learning_rate": 4.275639625584338e-05, "loss": 1.7377, "step": 6684 }, { "epoch": 0.7033140452393477, "grad_norm": 1.728935956954956, "learning_rate": 4.2728457862564074e-05, "loss": 1.6178, "step": 6685 }, { "epoch": 0.7034192530247239, "grad_norm": 1.3254528045654297, "learning_rate": 4.2700526120040405e-05, "loss": 1.6511, "step": 6686 }, { "epoch": 0.7035244608100999, "grad_norm": 1.6863447427749634, "learning_rate": 4.267260103151589e-05, "loss": 1.4184, "step": 6687 }, { "epoch": 0.703629668595476, "grad_norm": 1.829534888267517, "learning_rate": 4.264468260023348e-05, "loss": 1.9914, "step": 6688 }, { "epoch": 0.7037348763808522, "grad_norm": 1.3819561004638672, "learning_rate": 4.261677082943521e-05, "loss": 1.7343, "step": 6689 }, { "epoch": 0.7038400841662283, "grad_norm": 1.2711637020111084, "learning_rate": 4.2588865722362334e-05, "loss": 1.9204, "step": 6690 }, { "epoch": 0.7039452919516044, "grad_norm": 1.641611099243164, "learning_rate": 4.256096728225548e-05, "loss": 1.5314, "step": 6691 }, { "epoch": 0.7040504997369805, "grad_norm": 1.4409161806106567, "learning_rate": 4.253307551235431e-05, "loss": 1.745, "step": 6692 }, { "epoch": 0.7041557075223567, "grad_norm": 1.646813988685608, "learning_rate": 4.25051904158979e-05, "loss": 1.9297, "step": 6693 }, { "epoch": 0.7042609153077328, "grad_norm": 1.5589001178741455, "learning_rate": 4.247731199612439e-05, "loss": 1.5343, "step": 6694 }, { "epoch": 0.7043661230931089, "grad_norm": 2.051630973815918, "learning_rate": 4.244944025627118e-05, "loss": 1.4001, "step": 6695 }, { "epoch": 0.704471330878485, "grad_norm": 1.5140016078948975, "learning_rate": 4.2421575199575e-05, "loss": 1.8798, "step": 6696 }, { "epoch": 0.7045765386638612, "grad_norm": 1.471285104751587, "learning_rate": 4.23937168292717e-05, "loss": 1.5356, "step": 6697 }, { "epoch": 0.7046817464492372, "grad_norm": 1.4824402332305908, "learning_rate": 4.236586514859633e-05, "loss": 1.5193, "step": 6698 }, { "epoch": 0.7047869542346133, "grad_norm": 1.7141013145446777, "learning_rate": 4.233802016078329e-05, "loss": 1.4694, "step": 6699 }, { "epoch": 0.7048921620199895, "grad_norm": 1.1465423107147217, "learning_rate": 4.23101818690661e-05, "loss": 1.6485, "step": 6700 }, { "epoch": 0.7049973698053656, "grad_norm": 1.4803214073181152, "learning_rate": 4.2282350276677475e-05, "loss": 1.6299, "step": 6701 }, { "epoch": 0.7051025775907417, "grad_norm": 1.4447064399719238, "learning_rate": 4.2254525386849497e-05, "loss": 1.7181, "step": 6702 }, { "epoch": 0.7052077853761178, "grad_norm": 2.032977819442749, "learning_rate": 4.222670720281333e-05, "loss": 0.969, "step": 6703 }, { "epoch": 0.705312993161494, "grad_norm": 1.6613085269927979, "learning_rate": 4.219889572779937e-05, "loss": 1.4189, "step": 6704 }, { "epoch": 0.70541820094687, "grad_norm": 1.3568087816238403, "learning_rate": 4.217109096503736e-05, "loss": 1.8463, "step": 6705 }, { "epoch": 0.7055234087322462, "grad_norm": 1.6580621004104614, "learning_rate": 4.214329291775613e-05, "loss": 1.6142, "step": 6706 }, { "epoch": 0.7056286165176223, "grad_norm": 1.5139204263687134, "learning_rate": 4.2115501589183734e-05, "loss": 1.7448, "step": 6707 }, { "epoch": 0.7057338243029985, "grad_norm": 2.026707649230957, "learning_rate": 4.208771698254761e-05, "loss": 1.5281, "step": 6708 }, { "epoch": 0.7058390320883745, "grad_norm": 1.6222214698791504, "learning_rate": 4.205993910107413e-05, "loss": 1.3807, "step": 6709 }, { "epoch": 0.7059442398737507, "grad_norm": 1.521702766418457, "learning_rate": 4.2032167947989175e-05, "loss": 1.706, "step": 6710 }, { "epoch": 0.7060494476591268, "grad_norm": 1.6336759328842163, "learning_rate": 4.200440352651767e-05, "loss": 1.8752, "step": 6711 }, { "epoch": 0.7061546554445028, "grad_norm": 1.2385070323944092, "learning_rate": 4.197664583988376e-05, "loss": 1.8389, "step": 6712 }, { "epoch": 0.706259863229879, "grad_norm": 1.7074710130691528, "learning_rate": 4.1948894891310955e-05, "loss": 2.3842, "step": 6713 }, { "epoch": 0.7063650710152551, "grad_norm": 1.0736308097839355, "learning_rate": 4.192115068402183e-05, "loss": 1.5353, "step": 6714 }, { "epoch": 0.7064702788006313, "grad_norm": 2.215698719024658, "learning_rate": 4.189341322123818e-05, "loss": 1.0607, "step": 6715 }, { "epoch": 0.7065754865860073, "grad_norm": 2.0693204402923584, "learning_rate": 4.186568250618115e-05, "loss": 1.8865, "step": 6716 }, { "epoch": 0.7066806943713835, "grad_norm": 2.8470065593719482, "learning_rate": 4.183795854207098e-05, "loss": 1.8147, "step": 6717 }, { "epoch": 0.7067859021567596, "grad_norm": 1.833847999572754, "learning_rate": 4.181024133212713e-05, "loss": 1.4404, "step": 6718 }, { "epoch": 0.7068911099421357, "grad_norm": 1.5717699527740479, "learning_rate": 4.1782530879568374e-05, "loss": 1.5635, "step": 6719 }, { "epoch": 0.7069963177275118, "grad_norm": 1.44779372215271, "learning_rate": 4.175482718761259e-05, "loss": 1.9344, "step": 6720 }, { "epoch": 0.707101525512888, "grad_norm": 1.8596739768981934, "learning_rate": 4.172713025947691e-05, "loss": 2.044, "step": 6721 }, { "epoch": 0.7072067332982641, "grad_norm": 1.5004318952560425, "learning_rate": 4.169944009837773e-05, "loss": 1.8553, "step": 6722 }, { "epoch": 0.7073119410836401, "grad_norm": 1.9273593425750732, "learning_rate": 4.1671756707530594e-05, "loss": 1.2872, "step": 6723 }, { "epoch": 0.7074171488690163, "grad_norm": 1.7082390785217285, "learning_rate": 4.164408009015024e-05, "loss": 1.8351, "step": 6724 }, { "epoch": 0.7075223566543924, "grad_norm": 2.25231671333313, "learning_rate": 4.1616410249450746e-05, "loss": 1.7099, "step": 6725 }, { "epoch": 0.7076275644397686, "grad_norm": 1.2057231664657593, "learning_rate": 4.1588747188645275e-05, "loss": 2.0397, "step": 6726 }, { "epoch": 0.7077327722251446, "grad_norm": 1.857649564743042, "learning_rate": 4.156109091094622e-05, "loss": 1.8514, "step": 6727 }, { "epoch": 0.7078379800105208, "grad_norm": 1.4656842947006226, "learning_rate": 4.1533441419565265e-05, "loss": 1.2047, "step": 6728 }, { "epoch": 0.7079431877958969, "grad_norm": 1.4205302000045776, "learning_rate": 4.150579871771324e-05, "loss": 1.2295, "step": 6729 }, { "epoch": 0.708048395581273, "grad_norm": 1.5245219469070435, "learning_rate": 4.14781628086002e-05, "loss": 1.7168, "step": 6730 }, { "epoch": 0.7081536033666491, "grad_norm": 1.4015170335769653, "learning_rate": 4.145053369543539e-05, "loss": 1.6294, "step": 6731 }, { "epoch": 0.7082588111520253, "grad_norm": 1.7922075986862183, "learning_rate": 4.1422911381427274e-05, "loss": 1.4777, "step": 6732 }, { "epoch": 0.7083640189374014, "grad_norm": 1.0388702154159546, "learning_rate": 4.1395295869783615e-05, "loss": 1.8166, "step": 6733 }, { "epoch": 0.7084692267227775, "grad_norm": 1.3340606689453125, "learning_rate": 4.136768716371125e-05, "loss": 1.4049, "step": 6734 }, { "epoch": 0.7085744345081536, "grad_norm": 1.485906720161438, "learning_rate": 4.134008526641628e-05, "loss": 1.9171, "step": 6735 }, { "epoch": 0.7086796422935298, "grad_norm": 1.4040229320526123, "learning_rate": 4.131249018110408e-05, "loss": 1.3881, "step": 6736 }, { "epoch": 0.7087848500789058, "grad_norm": 2.063403844833374, "learning_rate": 4.12849019109791e-05, "loss": 1.8729, "step": 6737 }, { "epoch": 0.7088900578642819, "grad_norm": 1.6362403631210327, "learning_rate": 4.1257320459245154e-05, "loss": 1.7883, "step": 6738 }, { "epoch": 0.7089952656496581, "grad_norm": 1.6882222890853882, "learning_rate": 4.122974582910515e-05, "loss": 1.3546, "step": 6739 }, { "epoch": 0.7091004734350342, "grad_norm": 1.3080660104751587, "learning_rate": 4.1202178023761195e-05, "loss": 1.4403, "step": 6740 }, { "epoch": 0.7092056812204103, "grad_norm": 2.508234739303589, "learning_rate": 4.117461704641473e-05, "loss": 1.5945, "step": 6741 }, { "epoch": 0.7093108890057864, "grad_norm": 1.9061312675476074, "learning_rate": 4.1147062900266285e-05, "loss": 1.9146, "step": 6742 }, { "epoch": 0.7094160967911626, "grad_norm": 1.108188271522522, "learning_rate": 4.111951558851559e-05, "loss": 1.9639, "step": 6743 }, { "epoch": 0.7095213045765386, "grad_norm": 1.4938721656799316, "learning_rate": 4.109197511436169e-05, "loss": 2.0542, "step": 6744 }, { "epoch": 0.7096265123619148, "grad_norm": 1.5593281984329224, "learning_rate": 4.106444148100275e-05, "loss": 1.6947, "step": 6745 }, { "epoch": 0.7097317201472909, "grad_norm": 1.297189712524414, "learning_rate": 4.10369146916361e-05, "loss": 1.4036, "step": 6746 }, { "epoch": 0.7098369279326671, "grad_norm": 0.986399233341217, "learning_rate": 4.100939474945843e-05, "loss": 1.5753, "step": 6747 }, { "epoch": 0.7099421357180431, "grad_norm": 1.9157464504241943, "learning_rate": 4.09818816576655e-05, "loss": 1.4842, "step": 6748 }, { "epoch": 0.7100473435034192, "grad_norm": 1.0592901706695557, "learning_rate": 4.095437541945226e-05, "loss": 2.2621, "step": 6749 }, { "epoch": 0.7101525512887954, "grad_norm": 1.2856818437576294, "learning_rate": 4.0926876038013026e-05, "loss": 1.7017, "step": 6750 }, { "epoch": 0.7102577590741714, "grad_norm": 1.17574942111969, "learning_rate": 4.0899383516541146e-05, "loss": 1.4187, "step": 6751 }, { "epoch": 0.7103629668595476, "grad_norm": 3.1928186416625977, "learning_rate": 4.087189785822925e-05, "loss": 1.4191, "step": 6752 }, { "epoch": 0.7104681746449237, "grad_norm": 1.1456485986709595, "learning_rate": 4.084441906626914e-05, "loss": 1.5175, "step": 6753 }, { "epoch": 0.7105733824302999, "grad_norm": 1.2433396577835083, "learning_rate": 4.0816947143851816e-05, "loss": 1.2899, "step": 6754 }, { "epoch": 0.7106785902156759, "grad_norm": 1.4701285362243652, "learning_rate": 4.078948209416758e-05, "loss": 1.6928, "step": 6755 }, { "epoch": 0.7107837980010521, "grad_norm": 1.0511400699615479, "learning_rate": 4.0762023920405804e-05, "loss": 1.7949, "step": 6756 }, { "epoch": 0.7108890057864282, "grad_norm": 1.6696053743362427, "learning_rate": 4.073457262575509e-05, "loss": 1.991, "step": 6757 }, { "epoch": 0.7109942135718044, "grad_norm": 1.6411384344100952, "learning_rate": 4.0707128213403336e-05, "loss": 1.3082, "step": 6758 }, { "epoch": 0.7110994213571804, "grad_norm": 2.3022947311401367, "learning_rate": 4.067969068653754e-05, "loss": 1.8167, "step": 6759 }, { "epoch": 0.7112046291425566, "grad_norm": 1.5651785135269165, "learning_rate": 4.0652260048343885e-05, "loss": 1.7786, "step": 6760 }, { "epoch": 0.7113098369279327, "grad_norm": 1.68247389793396, "learning_rate": 4.0624836302007886e-05, "loss": 2.5257, "step": 6761 }, { "epoch": 0.7114150447133087, "grad_norm": 1.1986908912658691, "learning_rate": 4.059741945071412e-05, "loss": 1.7782, "step": 6762 }, { "epoch": 0.7115202524986849, "grad_norm": 1.7932888269424438, "learning_rate": 4.05700094976464e-05, "loss": 1.3013, "step": 6763 }, { "epoch": 0.711625460284061, "grad_norm": 1.4314543008804321, "learning_rate": 4.054260644598781e-05, "loss": 1.3228, "step": 6764 }, { "epoch": 0.7117306680694372, "grad_norm": 1.5555866956710815, "learning_rate": 4.0515210298920545e-05, "loss": 1.5795, "step": 6765 }, { "epoch": 0.7118358758548132, "grad_norm": 1.2650176286697388, "learning_rate": 4.048782105962598e-05, "loss": 2.0545, "step": 6766 }, { "epoch": 0.7119410836401894, "grad_norm": 1.1688004732131958, "learning_rate": 4.0460438731284845e-05, "loss": 1.5544, "step": 6767 }, { "epoch": 0.7120462914255655, "grad_norm": 1.4285098314285278, "learning_rate": 4.0433063317076893e-05, "loss": 1.4884, "step": 6768 }, { "epoch": 0.7121514992109416, "grad_norm": 2.4605870246887207, "learning_rate": 4.0405694820181104e-05, "loss": 1.0857, "step": 6769 }, { "epoch": 0.7122567069963177, "grad_norm": 1.3824515342712402, "learning_rate": 4.037833324377578e-05, "loss": 1.6449, "step": 6770 }, { "epoch": 0.7123619147816939, "grad_norm": 2.0455679893493652, "learning_rate": 4.035097859103829e-05, "loss": 1.7399, "step": 6771 }, { "epoch": 0.71246712256707, "grad_norm": 1.9680933952331543, "learning_rate": 4.032363086514523e-05, "loss": 2.0078, "step": 6772 }, { "epoch": 0.712572330352446, "grad_norm": 1.2317272424697876, "learning_rate": 4.0296290069272416e-05, "loss": 1.2191, "step": 6773 }, { "epoch": 0.7126775381378222, "grad_norm": 1.2015796899795532, "learning_rate": 4.026895620659479e-05, "loss": 1.8345, "step": 6774 }, { "epoch": 0.7127827459231983, "grad_norm": 2.4510412216186523, "learning_rate": 4.024162928028663e-05, "loss": 1.554, "step": 6775 }, { "epoch": 0.7128879537085744, "grad_norm": 1.147552251815796, "learning_rate": 4.021430929352128e-05, "loss": 1.9372, "step": 6776 }, { "epoch": 0.7129931614939505, "grad_norm": 1.3203489780426025, "learning_rate": 4.01869962494713e-05, "loss": 1.7651, "step": 6777 }, { "epoch": 0.7130983692793267, "grad_norm": 2.268237829208374, "learning_rate": 4.0159690151308504e-05, "loss": 1.2124, "step": 6778 }, { "epoch": 0.7132035770647028, "grad_norm": 1.9930332899093628, "learning_rate": 4.013239100220385e-05, "loss": 1.3614, "step": 6779 }, { "epoch": 0.7133087848500789, "grad_norm": 1.6461526155471802, "learning_rate": 4.010509880532745e-05, "loss": 1.8164, "step": 6780 }, { "epoch": 0.713413992635455, "grad_norm": 1.5002760887145996, "learning_rate": 4.007781356384873e-05, "loss": 2.0341, "step": 6781 }, { "epoch": 0.7135192004208312, "grad_norm": 2.1459338665008545, "learning_rate": 4.0050535280936205e-05, "loss": 1.5856, "step": 6782 }, { "epoch": 0.7136244082062072, "grad_norm": 1.3347117900848389, "learning_rate": 4.002326395975758e-05, "loss": 1.6578, "step": 6783 }, { "epoch": 0.7137296159915834, "grad_norm": 2.3518266677856445, "learning_rate": 3.9995999603479836e-05, "loss": 1.6109, "step": 6784 }, { "epoch": 0.7138348237769595, "grad_norm": 1.601401686668396, "learning_rate": 3.996874221526905e-05, "loss": 1.5894, "step": 6785 }, { "epoch": 0.7139400315623357, "grad_norm": 2.32871150970459, "learning_rate": 3.994149179829058e-05, "loss": 1.2623, "step": 6786 }, { "epoch": 0.7140452393477117, "grad_norm": 1.1024394035339355, "learning_rate": 3.9914248355708894e-05, "loss": 2.0739, "step": 6787 }, { "epoch": 0.7141504471330878, "grad_norm": 1.1708279848098755, "learning_rate": 3.988701189068765e-05, "loss": 1.6601, "step": 6788 }, { "epoch": 0.714255654918464, "grad_norm": 1.7237240076065063, "learning_rate": 3.985978240638981e-05, "loss": 1.7179, "step": 6789 }, { "epoch": 0.7143608627038401, "grad_norm": 1.493378758430481, "learning_rate": 3.9832559905977404e-05, "loss": 1.97, "step": 6790 }, { "epoch": 0.7144660704892162, "grad_norm": 1.8581169843673706, "learning_rate": 3.9805344392611653e-05, "loss": 1.7689, "step": 6791 }, { "epoch": 0.7145712782745923, "grad_norm": 1.4340283870697021, "learning_rate": 3.977813586945308e-05, "loss": 1.2957, "step": 6792 }, { "epoch": 0.7146764860599685, "grad_norm": 1.2262341976165771, "learning_rate": 3.9750934339661275e-05, "loss": 1.7558, "step": 6793 }, { "epoch": 0.7147816938453445, "grad_norm": 2.0445914268493652, "learning_rate": 3.972373980639508e-05, "loss": 2.3494, "step": 6794 }, { "epoch": 0.7148869016307207, "grad_norm": 2.4502785205841064, "learning_rate": 3.9696552272812484e-05, "loss": 2.2171, "step": 6795 }, { "epoch": 0.7149921094160968, "grad_norm": 1.6572961807250977, "learning_rate": 3.966937174207066e-05, "loss": 1.8123, "step": 6796 }, { "epoch": 0.715097317201473, "grad_norm": 2.4192516803741455, "learning_rate": 3.9642198217326075e-05, "loss": 1.5431, "step": 6797 }, { "epoch": 0.715202524986849, "grad_norm": 2.080559253692627, "learning_rate": 3.961503170173426e-05, "loss": 1.6864, "step": 6798 }, { "epoch": 0.7153077327722251, "grad_norm": 2.1489667892456055, "learning_rate": 3.958787219844994e-05, "loss": 1.4183, "step": 6799 }, { "epoch": 0.7154129405576013, "grad_norm": 0.9947004914283752, "learning_rate": 3.9560719710627115e-05, "loss": 1.6476, "step": 6800 }, { "epoch": 0.7155181483429773, "grad_norm": 1.2469807863235474, "learning_rate": 3.9533574241418884e-05, "loss": 1.5677, "step": 6801 }, { "epoch": 0.7156233561283535, "grad_norm": 1.3769633769989014, "learning_rate": 3.9506435793977535e-05, "loss": 1.7477, "step": 6802 }, { "epoch": 0.7157285639137296, "grad_norm": 1.638057827949524, "learning_rate": 3.947930437145464e-05, "loss": 1.2805, "step": 6803 }, { "epoch": 0.7158337716991058, "grad_norm": 1.301336646080017, "learning_rate": 3.9452179977000826e-05, "loss": 1.4646, "step": 6804 }, { "epoch": 0.7159389794844818, "grad_norm": 1.5804367065429688, "learning_rate": 3.942506261376594e-05, "loss": 2.0871, "step": 6805 }, { "epoch": 0.716044187269858, "grad_norm": 1.6908665895462036, "learning_rate": 3.93979522848991e-05, "loss": 1.9848, "step": 6806 }, { "epoch": 0.7161493950552341, "grad_norm": 1.653495192527771, "learning_rate": 3.93708489935485e-05, "loss": 2.2619, "step": 6807 }, { "epoch": 0.7162546028406102, "grad_norm": 1.4094468355178833, "learning_rate": 3.934375274286154e-05, "loss": 1.3118, "step": 6808 }, { "epoch": 0.7163598106259863, "grad_norm": 1.1481539011001587, "learning_rate": 3.931666353598485e-05, "loss": 1.5423, "step": 6809 }, { "epoch": 0.7164650184113625, "grad_norm": 1.4447335004806519, "learning_rate": 3.928958137606421e-05, "loss": 1.7084, "step": 6810 }, { "epoch": 0.7165702261967386, "grad_norm": 1.6430801153182983, "learning_rate": 3.9262506266244534e-05, "loss": 1.5448, "step": 6811 }, { "epoch": 0.7166754339821146, "grad_norm": 1.7120585441589355, "learning_rate": 3.923543820967004e-05, "loss": 1.8249, "step": 6812 }, { "epoch": 0.7167806417674908, "grad_norm": 1.6759033203125, "learning_rate": 3.9208377209484014e-05, "loss": 1.4926, "step": 6813 }, { "epoch": 0.7168858495528669, "grad_norm": 1.6250011920928955, "learning_rate": 3.918132326882892e-05, "loss": 1.7885, "step": 6814 }, { "epoch": 0.716991057338243, "grad_norm": 1.3283613920211792, "learning_rate": 3.9154276390846555e-05, "loss": 1.2566, "step": 6815 }, { "epoch": 0.7170962651236191, "grad_norm": 1.6472797393798828, "learning_rate": 3.912723657867764e-05, "loss": 1.2617, "step": 6816 }, { "epoch": 0.7172014729089953, "grad_norm": 1.686281442642212, "learning_rate": 3.910020383546233e-05, "loss": 1.7821, "step": 6817 }, { "epoch": 0.7173066806943714, "grad_norm": 1.3786574602127075, "learning_rate": 3.907317816433982e-05, "loss": 1.6652, "step": 6818 }, { "epoch": 0.7174118884797475, "grad_norm": 1.1315668821334839, "learning_rate": 3.9046159568448446e-05, "loss": 1.6919, "step": 6819 }, { "epoch": 0.7175170962651236, "grad_norm": 1.1525264978408813, "learning_rate": 3.90191480509259e-05, "loss": 2.0687, "step": 6820 }, { "epoch": 0.7176223040504998, "grad_norm": 2.229443311691284, "learning_rate": 3.8992143614908874e-05, "loss": 1.6294, "step": 6821 }, { "epoch": 0.7177275118358759, "grad_norm": 1.3578799962997437, "learning_rate": 3.896514626353328e-05, "loss": 1.0833, "step": 6822 }, { "epoch": 0.717832719621252, "grad_norm": 1.2835088968276978, "learning_rate": 3.893815599993431e-05, "loss": 2.0935, "step": 6823 }, { "epoch": 0.7179379274066281, "grad_norm": 1.247178077697754, "learning_rate": 3.8911172827246215e-05, "loss": 1.4496, "step": 6824 }, { "epoch": 0.7180431351920042, "grad_norm": 1.1701315641403198, "learning_rate": 3.888419674860241e-05, "loss": 1.8054, "step": 6825 }, { "epoch": 0.7181483429773803, "grad_norm": 1.4860901832580566, "learning_rate": 3.885722776713563e-05, "loss": 1.6664, "step": 6826 }, { "epoch": 0.7182535507627564, "grad_norm": 2.2401113510131836, "learning_rate": 3.8830265885977656e-05, "loss": 1.5906, "step": 6827 }, { "epoch": 0.7183587585481326, "grad_norm": 1.4506216049194336, "learning_rate": 3.8803311108259435e-05, "loss": 1.4774, "step": 6828 }, { "epoch": 0.7184639663335087, "grad_norm": 1.3430507183074951, "learning_rate": 3.877636343711122e-05, "loss": 1.604, "step": 6829 }, { "epoch": 0.7185691741188848, "grad_norm": 1.241802453994751, "learning_rate": 3.874942287566227e-05, "loss": 1.391, "step": 6830 }, { "epoch": 0.7186743819042609, "grad_norm": 1.9131029844284058, "learning_rate": 3.8722489427041185e-05, "loss": 2.0294, "step": 6831 }, { "epoch": 0.7187795896896371, "grad_norm": 1.5295307636260986, "learning_rate": 3.869556309437563e-05, "loss": 1.2301, "step": 6832 }, { "epoch": 0.7188847974750131, "grad_norm": 1.5379595756530762, "learning_rate": 3.866864388079242e-05, "loss": 1.8451, "step": 6833 }, { "epoch": 0.7189900052603893, "grad_norm": 1.5962355136871338, "learning_rate": 3.864173178941767e-05, "loss": 1.1769, "step": 6834 }, { "epoch": 0.7190952130457654, "grad_norm": 1.6853702068328857, "learning_rate": 3.8614826823376557e-05, "loss": 1.1957, "step": 6835 }, { "epoch": 0.7192004208311416, "grad_norm": 1.4030197858810425, "learning_rate": 3.858792898579348e-05, "loss": 1.0337, "step": 6836 }, { "epoch": 0.7193056286165176, "grad_norm": 1.4564900398254395, "learning_rate": 3.8561038279791974e-05, "loss": 1.8006, "step": 6837 }, { "epoch": 0.7194108364018937, "grad_norm": 1.0711987018585205, "learning_rate": 3.853415470849479e-05, "loss": 1.8206, "step": 6838 }, { "epoch": 0.7195160441872699, "grad_norm": 1.8588398694992065, "learning_rate": 3.8507278275023774e-05, "loss": 1.295, "step": 6839 }, { "epoch": 0.7196212519726459, "grad_norm": 2.0701515674591064, "learning_rate": 3.848040898250007e-05, "loss": 1.3578, "step": 6840 }, { "epoch": 0.7197264597580221, "grad_norm": 1.5314396619796753, "learning_rate": 3.8453546834043866e-05, "loss": 1.7377, "step": 6841 }, { "epoch": 0.7198316675433982, "grad_norm": 1.5056813955307007, "learning_rate": 3.842669183277463e-05, "loss": 1.7067, "step": 6842 }, { "epoch": 0.7199368753287744, "grad_norm": 2.3147194385528564, "learning_rate": 3.839984398181092e-05, "loss": 1.9101, "step": 6843 }, { "epoch": 0.7200420831141504, "grad_norm": 2.0493955612182617, "learning_rate": 3.8373003284270445e-05, "loss": 1.6542, "step": 6844 }, { "epoch": 0.7201472908995266, "grad_norm": 1.2979793548583984, "learning_rate": 3.834616974327021e-05, "loss": 1.5933, "step": 6845 }, { "epoch": 0.7202524986849027, "grad_norm": 1.7987275123596191, "learning_rate": 3.831934336192625e-05, "loss": 1.5552, "step": 6846 }, { "epoch": 0.7203577064702787, "grad_norm": 1.7454094886779785, "learning_rate": 3.829252414335381e-05, "loss": 1.7823, "step": 6847 }, { "epoch": 0.7204629142556549, "grad_norm": 1.5051946640014648, "learning_rate": 3.826571209066737e-05, "loss": 1.4311, "step": 6848 }, { "epoch": 0.720568122041031, "grad_norm": 1.2123810052871704, "learning_rate": 3.8238907206980513e-05, "loss": 1.8447, "step": 6849 }, { "epoch": 0.7206733298264072, "grad_norm": 1.1441395282745361, "learning_rate": 3.821210949540593e-05, "loss": 1.6574, "step": 6850 }, { "epoch": 0.7207785376117832, "grad_norm": 1.625401258468628, "learning_rate": 3.818531895905566e-05, "loss": 1.9151, "step": 6851 }, { "epoch": 0.7208837453971594, "grad_norm": 2.0465898513793945, "learning_rate": 3.815853560104075e-05, "loss": 1.8502, "step": 6852 }, { "epoch": 0.7209889531825355, "grad_norm": 1.7587473392486572, "learning_rate": 3.813175942447141e-05, "loss": 1.6247, "step": 6853 }, { "epoch": 0.7210941609679117, "grad_norm": 3.3340325355529785, "learning_rate": 3.810499043245718e-05, "loss": 1.1276, "step": 6854 }, { "epoch": 0.7211993687532877, "grad_norm": 1.4002957344055176, "learning_rate": 3.807822862810657e-05, "loss": 1.452, "step": 6855 }, { "epoch": 0.7213045765386639, "grad_norm": 1.8713266849517822, "learning_rate": 3.805147401452734e-05, "loss": 1.9372, "step": 6856 }, { "epoch": 0.72140978432404, "grad_norm": 2.1480062007904053, "learning_rate": 3.802472659482649e-05, "loss": 1.6592, "step": 6857 }, { "epoch": 0.721514992109416, "grad_norm": 0.9366277456283569, "learning_rate": 3.799798637211005e-05, "loss": 1.5559, "step": 6858 }, { "epoch": 0.7216201998947922, "grad_norm": 1.6962757110595703, "learning_rate": 3.7971253349483285e-05, "loss": 1.6838, "step": 6859 }, { "epoch": 0.7217254076801684, "grad_norm": 1.3592772483825684, "learning_rate": 3.794452753005061e-05, "loss": 1.8806, "step": 6860 }, { "epoch": 0.7218306154655445, "grad_norm": 2.0401268005371094, "learning_rate": 3.791780891691558e-05, "loss": 2.4508, "step": 6861 }, { "epoch": 0.7219358232509205, "grad_norm": 1.0858590602874756, "learning_rate": 3.7891097513180995e-05, "loss": 2.0971, "step": 6862 }, { "epoch": 0.7220410310362967, "grad_norm": 1.913306713104248, "learning_rate": 3.786439332194874e-05, "loss": 1.1957, "step": 6863 }, { "epoch": 0.7221462388216728, "grad_norm": 2.756697416305542, "learning_rate": 3.783769634631985e-05, "loss": 1.5992, "step": 6864 }, { "epoch": 0.7222514466070489, "grad_norm": 1.9628227949142456, "learning_rate": 3.781100658939461e-05, "loss": 2.0914, "step": 6865 }, { "epoch": 0.722356654392425, "grad_norm": 1.16515052318573, "learning_rate": 3.7784324054272405e-05, "loss": 1.7869, "step": 6866 }, { "epoch": 0.7224618621778012, "grad_norm": 1.3066214323043823, "learning_rate": 3.7757648744051736e-05, "loss": 2.3816, "step": 6867 }, { "epoch": 0.7225670699631773, "grad_norm": 1.6055272817611694, "learning_rate": 3.773098066183039e-05, "loss": 1.3618, "step": 6868 }, { "epoch": 0.7226722777485534, "grad_norm": 1.2972073554992676, "learning_rate": 3.7704319810705225e-05, "loss": 1.9931, "step": 6869 }, { "epoch": 0.7227774855339295, "grad_norm": 1.3764903545379639, "learning_rate": 3.767766619377222e-05, "loss": 2.0662, "step": 6870 }, { "epoch": 0.7228826933193057, "grad_norm": 1.6971714496612549, "learning_rate": 3.7651019814126654e-05, "loss": 1.4372, "step": 6871 }, { "epoch": 0.7229879011046817, "grad_norm": 2.1044673919677734, "learning_rate": 3.7624380674862845e-05, "loss": 2.0204, "step": 6872 }, { "epoch": 0.7230931088900578, "grad_norm": 1.2413538694381714, "learning_rate": 3.759774877907428e-05, "loss": 1.8452, "step": 6873 }, { "epoch": 0.723198316675434, "grad_norm": 1.1550841331481934, "learning_rate": 3.75711241298537e-05, "loss": 1.655, "step": 6874 }, { "epoch": 0.7233035244608101, "grad_norm": 1.314095139503479, "learning_rate": 3.75445067302929e-05, "loss": 1.648, "step": 6875 }, { "epoch": 0.7234087322461862, "grad_norm": 1.6076027154922485, "learning_rate": 3.751789658348284e-05, "loss": 1.8427, "step": 6876 }, { "epoch": 0.7235139400315623, "grad_norm": 1.512660264968872, "learning_rate": 3.749129369251372e-05, "loss": 2.0951, "step": 6877 }, { "epoch": 0.7236191478169385, "grad_norm": 1.5200482606887817, "learning_rate": 3.7464698060474814e-05, "loss": 1.3188, "step": 6878 }, { "epoch": 0.7237243556023145, "grad_norm": 1.276044249534607, "learning_rate": 3.7438109690454646e-05, "loss": 1.5648, "step": 6879 }, { "epoch": 0.7238295633876907, "grad_norm": 1.4830344915390015, "learning_rate": 3.741152858554077e-05, "loss": 1.3373, "step": 6880 }, { "epoch": 0.7239347711730668, "grad_norm": 1.0072591304779053, "learning_rate": 3.7384954748819934e-05, "loss": 1.9321, "step": 6881 }, { "epoch": 0.724039978958443, "grad_norm": 1.2842603921890259, "learning_rate": 3.7358388183378166e-05, "loss": 1.791, "step": 6882 }, { "epoch": 0.724145186743819, "grad_norm": 2.4097936153411865, "learning_rate": 3.733182889230049e-05, "loss": 1.6026, "step": 6883 }, { "epoch": 0.7242503945291952, "grad_norm": 1.9151344299316406, "learning_rate": 3.730527687867114e-05, "loss": 1.7359, "step": 6884 }, { "epoch": 0.7243556023145713, "grad_norm": 1.9665045738220215, "learning_rate": 3.727873214557357e-05, "loss": 2.2739, "step": 6885 }, { "epoch": 0.7244608100999474, "grad_norm": 1.395699143409729, "learning_rate": 3.725219469609026e-05, "loss": 1.382, "step": 6886 }, { "epoch": 0.7245660178853235, "grad_norm": 2.1957099437713623, "learning_rate": 3.722566453330298e-05, "loss": 1.5297, "step": 6887 }, { "epoch": 0.7246712256706996, "grad_norm": 2.5647594928741455, "learning_rate": 3.7199141660292594e-05, "loss": 1.7129, "step": 6888 }, { "epoch": 0.7247764334560758, "grad_norm": 1.1315737962722778, "learning_rate": 3.717262608013903e-05, "loss": 1.2686, "step": 6889 }, { "epoch": 0.7248816412414518, "grad_norm": 2.1699776649475098, "learning_rate": 3.714611779592156e-05, "loss": 2.1215, "step": 6890 }, { "epoch": 0.724986849026828, "grad_norm": 1.1801518201828003, "learning_rate": 3.711961681071845e-05, "loss": 2.093, "step": 6891 }, { "epoch": 0.7250920568122041, "grad_norm": 1.2889292240142822, "learning_rate": 3.7093123127607155e-05, "loss": 1.5338, "step": 6892 }, { "epoch": 0.7251972645975803, "grad_norm": 1.0799230337142944, "learning_rate": 3.706663674966435e-05, "loss": 1.2338, "step": 6893 }, { "epoch": 0.7253024723829563, "grad_norm": 0.9696769714355469, "learning_rate": 3.7040157679965796e-05, "loss": 1.7135, "step": 6894 }, { "epoch": 0.7254076801683325, "grad_norm": 1.161821961402893, "learning_rate": 3.701368592158636e-05, "loss": 1.9596, "step": 6895 }, { "epoch": 0.7255128879537086, "grad_norm": 1.847954511642456, "learning_rate": 3.6987221477600206e-05, "loss": 1.9575, "step": 6896 }, { "epoch": 0.7256180957390846, "grad_norm": 1.5871635675430298, "learning_rate": 3.6960764351080535e-05, "loss": 1.4081, "step": 6897 }, { "epoch": 0.7257233035244608, "grad_norm": 2.1948516368865967, "learning_rate": 3.6934314545099666e-05, "loss": 1.8729, "step": 6898 }, { "epoch": 0.7258285113098369, "grad_norm": 1.237642765045166, "learning_rate": 3.690787206272923e-05, "loss": 1.4971, "step": 6899 }, { "epoch": 0.7259337190952131, "grad_norm": 2.1544601917266846, "learning_rate": 3.6881436907039845e-05, "loss": 1.6227, "step": 6900 }, { "epoch": 0.7260389268805891, "grad_norm": 1.725161075592041, "learning_rate": 3.6855009081101355e-05, "loss": 2.0619, "step": 6901 }, { "epoch": 0.7261441346659653, "grad_norm": 2.4666850566864014, "learning_rate": 3.6828588587982715e-05, "loss": 1.9632, "step": 6902 }, { "epoch": 0.7262493424513414, "grad_norm": 1.0997231006622314, "learning_rate": 3.680217543075204e-05, "loss": 1.6217, "step": 6903 }, { "epoch": 0.7263545502367175, "grad_norm": 1.5529903173446655, "learning_rate": 3.6775769612476666e-05, "loss": 1.8783, "step": 6904 }, { "epoch": 0.7264597580220936, "grad_norm": 1.4422789812088013, "learning_rate": 3.674937113622297e-05, "loss": 1.4074, "step": 6905 }, { "epoch": 0.7265649658074698, "grad_norm": 1.7190214395523071, "learning_rate": 3.6722980005056474e-05, "loss": 1.3818, "step": 6906 }, { "epoch": 0.7266701735928459, "grad_norm": 1.7955530881881714, "learning_rate": 3.669659622204199e-05, "loss": 2.1845, "step": 6907 }, { "epoch": 0.726775381378222, "grad_norm": 2.1686935424804688, "learning_rate": 3.6670219790243344e-05, "loss": 1.2339, "step": 6908 }, { "epoch": 0.7268805891635981, "grad_norm": 1.332140326499939, "learning_rate": 3.664385071272348e-05, "loss": 1.6447, "step": 6909 }, { "epoch": 0.7269857969489742, "grad_norm": 2.21612548828125, "learning_rate": 3.6617488992544656e-05, "loss": 1.5974, "step": 6910 }, { "epoch": 0.7270910047343503, "grad_norm": 1.5317012071609497, "learning_rate": 3.659113463276812e-05, "loss": 1.6033, "step": 6911 }, { "epoch": 0.7271962125197264, "grad_norm": 1.3678877353668213, "learning_rate": 3.656478763645428e-05, "loss": 1.4386, "step": 6912 }, { "epoch": 0.7273014203051026, "grad_norm": 1.0950294733047485, "learning_rate": 3.6538448006662795e-05, "loss": 1.7116, "step": 6913 }, { "epoch": 0.7274066280904787, "grad_norm": 1.2202073335647583, "learning_rate": 3.6512115746452366e-05, "loss": 1.0636, "step": 6914 }, { "epoch": 0.7275118358758548, "grad_norm": 1.1715128421783447, "learning_rate": 3.648579085888085e-05, "loss": 1.5944, "step": 6915 }, { "epoch": 0.7276170436612309, "grad_norm": 0.9897493720054626, "learning_rate": 3.6459473347005334e-05, "loss": 1.6186, "step": 6916 }, { "epoch": 0.7277222514466071, "grad_norm": 1.9510959386825562, "learning_rate": 3.643316321388194e-05, "loss": 1.1786, "step": 6917 }, { "epoch": 0.7278274592319832, "grad_norm": 1.5720961093902588, "learning_rate": 3.640686046256594e-05, "loss": 1.6419, "step": 6918 }, { "epoch": 0.7279326670173593, "grad_norm": 1.484616994857788, "learning_rate": 3.6380565096111866e-05, "loss": 1.5632, "step": 6919 }, { "epoch": 0.7280378748027354, "grad_norm": 1.5460129976272583, "learning_rate": 3.6354277117573264e-05, "loss": 1.5821, "step": 6920 }, { "epoch": 0.7281430825881116, "grad_norm": 1.5257236957550049, "learning_rate": 3.632799653000286e-05, "loss": 1.3601, "step": 6921 }, { "epoch": 0.7282482903734876, "grad_norm": 1.349097490310669, "learning_rate": 3.630172333645261e-05, "loss": 1.8427, "step": 6922 }, { "epoch": 0.7283534981588637, "grad_norm": 1.3090531826019287, "learning_rate": 3.627545753997341e-05, "loss": 1.4297, "step": 6923 }, { "epoch": 0.7284587059442399, "grad_norm": 1.86859130859375, "learning_rate": 3.624919914361552e-05, "loss": 1.3909, "step": 6924 }, { "epoch": 0.728563913729616, "grad_norm": 2.1052424907684326, "learning_rate": 3.622294815042821e-05, "loss": 1.4364, "step": 6925 }, { "epoch": 0.7286691215149921, "grad_norm": 1.1871145963668823, "learning_rate": 3.6196704563459885e-05, "loss": 1.5061, "step": 6926 }, { "epoch": 0.7287743293003682, "grad_norm": 2.0205655097961426, "learning_rate": 3.617046838575819e-05, "loss": 1.5433, "step": 6927 }, { "epoch": 0.7288795370857444, "grad_norm": 1.3786308765411377, "learning_rate": 3.6144239620369816e-05, "loss": 1.7252, "step": 6928 }, { "epoch": 0.7289847448711204, "grad_norm": 1.8365287780761719, "learning_rate": 3.611801827034059e-05, "loss": 1.3477, "step": 6929 }, { "epoch": 0.7290899526564966, "grad_norm": 2.479865789413452, "learning_rate": 3.609180433871558e-05, "loss": 1.362, "step": 6930 }, { "epoch": 0.7291951604418727, "grad_norm": 1.4172632694244385, "learning_rate": 3.606559782853889e-05, "loss": 1.4992, "step": 6931 }, { "epoch": 0.7293003682272489, "grad_norm": 1.7480148077011108, "learning_rate": 3.603939874285375e-05, "loss": 1.7928, "step": 6932 }, { "epoch": 0.7294055760126249, "grad_norm": 2.404942512512207, "learning_rate": 3.6013207084702646e-05, "loss": 1.1866, "step": 6933 }, { "epoch": 0.729510783798001, "grad_norm": 1.3613299131393433, "learning_rate": 3.5987022857127086e-05, "loss": 1.1066, "step": 6934 }, { "epoch": 0.7296159915833772, "grad_norm": 2.4102582931518555, "learning_rate": 3.596084606316778e-05, "loss": 1.5663, "step": 6935 }, { "epoch": 0.7297211993687532, "grad_norm": 1.294023036956787, "learning_rate": 3.593467670586457e-05, "loss": 1.8498, "step": 6936 }, { "epoch": 0.7298264071541294, "grad_norm": 1.4353657960891724, "learning_rate": 3.5908514788256344e-05, "loss": 2.0498, "step": 6937 }, { "epoch": 0.7299316149395055, "grad_norm": 2.242220401763916, "learning_rate": 3.588236031338129e-05, "loss": 1.8686, "step": 6938 }, { "epoch": 0.7300368227248817, "grad_norm": 1.6816960573196411, "learning_rate": 3.585621328427658e-05, "loss": 1.8161, "step": 6939 }, { "epoch": 0.7301420305102577, "grad_norm": 1.5004388093948364, "learning_rate": 3.58300737039786e-05, "loss": 1.7814, "step": 6940 }, { "epoch": 0.7302472382956339, "grad_norm": 2.273651599884033, "learning_rate": 3.580394157552286e-05, "loss": 1.9734, "step": 6941 }, { "epoch": 0.73035244608101, "grad_norm": 1.1475566625595093, "learning_rate": 3.577781690194399e-05, "loss": 1.4193, "step": 6942 }, { "epoch": 0.7304576538663861, "grad_norm": 1.948401927947998, "learning_rate": 3.5751699686275786e-05, "loss": 1.8889, "step": 6943 }, { "epoch": 0.7305628616517622, "grad_norm": 2.96512770652771, "learning_rate": 3.572558993155112e-05, "loss": 2.0799, "step": 6944 }, { "epoch": 0.7306680694371384, "grad_norm": 1.6030654907226562, "learning_rate": 3.569948764080201e-05, "loss": 1.497, "step": 6945 }, { "epoch": 0.7307732772225145, "grad_norm": 1.6341030597686768, "learning_rate": 3.5673392817059705e-05, "loss": 1.5135, "step": 6946 }, { "epoch": 0.7308784850078905, "grad_norm": 1.9366779327392578, "learning_rate": 3.5647305463354466e-05, "loss": 1.916, "step": 6947 }, { "epoch": 0.7309836927932667, "grad_norm": 1.9645856618881226, "learning_rate": 3.562122558271569e-05, "loss": 1.4412, "step": 6948 }, { "epoch": 0.7310889005786428, "grad_norm": 1.8454338312149048, "learning_rate": 3.559515317817204e-05, "loss": 1.4741, "step": 6949 }, { "epoch": 0.731194108364019, "grad_norm": 1.1774706840515137, "learning_rate": 3.556908825275117e-05, "loss": 1.5745, "step": 6950 }, { "epoch": 0.731299316149395, "grad_norm": 1.3022416830062866, "learning_rate": 3.5543030809479874e-05, "loss": 1.7731, "step": 6951 }, { "epoch": 0.7314045239347712, "grad_norm": 0.9204453229904175, "learning_rate": 3.5516980851384194e-05, "loss": 1.5551, "step": 6952 }, { "epoch": 0.7315097317201473, "grad_norm": 1.7485677003860474, "learning_rate": 3.549093838148919e-05, "loss": 1.6053, "step": 6953 }, { "epoch": 0.7316149395055234, "grad_norm": 1.1012275218963623, "learning_rate": 3.546490340281906e-05, "loss": 1.6423, "step": 6954 }, { "epoch": 0.7317201472908995, "grad_norm": 1.050255537033081, "learning_rate": 3.543887591839723e-05, "loss": 1.4111, "step": 6955 }, { "epoch": 0.7318253550762757, "grad_norm": 1.184462070465088, "learning_rate": 3.5412855931246126e-05, "loss": 1.9403, "step": 6956 }, { "epoch": 0.7319305628616518, "grad_norm": 1.2454978227615356, "learning_rate": 3.538684344438736e-05, "loss": 1.5388, "step": 6957 }, { "epoch": 0.7320357706470279, "grad_norm": 1.4453421831130981, "learning_rate": 3.5360838460841725e-05, "loss": 1.9107, "step": 6958 }, { "epoch": 0.732140978432404, "grad_norm": 1.2171581983566284, "learning_rate": 3.533484098362908e-05, "loss": 1.5818, "step": 6959 }, { "epoch": 0.7322461862177801, "grad_norm": 2.109985113143921, "learning_rate": 3.5308851015768375e-05, "loss": 2.0371, "step": 6960 }, { "epoch": 0.7323513940031562, "grad_norm": 2.1604323387145996, "learning_rate": 3.52828685602778e-05, "loss": 1.6088, "step": 6961 }, { "epoch": 0.7324566017885323, "grad_norm": 1.6006240844726562, "learning_rate": 3.525689362017461e-05, "loss": 1.353, "step": 6962 }, { "epoch": 0.7325618095739085, "grad_norm": 3.5379319190979004, "learning_rate": 3.523092619847512e-05, "loss": 1.9756, "step": 6963 }, { "epoch": 0.7326670173592846, "grad_norm": 1.7658113241195679, "learning_rate": 3.520496629819494e-05, "loss": 1.8514, "step": 6964 }, { "epoch": 0.7327722251446607, "grad_norm": 2.1337103843688965, "learning_rate": 3.517901392234865e-05, "loss": 1.785, "step": 6965 }, { "epoch": 0.7328774329300368, "grad_norm": 1.1942282915115356, "learning_rate": 3.515306907395002e-05, "loss": 1.2969, "step": 6966 }, { "epoch": 0.732982640715413, "grad_norm": 1.3888230323791504, "learning_rate": 3.512713175601194e-05, "loss": 1.5518, "step": 6967 }, { "epoch": 0.733087848500789, "grad_norm": 1.5503004789352417, "learning_rate": 3.51012019715464e-05, "loss": 1.4631, "step": 6968 }, { "epoch": 0.7331930562861652, "grad_norm": 2.311354160308838, "learning_rate": 3.5075279723564616e-05, "loss": 1.0551, "step": 6969 }, { "epoch": 0.7332982640715413, "grad_norm": 2.0432231426239014, "learning_rate": 3.504936501507679e-05, "loss": 2.2122, "step": 6970 }, { "epoch": 0.7334034718569175, "grad_norm": 1.2789908647537231, "learning_rate": 3.502345784909229e-05, "loss": 1.6112, "step": 6971 }, { "epoch": 0.7335086796422935, "grad_norm": 1.3882858753204346, "learning_rate": 3.499755822861971e-05, "loss": 1.2767, "step": 6972 }, { "epoch": 0.7336138874276696, "grad_norm": 1.920723795890808, "learning_rate": 3.497166615666664e-05, "loss": 1.8132, "step": 6973 }, { "epoch": 0.7337190952130458, "grad_norm": 1.692369818687439, "learning_rate": 3.4945781636239814e-05, "loss": 1.7823, "step": 6974 }, { "epoch": 0.7338243029984218, "grad_norm": 1.6629142761230469, "learning_rate": 3.491990467034518e-05, "loss": 1.7026, "step": 6975 }, { "epoch": 0.733929510783798, "grad_norm": 1.6108320951461792, "learning_rate": 3.4894035261987715e-05, "loss": 1.2472, "step": 6976 }, { "epoch": 0.7340347185691741, "grad_norm": 2.1136841773986816, "learning_rate": 3.486817341417151e-05, "loss": 1.0959, "step": 6977 }, { "epoch": 0.7341399263545503, "grad_norm": 2.271759510040283, "learning_rate": 3.484231912989989e-05, "loss": 1.7992, "step": 6978 }, { "epoch": 0.7342451341399263, "grad_norm": 1.7375454902648926, "learning_rate": 3.481647241217516e-05, "loss": 1.3712, "step": 6979 }, { "epoch": 0.7343503419253025, "grad_norm": 1.6927552223205566, "learning_rate": 3.4790633263998874e-05, "loss": 1.3709, "step": 6980 }, { "epoch": 0.7344555497106786, "grad_norm": 1.4837682247161865, "learning_rate": 3.476480168837161e-05, "loss": 1.4844, "step": 6981 }, { "epoch": 0.7345607574960548, "grad_norm": 1.7805767059326172, "learning_rate": 3.473897768829308e-05, "loss": 1.9579, "step": 6982 }, { "epoch": 0.7346659652814308, "grad_norm": 1.7141696214675903, "learning_rate": 3.471316126676222e-05, "loss": 1.1072, "step": 6983 }, { "epoch": 0.734771173066807, "grad_norm": 2.4495108127593994, "learning_rate": 3.4687352426776945e-05, "loss": 1.4545, "step": 6984 }, { "epoch": 0.7348763808521831, "grad_norm": 1.993420124053955, "learning_rate": 3.466155117133433e-05, "loss": 2.0231, "step": 6985 }, { "epoch": 0.7349815886375591, "grad_norm": 1.47242271900177, "learning_rate": 3.4635757503430685e-05, "loss": 1.5673, "step": 6986 }, { "epoch": 0.7350867964229353, "grad_norm": 2.614037036895752, "learning_rate": 3.460997142606126e-05, "loss": 2.0294, "step": 6987 }, { "epoch": 0.7351920042083114, "grad_norm": 1.551087737083435, "learning_rate": 3.4584192942220495e-05, "loss": 1.889, "step": 6988 }, { "epoch": 0.7352972119936876, "grad_norm": 1.1275653839111328, "learning_rate": 3.455842205490202e-05, "loss": 1.9836, "step": 6989 }, { "epoch": 0.7354024197790636, "grad_norm": 2.598032236099243, "learning_rate": 3.453265876709847e-05, "loss": 1.7154, "step": 6990 }, { "epoch": 0.7355076275644398, "grad_norm": 2.10746431350708, "learning_rate": 3.45069030818017e-05, "loss": 1.7135, "step": 6991 }, { "epoch": 0.7356128353498159, "grad_norm": 1.3205444812774658, "learning_rate": 3.448115500200263e-05, "loss": 1.7395, "step": 6992 }, { "epoch": 0.735718043135192, "grad_norm": 2.8115272521972656, "learning_rate": 3.4455414530691234e-05, "loss": 1.9317, "step": 6993 }, { "epoch": 0.7358232509205681, "grad_norm": 2.6412436962127686, "learning_rate": 3.442968167085675e-05, "loss": 1.5553, "step": 6994 }, { "epoch": 0.7359284587059443, "grad_norm": 1.441660761833191, "learning_rate": 3.440395642548743e-05, "loss": 1.4242, "step": 6995 }, { "epoch": 0.7360336664913204, "grad_norm": 1.848536729812622, "learning_rate": 3.437823879757059e-05, "loss": 1.2416, "step": 6996 }, { "epoch": 0.7361388742766964, "grad_norm": 1.5345301628112793, "learning_rate": 3.435252879009284e-05, "loss": 2.0876, "step": 6997 }, { "epoch": 0.7362440820620726, "grad_norm": 1.2060550451278687, "learning_rate": 3.432682640603975e-05, "loss": 2.075, "step": 6998 }, { "epoch": 0.7363492898474487, "grad_norm": 1.2402889728546143, "learning_rate": 3.430113164839601e-05, "loss": 2.3051, "step": 6999 }, { "epoch": 0.7364544976328248, "grad_norm": 1.3935623168945312, "learning_rate": 3.427544452014556e-05, "loss": 1.045, "step": 7000 }, { "epoch": 0.7365597054182009, "grad_norm": 1.647644281387329, "learning_rate": 3.42497650242713e-05, "loss": 1.5662, "step": 7001 }, { "epoch": 0.7366649132035771, "grad_norm": 1.3244524002075195, "learning_rate": 3.422409316375529e-05, "loss": 1.6658, "step": 7002 }, { "epoch": 0.7367701209889532, "grad_norm": 2.1039204597473145, "learning_rate": 3.4198428941578776e-05, "loss": 1.6034, "step": 7003 }, { "epoch": 0.7368753287743293, "grad_norm": 1.919314980506897, "learning_rate": 3.417277236072203e-05, "loss": 1.7035, "step": 7004 }, { "epoch": 0.7369805365597054, "grad_norm": 1.365127682685852, "learning_rate": 3.414712342416443e-05, "loss": 1.7206, "step": 7005 }, { "epoch": 0.7370857443450816, "grad_norm": 1.5440795421600342, "learning_rate": 3.4121482134884575e-05, "loss": 0.6072, "step": 7006 }, { "epoch": 0.7371909521304576, "grad_norm": 1.7925177812576294, "learning_rate": 3.409584849586006e-05, "loss": 1.7965, "step": 7007 }, { "epoch": 0.7372961599158337, "grad_norm": 1.45937979221344, "learning_rate": 3.4070222510067653e-05, "loss": 1.5833, "step": 7008 }, { "epoch": 0.7374013677012099, "grad_norm": 1.4615490436553955, "learning_rate": 3.40446041804832e-05, "loss": 1.5594, "step": 7009 }, { "epoch": 0.737506575486586, "grad_norm": 2.050783395767212, "learning_rate": 3.401899351008163e-05, "loss": 1.8929, "step": 7010 }, { "epoch": 0.7376117832719621, "grad_norm": 1.2625833749771118, "learning_rate": 3.3993390501837116e-05, "loss": 1.7766, "step": 7011 }, { "epoch": 0.7377169910573382, "grad_norm": 1.5997296571731567, "learning_rate": 3.396779515872282e-05, "loss": 2.3762, "step": 7012 }, { "epoch": 0.7378221988427144, "grad_norm": 1.210913062095642, "learning_rate": 3.3942207483710986e-05, "loss": 1.93, "step": 7013 }, { "epoch": 0.7379274066280905, "grad_norm": 1.6027042865753174, "learning_rate": 3.391662747977312e-05, "loss": 2.1033, "step": 7014 }, { "epoch": 0.7380326144134666, "grad_norm": 1.183409571647644, "learning_rate": 3.389105514987969e-05, "loss": 2.1993, "step": 7015 }, { "epoch": 0.7381378221988427, "grad_norm": 1.665959358215332, "learning_rate": 3.38654904970003e-05, "loss": 1.7956, "step": 7016 }, { "epoch": 0.7382430299842189, "grad_norm": 1.5837026834487915, "learning_rate": 3.383993352410375e-05, "loss": 1.4741, "step": 7017 }, { "epoch": 0.7383482377695949, "grad_norm": 1.7137260437011719, "learning_rate": 3.381438423415787e-05, "loss": 1.6929, "step": 7018 }, { "epoch": 0.738453445554971, "grad_norm": 1.120949625968933, "learning_rate": 3.378884263012957e-05, "loss": 1.3851, "step": 7019 }, { "epoch": 0.7385586533403472, "grad_norm": 1.4287333488464355, "learning_rate": 3.3763308714984974e-05, "loss": 2.1058, "step": 7020 }, { "epoch": 0.7386638611257234, "grad_norm": 1.9645508527755737, "learning_rate": 3.373778249168923e-05, "loss": 1.747, "step": 7021 }, { "epoch": 0.7387690689110994, "grad_norm": 1.1778647899627686, "learning_rate": 3.3712263963206583e-05, "loss": 2.0621, "step": 7022 }, { "epoch": 0.7388742766964755, "grad_norm": 2.0654361248016357, "learning_rate": 3.368675313250046e-05, "loss": 1.6014, "step": 7023 }, { "epoch": 0.7389794844818517, "grad_norm": 1.6172906160354614, "learning_rate": 3.3661250002533305e-05, "loss": 1.6385, "step": 7024 }, { "epoch": 0.7390846922672277, "grad_norm": 1.6722594499588013, "learning_rate": 3.363575457626678e-05, "loss": 1.7566, "step": 7025 }, { "epoch": 0.7391899000526039, "grad_norm": 1.1120942831039429, "learning_rate": 3.361026685666152e-05, "loss": 1.7478, "step": 7026 }, { "epoch": 0.73929510783798, "grad_norm": 1.4074763059616089, "learning_rate": 3.358478684667734e-05, "loss": 1.9311, "step": 7027 }, { "epoch": 0.7394003156233562, "grad_norm": 1.5094165802001953, "learning_rate": 3.355931454927319e-05, "loss": 1.4962, "step": 7028 }, { "epoch": 0.7395055234087322, "grad_norm": 1.2964214086532593, "learning_rate": 3.353384996740709e-05, "loss": 2.0931, "step": 7029 }, { "epoch": 0.7396107311941084, "grad_norm": 1.637769103050232, "learning_rate": 3.3508393104036076e-05, "loss": 1.556, "step": 7030 }, { "epoch": 0.7397159389794845, "grad_norm": 1.4458996057510376, "learning_rate": 3.348294396211643e-05, "loss": 1.7264, "step": 7031 }, { "epoch": 0.7398211467648605, "grad_norm": 1.4599863290786743, "learning_rate": 3.345750254460348e-05, "loss": 1.3761, "step": 7032 }, { "epoch": 0.7399263545502367, "grad_norm": 1.3754396438598633, "learning_rate": 3.3432068854451594e-05, "loss": 1.8374, "step": 7033 }, { "epoch": 0.7400315623356128, "grad_norm": 1.6223207712173462, "learning_rate": 3.3406642894614394e-05, "loss": 1.5258, "step": 7034 }, { "epoch": 0.740136770120989, "grad_norm": 2.438016414642334, "learning_rate": 3.3381224668044434e-05, "loss": 1.9796, "step": 7035 }, { "epoch": 0.740241977906365, "grad_norm": 1.7482492923736572, "learning_rate": 3.3355814177693514e-05, "loss": 2.0305, "step": 7036 }, { "epoch": 0.7403471856917412, "grad_norm": 2.278324842453003, "learning_rate": 3.3330411426512435e-05, "loss": 1.6287, "step": 7037 }, { "epoch": 0.7404523934771173, "grad_norm": 1.471503734588623, "learning_rate": 3.3305016417451105e-05, "loss": 1.3077, "step": 7038 }, { "epoch": 0.7405576012624934, "grad_norm": 1.7559709548950195, "learning_rate": 3.327962915345864e-05, "loss": 1.2328, "step": 7039 }, { "epoch": 0.7406628090478695, "grad_norm": 1.2783993482589722, "learning_rate": 3.325424963748313e-05, "loss": 1.6599, "step": 7040 }, { "epoch": 0.7407680168332457, "grad_norm": 1.633362054824829, "learning_rate": 3.3228877872471786e-05, "loss": 1.8694, "step": 7041 }, { "epoch": 0.7408732246186218, "grad_norm": 1.3471500873565674, "learning_rate": 3.3203513861371017e-05, "loss": 1.4373, "step": 7042 }, { "epoch": 0.7409784324039979, "grad_norm": 1.6833635568618774, "learning_rate": 3.317815760712622e-05, "loss": 1.2696, "step": 7043 }, { "epoch": 0.741083640189374, "grad_norm": 1.1694504022598267, "learning_rate": 3.315280911268193e-05, "loss": 1.5157, "step": 7044 }, { "epoch": 0.7411888479747502, "grad_norm": 1.1271198987960815, "learning_rate": 3.312746838098181e-05, "loss": 1.4756, "step": 7045 }, { "epoch": 0.7412940557601263, "grad_norm": 1.275770664215088, "learning_rate": 3.310213541496858e-05, "loss": 1.4592, "step": 7046 }, { "epoch": 0.7413992635455023, "grad_norm": 1.2125309705734253, "learning_rate": 3.307681021758405e-05, "loss": 1.9089, "step": 7047 }, { "epoch": 0.7415044713308785, "grad_norm": 1.1902837753295898, "learning_rate": 3.305149279176921e-05, "loss": 2.156, "step": 7048 }, { "epoch": 0.7416096791162546, "grad_norm": 1.931227684020996, "learning_rate": 3.302618314046405e-05, "loss": 1.6646, "step": 7049 }, { "epoch": 0.7417148869016307, "grad_norm": 1.360236644744873, "learning_rate": 3.300088126660768e-05, "loss": 0.9917, "step": 7050 }, { "epoch": 0.7418200946870068, "grad_norm": 1.1433465480804443, "learning_rate": 3.297558717313839e-05, "loss": 1.7004, "step": 7051 }, { "epoch": 0.741925302472383, "grad_norm": 1.5731385946273804, "learning_rate": 3.295030086299341e-05, "loss": 1.8221, "step": 7052 }, { "epoch": 0.7420305102577591, "grad_norm": 1.5698689222335815, "learning_rate": 3.292502233910922e-05, "loss": 2.1785, "step": 7053 }, { "epoch": 0.7421357180431352, "grad_norm": 2.299558162689209, "learning_rate": 3.2899751604421324e-05, "loss": 1.2728, "step": 7054 }, { "epoch": 0.7422409258285113, "grad_norm": 1.727777123451233, "learning_rate": 3.287448866186428e-05, "loss": 1.5016, "step": 7055 }, { "epoch": 0.7423461336138875, "grad_norm": 1.1530907154083252, "learning_rate": 3.284923351437187e-05, "loss": 1.8296, "step": 7056 }, { "epoch": 0.7424513413992635, "grad_norm": 1.4847744703292847, "learning_rate": 3.282398616487684e-05, "loss": 1.5375, "step": 7057 }, { "epoch": 0.7425565491846396, "grad_norm": 1.4450937509536743, "learning_rate": 3.279874661631106e-05, "loss": 2.0141, "step": 7058 }, { "epoch": 0.7426617569700158, "grad_norm": 1.7158372402191162, "learning_rate": 3.277351487160558e-05, "loss": 1.7055, "step": 7059 }, { "epoch": 0.742766964755392, "grad_norm": 1.9537938833236694, "learning_rate": 3.2748290933690454e-05, "loss": 1.7621, "step": 7060 }, { "epoch": 0.742872172540768, "grad_norm": 1.7717899084091187, "learning_rate": 3.2723074805494805e-05, "loss": 1.357, "step": 7061 }, { "epoch": 0.7429773803261441, "grad_norm": 1.749004602432251, "learning_rate": 3.269786648994697e-05, "loss": 1.998, "step": 7062 }, { "epoch": 0.7430825881115203, "grad_norm": 1.6080665588378906, "learning_rate": 3.267266598997429e-05, "loss": 1.6851, "step": 7063 }, { "epoch": 0.7431877958968963, "grad_norm": 1.6543947458267212, "learning_rate": 3.2647473308503164e-05, "loss": 1.483, "step": 7064 }, { "epoch": 0.7432930036822725, "grad_norm": 1.3643039464950562, "learning_rate": 3.262228844845922e-05, "loss": 1.7009, "step": 7065 }, { "epoch": 0.7433982114676486, "grad_norm": 1.9166144132614136, "learning_rate": 3.259711141276703e-05, "loss": 2.1545, "step": 7066 }, { "epoch": 0.7435034192530248, "grad_norm": 1.5741544961929321, "learning_rate": 3.2571942204350324e-05, "loss": 1.9652, "step": 7067 }, { "epoch": 0.7436086270384008, "grad_norm": 1.287895679473877, "learning_rate": 3.254678082613196e-05, "loss": 1.753, "step": 7068 }, { "epoch": 0.743713834823777, "grad_norm": 1.658474326133728, "learning_rate": 3.252162728103382e-05, "loss": 1.4791, "step": 7069 }, { "epoch": 0.7438190426091531, "grad_norm": 1.8779799938201904, "learning_rate": 3.249648157197688e-05, "loss": 1.3374, "step": 7070 }, { "epoch": 0.7439242503945291, "grad_norm": 1.7601832151412964, "learning_rate": 3.2471343701881275e-05, "loss": 1.4406, "step": 7071 }, { "epoch": 0.7440294581799053, "grad_norm": 1.6893101930618286, "learning_rate": 3.244621367366616e-05, "loss": 1.0462, "step": 7072 }, { "epoch": 0.7441346659652814, "grad_norm": 1.2970998287200928, "learning_rate": 3.24210914902498e-05, "loss": 1.7947, "step": 7073 }, { "epoch": 0.7442398737506576, "grad_norm": 1.8401261568069458, "learning_rate": 3.2395977154549554e-05, "loss": 2.1458, "step": 7074 }, { "epoch": 0.7443450815360336, "grad_norm": 2.419576406478882, "learning_rate": 3.2370870669481834e-05, "loss": 2.044, "step": 7075 }, { "epoch": 0.7444502893214098, "grad_norm": 1.1183511018753052, "learning_rate": 3.234577203796223e-05, "loss": 1.7889, "step": 7076 }, { "epoch": 0.7445554971067859, "grad_norm": 1.3616372346878052, "learning_rate": 3.232068126290535e-05, "loss": 1.8218, "step": 7077 }, { "epoch": 0.7446607048921621, "grad_norm": 1.127997875213623, "learning_rate": 3.229559834722485e-05, "loss": 2.2213, "step": 7078 }, { "epoch": 0.7447659126775381, "grad_norm": 1.5394973754882812, "learning_rate": 3.227052329383362e-05, "loss": 1.6531, "step": 7079 }, { "epoch": 0.7448711204629143, "grad_norm": 1.4563318490982056, "learning_rate": 3.224545610564345e-05, "loss": 2.0104, "step": 7080 }, { "epoch": 0.7449763282482904, "grad_norm": 1.76889169216156, "learning_rate": 3.2220396785565375e-05, "loss": 1.372, "step": 7081 }, { "epoch": 0.7450815360336664, "grad_norm": 1.5505903959274292, "learning_rate": 3.219534533650944e-05, "loss": 2.0011, "step": 7082 }, { "epoch": 0.7451867438190426, "grad_norm": 1.7687225341796875, "learning_rate": 3.217030176138474e-05, "loss": 1.8728, "step": 7083 }, { "epoch": 0.7452919516044187, "grad_norm": 1.5347532033920288, "learning_rate": 3.214526606309957e-05, "loss": 1.6496, "step": 7084 }, { "epoch": 0.7453971593897949, "grad_norm": 1.3686238527297974, "learning_rate": 3.212023824456121e-05, "loss": 1.7251, "step": 7085 }, { "epoch": 0.7455023671751709, "grad_norm": 1.5368924140930176, "learning_rate": 3.2095218308676024e-05, "loss": 1.6474, "step": 7086 }, { "epoch": 0.7456075749605471, "grad_norm": 1.4504789113998413, "learning_rate": 3.207020625834957e-05, "loss": 1.838, "step": 7087 }, { "epoch": 0.7457127827459232, "grad_norm": 1.296593427658081, "learning_rate": 3.204520209648637e-05, "loss": 1.4902, "step": 7088 }, { "epoch": 0.7458179905312993, "grad_norm": 1.3226828575134277, "learning_rate": 3.2020205825990056e-05, "loss": 1.504, "step": 7089 }, { "epoch": 0.7459231983166754, "grad_norm": 1.6971758604049683, "learning_rate": 3.199521744976342e-05, "loss": 1.4026, "step": 7090 }, { "epoch": 0.7460284061020516, "grad_norm": 2.515711545944214, "learning_rate": 3.1970236970708234e-05, "loss": 1.9895, "step": 7091 }, { "epoch": 0.7461336138874277, "grad_norm": 2.220277786254883, "learning_rate": 3.194526439172539e-05, "loss": 1.8306, "step": 7092 }, { "epoch": 0.7462388216728038, "grad_norm": 1.5911275148391724, "learning_rate": 3.192029971571492e-05, "loss": 2.0089, "step": 7093 }, { "epoch": 0.7463440294581799, "grad_norm": 1.8149131536483765, "learning_rate": 3.189534294557587e-05, "loss": 1.6243, "step": 7094 }, { "epoch": 0.746449237243556, "grad_norm": 1.3475899696350098, "learning_rate": 3.187039408420638e-05, "loss": 1.5206, "step": 7095 }, { "epoch": 0.7465544450289321, "grad_norm": 1.1558003425598145, "learning_rate": 3.184545313450368e-05, "loss": 2.1106, "step": 7096 }, { "epoch": 0.7466596528143082, "grad_norm": 1.5029397010803223, "learning_rate": 3.182052009936404e-05, "loss": 1.6606, "step": 7097 }, { "epoch": 0.7467648605996844, "grad_norm": 1.187970519065857, "learning_rate": 3.1795594981682917e-05, "loss": 1.9256, "step": 7098 }, { "epoch": 0.7468700683850605, "grad_norm": 1.2517000436782837, "learning_rate": 3.1770677784354773e-05, "loss": 1.4797, "step": 7099 }, { "epoch": 0.7469752761704366, "grad_norm": 1.3138651847839355, "learning_rate": 3.174576851027311e-05, "loss": 2.2821, "step": 7100 }, { "epoch": 0.7470804839558127, "grad_norm": 1.6332142353057861, "learning_rate": 3.172086716233061e-05, "loss": 1.5739, "step": 7101 }, { "epoch": 0.7471856917411889, "grad_norm": 1.3998867273330688, "learning_rate": 3.1695973743418994e-05, "loss": 1.7157, "step": 7102 }, { "epoch": 0.7472908995265649, "grad_norm": 1.38740873336792, "learning_rate": 3.167108825642897e-05, "loss": 1.3737, "step": 7103 }, { "epoch": 0.7473961073119411, "grad_norm": 1.5622433423995972, "learning_rate": 3.164621070425051e-05, "loss": 1.449, "step": 7104 }, { "epoch": 0.7475013150973172, "grad_norm": 1.275417685508728, "learning_rate": 3.162134108977253e-05, "loss": 1.6909, "step": 7105 }, { "epoch": 0.7476065228826934, "grad_norm": 1.0392786264419556, "learning_rate": 3.159647941588298e-05, "loss": 1.8211, "step": 7106 }, { "epoch": 0.7477117306680694, "grad_norm": 1.3279063701629639, "learning_rate": 3.1571625685469086e-05, "loss": 1.6451, "step": 7107 }, { "epoch": 0.7478169384534455, "grad_norm": 1.5140197277069092, "learning_rate": 3.1546779901416965e-05, "loss": 2.0696, "step": 7108 }, { "epoch": 0.7479221462388217, "grad_norm": 2.04419207572937, "learning_rate": 3.152194206661185e-05, "loss": 1.963, "step": 7109 }, { "epoch": 0.7480273540241978, "grad_norm": 1.3939889669418335, "learning_rate": 3.149711218393814e-05, "loss": 1.2607, "step": 7110 }, { "epoch": 0.7481325618095739, "grad_norm": 1.5365245342254639, "learning_rate": 3.147229025627922e-05, "loss": 1.5934, "step": 7111 }, { "epoch": 0.74823776959495, "grad_norm": 1.3564833402633667, "learning_rate": 3.144747628651754e-05, "loss": 1.3875, "step": 7112 }, { "epoch": 0.7483429773803262, "grad_norm": 1.9279536008834839, "learning_rate": 3.142267027753474e-05, "loss": 2.0533, "step": 7113 }, { "epoch": 0.7484481851657022, "grad_norm": 1.5397069454193115, "learning_rate": 3.139787223221141e-05, "loss": 1.8882, "step": 7114 }, { "epoch": 0.7485533929510784, "grad_norm": 2.6162831783294678, "learning_rate": 3.137308215342729e-05, "loss": 2.1646, "step": 7115 }, { "epoch": 0.7486586007364545, "grad_norm": 1.33000910282135, "learning_rate": 3.134830004406114e-05, "loss": 1.243, "step": 7116 }, { "epoch": 0.7487638085218307, "grad_norm": 1.1963053941726685, "learning_rate": 3.1323525906990826e-05, "loss": 1.856, "step": 7117 }, { "epoch": 0.7488690163072067, "grad_norm": 1.3302133083343506, "learning_rate": 3.129875974509332e-05, "loss": 1.9321, "step": 7118 }, { "epoch": 0.7489742240925829, "grad_norm": 1.8992441892623901, "learning_rate": 3.127400156124463e-05, "loss": 1.5662, "step": 7119 }, { "epoch": 0.749079431877959, "grad_norm": 1.2559592723846436, "learning_rate": 3.1249251358319784e-05, "loss": 1.9587, "step": 7120 }, { "epoch": 0.749184639663335, "grad_norm": 1.8781805038452148, "learning_rate": 3.122450913919302e-05, "loss": 1.5198, "step": 7121 }, { "epoch": 0.7492898474487112, "grad_norm": 1.254428505897522, "learning_rate": 3.1199774906737557e-05, "loss": 2.0107, "step": 7122 }, { "epoch": 0.7493950552340873, "grad_norm": 1.7342922687530518, "learning_rate": 3.1175048663825626e-05, "loss": 1.8242, "step": 7123 }, { "epoch": 0.7495002630194635, "grad_norm": 1.5174115896224976, "learning_rate": 3.11503304133287e-05, "loss": 1.2619, "step": 7124 }, { "epoch": 0.7496054708048395, "grad_norm": 1.4857186079025269, "learning_rate": 3.1125620158117186e-05, "loss": 1.4888, "step": 7125 }, { "epoch": 0.7497106785902157, "grad_norm": 1.7400273084640503, "learning_rate": 3.110091790106057e-05, "loss": 1.5356, "step": 7126 }, { "epoch": 0.7498158863755918, "grad_norm": 1.744299292564392, "learning_rate": 3.107622364502751e-05, "loss": 2.2264, "step": 7127 }, { "epoch": 0.7499210941609679, "grad_norm": 2.1220436096191406, "learning_rate": 3.105153739288561e-05, "loss": 1.645, "step": 7128 }, { "epoch": 0.750026301946344, "grad_norm": 1.1671090126037598, "learning_rate": 3.102685914750166e-05, "loss": 1.389, "step": 7129 }, { "epoch": 0.7501315097317202, "grad_norm": 1.858191728591919, "learning_rate": 3.100218891174144e-05, "loss": 1.56, "step": 7130 }, { "epoch": 0.7502367175170963, "grad_norm": 1.2349094152450562, "learning_rate": 3.097752668846977e-05, "loss": 1.6298, "step": 7131 }, { "epoch": 0.7503419253024723, "grad_norm": 1.369010329246521, "learning_rate": 3.095287248055069e-05, "loss": 1.7109, "step": 7132 }, { "epoch": 0.7504471330878485, "grad_norm": 1.2926762104034424, "learning_rate": 3.092822629084715e-05, "loss": 1.5841, "step": 7133 }, { "epoch": 0.7505523408732246, "grad_norm": 1.9117960929870605, "learning_rate": 3.090358812222122e-05, "loss": 2.0065, "step": 7134 }, { "epoch": 0.7506575486586007, "grad_norm": 1.1057976484298706, "learning_rate": 3.0878957977534096e-05, "loss": 1.5541, "step": 7135 }, { "epoch": 0.7507627564439768, "grad_norm": 1.4562286138534546, "learning_rate": 3.0854335859645975e-05, "loss": 1.7463, "step": 7136 }, { "epoch": 0.750867964229353, "grad_norm": 1.2036335468292236, "learning_rate": 3.0829721771416146e-05, "loss": 1.3571, "step": 7137 }, { "epoch": 0.7509731720147291, "grad_norm": 1.2491194009780884, "learning_rate": 3.080511571570294e-05, "loss": 1.834, "step": 7138 }, { "epoch": 0.7510783798001052, "grad_norm": 1.3567789793014526, "learning_rate": 3.078051769536378e-05, "loss": 1.8808, "step": 7139 }, { "epoch": 0.7511835875854813, "grad_norm": 1.4406784772872925, "learning_rate": 3.0755927713255174e-05, "loss": 1.8301, "step": 7140 }, { "epoch": 0.7512887953708575, "grad_norm": 1.7407746315002441, "learning_rate": 3.073134577223268e-05, "loss": 1.3971, "step": 7141 }, { "epoch": 0.7513940031562336, "grad_norm": 1.3878988027572632, "learning_rate": 3.070677187515087e-05, "loss": 1.412, "step": 7142 }, { "epoch": 0.7514992109416097, "grad_norm": 2.31683611869812, "learning_rate": 3.06822060248635e-05, "loss": 2.0428, "step": 7143 }, { "epoch": 0.7516044187269858, "grad_norm": 1.463760495185852, "learning_rate": 3.065764822422329e-05, "loss": 1.7479, "step": 7144 }, { "epoch": 0.751709626512362, "grad_norm": 1.3902920484542847, "learning_rate": 3.063309847608202e-05, "loss": 1.5929, "step": 7145 }, { "epoch": 0.751814834297738, "grad_norm": 1.9475911855697632, "learning_rate": 3.060855678329063e-05, "loss": 1.6469, "step": 7146 }, { "epoch": 0.7519200420831141, "grad_norm": 1.611803650856018, "learning_rate": 3.0584023148699046e-05, "loss": 1.775, "step": 7147 }, { "epoch": 0.7520252498684903, "grad_norm": 1.7494837045669556, "learning_rate": 3.055949757515624e-05, "loss": 1.7225, "step": 7148 }, { "epoch": 0.7521304576538664, "grad_norm": 1.4257961511611938, "learning_rate": 3.0534980065510345e-05, "loss": 1.7894, "step": 7149 }, { "epoch": 0.7522356654392425, "grad_norm": 1.4211279153823853, "learning_rate": 3.0510470622608478e-05, "loss": 1.7914, "step": 7150 }, { "epoch": 0.7523408732246186, "grad_norm": 1.7004531621932983, "learning_rate": 3.0485969249296797e-05, "loss": 2.1648, "step": 7151 }, { "epoch": 0.7524460810099948, "grad_norm": 1.0059231519699097, "learning_rate": 3.0461475948420637e-05, "loss": 1.7455, "step": 7152 }, { "epoch": 0.7525512887953708, "grad_norm": 1.09248685836792, "learning_rate": 3.043699072282429e-05, "loss": 1.839, "step": 7153 }, { "epoch": 0.752656496580747, "grad_norm": 2.1308209896087646, "learning_rate": 3.041251357535111e-05, "loss": 1.5851, "step": 7154 }, { "epoch": 0.7527617043661231, "grad_norm": 1.225701928138733, "learning_rate": 3.0388044508843616e-05, "loss": 2.0519, "step": 7155 }, { "epoch": 0.7528669121514993, "grad_norm": 1.9547263383865356, "learning_rate": 3.0363583526143292e-05, "loss": 1.7462, "step": 7156 }, { "epoch": 0.7529721199368753, "grad_norm": 1.4787694215774536, "learning_rate": 3.0339130630090673e-05, "loss": 1.0761, "step": 7157 }, { "epoch": 0.7530773277222514, "grad_norm": 1.6599515676498413, "learning_rate": 3.031468582352548e-05, "loss": 1.8568, "step": 7158 }, { "epoch": 0.7531825355076276, "grad_norm": 2.056112766265869, "learning_rate": 3.0290249109286296e-05, "loss": 1.7289, "step": 7159 }, { "epoch": 0.7532877432930036, "grad_norm": 1.7584683895111084, "learning_rate": 3.0265820490210973e-05, "loss": 1.9411, "step": 7160 }, { "epoch": 0.7533929510783798, "grad_norm": 1.8417630195617676, "learning_rate": 3.0241399969136276e-05, "loss": 1.5764, "step": 7161 }, { "epoch": 0.7534981588637559, "grad_norm": 1.1658049821853638, "learning_rate": 3.0216987548898068e-05, "loss": 1.7206, "step": 7162 }, { "epoch": 0.7536033666491321, "grad_norm": 1.9770032167434692, "learning_rate": 3.019258323233133e-05, "loss": 1.5526, "step": 7163 }, { "epoch": 0.7537085744345081, "grad_norm": 1.6163029670715332, "learning_rate": 3.0168187022270032e-05, "loss": 1.8265, "step": 7164 }, { "epoch": 0.7538137822198843, "grad_norm": 1.530423641204834, "learning_rate": 3.0143798921547193e-05, "loss": 1.5206, "step": 7165 }, { "epoch": 0.7539189900052604, "grad_norm": 1.2398508787155151, "learning_rate": 3.011941893299499e-05, "loss": 1.5016, "step": 7166 }, { "epoch": 0.7540241977906365, "grad_norm": 1.3902400732040405, "learning_rate": 3.0095047059444546e-05, "loss": 1.2807, "step": 7167 }, { "epoch": 0.7541294055760126, "grad_norm": 1.967103362083435, "learning_rate": 3.0070683303726076e-05, "loss": 1.3802, "step": 7168 }, { "epoch": 0.7542346133613888, "grad_norm": 1.6876822710037231, "learning_rate": 3.0046327668668904e-05, "loss": 1.5044, "step": 7169 }, { "epoch": 0.7543398211467649, "grad_norm": 1.5691266059875488, "learning_rate": 3.002198015710136e-05, "loss": 1.4267, "step": 7170 }, { "epoch": 0.7544450289321409, "grad_norm": 1.312497615814209, "learning_rate": 2.999764077185079e-05, "loss": 1.4682, "step": 7171 }, { "epoch": 0.7545502367175171, "grad_norm": 1.056235432624817, "learning_rate": 2.997330951574371e-05, "loss": 1.8569, "step": 7172 }, { "epoch": 0.7546554445028932, "grad_norm": 1.192795753479004, "learning_rate": 2.9948986391605584e-05, "loss": 1.3137, "step": 7173 }, { "epoch": 0.7547606522882694, "grad_norm": 2.208214282989502, "learning_rate": 2.9924671402261018e-05, "loss": 1.7247, "step": 7174 }, { "epoch": 0.7548658600736454, "grad_norm": 1.3944358825683594, "learning_rate": 2.9900364550533612e-05, "loss": 1.6949, "step": 7175 }, { "epoch": 0.7549710678590216, "grad_norm": 1.4793987274169922, "learning_rate": 2.9876065839246005e-05, "loss": 1.8035, "step": 7176 }, { "epoch": 0.7550762756443977, "grad_norm": 1.936575174331665, "learning_rate": 2.9851775271219996e-05, "loss": 1.279, "step": 7177 }, { "epoch": 0.7551814834297738, "grad_norm": 1.4418457746505737, "learning_rate": 2.9827492849276317e-05, "loss": 1.6786, "step": 7178 }, { "epoch": 0.7552866912151499, "grad_norm": 1.314530849456787, "learning_rate": 2.9803218576234836e-05, "loss": 1.5827, "step": 7179 }, { "epoch": 0.7553918990005261, "grad_norm": 1.114014983177185, "learning_rate": 2.9778952454914422e-05, "loss": 1.484, "step": 7180 }, { "epoch": 0.7554971067859022, "grad_norm": 1.7452201843261719, "learning_rate": 2.9754694488133038e-05, "loss": 2.1129, "step": 7181 }, { "epoch": 0.7556023145712782, "grad_norm": 1.7121224403381348, "learning_rate": 2.9730444678707625e-05, "loss": 1.7277, "step": 7182 }, { "epoch": 0.7557075223566544, "grad_norm": 1.2562663555145264, "learning_rate": 2.9706203029454316e-05, "loss": 1.6911, "step": 7183 }, { "epoch": 0.7558127301420305, "grad_norm": 1.4814766645431519, "learning_rate": 2.9681969543188148e-05, "loss": 1.5525, "step": 7184 }, { "epoch": 0.7559179379274066, "grad_norm": 0.9775087833404541, "learning_rate": 2.965774422272334e-05, "loss": 1.9295, "step": 7185 }, { "epoch": 0.7560231457127827, "grad_norm": 1.847643494606018, "learning_rate": 2.9633527070873057e-05, "loss": 1.5732, "step": 7186 }, { "epoch": 0.7561283534981589, "grad_norm": 1.5069630146026611, "learning_rate": 2.9609318090449533e-05, "loss": 1.3961, "step": 7187 }, { "epoch": 0.756233561283535, "grad_norm": 1.6327673196792603, "learning_rate": 2.958511728426414e-05, "loss": 1.0892, "step": 7188 }, { "epoch": 0.7563387690689111, "grad_norm": 1.8135273456573486, "learning_rate": 2.956092465512721e-05, "loss": 1.354, "step": 7189 }, { "epoch": 0.7564439768542872, "grad_norm": 2.080944538116455, "learning_rate": 2.9536740205848113e-05, "loss": 1.5985, "step": 7190 }, { "epoch": 0.7565491846396634, "grad_norm": 1.4439144134521484, "learning_rate": 2.9512563939235382e-05, "loss": 1.5032, "step": 7191 }, { "epoch": 0.7566543924250394, "grad_norm": 1.6564826965332031, "learning_rate": 2.9488395858096485e-05, "loss": 1.9067, "step": 7192 }, { "epoch": 0.7567596002104156, "grad_norm": 1.779802918434143, "learning_rate": 2.9464235965237964e-05, "loss": 1.8031, "step": 7193 }, { "epoch": 0.7568648079957917, "grad_norm": 1.4289071559906006, "learning_rate": 2.9440084263465484e-05, "loss": 1.616, "step": 7194 }, { "epoch": 0.7569700157811678, "grad_norm": 1.3593642711639404, "learning_rate": 2.941594075558366e-05, "loss": 0.9734, "step": 7195 }, { "epoch": 0.7570752235665439, "grad_norm": 2.13067889213562, "learning_rate": 2.939180544439618e-05, "loss": 1.4978, "step": 7196 }, { "epoch": 0.75718043135192, "grad_norm": 1.6457139253616333, "learning_rate": 2.936767833270586e-05, "loss": 1.8645, "step": 7197 }, { "epoch": 0.7572856391372962, "grad_norm": 1.727137565612793, "learning_rate": 2.934355942331447e-05, "loss": 1.7537, "step": 7198 }, { "epoch": 0.7573908469226722, "grad_norm": 1.2516162395477295, "learning_rate": 2.9319448719022824e-05, "loss": 1.9984, "step": 7199 }, { "epoch": 0.7574960547080484, "grad_norm": 1.7397236824035645, "learning_rate": 2.929534622263088e-05, "loss": 1.6422, "step": 7200 }, { "epoch": 0.7576012624934245, "grad_norm": 1.722465991973877, "learning_rate": 2.927125193693756e-05, "loss": 1.745, "step": 7201 }, { "epoch": 0.7577064702788007, "grad_norm": 1.5950039625167847, "learning_rate": 2.9247165864740856e-05, "loss": 1.6426, "step": 7202 }, { "epoch": 0.7578116780641767, "grad_norm": 1.0130795240402222, "learning_rate": 2.9223088008837785e-05, "loss": 1.5676, "step": 7203 }, { "epoch": 0.7579168858495529, "grad_norm": 2.0876479148864746, "learning_rate": 2.919901837202441e-05, "loss": 1.7984, "step": 7204 }, { "epoch": 0.758022093634929, "grad_norm": 2.0102639198303223, "learning_rate": 2.917495695709592e-05, "loss": 1.2913, "step": 7205 }, { "epoch": 0.7581273014203052, "grad_norm": 1.934104561805725, "learning_rate": 2.915090376684646e-05, "loss": 0.9719, "step": 7206 }, { "epoch": 0.7582325092056812, "grad_norm": 1.983300805091858, "learning_rate": 2.9126858804069223e-05, "loss": 1.83, "step": 7207 }, { "epoch": 0.7583377169910573, "grad_norm": 2.1567113399505615, "learning_rate": 2.9102822071556512e-05, "loss": 1.7204, "step": 7208 }, { "epoch": 0.7584429247764335, "grad_norm": 1.1860876083374023, "learning_rate": 2.9078793572099616e-05, "loss": 1.9178, "step": 7209 }, { "epoch": 0.7585481325618095, "grad_norm": 2.058955669403076, "learning_rate": 2.905477330848886e-05, "loss": 1.7382, "step": 7210 }, { "epoch": 0.7586533403471857, "grad_norm": 1.8354625701904297, "learning_rate": 2.9030761283513684e-05, "loss": 2.3162, "step": 7211 }, { "epoch": 0.7587585481325618, "grad_norm": 1.469107747077942, "learning_rate": 2.9006757499962512e-05, "loss": 1.5963, "step": 7212 }, { "epoch": 0.758863755917938, "grad_norm": 1.9937857389450073, "learning_rate": 2.8982761960622782e-05, "loss": 1.22, "step": 7213 }, { "epoch": 0.758968963703314, "grad_norm": 1.7028518915176392, "learning_rate": 2.8958774668281084e-05, "loss": 1.4993, "step": 7214 }, { "epoch": 0.7590741714886902, "grad_norm": 1.4997313022613525, "learning_rate": 2.8934795625722943e-05, "loss": 1.2415, "step": 7215 }, { "epoch": 0.7591793792740663, "grad_norm": 2.3115732669830322, "learning_rate": 2.8910824835732952e-05, "loss": 1.4934, "step": 7216 }, { "epoch": 0.7592845870594424, "grad_norm": 1.127637267112732, "learning_rate": 2.8886862301094807e-05, "loss": 1.6622, "step": 7217 }, { "epoch": 0.7593897948448185, "grad_norm": 1.6619749069213867, "learning_rate": 2.8862908024591186e-05, "loss": 1.9157, "step": 7218 }, { "epoch": 0.7594950026301946, "grad_norm": 1.5014777183532715, "learning_rate": 2.8838962009003756e-05, "loss": 1.8232, "step": 7219 }, { "epoch": 0.7596002104155708, "grad_norm": 1.5407613515853882, "learning_rate": 2.881502425711339e-05, "loss": 1.6237, "step": 7220 }, { "epoch": 0.7597054182009468, "grad_norm": 1.672674298286438, "learning_rate": 2.8791094771699802e-05, "loss": 1.9443, "step": 7221 }, { "epoch": 0.759810625986323, "grad_norm": 1.8555692434310913, "learning_rate": 2.8767173555541972e-05, "loss": 1.3203, "step": 7222 }, { "epoch": 0.7599158337716991, "grad_norm": 1.1525888442993164, "learning_rate": 2.8743260611417665e-05, "loss": 1.4575, "step": 7223 }, { "epoch": 0.7600210415570752, "grad_norm": 1.2628978490829468, "learning_rate": 2.8719355942103842e-05, "loss": 1.5078, "step": 7224 }, { "epoch": 0.7601262493424513, "grad_norm": 1.7043256759643555, "learning_rate": 2.8695459550376515e-05, "loss": 1.2751, "step": 7225 }, { "epoch": 0.7602314571278275, "grad_norm": 2.068817615509033, "learning_rate": 2.867157143901067e-05, "loss": 1.9544, "step": 7226 }, { "epoch": 0.7603366649132036, "grad_norm": 1.3836297988891602, "learning_rate": 2.8647691610780326e-05, "loss": 1.5782, "step": 7227 }, { "epoch": 0.7604418726985797, "grad_norm": 1.6043787002563477, "learning_rate": 2.862382006845863e-05, "loss": 1.122, "step": 7228 }, { "epoch": 0.7605470804839558, "grad_norm": 1.506122350692749, "learning_rate": 2.8599956814817642e-05, "loss": 1.8322, "step": 7229 }, { "epoch": 0.760652288269332, "grad_norm": 1.124856948852539, "learning_rate": 2.857610185262859e-05, "loss": 1.8696, "step": 7230 }, { "epoch": 0.760757496054708, "grad_norm": 1.269283652305603, "learning_rate": 2.8552255184661624e-05, "loss": 1.8721, "step": 7231 }, { "epoch": 0.7608627038400841, "grad_norm": 1.4874763488769531, "learning_rate": 2.8528416813685975e-05, "loss": 1.6838, "step": 7232 }, { "epoch": 0.7609679116254603, "grad_norm": 1.3948214054107666, "learning_rate": 2.8504586742469952e-05, "loss": 1.6012, "step": 7233 }, { "epoch": 0.7610731194108364, "grad_norm": 1.520309329032898, "learning_rate": 2.848076497378085e-05, "loss": 2.2024, "step": 7234 }, { "epoch": 0.7611783271962125, "grad_norm": 1.4063392877578735, "learning_rate": 2.8456951510384966e-05, "loss": 1.7189, "step": 7235 }, { "epoch": 0.7612835349815886, "grad_norm": 1.1339725255966187, "learning_rate": 2.8433146355047748e-05, "loss": 1.6356, "step": 7236 }, { "epoch": 0.7613887427669648, "grad_norm": 1.4083565473556519, "learning_rate": 2.8409349510533578e-05, "loss": 1.9028, "step": 7237 }, { "epoch": 0.7614939505523409, "grad_norm": 1.4033015966415405, "learning_rate": 2.8385560979605884e-05, "loss": 1.6745, "step": 7238 }, { "epoch": 0.761599158337717, "grad_norm": 1.8718355894088745, "learning_rate": 2.83617807650272e-05, "loss": 1.8888, "step": 7239 }, { "epoch": 0.7617043661230931, "grad_norm": 1.4770317077636719, "learning_rate": 2.833800886955902e-05, "loss": 1.5846, "step": 7240 }, { "epoch": 0.7618095739084693, "grad_norm": 1.7958834171295166, "learning_rate": 2.831424529596185e-05, "loss": 1.2971, "step": 7241 }, { "epoch": 0.7619147816938453, "grad_norm": 1.5714819431304932, "learning_rate": 2.8290490046995365e-05, "loss": 1.4623, "step": 7242 }, { "epoch": 0.7620199894792214, "grad_norm": 1.6446837186813354, "learning_rate": 2.8266743125418128e-05, "loss": 1.7678, "step": 7243 }, { "epoch": 0.7621251972645976, "grad_norm": 1.509402871131897, "learning_rate": 2.8243004533987793e-05, "loss": 1.8722, "step": 7244 }, { "epoch": 0.7622304050499737, "grad_norm": 1.4639089107513428, "learning_rate": 2.8219274275461062e-05, "loss": 1.5998, "step": 7245 }, { "epoch": 0.7623356128353498, "grad_norm": 1.3890831470489502, "learning_rate": 2.8195552352593612e-05, "loss": 1.7413, "step": 7246 }, { "epoch": 0.7624408206207259, "grad_norm": 1.8161264657974243, "learning_rate": 2.8171838768140245e-05, "loss": 1.8833, "step": 7247 }, { "epoch": 0.7625460284061021, "grad_norm": 1.8306972980499268, "learning_rate": 2.8148133524854716e-05, "loss": 1.9024, "step": 7248 }, { "epoch": 0.7626512361914781, "grad_norm": 1.03922438621521, "learning_rate": 2.8124436625489813e-05, "loss": 1.7287, "step": 7249 }, { "epoch": 0.7627564439768543, "grad_norm": 2.0580270290374756, "learning_rate": 2.8100748072797435e-05, "loss": 1.5174, "step": 7250 }, { "epoch": 0.7628616517622304, "grad_norm": 1.4314181804656982, "learning_rate": 2.8077067869528417e-05, "loss": 1.6338, "step": 7251 }, { "epoch": 0.7629668595476066, "grad_norm": 1.2152514457702637, "learning_rate": 2.8053396018432644e-05, "loss": 1.6196, "step": 7252 }, { "epoch": 0.7630720673329826, "grad_norm": 2.0151803493499756, "learning_rate": 2.802973252225911e-05, "loss": 1.5984, "step": 7253 }, { "epoch": 0.7631772751183588, "grad_norm": 1.595259428024292, "learning_rate": 2.8006077383755747e-05, "loss": 1.4817, "step": 7254 }, { "epoch": 0.7632824829037349, "grad_norm": 0.9341708421707153, "learning_rate": 2.7982430605669517e-05, "loss": 1.7419, "step": 7255 }, { "epoch": 0.7633876906891109, "grad_norm": 1.5944820642471313, "learning_rate": 2.7958792190746496e-05, "loss": 1.6313, "step": 7256 }, { "epoch": 0.7634928984744871, "grad_norm": 1.1736741065979004, "learning_rate": 2.7935162141731718e-05, "loss": 1.6742, "step": 7257 }, { "epoch": 0.7635981062598632, "grad_norm": 1.133220911026001, "learning_rate": 2.7911540461369222e-05, "loss": 1.443, "step": 7258 }, { "epoch": 0.7637033140452394, "grad_norm": 1.820003867149353, "learning_rate": 2.788792715240218e-05, "loss": 1.9558, "step": 7259 }, { "epoch": 0.7638085218306154, "grad_norm": 1.6289899349212646, "learning_rate": 2.78643222175727e-05, "loss": 1.863, "step": 7260 }, { "epoch": 0.7639137296159916, "grad_norm": 1.1900887489318848, "learning_rate": 2.7840725659621915e-05, "loss": 2.0473, "step": 7261 }, { "epoch": 0.7640189374013677, "grad_norm": 2.775047779083252, "learning_rate": 2.781713748129008e-05, "loss": 1.7868, "step": 7262 }, { "epoch": 0.7641241451867438, "grad_norm": 1.3283895254135132, "learning_rate": 2.7793557685316363e-05, "loss": 1.4167, "step": 7263 }, { "epoch": 0.7642293529721199, "grad_norm": 1.6619638204574585, "learning_rate": 2.7769986274439e-05, "loss": 1.7392, "step": 7264 }, { "epoch": 0.7643345607574961, "grad_norm": 1.9690237045288086, "learning_rate": 2.774642325139535e-05, "loss": 1.3192, "step": 7265 }, { "epoch": 0.7644397685428722, "grad_norm": 1.4869492053985596, "learning_rate": 2.772286861892157e-05, "loss": 1.8371, "step": 7266 }, { "epoch": 0.7645449763282482, "grad_norm": 2.642908811569214, "learning_rate": 2.769932237975309e-05, "loss": 1.3455, "step": 7267 }, { "epoch": 0.7646501841136244, "grad_norm": 2.0182862281799316, "learning_rate": 2.7675784536624215e-05, "loss": 1.8982, "step": 7268 }, { "epoch": 0.7647553918990005, "grad_norm": 1.7238906621932983, "learning_rate": 2.7652255092268298e-05, "loss": 1.21, "step": 7269 }, { "epoch": 0.7648605996843767, "grad_norm": 1.1936345100402832, "learning_rate": 2.762873404941779e-05, "loss": 2.1362, "step": 7270 }, { "epoch": 0.7649658074697527, "grad_norm": 1.5153876543045044, "learning_rate": 2.7605221410804093e-05, "loss": 1.9925, "step": 7271 }, { "epoch": 0.7650710152551289, "grad_norm": 1.9088298082351685, "learning_rate": 2.7581717179157606e-05, "loss": 1.6018, "step": 7272 }, { "epoch": 0.765176223040505, "grad_norm": 1.2692924737930298, "learning_rate": 2.755822135720787e-05, "loss": 1.8706, "step": 7273 }, { "epoch": 0.7652814308258811, "grad_norm": 1.2598018646240234, "learning_rate": 2.7534733947683334e-05, "loss": 1.7554, "step": 7274 }, { "epoch": 0.7653866386112572, "grad_norm": 2.6628575325012207, "learning_rate": 2.7511254953311495e-05, "loss": 1.4819, "step": 7275 }, { "epoch": 0.7654918463966334, "grad_norm": 1.1039395332336426, "learning_rate": 2.748778437681895e-05, "loss": 1.0194, "step": 7276 }, { "epoch": 0.7655970541820095, "grad_norm": 1.6342849731445312, "learning_rate": 2.7464322220931205e-05, "loss": 1.455, "step": 7277 }, { "epoch": 0.7657022619673856, "grad_norm": 1.5950639247894287, "learning_rate": 2.7440868488372884e-05, "loss": 1.3383, "step": 7278 }, { "epoch": 0.7658074697527617, "grad_norm": 1.440382719039917, "learning_rate": 2.7417423181867585e-05, "loss": 1.8153, "step": 7279 }, { "epoch": 0.7659126775381379, "grad_norm": 2.1018497943878174, "learning_rate": 2.7393986304137887e-05, "loss": 1.6111, "step": 7280 }, { "epoch": 0.7660178853235139, "grad_norm": 1.6211340427398682, "learning_rate": 2.73705578579055e-05, "loss": 1.7447, "step": 7281 }, { "epoch": 0.76612309310889, "grad_norm": 1.3533055782318115, "learning_rate": 2.7347137845891068e-05, "loss": 1.5142, "step": 7282 }, { "epoch": 0.7662283008942662, "grad_norm": 1.5132229328155518, "learning_rate": 2.7323726270814253e-05, "loss": 1.6848, "step": 7283 }, { "epoch": 0.7663335086796423, "grad_norm": 1.4339923858642578, "learning_rate": 2.7300323135393812e-05, "loss": 1.6556, "step": 7284 }, { "epoch": 0.7664387164650184, "grad_norm": 2.367436170578003, "learning_rate": 2.727692844234746e-05, "loss": 1.9757, "step": 7285 }, { "epoch": 0.7665439242503945, "grad_norm": 1.1163835525512695, "learning_rate": 2.72535421943919e-05, "loss": 1.7622, "step": 7286 }, { "epoch": 0.7666491320357707, "grad_norm": 1.5662150382995605, "learning_rate": 2.7230164394242995e-05, "loss": 1.6492, "step": 7287 }, { "epoch": 0.7667543398211467, "grad_norm": 1.4997937679290771, "learning_rate": 2.720679504461542e-05, "loss": 1.3393, "step": 7288 }, { "epoch": 0.7668595476065229, "grad_norm": 2.0090813636779785, "learning_rate": 2.7183434148223052e-05, "loss": 1.1286, "step": 7289 }, { "epoch": 0.766964755391899, "grad_norm": 1.4088441133499146, "learning_rate": 2.7160081707778707e-05, "loss": 1.2337, "step": 7290 }, { "epoch": 0.7670699631772752, "grad_norm": 1.4657981395721436, "learning_rate": 2.7136737725994187e-05, "loss": 1.166, "step": 7291 }, { "epoch": 0.7671751709626512, "grad_norm": 2.0377955436706543, "learning_rate": 2.7113402205580408e-05, "loss": 1.8475, "step": 7292 }, { "epoch": 0.7672803787480273, "grad_norm": 1.972943663597107, "learning_rate": 2.7090075149247217e-05, "loss": 1.7478, "step": 7293 }, { "epoch": 0.7673855865334035, "grad_norm": 1.5420266389846802, "learning_rate": 2.7066756559703498e-05, "loss": 1.8444, "step": 7294 }, { "epoch": 0.7674907943187795, "grad_norm": 1.34401535987854, "learning_rate": 2.7043446439657193e-05, "loss": 1.4609, "step": 7295 }, { "epoch": 0.7675960021041557, "grad_norm": 1.3176796436309814, "learning_rate": 2.7020144791815218e-05, "loss": 1.3681, "step": 7296 }, { "epoch": 0.7677012098895318, "grad_norm": 1.1592514514923096, "learning_rate": 2.699685161888348e-05, "loss": 1.6415, "step": 7297 }, { "epoch": 0.767806417674908, "grad_norm": 1.5379077196121216, "learning_rate": 2.6973566923566994e-05, "loss": 1.543, "step": 7298 }, { "epoch": 0.767911625460284, "grad_norm": 2.4584033489227295, "learning_rate": 2.6950290708569716e-05, "loss": 1.558, "step": 7299 }, { "epoch": 0.7680168332456602, "grad_norm": 1.853628396987915, "learning_rate": 2.6927022976594607e-05, "loss": 2.2037, "step": 7300 }, { "epoch": 0.7681220410310363, "grad_norm": 1.1601117849349976, "learning_rate": 2.6903763730343713e-05, "loss": 1.8498, "step": 7301 }, { "epoch": 0.7682272488164125, "grad_norm": 1.5906758308410645, "learning_rate": 2.6880512972518047e-05, "loss": 2.0151, "step": 7302 }, { "epoch": 0.7683324566017885, "grad_norm": 1.081691026687622, "learning_rate": 2.6857270705817595e-05, "loss": 1.7113, "step": 7303 }, { "epoch": 0.7684376643871647, "grad_norm": 1.651273250579834, "learning_rate": 2.6834036932941474e-05, "loss": 1.6251, "step": 7304 }, { "epoch": 0.7685428721725408, "grad_norm": 1.15084969997406, "learning_rate": 2.681081165658772e-05, "loss": 1.5665, "step": 7305 }, { "epoch": 0.7686480799579168, "grad_norm": 1.6445162296295166, "learning_rate": 2.678759487945337e-05, "loss": 1.7392, "step": 7306 }, { "epoch": 0.768753287743293, "grad_norm": 1.2424595355987549, "learning_rate": 2.676438660423457e-05, "loss": 2.0125, "step": 7307 }, { "epoch": 0.7688584955286691, "grad_norm": 2.284137487411499, "learning_rate": 2.6741186833626407e-05, "loss": 1.8287, "step": 7308 }, { "epoch": 0.7689637033140453, "grad_norm": 2.1580452919006348, "learning_rate": 2.6717995570322964e-05, "loss": 1.9503, "step": 7309 }, { "epoch": 0.7690689110994213, "grad_norm": 1.483157753944397, "learning_rate": 2.669481281701739e-05, "loss": 1.3867, "step": 7310 }, { "epoch": 0.7691741188847975, "grad_norm": 1.2321768999099731, "learning_rate": 2.667163857640179e-05, "loss": 1.7758, "step": 7311 }, { "epoch": 0.7692793266701736, "grad_norm": 1.4711391925811768, "learning_rate": 2.664847285116736e-05, "loss": 1.371, "step": 7312 }, { "epoch": 0.7693845344555497, "grad_norm": 1.1832042932510376, "learning_rate": 2.6625315644004244e-05, "loss": 1.6168, "step": 7313 }, { "epoch": 0.7694897422409258, "grad_norm": 1.5034308433532715, "learning_rate": 2.660216695760157e-05, "loss": 1.6241, "step": 7314 }, { "epoch": 0.769594950026302, "grad_norm": 1.575340747833252, "learning_rate": 2.6579026794647587e-05, "loss": 1.6202, "step": 7315 }, { "epoch": 0.7697001578116781, "grad_norm": 1.1293988227844238, "learning_rate": 2.655589515782946e-05, "loss": 1.7913, "step": 7316 }, { "epoch": 0.7698053655970541, "grad_norm": 2.4325718879699707, "learning_rate": 2.653277204983334e-05, "loss": 1.902, "step": 7317 }, { "epoch": 0.7699105733824303, "grad_norm": 1.1691190004348755, "learning_rate": 2.650965747334452e-05, "loss": 1.5993, "step": 7318 }, { "epoch": 0.7700157811678064, "grad_norm": 1.888068675994873, "learning_rate": 2.648655143104717e-05, "loss": 1.406, "step": 7319 }, { "epoch": 0.7701209889531825, "grad_norm": 1.8982930183410645, "learning_rate": 2.6463453925624503e-05, "loss": 1.584, "step": 7320 }, { "epoch": 0.7702261967385586, "grad_norm": 1.547112226486206, "learning_rate": 2.6440364959758813e-05, "loss": 1.9128, "step": 7321 }, { "epoch": 0.7703314045239348, "grad_norm": 2.7643849849700928, "learning_rate": 2.641728453613127e-05, "loss": 2.1104, "step": 7322 }, { "epoch": 0.7704366123093109, "grad_norm": 1.3012899160385132, "learning_rate": 2.6394212657422225e-05, "loss": 1.4724, "step": 7323 }, { "epoch": 0.770541820094687, "grad_norm": 1.8508931398391724, "learning_rate": 2.6371149326310874e-05, "loss": 1.8537, "step": 7324 }, { "epoch": 0.7706470278800631, "grad_norm": 2.359945774078369, "learning_rate": 2.6348094545475465e-05, "loss": 1.7603, "step": 7325 }, { "epoch": 0.7707522356654393, "grad_norm": 1.6346330642700195, "learning_rate": 2.6325048317593337e-05, "loss": 1.9917, "step": 7326 }, { "epoch": 0.7708574434508153, "grad_norm": 1.718336820602417, "learning_rate": 2.6302010645340746e-05, "loss": 1.9148, "step": 7327 }, { "epoch": 0.7709626512361915, "grad_norm": 2.0984418392181396, "learning_rate": 2.6278981531392945e-05, "loss": 1.852, "step": 7328 }, { "epoch": 0.7710678590215676, "grad_norm": 1.1272097826004028, "learning_rate": 2.625596097842432e-05, "loss": 1.2315, "step": 7329 }, { "epoch": 0.7711730668069438, "grad_norm": 1.9899998903274536, "learning_rate": 2.6232948989108086e-05, "loss": 1.5241, "step": 7330 }, { "epoch": 0.7712782745923198, "grad_norm": 1.6918717622756958, "learning_rate": 2.6209945566116545e-05, "loss": 1.5811, "step": 7331 }, { "epoch": 0.7713834823776959, "grad_norm": 2.1108553409576416, "learning_rate": 2.618695071212107e-05, "loss": 1.7367, "step": 7332 }, { "epoch": 0.7714886901630721, "grad_norm": 2.049133062362671, "learning_rate": 2.616396442979192e-05, "loss": 1.7588, "step": 7333 }, { "epoch": 0.7715938979484482, "grad_norm": 2.400456428527832, "learning_rate": 2.6140986721798466e-05, "loss": 1.5582, "step": 7334 }, { "epoch": 0.7716991057338243, "grad_norm": 1.7933460474014282, "learning_rate": 2.6118017590809017e-05, "loss": 2.0447, "step": 7335 }, { "epoch": 0.7718043135192004, "grad_norm": 1.4793000221252441, "learning_rate": 2.6095057039490878e-05, "loss": 1.9915, "step": 7336 }, { "epoch": 0.7719095213045766, "grad_norm": 1.532442569732666, "learning_rate": 2.6072105070510422e-05, "loss": 1.7986, "step": 7337 }, { "epoch": 0.7720147290899526, "grad_norm": 1.4576950073242188, "learning_rate": 2.6049161686532965e-05, "loss": 1.3017, "step": 7338 }, { "epoch": 0.7721199368753288, "grad_norm": 1.2867484092712402, "learning_rate": 2.6026226890222814e-05, "loss": 1.3474, "step": 7339 }, { "epoch": 0.7722251446607049, "grad_norm": 1.4471523761749268, "learning_rate": 2.600330068424338e-05, "loss": 1.8549, "step": 7340 }, { "epoch": 0.7723303524460811, "grad_norm": 1.353736400604248, "learning_rate": 2.5980383071256975e-05, "loss": 1.4423, "step": 7341 }, { "epoch": 0.7724355602314571, "grad_norm": 1.9009381532669067, "learning_rate": 2.595747405392491e-05, "loss": 1.3345, "step": 7342 }, { "epoch": 0.7725407680168332, "grad_norm": 1.2530128955841064, "learning_rate": 2.59345736349076e-05, "loss": 1.5174, "step": 7343 }, { "epoch": 0.7726459758022094, "grad_norm": 1.770195722579956, "learning_rate": 2.5911681816864354e-05, "loss": 1.6167, "step": 7344 }, { "epoch": 0.7727511835875854, "grad_norm": 2.0301904678344727, "learning_rate": 2.588879860245351e-05, "loss": 1.6029, "step": 7345 }, { "epoch": 0.7728563913729616, "grad_norm": 2.069531202316284, "learning_rate": 2.5865923994332463e-05, "loss": 1.8537, "step": 7346 }, { "epoch": 0.7729615991583377, "grad_norm": 1.2178399562835693, "learning_rate": 2.5843057995157548e-05, "loss": 1.4361, "step": 7347 }, { "epoch": 0.7730668069437139, "grad_norm": 1.5581814050674438, "learning_rate": 2.582020060758409e-05, "loss": 1.5575, "step": 7348 }, { "epoch": 0.7731720147290899, "grad_norm": 1.6995668411254883, "learning_rate": 2.579735183426649e-05, "loss": 1.515, "step": 7349 }, { "epoch": 0.7732772225144661, "grad_norm": 1.9909636974334717, "learning_rate": 2.577451167785808e-05, "loss": 2.097, "step": 7350 }, { "epoch": 0.7733824302998422, "grad_norm": 1.49967622756958, "learning_rate": 2.5751680141011214e-05, "loss": 1.6389, "step": 7351 }, { "epoch": 0.7734876380852183, "grad_norm": 1.4037280082702637, "learning_rate": 2.5728857226377246e-05, "loss": 1.6968, "step": 7352 }, { "epoch": 0.7735928458705944, "grad_norm": 1.8199878931045532, "learning_rate": 2.5706042936606477e-05, "loss": 1.9098, "step": 7353 }, { "epoch": 0.7736980536559706, "grad_norm": 1.3631402254104614, "learning_rate": 2.5683237274348327e-05, "loss": 1.8551, "step": 7354 }, { "epoch": 0.7738032614413467, "grad_norm": 1.259263038635254, "learning_rate": 2.5660440242251117e-05, "loss": 1.6699, "step": 7355 }, { "epoch": 0.7739084692267227, "grad_norm": 1.1155054569244385, "learning_rate": 2.5637651842962164e-05, "loss": 1.5838, "step": 7356 }, { "epoch": 0.7740136770120989, "grad_norm": 1.8859355449676514, "learning_rate": 2.561487207912785e-05, "loss": 1.7185, "step": 7357 }, { "epoch": 0.774118884797475, "grad_norm": 1.8426570892333984, "learning_rate": 2.5592100953393504e-05, "loss": 1.9184, "step": 7358 }, { "epoch": 0.7742240925828511, "grad_norm": 1.5915162563323975, "learning_rate": 2.5569338468403426e-05, "loss": 1.8337, "step": 7359 }, { "epoch": 0.7743293003682272, "grad_norm": 1.3176912069320679, "learning_rate": 2.5546584626801006e-05, "loss": 1.5611, "step": 7360 }, { "epoch": 0.7744345081536034, "grad_norm": 1.1959328651428223, "learning_rate": 2.5523839431228537e-05, "loss": 2.022, "step": 7361 }, { "epoch": 0.7745397159389795, "grad_norm": 1.117728590965271, "learning_rate": 2.550110288432733e-05, "loss": 2.0816, "step": 7362 }, { "epoch": 0.7746449237243556, "grad_norm": 1.091355800628662, "learning_rate": 2.5478374988737753e-05, "loss": 1.4844, "step": 7363 }, { "epoch": 0.7747501315097317, "grad_norm": 1.6453313827514648, "learning_rate": 2.5455655747099093e-05, "loss": 1.8608, "step": 7364 }, { "epoch": 0.7748553392951079, "grad_norm": 1.3026262521743774, "learning_rate": 2.543294516204964e-05, "loss": 1.9639, "step": 7365 }, { "epoch": 0.774960547080484, "grad_norm": 1.868897795677185, "learning_rate": 2.541024323622674e-05, "loss": 1.6702, "step": 7366 }, { "epoch": 0.77506575486586, "grad_norm": 2.3049232959747314, "learning_rate": 2.5387549972266678e-05, "loss": 1.1612, "step": 7367 }, { "epoch": 0.7751709626512362, "grad_norm": 1.9099220037460327, "learning_rate": 2.5364865372804712e-05, "loss": 1.3566, "step": 7368 }, { "epoch": 0.7752761704366123, "grad_norm": 1.5426534414291382, "learning_rate": 2.5342189440475204e-05, "loss": 1.7221, "step": 7369 }, { "epoch": 0.7753813782219884, "grad_norm": 1.1355007886886597, "learning_rate": 2.531952217791136e-05, "loss": 1.6406, "step": 7370 }, { "epoch": 0.7754865860073645, "grad_norm": 1.2040497064590454, "learning_rate": 2.529686358774551e-05, "loss": 1.8762, "step": 7371 }, { "epoch": 0.7755917937927407, "grad_norm": 1.0842825174331665, "learning_rate": 2.5274213672608936e-05, "loss": 1.9909, "step": 7372 }, { "epoch": 0.7756970015781168, "grad_norm": 2.0430164337158203, "learning_rate": 2.52515724351318e-05, "loss": 1.8172, "step": 7373 }, { "epoch": 0.7758022093634929, "grad_norm": 1.6244868040084839, "learning_rate": 2.5228939877943448e-05, "loss": 1.9493, "step": 7374 }, { "epoch": 0.775907417148869, "grad_norm": 2.7612531185150146, "learning_rate": 2.520631600367209e-05, "loss": 2.1905, "step": 7375 }, { "epoch": 0.7760126249342452, "grad_norm": 1.8194602727890015, "learning_rate": 2.5183700814944945e-05, "loss": 1.7323, "step": 7376 }, { "epoch": 0.7761178327196212, "grad_norm": 1.4295552968978882, "learning_rate": 2.5161094314388278e-05, "loss": 1.9286, "step": 7377 }, { "epoch": 0.7762230405049974, "grad_norm": 1.6510827541351318, "learning_rate": 2.5138496504627263e-05, "loss": 1.4062, "step": 7378 }, { "epoch": 0.7763282482903735, "grad_norm": 1.3370580673217773, "learning_rate": 2.5115907388286165e-05, "loss": 0.8727, "step": 7379 }, { "epoch": 0.7764334560757497, "grad_norm": 1.845016598701477, "learning_rate": 2.509332696798816e-05, "loss": 2.2291, "step": 7380 }, { "epoch": 0.7765386638611257, "grad_norm": 2.652451515197754, "learning_rate": 2.5070755246355393e-05, "loss": 1.9059, "step": 7381 }, { "epoch": 0.7766438716465018, "grad_norm": 1.796697974205017, "learning_rate": 2.5048192226009126e-05, "loss": 0.9683, "step": 7382 }, { "epoch": 0.776749079431878, "grad_norm": 2.484699010848999, "learning_rate": 2.5025637909569475e-05, "loss": 1.9713, "step": 7383 }, { "epoch": 0.776854287217254, "grad_norm": 1.428265929222107, "learning_rate": 2.5003092299655584e-05, "loss": 1.8807, "step": 7384 }, { "epoch": 0.7769594950026302, "grad_norm": 1.6806254386901855, "learning_rate": 2.4980555398885653e-05, "loss": 2.3045, "step": 7385 }, { "epoch": 0.7770647027880063, "grad_norm": 1.6273754835128784, "learning_rate": 2.4958027209876788e-05, "loss": 1.5729, "step": 7386 }, { "epoch": 0.7771699105733825, "grad_norm": 1.8025354146957397, "learning_rate": 2.493550773524509e-05, "loss": 1.7046, "step": 7387 }, { "epoch": 0.7772751183587585, "grad_norm": 1.5753192901611328, "learning_rate": 2.4912996977605718e-05, "loss": 1.2373, "step": 7388 }, { "epoch": 0.7773803261441347, "grad_norm": 1.7229233980178833, "learning_rate": 2.4890494939572762e-05, "loss": 1.7167, "step": 7389 }, { "epoch": 0.7774855339295108, "grad_norm": 2.0432682037353516, "learning_rate": 2.4868001623759263e-05, "loss": 1.456, "step": 7390 }, { "epoch": 0.7775907417148868, "grad_norm": 1.1946852207183838, "learning_rate": 2.4845517032777364e-05, "loss": 1.5981, "step": 7391 }, { "epoch": 0.777695949500263, "grad_norm": 1.7232786417007446, "learning_rate": 2.4823041169238092e-05, "loss": 1.8003, "step": 7392 }, { "epoch": 0.7778011572856391, "grad_norm": 2.1846988201141357, "learning_rate": 2.480057403575148e-05, "loss": 1.6615, "step": 7393 }, { "epoch": 0.7779063650710153, "grad_norm": 1.523494839668274, "learning_rate": 2.4778115634926624e-05, "loss": 1.9023, "step": 7394 }, { "epoch": 0.7780115728563913, "grad_norm": 1.5766888856887817, "learning_rate": 2.4755665969371446e-05, "loss": 1.6448, "step": 7395 }, { "epoch": 0.7781167806417675, "grad_norm": 1.718214750289917, "learning_rate": 2.4733225041693033e-05, "loss": 1.8259, "step": 7396 }, { "epoch": 0.7782219884271436, "grad_norm": 1.3229570388793945, "learning_rate": 2.4710792854497346e-05, "loss": 1.775, "step": 7397 }, { "epoch": 0.7783271962125198, "grad_norm": 1.1155831813812256, "learning_rate": 2.4688369410389334e-05, "loss": 1.6853, "step": 7398 }, { "epoch": 0.7784324039978958, "grad_norm": 1.7406342029571533, "learning_rate": 2.4665954711973017e-05, "loss": 1.4841, "step": 7399 }, { "epoch": 0.778537611783272, "grad_norm": 1.489790439605713, "learning_rate": 2.464354876185131e-05, "loss": 1.4955, "step": 7400 }, { "epoch": 0.7786428195686481, "grad_norm": 1.8641880750656128, "learning_rate": 2.462115156262612e-05, "loss": 1.6168, "step": 7401 }, { "epoch": 0.7787480273540242, "grad_norm": 1.3012237548828125, "learning_rate": 2.4598763116898405e-05, "loss": 2.0688, "step": 7402 }, { "epoch": 0.7788532351394003, "grad_norm": 2.4502408504486084, "learning_rate": 2.4576383427268034e-05, "loss": 1.3047, "step": 7403 }, { "epoch": 0.7789584429247765, "grad_norm": 1.7557679414749146, "learning_rate": 2.455401249633387e-05, "loss": 1.3024, "step": 7404 }, { "epoch": 0.7790636507101526, "grad_norm": 2.018825054168701, "learning_rate": 2.4531650326693822e-05, "loss": 2.3045, "step": 7405 }, { "epoch": 0.7791688584955286, "grad_norm": 1.1796764135360718, "learning_rate": 2.4509296920944712e-05, "loss": 1.772, "step": 7406 }, { "epoch": 0.7792740662809048, "grad_norm": 1.2721928358078003, "learning_rate": 2.448695228168234e-05, "loss": 1.2919, "step": 7407 }, { "epoch": 0.7793792740662809, "grad_norm": 1.6079884767532349, "learning_rate": 2.4464616411501572e-05, "loss": 1.4524, "step": 7408 }, { "epoch": 0.779484481851657, "grad_norm": 1.7307249307632446, "learning_rate": 2.4442289312996158e-05, "loss": 1.9304, "step": 7409 }, { "epoch": 0.7795896896370331, "grad_norm": 1.4798263311386108, "learning_rate": 2.4419970988758857e-05, "loss": 1.9729, "step": 7410 }, { "epoch": 0.7796948974224093, "grad_norm": 1.2387443780899048, "learning_rate": 2.439766144138148e-05, "loss": 1.5655, "step": 7411 }, { "epoch": 0.7798001052077854, "grad_norm": 1.4330637454986572, "learning_rate": 2.4375360673454718e-05, "loss": 1.3443, "step": 7412 }, { "epoch": 0.7799053129931615, "grad_norm": 1.1398248672485352, "learning_rate": 2.435306868756827e-05, "loss": 1.8242, "step": 7413 }, { "epoch": 0.7800105207785376, "grad_norm": 2.0635628700256348, "learning_rate": 2.433078548631088e-05, "loss": 1.3481, "step": 7414 }, { "epoch": 0.7801157285639138, "grad_norm": 1.374840259552002, "learning_rate": 2.4308511072270202e-05, "loss": 1.5717, "step": 7415 }, { "epoch": 0.7802209363492898, "grad_norm": 1.5022417306900024, "learning_rate": 2.4286245448032895e-05, "loss": 1.6766, "step": 7416 }, { "epoch": 0.7803261441346659, "grad_norm": 1.2573763132095337, "learning_rate": 2.4263988616184574e-05, "loss": 1.5103, "step": 7417 }, { "epoch": 0.7804313519200421, "grad_norm": 1.6818538904190063, "learning_rate": 2.4241740579309836e-05, "loss": 0.8336, "step": 7418 }, { "epoch": 0.7805365597054182, "grad_norm": 1.3387900590896606, "learning_rate": 2.4219501339992334e-05, "loss": 1.8443, "step": 7419 }, { "epoch": 0.7806417674907943, "grad_norm": 1.4211339950561523, "learning_rate": 2.4197270900814594e-05, "loss": 1.7912, "step": 7420 }, { "epoch": 0.7807469752761704, "grad_norm": 1.4029207229614258, "learning_rate": 2.417504926435814e-05, "loss": 1.2693, "step": 7421 }, { "epoch": 0.7808521830615466, "grad_norm": 1.4126625061035156, "learning_rate": 2.415283643320356e-05, "loss": 1.4289, "step": 7422 }, { "epoch": 0.7809573908469227, "grad_norm": 1.4173007011413574, "learning_rate": 2.413063240993031e-05, "loss": 1.8065, "step": 7423 }, { "epoch": 0.7810625986322988, "grad_norm": 1.166293740272522, "learning_rate": 2.4108437197116905e-05, "loss": 1.9179, "step": 7424 }, { "epoch": 0.7811678064176749, "grad_norm": 1.7653440237045288, "learning_rate": 2.408625079734078e-05, "loss": 1.5022, "step": 7425 }, { "epoch": 0.7812730142030511, "grad_norm": 1.2314149141311646, "learning_rate": 2.406407321317835e-05, "loss": 1.5374, "step": 7426 }, { "epoch": 0.7813782219884271, "grad_norm": 1.553765058517456, "learning_rate": 2.4041904447205067e-05, "loss": 1.6092, "step": 7427 }, { "epoch": 0.7814834297738033, "grad_norm": 1.543643593788147, "learning_rate": 2.40197445019953e-05, "loss": 1.2104, "step": 7428 }, { "epoch": 0.7815886375591794, "grad_norm": 1.2574193477630615, "learning_rate": 2.3997593380122386e-05, "loss": 1.6868, "step": 7429 }, { "epoch": 0.7816938453445555, "grad_norm": 1.3462152481079102, "learning_rate": 2.3975451084158707e-05, "loss": 1.5889, "step": 7430 }, { "epoch": 0.7817990531299316, "grad_norm": 1.5161739587783813, "learning_rate": 2.395331761667554e-05, "loss": 1.805, "step": 7431 }, { "epoch": 0.7819042609153077, "grad_norm": 1.5476092100143433, "learning_rate": 2.3931192980243166e-05, "loss": 1.4936, "step": 7432 }, { "epoch": 0.7820094687006839, "grad_norm": 1.412503957748413, "learning_rate": 2.3909077177430893e-05, "loss": 1.5105, "step": 7433 }, { "epoch": 0.7821146764860599, "grad_norm": 1.7502893209457397, "learning_rate": 2.3886970210806915e-05, "loss": 1.1703, "step": 7434 }, { "epoch": 0.7822198842714361, "grad_norm": 1.8714232444763184, "learning_rate": 2.3864872082938426e-05, "loss": 1.2081, "step": 7435 }, { "epoch": 0.7823250920568122, "grad_norm": 1.5723828077316284, "learning_rate": 2.3842782796391672e-05, "loss": 1.4935, "step": 7436 }, { "epoch": 0.7824302998421884, "grad_norm": 1.6067105531692505, "learning_rate": 2.3820702353731773e-05, "loss": 1.9455, "step": 7437 }, { "epoch": 0.7825355076275644, "grad_norm": 1.2671598196029663, "learning_rate": 2.3798630757522844e-05, "loss": 1.6182, "step": 7438 }, { "epoch": 0.7826407154129406, "grad_norm": 1.860741138458252, "learning_rate": 2.3776568010328003e-05, "loss": 1.8387, "step": 7439 }, { "epoch": 0.7827459231983167, "grad_norm": 1.9345104694366455, "learning_rate": 2.3754514114709304e-05, "loss": 1.4232, "step": 7440 }, { "epoch": 0.7828511309836927, "grad_norm": 2.1556923389434814, "learning_rate": 2.3732469073227827e-05, "loss": 1.588, "step": 7441 }, { "epoch": 0.7829563387690689, "grad_norm": 1.9754849672317505, "learning_rate": 2.371043288844358e-05, "loss": 1.6852, "step": 7442 }, { "epoch": 0.783061546554445, "grad_norm": 1.2634505033493042, "learning_rate": 2.3688405562915517e-05, "loss": 1.8126, "step": 7443 }, { "epoch": 0.7831667543398212, "grad_norm": 2.078622579574585, "learning_rate": 2.3666387099201648e-05, "loss": 0.9941, "step": 7444 }, { "epoch": 0.7832719621251972, "grad_norm": 1.6752347946166992, "learning_rate": 2.3644377499858893e-05, "loss": 1.8038, "step": 7445 }, { "epoch": 0.7833771699105734, "grad_norm": 1.0644302368164062, "learning_rate": 2.3622376767443123e-05, "loss": 1.8299, "step": 7446 }, { "epoch": 0.7834823776959495, "grad_norm": 1.3895219564437866, "learning_rate": 2.3600384904509254e-05, "loss": 1.8465, "step": 7447 }, { "epoch": 0.7835875854813256, "grad_norm": 1.346755862236023, "learning_rate": 2.3578401913611103e-05, "loss": 1.5028, "step": 7448 }, { "epoch": 0.7836927932667017, "grad_norm": 1.92898428440094, "learning_rate": 2.3556427797301462e-05, "loss": 1.1488, "step": 7449 }, { "epoch": 0.7837980010520779, "grad_norm": 1.4644855260849, "learning_rate": 2.3534462558132177e-05, "loss": 1.8202, "step": 7450 }, { "epoch": 0.783903208837454, "grad_norm": 1.326379656791687, "learning_rate": 2.3512506198653948e-05, "loss": 1.3529, "step": 7451 }, { "epoch": 0.78400841662283, "grad_norm": 1.445070505142212, "learning_rate": 2.3490558721416477e-05, "loss": 1.7824, "step": 7452 }, { "epoch": 0.7841136244082062, "grad_norm": 1.5502876043319702, "learning_rate": 2.346862012896852e-05, "loss": 1.8986, "step": 7453 }, { "epoch": 0.7842188321935823, "grad_norm": 1.3673009872436523, "learning_rate": 2.3446690423857685e-05, "loss": 1.4538, "step": 7454 }, { "epoch": 0.7843240399789585, "grad_norm": 2.0649657249450684, "learning_rate": 2.3424769608630593e-05, "loss": 1.7845, "step": 7455 }, { "epoch": 0.7844292477643345, "grad_norm": 1.5386171340942383, "learning_rate": 2.340285768583287e-05, "loss": 2.1823, "step": 7456 }, { "epoch": 0.7845344555497107, "grad_norm": 2.3255977630615234, "learning_rate": 2.3380954658009057e-05, "loss": 1.5615, "step": 7457 }, { "epoch": 0.7846396633350868, "grad_norm": 2.3962433338165283, "learning_rate": 2.335906052770267e-05, "loss": 1.4474, "step": 7458 }, { "epoch": 0.7847448711204629, "grad_norm": 1.4439377784729004, "learning_rate": 2.3337175297456225e-05, "loss": 2.055, "step": 7459 }, { "epoch": 0.784850078905839, "grad_norm": 1.2580662965774536, "learning_rate": 2.3315298969811127e-05, "loss": 1.9116, "step": 7460 }, { "epoch": 0.7849552866912152, "grad_norm": 2.165815830230713, "learning_rate": 2.3293431547307887e-05, "loss": 1.5773, "step": 7461 }, { "epoch": 0.7850604944765913, "grad_norm": 1.3377245664596558, "learning_rate": 2.327157303248584e-05, "loss": 1.23, "step": 7462 }, { "epoch": 0.7851657022619674, "grad_norm": 1.4234360456466675, "learning_rate": 2.324972342788333e-05, "loss": 1.5741, "step": 7463 }, { "epoch": 0.7852709100473435, "grad_norm": 1.4981180429458618, "learning_rate": 2.3227882736037732e-05, "loss": 1.6655, "step": 7464 }, { "epoch": 0.7853761178327197, "grad_norm": 1.7403837442398071, "learning_rate": 2.3206050959485314e-05, "loss": 1.9563, "step": 7465 }, { "epoch": 0.7854813256180957, "grad_norm": 1.0999480485916138, "learning_rate": 2.3184228100761285e-05, "loss": 1.9103, "step": 7466 }, { "epoch": 0.7855865334034718, "grad_norm": 1.1709569692611694, "learning_rate": 2.316241416239994e-05, "loss": 1.8448, "step": 7467 }, { "epoch": 0.785691741188848, "grad_norm": 2.044626235961914, "learning_rate": 2.31406091469344e-05, "loss": 1.7845, "step": 7468 }, { "epoch": 0.7857969489742241, "grad_norm": 1.8200260400772095, "learning_rate": 2.3118813056896814e-05, "loss": 1.8616, "step": 7469 }, { "epoch": 0.7859021567596002, "grad_norm": 1.0556297302246094, "learning_rate": 2.3097025894818326e-05, "loss": 1.9248, "step": 7470 }, { "epoch": 0.7860073645449763, "grad_norm": 1.9120993614196777, "learning_rate": 2.307524766322896e-05, "loss": 1.1505, "step": 7471 }, { "epoch": 0.7861125723303525, "grad_norm": 1.3910388946533203, "learning_rate": 2.30534783646578e-05, "loss": 2.0402, "step": 7472 }, { "epoch": 0.7862177801157285, "grad_norm": 2.19036602973938, "learning_rate": 2.303171800163282e-05, "loss": 1.7415, "step": 7473 }, { "epoch": 0.7863229879011047, "grad_norm": 1.8024297952651978, "learning_rate": 2.300996657668095e-05, "loss": 1.6391, "step": 7474 }, { "epoch": 0.7864281956864808, "grad_norm": 1.367393136024475, "learning_rate": 2.298822409232817e-05, "loss": 1.9299, "step": 7475 }, { "epoch": 0.786533403471857, "grad_norm": 1.068220615386963, "learning_rate": 2.2966490551099328e-05, "loss": 1.2364, "step": 7476 }, { "epoch": 0.786638611257233, "grad_norm": 1.452365756034851, "learning_rate": 2.2944765955518242e-05, "loss": 1.0474, "step": 7477 }, { "epoch": 0.7867438190426091, "grad_norm": 1.1206425428390503, "learning_rate": 2.2923050308107785e-05, "loss": 1.9693, "step": 7478 }, { "epoch": 0.7868490268279853, "grad_norm": 1.8541259765625, "learning_rate": 2.290134361138968e-05, "loss": 1.7089, "step": 7479 }, { "epoch": 0.7869542346133613, "grad_norm": 1.5118783712387085, "learning_rate": 2.287964586788467e-05, "loss": 1.7571, "step": 7480 }, { "epoch": 0.7870594423987375, "grad_norm": 1.5137947797775269, "learning_rate": 2.2857957080112423e-05, "loss": 1.0681, "step": 7481 }, { "epoch": 0.7871646501841136, "grad_norm": 1.3139784336090088, "learning_rate": 2.2836277250591574e-05, "loss": 1.2861, "step": 7482 }, { "epoch": 0.7872698579694898, "grad_norm": 2.15798020362854, "learning_rate": 2.2814606381839786e-05, "loss": 1.4572, "step": 7483 }, { "epoch": 0.7873750657548658, "grad_norm": 1.4466586112976074, "learning_rate": 2.279294447637359e-05, "loss": 1.554, "step": 7484 }, { "epoch": 0.787480273540242, "grad_norm": 2.4875476360321045, "learning_rate": 2.2771291536708494e-05, "loss": 2.1975, "step": 7485 }, { "epoch": 0.7875854813256181, "grad_norm": 1.745013952255249, "learning_rate": 2.2749647565359024e-05, "loss": 1.6658, "step": 7486 }, { "epoch": 0.7876906891109943, "grad_norm": 0.8909004926681519, "learning_rate": 2.2728012564838608e-05, "loss": 1.9497, "step": 7487 }, { "epoch": 0.7877958968963703, "grad_norm": 1.4795335531234741, "learning_rate": 2.2706386537659606e-05, "loss": 1.4597, "step": 7488 }, { "epoch": 0.7879011046817465, "grad_norm": 1.7913676500320435, "learning_rate": 2.2684769486333445e-05, "loss": 1.4416, "step": 7489 }, { "epoch": 0.7880063124671226, "grad_norm": 1.4431122541427612, "learning_rate": 2.2663161413370415e-05, "loss": 1.8649, "step": 7490 }, { "epoch": 0.7881115202524986, "grad_norm": 1.1279875040054321, "learning_rate": 2.2641562321279752e-05, "loss": 1.5093, "step": 7491 }, { "epoch": 0.7882167280378748, "grad_norm": 1.534596562385559, "learning_rate": 2.2619972212569752e-05, "loss": 1.663, "step": 7492 }, { "epoch": 0.7883219358232509, "grad_norm": 1.2349904775619507, "learning_rate": 2.259839108974757e-05, "loss": 1.8006, "step": 7493 }, { "epoch": 0.7884271436086271, "grad_norm": 1.3401429653167725, "learning_rate": 2.2576818955319333e-05, "loss": 1.2232, "step": 7494 }, { "epoch": 0.7885323513940031, "grad_norm": 1.3276652097702026, "learning_rate": 2.2555255811790177e-05, "loss": 1.3863, "step": 7495 }, { "epoch": 0.7886375591793793, "grad_norm": 2.36476731300354, "learning_rate": 2.2533701661664154e-05, "loss": 1.9646, "step": 7496 }, { "epoch": 0.7887427669647554, "grad_norm": 1.2848833799362183, "learning_rate": 2.251215650744424e-05, "loss": 1.5951, "step": 7497 }, { "epoch": 0.7888479747501315, "grad_norm": 1.5567632913589478, "learning_rate": 2.2490620351632452e-05, "loss": 1.1698, "step": 7498 }, { "epoch": 0.7889531825355076, "grad_norm": 2.238565444946289, "learning_rate": 2.2469093196729696e-05, "loss": 1.9644, "step": 7499 }, { "epoch": 0.7890583903208838, "grad_norm": 1.1401231288909912, "learning_rate": 2.24475750452358e-05, "loss": 1.5368, "step": 7500 }, { "epoch": 0.7891635981062599, "grad_norm": 2.0586628913879395, "learning_rate": 2.242606589964972e-05, "loss": 1.9507, "step": 7501 }, { "epoch": 0.789268805891636, "grad_norm": 1.760940670967102, "learning_rate": 2.2404565762469088e-05, "loss": 1.8004, "step": 7502 }, { "epoch": 0.7893740136770121, "grad_norm": 1.8332325220108032, "learning_rate": 2.2383074636190748e-05, "loss": 1.1507, "step": 7503 }, { "epoch": 0.7894792214623882, "grad_norm": 1.1777528524398804, "learning_rate": 2.236159252331037e-05, "loss": 1.8222, "step": 7504 }, { "epoch": 0.7895844292477643, "grad_norm": 2.514356851577759, "learning_rate": 2.234011942632257e-05, "loss": 1.0901, "step": 7505 }, { "epoch": 0.7896896370331404, "grad_norm": 1.4754233360290527, "learning_rate": 2.2318655347720995e-05, "loss": 1.3529, "step": 7506 }, { "epoch": 0.7897948448185166, "grad_norm": 1.38016939163208, "learning_rate": 2.2297200289998176e-05, "loss": 1.4848, "step": 7507 }, { "epoch": 0.7899000526038927, "grad_norm": 2.304391384124756, "learning_rate": 2.2275754255645587e-05, "loss": 1.2804, "step": 7508 }, { "epoch": 0.7900052603892688, "grad_norm": 2.2973196506500244, "learning_rate": 2.2254317247153746e-05, "loss": 1.3045, "step": 7509 }, { "epoch": 0.7901104681746449, "grad_norm": 1.2090044021606445, "learning_rate": 2.2232889267012038e-05, "loss": 2.2979, "step": 7510 }, { "epoch": 0.7902156759600211, "grad_norm": 1.6775548458099365, "learning_rate": 2.221147031770878e-05, "loss": 1.7299, "step": 7511 }, { "epoch": 0.7903208837453971, "grad_norm": 1.9138872623443604, "learning_rate": 2.2190060401731362e-05, "loss": 1.9317, "step": 7512 }, { "epoch": 0.7904260915307733, "grad_norm": 1.4541093111038208, "learning_rate": 2.2168659521566004e-05, "loss": 1.3817, "step": 7513 }, { "epoch": 0.7905312993161494, "grad_norm": 2.349430799484253, "learning_rate": 2.2147267679697892e-05, "loss": 1.1925, "step": 7514 }, { "epoch": 0.7906365071015256, "grad_norm": 2.0313339233398438, "learning_rate": 2.2125884878611258e-05, "loss": 1.643, "step": 7515 }, { "epoch": 0.7907417148869016, "grad_norm": 1.2877901792526245, "learning_rate": 2.210451112078914e-05, "loss": 1.2234, "step": 7516 }, { "epoch": 0.7908469226722777, "grad_norm": 1.4288432598114014, "learning_rate": 2.2083146408713673e-05, "loss": 1.4857, "step": 7517 }, { "epoch": 0.7909521304576539, "grad_norm": 1.7954027652740479, "learning_rate": 2.206179074486584e-05, "loss": 1.359, "step": 7518 }, { "epoch": 0.79105733824303, "grad_norm": 1.427760124206543, "learning_rate": 2.204044413172558e-05, "loss": 2.6701, "step": 7519 }, { "epoch": 0.7911625460284061, "grad_norm": 1.8676557540893555, "learning_rate": 2.201910657177185e-05, "loss": 1.5542, "step": 7520 }, { "epoch": 0.7912677538137822, "grad_norm": 2.1520488262176514, "learning_rate": 2.19977780674825e-05, "loss": 1.6475, "step": 7521 }, { "epoch": 0.7913729615991584, "grad_norm": 2.3536930084228516, "learning_rate": 2.1976458621334317e-05, "loss": 1.2516, "step": 7522 }, { "epoch": 0.7914781693845344, "grad_norm": 1.8606661558151245, "learning_rate": 2.195514823580307e-05, "loss": 1.6912, "step": 7523 }, { "epoch": 0.7915833771699106, "grad_norm": 1.4110456705093384, "learning_rate": 2.1933846913363466e-05, "loss": 1.6379, "step": 7524 }, { "epoch": 0.7916885849552867, "grad_norm": 1.5939291715621948, "learning_rate": 2.1912554656489127e-05, "loss": 1.5101, "step": 7525 }, { "epoch": 0.7917937927406629, "grad_norm": 1.7965341806411743, "learning_rate": 2.1891271467652696e-05, "loss": 1.6246, "step": 7526 }, { "epoch": 0.7918990005260389, "grad_norm": 1.8288224935531616, "learning_rate": 2.186999734932569e-05, "loss": 1.5909, "step": 7527 }, { "epoch": 0.792004208311415, "grad_norm": 1.5028138160705566, "learning_rate": 2.1848732303978638e-05, "loss": 1.6557, "step": 7528 }, { "epoch": 0.7921094160967912, "grad_norm": 1.5027766227722168, "learning_rate": 2.1827476334080953e-05, "loss": 1.3482, "step": 7529 }, { "epoch": 0.7922146238821672, "grad_norm": 1.7562695741653442, "learning_rate": 2.1806229442101e-05, "loss": 2.0568, "step": 7530 }, { "epoch": 0.7923198316675434, "grad_norm": 1.675010085105896, "learning_rate": 2.178499163050617e-05, "loss": 1.6856, "step": 7531 }, { "epoch": 0.7924250394529195, "grad_norm": 1.9306681156158447, "learning_rate": 2.1763762901762696e-05, "loss": 1.4367, "step": 7532 }, { "epoch": 0.7925302472382957, "grad_norm": 1.7994093894958496, "learning_rate": 2.174254325833579e-05, "loss": 1.6346, "step": 7533 }, { "epoch": 0.7926354550236717, "grad_norm": 1.6323491334915161, "learning_rate": 2.172133270268967e-05, "loss": 1.7997, "step": 7534 }, { "epoch": 0.7927406628090479, "grad_norm": 1.3499654531478882, "learning_rate": 2.1700131237287414e-05, "loss": 1.4139, "step": 7535 }, { "epoch": 0.792845870594424, "grad_norm": 1.7880961894989014, "learning_rate": 2.1678938864591046e-05, "loss": 1.2011, "step": 7536 }, { "epoch": 0.7929510783798001, "grad_norm": 1.5255318880081177, "learning_rate": 2.1657755587061644e-05, "loss": 1.9614, "step": 7537 }, { "epoch": 0.7930562861651762, "grad_norm": 1.1118429899215698, "learning_rate": 2.1636581407159105e-05, "loss": 1.2734, "step": 7538 }, { "epoch": 0.7931614939505524, "grad_norm": 1.6367865800857544, "learning_rate": 2.1615416327342296e-05, "loss": 2.0369, "step": 7539 }, { "epoch": 0.7932667017359285, "grad_norm": 1.7008229494094849, "learning_rate": 2.1594260350069096e-05, "loss": 1.5876, "step": 7540 }, { "epoch": 0.7933719095213045, "grad_norm": 1.5725849866867065, "learning_rate": 2.157311347779626e-05, "loss": 1.5209, "step": 7541 }, { "epoch": 0.7934771173066807, "grad_norm": 1.7557533979415894, "learning_rate": 2.1551975712979478e-05, "loss": 1.2324, "step": 7542 }, { "epoch": 0.7935823250920568, "grad_norm": 1.1576135158538818, "learning_rate": 2.1530847058073466e-05, "loss": 2.2651, "step": 7543 }, { "epoch": 0.7936875328774329, "grad_norm": 1.6503269672393799, "learning_rate": 2.1509727515531786e-05, "loss": 1.9303, "step": 7544 }, { "epoch": 0.793792740662809, "grad_norm": 2.0574920177459717, "learning_rate": 2.1488617087806982e-05, "loss": 1.6262, "step": 7545 }, { "epoch": 0.7938979484481852, "grad_norm": 1.3746944665908813, "learning_rate": 2.1467515777350544e-05, "loss": 1.2937, "step": 7546 }, { "epoch": 0.7940031562335613, "grad_norm": 1.4370603561401367, "learning_rate": 2.1446423586612886e-05, "loss": 1.3888, "step": 7547 }, { "epoch": 0.7941083640189374, "grad_norm": 1.5636769533157349, "learning_rate": 2.142534051804339e-05, "loss": 1.312, "step": 7548 }, { "epoch": 0.7942135718043135, "grad_norm": 1.7676070928573608, "learning_rate": 2.140426657409038e-05, "loss": 1.97, "step": 7549 }, { "epoch": 0.7943187795896897, "grad_norm": 1.89109468460083, "learning_rate": 2.1383201757201042e-05, "loss": 1.271, "step": 7550 }, { "epoch": 0.7944239873750658, "grad_norm": 0.966954231262207, "learning_rate": 2.1362146069821643e-05, "loss": 1.6096, "step": 7551 }, { "epoch": 0.7945291951604418, "grad_norm": 1.17153799533844, "learning_rate": 2.1341099514397266e-05, "loss": 1.5011, "step": 7552 }, { "epoch": 0.794634402945818, "grad_norm": 1.4526724815368652, "learning_rate": 2.132006209337195e-05, "loss": 1.4198, "step": 7553 }, { "epoch": 0.7947396107311941, "grad_norm": 2.1010799407958984, "learning_rate": 2.1299033809188773e-05, "loss": 1.6424, "step": 7554 }, { "epoch": 0.7948448185165702, "grad_norm": 1.633360505104065, "learning_rate": 2.1278014664289648e-05, "loss": 1.9107, "step": 7555 }, { "epoch": 0.7949500263019463, "grad_norm": 1.9874669313430786, "learning_rate": 2.125700466111542e-05, "loss": 1.8695, "step": 7556 }, { "epoch": 0.7950552340873225, "grad_norm": 1.591950535774231, "learning_rate": 2.1236003802105974e-05, "loss": 1.7396, "step": 7557 }, { "epoch": 0.7951604418726986, "grad_norm": 1.1268935203552246, "learning_rate": 2.121501208970005e-05, "loss": 1.3255, "step": 7558 }, { "epoch": 0.7952656496580747, "grad_norm": 1.344058871269226, "learning_rate": 2.1194029526335303e-05, "loss": 1.7558, "step": 7559 }, { "epoch": 0.7953708574434508, "grad_norm": 1.8388925790786743, "learning_rate": 2.117305611444843e-05, "loss": 1.4927, "step": 7560 }, { "epoch": 0.795476065228827, "grad_norm": 0.9249711632728577, "learning_rate": 2.115209185647499e-05, "loss": 1.4039, "step": 7561 }, { "epoch": 0.795581273014203, "grad_norm": 1.0573887825012207, "learning_rate": 2.1131136754849447e-05, "loss": 1.5272, "step": 7562 }, { "epoch": 0.7956864807995792, "grad_norm": 1.5813140869140625, "learning_rate": 2.1110190812005315e-05, "loss": 1.2803, "step": 7563 }, { "epoch": 0.7957916885849553, "grad_norm": 2.2133073806762695, "learning_rate": 2.1089254030374916e-05, "loss": 2.0426, "step": 7564 }, { "epoch": 0.7958968963703315, "grad_norm": 1.3776949644088745, "learning_rate": 2.106832641238966e-05, "loss": 1.5064, "step": 7565 }, { "epoch": 0.7960021041557075, "grad_norm": 2.1420438289642334, "learning_rate": 2.1047407960479702e-05, "loss": 1.888, "step": 7566 }, { "epoch": 0.7961073119410836, "grad_norm": 0.984514594078064, "learning_rate": 2.102649867707426e-05, "loss": 1.5134, "step": 7567 }, { "epoch": 0.7962125197264598, "grad_norm": 1.1638802289962769, "learning_rate": 2.1005598564601492e-05, "loss": 1.907, "step": 7568 }, { "epoch": 0.7963177275118358, "grad_norm": 1.849143385887146, "learning_rate": 2.0984707625488442e-05, "loss": 2.197, "step": 7569 }, { "epoch": 0.796422935297212, "grad_norm": 1.8121823072433472, "learning_rate": 2.096382586216108e-05, "loss": 1.6732, "step": 7570 }, { "epoch": 0.7965281430825881, "grad_norm": 1.8981144428253174, "learning_rate": 2.0942953277044386e-05, "loss": 1.9267, "step": 7571 }, { "epoch": 0.7966333508679643, "grad_norm": 1.1137375831604004, "learning_rate": 2.092208987256217e-05, "loss": 1.5532, "step": 7572 }, { "epoch": 0.7967385586533403, "grad_norm": 1.7999489307403564, "learning_rate": 2.0901235651137284e-05, "loss": 2.235, "step": 7573 }, { "epoch": 0.7968437664387165, "grad_norm": 1.321373462677002, "learning_rate": 2.0880390615191448e-05, "loss": 1.6331, "step": 7574 }, { "epoch": 0.7969489742240926, "grad_norm": 2.581979274749756, "learning_rate": 2.0859554767145272e-05, "loss": 1.2734, "step": 7575 }, { "epoch": 0.7970541820094686, "grad_norm": 1.2444729804992676, "learning_rate": 2.0838728109418436e-05, "loss": 1.7502, "step": 7576 }, { "epoch": 0.7971593897948448, "grad_norm": 1.8155887126922607, "learning_rate": 2.081791064442943e-05, "loss": 1.1768, "step": 7577 }, { "epoch": 0.797264597580221, "grad_norm": 1.7820537090301514, "learning_rate": 2.079710237459569e-05, "loss": 1.7911, "step": 7578 }, { "epoch": 0.7973698053655971, "grad_norm": 1.5820388793945312, "learning_rate": 2.0776303302333677e-05, "loss": 1.8153, "step": 7579 }, { "epoch": 0.7974750131509731, "grad_norm": 2.486478090286255, "learning_rate": 2.0755513430058672e-05, "loss": 0.9953, "step": 7580 }, { "epoch": 0.7975802209363493, "grad_norm": 1.5562423467636108, "learning_rate": 2.073473276018493e-05, "loss": 1.8432, "step": 7581 }, { "epoch": 0.7976854287217254, "grad_norm": 1.674886703491211, "learning_rate": 2.0713961295125685e-05, "loss": 0.5957, "step": 7582 }, { "epoch": 0.7977906365071016, "grad_norm": 1.56822669506073, "learning_rate": 2.0693199037293022e-05, "loss": 1.8226, "step": 7583 }, { "epoch": 0.7978958442924776, "grad_norm": 1.492409110069275, "learning_rate": 2.067244598909799e-05, "loss": 1.4119, "step": 7584 }, { "epoch": 0.7980010520778538, "grad_norm": 1.627830147743225, "learning_rate": 2.0651702152950602e-05, "loss": 1.837, "step": 7585 }, { "epoch": 0.7981062598632299, "grad_norm": 1.6998107433319092, "learning_rate": 2.0630967531259758e-05, "loss": 1.6075, "step": 7586 }, { "epoch": 0.798211467648606, "grad_norm": 1.3308358192443848, "learning_rate": 2.0610242126433297e-05, "loss": 1.7592, "step": 7587 }, { "epoch": 0.7983166754339821, "grad_norm": 1.7519946098327637, "learning_rate": 2.0589525940877996e-05, "loss": 1.5667, "step": 7588 }, { "epoch": 0.7984218832193583, "grad_norm": 2.321702003479004, "learning_rate": 2.0568818976999526e-05, "loss": 1.3998, "step": 7589 }, { "epoch": 0.7985270910047344, "grad_norm": 1.7129671573638916, "learning_rate": 2.0548121237202576e-05, "loss": 1.7467, "step": 7590 }, { "epoch": 0.7986322987901104, "grad_norm": 1.7081111669540405, "learning_rate": 2.0527432723890684e-05, "loss": 1.6383, "step": 7591 }, { "epoch": 0.7987375065754866, "grad_norm": 1.5592128038406372, "learning_rate": 2.0506753439466297e-05, "loss": 1.7673, "step": 7592 }, { "epoch": 0.7988427143608627, "grad_norm": 2.919283390045166, "learning_rate": 2.04860833863309e-05, "loss": 1.7805, "step": 7593 }, { "epoch": 0.7989479221462388, "grad_norm": 2.3130650520324707, "learning_rate": 2.0465422566884805e-05, "loss": 1.2299, "step": 7594 }, { "epoch": 0.7990531299316149, "grad_norm": 1.2341796159744263, "learning_rate": 2.044477098352726e-05, "loss": 1.4645, "step": 7595 }, { "epoch": 0.7991583377169911, "grad_norm": 2.093064069747925, "learning_rate": 2.0424128638656513e-05, "loss": 1.6587, "step": 7596 }, { "epoch": 0.7992635455023672, "grad_norm": 2.029285430908203, "learning_rate": 2.040349553466967e-05, "loss": 1.9854, "step": 7597 }, { "epoch": 0.7993687532877433, "grad_norm": 1.8652676343917847, "learning_rate": 2.0382871673962766e-05, "loss": 1.6049, "step": 7598 }, { "epoch": 0.7994739610731194, "grad_norm": 1.369429588317871, "learning_rate": 2.0362257058930822e-05, "loss": 1.6057, "step": 7599 }, { "epoch": 0.7995791688584956, "grad_norm": 1.3833380937576294, "learning_rate": 2.0341651691967735e-05, "loss": 2.1065, "step": 7600 }, { "epoch": 0.7996843766438716, "grad_norm": 2.4091103076934814, "learning_rate": 2.0321055575466284e-05, "loss": 2.0793, "step": 7601 }, { "epoch": 0.7997895844292477, "grad_norm": 1.5020534992218018, "learning_rate": 2.0300468711818322e-05, "loss": 1.3944, "step": 7602 }, { "epoch": 0.7998947922146239, "grad_norm": 1.9810582399368286, "learning_rate": 2.027989110341446e-05, "loss": 2.023, "step": 7603 }, { "epoch": 0.8, "grad_norm": 1.6195448637008667, "learning_rate": 2.0259322752644327e-05, "loss": 1.1899, "step": 7604 }, { "epoch": 0.8001052077853761, "grad_norm": 1.4536365270614624, "learning_rate": 2.0238763661896477e-05, "loss": 1.6324, "step": 7605 }, { "epoch": 0.8002104155707522, "grad_norm": 1.5313684940338135, "learning_rate": 2.0218213833558352e-05, "loss": 1.6158, "step": 7606 }, { "epoch": 0.8003156233561284, "grad_norm": 1.2193766832351685, "learning_rate": 2.0197673270016327e-05, "loss": 1.7051, "step": 7607 }, { "epoch": 0.8004208311415044, "grad_norm": 1.6945223808288574, "learning_rate": 2.0177141973655766e-05, "loss": 1.9545, "step": 7608 }, { "epoch": 0.8005260389268806, "grad_norm": 1.5775741338729858, "learning_rate": 2.01566199468608e-05, "loss": 1.5485, "step": 7609 }, { "epoch": 0.8006312467122567, "grad_norm": 2.2319235801696777, "learning_rate": 2.0136107192014676e-05, "loss": 1.1478, "step": 7610 }, { "epoch": 0.8007364544976329, "grad_norm": 2.3744618892669678, "learning_rate": 2.011560371149943e-05, "loss": 1.3589, "step": 7611 }, { "epoch": 0.8008416622830089, "grad_norm": 1.1278767585754395, "learning_rate": 2.0095109507696053e-05, "loss": 1.7969, "step": 7612 }, { "epoch": 0.800946870068385, "grad_norm": 2.358863353729248, "learning_rate": 2.0074624582984512e-05, "loss": 1.5369, "step": 7613 }, { "epoch": 0.8010520778537612, "grad_norm": 1.1637340784072876, "learning_rate": 2.0054148939743634e-05, "loss": 1.6914, "step": 7614 }, { "epoch": 0.8011572856391374, "grad_norm": 1.5420373678207397, "learning_rate": 2.0033682580351144e-05, "loss": 1.3169, "step": 7615 }, { "epoch": 0.8012624934245134, "grad_norm": 2.321443557739258, "learning_rate": 2.001322550718382e-05, "loss": 1.8365, "step": 7616 }, { "epoch": 0.8013677012098895, "grad_norm": 1.631739616394043, "learning_rate": 1.9992777722617207e-05, "loss": 1.0929, "step": 7617 }, { "epoch": 0.8014729089952657, "grad_norm": 1.5631780624389648, "learning_rate": 1.997233922902585e-05, "loss": 1.6305, "step": 7618 }, { "epoch": 0.8015781167806417, "grad_norm": 1.8600497245788574, "learning_rate": 1.995191002878323e-05, "loss": 1.323, "step": 7619 }, { "epoch": 0.8016833245660179, "grad_norm": 1.6327234506607056, "learning_rate": 1.9931490124261688e-05, "loss": 1.7434, "step": 7620 }, { "epoch": 0.801788532351394, "grad_norm": 1.5391840934753418, "learning_rate": 1.9911079517832555e-05, "loss": 1.564, "step": 7621 }, { "epoch": 0.8018937401367702, "grad_norm": 1.508278727531433, "learning_rate": 1.9890678211866033e-05, "loss": 1.7802, "step": 7622 }, { "epoch": 0.8019989479221462, "grad_norm": 1.6780965328216553, "learning_rate": 1.9870286208731236e-05, "loss": 1.6782, "step": 7623 }, { "epoch": 0.8021041557075224, "grad_norm": 1.9837194681167603, "learning_rate": 1.9849903510796262e-05, "loss": 1.9724, "step": 7624 }, { "epoch": 0.8022093634928985, "grad_norm": 1.8240439891815186, "learning_rate": 1.9829530120428064e-05, "loss": 1.7938, "step": 7625 }, { "epoch": 0.8023145712782745, "grad_norm": 1.649254560470581, "learning_rate": 1.9809166039992522e-05, "loss": 1.2282, "step": 7626 }, { "epoch": 0.8024197790636507, "grad_norm": 2.882291793823242, "learning_rate": 1.978881127185448e-05, "loss": 1.2825, "step": 7627 }, { "epoch": 0.8025249868490268, "grad_norm": 1.6689751148223877, "learning_rate": 1.9768465818377656e-05, "loss": 1.7849, "step": 7628 }, { "epoch": 0.802630194634403, "grad_norm": 1.3756548166275024, "learning_rate": 1.9748129681924675e-05, "loss": 1.9856, "step": 7629 }, { "epoch": 0.802735402419779, "grad_norm": 1.1730574369430542, "learning_rate": 1.9727802864857194e-05, "loss": 1.7476, "step": 7630 }, { "epoch": 0.8028406102051552, "grad_norm": 1.5033677816390991, "learning_rate": 1.970748536953557e-05, "loss": 1.54, "step": 7631 }, { "epoch": 0.8029458179905313, "grad_norm": 2.147925615310669, "learning_rate": 1.9687177198319308e-05, "loss": 1.521, "step": 7632 }, { "epoch": 0.8030510257759074, "grad_norm": 1.849800944328308, "learning_rate": 1.9666878353566697e-05, "loss": 1.9367, "step": 7633 }, { "epoch": 0.8031562335612835, "grad_norm": 1.3101515769958496, "learning_rate": 1.9646588837634937e-05, "loss": 1.9278, "step": 7634 }, { "epoch": 0.8032614413466597, "grad_norm": 2.0604212284088135, "learning_rate": 1.9626308652880243e-05, "loss": 1.7948, "step": 7635 }, { "epoch": 0.8033666491320358, "grad_norm": 1.3913450241088867, "learning_rate": 1.9606037801657673e-05, "loss": 1.5491, "step": 7636 }, { "epoch": 0.8034718569174119, "grad_norm": 1.931066870689392, "learning_rate": 1.9585776286321167e-05, "loss": 1.4223, "step": 7637 }, { "epoch": 0.803577064702788, "grad_norm": 2.459156036376953, "learning_rate": 1.956552410922369e-05, "loss": 1.1812, "step": 7638 }, { "epoch": 0.8036822724881642, "grad_norm": 2.2327475547790527, "learning_rate": 1.9545281272717032e-05, "loss": 1.8111, "step": 7639 }, { "epoch": 0.8037874802735402, "grad_norm": 1.7723302841186523, "learning_rate": 1.9525047779151905e-05, "loss": 2.2684, "step": 7640 }, { "epoch": 0.8038926880589163, "grad_norm": 1.0957763195037842, "learning_rate": 1.950482363087801e-05, "loss": 1.3338, "step": 7641 }, { "epoch": 0.8039978958442925, "grad_norm": 1.4228318929672241, "learning_rate": 1.948460883024388e-05, "loss": 1.2643, "step": 7642 }, { "epoch": 0.8041031036296686, "grad_norm": 2.340993642807007, "learning_rate": 1.9464403379596963e-05, "loss": 2.2087, "step": 7643 }, { "epoch": 0.8042083114150447, "grad_norm": 1.3735971450805664, "learning_rate": 1.9444207281283723e-05, "loss": 1.388, "step": 7644 }, { "epoch": 0.8043135192004208, "grad_norm": 1.704942226409912, "learning_rate": 1.9424020537649414e-05, "loss": 1.6904, "step": 7645 }, { "epoch": 0.804418726985797, "grad_norm": 1.3440220355987549, "learning_rate": 1.940384315103825e-05, "loss": 2.1261, "step": 7646 }, { "epoch": 0.8045239347711731, "grad_norm": 1.9415428638458252, "learning_rate": 1.938367512379341e-05, "loss": 2.098, "step": 7647 }, { "epoch": 0.8046291425565492, "grad_norm": 1.430979609489441, "learning_rate": 1.9363516458256916e-05, "loss": 1.1349, "step": 7648 }, { "epoch": 0.8047343503419253, "grad_norm": 1.5660107135772705, "learning_rate": 1.93433671567697e-05, "loss": 2.0174, "step": 7649 }, { "epoch": 0.8048395581273015, "grad_norm": 1.210718035697937, "learning_rate": 1.932322722167168e-05, "loss": 1.4806, "step": 7650 }, { "epoch": 0.8049447659126775, "grad_norm": 1.4528659582138062, "learning_rate": 1.9303096655301633e-05, "loss": 1.9193, "step": 7651 }, { "epoch": 0.8050499736980536, "grad_norm": 1.8522127866744995, "learning_rate": 1.9282975459997234e-05, "loss": 1.4749, "step": 7652 }, { "epoch": 0.8051551814834298, "grad_norm": 1.7069746255874634, "learning_rate": 1.9262863638095097e-05, "loss": 1.7067, "step": 7653 }, { "epoch": 0.8052603892688059, "grad_norm": 1.1644920110702515, "learning_rate": 1.9242761191930725e-05, "loss": 1.9264, "step": 7654 }, { "epoch": 0.805365597054182, "grad_norm": 1.9184256792068481, "learning_rate": 1.9222668123838593e-05, "loss": 2.0388, "step": 7655 }, { "epoch": 0.8054708048395581, "grad_norm": 2.2873263359069824, "learning_rate": 1.9202584436152014e-05, "loss": 1.719, "step": 7656 }, { "epoch": 0.8055760126249343, "grad_norm": 1.712296724319458, "learning_rate": 1.9182510131203224e-05, "loss": 1.6544, "step": 7657 }, { "epoch": 0.8056812204103103, "grad_norm": 2.636054515838623, "learning_rate": 1.9162445211323432e-05, "loss": 1.9039, "step": 7658 }, { "epoch": 0.8057864281956865, "grad_norm": 1.4774726629257202, "learning_rate": 1.914238967884269e-05, "loss": 1.6511, "step": 7659 }, { "epoch": 0.8058916359810626, "grad_norm": 1.71102774143219, "learning_rate": 1.9122343536089937e-05, "loss": 1.2127, "step": 7660 }, { "epoch": 0.8059968437664388, "grad_norm": 1.1164214611053467, "learning_rate": 1.910230678539314e-05, "loss": 1.5642, "step": 7661 }, { "epoch": 0.8061020515518148, "grad_norm": 1.0777497291564941, "learning_rate": 1.9082279429079065e-05, "loss": 1.6877, "step": 7662 }, { "epoch": 0.806207259337191, "grad_norm": 1.3562448024749756, "learning_rate": 1.9062261469473397e-05, "loss": 1.9616, "step": 7663 }, { "epoch": 0.8063124671225671, "grad_norm": 1.527605652809143, "learning_rate": 1.9042252908900814e-05, "loss": 1.4711, "step": 7664 }, { "epoch": 0.8064176749079431, "grad_norm": 1.3711210489273071, "learning_rate": 1.9022253749684783e-05, "loss": 1.6572, "step": 7665 }, { "epoch": 0.8065228826933193, "grad_norm": 2.562347888946533, "learning_rate": 1.9002263994147796e-05, "loss": 1.9137, "step": 7666 }, { "epoch": 0.8066280904786954, "grad_norm": 1.8726879358291626, "learning_rate": 1.8982283644611175e-05, "loss": 1.1054, "step": 7667 }, { "epoch": 0.8067332982640716, "grad_norm": 2.17808198928833, "learning_rate": 1.8962312703395146e-05, "loss": 1.6859, "step": 7668 }, { "epoch": 0.8068385060494476, "grad_norm": 1.516634225845337, "learning_rate": 1.8942351172818905e-05, "loss": 1.5421, "step": 7669 }, { "epoch": 0.8069437138348238, "grad_norm": 1.740494966506958, "learning_rate": 1.8922399055200525e-05, "loss": 1.5834, "step": 7670 }, { "epoch": 0.8070489216201999, "grad_norm": 1.4491758346557617, "learning_rate": 1.8902456352856925e-05, "loss": 1.4972, "step": 7671 }, { "epoch": 0.807154129405576, "grad_norm": 1.3840901851654053, "learning_rate": 1.888252306810406e-05, "loss": 1.7152, "step": 7672 }, { "epoch": 0.8072593371909521, "grad_norm": 1.5224378108978271, "learning_rate": 1.886259920325667e-05, "loss": 1.2755, "step": 7673 }, { "epoch": 0.8073645449763283, "grad_norm": 1.3467313051223755, "learning_rate": 1.8842684760628425e-05, "loss": 1.9975, "step": 7674 }, { "epoch": 0.8074697527617044, "grad_norm": 1.9911725521087646, "learning_rate": 1.882277974253197e-05, "loss": 2.0025, "step": 7675 }, { "epoch": 0.8075749605470804, "grad_norm": 1.4871760606765747, "learning_rate": 1.8802884151278775e-05, "loss": 1.4795, "step": 7676 }, { "epoch": 0.8076801683324566, "grad_norm": 1.2734004259109497, "learning_rate": 1.8782997989179297e-05, "loss": 1.6471, "step": 7677 }, { "epoch": 0.8077853761178327, "grad_norm": 1.3544495105743408, "learning_rate": 1.8763121258542815e-05, "loss": 1.744, "step": 7678 }, { "epoch": 0.8078905839032089, "grad_norm": 1.0602861642837524, "learning_rate": 1.874325396167753e-05, "loss": 2.062, "step": 7679 }, { "epoch": 0.8079957916885849, "grad_norm": 1.5346481800079346, "learning_rate": 1.8723396100890623e-05, "loss": 0.9999, "step": 7680 }, { "epoch": 0.8081009994739611, "grad_norm": 1.847928762435913, "learning_rate": 1.870354767848809e-05, "loss": 1.5345, "step": 7681 }, { "epoch": 0.8082062072593372, "grad_norm": 1.852360725402832, "learning_rate": 1.8683708696774826e-05, "loss": 1.8659, "step": 7682 }, { "epoch": 0.8083114150447133, "grad_norm": 1.2460297346115112, "learning_rate": 1.866387915805473e-05, "loss": 1.8196, "step": 7683 }, { "epoch": 0.8084166228300894, "grad_norm": 1.7130165100097656, "learning_rate": 1.8644059064630516e-05, "loss": 1.652, "step": 7684 }, { "epoch": 0.8085218306154656, "grad_norm": 1.4033414125442505, "learning_rate": 1.86242484188038e-05, "loss": 1.5075, "step": 7685 }, { "epoch": 0.8086270384008417, "grad_norm": 0.9263932108879089, "learning_rate": 1.8604447222875178e-05, "loss": 1.8016, "step": 7686 }, { "epoch": 0.8087322461862178, "grad_norm": 1.5973109006881714, "learning_rate": 1.8584655479144063e-05, "loss": 1.5324, "step": 7687 }, { "epoch": 0.8088374539715939, "grad_norm": 1.074812889099121, "learning_rate": 1.8564873189908793e-05, "loss": 1.6024, "step": 7688 }, { "epoch": 0.80894266175697, "grad_norm": 1.3290114402770996, "learning_rate": 1.854510035746667e-05, "loss": 1.786, "step": 7689 }, { "epoch": 0.8090478695423461, "grad_norm": 2.054743766784668, "learning_rate": 1.8525336984113807e-05, "loss": 1.2776, "step": 7690 }, { "epoch": 0.8091530773277222, "grad_norm": 1.4225199222564697, "learning_rate": 1.850558307214525e-05, "loss": 1.4513, "step": 7691 }, { "epoch": 0.8092582851130984, "grad_norm": 1.4592626094818115, "learning_rate": 1.848583862385501e-05, "loss": 1.4772, "step": 7692 }, { "epoch": 0.8093634928984745, "grad_norm": 1.3948107957839966, "learning_rate": 1.8466103641535904e-05, "loss": 1.6123, "step": 7693 }, { "epoch": 0.8094687006838506, "grad_norm": 1.5830365419387817, "learning_rate": 1.8446378127479692e-05, "loss": 1.663, "step": 7694 }, { "epoch": 0.8095739084692267, "grad_norm": 1.350455403327942, "learning_rate": 1.8426662083977042e-05, "loss": 1.6457, "step": 7695 }, { "epoch": 0.8096791162546029, "grad_norm": 1.3711130619049072, "learning_rate": 1.8406955513317482e-05, "loss": 1.8406, "step": 7696 }, { "epoch": 0.8097843240399789, "grad_norm": 2.136914014816284, "learning_rate": 1.8387258417789532e-05, "loss": 1.7469, "step": 7697 }, { "epoch": 0.8098895318253551, "grad_norm": 2.448425531387329, "learning_rate": 1.836757079968051e-05, "loss": 1.3565, "step": 7698 }, { "epoch": 0.8099947396107312, "grad_norm": 1.5618599653244019, "learning_rate": 1.8347892661276656e-05, "loss": 1.587, "step": 7699 }, { "epoch": 0.8100999473961074, "grad_norm": 1.5216078758239746, "learning_rate": 1.8328224004863183e-05, "loss": 2.1681, "step": 7700 }, { "epoch": 0.8102051551814834, "grad_norm": 1.2957028150558472, "learning_rate": 1.830856483272412e-05, "loss": 1.0984, "step": 7701 }, { "epoch": 0.8103103629668595, "grad_norm": 1.9072808027267456, "learning_rate": 1.8288915147142384e-05, "loss": 1.4575, "step": 7702 }, { "epoch": 0.8104155707522357, "grad_norm": 1.6128511428833008, "learning_rate": 1.8269274950399895e-05, "loss": 1.3623, "step": 7703 }, { "epoch": 0.8105207785376117, "grad_norm": 1.4067853689193726, "learning_rate": 1.8249644244777376e-05, "loss": 1.5455, "step": 7704 }, { "epoch": 0.8106259863229879, "grad_norm": 1.5415167808532715, "learning_rate": 1.8230023032554444e-05, "loss": 1.7658, "step": 7705 }, { "epoch": 0.810731194108364, "grad_norm": 1.5726876258850098, "learning_rate": 1.82104113160097e-05, "loss": 1.7449, "step": 7706 }, { "epoch": 0.8108364018937402, "grad_norm": 1.307512879371643, "learning_rate": 1.819080909742057e-05, "loss": 1.7211, "step": 7707 }, { "epoch": 0.8109416096791162, "grad_norm": 1.4828598499298096, "learning_rate": 1.8171216379063348e-05, "loss": 1.6675, "step": 7708 }, { "epoch": 0.8110468174644924, "grad_norm": 1.5087251663208008, "learning_rate": 1.8151633163213357e-05, "loss": 2.0347, "step": 7709 }, { "epoch": 0.8111520252498685, "grad_norm": 1.217799186706543, "learning_rate": 1.8132059452144666e-05, "loss": 1.4303, "step": 7710 }, { "epoch": 0.8112572330352447, "grad_norm": 1.011033058166504, "learning_rate": 1.8112495248130312e-05, "loss": 1.6298, "step": 7711 }, { "epoch": 0.8113624408206207, "grad_norm": 1.5476887226104736, "learning_rate": 1.809294055344226e-05, "loss": 2.5314, "step": 7712 }, { "epoch": 0.8114676486059968, "grad_norm": 1.962207317352295, "learning_rate": 1.8073395370351287e-05, "loss": 1.6565, "step": 7713 }, { "epoch": 0.811572856391373, "grad_norm": 1.5745935440063477, "learning_rate": 1.8053859701127153e-05, "loss": 1.4709, "step": 7714 }, { "epoch": 0.811678064176749, "grad_norm": 1.5660057067871094, "learning_rate": 1.8034333548038474e-05, "loss": 2.3473, "step": 7715 }, { "epoch": 0.8117832719621252, "grad_norm": 1.5668786764144897, "learning_rate": 1.8014816913352682e-05, "loss": 1.6102, "step": 7716 }, { "epoch": 0.8118884797475013, "grad_norm": 1.458255648612976, "learning_rate": 1.7995309799336256e-05, "loss": 1.5805, "step": 7717 }, { "epoch": 0.8119936875328775, "grad_norm": 1.485870599746704, "learning_rate": 1.7975812208254473e-05, "loss": 1.7534, "step": 7718 }, { "epoch": 0.8120988953182535, "grad_norm": 1.127456784248352, "learning_rate": 1.7956324142371485e-05, "loss": 1.7223, "step": 7719 }, { "epoch": 0.8122041031036297, "grad_norm": 1.6504329442977905, "learning_rate": 1.7936845603950447e-05, "loss": 1.1134, "step": 7720 }, { "epoch": 0.8123093108890058, "grad_norm": 2.026771068572998, "learning_rate": 1.7917376595253264e-05, "loss": 1.236, "step": 7721 }, { "epoch": 0.8124145186743819, "grad_norm": 1.5272623300552368, "learning_rate": 1.7897917118540875e-05, "loss": 1.6421, "step": 7722 }, { "epoch": 0.812519726459758, "grad_norm": 1.7590751647949219, "learning_rate": 1.787846717607302e-05, "loss": 1.2447, "step": 7723 }, { "epoch": 0.8126249342451342, "grad_norm": 1.131596326828003, "learning_rate": 1.7859026770108323e-05, "loss": 1.3865, "step": 7724 }, { "epoch": 0.8127301420305103, "grad_norm": 1.4612594842910767, "learning_rate": 1.7839595902904393e-05, "loss": 1.7474, "step": 7725 }, { "epoch": 0.8128353498158863, "grad_norm": 1.4456712007522583, "learning_rate": 1.782017457671764e-05, "loss": 1.4284, "step": 7726 }, { "epoch": 0.8129405576012625, "grad_norm": 2.546482801437378, "learning_rate": 1.780076279380337e-05, "loss": 1.9752, "step": 7727 }, { "epoch": 0.8130457653866386, "grad_norm": 2.082400321960449, "learning_rate": 1.778136055641587e-05, "loss": 1.666, "step": 7728 }, { "epoch": 0.8131509731720147, "grad_norm": 1.367645025253296, "learning_rate": 1.776196786680824e-05, "loss": 1.4718, "step": 7729 }, { "epoch": 0.8132561809573908, "grad_norm": 2.418299436569214, "learning_rate": 1.7742584727232437e-05, "loss": 1.7791, "step": 7730 }, { "epoch": 0.813361388742767, "grad_norm": 1.4075639247894287, "learning_rate": 1.7723211139939445e-05, "loss": 1.9541, "step": 7731 }, { "epoch": 0.8134665965281431, "grad_norm": 1.8531979322433472, "learning_rate": 1.7703847107178996e-05, "loss": 1.3266, "step": 7732 }, { "epoch": 0.8135718043135192, "grad_norm": 1.2879350185394287, "learning_rate": 1.768449263119977e-05, "loss": 1.3593, "step": 7733 }, { "epoch": 0.8136770120988953, "grad_norm": 1.4324150085449219, "learning_rate": 1.7665147714249376e-05, "loss": 1.1906, "step": 7734 }, { "epoch": 0.8137822198842715, "grad_norm": 1.2460278272628784, "learning_rate": 1.7645812358574264e-05, "loss": 1.7535, "step": 7735 }, { "epoch": 0.8138874276696475, "grad_norm": 1.501090407371521, "learning_rate": 1.762648656641974e-05, "loss": 1.507, "step": 7736 }, { "epoch": 0.8139926354550236, "grad_norm": 2.3010196685791016, "learning_rate": 1.7607170340030143e-05, "loss": 1.1804, "step": 7737 }, { "epoch": 0.8140978432403998, "grad_norm": 1.1160740852355957, "learning_rate": 1.7587863681648487e-05, "loss": 1.7082, "step": 7738 }, { "epoch": 0.814203051025776, "grad_norm": 1.3494609594345093, "learning_rate": 1.7568566593516867e-05, "loss": 1.5496, "step": 7739 }, { "epoch": 0.814308258811152, "grad_norm": 1.8527060747146606, "learning_rate": 1.754927907787617e-05, "loss": 1.4893, "step": 7740 }, { "epoch": 0.8144134665965281, "grad_norm": 2.120997428894043, "learning_rate": 1.753000113696617e-05, "loss": 1.9963, "step": 7741 }, { "epoch": 0.8145186743819043, "grad_norm": 2.1121816635131836, "learning_rate": 1.7510732773025584e-05, "loss": 2.1124, "step": 7742 }, { "epoch": 0.8146238821672804, "grad_norm": 2.398660659790039, "learning_rate": 1.7491473988291984e-05, "loss": 1.6849, "step": 7743 }, { "epoch": 0.8147290899526565, "grad_norm": 1.6509634256362915, "learning_rate": 1.7472224785001778e-05, "loss": 1.4914, "step": 7744 }, { "epoch": 0.8148342977380326, "grad_norm": 1.1640690565109253, "learning_rate": 1.745298516539039e-05, "loss": 1.7646, "step": 7745 }, { "epoch": 0.8149395055234088, "grad_norm": 0.9993892312049866, "learning_rate": 1.7433755131692e-05, "loss": 1.5413, "step": 7746 }, { "epoch": 0.8150447133087848, "grad_norm": 2.1200225353240967, "learning_rate": 1.7414534686139717e-05, "loss": 1.3748, "step": 7747 }, { "epoch": 0.815149921094161, "grad_norm": 1.2785836458206177, "learning_rate": 1.7395323830965605e-05, "loss": 1.9115, "step": 7748 }, { "epoch": 0.8152551288795371, "grad_norm": 1.8023689985275269, "learning_rate": 1.7376122568400532e-05, "loss": 1.432, "step": 7749 }, { "epoch": 0.8153603366649133, "grad_norm": 1.6197617053985596, "learning_rate": 1.7356930900674228e-05, "loss": 1.2918, "step": 7750 }, { "epoch": 0.8154655444502893, "grad_norm": 1.9692368507385254, "learning_rate": 1.7337748830015442e-05, "loss": 1.4366, "step": 7751 }, { "epoch": 0.8155707522356654, "grad_norm": 1.7193232774734497, "learning_rate": 1.7318576358651685e-05, "loss": 1.9742, "step": 7752 }, { "epoch": 0.8156759600210416, "grad_norm": 1.3902307748794556, "learning_rate": 1.7299413488809356e-05, "loss": 1.6264, "step": 7753 }, { "epoch": 0.8157811678064176, "grad_norm": 1.7483313083648682, "learning_rate": 1.728026022271384e-05, "loss": 1.9915, "step": 7754 }, { "epoch": 0.8158863755917938, "grad_norm": 1.3935078382492065, "learning_rate": 1.726111656258932e-05, "loss": 1.5677, "step": 7755 }, { "epoch": 0.8159915833771699, "grad_norm": 1.7369499206542969, "learning_rate": 1.724198251065885e-05, "loss": 1.5543, "step": 7756 }, { "epoch": 0.8160967911625461, "grad_norm": 1.9357125759124756, "learning_rate": 1.7222858069144464e-05, "loss": 1.8799, "step": 7757 }, { "epoch": 0.8162019989479221, "grad_norm": 1.534250020980835, "learning_rate": 1.720374324026699e-05, "loss": 1.5193, "step": 7758 }, { "epoch": 0.8163072067332983, "grad_norm": 1.6834253072738647, "learning_rate": 1.718463802624617e-05, "loss": 1.3028, "step": 7759 }, { "epoch": 0.8164124145186744, "grad_norm": 1.60200834274292, "learning_rate": 1.716554242930063e-05, "loss": 1.5029, "step": 7760 }, { "epoch": 0.8165176223040504, "grad_norm": 1.758754849433899, "learning_rate": 1.7146456451647863e-05, "loss": 1.7036, "step": 7761 }, { "epoch": 0.8166228300894266, "grad_norm": 1.4392300844192505, "learning_rate": 1.7127380095504296e-05, "loss": 1.312, "step": 7762 }, { "epoch": 0.8167280378748027, "grad_norm": 1.1349120140075684, "learning_rate": 1.710831336308519e-05, "loss": 1.618, "step": 7763 }, { "epoch": 0.8168332456601789, "grad_norm": 2.290132999420166, "learning_rate": 1.708925625660467e-05, "loss": 1.3436, "step": 7764 }, { "epoch": 0.8169384534455549, "grad_norm": 1.6770775318145752, "learning_rate": 1.707020877827583e-05, "loss": 1.3586, "step": 7765 }, { "epoch": 0.8170436612309311, "grad_norm": 1.3523870706558228, "learning_rate": 1.7051170930310555e-05, "loss": 1.717, "step": 7766 }, { "epoch": 0.8171488690163072, "grad_norm": 1.853025197982788, "learning_rate": 1.703214271491964e-05, "loss": 1.5571, "step": 7767 }, { "epoch": 0.8172540768016833, "grad_norm": 1.83201003074646, "learning_rate": 1.701312413431281e-05, "loss": 1.9148, "step": 7768 }, { "epoch": 0.8173592845870594, "grad_norm": 1.2569760084152222, "learning_rate": 1.699411519069858e-05, "loss": 2.3183, "step": 7769 }, { "epoch": 0.8174644923724356, "grad_norm": 2.7303202152252197, "learning_rate": 1.697511588628443e-05, "loss": 1.6214, "step": 7770 }, { "epoch": 0.8175697001578117, "grad_norm": 2.05470609664917, "learning_rate": 1.6956126223276692e-05, "loss": 1.3433, "step": 7771 }, { "epoch": 0.8176749079431878, "grad_norm": 1.919611930847168, "learning_rate": 1.693714620388054e-05, "loss": 1.4729, "step": 7772 }, { "epoch": 0.8177801157285639, "grad_norm": 1.4189374446868896, "learning_rate": 1.6918175830300088e-05, "loss": 1.756, "step": 7773 }, { "epoch": 0.81788532351394, "grad_norm": 1.5107403993606567, "learning_rate": 1.6899215104738307e-05, "loss": 1.9558, "step": 7774 }, { "epoch": 0.8179905312993162, "grad_norm": 1.0100902318954468, "learning_rate": 1.6880264029396997e-05, "loss": 1.6087, "step": 7775 }, { "epoch": 0.8180957390846922, "grad_norm": 1.7361371517181396, "learning_rate": 1.686132260647696e-05, "loss": 1.3897, "step": 7776 }, { "epoch": 0.8182009468700684, "grad_norm": 1.6154359579086304, "learning_rate": 1.6842390838177746e-05, "loss": 1.8557, "step": 7777 }, { "epoch": 0.8183061546554445, "grad_norm": 1.9448010921478271, "learning_rate": 1.682346872669782e-05, "loss": 1.7147, "step": 7778 }, { "epoch": 0.8184113624408206, "grad_norm": 1.653695821762085, "learning_rate": 1.6804556274234616e-05, "loss": 1.7196, "step": 7779 }, { "epoch": 0.8185165702261967, "grad_norm": 1.5844122171401978, "learning_rate": 1.678565348298433e-05, "loss": 1.4943, "step": 7780 }, { "epoch": 0.8186217780115729, "grad_norm": 2.1291873455047607, "learning_rate": 1.6766760355142098e-05, "loss": 1.0847, "step": 7781 }, { "epoch": 0.818726985796949, "grad_norm": 1.2485053539276123, "learning_rate": 1.6747876892901893e-05, "loss": 1.5608, "step": 7782 }, { "epoch": 0.8188321935823251, "grad_norm": 2.34297776222229, "learning_rate": 1.6729003098456576e-05, "loss": 1.5364, "step": 7783 }, { "epoch": 0.8189374013677012, "grad_norm": 1.2006014585494995, "learning_rate": 1.6710138973997957e-05, "loss": 1.8377, "step": 7784 }, { "epoch": 0.8190426091530774, "grad_norm": 2.0813822746276855, "learning_rate": 1.6691284521716622e-05, "loss": 2.0049, "step": 7785 }, { "epoch": 0.8191478169384534, "grad_norm": 1.6687533855438232, "learning_rate": 1.667243974380207e-05, "loss": 1.6737, "step": 7786 }, { "epoch": 0.8192530247238295, "grad_norm": 1.5384453535079956, "learning_rate": 1.665360464244272e-05, "loss": 1.6079, "step": 7787 }, { "epoch": 0.8193582325092057, "grad_norm": 1.5753425359725952, "learning_rate": 1.6634779219825814e-05, "loss": 1.8783, "step": 7788 }, { "epoch": 0.8194634402945818, "grad_norm": 1.79296875, "learning_rate": 1.6615963478137454e-05, "loss": 1.8779, "step": 7789 }, { "epoch": 0.8195686480799579, "grad_norm": 2.0262389183044434, "learning_rate": 1.6597157419562703e-05, "loss": 1.7647, "step": 7790 }, { "epoch": 0.819673855865334, "grad_norm": 1.363146185874939, "learning_rate": 1.6578361046285418e-05, "loss": 1.7111, "step": 7791 }, { "epoch": 0.8197790636507102, "grad_norm": 2.63222599029541, "learning_rate": 1.655957436048834e-05, "loss": 1.0101, "step": 7792 }, { "epoch": 0.8198842714360862, "grad_norm": 1.6749835014343262, "learning_rate": 1.6540797364353155e-05, "loss": 1.5263, "step": 7793 }, { "epoch": 0.8199894792214624, "grad_norm": 1.5727925300598145, "learning_rate": 1.6522030060060356e-05, "loss": 1.6823, "step": 7794 }, { "epoch": 0.8200946870068385, "grad_norm": 1.6061309576034546, "learning_rate": 1.6503272449789286e-05, "loss": 1.4122, "step": 7795 }, { "epoch": 0.8201998947922147, "grad_norm": 2.4078192710876465, "learning_rate": 1.648452453571826e-05, "loss": 1.9501, "step": 7796 }, { "epoch": 0.8203051025775907, "grad_norm": 1.4480916261672974, "learning_rate": 1.646578632002439e-05, "loss": 1.7646, "step": 7797 }, { "epoch": 0.8204103103629669, "grad_norm": 2.407717227935791, "learning_rate": 1.644705780488367e-05, "loss": 1.4102, "step": 7798 }, { "epoch": 0.820515518148343, "grad_norm": 1.7151941061019897, "learning_rate": 1.6428338992471005e-05, "loss": 1.57, "step": 7799 }, { "epoch": 0.820620725933719, "grad_norm": 1.3426469564437866, "learning_rate": 1.6409629884960144e-05, "loss": 1.7375, "step": 7800 }, { "epoch": 0.8207259337190952, "grad_norm": 1.4567279815673828, "learning_rate": 1.6390930484523704e-05, "loss": 1.5916, "step": 7801 }, { "epoch": 0.8208311415044713, "grad_norm": 2.2961177825927734, "learning_rate": 1.6372240793333195e-05, "loss": 1.5445, "step": 7802 }, { "epoch": 0.8209363492898475, "grad_norm": 1.0729035139083862, "learning_rate": 1.6353560813558953e-05, "loss": 1.5018, "step": 7803 }, { "epoch": 0.8210415570752235, "grad_norm": 1.8320657014846802, "learning_rate": 1.6334890547370286e-05, "loss": 1.4641, "step": 7804 }, { "epoch": 0.8211467648605997, "grad_norm": 1.700055480003357, "learning_rate": 1.6316229996935262e-05, "loss": 2.0822, "step": 7805 }, { "epoch": 0.8212519726459758, "grad_norm": 1.5183660984039307, "learning_rate": 1.6297579164420873e-05, "loss": 1.8721, "step": 7806 }, { "epoch": 0.821357180431352, "grad_norm": 1.26556396484375, "learning_rate": 1.6278938051993008e-05, "loss": 1.7277, "step": 7807 }, { "epoch": 0.821462388216728, "grad_norm": 2.1678805351257324, "learning_rate": 1.6260306661816383e-05, "loss": 1.5226, "step": 7808 }, { "epoch": 0.8215675960021042, "grad_norm": 1.2085503339767456, "learning_rate": 1.6241684996054574e-05, "loss": 1.6556, "step": 7809 }, { "epoch": 0.8216728037874803, "grad_norm": 1.5834039449691772, "learning_rate": 1.6223073056870097e-05, "loss": 1.7377, "step": 7810 }, { "epoch": 0.8217780115728563, "grad_norm": 1.1570148468017578, "learning_rate": 1.6204470846424268e-05, "loss": 1.5493, "step": 7811 }, { "epoch": 0.8218832193582325, "grad_norm": 2.1178195476531982, "learning_rate": 1.6185878366877295e-05, "loss": 1.6665, "step": 7812 }, { "epoch": 0.8219884271436086, "grad_norm": 2.094390392303467, "learning_rate": 1.61672956203883e-05, "loss": 1.0295, "step": 7813 }, { "epoch": 0.8220936349289848, "grad_norm": 1.8944170475006104, "learning_rate": 1.6148722609115176e-05, "loss": 1.8045, "step": 7814 }, { "epoch": 0.8221988427143608, "grad_norm": 1.5587190389633179, "learning_rate": 1.613015933521481e-05, "loss": 1.6959, "step": 7815 }, { "epoch": 0.822304050499737, "grad_norm": 1.878710150718689, "learning_rate": 1.6111605800842865e-05, "loss": 1.62, "step": 7816 }, { "epoch": 0.8224092582851131, "grad_norm": 1.67520272731781, "learning_rate": 1.609306200815387e-05, "loss": 1.3253, "step": 7817 }, { "epoch": 0.8225144660704892, "grad_norm": 1.488664984703064, "learning_rate": 1.607452795930131e-05, "loss": 1.8211, "step": 7818 }, { "epoch": 0.8226196738558653, "grad_norm": 1.5285340547561646, "learning_rate": 1.6056003656437458e-05, "loss": 1.2902, "step": 7819 }, { "epoch": 0.8227248816412415, "grad_norm": 1.4336509704589844, "learning_rate": 1.6037489101713465e-05, "loss": 1.866, "step": 7820 }, { "epoch": 0.8228300894266176, "grad_norm": 1.3473023176193237, "learning_rate": 1.6018984297279393e-05, "loss": 1.4556, "step": 7821 }, { "epoch": 0.8229352972119937, "grad_norm": 1.2973744869232178, "learning_rate": 1.6000489245284133e-05, "loss": 1.9248, "step": 7822 }, { "epoch": 0.8230405049973698, "grad_norm": 1.9646800756454468, "learning_rate": 1.5982003947875467e-05, "loss": 1.7111, "step": 7823 }, { "epoch": 0.823145712782746, "grad_norm": 1.3596656322479248, "learning_rate": 1.59635284072e-05, "loss": 1.5034, "step": 7824 }, { "epoch": 0.823250920568122, "grad_norm": 2.065723180770874, "learning_rate": 1.594506262540324e-05, "loss": 1.876, "step": 7825 }, { "epoch": 0.8233561283534981, "grad_norm": 1.7020695209503174, "learning_rate": 1.592660660462959e-05, "loss": 1.237, "step": 7826 }, { "epoch": 0.8234613361388743, "grad_norm": 1.4010860919952393, "learning_rate": 1.5908160347022272e-05, "loss": 1.9249, "step": 7827 }, { "epoch": 0.8235665439242504, "grad_norm": 1.8709789514541626, "learning_rate": 1.588972385472336e-05, "loss": 2.0006, "step": 7828 }, { "epoch": 0.8236717517096265, "grad_norm": 1.6557965278625488, "learning_rate": 1.5871297129873864e-05, "loss": 1.9393, "step": 7829 }, { "epoch": 0.8237769594950026, "grad_norm": 1.690056562423706, "learning_rate": 1.5852880174613617e-05, "loss": 1.6349, "step": 7830 }, { "epoch": 0.8238821672803788, "grad_norm": 1.9580641984939575, "learning_rate": 1.5834472991081273e-05, "loss": 1.3244, "step": 7831 }, { "epoch": 0.8239873750657548, "grad_norm": 1.4145424365997314, "learning_rate": 1.5816075581414458e-05, "loss": 1.5102, "step": 7832 }, { "epoch": 0.824092582851131, "grad_norm": 1.6314736604690552, "learning_rate": 1.5797687947749563e-05, "loss": 1.424, "step": 7833 }, { "epoch": 0.8241977906365071, "grad_norm": 1.2429150342941284, "learning_rate": 1.577931009222189e-05, "loss": 1.2783, "step": 7834 }, { "epoch": 0.8243029984218833, "grad_norm": 1.750223994255066, "learning_rate": 1.5760942016965608e-05, "loss": 1.892, "step": 7835 }, { "epoch": 0.8244082062072593, "grad_norm": 1.6230374574661255, "learning_rate": 1.5742583724113746e-05, "loss": 1.8513, "step": 7836 }, { "epoch": 0.8245134139926354, "grad_norm": 1.2478396892547607, "learning_rate": 1.5724235215798167e-05, "loss": 1.8733, "step": 7837 }, { "epoch": 0.8246186217780116, "grad_norm": 1.6397819519042969, "learning_rate": 1.5705896494149654e-05, "loss": 1.4, "step": 7838 }, { "epoch": 0.8247238295633877, "grad_norm": 1.4806641340255737, "learning_rate": 1.56875675612978e-05, "loss": 1.4183, "step": 7839 }, { "epoch": 0.8248290373487638, "grad_norm": 1.4073078632354736, "learning_rate": 1.5669248419371085e-05, "loss": 1.7526, "step": 7840 }, { "epoch": 0.8249342451341399, "grad_norm": 1.6545143127441406, "learning_rate": 1.565093907049686e-05, "loss": 1.8446, "step": 7841 }, { "epoch": 0.8250394529195161, "grad_norm": 1.461369276046753, "learning_rate": 1.563263951680134e-05, "loss": 1.4983, "step": 7842 }, { "epoch": 0.8251446607048921, "grad_norm": 1.4910365343093872, "learning_rate": 1.5614349760409552e-05, "loss": 1.63, "step": 7843 }, { "epoch": 0.8252498684902683, "grad_norm": 1.4851601123809814, "learning_rate": 1.5596069803445502e-05, "loss": 1.3496, "step": 7844 }, { "epoch": 0.8253550762756444, "grad_norm": 1.3788039684295654, "learning_rate": 1.5577799648031876e-05, "loss": 1.8372, "step": 7845 }, { "epoch": 0.8254602840610206, "grad_norm": 1.6159765720367432, "learning_rate": 1.5559539296290403e-05, "loss": 1.5477, "step": 7846 }, { "epoch": 0.8255654918463966, "grad_norm": 2.3057737350463867, "learning_rate": 1.5541288750341575e-05, "loss": 1.866, "step": 7847 }, { "epoch": 0.8256706996317728, "grad_norm": 2.4619557857513428, "learning_rate": 1.5523048012304754e-05, "loss": 1.6384, "step": 7848 }, { "epoch": 0.8257759074171489, "grad_norm": 1.4026074409484863, "learning_rate": 1.5504817084298207e-05, "loss": 1.2915, "step": 7849 }, { "epoch": 0.8258811152025249, "grad_norm": 1.2069450616836548, "learning_rate": 1.548659596843902e-05, "loss": 1.6054, "step": 7850 }, { "epoch": 0.8259863229879011, "grad_norm": 1.7536677122116089, "learning_rate": 1.5468384666843115e-05, "loss": 1.7181, "step": 7851 }, { "epoch": 0.8260915307732772, "grad_norm": 1.5129624605178833, "learning_rate": 1.545018318162538e-05, "loss": 2.1859, "step": 7852 }, { "epoch": 0.8261967385586534, "grad_norm": 1.5461325645446777, "learning_rate": 1.5431991514899446e-05, "loss": 2.0414, "step": 7853 }, { "epoch": 0.8263019463440294, "grad_norm": 1.6245604753494263, "learning_rate": 1.5413809668777844e-05, "loss": 1.2136, "step": 7854 }, { "epoch": 0.8264071541294056, "grad_norm": 1.2918306589126587, "learning_rate": 1.5395637645372007e-05, "loss": 1.3098, "step": 7855 }, { "epoch": 0.8265123619147817, "grad_norm": 1.5559884309768677, "learning_rate": 1.5377475446792178e-05, "loss": 1.5964, "step": 7856 }, { "epoch": 0.8266175697001578, "grad_norm": 1.8957018852233887, "learning_rate": 1.535932307514745e-05, "loss": 1.8956, "step": 7857 }, { "epoch": 0.8267227774855339, "grad_norm": 2.0008089542388916, "learning_rate": 1.534118053254584e-05, "loss": 1.6321, "step": 7858 }, { "epoch": 0.8268279852709101, "grad_norm": 1.7587536573410034, "learning_rate": 1.5323047821094126e-05, "loss": 1.8533, "step": 7859 }, { "epoch": 0.8269331930562862, "grad_norm": 2.5158214569091797, "learning_rate": 1.5304924942898068e-05, "loss": 1.1862, "step": 7860 }, { "epoch": 0.8270384008416622, "grad_norm": 1.6874955892562866, "learning_rate": 1.528681190006218e-05, "loss": 1.5662, "step": 7861 }, { "epoch": 0.8271436086270384, "grad_norm": 2.3075296878814697, "learning_rate": 1.5268708694689847e-05, "loss": 1.449, "step": 7862 }, { "epoch": 0.8272488164124145, "grad_norm": 1.3436137437820435, "learning_rate": 1.5250615328883388e-05, "loss": 1.3811, "step": 7863 }, { "epoch": 0.8273540241977906, "grad_norm": 2.0536916255950928, "learning_rate": 1.5232531804743899e-05, "loss": 1.6885, "step": 7864 }, { "epoch": 0.8274592319831667, "grad_norm": 1.4442811012268066, "learning_rate": 1.5214458124371345e-05, "loss": 1.5104, "step": 7865 }, { "epoch": 0.8275644397685429, "grad_norm": 2.0475945472717285, "learning_rate": 1.5196394289864591e-05, "loss": 1.7676, "step": 7866 }, { "epoch": 0.827669647553919, "grad_norm": 1.2523587942123413, "learning_rate": 1.5178340303321314e-05, "loss": 1.5507, "step": 7867 }, { "epoch": 0.8277748553392951, "grad_norm": 2.2663662433624268, "learning_rate": 1.516029616683804e-05, "loss": 1.4212, "step": 7868 }, { "epoch": 0.8278800631246712, "grad_norm": 1.719728946685791, "learning_rate": 1.5142261882510223e-05, "loss": 2.0605, "step": 7869 }, { "epoch": 0.8279852709100474, "grad_norm": 1.38359534740448, "learning_rate": 1.5124237452432077e-05, "loss": 1.6727, "step": 7870 }, { "epoch": 0.8280904786954235, "grad_norm": 2.143702507019043, "learning_rate": 1.5106222878696775e-05, "loss": 1.3658, "step": 7871 }, { "epoch": 0.8281956864807996, "grad_norm": 1.9734598398208618, "learning_rate": 1.5088218163396262e-05, "loss": 1.5329, "step": 7872 }, { "epoch": 0.8283008942661757, "grad_norm": 1.5374348163604736, "learning_rate": 1.5070223308621345e-05, "loss": 1.5764, "step": 7873 }, { "epoch": 0.8284061020515519, "grad_norm": 1.664512038230896, "learning_rate": 1.5052238316461753e-05, "loss": 1.4934, "step": 7874 }, { "epoch": 0.8285113098369279, "grad_norm": 1.5587176084518433, "learning_rate": 1.5034263189005993e-05, "loss": 1.8861, "step": 7875 }, { "epoch": 0.828616517622304, "grad_norm": 1.4705950021743774, "learning_rate": 1.5016297928341438e-05, "loss": 1.7146, "step": 7876 }, { "epoch": 0.8287217254076802, "grad_norm": 1.6136786937713623, "learning_rate": 1.499834253655439e-05, "loss": 1.3576, "step": 7877 }, { "epoch": 0.8288269331930563, "grad_norm": 1.1220991611480713, "learning_rate": 1.4980397015729908e-05, "loss": 1.9862, "step": 7878 }, { "epoch": 0.8289321409784324, "grad_norm": 1.931833267211914, "learning_rate": 1.496246136795194e-05, "loss": 1.7605, "step": 7879 }, { "epoch": 0.8290373487638085, "grad_norm": 3.0278358459472656, "learning_rate": 1.4944535595303344e-05, "loss": 1.4795, "step": 7880 }, { "epoch": 0.8291425565491847, "grad_norm": 2.9382994174957275, "learning_rate": 1.492661969986574e-05, "loss": 1.4424, "step": 7881 }, { "epoch": 0.8292477643345607, "grad_norm": 1.7495644092559814, "learning_rate": 1.4908713683719632e-05, "loss": 1.7025, "step": 7882 }, { "epoch": 0.8293529721199369, "grad_norm": 1.588752269744873, "learning_rate": 1.4890817548944424e-05, "loss": 0.9935, "step": 7883 }, { "epoch": 0.829458179905313, "grad_norm": 1.3464096784591675, "learning_rate": 1.4872931297618308e-05, "loss": 1.6566, "step": 7884 }, { "epoch": 0.8295633876906892, "grad_norm": 1.621984601020813, "learning_rate": 1.4855054931818357e-05, "loss": 1.4483, "step": 7885 }, { "epoch": 0.8296685954760652, "grad_norm": 1.6145195960998535, "learning_rate": 1.483718845362051e-05, "loss": 1.6128, "step": 7886 }, { "epoch": 0.8297738032614413, "grad_norm": 1.5069514513015747, "learning_rate": 1.4819331865099539e-05, "loss": 1.5058, "step": 7887 }, { "epoch": 0.8298790110468175, "grad_norm": 2.006563425064087, "learning_rate": 1.4801485168329066e-05, "loss": 1.7359, "step": 7888 }, { "epoch": 0.8299842188321935, "grad_norm": 1.1885156631469727, "learning_rate": 1.4783648365381563e-05, "loss": 1.1542, "step": 7889 }, { "epoch": 0.8300894266175697, "grad_norm": 2.4285728931427, "learning_rate": 1.4765821458328355e-05, "loss": 1.7459, "step": 7890 }, { "epoch": 0.8301946344029458, "grad_norm": 1.307138442993164, "learning_rate": 1.4748004449239639e-05, "loss": 1.1976, "step": 7891 }, { "epoch": 0.830299842188322, "grad_norm": 2.049842357635498, "learning_rate": 1.473019734018445e-05, "loss": 1.3167, "step": 7892 }, { "epoch": 0.830405049973698, "grad_norm": 2.1878762245178223, "learning_rate": 1.471240013323063e-05, "loss": 1.5895, "step": 7893 }, { "epoch": 0.8305102577590742, "grad_norm": 1.3817623853683472, "learning_rate": 1.4694612830444953e-05, "loss": 1.8225, "step": 7894 }, { "epoch": 0.8306154655444503, "grad_norm": 1.5477038621902466, "learning_rate": 1.4676835433892989e-05, "loss": 1.5813, "step": 7895 }, { "epoch": 0.8307206733298264, "grad_norm": 2.0993480682373047, "learning_rate": 1.4659067945639137e-05, "loss": 2.0678, "step": 7896 }, { "epoch": 0.8308258811152025, "grad_norm": 1.446555733680725, "learning_rate": 1.464131036774674e-05, "loss": 1.5909, "step": 7897 }, { "epoch": 0.8309310889005787, "grad_norm": 1.8362712860107422, "learning_rate": 1.4623562702277882e-05, "loss": 1.1406, "step": 7898 }, { "epoch": 0.8310362966859548, "grad_norm": 1.4514873027801514, "learning_rate": 1.4605824951293524e-05, "loss": 1.7013, "step": 7899 }, { "epoch": 0.8311415044713308, "grad_norm": 1.1335458755493164, "learning_rate": 1.4588097116853538e-05, "loss": 1.2316, "step": 7900 }, { "epoch": 0.831246712256707, "grad_norm": 1.7612231969833374, "learning_rate": 1.4570379201016581e-05, "loss": 1.456, "step": 7901 }, { "epoch": 0.8313519200420831, "grad_norm": 2.277941942214966, "learning_rate": 1.4552671205840163e-05, "loss": 2.1247, "step": 7902 }, { "epoch": 0.8314571278274593, "grad_norm": 1.3662701845169067, "learning_rate": 1.4534973133380669e-05, "loss": 1.7872, "step": 7903 }, { "epoch": 0.8315623356128353, "grad_norm": 1.494207501411438, "learning_rate": 1.4517284985693335e-05, "loss": 1.7331, "step": 7904 }, { "epoch": 0.8316675433982115, "grad_norm": 1.8795514106750488, "learning_rate": 1.4499606764832175e-05, "loss": 1.3668, "step": 7905 }, { "epoch": 0.8317727511835876, "grad_norm": 1.7564284801483154, "learning_rate": 1.4481938472850154e-05, "loss": 1.5335, "step": 7906 }, { "epoch": 0.8318779589689637, "grad_norm": 2.4675612449645996, "learning_rate": 1.4464280111799e-05, "loss": 1.1034, "step": 7907 }, { "epoch": 0.8319831667543398, "grad_norm": 1.5577418804168701, "learning_rate": 1.4446631683729372e-05, "loss": 2.0499, "step": 7908 }, { "epoch": 0.832088374539716, "grad_norm": 2.425018787384033, "learning_rate": 1.4428993190690677e-05, "loss": 1.6102, "step": 7909 }, { "epoch": 0.8321935823250921, "grad_norm": 1.142414927482605, "learning_rate": 1.4411364634731183e-05, "loss": 1.52, "step": 7910 }, { "epoch": 0.8322987901104681, "grad_norm": 1.444345474243164, "learning_rate": 1.4393746017898113e-05, "loss": 1.5863, "step": 7911 }, { "epoch": 0.8324039978958443, "grad_norm": 1.6111942529678345, "learning_rate": 1.4376137342237417e-05, "loss": 1.4745, "step": 7912 }, { "epoch": 0.8325092056812204, "grad_norm": 2.1231207847595215, "learning_rate": 1.4358538609793915e-05, "loss": 1.719, "step": 7913 }, { "epoch": 0.8326144134665965, "grad_norm": 1.4172590970993042, "learning_rate": 1.4340949822611338e-05, "loss": 1.4065, "step": 7914 }, { "epoch": 0.8327196212519726, "grad_norm": 1.674170970916748, "learning_rate": 1.4323370982732176e-05, "loss": 1.8991, "step": 7915 }, { "epoch": 0.8328248290373488, "grad_norm": 1.4009698629379272, "learning_rate": 1.4305802092197829e-05, "loss": 1.5887, "step": 7916 }, { "epoch": 0.8329300368227249, "grad_norm": 1.580376148223877, "learning_rate": 1.4288243153048497e-05, "loss": 2.2614, "step": 7917 }, { "epoch": 0.833035244608101, "grad_norm": 2.047874689102173, "learning_rate": 1.4270694167323228e-05, "loss": 1.3469, "step": 7918 }, { "epoch": 0.8331404523934771, "grad_norm": 2.807647705078125, "learning_rate": 1.425315513705997e-05, "loss": 1.8353, "step": 7919 }, { "epoch": 0.8332456601788533, "grad_norm": 1.343658208847046, "learning_rate": 1.4235626064295438e-05, "loss": 1.6872, "step": 7920 }, { "epoch": 0.8333508679642293, "grad_norm": 1.3211522102355957, "learning_rate": 1.4218106951065224e-05, "loss": 1.6872, "step": 7921 }, { "epoch": 0.8334560757496055, "grad_norm": 2.710646867752075, "learning_rate": 1.4200597799403793e-05, "loss": 1.703, "step": 7922 }, { "epoch": 0.8335612835349816, "grad_norm": 1.5261638164520264, "learning_rate": 1.4183098611344415e-05, "loss": 1.5745, "step": 7923 }, { "epoch": 0.8336664913203577, "grad_norm": 1.4623990058898926, "learning_rate": 1.4165609388919176e-05, "loss": 1.606, "step": 7924 }, { "epoch": 0.8337716991057338, "grad_norm": 1.5466994047164917, "learning_rate": 1.414813013415911e-05, "loss": 1.7387, "step": 7925 }, { "epoch": 0.8338769068911099, "grad_norm": 1.6613030433654785, "learning_rate": 1.4130660849093969e-05, "loss": 1.8445, "step": 7926 }, { "epoch": 0.8339821146764861, "grad_norm": 1.4952675104141235, "learning_rate": 1.4113201535752407e-05, "loss": 1.7024, "step": 7927 }, { "epoch": 0.8340873224618621, "grad_norm": 1.191802740097046, "learning_rate": 1.409575219616196e-05, "loss": 1.56, "step": 7928 }, { "epoch": 0.8341925302472383, "grad_norm": 1.860492467880249, "learning_rate": 1.4078312832348938e-05, "loss": 0.9361, "step": 7929 }, { "epoch": 0.8342977380326144, "grad_norm": 1.4754762649536133, "learning_rate": 1.4060883446338502e-05, "loss": 1.5502, "step": 7930 }, { "epoch": 0.8344029458179906, "grad_norm": 1.2694263458251953, "learning_rate": 1.4043464040154686e-05, "loss": 1.761, "step": 7931 }, { "epoch": 0.8345081536033666, "grad_norm": 2.5978357791900635, "learning_rate": 1.4026054615820317e-05, "loss": 0.6868, "step": 7932 }, { "epoch": 0.8346133613887428, "grad_norm": 1.792586326599121, "learning_rate": 1.4008655175357144e-05, "loss": 1.808, "step": 7933 }, { "epoch": 0.8347185691741189, "grad_norm": 1.2222882509231567, "learning_rate": 1.3991265720785685e-05, "loss": 1.7221, "step": 7934 }, { "epoch": 0.8348237769594951, "grad_norm": 1.4182674884796143, "learning_rate": 1.397388625412529e-05, "loss": 1.4725, "step": 7935 }, { "epoch": 0.8349289847448711, "grad_norm": 1.2701834440231323, "learning_rate": 1.3956516777394235e-05, "loss": 1.3707, "step": 7936 }, { "epoch": 0.8350341925302472, "grad_norm": 1.180488109588623, "learning_rate": 1.393915729260955e-05, "loss": 1.5807, "step": 7937 }, { "epoch": 0.8351394003156234, "grad_norm": 2.2820448875427246, "learning_rate": 1.3921807801787112e-05, "loss": 1.4147, "step": 7938 }, { "epoch": 0.8352446081009994, "grad_norm": 2.877859354019165, "learning_rate": 1.390446830694172e-05, "loss": 1.2865, "step": 7939 }, { "epoch": 0.8353498158863756, "grad_norm": 2.0427865982055664, "learning_rate": 1.3887138810086908e-05, "loss": 1.4107, "step": 7940 }, { "epoch": 0.8354550236717517, "grad_norm": 1.6468256711959839, "learning_rate": 1.3869819313235077e-05, "loss": 1.4342, "step": 7941 }, { "epoch": 0.8355602314571279, "grad_norm": 2.2093889713287354, "learning_rate": 1.385250981839753e-05, "loss": 1.8815, "step": 7942 }, { "epoch": 0.8356654392425039, "grad_norm": 1.257178783416748, "learning_rate": 1.3835210327584348e-05, "loss": 1.3339, "step": 7943 }, { "epoch": 0.8357706470278801, "grad_norm": 1.405913233757019, "learning_rate": 1.3817920842804433e-05, "loss": 1.3401, "step": 7944 }, { "epoch": 0.8358758548132562, "grad_norm": 2.065035820007324, "learning_rate": 1.3800641366065604e-05, "loss": 1.1262, "step": 7945 }, { "epoch": 0.8359810625986323, "grad_norm": 1.2551019191741943, "learning_rate": 1.3783371899374442e-05, "loss": 1.651, "step": 7946 }, { "epoch": 0.8360862703840084, "grad_norm": 1.498842477798462, "learning_rate": 1.3766112444736368e-05, "loss": 1.2671, "step": 7947 }, { "epoch": 0.8361914781693845, "grad_norm": 1.7398850917816162, "learning_rate": 1.3748863004155732e-05, "loss": 1.9418, "step": 7948 }, { "epoch": 0.8362966859547607, "grad_norm": 1.3350509405136108, "learning_rate": 1.37316235796356e-05, "loss": 1.7554, "step": 7949 }, { "epoch": 0.8364018937401367, "grad_norm": 1.206315279006958, "learning_rate": 1.3714394173177936e-05, "loss": 1.4759, "step": 7950 }, { "epoch": 0.8365071015255129, "grad_norm": 2.4317026138305664, "learning_rate": 1.3697174786783584e-05, "loss": 1.921, "step": 7951 }, { "epoch": 0.836612309310889, "grad_norm": 1.54344642162323, "learning_rate": 1.3679965422452101e-05, "loss": 2.177, "step": 7952 }, { "epoch": 0.8367175170962651, "grad_norm": 1.8043365478515625, "learning_rate": 1.3662766082181999e-05, "loss": 1.2102, "step": 7953 }, { "epoch": 0.8368227248816412, "grad_norm": 1.4540766477584839, "learning_rate": 1.364557676797057e-05, "loss": 0.7462, "step": 7954 }, { "epoch": 0.8369279326670174, "grad_norm": 1.8703304529190063, "learning_rate": 1.3628397481813936e-05, "loss": 1.6058, "step": 7955 }, { "epoch": 0.8370331404523935, "grad_norm": 1.9202097654342651, "learning_rate": 1.3611228225707107e-05, "loss": 1.4126, "step": 7956 }, { "epoch": 0.8371383482377696, "grad_norm": 6.3809990882873535, "learning_rate": 1.3594069001643872e-05, "loss": 1.2723, "step": 7957 }, { "epoch": 0.8372435560231457, "grad_norm": 1.363297462463379, "learning_rate": 1.3576919811616862e-05, "loss": 1.6663, "step": 7958 }, { "epoch": 0.8373487638085219, "grad_norm": 4.482295513153076, "learning_rate": 1.3559780657617582e-05, "loss": 1.5827, "step": 7959 }, { "epoch": 0.8374539715938979, "grad_norm": 2.7441794872283936, "learning_rate": 1.354265154163633e-05, "loss": 1.9887, "step": 7960 }, { "epoch": 0.837559179379274, "grad_norm": 1.571427583694458, "learning_rate": 1.352553246566225e-05, "loss": 1.7604, "step": 7961 }, { "epoch": 0.8376643871646502, "grad_norm": 1.7176398038864136, "learning_rate": 1.3508423431683337e-05, "loss": 1.6065, "step": 7962 }, { "epoch": 0.8377695949500263, "grad_norm": 1.1875994205474854, "learning_rate": 1.3491324441686392e-05, "loss": 1.7916, "step": 7963 }, { "epoch": 0.8378748027354024, "grad_norm": 1.7343966960906982, "learning_rate": 1.3474235497657084e-05, "loss": 1.7253, "step": 7964 }, { "epoch": 0.8379800105207785, "grad_norm": 2.453002452850342, "learning_rate": 1.345715660157989e-05, "loss": 2.2406, "step": 7965 }, { "epoch": 0.8380852183061547, "grad_norm": 2.0220048427581787, "learning_rate": 1.3440087755438102e-05, "loss": 1.5343, "step": 7966 }, { "epoch": 0.8381904260915308, "grad_norm": 2.1328377723693848, "learning_rate": 1.3423028961213912e-05, "loss": 2.0797, "step": 7967 }, { "epoch": 0.8382956338769069, "grad_norm": 2.1333796977996826, "learning_rate": 1.340598022088827e-05, "loss": 1.8357, "step": 7968 }, { "epoch": 0.838400841662283, "grad_norm": 1.2837255001068115, "learning_rate": 1.338894153644098e-05, "loss": 1.6283, "step": 7969 }, { "epoch": 0.8385060494476592, "grad_norm": 1.9028080701828003, "learning_rate": 1.3371912909850726e-05, "loss": 1.2375, "step": 7970 }, { "epoch": 0.8386112572330352, "grad_norm": 1.260032296180725, "learning_rate": 1.335489434309496e-05, "loss": 1.5313, "step": 7971 }, { "epoch": 0.8387164650184114, "grad_norm": 1.5635043382644653, "learning_rate": 1.3337885838149988e-05, "loss": 1.5254, "step": 7972 }, { "epoch": 0.8388216728037875, "grad_norm": 1.1617438793182373, "learning_rate": 1.3320887396990999e-05, "loss": 1.3332, "step": 7973 }, { "epoch": 0.8389268805891636, "grad_norm": 1.6330403089523315, "learning_rate": 1.3303899021591882e-05, "loss": 1.3151, "step": 7974 }, { "epoch": 0.8390320883745397, "grad_norm": 1.6695667505264282, "learning_rate": 1.328692071392552e-05, "loss": 1.3915, "step": 7975 }, { "epoch": 0.8391372961599158, "grad_norm": 1.507834553718567, "learning_rate": 1.3269952475963509e-05, "loss": 1.7375, "step": 7976 }, { "epoch": 0.839242503945292, "grad_norm": 2.011200428009033, "learning_rate": 1.3252994309676303e-05, "loss": 1.5429, "step": 7977 }, { "epoch": 0.839347711730668, "grad_norm": 1.5089296102523804, "learning_rate": 1.3236046217033237e-05, "loss": 1.9026, "step": 7978 }, { "epoch": 0.8394529195160442, "grad_norm": 2.053021192550659, "learning_rate": 1.3219108200002418e-05, "loss": 1.2597, "step": 7979 }, { "epoch": 0.8395581273014203, "grad_norm": 2.294508457183838, "learning_rate": 1.3202180260550778e-05, "loss": 2.0704, "step": 7980 }, { "epoch": 0.8396633350867965, "grad_norm": 1.4443711042404175, "learning_rate": 1.318526240064415e-05, "loss": 1.6057, "step": 7981 }, { "epoch": 0.8397685428721725, "grad_norm": 1.4994503259658813, "learning_rate": 1.3168354622247115e-05, "loss": 1.7615, "step": 7982 }, { "epoch": 0.8398737506575487, "grad_norm": 2.1308159828186035, "learning_rate": 1.3151456927323113e-05, "loss": 1.8955, "step": 7983 }, { "epoch": 0.8399789584429248, "grad_norm": 1.4624441862106323, "learning_rate": 1.3134569317834454e-05, "loss": 1.7515, "step": 7984 }, { "epoch": 0.8400841662283008, "grad_norm": 1.2271381616592407, "learning_rate": 1.3117691795742226e-05, "loss": 1.494, "step": 7985 }, { "epoch": 0.840189374013677, "grad_norm": 1.1306962966918945, "learning_rate": 1.3100824363006326e-05, "loss": 1.977, "step": 7986 }, { "epoch": 0.8402945817990531, "grad_norm": 1.7996954917907715, "learning_rate": 1.3083967021585564e-05, "loss": 1.9938, "step": 7987 }, { "epoch": 0.8403997895844293, "grad_norm": 1.3190232515335083, "learning_rate": 1.3067119773437498e-05, "loss": 1.3396, "step": 7988 }, { "epoch": 0.8405049973698053, "grad_norm": 1.5940443277359009, "learning_rate": 1.3050282620518528e-05, "loss": 1.1903, "step": 7989 }, { "epoch": 0.8406102051551815, "grad_norm": 1.4561878442764282, "learning_rate": 1.3033455564783948e-05, "loss": 1.3508, "step": 7990 }, { "epoch": 0.8407154129405576, "grad_norm": 1.4680160284042358, "learning_rate": 1.3016638608187792e-05, "loss": 1.8019, "step": 7991 }, { "epoch": 0.8408206207259337, "grad_norm": 1.9364701509475708, "learning_rate": 1.2999831752682955e-05, "loss": 1.4724, "step": 7992 }, { "epoch": 0.8409258285113098, "grad_norm": 2.371671199798584, "learning_rate": 1.2983035000221177e-05, "loss": 2.2839, "step": 7993 }, { "epoch": 0.841031036296686, "grad_norm": 1.6771042346954346, "learning_rate": 1.2966248352753018e-05, "loss": 1.2167, "step": 7994 }, { "epoch": 0.8411362440820621, "grad_norm": 2.0259828567504883, "learning_rate": 1.294947181222783e-05, "loss": 1.7223, "step": 7995 }, { "epoch": 0.8412414518674382, "grad_norm": 1.4655014276504517, "learning_rate": 1.2932705380593846e-05, "loss": 1.8478, "step": 7996 }, { "epoch": 0.8413466596528143, "grad_norm": 1.9223905801773071, "learning_rate": 1.2915949059798038e-05, "loss": 1.5038, "step": 7997 }, { "epoch": 0.8414518674381904, "grad_norm": 1.4577714204788208, "learning_rate": 1.2899202851786341e-05, "loss": 1.5354, "step": 7998 }, { "epoch": 0.8415570752235666, "grad_norm": 1.3747444152832031, "learning_rate": 1.2882466758503397e-05, "loss": 2.3442, "step": 7999 }, { "epoch": 0.8416622830089426, "grad_norm": 1.4338715076446533, "learning_rate": 1.2865740781892699e-05, "loss": 1.7268, "step": 8000 }, { "epoch": 0.8417674907943188, "grad_norm": 1.9332977533340454, "learning_rate": 1.2849024923896612e-05, "loss": 2.0012, "step": 8001 }, { "epoch": 0.8418726985796949, "grad_norm": 2.2032244205474854, "learning_rate": 1.2832319186456288e-05, "loss": 1.7438, "step": 8002 }, { "epoch": 0.841977906365071, "grad_norm": 2.1048672199249268, "learning_rate": 1.281562357151167e-05, "loss": 0.9579, "step": 8003 }, { "epoch": 0.8420831141504471, "grad_norm": 1.7806977033615112, "learning_rate": 1.2798938081001621e-05, "loss": 1.4379, "step": 8004 }, { "epoch": 0.8421883219358233, "grad_norm": 2.0925915241241455, "learning_rate": 1.2782262716863747e-05, "loss": 2.3933, "step": 8005 }, { "epoch": 0.8422935297211994, "grad_norm": 2.210217237472534, "learning_rate": 1.2765597481034475e-05, "loss": 1.8541, "step": 8006 }, { "epoch": 0.8423987375065755, "grad_norm": 1.8628958463668823, "learning_rate": 1.2748942375449135e-05, "loss": 0.738, "step": 8007 }, { "epoch": 0.8425039452919516, "grad_norm": 1.774337649345398, "learning_rate": 1.2732297402041793e-05, "loss": 1.3924, "step": 8008 }, { "epoch": 0.8426091530773278, "grad_norm": 1.3823806047439575, "learning_rate": 1.27156625627454e-05, "loss": 1.7851, "step": 8009 }, { "epoch": 0.8427143608627038, "grad_norm": 1.3597822189331055, "learning_rate": 1.2699037859491702e-05, "loss": 1.7565, "step": 8010 }, { "epoch": 0.8428195686480799, "grad_norm": 1.7457836866378784, "learning_rate": 1.2682423294211231e-05, "loss": 2.0374, "step": 8011 }, { "epoch": 0.8429247764334561, "grad_norm": 1.9241029024124146, "learning_rate": 1.2665818868833445e-05, "loss": 2.1648, "step": 8012 }, { "epoch": 0.8430299842188322, "grad_norm": 1.4577206373214722, "learning_rate": 1.2649224585286524e-05, "loss": 1.5166, "step": 8013 }, { "epoch": 0.8431351920042083, "grad_norm": 1.7863467931747437, "learning_rate": 1.263264044549748e-05, "loss": 1.5639, "step": 8014 }, { "epoch": 0.8432403997895844, "grad_norm": 1.8786143064498901, "learning_rate": 1.2616066451392262e-05, "loss": 2.2911, "step": 8015 }, { "epoch": 0.8433456075749606, "grad_norm": 1.9232258796691895, "learning_rate": 1.2599502604895475e-05, "loss": 1.5803, "step": 8016 }, { "epoch": 0.8434508153603366, "grad_norm": 1.5343316793441772, "learning_rate": 1.2582948907930626e-05, "loss": 1.6762, "step": 8017 }, { "epoch": 0.8435560231457128, "grad_norm": 1.7372281551361084, "learning_rate": 1.2566405362420086e-05, "loss": 1.2532, "step": 8018 }, { "epoch": 0.8436612309310889, "grad_norm": 1.6992888450622559, "learning_rate": 1.2549871970284954e-05, "loss": 2.2251, "step": 8019 }, { "epoch": 0.8437664387164651, "grad_norm": 1.0523444414138794, "learning_rate": 1.253334873344525e-05, "loss": 1.7233, "step": 8020 }, { "epoch": 0.8438716465018411, "grad_norm": 2.2978196144104004, "learning_rate": 1.2516835653819725e-05, "loss": 1.1533, "step": 8021 }, { "epoch": 0.8439768542872172, "grad_norm": 1.5797593593597412, "learning_rate": 1.2500332733325993e-05, "loss": 1.2688, "step": 8022 }, { "epoch": 0.8440820620725934, "grad_norm": 2.2443854808807373, "learning_rate": 1.2483839973880508e-05, "loss": 1.2121, "step": 8023 }, { "epoch": 0.8441872698579694, "grad_norm": 1.3470818996429443, "learning_rate": 1.2467357377398504e-05, "loss": 1.6369, "step": 8024 }, { "epoch": 0.8442924776433456, "grad_norm": 1.4854938983917236, "learning_rate": 1.2450884945794017e-05, "loss": 1.9805, "step": 8025 }, { "epoch": 0.8443976854287217, "grad_norm": 1.5202157497406006, "learning_rate": 1.2434422680980006e-05, "loss": 1.9186, "step": 8026 }, { "epoch": 0.8445028932140979, "grad_norm": 1.5458056926727295, "learning_rate": 1.2417970584868132e-05, "loss": 1.3289, "step": 8027 }, { "epoch": 0.8446081009994739, "grad_norm": 1.4900844097137451, "learning_rate": 1.2401528659368911e-05, "loss": 1.3999, "step": 8028 }, { "epoch": 0.8447133087848501, "grad_norm": 1.6544053554534912, "learning_rate": 1.2385096906391746e-05, "loss": 1.7014, "step": 8029 }, { "epoch": 0.8448185165702262, "grad_norm": 1.7936450242996216, "learning_rate": 1.2368675327844758e-05, "loss": 1.373, "step": 8030 }, { "epoch": 0.8449237243556024, "grad_norm": 1.2108367681503296, "learning_rate": 1.2352263925634922e-05, "loss": 1.4582, "step": 8031 }, { "epoch": 0.8450289321409784, "grad_norm": 1.679121732711792, "learning_rate": 1.2335862701668078e-05, "loss": 1.4045, "step": 8032 }, { "epoch": 0.8451341399263546, "grad_norm": 1.2412477731704712, "learning_rate": 1.2319471657848825e-05, "loss": 1.9746, "step": 8033 }, { "epoch": 0.8452393477117307, "grad_norm": 1.6706655025482178, "learning_rate": 1.2303090796080585e-05, "loss": 1.6583, "step": 8034 }, { "epoch": 0.8453445554971067, "grad_norm": 2.606658935546875, "learning_rate": 1.2286720118265659e-05, "loss": 0.7691, "step": 8035 }, { "epoch": 0.8454497632824829, "grad_norm": 1.2284669876098633, "learning_rate": 1.2270359626305084e-05, "loss": 1.4472, "step": 8036 }, { "epoch": 0.845554971067859, "grad_norm": 1.9849002361297607, "learning_rate": 1.2254009322098759e-05, "loss": 1.8249, "step": 8037 }, { "epoch": 0.8456601788532352, "grad_norm": 2.283884286880493, "learning_rate": 1.22376692075454e-05, "loss": 1.3119, "step": 8038 }, { "epoch": 0.8457653866386112, "grad_norm": 1.697540283203125, "learning_rate": 1.2221339284542488e-05, "loss": 1.223, "step": 8039 }, { "epoch": 0.8458705944239874, "grad_norm": 2.4393091201782227, "learning_rate": 1.220501955498643e-05, "loss": 1.8292, "step": 8040 }, { "epoch": 0.8459758022093635, "grad_norm": 1.4089748859405518, "learning_rate": 1.2188710020772343e-05, "loss": 1.5042, "step": 8041 }, { "epoch": 0.8460810099947396, "grad_norm": 2.5239205360412598, "learning_rate": 1.2172410683794177e-05, "loss": 1.4657, "step": 8042 }, { "epoch": 0.8461862177801157, "grad_norm": 2.2481822967529297, "learning_rate": 1.2156121545944776e-05, "loss": 1.74, "step": 8043 }, { "epoch": 0.8462914255654919, "grad_norm": 1.7899489402770996, "learning_rate": 1.2139842609115726e-05, "loss": 1.798, "step": 8044 }, { "epoch": 0.846396633350868, "grad_norm": 2.1522939205169678, "learning_rate": 1.2123573875197402e-05, "loss": 2.2505, "step": 8045 }, { "epoch": 0.846501841136244, "grad_norm": 1.7058902978897095, "learning_rate": 1.2107315346079107e-05, "loss": 1.4551, "step": 8046 }, { "epoch": 0.8466070489216202, "grad_norm": 1.6210920810699463, "learning_rate": 1.209106702364885e-05, "loss": 1.5557, "step": 8047 }, { "epoch": 0.8467122567069963, "grad_norm": 2.2149577140808105, "learning_rate": 1.2074828909793479e-05, "loss": 1.0683, "step": 8048 }, { "epoch": 0.8468174644923724, "grad_norm": 1.5407854318618774, "learning_rate": 1.2058601006398718e-05, "loss": 1.813, "step": 8049 }, { "epoch": 0.8469226722777485, "grad_norm": 1.5162616968154907, "learning_rate": 1.2042383315349037e-05, "loss": 1.1297, "step": 8050 }, { "epoch": 0.8470278800631247, "grad_norm": 1.622755527496338, "learning_rate": 1.2026175838527732e-05, "loss": 1.7975, "step": 8051 }, { "epoch": 0.8471330878485008, "grad_norm": 1.260178565979004, "learning_rate": 1.2009978577816949e-05, "loss": 1.3295, "step": 8052 }, { "epoch": 0.8472382956338769, "grad_norm": 2.4802284240722656, "learning_rate": 1.199379153509761e-05, "loss": 1.7802, "step": 8053 }, { "epoch": 0.847343503419253, "grad_norm": 1.4476691484451294, "learning_rate": 1.1977614712249441e-05, "loss": 1.2737, "step": 8054 }, { "epoch": 0.8474487112046292, "grad_norm": 1.3926537036895752, "learning_rate": 1.1961448111151053e-05, "loss": 1.7071, "step": 8055 }, { "epoch": 0.8475539189900052, "grad_norm": 1.6615149974822998, "learning_rate": 1.1945291733679764e-05, "loss": 1.4381, "step": 8056 }, { "epoch": 0.8476591267753814, "grad_norm": 1.7258626222610474, "learning_rate": 1.192914558171181e-05, "loss": 1.9516, "step": 8057 }, { "epoch": 0.8477643345607575, "grad_norm": 2.5182111263275146, "learning_rate": 1.1913009657122188e-05, "loss": 1.4435, "step": 8058 }, { "epoch": 0.8478695423461337, "grad_norm": 1.2161062955856323, "learning_rate": 1.1896883961784656e-05, "loss": 1.7092, "step": 8059 }, { "epoch": 0.8479747501315097, "grad_norm": 1.7733458280563354, "learning_rate": 1.1880768497571882e-05, "loss": 1.5346, "step": 8060 }, { "epoch": 0.8480799579168858, "grad_norm": 1.790793776512146, "learning_rate": 1.1864663266355303e-05, "loss": 1.3009, "step": 8061 }, { "epoch": 0.848185165702262, "grad_norm": 1.6531035900115967, "learning_rate": 1.1848568270005135e-05, "loss": 1.4765, "step": 8062 }, { "epoch": 0.8482903734876381, "grad_norm": 1.2282698154449463, "learning_rate": 1.1832483510390469e-05, "loss": 1.9069, "step": 8063 }, { "epoch": 0.8483955812730142, "grad_norm": 1.9797649383544922, "learning_rate": 1.1816408989379158e-05, "loss": 1.4846, "step": 8064 }, { "epoch": 0.8485007890583903, "grad_norm": 1.0080591440200806, "learning_rate": 1.1800344708837895e-05, "loss": 1.6405, "step": 8065 }, { "epoch": 0.8486059968437665, "grad_norm": 1.6383482217788696, "learning_rate": 1.178429067063217e-05, "loss": 1.3591, "step": 8066 }, { "epoch": 0.8487112046291425, "grad_norm": 1.3764389753341675, "learning_rate": 1.1768246876626265e-05, "loss": 1.0397, "step": 8067 }, { "epoch": 0.8488164124145187, "grad_norm": 1.7834566831588745, "learning_rate": 1.1752213328683337e-05, "loss": 0.6507, "step": 8068 }, { "epoch": 0.8489216201998948, "grad_norm": 1.8441131114959717, "learning_rate": 1.1736190028665273e-05, "loss": 1.811, "step": 8069 }, { "epoch": 0.849026827985271, "grad_norm": 1.3411214351654053, "learning_rate": 1.1720176978432795e-05, "loss": 1.6706, "step": 8070 }, { "epoch": 0.849132035770647, "grad_norm": 1.3901194334030151, "learning_rate": 1.1704174179845496e-05, "loss": 1.4316, "step": 8071 }, { "epoch": 0.8492372435560231, "grad_norm": 1.992873191833496, "learning_rate": 1.1688181634761685e-05, "loss": 2.0151, "step": 8072 }, { "epoch": 0.8493424513413993, "grad_norm": 1.5536729097366333, "learning_rate": 1.1672199345038526e-05, "loss": 1.6268, "step": 8073 }, { "epoch": 0.8494476591267753, "grad_norm": 1.7125334739685059, "learning_rate": 1.1656227312532009e-05, "loss": 1.4057, "step": 8074 }, { "epoch": 0.8495528669121515, "grad_norm": 1.3038568496704102, "learning_rate": 1.1640265539096918e-05, "loss": 1.768, "step": 8075 }, { "epoch": 0.8496580746975276, "grad_norm": 1.536347508430481, "learning_rate": 1.1624314026586802e-05, "loss": 1.634, "step": 8076 }, { "epoch": 0.8497632824829038, "grad_norm": 1.6387101411819458, "learning_rate": 1.1608372776854103e-05, "loss": 1.7797, "step": 8077 }, { "epoch": 0.8498684902682798, "grad_norm": 1.4296340942382812, "learning_rate": 1.1592441791750009e-05, "loss": 1.8383, "step": 8078 }, { "epoch": 0.849973698053656, "grad_norm": 1.9562045335769653, "learning_rate": 1.1576521073124513e-05, "loss": 1.4364, "step": 8079 }, { "epoch": 0.8500789058390321, "grad_norm": 2.0499372482299805, "learning_rate": 1.1560610622826484e-05, "loss": 1.2246, "step": 8080 }, { "epoch": 0.8501841136244082, "grad_norm": 2.626941442489624, "learning_rate": 1.1544710442703488e-05, "loss": 1.1103, "step": 8081 }, { "epoch": 0.8502893214097843, "grad_norm": 1.4844361543655396, "learning_rate": 1.1528820534602002e-05, "loss": 1.3648, "step": 8082 }, { "epoch": 0.8503945291951605, "grad_norm": 1.9580860137939453, "learning_rate": 1.1512940900367275e-05, "loss": 1.21, "step": 8083 }, { "epoch": 0.8504997369805366, "grad_norm": 1.9782524108886719, "learning_rate": 1.1497071541843306e-05, "loss": 1.9595, "step": 8084 }, { "epoch": 0.8506049447659126, "grad_norm": 1.1815834045410156, "learning_rate": 1.1481212460873014e-05, "loss": 1.5414, "step": 8085 }, { "epoch": 0.8507101525512888, "grad_norm": 1.4871257543563843, "learning_rate": 1.1465363659298023e-05, "loss": 1.876, "step": 8086 }, { "epoch": 0.8508153603366649, "grad_norm": 1.5685290098190308, "learning_rate": 1.1449525138958805e-05, "loss": 1.6721, "step": 8087 }, { "epoch": 0.850920568122041, "grad_norm": 1.5825947523117065, "learning_rate": 1.1433696901694658e-05, "loss": 1.4306, "step": 8088 }, { "epoch": 0.8510257759074171, "grad_norm": 1.341077208518982, "learning_rate": 1.141787894934364e-05, "loss": 1.7303, "step": 8089 }, { "epoch": 0.8511309836927933, "grad_norm": 1.389611840248108, "learning_rate": 1.1402071283742632e-05, "loss": 1.4725, "step": 8090 }, { "epoch": 0.8512361914781694, "grad_norm": 2.9490091800689697, "learning_rate": 1.1386273906727363e-05, "loss": 1.549, "step": 8091 }, { "epoch": 0.8513413992635455, "grad_norm": 1.6228629350662231, "learning_rate": 1.1370486820132308e-05, "loss": 1.2005, "step": 8092 }, { "epoch": 0.8514466070489216, "grad_norm": 1.9842947721481323, "learning_rate": 1.1354710025790738e-05, "loss": 1.4727, "step": 8093 }, { "epoch": 0.8515518148342978, "grad_norm": 2.101422071456909, "learning_rate": 1.1338943525534818e-05, "loss": 1.7552, "step": 8094 }, { "epoch": 0.8516570226196739, "grad_norm": 1.4584681987762451, "learning_rate": 1.1323187321195439e-05, "loss": 1.9696, "step": 8095 }, { "epoch": 0.85176223040505, "grad_norm": 2.264514446258545, "learning_rate": 1.1307441414602282e-05, "loss": 1.3978, "step": 8096 }, { "epoch": 0.8518674381904261, "grad_norm": 1.4211277961730957, "learning_rate": 1.1291705807583918e-05, "loss": 1.4239, "step": 8097 }, { "epoch": 0.8519726459758022, "grad_norm": 2.931905508041382, "learning_rate": 1.1275980501967642e-05, "loss": 1.3678, "step": 8098 }, { "epoch": 0.8520778537611783, "grad_norm": 1.608646273612976, "learning_rate": 1.126026549957958e-05, "loss": 1.7463, "step": 8099 }, { "epoch": 0.8521830615465544, "grad_norm": 2.0848610401153564, "learning_rate": 1.1244560802244686e-05, "loss": 1.5804, "step": 8100 }, { "epoch": 0.8522882693319306, "grad_norm": 1.426628589630127, "learning_rate": 1.1228866411786687e-05, "loss": 1.608, "step": 8101 }, { "epoch": 0.8523934771173067, "grad_norm": 1.9477870464324951, "learning_rate": 1.1213182330028104e-05, "loss": 1.4227, "step": 8102 }, { "epoch": 0.8524986849026828, "grad_norm": 1.8772066831588745, "learning_rate": 1.1197508558790304e-05, "loss": 1.3724, "step": 8103 }, { "epoch": 0.8526038926880589, "grad_norm": 1.8329577445983887, "learning_rate": 1.1181845099893384e-05, "loss": 1.495, "step": 8104 }, { "epoch": 0.8527091004734351, "grad_norm": 1.718479871749878, "learning_rate": 1.1166191955156346e-05, "loss": 1.5246, "step": 8105 }, { "epoch": 0.8528143082588111, "grad_norm": 2.2497758865356445, "learning_rate": 1.1150549126396914e-05, "loss": 1.773, "step": 8106 }, { "epoch": 0.8529195160441873, "grad_norm": 1.177225947380066, "learning_rate": 1.1134916615431611e-05, "loss": 1.7879, "step": 8107 }, { "epoch": 0.8530247238295634, "grad_norm": 1.9921637773513794, "learning_rate": 1.1119294424075843e-05, "loss": 1.4187, "step": 8108 }, { "epoch": 0.8531299316149396, "grad_norm": 2.8252499103546143, "learning_rate": 1.1103682554143736e-05, "loss": 1.5446, "step": 8109 }, { "epoch": 0.8532351394003156, "grad_norm": 1.615925669670105, "learning_rate": 1.1088081007448214e-05, "loss": 1.9703, "step": 8110 }, { "epoch": 0.8533403471856917, "grad_norm": 1.6888537406921387, "learning_rate": 1.10724897858011e-05, "loss": 1.6726, "step": 8111 }, { "epoch": 0.8534455549710679, "grad_norm": 1.7525259256362915, "learning_rate": 1.1056908891012884e-05, "loss": 1.2526, "step": 8112 }, { "epoch": 0.8535507627564439, "grad_norm": 1.5043063163757324, "learning_rate": 1.104133832489298e-05, "loss": 1.8318, "step": 8113 }, { "epoch": 0.8536559705418201, "grad_norm": 1.4781932830810547, "learning_rate": 1.1025778089249527e-05, "loss": 1.7238, "step": 8114 }, { "epoch": 0.8537611783271962, "grad_norm": 1.9893333911895752, "learning_rate": 1.1010228185889449e-05, "loss": 1.5819, "step": 8115 }, { "epoch": 0.8538663861125724, "grad_norm": 2.0442187786102295, "learning_rate": 1.0994688616618565e-05, "loss": 1.9624, "step": 8116 }, { "epoch": 0.8539715938979484, "grad_norm": 1.369695782661438, "learning_rate": 1.09791593832414e-05, "loss": 2.0449, "step": 8117 }, { "epoch": 0.8540768016833246, "grad_norm": 2.589287042617798, "learning_rate": 1.09636404875613e-05, "loss": 1.9639, "step": 8118 }, { "epoch": 0.8541820094687007, "grad_norm": 1.5649428367614746, "learning_rate": 1.0948131931380457e-05, "loss": 1.2722, "step": 8119 }, { "epoch": 0.8542872172540767, "grad_norm": 1.4404014348983765, "learning_rate": 1.0932633716499818e-05, "loss": 1.7971, "step": 8120 }, { "epoch": 0.8543924250394529, "grad_norm": 1.283900260925293, "learning_rate": 1.0917145844719101e-05, "loss": 1.5276, "step": 8121 }, { "epoch": 0.854497632824829, "grad_norm": 1.41376531124115, "learning_rate": 1.0901668317836933e-05, "loss": 1.4866, "step": 8122 }, { "epoch": 0.8546028406102052, "grad_norm": 1.4160473346710205, "learning_rate": 1.088620113765061e-05, "loss": 1.8107, "step": 8123 }, { "epoch": 0.8547080483955812, "grad_norm": 1.5706056356430054, "learning_rate": 1.0870744305956315e-05, "loss": 1.4278, "step": 8124 }, { "epoch": 0.8548132561809574, "grad_norm": 1.7656512260437012, "learning_rate": 1.0855297824548982e-05, "loss": 1.6278, "step": 8125 }, { "epoch": 0.8549184639663335, "grad_norm": 1.3173682689666748, "learning_rate": 1.0839861695222354e-05, "loss": 1.8172, "step": 8126 }, { "epoch": 0.8550236717517097, "grad_norm": 1.5881686210632324, "learning_rate": 1.0824435919769005e-05, "loss": 2.1094, "step": 8127 }, { "epoch": 0.8551288795370857, "grad_norm": 2.1221399307250977, "learning_rate": 1.080902049998026e-05, "loss": 1.5415, "step": 8128 }, { "epoch": 0.8552340873224619, "grad_norm": 2.114959955215454, "learning_rate": 1.0793615437646254e-05, "loss": 0.6612, "step": 8129 }, { "epoch": 0.855339295107838, "grad_norm": 1.5936521291732788, "learning_rate": 1.0778220734555955e-05, "loss": 1.7225, "step": 8130 }, { "epoch": 0.855444502893214, "grad_norm": 1.4308537244796753, "learning_rate": 1.0762836392497078e-05, "loss": 1.5506, "step": 8131 }, { "epoch": 0.8555497106785902, "grad_norm": 1.9329140186309814, "learning_rate": 1.0747462413256148e-05, "loss": 1.2109, "step": 8132 }, { "epoch": 0.8556549184639664, "grad_norm": 1.7347162961959839, "learning_rate": 1.0732098798618517e-05, "loss": 1.8518, "step": 8133 }, { "epoch": 0.8557601262493425, "grad_norm": 1.4052585363388062, "learning_rate": 1.07167455503683e-05, "loss": 1.7604, "step": 8134 }, { "epoch": 0.8558653340347185, "grad_norm": 2.470710039138794, "learning_rate": 1.0701402670288407e-05, "loss": 1.5943, "step": 8135 }, { "epoch": 0.8559705418200947, "grad_norm": 1.489478349685669, "learning_rate": 1.0686070160160588e-05, "loss": 1.3159, "step": 8136 }, { "epoch": 0.8560757496054708, "grad_norm": 1.2157397270202637, "learning_rate": 1.067074802176533e-05, "loss": 1.3678, "step": 8137 }, { "epoch": 0.8561809573908469, "grad_norm": 1.8080435991287231, "learning_rate": 1.0655436256881935e-05, "loss": 1.366, "step": 8138 }, { "epoch": 0.856286165176223, "grad_norm": 1.8651374578475952, "learning_rate": 1.0640134867288542e-05, "loss": 1.5744, "step": 8139 }, { "epoch": 0.8563913729615992, "grad_norm": 1.4366014003753662, "learning_rate": 1.0624843854762034e-05, "loss": 1.7384, "step": 8140 }, { "epoch": 0.8564965807469753, "grad_norm": 1.5148162841796875, "learning_rate": 1.0609563221078079e-05, "loss": 1.2506, "step": 8141 }, { "epoch": 0.8566017885323514, "grad_norm": 1.3511996269226074, "learning_rate": 1.059429296801121e-05, "loss": 1.7989, "step": 8142 }, { "epoch": 0.8567069963177275, "grad_norm": 1.3725030422210693, "learning_rate": 1.05790330973347e-05, "loss": 1.2092, "step": 8143 }, { "epoch": 0.8568122041031037, "grad_norm": 1.480331301689148, "learning_rate": 1.056378361082062e-05, "loss": 1.7247, "step": 8144 }, { "epoch": 0.8569174118884797, "grad_norm": 1.1043062210083008, "learning_rate": 1.0548544510239833e-05, "loss": 2.1902, "step": 8145 }, { "epoch": 0.8570226196738558, "grad_norm": 1.8570284843444824, "learning_rate": 1.053331579736201e-05, "loss": 1.2777, "step": 8146 }, { "epoch": 0.857127827459232, "grad_norm": 1.4823728799819946, "learning_rate": 1.0518097473955624e-05, "loss": 1.4131, "step": 8147 }, { "epoch": 0.8572330352446081, "grad_norm": 1.5833871364593506, "learning_rate": 1.0502889541787918e-05, "loss": 1.4025, "step": 8148 }, { "epoch": 0.8573382430299842, "grad_norm": 1.5487840175628662, "learning_rate": 1.0487692002624937e-05, "loss": 2.1141, "step": 8149 }, { "epoch": 0.8574434508153603, "grad_norm": 2.5426361560821533, "learning_rate": 1.0472504858231535e-05, "loss": 1.4438, "step": 8150 }, { "epoch": 0.8575486586007365, "grad_norm": 1.742750644683838, "learning_rate": 1.0457328110371345e-05, "loss": 1.3274, "step": 8151 }, { "epoch": 0.8576538663861125, "grad_norm": 1.529963731765747, "learning_rate": 1.0442161760806756e-05, "loss": 1.545, "step": 8152 }, { "epoch": 0.8577590741714887, "grad_norm": 1.5266475677490234, "learning_rate": 1.042700581129904e-05, "loss": 1.4656, "step": 8153 }, { "epoch": 0.8578642819568648, "grad_norm": 2.2451913356781006, "learning_rate": 1.0411860263608186e-05, "loss": 1.5722, "step": 8154 }, { "epoch": 0.857969489742241, "grad_norm": 1.2465153932571411, "learning_rate": 1.0396725119492967e-05, "loss": 1.718, "step": 8155 }, { "epoch": 0.858074697527617, "grad_norm": 1.9138121604919434, "learning_rate": 1.038160038071102e-05, "loss": 1.7302, "step": 8156 }, { "epoch": 0.8581799053129932, "grad_norm": 1.7178139686584473, "learning_rate": 1.036648604901871e-05, "loss": 2.008, "step": 8157 }, { "epoch": 0.8582851130983693, "grad_norm": 1.064705729484558, "learning_rate": 1.0351382126171227e-05, "loss": 1.4776, "step": 8158 }, { "epoch": 0.8583903208837455, "grad_norm": 2.823317766189575, "learning_rate": 1.033628861392253e-05, "loss": 1.6744, "step": 8159 }, { "epoch": 0.8584955286691215, "grad_norm": 1.7388535737991333, "learning_rate": 1.0321205514025357e-05, "loss": 1.3214, "step": 8160 }, { "epoch": 0.8586007364544976, "grad_norm": 2.498295783996582, "learning_rate": 1.0306132828231318e-05, "loss": 1.5151, "step": 8161 }, { "epoch": 0.8587059442398738, "grad_norm": 2.1892495155334473, "learning_rate": 1.0291070558290705e-05, "loss": 1.1706, "step": 8162 }, { "epoch": 0.8588111520252498, "grad_norm": 1.274142861366272, "learning_rate": 1.027601870595265e-05, "loss": 1.5027, "step": 8163 }, { "epoch": 0.858916359810626, "grad_norm": 1.8496899604797363, "learning_rate": 1.02609772729651e-05, "loss": 1.3466, "step": 8164 }, { "epoch": 0.8590215675960021, "grad_norm": 1.2990412712097168, "learning_rate": 1.0245946261074769e-05, "loss": 1.7513, "step": 8165 }, { "epoch": 0.8591267753813783, "grad_norm": 1.4909148216247559, "learning_rate": 1.0230925672027137e-05, "loss": 1.5437, "step": 8166 }, { "epoch": 0.8592319831667543, "grad_norm": 1.508446455001831, "learning_rate": 1.0215915507566499e-05, "loss": 1.4974, "step": 8167 }, { "epoch": 0.8593371909521305, "grad_norm": 1.729293942451477, "learning_rate": 1.0200915769435937e-05, "loss": 1.8774, "step": 8168 }, { "epoch": 0.8594423987375066, "grad_norm": 2.2102980613708496, "learning_rate": 1.0185926459377326e-05, "loss": 0.6269, "step": 8169 }, { "epoch": 0.8595476065228826, "grad_norm": 1.7405325174331665, "learning_rate": 1.017094757913134e-05, "loss": 1.7546, "step": 8170 }, { "epoch": 0.8596528143082588, "grad_norm": 1.6175463199615479, "learning_rate": 1.0155979130437387e-05, "loss": 2.0148, "step": 8171 }, { "epoch": 0.8597580220936349, "grad_norm": 1.628763198852539, "learning_rate": 1.0141021115033745e-05, "loss": 1.0579, "step": 8172 }, { "epoch": 0.8598632298790111, "grad_norm": 1.998671293258667, "learning_rate": 1.012607353465742e-05, "loss": 1.5527, "step": 8173 }, { "epoch": 0.8599684376643871, "grad_norm": 2.1311984062194824, "learning_rate": 1.0111136391044218e-05, "loss": 1.4062, "step": 8174 }, { "epoch": 0.8600736454497633, "grad_norm": 2.120063304901123, "learning_rate": 1.009620968592876e-05, "loss": 1.3496, "step": 8175 }, { "epoch": 0.8601788532351394, "grad_norm": 1.719994306564331, "learning_rate": 1.0081293421044435e-05, "loss": 1.2293, "step": 8176 }, { "epoch": 0.8602840610205155, "grad_norm": 1.8668345212936401, "learning_rate": 1.0066387598123383e-05, "loss": 1.7347, "step": 8177 }, { "epoch": 0.8603892688058916, "grad_norm": 1.9415462017059326, "learning_rate": 1.0051492218896619e-05, "loss": 1.2794, "step": 8178 }, { "epoch": 0.8604944765912678, "grad_norm": 2.177868604660034, "learning_rate": 1.0036607285093857e-05, "loss": 1.6339, "step": 8179 }, { "epoch": 0.8605996843766439, "grad_norm": 1.627882719039917, "learning_rate": 1.002173279844364e-05, "loss": 1.5512, "step": 8180 }, { "epoch": 0.86070489216202, "grad_norm": 1.8268787860870361, "learning_rate": 1.0006868760673327e-05, "loss": 2.4057, "step": 8181 }, { "epoch": 0.8608100999473961, "grad_norm": 1.1629137992858887, "learning_rate": 9.992015173508995e-06, "loss": 1.5108, "step": 8182 }, { "epoch": 0.8609153077327723, "grad_norm": 1.567643404006958, "learning_rate": 9.977172038675531e-06, "loss": 1.5915, "step": 8183 }, { "epoch": 0.8610205155181483, "grad_norm": 1.6565831899642944, "learning_rate": 9.962339357896666e-06, "loss": 1.6012, "step": 8184 }, { "epoch": 0.8611257233035244, "grad_norm": 1.5608383417129517, "learning_rate": 9.947517132894835e-06, "loss": 1.5782, "step": 8185 }, { "epoch": 0.8612309310889006, "grad_norm": 1.3910160064697266, "learning_rate": 9.932705365391293e-06, "loss": 1.7121, "step": 8186 }, { "epoch": 0.8613361388742767, "grad_norm": 1.9219480752944946, "learning_rate": 9.91790405710613e-06, "loss": 1.6089, "step": 8187 }, { "epoch": 0.8614413466596528, "grad_norm": 1.804298996925354, "learning_rate": 9.903113209758096e-06, "loss": 1.7155, "step": 8188 }, { "epoch": 0.8615465544450289, "grad_norm": 0.9984854459762573, "learning_rate": 9.88833282506486e-06, "loss": 1.7646, "step": 8189 }, { "epoch": 0.8616517622304051, "grad_norm": 1.9518897533416748, "learning_rate": 9.873562904742805e-06, "loss": 1.1884, "step": 8190 }, { "epoch": 0.8617569700157812, "grad_norm": 1.4349753856658936, "learning_rate": 9.858803450507081e-06, "loss": 1.8053, "step": 8191 }, { "epoch": 0.8618621778011573, "grad_norm": 1.8369276523590088, "learning_rate": 9.844054464071717e-06, "loss": 1.9263, "step": 8192 }, { "epoch": 0.8619673855865334, "grad_norm": 1.3132811784744263, "learning_rate": 9.829315947149431e-06, "loss": 1.5865, "step": 8193 }, { "epoch": 0.8620725933719096, "grad_norm": 2.2313907146453857, "learning_rate": 9.814587901451733e-06, "loss": 0.9993, "step": 8194 }, { "epoch": 0.8621778011572856, "grad_norm": 1.455952525138855, "learning_rate": 9.799870328688988e-06, "loss": 1.4218, "step": 8195 }, { "epoch": 0.8622830089426617, "grad_norm": 1.646437644958496, "learning_rate": 9.785163230570282e-06, "loss": 1.7, "step": 8196 }, { "epoch": 0.8623882167280379, "grad_norm": 1.9158176183700562, "learning_rate": 9.770466608803475e-06, "loss": 2.0093, "step": 8197 }, { "epoch": 0.862493424513414, "grad_norm": 1.696004033088684, "learning_rate": 9.755780465095287e-06, "loss": 1.4588, "step": 8198 }, { "epoch": 0.8625986322987901, "grad_norm": 1.7793548107147217, "learning_rate": 9.741104801151146e-06, "loss": 1.8082, "step": 8199 }, { "epoch": 0.8627038400841662, "grad_norm": 2.2487435340881348, "learning_rate": 9.726439618675276e-06, "loss": 1.5888, "step": 8200 }, { "epoch": 0.8628090478695424, "grad_norm": 1.4217240810394287, "learning_rate": 9.711784919370715e-06, "loss": 1.4924, "step": 8201 }, { "epoch": 0.8629142556549184, "grad_norm": 1.571770191192627, "learning_rate": 9.697140704939245e-06, "loss": 1.2066, "step": 8202 }, { "epoch": 0.8630194634402946, "grad_norm": 1.2527103424072266, "learning_rate": 9.682506977081496e-06, "loss": 1.5845, "step": 8203 }, { "epoch": 0.8631246712256707, "grad_norm": 1.499109148979187, "learning_rate": 9.667883737496786e-06, "loss": 0.9014, "step": 8204 }, { "epoch": 0.8632298790110469, "grad_norm": 2.3821563720703125, "learning_rate": 9.653270987883267e-06, "loss": 1.6248, "step": 8205 }, { "epoch": 0.8633350867964229, "grad_norm": 1.5363044738769531, "learning_rate": 9.638668729937905e-06, "loss": 1.9727, "step": 8206 }, { "epoch": 0.863440294581799, "grad_norm": 1.1496707201004028, "learning_rate": 9.624076965356388e-06, "loss": 1.8011, "step": 8207 }, { "epoch": 0.8635455023671752, "grad_norm": 1.7943934202194214, "learning_rate": 9.609495695833216e-06, "loss": 1.4859, "step": 8208 }, { "epoch": 0.8636507101525512, "grad_norm": 2.865455389022827, "learning_rate": 9.594924923061655e-06, "loss": 1.6245, "step": 8209 }, { "epoch": 0.8637559179379274, "grad_norm": 1.4639524221420288, "learning_rate": 9.580364648733775e-06, "loss": 1.8505, "step": 8210 }, { "epoch": 0.8638611257233035, "grad_norm": 1.4049338102340698, "learning_rate": 9.56581487454038e-06, "loss": 1.5261, "step": 8211 }, { "epoch": 0.8639663335086797, "grad_norm": 1.3107516765594482, "learning_rate": 9.551275602171127e-06, "loss": 1.493, "step": 8212 }, { "epoch": 0.8640715412940557, "grad_norm": 1.295406460762024, "learning_rate": 9.53674683331438e-06, "loss": 1.3942, "step": 8213 }, { "epoch": 0.8641767490794319, "grad_norm": 1.5618852376937866, "learning_rate": 9.522228569657343e-06, "loss": 2.2107, "step": 8214 }, { "epoch": 0.864281956864808, "grad_norm": 1.705565094947815, "learning_rate": 9.507720812885978e-06, "loss": 1.8339, "step": 8215 }, { "epoch": 0.8643871646501841, "grad_norm": 2.110238552093506, "learning_rate": 9.493223564684994e-06, "loss": 1.6849, "step": 8216 }, { "epoch": 0.8644923724355602, "grad_norm": 1.721220850944519, "learning_rate": 9.478736826737944e-06, "loss": 1.3985, "step": 8217 }, { "epoch": 0.8645975802209364, "grad_norm": 1.481402039527893, "learning_rate": 9.464260600727104e-06, "loss": 1.7841, "step": 8218 }, { "epoch": 0.8647027880063125, "grad_norm": 2.818570852279663, "learning_rate": 9.44979488833353e-06, "loss": 1.3507, "step": 8219 }, { "epoch": 0.8648079957916885, "grad_norm": 1.7059146165847778, "learning_rate": 9.435339691237121e-06, "loss": 1.4943, "step": 8220 }, { "epoch": 0.8649132035770647, "grad_norm": 1.5640789270401, "learning_rate": 9.420895011116492e-06, "loss": 1.7849, "step": 8221 }, { "epoch": 0.8650184113624408, "grad_norm": 2.26628041267395, "learning_rate": 9.406460849649045e-06, "loss": 1.6047, "step": 8222 }, { "epoch": 0.865123619147817, "grad_norm": 1.201790452003479, "learning_rate": 9.392037208510996e-06, "loss": 1.8968, "step": 8223 }, { "epoch": 0.865228826933193, "grad_norm": 1.8288311958312988, "learning_rate": 9.37762408937729e-06, "loss": 0.9009, "step": 8224 }, { "epoch": 0.8653340347185692, "grad_norm": 1.5502636432647705, "learning_rate": 9.36322149392168e-06, "loss": 1.1525, "step": 8225 }, { "epoch": 0.8654392425039453, "grad_norm": 1.8618223667144775, "learning_rate": 9.348829423816718e-06, "loss": 1.6285, "step": 8226 }, { "epoch": 0.8655444502893214, "grad_norm": 1.6962296962738037, "learning_rate": 9.334447880733676e-06, "loss": 2.1117, "step": 8227 }, { "epoch": 0.8656496580746975, "grad_norm": 1.1905288696289062, "learning_rate": 9.320076866342642e-06, "loss": 1.6631, "step": 8228 }, { "epoch": 0.8657548658600737, "grad_norm": 1.2927476167678833, "learning_rate": 9.30571638231249e-06, "loss": 2.1606, "step": 8229 }, { "epoch": 0.8658600736454498, "grad_norm": 1.4459666013717651, "learning_rate": 9.291366430310844e-06, "loss": 1.3559, "step": 8230 }, { "epoch": 0.8659652814308259, "grad_norm": 1.827365517616272, "learning_rate": 9.277027012004125e-06, "loss": 1.6095, "step": 8231 }, { "epoch": 0.866070489216202, "grad_norm": 1.9253352880477905, "learning_rate": 9.262698129057512e-06, "loss": 1.2613, "step": 8232 }, { "epoch": 0.8661756970015781, "grad_norm": 1.1855809688568115, "learning_rate": 9.248379783134952e-06, "loss": 1.8954, "step": 8233 }, { "epoch": 0.8662809047869542, "grad_norm": 2.3653783798217773, "learning_rate": 9.234071975899228e-06, "loss": 1.2972, "step": 8234 }, { "epoch": 0.8663861125723303, "grad_norm": 1.508872389793396, "learning_rate": 9.21977470901184e-06, "loss": 1.6728, "step": 8235 }, { "epoch": 0.8664913203577065, "grad_norm": 1.531275749206543, "learning_rate": 9.205487984133076e-06, "loss": 1.2766, "step": 8236 }, { "epoch": 0.8665965281430826, "grad_norm": 1.4290529489517212, "learning_rate": 9.191211802922017e-06, "loss": 1.7471, "step": 8237 }, { "epoch": 0.8667017359284587, "grad_norm": 1.9806660413742065, "learning_rate": 9.176946167036516e-06, "loss": 1.0977, "step": 8238 }, { "epoch": 0.8668069437138348, "grad_norm": 1.909472107887268, "learning_rate": 9.162691078133157e-06, "loss": 1.8081, "step": 8239 }, { "epoch": 0.866912151499211, "grad_norm": 2.2242584228515625, "learning_rate": 9.148446537867383e-06, "loss": 1.6033, "step": 8240 }, { "epoch": 0.867017359284587, "grad_norm": 1.3341119289398193, "learning_rate": 9.134212547893351e-06, "loss": 1.9168, "step": 8241 }, { "epoch": 0.8671225670699632, "grad_norm": 1.836234211921692, "learning_rate": 9.11998910986398e-06, "loss": 2.2142, "step": 8242 }, { "epoch": 0.8672277748553393, "grad_norm": 1.667237401008606, "learning_rate": 9.105776225431029e-06, "loss": 1.3457, "step": 8243 }, { "epoch": 0.8673329826407155, "grad_norm": 1.6611175537109375, "learning_rate": 9.091573896244976e-06, "loss": 1.3769, "step": 8244 }, { "epoch": 0.8674381904260915, "grad_norm": 1.894445776939392, "learning_rate": 9.07738212395508e-06, "loss": 1.6441, "step": 8245 }, { "epoch": 0.8675433982114676, "grad_norm": 1.9377069473266602, "learning_rate": 9.063200910209413e-06, "loss": 1.4411, "step": 8246 }, { "epoch": 0.8676486059968438, "grad_norm": 1.5607261657714844, "learning_rate": 9.049030256654777e-06, "loss": 1.5803, "step": 8247 }, { "epoch": 0.8677538137822198, "grad_norm": 1.2471551895141602, "learning_rate": 9.034870164936737e-06, "loss": 1.4342, "step": 8248 }, { "epoch": 0.867859021567596, "grad_norm": 1.3888238668441772, "learning_rate": 9.020720636699709e-06, "loss": 1.5969, "step": 8249 }, { "epoch": 0.8679642293529721, "grad_norm": 3.419867753982544, "learning_rate": 9.006581673586789e-06, "loss": 1.5199, "step": 8250 }, { "epoch": 0.8680694371383483, "grad_norm": 1.7983578443527222, "learning_rate": 8.992453277239942e-06, "loss": 1.5565, "step": 8251 }, { "epoch": 0.8681746449237243, "grad_norm": 1.5577338933944702, "learning_rate": 8.978335449299791e-06, "loss": 1.5991, "step": 8252 }, { "epoch": 0.8682798527091005, "grad_norm": 2.433664321899414, "learning_rate": 8.9642281914058e-06, "loss": 1.3975, "step": 8253 }, { "epoch": 0.8683850604944766, "grad_norm": 1.6767748594284058, "learning_rate": 8.950131505196236e-06, "loss": 1.9497, "step": 8254 }, { "epoch": 0.8684902682798528, "grad_norm": 1.431541919708252, "learning_rate": 8.936045392308079e-06, "loss": 2.1185, "step": 8255 }, { "epoch": 0.8685954760652288, "grad_norm": 2.1313490867614746, "learning_rate": 8.921969854377088e-06, "loss": 1.7141, "step": 8256 }, { "epoch": 0.868700683850605, "grad_norm": 1.8773490190505981, "learning_rate": 8.907904893037833e-06, "loss": 1.592, "step": 8257 }, { "epoch": 0.8688058916359811, "grad_norm": 1.4465880393981934, "learning_rate": 8.893850509923619e-06, "loss": 1.3952, "step": 8258 }, { "epoch": 0.8689110994213571, "grad_norm": 1.6788699626922607, "learning_rate": 8.87980670666655e-06, "loss": 1.2683, "step": 8259 }, { "epoch": 0.8690163072067333, "grad_norm": 1.4873356819152832, "learning_rate": 8.865773484897477e-06, "loss": 2.0945, "step": 8260 }, { "epoch": 0.8691215149921094, "grad_norm": 1.6656405925750732, "learning_rate": 8.85175084624602e-06, "loss": 1.4234, "step": 8261 }, { "epoch": 0.8692267227774856, "grad_norm": 1.3971835374832153, "learning_rate": 8.8377387923406e-06, "loss": 1.4474, "step": 8262 }, { "epoch": 0.8693319305628616, "grad_norm": 1.494502305984497, "learning_rate": 8.82373732480839e-06, "loss": 1.8342, "step": 8263 }, { "epoch": 0.8694371383482378, "grad_norm": 1.7074618339538574, "learning_rate": 8.809746445275312e-06, "loss": 1.4596, "step": 8264 }, { "epoch": 0.8695423461336139, "grad_norm": 2.2928879261016846, "learning_rate": 8.795766155366114e-06, "loss": 1.4602, "step": 8265 }, { "epoch": 0.86964755391899, "grad_norm": 1.632634162902832, "learning_rate": 8.781796456704262e-06, "loss": 1.6407, "step": 8266 }, { "epoch": 0.8697527617043661, "grad_norm": 1.7646962404251099, "learning_rate": 8.767837350912e-06, "loss": 1.6187, "step": 8267 }, { "epoch": 0.8698579694897423, "grad_norm": 1.2554585933685303, "learning_rate": 8.75388883961038e-06, "loss": 1.3565, "step": 8268 }, { "epoch": 0.8699631772751184, "grad_norm": 2.188851833343506, "learning_rate": 8.739950924419183e-06, "loss": 1.4143, "step": 8269 }, { "epoch": 0.8700683850604944, "grad_norm": 1.2379424571990967, "learning_rate": 8.726023606956956e-06, "loss": 2.0142, "step": 8270 }, { "epoch": 0.8701735928458706, "grad_norm": 1.5064557790756226, "learning_rate": 8.712106888841064e-06, "loss": 1.8644, "step": 8271 }, { "epoch": 0.8702788006312467, "grad_norm": 2.4556884765625, "learning_rate": 8.698200771687592e-06, "loss": 1.1037, "step": 8272 }, { "epoch": 0.8703840084166228, "grad_norm": 0.9739543199539185, "learning_rate": 8.684305257111425e-06, "loss": 1.5688, "step": 8273 }, { "epoch": 0.8704892162019989, "grad_norm": 1.589955449104309, "learning_rate": 8.670420346726182e-06, "loss": 1.9209, "step": 8274 }, { "epoch": 0.8705944239873751, "grad_norm": 1.278948187828064, "learning_rate": 8.656546042144275e-06, "loss": 1.406, "step": 8275 }, { "epoch": 0.8706996317727512, "grad_norm": 1.640868067741394, "learning_rate": 8.642682344976904e-06, "loss": 1.4179, "step": 8276 }, { "epoch": 0.8708048395581273, "grad_norm": 1.3409028053283691, "learning_rate": 8.628829256833992e-06, "loss": 1.4, "step": 8277 }, { "epoch": 0.8709100473435034, "grad_norm": 1.4325164556503296, "learning_rate": 8.614986779324252e-06, "loss": 1.6548, "step": 8278 }, { "epoch": 0.8710152551288796, "grad_norm": 2.271646022796631, "learning_rate": 8.601154914055187e-06, "loss": 1.3482, "step": 8279 }, { "epoch": 0.8711204629142556, "grad_norm": 1.5409026145935059, "learning_rate": 8.587333662633035e-06, "loss": 1.5707, "step": 8280 }, { "epoch": 0.8712256706996317, "grad_norm": 1.1222127676010132, "learning_rate": 8.57352302666279e-06, "loss": 1.4839, "step": 8281 }, { "epoch": 0.8713308784850079, "grad_norm": 1.5568034648895264, "learning_rate": 8.559723007748278e-06, "loss": 1.5636, "step": 8282 }, { "epoch": 0.871436086270384, "grad_norm": 1.717877984046936, "learning_rate": 8.545933607492019e-06, "loss": 1.3919, "step": 8283 }, { "epoch": 0.8715412940557601, "grad_norm": 1.7419726848602295, "learning_rate": 8.53215482749532e-06, "loss": 1.164, "step": 8284 }, { "epoch": 0.8716465018411362, "grad_norm": 1.5329818725585938, "learning_rate": 8.518386669358313e-06, "loss": 1.5617, "step": 8285 }, { "epoch": 0.8717517096265124, "grad_norm": 1.6898797750473022, "learning_rate": 8.50462913467982e-06, "loss": 1.2344, "step": 8286 }, { "epoch": 0.8718569174118885, "grad_norm": 2.2260582447052, "learning_rate": 8.490882225057428e-06, "loss": 1.6568, "step": 8287 }, { "epoch": 0.8719621251972646, "grad_norm": 1.553295612335205, "learning_rate": 8.477145942087583e-06, "loss": 1.0574, "step": 8288 }, { "epoch": 0.8720673329826407, "grad_norm": 1.8379864692687988, "learning_rate": 8.463420287365386e-06, "loss": 1.7292, "step": 8289 }, { "epoch": 0.8721725407680169, "grad_norm": 2.359562873840332, "learning_rate": 8.449705262484763e-06, "loss": 1.2984, "step": 8290 }, { "epoch": 0.8722777485533929, "grad_norm": 1.2469983100891113, "learning_rate": 8.436000869038418e-06, "loss": 1.3601, "step": 8291 }, { "epoch": 0.8723829563387691, "grad_norm": 1.5775099992752075, "learning_rate": 8.422307108617777e-06, "loss": 1.3026, "step": 8292 }, { "epoch": 0.8724881641241452, "grad_norm": 1.6676642894744873, "learning_rate": 8.408623982813036e-06, "loss": 1.1921, "step": 8293 }, { "epoch": 0.8725933719095214, "grad_norm": 1.9857598543167114, "learning_rate": 8.39495149321322e-06, "loss": 1.7544, "step": 8294 }, { "epoch": 0.8726985796948974, "grad_norm": 2.608306407928467, "learning_rate": 8.381289641405998e-06, "loss": 1.2208, "step": 8295 }, { "epoch": 0.8728037874802735, "grad_norm": 1.3763883113861084, "learning_rate": 8.367638428977942e-06, "loss": 1.5372, "step": 8296 }, { "epoch": 0.8729089952656497, "grad_norm": 1.8553149700164795, "learning_rate": 8.353997857514296e-06, "loss": 1.6343, "step": 8297 }, { "epoch": 0.8730142030510257, "grad_norm": 2.202342987060547, "learning_rate": 8.34036792859908e-06, "loss": 1.5843, "step": 8298 }, { "epoch": 0.8731194108364019, "grad_norm": 2.3540780544281006, "learning_rate": 8.32674864381513e-06, "loss": 1.5165, "step": 8299 }, { "epoch": 0.873224618621778, "grad_norm": 2.0896689891815186, "learning_rate": 8.31314000474398e-06, "loss": 1.613, "step": 8300 }, { "epoch": 0.8733298264071542, "grad_norm": 1.7080516815185547, "learning_rate": 8.299542012965944e-06, "loss": 1.3788, "step": 8301 }, { "epoch": 0.8734350341925302, "grad_norm": 1.593510627746582, "learning_rate": 8.285954670060159e-06, "loss": 1.6948, "step": 8302 }, { "epoch": 0.8735402419779064, "grad_norm": 2.1248574256896973, "learning_rate": 8.272377977604439e-06, "loss": 1.6842, "step": 8303 }, { "epoch": 0.8736454497632825, "grad_norm": 1.1334164142608643, "learning_rate": 8.258811937175403e-06, "loss": 2.0723, "step": 8304 }, { "epoch": 0.8737506575486585, "grad_norm": 1.6127103567123413, "learning_rate": 8.245256550348456e-06, "loss": 1.4466, "step": 8305 }, { "epoch": 0.8738558653340347, "grad_norm": 1.4904117584228516, "learning_rate": 8.231711818697708e-06, "loss": 1.4292, "step": 8306 }, { "epoch": 0.8739610731194108, "grad_norm": 1.7392334938049316, "learning_rate": 8.218177743796096e-06, "loss": 2.2474, "step": 8307 }, { "epoch": 0.874066280904787, "grad_norm": 2.0337204933166504, "learning_rate": 8.204654327215267e-06, "loss": 1.369, "step": 8308 }, { "epoch": 0.874171488690163, "grad_norm": 1.8506286144256592, "learning_rate": 8.19114157052564e-06, "loss": 1.587, "step": 8309 }, { "epoch": 0.8742766964755392, "grad_norm": 1.861567497253418, "learning_rate": 8.177639475296451e-06, "loss": 0.9129, "step": 8310 }, { "epoch": 0.8743819042609153, "grad_norm": 1.4691027402877808, "learning_rate": 8.16414804309562e-06, "loss": 1.5196, "step": 8311 }, { "epoch": 0.8744871120462914, "grad_norm": 1.7774051427841187, "learning_rate": 8.150667275489842e-06, "loss": 2.4965, "step": 8312 }, { "epoch": 0.8745923198316675, "grad_norm": 1.6832690238952637, "learning_rate": 8.137197174044653e-06, "loss": 1.4504, "step": 8313 }, { "epoch": 0.8746975276170437, "grad_norm": 1.254486083984375, "learning_rate": 8.123737740324256e-06, "loss": 1.5565, "step": 8314 }, { "epoch": 0.8748027354024198, "grad_norm": 2.1630659103393555, "learning_rate": 8.110288975891634e-06, "loss": 1.232, "step": 8315 }, { "epoch": 0.8749079431877959, "grad_norm": 1.3778032064437866, "learning_rate": 8.096850882308593e-06, "loss": 1.517, "step": 8316 }, { "epoch": 0.875013150973172, "grad_norm": 2.6462600231170654, "learning_rate": 8.083423461135608e-06, "loss": 1.6253, "step": 8317 }, { "epoch": 0.8751183587585482, "grad_norm": 2.1237175464630127, "learning_rate": 8.070006713931988e-06, "loss": 1.6068, "step": 8318 }, { "epoch": 0.8752235665439243, "grad_norm": 1.620760202407837, "learning_rate": 8.056600642255773e-06, "loss": 1.4239, "step": 8319 }, { "epoch": 0.8753287743293003, "grad_norm": 1.3363070487976074, "learning_rate": 8.043205247663755e-06, "loss": 1.7418, "step": 8320 }, { "epoch": 0.8754339821146765, "grad_norm": 1.1547250747680664, "learning_rate": 8.029820531711518e-06, "loss": 1.7182, "step": 8321 }, { "epoch": 0.8755391899000526, "grad_norm": 1.5865908861160278, "learning_rate": 8.016446495953367e-06, "loss": 2.1783, "step": 8322 }, { "epoch": 0.8756443976854287, "grad_norm": 1.7956304550170898, "learning_rate": 8.00308314194238e-06, "loss": 1.5709, "step": 8323 }, { "epoch": 0.8757496054708048, "grad_norm": 1.6148009300231934, "learning_rate": 7.989730471230417e-06, "loss": 1.8065, "step": 8324 }, { "epoch": 0.875854813256181, "grad_norm": 1.6082184314727783, "learning_rate": 7.97638848536808e-06, "loss": 1.824, "step": 8325 }, { "epoch": 0.8759600210415571, "grad_norm": 1.5092358589172363, "learning_rate": 7.963057185904698e-06, "loss": 1.1529, "step": 8326 }, { "epoch": 0.8760652288269332, "grad_norm": 1.4924912452697754, "learning_rate": 7.949736574388433e-06, "loss": 1.3965, "step": 8327 }, { "epoch": 0.8761704366123093, "grad_norm": 1.484432339668274, "learning_rate": 7.936426652366147e-06, "loss": 1.5392, "step": 8328 }, { "epoch": 0.8762756443976855, "grad_norm": 1.8063507080078125, "learning_rate": 7.923127421383458e-06, "loss": 1.7397, "step": 8329 }, { "epoch": 0.8763808521830615, "grad_norm": 1.7138848304748535, "learning_rate": 7.909838882984799e-06, "loss": 1.9147, "step": 8330 }, { "epoch": 0.8764860599684376, "grad_norm": 1.6054270267486572, "learning_rate": 7.896561038713302e-06, "loss": 1.7124, "step": 8331 }, { "epoch": 0.8765912677538138, "grad_norm": 1.4371368885040283, "learning_rate": 7.883293890110865e-06, "loss": 1.3358, "step": 8332 }, { "epoch": 0.87669647553919, "grad_norm": 1.3768408298492432, "learning_rate": 7.870037438718191e-06, "loss": 1.4805, "step": 8333 }, { "epoch": 0.876801683324566, "grad_norm": 1.5090663433074951, "learning_rate": 7.856791686074694e-06, "loss": 1.819, "step": 8334 }, { "epoch": 0.8769068911099421, "grad_norm": 1.7968672513961792, "learning_rate": 7.84355663371854e-06, "loss": 1.5492, "step": 8335 }, { "epoch": 0.8770120988953183, "grad_norm": 1.5424824953079224, "learning_rate": 7.830332283186714e-06, "loss": 1.8034, "step": 8336 }, { "epoch": 0.8771173066806943, "grad_norm": 1.5214895009994507, "learning_rate": 7.817118636014886e-06, "loss": 1.3005, "step": 8337 }, { "epoch": 0.8772225144660705, "grad_norm": 1.3257561922073364, "learning_rate": 7.803915693737518e-06, "loss": 1.3967, "step": 8338 }, { "epoch": 0.8773277222514466, "grad_norm": 2.690847635269165, "learning_rate": 7.790723457887828e-06, "loss": 1.3393, "step": 8339 }, { "epoch": 0.8774329300368228, "grad_norm": 1.336316466331482, "learning_rate": 7.777541929997766e-06, "loss": 1.6733, "step": 8340 }, { "epoch": 0.8775381378221988, "grad_norm": 1.7110776901245117, "learning_rate": 7.7643711115981e-06, "loss": 1.7544, "step": 8341 }, { "epoch": 0.877643345607575, "grad_norm": 1.3267966508865356, "learning_rate": 7.751211004218295e-06, "loss": 1.7771, "step": 8342 }, { "epoch": 0.8777485533929511, "grad_norm": 1.5423023700714111, "learning_rate": 7.73806160938656e-06, "loss": 1.3971, "step": 8343 }, { "epoch": 0.8778537611783271, "grad_norm": 1.562389850616455, "learning_rate": 7.724922928629941e-06, "loss": 1.2311, "step": 8344 }, { "epoch": 0.8779589689637033, "grad_norm": 1.6092841625213623, "learning_rate": 7.711794963474173e-06, "loss": 2.0053, "step": 8345 }, { "epoch": 0.8780641767490794, "grad_norm": 1.8974779844284058, "learning_rate": 7.698677715443736e-06, "loss": 1.2428, "step": 8346 }, { "epoch": 0.8781693845344556, "grad_norm": 1.8776745796203613, "learning_rate": 7.685571186061934e-06, "loss": 1.9842, "step": 8347 }, { "epoch": 0.8782745923198316, "grad_norm": 1.4398527145385742, "learning_rate": 7.672475376850764e-06, "loss": 1.878, "step": 8348 }, { "epoch": 0.8783798001052078, "grad_norm": 1.1596499681472778, "learning_rate": 7.65939028933098e-06, "loss": 1.6491, "step": 8349 }, { "epoch": 0.8784850078905839, "grad_norm": 2.233144760131836, "learning_rate": 7.646315925022152e-06, "loss": 1.5677, "step": 8350 }, { "epoch": 0.8785902156759601, "grad_norm": 2.088862419128418, "learning_rate": 7.633252285442526e-06, "loss": 1.6353, "step": 8351 }, { "epoch": 0.8786954234613361, "grad_norm": 1.5038179159164429, "learning_rate": 7.620199372109172e-06, "loss": 2.0619, "step": 8352 }, { "epoch": 0.8788006312467123, "grad_norm": 1.6128473281860352, "learning_rate": 7.607157186537872e-06, "loss": 1.9283, "step": 8353 }, { "epoch": 0.8789058390320884, "grad_norm": 1.4330729246139526, "learning_rate": 7.59412573024314e-06, "loss": 1.757, "step": 8354 }, { "epoch": 0.8790110468174644, "grad_norm": 1.7641589641571045, "learning_rate": 7.581105004738321e-06, "loss": 1.8615, "step": 8355 }, { "epoch": 0.8791162546028406, "grad_norm": 1.5351738929748535, "learning_rate": 7.568095011535448e-06, "loss": 1.5014, "step": 8356 }, { "epoch": 0.8792214623882167, "grad_norm": 1.7756725549697876, "learning_rate": 7.555095752145313e-06, "loss": 1.4319, "step": 8357 }, { "epoch": 0.8793266701735929, "grad_norm": 1.6346051692962646, "learning_rate": 7.542107228077533e-06, "loss": 1.2158, "step": 8358 }, { "epoch": 0.8794318779589689, "grad_norm": 2.2152860164642334, "learning_rate": 7.529129440840355e-06, "loss": 1.389, "step": 8359 }, { "epoch": 0.8795370857443451, "grad_norm": 1.4864938259124756, "learning_rate": 7.516162391940873e-06, "loss": 0.8488, "step": 8360 }, { "epoch": 0.8796422935297212, "grad_norm": 2.238067626953125, "learning_rate": 7.503206082884917e-06, "loss": 1.8396, "step": 8361 }, { "epoch": 0.8797475013150973, "grad_norm": 1.6795562505722046, "learning_rate": 7.4902605151770385e-06, "loss": 1.9979, "step": 8362 }, { "epoch": 0.8798527091004734, "grad_norm": 2.297811269760132, "learning_rate": 7.477325690320602e-06, "loss": 1.9526, "step": 8363 }, { "epoch": 0.8799579168858496, "grad_norm": 2.567331075668335, "learning_rate": 7.4644016098176615e-06, "loss": 1.5925, "step": 8364 }, { "epoch": 0.8800631246712257, "grad_norm": 1.6319959163665771, "learning_rate": 7.451488275169028e-06, "loss": 1.9061, "step": 8365 }, { "epoch": 0.8801683324566018, "grad_norm": 2.1503756046295166, "learning_rate": 7.438585687874333e-06, "loss": 1.4737, "step": 8366 }, { "epoch": 0.8802735402419779, "grad_norm": 1.6706979274749756, "learning_rate": 7.42569384943187e-06, "loss": 1.822, "step": 8367 }, { "epoch": 0.880378748027354, "grad_norm": 1.9475339651107788, "learning_rate": 7.412812761338739e-06, "loss": 1.8185, "step": 8368 }, { "epoch": 0.8804839558127301, "grad_norm": 1.8608582019805908, "learning_rate": 7.3999424250907775e-06, "loss": 2.2257, "step": 8369 }, { "epoch": 0.8805891635981062, "grad_norm": 1.6136201620101929, "learning_rate": 7.387082842182591e-06, "loss": 1.6696, "step": 8370 }, { "epoch": 0.8806943713834824, "grad_norm": 1.2842328548431396, "learning_rate": 7.374234014107484e-06, "loss": 1.8681, "step": 8371 }, { "epoch": 0.8807995791688585, "grad_norm": 2.344787120819092, "learning_rate": 7.361395942357596e-06, "loss": 1.4332, "step": 8372 }, { "epoch": 0.8809047869542346, "grad_norm": 3.1230101585388184, "learning_rate": 7.348568628423746e-06, "loss": 1.5007, "step": 8373 }, { "epoch": 0.8810099947396107, "grad_norm": 1.2945367097854614, "learning_rate": 7.335752073795499e-06, "loss": 1.7119, "step": 8374 }, { "epoch": 0.8811152025249869, "grad_norm": 1.189368486404419, "learning_rate": 7.322946279961252e-06, "loss": 1.8333, "step": 8375 }, { "epoch": 0.8812204103103629, "grad_norm": 1.435043454170227, "learning_rate": 7.31015124840807e-06, "loss": 1.6932, "step": 8376 }, { "epoch": 0.8813256180957391, "grad_norm": 2.0978446006774902, "learning_rate": 7.297366980621789e-06, "loss": 1.2705, "step": 8377 }, { "epoch": 0.8814308258811152, "grad_norm": 1.0726866722106934, "learning_rate": 7.284593478087043e-06, "loss": 1.846, "step": 8378 }, { "epoch": 0.8815360336664914, "grad_norm": 1.2944754362106323, "learning_rate": 7.2718307422871445e-06, "loss": 1.2121, "step": 8379 }, { "epoch": 0.8816412414518674, "grad_norm": 1.729756474494934, "learning_rate": 7.259078774704198e-06, "loss": 2.0314, "step": 8380 }, { "epoch": 0.8817464492372435, "grad_norm": 1.872869610786438, "learning_rate": 7.24633757681904e-06, "loss": 1.9162, "step": 8381 }, { "epoch": 0.8818516570226197, "grad_norm": 1.577900767326355, "learning_rate": 7.233607150111255e-06, "loss": 1.697, "step": 8382 }, { "epoch": 0.8819568648079958, "grad_norm": 1.8250850439071655, "learning_rate": 7.2208874960592145e-06, "loss": 1.1092, "step": 8383 }, { "epoch": 0.8820620725933719, "grad_norm": 1.5504481792449951, "learning_rate": 7.208178616139994e-06, "loss": 1.265, "step": 8384 }, { "epoch": 0.882167280378748, "grad_norm": 1.681287407875061, "learning_rate": 7.195480511829411e-06, "loss": 1.366, "step": 8385 }, { "epoch": 0.8822724881641242, "grad_norm": 1.5954519510269165, "learning_rate": 7.18279318460211e-06, "loss": 1.4569, "step": 8386 }, { "epoch": 0.8823776959495002, "grad_norm": 1.6172798871994019, "learning_rate": 7.1701166359313894e-06, "loss": 1.6745, "step": 8387 }, { "epoch": 0.8824829037348764, "grad_norm": 1.5901143550872803, "learning_rate": 7.157450867289317e-06, "loss": 1.6646, "step": 8388 }, { "epoch": 0.8825881115202525, "grad_norm": 1.5728827714920044, "learning_rate": 7.1447958801467816e-06, "loss": 1.5367, "step": 8389 }, { "epoch": 0.8826933193056287, "grad_norm": 1.7115237712860107, "learning_rate": 7.132151675973331e-06, "loss": 1.3662, "step": 8390 }, { "epoch": 0.8827985270910047, "grad_norm": 1.2988559007644653, "learning_rate": 7.119518256237279e-06, "loss": 1.5863, "step": 8391 }, { "epoch": 0.8829037348763809, "grad_norm": 1.656714916229248, "learning_rate": 7.106895622405752e-06, "loss": 1.6779, "step": 8392 }, { "epoch": 0.883008942661757, "grad_norm": 1.3429776430130005, "learning_rate": 7.0942837759445325e-06, "loss": 1.6569, "step": 8393 }, { "epoch": 0.883114150447133, "grad_norm": 1.6043792963027954, "learning_rate": 7.081682718318194e-06, "loss": 1.4003, "step": 8394 }, { "epoch": 0.8832193582325092, "grad_norm": 1.8559693098068237, "learning_rate": 7.069092450990089e-06, "loss": 1.6701, "step": 8395 }, { "epoch": 0.8833245660178853, "grad_norm": 1.5783679485321045, "learning_rate": 7.056512975422269e-06, "loss": 1.5224, "step": 8396 }, { "epoch": 0.8834297738032615, "grad_norm": 1.5466448068618774, "learning_rate": 7.0439442930755105e-06, "loss": 1.5427, "step": 8397 }, { "epoch": 0.8835349815886375, "grad_norm": 1.4492648839950562, "learning_rate": 7.031386405409434e-06, "loss": 1.3369, "step": 8398 }, { "epoch": 0.8836401893740137, "grad_norm": 1.4293040037155151, "learning_rate": 7.018839313882286e-06, "loss": 1.3867, "step": 8399 }, { "epoch": 0.8837453971593898, "grad_norm": 2.3548078536987305, "learning_rate": 7.006303019951177e-06, "loss": 1.9804, "step": 8400 }, { "epoch": 0.8838506049447659, "grad_norm": 1.3848503828048706, "learning_rate": 6.993777525071887e-06, "loss": 1.1785, "step": 8401 }, { "epoch": 0.883955812730142, "grad_norm": 1.6691620349884033, "learning_rate": 6.98126283069892e-06, "loss": 1.5292, "step": 8402 }, { "epoch": 0.8840610205155182, "grad_norm": 1.5758638381958008, "learning_rate": 6.968758938285614e-06, "loss": 1.3405, "step": 8403 }, { "epoch": 0.8841662283008943, "grad_norm": 1.6990686655044556, "learning_rate": 6.956265849283994e-06, "loss": 1.3229, "step": 8404 }, { "epoch": 0.8842714360862703, "grad_norm": 2.8560450077056885, "learning_rate": 6.943783565144812e-06, "loss": 1.6618, "step": 8405 }, { "epoch": 0.8843766438716465, "grad_norm": 1.6072392463684082, "learning_rate": 6.931312087317632e-06, "loss": 1.7917, "step": 8406 }, { "epoch": 0.8844818516570226, "grad_norm": 1.5466362237930298, "learning_rate": 6.918851417250693e-06, "loss": 1.321, "step": 8407 }, { "epoch": 0.8845870594423987, "grad_norm": 1.8458850383758545, "learning_rate": 6.906401556391051e-06, "loss": 1.6448, "step": 8408 }, { "epoch": 0.8846922672277748, "grad_norm": 2.208996295928955, "learning_rate": 6.893962506184448e-06, "loss": 1.8648, "step": 8409 }, { "epoch": 0.884797475013151, "grad_norm": 1.3810334205627441, "learning_rate": 6.8815342680753735e-06, "loss": 1.7885, "step": 8410 }, { "epoch": 0.8849026827985271, "grad_norm": 1.5404844284057617, "learning_rate": 6.869116843507106e-06, "loss": 1.5237, "step": 8411 }, { "epoch": 0.8850078905839032, "grad_norm": 1.3252919912338257, "learning_rate": 6.856710233921626e-06, "loss": 1.9188, "step": 8412 }, { "epoch": 0.8851130983692793, "grad_norm": 1.3163280487060547, "learning_rate": 6.844314440759647e-06, "loss": 1.8506, "step": 8413 }, { "epoch": 0.8852183061546555, "grad_norm": 1.7833784818649292, "learning_rate": 6.8319294654607065e-06, "loss": 1.2899, "step": 8414 }, { "epoch": 0.8853235139400316, "grad_norm": 2.266003370285034, "learning_rate": 6.8195553094629995e-06, "loss": 1.1743, "step": 8415 }, { "epoch": 0.8854287217254077, "grad_norm": 2.029226303100586, "learning_rate": 6.807191974203486e-06, "loss": 1.04, "step": 8416 }, { "epoch": 0.8855339295107838, "grad_norm": 1.7757220268249512, "learning_rate": 6.7948394611178964e-06, "loss": 1.2583, "step": 8417 }, { "epoch": 0.88563913729616, "grad_norm": 2.46942138671875, "learning_rate": 6.782497771640694e-06, "loss": 1.5153, "step": 8418 }, { "epoch": 0.885744345081536, "grad_norm": 2.080439805984497, "learning_rate": 6.770166907205044e-06, "loss": 0.9513, "step": 8419 }, { "epoch": 0.8858495528669121, "grad_norm": 1.3978705406188965, "learning_rate": 6.7578468692429345e-06, "loss": 1.2028, "step": 8420 }, { "epoch": 0.8859547606522883, "grad_norm": 1.2934406995773315, "learning_rate": 6.7455376591850195e-06, "loss": 1.6844, "step": 8421 }, { "epoch": 0.8860599684376644, "grad_norm": 1.9341928958892822, "learning_rate": 6.733239278460735e-06, "loss": 1.5859, "step": 8422 }, { "epoch": 0.8861651762230405, "grad_norm": 1.2040367126464844, "learning_rate": 6.7209517284982704e-06, "loss": 1.3829, "step": 8423 }, { "epoch": 0.8862703840084166, "grad_norm": 1.6010814905166626, "learning_rate": 6.7086750107244965e-06, "loss": 1.3713, "step": 8424 }, { "epoch": 0.8863755917937928, "grad_norm": 2.1789660453796387, "learning_rate": 6.696409126565107e-06, "loss": 1.9039, "step": 8425 }, { "epoch": 0.8864807995791688, "grad_norm": 1.1743876934051514, "learning_rate": 6.684154077444482e-06, "loss": 1.6147, "step": 8426 }, { "epoch": 0.886586007364545, "grad_norm": 1.855284571647644, "learning_rate": 6.6719098647857525e-06, "loss": 1.5138, "step": 8427 }, { "epoch": 0.8866912151499211, "grad_norm": 1.862816333770752, "learning_rate": 6.659676490010824e-06, "loss": 1.5119, "step": 8428 }, { "epoch": 0.8867964229352973, "grad_norm": 1.4365988969802856, "learning_rate": 6.647453954540295e-06, "loss": 1.6655, "step": 8429 }, { "epoch": 0.8869016307206733, "grad_norm": 1.137057900428772, "learning_rate": 6.635242259793528e-06, "loss": 1.6778, "step": 8430 }, { "epoch": 0.8870068385060494, "grad_norm": 1.6340605020523071, "learning_rate": 6.623041407188646e-06, "loss": 1.7023, "step": 8431 }, { "epoch": 0.8871120462914256, "grad_norm": 1.682706356048584, "learning_rate": 6.610851398142482e-06, "loss": 1.6385, "step": 8432 }, { "epoch": 0.8872172540768016, "grad_norm": 2.859862804412842, "learning_rate": 6.598672234070602e-06, "loss": 1.4929, "step": 8433 }, { "epoch": 0.8873224618621778, "grad_norm": 1.687212347984314, "learning_rate": 6.586503916387366e-06, "loss": 1.3471, "step": 8434 }, { "epoch": 0.8874276696475539, "grad_norm": 1.9600337743759155, "learning_rate": 6.574346446505841e-06, "loss": 1.8195, "step": 8435 }, { "epoch": 0.8875328774329301, "grad_norm": 1.6050618886947632, "learning_rate": 6.562199825837789e-06, "loss": 1.4023, "step": 8436 }, { "epoch": 0.8876380852183061, "grad_norm": 1.2669776678085327, "learning_rate": 6.550064055793815e-06, "loss": 1.6314, "step": 8437 }, { "epoch": 0.8877432930036823, "grad_norm": 1.4774951934814453, "learning_rate": 6.537939137783166e-06, "loss": 1.5599, "step": 8438 }, { "epoch": 0.8878485007890584, "grad_norm": 2.499821424484253, "learning_rate": 6.525825073213876e-06, "loss": 1.9315, "step": 8439 }, { "epoch": 0.8879537085744345, "grad_norm": 1.3363654613494873, "learning_rate": 6.513721863492739e-06, "loss": 1.8184, "step": 8440 }, { "epoch": 0.8880589163598106, "grad_norm": 1.872019648551941, "learning_rate": 6.501629510025231e-06, "loss": 1.8917, "step": 8441 }, { "epoch": 0.8881641241451868, "grad_norm": 1.3895902633666992, "learning_rate": 6.489548014215585e-06, "loss": 1.296, "step": 8442 }, { "epoch": 0.8882693319305629, "grad_norm": 1.1768591403961182, "learning_rate": 6.4774773774668225e-06, "loss": 1.3739, "step": 8443 }, { "epoch": 0.8883745397159389, "grad_norm": 1.620846152305603, "learning_rate": 6.465417601180657e-06, "loss": 1.3514, "step": 8444 }, { "epoch": 0.8884797475013151, "grad_norm": 1.3851511478424072, "learning_rate": 6.453368686757533e-06, "loss": 1.2824, "step": 8445 }, { "epoch": 0.8885849552866912, "grad_norm": 1.9092847108840942, "learning_rate": 6.441330635596665e-06, "loss": 1.6623, "step": 8446 }, { "epoch": 0.8886901630720674, "grad_norm": 1.3836404085159302, "learning_rate": 6.42930344909598e-06, "loss": 1.932, "step": 8447 }, { "epoch": 0.8887953708574434, "grad_norm": 1.2607841491699219, "learning_rate": 6.417287128652172e-06, "loss": 1.5857, "step": 8448 }, { "epoch": 0.8889005786428196, "grad_norm": 1.6962370872497559, "learning_rate": 6.405281675660657e-06, "loss": 1.7428, "step": 8449 }, { "epoch": 0.8890057864281957, "grad_norm": 1.8756303787231445, "learning_rate": 6.393287091515565e-06, "loss": 1.3188, "step": 8450 }, { "epoch": 0.8891109942135718, "grad_norm": 1.370875358581543, "learning_rate": 6.3813033776098045e-06, "loss": 1.9644, "step": 8451 }, { "epoch": 0.8892162019989479, "grad_norm": 1.3783890008926392, "learning_rate": 6.369330535335016e-06, "loss": 1.3712, "step": 8452 }, { "epoch": 0.8893214097843241, "grad_norm": 1.7575373649597168, "learning_rate": 6.357368566081534e-06, "loss": 1.7293, "step": 8453 }, { "epoch": 0.8894266175697002, "grad_norm": 1.3849029541015625, "learning_rate": 6.345417471238501e-06, "loss": 1.6498, "step": 8454 }, { "epoch": 0.8895318253550762, "grad_norm": 2.384744167327881, "learning_rate": 6.333477252193731e-06, "loss": 2.3091, "step": 8455 }, { "epoch": 0.8896370331404524, "grad_norm": 1.8546522855758667, "learning_rate": 6.321547910333814e-06, "loss": 1.5911, "step": 8456 }, { "epoch": 0.8897422409258285, "grad_norm": 1.3433914184570312, "learning_rate": 6.309629447044074e-06, "loss": 2.1943, "step": 8457 }, { "epoch": 0.8898474487112046, "grad_norm": 1.997424840927124, "learning_rate": 6.297721863708528e-06, "loss": 1.8741, "step": 8458 }, { "epoch": 0.8899526564965807, "grad_norm": 1.5434318780899048, "learning_rate": 6.285825161710002e-06, "loss": 0.8767, "step": 8459 }, { "epoch": 0.8900578642819569, "grad_norm": 1.7368338108062744, "learning_rate": 6.273939342430013e-06, "loss": 1.6961, "step": 8460 }, { "epoch": 0.890163072067333, "grad_norm": 1.936927318572998, "learning_rate": 6.262064407248791e-06, "loss": 0.8594, "step": 8461 }, { "epoch": 0.8902682798527091, "grad_norm": 1.7954018115997314, "learning_rate": 6.250200357545377e-06, "loss": 1.6956, "step": 8462 }, { "epoch": 0.8903734876380852, "grad_norm": 1.5254756212234497, "learning_rate": 6.238347194697492e-06, "loss": 1.6629, "step": 8463 }, { "epoch": 0.8904786954234614, "grad_norm": 1.8098357915878296, "learning_rate": 6.226504920081566e-06, "loss": 1.17, "step": 8464 }, { "epoch": 0.8905839032088374, "grad_norm": 1.3831403255462646, "learning_rate": 6.214673535072868e-06, "loss": 1.6587, "step": 8465 }, { "epoch": 0.8906891109942136, "grad_norm": 1.8217341899871826, "learning_rate": 6.202853041045298e-06, "loss": 1.7295, "step": 8466 }, { "epoch": 0.8907943187795897, "grad_norm": 0.8784952759742737, "learning_rate": 6.191043439371535e-06, "loss": 2.3105, "step": 8467 }, { "epoch": 0.8908995265649658, "grad_norm": 1.1799579858779907, "learning_rate": 6.179244731422984e-06, "loss": 1.1108, "step": 8468 }, { "epoch": 0.8910047343503419, "grad_norm": 1.5560115575790405, "learning_rate": 6.167456918569792e-06, "loss": 1.9163, "step": 8469 }, { "epoch": 0.891109942135718, "grad_norm": 1.6790612936019897, "learning_rate": 6.155680002180864e-06, "loss": 1.5781, "step": 8470 }, { "epoch": 0.8912151499210942, "grad_norm": 2.0735700130462646, "learning_rate": 6.143913983623795e-06, "loss": 1.601, "step": 8471 }, { "epoch": 0.8913203577064703, "grad_norm": 1.3503835201263428, "learning_rate": 6.132158864264914e-06, "loss": 1.8698, "step": 8472 }, { "epoch": 0.8914255654918464, "grad_norm": 1.1508896350860596, "learning_rate": 6.120414645469341e-06, "loss": 1.8827, "step": 8473 }, { "epoch": 0.8915307732772225, "grad_norm": 2.075125217437744, "learning_rate": 6.108681328600874e-06, "loss": 1.8445, "step": 8474 }, { "epoch": 0.8916359810625987, "grad_norm": 1.5181657075881958, "learning_rate": 6.0969589150220554e-06, "loss": 1.8365, "step": 8475 }, { "epoch": 0.8917411888479747, "grad_norm": 1.7302663326263428, "learning_rate": 6.085247406094197e-06, "loss": 1.0664, "step": 8476 }, { "epoch": 0.8918463966333509, "grad_norm": 1.5871552228927612, "learning_rate": 6.073546803177299e-06, "loss": 1.7855, "step": 8477 }, { "epoch": 0.891951604418727, "grad_norm": 1.1431766748428345, "learning_rate": 6.0618571076301085e-06, "loss": 2.0511, "step": 8478 }, { "epoch": 0.8920568122041032, "grad_norm": 1.5150169134140015, "learning_rate": 6.050178320810141e-06, "loss": 1.4898, "step": 8479 }, { "epoch": 0.8921620199894792, "grad_norm": 2.173424005508423, "learning_rate": 6.038510444073586e-06, "loss": 1.1078, "step": 8480 }, { "epoch": 0.8922672277748553, "grad_norm": 2.229217529296875, "learning_rate": 6.026853478775396e-06, "loss": 1.9185, "step": 8481 }, { "epoch": 0.8923724355602315, "grad_norm": 2.0256011486053467, "learning_rate": 6.015207426269276e-06, "loss": 2.1693, "step": 8482 }, { "epoch": 0.8924776433456075, "grad_norm": 1.8043358325958252, "learning_rate": 6.003572287907633e-06, "loss": 1.7443, "step": 8483 }, { "epoch": 0.8925828511309837, "grad_norm": 1.3562211990356445, "learning_rate": 5.991948065041608e-06, "loss": 1.4683, "step": 8484 }, { "epoch": 0.8926880589163598, "grad_norm": 1.540953278541565, "learning_rate": 5.9803347590211e-06, "loss": 1.6193, "step": 8485 }, { "epoch": 0.892793266701736, "grad_norm": 1.7050999402999878, "learning_rate": 5.968732371194729e-06, "loss": 1.4715, "step": 8486 }, { "epoch": 0.892898474487112, "grad_norm": 2.0793585777282715, "learning_rate": 5.957140902909819e-06, "loss": 1.22, "step": 8487 }, { "epoch": 0.8930036822724882, "grad_norm": 2.1180810928344727, "learning_rate": 5.945560355512458e-06, "loss": 1.2444, "step": 8488 }, { "epoch": 0.8931088900578643, "grad_norm": 1.1381313800811768, "learning_rate": 5.93399073034745e-06, "loss": 1.5193, "step": 8489 }, { "epoch": 0.8932140978432404, "grad_norm": 1.6418750286102295, "learning_rate": 5.922432028758362e-06, "loss": 1.7449, "step": 8490 }, { "epoch": 0.8933193056286165, "grad_norm": 1.5433701276779175, "learning_rate": 5.910884252087457e-06, "loss": 1.4825, "step": 8491 }, { "epoch": 0.8934245134139926, "grad_norm": 1.8106061220169067, "learning_rate": 5.8993474016757145e-06, "loss": 1.542, "step": 8492 }, { "epoch": 0.8935297211993688, "grad_norm": 1.619828701019287, "learning_rate": 5.88782147886291e-06, "loss": 1.8641, "step": 8493 }, { "epoch": 0.8936349289847448, "grad_norm": 1.3758325576782227, "learning_rate": 5.876306484987481e-06, "loss": 1.3756, "step": 8494 }, { "epoch": 0.893740136770121, "grad_norm": 1.4138890504837036, "learning_rate": 5.8648024213866396e-06, "loss": 1.3945, "step": 8495 }, { "epoch": 0.8938453445554971, "grad_norm": 2.485138416290283, "learning_rate": 5.853309289396314e-06, "loss": 1.6306, "step": 8496 }, { "epoch": 0.8939505523408732, "grad_norm": 1.9388067722320557, "learning_rate": 5.841827090351171e-06, "loss": 1.4469, "step": 8497 }, { "epoch": 0.8940557601262493, "grad_norm": 1.8217087984085083, "learning_rate": 5.830355825584577e-06, "loss": 1.3221, "step": 8498 }, { "epoch": 0.8941609679116255, "grad_norm": 1.5511425733566284, "learning_rate": 5.818895496428689e-06, "loss": 1.3081, "step": 8499 }, { "epoch": 0.8942661756970016, "grad_norm": 2.5423057079315186, "learning_rate": 5.8074461042143095e-06, "loss": 1.1463, "step": 8500 }, { "epoch": 0.8943713834823777, "grad_norm": 1.830324411392212, "learning_rate": 5.796007650271063e-06, "loss": 1.4945, "step": 8501 }, { "epoch": 0.8944765912677538, "grad_norm": 2.4133145809173584, "learning_rate": 5.784580135927242e-06, "loss": 1.5212, "step": 8502 }, { "epoch": 0.89458179905313, "grad_norm": 1.9608807563781738, "learning_rate": 5.7731635625098755e-06, "loss": 1.3602, "step": 8503 }, { "epoch": 0.8946870068385061, "grad_norm": 2.1056807041168213, "learning_rate": 5.761757931344758e-06, "loss": 1.5038, "step": 8504 }, { "epoch": 0.8947922146238821, "grad_norm": 2.061222553253174, "learning_rate": 5.750363243756363e-06, "loss": 1.9814, "step": 8505 }, { "epoch": 0.8948974224092583, "grad_norm": 2.296455144882202, "learning_rate": 5.738979501067921e-06, "loss": 2.0531, "step": 8506 }, { "epoch": 0.8950026301946344, "grad_norm": 2.311011552810669, "learning_rate": 5.727606704601407e-06, "loss": 1.719, "step": 8507 }, { "epoch": 0.8951078379800105, "grad_norm": 1.3462337255477905, "learning_rate": 5.7162448556774995e-06, "loss": 1.7112, "step": 8508 }, { "epoch": 0.8952130457653866, "grad_norm": 1.3177099227905273, "learning_rate": 5.704893955615598e-06, "loss": 1.8481, "step": 8509 }, { "epoch": 0.8953182535507628, "grad_norm": 1.1072567701339722, "learning_rate": 5.693554005733859e-06, "loss": 1.6795, "step": 8510 }, { "epoch": 0.8954234613361389, "grad_norm": 1.5659089088439941, "learning_rate": 5.68222500734914e-06, "loss": 1.4823, "step": 8511 }, { "epoch": 0.895528669121515, "grad_norm": 1.4302914142608643, "learning_rate": 5.6709069617770675e-06, "loss": 1.2352, "step": 8512 }, { "epoch": 0.8956338769068911, "grad_norm": 1.4420124292373657, "learning_rate": 5.659599870331944e-06, "loss": 1.5293, "step": 8513 }, { "epoch": 0.8957390846922673, "grad_norm": 2.497387170791626, "learning_rate": 5.64830373432681e-06, "loss": 1.2493, "step": 8514 }, { "epoch": 0.8958442924776433, "grad_norm": 1.808634877204895, "learning_rate": 5.637018555073492e-06, "loss": 1.8987, "step": 8515 }, { "epoch": 0.8959495002630194, "grad_norm": 2.0679233074188232, "learning_rate": 5.625744333882488e-06, "loss": 1.3279, "step": 8516 }, { "epoch": 0.8960547080483956, "grad_norm": 1.3654800653457642, "learning_rate": 5.614481072063005e-06, "loss": 1.6085, "step": 8517 }, { "epoch": 0.8961599158337717, "grad_norm": 1.6575655937194824, "learning_rate": 5.603228770923041e-06, "loss": 1.1494, "step": 8518 }, { "epoch": 0.8962651236191478, "grad_norm": 1.3457351922988892, "learning_rate": 5.591987431769285e-06, "loss": 1.7025, "step": 8519 }, { "epoch": 0.8963703314045239, "grad_norm": 1.5918161869049072, "learning_rate": 5.580757055907137e-06, "loss": 1.3449, "step": 8520 }, { "epoch": 0.8964755391899001, "grad_norm": 1.6226881742477417, "learning_rate": 5.5695376446407656e-06, "loss": 1.6743, "step": 8521 }, { "epoch": 0.8965807469752761, "grad_norm": 1.7749323844909668, "learning_rate": 5.558329199273038e-06, "loss": 1.8004, "step": 8522 }, { "epoch": 0.8966859547606523, "grad_norm": 1.319456696510315, "learning_rate": 5.547131721105536e-06, "loss": 1.5449, "step": 8523 }, { "epoch": 0.8967911625460284, "grad_norm": 1.598297119140625, "learning_rate": 5.53594521143862e-06, "loss": 1.7227, "step": 8524 }, { "epoch": 0.8968963703314046, "grad_norm": 2.359658718109131, "learning_rate": 5.524769671571317e-06, "loss": 1.5778, "step": 8525 }, { "epoch": 0.8970015781167806, "grad_norm": 1.2139389514923096, "learning_rate": 5.5136051028014e-06, "loss": 2.0547, "step": 8526 }, { "epoch": 0.8971067859021568, "grad_norm": 1.9385275840759277, "learning_rate": 5.50245150642541e-06, "loss": 1.374, "step": 8527 }, { "epoch": 0.8972119936875329, "grad_norm": 1.8721158504486084, "learning_rate": 5.491308883738544e-06, "loss": 1.3719, "step": 8528 }, { "epoch": 0.8973172014729089, "grad_norm": 1.8050134181976318, "learning_rate": 5.480177236034756e-06, "loss": 1.4891, "step": 8529 }, { "epoch": 0.8974224092582851, "grad_norm": 1.385988712310791, "learning_rate": 5.469056564606767e-06, "loss": 1.7196, "step": 8530 }, { "epoch": 0.8975276170436612, "grad_norm": 1.7569900751113892, "learning_rate": 5.4579468707459225e-06, "loss": 1.7562, "step": 8531 }, { "epoch": 0.8976328248290374, "grad_norm": 1.610825538635254, "learning_rate": 5.446848155742401e-06, "loss": 1.6719, "step": 8532 }, { "epoch": 0.8977380326144134, "grad_norm": 2.0842642784118652, "learning_rate": 5.435760420885061e-06, "loss": 1.8032, "step": 8533 }, { "epoch": 0.8978432403997896, "grad_norm": 1.5236550569534302, "learning_rate": 5.42468366746145e-06, "loss": 1.4211, "step": 8534 }, { "epoch": 0.8979484481851657, "grad_norm": 1.6460191011428833, "learning_rate": 5.4136178967579054e-06, "loss": 1.5113, "step": 8535 }, { "epoch": 0.8980536559705419, "grad_norm": 2.305039405822754, "learning_rate": 5.402563110059456e-06, "loss": 1.5592, "step": 8536 }, { "epoch": 0.8981588637559179, "grad_norm": 1.2543623447418213, "learning_rate": 5.3915193086498286e-06, "loss": 1.4207, "step": 8537 }, { "epoch": 0.8982640715412941, "grad_norm": 1.8181365728378296, "learning_rate": 5.380486493811543e-06, "loss": 1.5938, "step": 8538 }, { "epoch": 0.8983692793266702, "grad_norm": 1.4921931028366089, "learning_rate": 5.3694646668257855e-06, "loss": 1.7251, "step": 8539 }, { "epoch": 0.8984744871120462, "grad_norm": 1.6796791553497314, "learning_rate": 5.358453828972465e-06, "loss": 1.7007, "step": 8540 }, { "epoch": 0.8985796948974224, "grad_norm": 1.4309808015823364, "learning_rate": 5.3474539815302815e-06, "loss": 1.6625, "step": 8541 }, { "epoch": 0.8986849026827985, "grad_norm": 1.6457716226577759, "learning_rate": 5.336465125776579e-06, "loss": 1.4362, "step": 8542 }, { "epoch": 0.8987901104681747, "grad_norm": 1.1779471635818481, "learning_rate": 5.325487262987439e-06, "loss": 1.5655, "step": 8543 }, { "epoch": 0.8988953182535507, "grad_norm": 1.411882996559143, "learning_rate": 5.314520394437728e-06, "loss": 1.5253, "step": 8544 }, { "epoch": 0.8990005260389269, "grad_norm": 1.2842921018600464, "learning_rate": 5.303564521400961e-06, "loss": 1.5975, "step": 8545 }, { "epoch": 0.899105733824303, "grad_norm": 2.102381467819214, "learning_rate": 5.292619645149433e-06, "loss": 2.1535, "step": 8546 }, { "epoch": 0.8992109416096791, "grad_norm": 2.0416359901428223, "learning_rate": 5.281685766954114e-06, "loss": 1.4009, "step": 8547 }, { "epoch": 0.8993161493950552, "grad_norm": 1.6468279361724854, "learning_rate": 5.270762888084712e-06, "loss": 1.8449, "step": 8548 }, { "epoch": 0.8994213571804314, "grad_norm": 1.4324162006378174, "learning_rate": 5.259851009809702e-06, "loss": 1.5525, "step": 8549 }, { "epoch": 0.8995265649658075, "grad_norm": 1.8984599113464355, "learning_rate": 5.2489501333962135e-06, "loss": 2.262, "step": 8550 }, { "epoch": 0.8996317727511836, "grad_norm": 1.3665709495544434, "learning_rate": 5.238060260110145e-06, "loss": 1.2597, "step": 8551 }, { "epoch": 0.8997369805365597, "grad_norm": 1.834430456161499, "learning_rate": 5.227181391216096e-06, "loss": 1.4875, "step": 8552 }, { "epoch": 0.8998421883219359, "grad_norm": 1.4777787923812866, "learning_rate": 5.2163135279773904e-06, "loss": 1.5871, "step": 8553 }, { "epoch": 0.8999473961073119, "grad_norm": 1.73262619972229, "learning_rate": 5.205456671656061e-06, "loss": 1.4334, "step": 8554 }, { "epoch": 0.900052603892688, "grad_norm": 2.0025711059570312, "learning_rate": 5.194610823512913e-06, "loss": 1.5515, "step": 8555 }, { "epoch": 0.9001578116780642, "grad_norm": 2.1046454906463623, "learning_rate": 5.183775984807415e-06, "loss": 1.7606, "step": 8556 }, { "epoch": 0.9002630194634403, "grad_norm": 2.093031167984009, "learning_rate": 5.172952156797795e-06, "loss": 1.0529, "step": 8557 }, { "epoch": 0.9003682272488164, "grad_norm": 3.1299211978912354, "learning_rate": 5.1621393407409904e-06, "loss": 1.3046, "step": 8558 }, { "epoch": 0.9004734350341925, "grad_norm": 1.444140076637268, "learning_rate": 5.151337537892631e-06, "loss": 1.6833, "step": 8559 }, { "epoch": 0.9005786428195687, "grad_norm": 1.5545951128005981, "learning_rate": 5.140546749507136e-06, "loss": 1.5637, "step": 8560 }, { "epoch": 0.9006838506049447, "grad_norm": 1.6997040510177612, "learning_rate": 5.129766976837569e-06, "loss": 1.3593, "step": 8561 }, { "epoch": 0.9007890583903209, "grad_norm": 2.1797568798065186, "learning_rate": 5.118998221135762e-06, "loss": 1.5224, "step": 8562 }, { "epoch": 0.900894266175697, "grad_norm": 1.9926127195358276, "learning_rate": 5.10824048365226e-06, "loss": 1.8785, "step": 8563 }, { "epoch": 0.9009994739610732, "grad_norm": 1.50703763961792, "learning_rate": 5.097493765636318e-06, "loss": 1.1829, "step": 8564 }, { "epoch": 0.9011046817464492, "grad_norm": 2.1419105529785156, "learning_rate": 5.086758068335917e-06, "loss": 1.2841, "step": 8565 }, { "epoch": 0.9012098895318253, "grad_norm": 2.1330740451812744, "learning_rate": 5.076033392997758e-06, "loss": 1.5291, "step": 8566 }, { "epoch": 0.9013150973172015, "grad_norm": 1.655969262123108, "learning_rate": 5.06531974086728e-06, "loss": 0.8968, "step": 8567 }, { "epoch": 0.9014203051025776, "grad_norm": 1.8075084686279297, "learning_rate": 5.054617113188586e-06, "loss": 2.1496, "step": 8568 }, { "epoch": 0.9015255128879537, "grad_norm": 1.51304292678833, "learning_rate": 5.043925511204573e-06, "loss": 1.8161, "step": 8569 }, { "epoch": 0.9016307206733298, "grad_norm": 1.4924466609954834, "learning_rate": 5.0332449361568e-06, "loss": 1.731, "step": 8570 }, { "epoch": 0.901735928458706, "grad_norm": 1.9998409748077393, "learning_rate": 5.0225753892855776e-06, "loss": 1.6692, "step": 8571 }, { "epoch": 0.901841136244082, "grad_norm": 1.3246253728866577, "learning_rate": 5.011916871829925e-06, "loss": 1.6233, "step": 8572 }, { "epoch": 0.9019463440294582, "grad_norm": 1.8206862211227417, "learning_rate": 5.0012693850275736e-06, "loss": 1.7423, "step": 8573 }, { "epoch": 0.9020515518148343, "grad_norm": 2.139857292175293, "learning_rate": 4.9906329301149914e-06, "loss": 1.4421, "step": 8574 }, { "epoch": 0.9021567596002105, "grad_norm": 2.2966580390930176, "learning_rate": 4.980007508327345e-06, "loss": 1.6362, "step": 8575 }, { "epoch": 0.9022619673855865, "grad_norm": 1.746172308921814, "learning_rate": 4.969393120898525e-06, "loss": 1.2432, "step": 8576 }, { "epoch": 0.9023671751709627, "grad_norm": 1.2494480609893799, "learning_rate": 4.958789769061156e-06, "loss": 1.7419, "step": 8577 }, { "epoch": 0.9024723829563388, "grad_norm": 1.3508919477462769, "learning_rate": 4.948197454046577e-06, "loss": 1.7533, "step": 8578 }, { "epoch": 0.9025775907417148, "grad_norm": 1.8284884691238403, "learning_rate": 4.937616177084814e-06, "loss": 1.5497, "step": 8579 }, { "epoch": 0.902682798527091, "grad_norm": 1.5557414293289185, "learning_rate": 4.927045939404673e-06, "loss": 1.8222, "step": 8580 }, { "epoch": 0.9027880063124671, "grad_norm": 1.6787934303283691, "learning_rate": 4.916486742233606e-06, "loss": 1.385, "step": 8581 }, { "epoch": 0.9028932140978433, "grad_norm": 1.3682160377502441, "learning_rate": 4.90593858679782e-06, "loss": 1.768, "step": 8582 }, { "epoch": 0.9029984218832193, "grad_norm": 2.39587140083313, "learning_rate": 4.89540147432227e-06, "loss": 1.8781, "step": 8583 }, { "epoch": 0.9031036296685955, "grad_norm": 1.224987268447876, "learning_rate": 4.884875406030565e-06, "loss": 1.6377, "step": 8584 }, { "epoch": 0.9032088374539716, "grad_norm": 1.3309333324432373, "learning_rate": 4.874360383145072e-06, "loss": 1.7443, "step": 8585 }, { "epoch": 0.9033140452393477, "grad_norm": 3.1084442138671875, "learning_rate": 4.863856406886869e-06, "loss": 2.1872, "step": 8586 }, { "epoch": 0.9034192530247238, "grad_norm": 1.980631709098816, "learning_rate": 4.853363478475748e-06, "loss": 1.273, "step": 8587 }, { "epoch": 0.9035244608101, "grad_norm": 1.4033839702606201, "learning_rate": 4.8428815991302005e-06, "loss": 1.3123, "step": 8588 }, { "epoch": 0.9036296685954761, "grad_norm": 1.6415311098098755, "learning_rate": 4.832410770067486e-06, "loss": 1.5425, "step": 8589 }, { "epoch": 0.9037348763808521, "grad_norm": 1.605977177619934, "learning_rate": 4.821950992503521e-06, "loss": 1.6757, "step": 8590 }, { "epoch": 0.9038400841662283, "grad_norm": 2.0766685009002686, "learning_rate": 4.811502267652968e-06, "loss": 1.7121, "step": 8591 }, { "epoch": 0.9039452919516044, "grad_norm": 1.8514540195465088, "learning_rate": 4.801064596729221e-06, "loss": 1.3121, "step": 8592 }, { "epoch": 0.9040504997369805, "grad_norm": 1.8307629823684692, "learning_rate": 4.790637980944346e-06, "loss": 1.871, "step": 8593 }, { "epoch": 0.9041557075223566, "grad_norm": 1.437143325805664, "learning_rate": 4.780222421509184e-06, "loss": 1.8685, "step": 8594 }, { "epoch": 0.9042609153077328, "grad_norm": 1.956531047821045, "learning_rate": 4.769817919633235e-06, "loss": 1.5374, "step": 8595 }, { "epoch": 0.9043661230931089, "grad_norm": 1.2559046745300293, "learning_rate": 4.759424476524732e-06, "loss": 2.0446, "step": 8596 }, { "epoch": 0.904471330878485, "grad_norm": 1.7142302989959717, "learning_rate": 4.749042093390654e-06, "loss": 1.7011, "step": 8597 }, { "epoch": 0.9045765386638611, "grad_norm": 2.1067309379577637, "learning_rate": 4.738670771436671e-06, "loss": 1.5651, "step": 8598 }, { "epoch": 0.9046817464492373, "grad_norm": 1.2750693559646606, "learning_rate": 4.72831051186714e-06, "loss": 1.9008, "step": 8599 }, { "epoch": 0.9047869542346134, "grad_norm": 1.5802183151245117, "learning_rate": 4.717961315885211e-06, "loss": 2.3383, "step": 8600 }, { "epoch": 0.9048921620199895, "grad_norm": 1.7453382015228271, "learning_rate": 4.707623184692655e-06, "loss": 1.7165, "step": 8601 }, { "epoch": 0.9049973698053656, "grad_norm": 1.6323179006576538, "learning_rate": 4.697296119490047e-06, "loss": 1.3792, "step": 8602 }, { "epoch": 0.9051025775907418, "grad_norm": 1.3392667770385742, "learning_rate": 4.686980121476614e-06, "loss": 1.369, "step": 8603 }, { "epoch": 0.9052077853761178, "grad_norm": 2.2435269355773926, "learning_rate": 4.67667519185031e-06, "loss": 1.8554, "step": 8604 }, { "epoch": 0.9053129931614939, "grad_norm": 1.6639388799667358, "learning_rate": 4.666381331807834e-06, "loss": 1.2958, "step": 8605 }, { "epoch": 0.9054182009468701, "grad_norm": 1.4905084371566772, "learning_rate": 4.656098542544574e-06, "loss": 1.7928, "step": 8606 }, { "epoch": 0.9055234087322462, "grad_norm": 1.625221848487854, "learning_rate": 4.645826825254607e-06, "loss": 1.9308, "step": 8607 }, { "epoch": 0.9056286165176223, "grad_norm": 1.763044834136963, "learning_rate": 4.6355661811308015e-06, "loss": 1.7369, "step": 8608 }, { "epoch": 0.9057338243029984, "grad_norm": 3.1937339305877686, "learning_rate": 4.625316611364661e-06, "loss": 1.1817, "step": 8609 }, { "epoch": 0.9058390320883746, "grad_norm": 1.484457015991211, "learning_rate": 4.615078117146421e-06, "loss": 1.6794, "step": 8610 }, { "epoch": 0.9059442398737506, "grad_norm": 1.7743538618087769, "learning_rate": 4.604850699665087e-06, "loss": 2.1803, "step": 8611 }, { "epoch": 0.9060494476591268, "grad_norm": 2.6126396656036377, "learning_rate": 4.594634360108319e-06, "loss": 1.6458, "step": 8612 }, { "epoch": 0.9061546554445029, "grad_norm": 2.2468442916870117, "learning_rate": 4.584429099662468e-06, "loss": 1.8922, "step": 8613 }, { "epoch": 0.9062598632298791, "grad_norm": 2.1800897121429443, "learning_rate": 4.574234919512698e-06, "loss": 1.5657, "step": 8614 }, { "epoch": 0.9063650710152551, "grad_norm": 2.317814826965332, "learning_rate": 4.564051820842796e-06, "loss": 1.7934, "step": 8615 }, { "epoch": 0.9064702788006312, "grad_norm": 1.8590099811553955, "learning_rate": 4.553879804835282e-06, "loss": 2.0423, "step": 8616 }, { "epoch": 0.9065754865860074, "grad_norm": 1.0570123195648193, "learning_rate": 4.543718872671421e-06, "loss": 2.114, "step": 8617 }, { "epoch": 0.9066806943713834, "grad_norm": 1.6926145553588867, "learning_rate": 4.533569025531137e-06, "loss": 1.5145, "step": 8618 }, { "epoch": 0.9067859021567596, "grad_norm": 2.080049514770508, "learning_rate": 4.523430264593132e-06, "loss": 2.0468, "step": 8619 }, { "epoch": 0.9068911099421357, "grad_norm": 1.7230383157730103, "learning_rate": 4.5133025910347845e-06, "loss": 1.407, "step": 8620 }, { "epoch": 0.9069963177275119, "grad_norm": 1.7170302867889404, "learning_rate": 4.5031860060321455e-06, "loss": 1.0797, "step": 8621 }, { "epoch": 0.9071015255128879, "grad_norm": 1.9875949621200562, "learning_rate": 4.493080510760083e-06, "loss": 1.2423, "step": 8622 }, { "epoch": 0.9072067332982641, "grad_norm": 2.1834206581115723, "learning_rate": 4.482986106392073e-06, "loss": 1.2699, "step": 8623 }, { "epoch": 0.9073119410836402, "grad_norm": 1.1872564554214478, "learning_rate": 4.472902794100342e-06, "loss": 1.7113, "step": 8624 }, { "epoch": 0.9074171488690163, "grad_norm": 1.889709711074829, "learning_rate": 4.4628305750558656e-06, "loss": 1.9109, "step": 8625 }, { "epoch": 0.9075223566543924, "grad_norm": 1.6247581243515015, "learning_rate": 4.452769450428273e-06, "loss": 1.1514, "step": 8626 }, { "epoch": 0.9076275644397686, "grad_norm": 1.985897183418274, "learning_rate": 4.442719421385922e-06, "loss": 1.7738, "step": 8627 }, { "epoch": 0.9077327722251447, "grad_norm": 1.4728162288665771, "learning_rate": 4.4326804890959195e-06, "loss": 1.8999, "step": 8628 }, { "epoch": 0.9078379800105207, "grad_norm": 1.9778163433074951, "learning_rate": 4.422652654724036e-06, "loss": 2.0382, "step": 8629 }, { "epoch": 0.9079431877958969, "grad_norm": 1.3184362649917603, "learning_rate": 4.412635919434749e-06, "loss": 1.6149, "step": 8630 }, { "epoch": 0.908048395581273, "grad_norm": 1.9387370347976685, "learning_rate": 4.402630284391318e-06, "loss": 1.4208, "step": 8631 }, { "epoch": 0.9081536033666492, "grad_norm": 1.576529860496521, "learning_rate": 4.392635750755625e-06, "loss": 1.4622, "step": 8632 }, { "epoch": 0.9082588111520252, "grad_norm": 2.166128396987915, "learning_rate": 4.382652319688307e-06, "loss": 2.017, "step": 8633 }, { "epoch": 0.9083640189374014, "grad_norm": 1.8880326747894287, "learning_rate": 4.372679992348727e-06, "loss": 1.7115, "step": 8634 }, { "epoch": 0.9084692267227775, "grad_norm": 1.9160791635513306, "learning_rate": 4.362718769894925e-06, "loss": 2.233, "step": 8635 }, { "epoch": 0.9085744345081536, "grad_norm": 1.0861622095108032, "learning_rate": 4.352768653483652e-06, "loss": 1.4918, "step": 8636 }, { "epoch": 0.9086796422935297, "grad_norm": 1.6084988117218018, "learning_rate": 4.342829644270429e-06, "loss": 1.69, "step": 8637 }, { "epoch": 0.9087848500789059, "grad_norm": 1.7415450811386108, "learning_rate": 4.332901743409379e-06, "loss": 1.4335, "step": 8638 }, { "epoch": 0.908890057864282, "grad_norm": 1.4303922653198242, "learning_rate": 4.322984952053433e-06, "loss": 1.3406, "step": 8639 }, { "epoch": 0.908995265649658, "grad_norm": 1.2648975849151611, "learning_rate": 4.313079271354192e-06, "loss": 1.3649, "step": 8640 }, { "epoch": 0.9091004734350342, "grad_norm": 1.7955864667892456, "learning_rate": 4.303184702461948e-06, "loss": 1.5743, "step": 8641 }, { "epoch": 0.9092056812204103, "grad_norm": 1.23116934299469, "learning_rate": 4.293301246525761e-06, "loss": 1.4245, "step": 8642 }, { "epoch": 0.9093108890057864, "grad_norm": 1.7114944458007812, "learning_rate": 4.283428904693343e-06, "loss": 1.8552, "step": 8643 }, { "epoch": 0.9094160967911625, "grad_norm": 1.141519546508789, "learning_rate": 4.273567678111123e-06, "loss": 1.773, "step": 8644 }, { "epoch": 0.9095213045765387, "grad_norm": 1.2643916606903076, "learning_rate": 4.263717567924286e-06, "loss": 1.8676, "step": 8645 }, { "epoch": 0.9096265123619148, "grad_norm": 1.395230770111084, "learning_rate": 4.2538785752766816e-06, "loss": 1.591, "step": 8646 }, { "epoch": 0.9097317201472909, "grad_norm": 1.6044540405273438, "learning_rate": 4.244050701310853e-06, "loss": 2.1957, "step": 8647 }, { "epoch": 0.909836927932667, "grad_norm": 1.558889389038086, "learning_rate": 4.234233947168109e-06, "loss": 1.3307, "step": 8648 }, { "epoch": 0.9099421357180432, "grad_norm": 2.091890811920166, "learning_rate": 4.224428313988416e-06, "loss": 1.092, "step": 8649 }, { "epoch": 0.9100473435034192, "grad_norm": 2.1650590896606445, "learning_rate": 4.214633802910506e-06, "loss": 1.9047, "step": 8650 }, { "epoch": 0.9101525512887954, "grad_norm": 1.981006145477295, "learning_rate": 4.204850415071748e-06, "loss": 1.1051, "step": 8651 }, { "epoch": 0.9102577590741715, "grad_norm": 1.77347993850708, "learning_rate": 4.195078151608256e-06, "loss": 1.6043, "step": 8652 }, { "epoch": 0.9103629668595477, "grad_norm": 3.0252108573913574, "learning_rate": 4.185317013654866e-06, "loss": 1.4648, "step": 8653 }, { "epoch": 0.9104681746449237, "grad_norm": 1.7558192014694214, "learning_rate": 4.175567002345104e-06, "loss": 1.2865, "step": 8654 }, { "epoch": 0.9105733824302998, "grad_norm": 1.6132521629333496, "learning_rate": 4.165828118811199e-06, "loss": 1.5495, "step": 8655 }, { "epoch": 0.910678590215676, "grad_norm": 1.244947910308838, "learning_rate": 4.156100364184101e-06, "loss": 2.0034, "step": 8656 }, { "epoch": 0.910783798001052, "grad_norm": 1.8241870403289795, "learning_rate": 4.146383739593474e-06, "loss": 1.6338, "step": 8657 }, { "epoch": 0.9108890057864282, "grad_norm": 0.9370825290679932, "learning_rate": 4.136678246167636e-06, "loss": 1.6983, "step": 8658 }, { "epoch": 0.9109942135718043, "grad_norm": 1.7532540559768677, "learning_rate": 4.12698388503372e-06, "loss": 1.5103, "step": 8659 }, { "epoch": 0.9110994213571805, "grad_norm": 1.93152916431427, "learning_rate": 4.1173006573174354e-06, "loss": 1.9619, "step": 8660 }, { "epoch": 0.9112046291425565, "grad_norm": 1.5335841178894043, "learning_rate": 4.107628564143306e-06, "loss": 1.519, "step": 8661 }, { "epoch": 0.9113098369279327, "grad_norm": 1.8598703145980835, "learning_rate": 4.0979676066345005e-06, "loss": 1.3632, "step": 8662 }, { "epoch": 0.9114150447133088, "grad_norm": 1.7030982971191406, "learning_rate": 4.08831778591291e-06, "loss": 1.0882, "step": 8663 }, { "epoch": 0.911520252498685, "grad_norm": 1.5589040517807007, "learning_rate": 4.078679103099159e-06, "loss": 2.0111, "step": 8664 }, { "epoch": 0.911625460284061, "grad_norm": 1.4710445404052734, "learning_rate": 4.069051559312531e-06, "loss": 1.8036, "step": 8665 }, { "epoch": 0.9117306680694371, "grad_norm": 2.0354957580566406, "learning_rate": 4.0594351556710544e-06, "loss": 1.6073, "step": 8666 }, { "epoch": 0.9118358758548133, "grad_norm": 1.0362532138824463, "learning_rate": 4.049829893291457e-06, "loss": 1.689, "step": 8667 }, { "epoch": 0.9119410836401893, "grad_norm": 1.7926151752471924, "learning_rate": 4.040235773289147e-06, "loss": 1.7301, "step": 8668 }, { "epoch": 0.9120462914255655, "grad_norm": 1.6435024738311768, "learning_rate": 4.030652796778267e-06, "loss": 1.5168, "step": 8669 }, { "epoch": 0.9121514992109416, "grad_norm": 1.7422984838485718, "learning_rate": 4.021080964871671e-06, "loss": 1.7809, "step": 8670 }, { "epoch": 0.9122567069963178, "grad_norm": 2.355043411254883, "learning_rate": 4.011520278680891e-06, "loss": 1.9162, "step": 8671 }, { "epoch": 0.9123619147816938, "grad_norm": 1.7827218770980835, "learning_rate": 4.001970739316163e-06, "loss": 1.322, "step": 8672 }, { "epoch": 0.91246712256707, "grad_norm": 1.2466920614242554, "learning_rate": 3.9924323478864655e-06, "loss": 1.2959, "step": 8673 }, { "epoch": 0.9125723303524461, "grad_norm": 1.3089375495910645, "learning_rate": 3.982905105499468e-06, "loss": 1.3969, "step": 8674 }, { "epoch": 0.9126775381378222, "grad_norm": 1.8237321376800537, "learning_rate": 3.973389013261497e-06, "loss": 1.2769, "step": 8675 }, { "epoch": 0.9127827459231983, "grad_norm": 1.5145193338394165, "learning_rate": 3.9638840722776685e-06, "loss": 1.4357, "step": 8676 }, { "epoch": 0.9128879537085745, "grad_norm": 1.9698090553283691, "learning_rate": 3.954390283651754e-06, "loss": 1.4994, "step": 8677 }, { "epoch": 0.9129931614939506, "grad_norm": 1.725354790687561, "learning_rate": 3.944907648486196e-06, "loss": 1.4207, "step": 8678 }, { "epoch": 0.9130983692793266, "grad_norm": 2.0058798789978027, "learning_rate": 3.935436167882234e-06, "loss": 1.1381, "step": 8679 }, { "epoch": 0.9132035770647028, "grad_norm": 1.658754587173462, "learning_rate": 3.925975842939733e-06, "loss": 1.7292, "step": 8680 }, { "epoch": 0.9133087848500789, "grad_norm": 1.5891391038894653, "learning_rate": 3.916526674757293e-06, "loss": 1.0397, "step": 8681 }, { "epoch": 0.913413992635455, "grad_norm": 1.2456856966018677, "learning_rate": 3.907088664432224e-06, "loss": 2.1817, "step": 8682 }, { "epoch": 0.9135192004208311, "grad_norm": 1.409684658050537, "learning_rate": 3.897661813060494e-06, "loss": 1.3057, "step": 8683 }, { "epoch": 0.9136244082062073, "grad_norm": 1.887300968170166, "learning_rate": 3.8882461217368604e-06, "loss": 1.7419, "step": 8684 }, { "epoch": 0.9137296159915834, "grad_norm": 1.338424801826477, "learning_rate": 3.878841591554716e-06, "loss": 1.6628, "step": 8685 }, { "epoch": 0.9138348237769595, "grad_norm": 1.1566272974014282, "learning_rate": 3.869448223606165e-06, "loss": 1.7944, "step": 8686 }, { "epoch": 0.9139400315623356, "grad_norm": 1.7602925300598145, "learning_rate": 3.860066018982056e-06, "loss": 1.2639, "step": 8687 }, { "epoch": 0.9140452393477118, "grad_norm": 1.099798560142517, "learning_rate": 3.850694978771896e-06, "loss": 1.5982, "step": 8688 }, { "epoch": 0.9141504471330878, "grad_norm": 1.6427594423294067, "learning_rate": 3.841335104063904e-06, "loss": 2.0918, "step": 8689 }, { "epoch": 0.914255654918464, "grad_norm": 1.4541171789169312, "learning_rate": 3.831986395945042e-06, "loss": 1.8839, "step": 8690 }, { "epoch": 0.9143608627038401, "grad_norm": 1.9176745414733887, "learning_rate": 3.822648855500921e-06, "loss": 1.5762, "step": 8691 }, { "epoch": 0.9144660704892162, "grad_norm": 1.3941978216171265, "learning_rate": 3.813322483815862e-06, "loss": 1.6778, "step": 8692 }, { "epoch": 0.9145712782745923, "grad_norm": 2.07476544380188, "learning_rate": 3.8040072819729545e-06, "loss": 1.2133, "step": 8693 }, { "epoch": 0.9146764860599684, "grad_norm": 1.7748063802719116, "learning_rate": 3.794703251053899e-06, "loss": 1.7154, "step": 8694 }, { "epoch": 0.9147816938453446, "grad_norm": 1.5125572681427002, "learning_rate": 3.785410392139166e-06, "loss": 1.6641, "step": 8695 }, { "epoch": 0.9148869016307207, "grad_norm": 2.81387996673584, "learning_rate": 3.776128706307902e-06, "loss": 1.3597, "step": 8696 }, { "epoch": 0.9149921094160968, "grad_norm": 2.081625461578369, "learning_rate": 3.7668581946379345e-06, "loss": 1.3875, "step": 8697 }, { "epoch": 0.9150973172014729, "grad_norm": 1.5099921226501465, "learning_rate": 3.7575988582058575e-06, "loss": 2.1729, "step": 8698 }, { "epoch": 0.9152025249868491, "grad_norm": 1.3036530017852783, "learning_rate": 3.7483506980868997e-06, "loss": 0.8875, "step": 8699 }, { "epoch": 0.9153077327722251, "grad_norm": 1.566401720046997, "learning_rate": 3.7391137153550137e-06, "loss": 1.5267, "step": 8700 }, { "epoch": 0.9154129405576013, "grad_norm": 1.6144717931747437, "learning_rate": 3.7298879110828965e-06, "loss": 1.6758, "step": 8701 }, { "epoch": 0.9155181483429774, "grad_norm": 1.2582496404647827, "learning_rate": 3.7206732863418804e-06, "loss": 1.6363, "step": 8702 }, { "epoch": 0.9156233561283535, "grad_norm": 1.8955252170562744, "learning_rate": 3.71146984220202e-06, "loss": 1.6015, "step": 8703 }, { "epoch": 0.9157285639137296, "grad_norm": 1.6370620727539062, "learning_rate": 3.702277579732116e-06, "loss": 1.4619, "step": 8704 }, { "epoch": 0.9158337716991057, "grad_norm": 2.216202974319458, "learning_rate": 3.6930964999995933e-06, "loss": 1.4897, "step": 8705 }, { "epoch": 0.9159389794844819, "grad_norm": 1.8498296737670898, "learning_rate": 3.683926604070653e-06, "loss": 1.7555, "step": 8706 }, { "epoch": 0.9160441872698579, "grad_norm": 2.3383429050445557, "learning_rate": 3.6747678930101558e-06, "loss": 1.4773, "step": 8707 }, { "epoch": 0.9161493950552341, "grad_norm": 1.3877893686294556, "learning_rate": 3.6656203678816723e-06, "loss": 1.9327, "step": 8708 }, { "epoch": 0.9162546028406102, "grad_norm": 1.5240907669067383, "learning_rate": 3.6564840297474757e-06, "loss": 2.1459, "step": 8709 }, { "epoch": 0.9163598106259864, "grad_norm": 1.7055188417434692, "learning_rate": 3.6473588796685386e-06, "loss": 1.5657, "step": 8710 }, { "epoch": 0.9164650184113624, "grad_norm": 2.1579809188842773, "learning_rate": 3.6382449187045144e-06, "loss": 1.5808, "step": 8711 }, { "epoch": 0.9165702261967386, "grad_norm": 1.276623010635376, "learning_rate": 3.629142147913811e-06, "loss": 1.9783, "step": 8712 }, { "epoch": 0.9166754339821147, "grad_norm": 2.286100149154663, "learning_rate": 3.6200505683534945e-06, "loss": 1.714, "step": 8713 }, { "epoch": 0.9167806417674907, "grad_norm": 2.115018367767334, "learning_rate": 3.6109701810793208e-06, "loss": 1.7412, "step": 8714 }, { "epoch": 0.9168858495528669, "grad_norm": 1.6776834726333618, "learning_rate": 3.6019009871457897e-06, "loss": 1.715, "step": 8715 }, { "epoch": 0.916991057338243, "grad_norm": 1.3437702655792236, "learning_rate": 3.59284298760606e-06, "loss": 1.1101, "step": 8716 }, { "epoch": 0.9170962651236192, "grad_norm": 3.0596518516540527, "learning_rate": 3.5837961835120006e-06, "loss": 1.4793, "step": 8717 }, { "epoch": 0.9172014729089952, "grad_norm": 1.5906575918197632, "learning_rate": 3.5747605759142157e-06, "loss": 1.6282, "step": 8718 }, { "epoch": 0.9173066806943714, "grad_norm": 2.017958402633667, "learning_rate": 3.565736165861966e-06, "loss": 1.9276, "step": 8719 }, { "epoch": 0.9174118884797475, "grad_norm": 2.3949334621429443, "learning_rate": 3.5567229544032133e-06, "loss": 0.857, "step": 8720 }, { "epoch": 0.9175170962651236, "grad_norm": 3.835064649581909, "learning_rate": 3.5477209425846538e-06, "loss": 1.3035, "step": 8721 }, { "epoch": 0.9176223040504997, "grad_norm": 1.541480302810669, "learning_rate": 3.538730131451651e-06, "loss": 1.6198, "step": 8722 }, { "epoch": 0.9177275118358759, "grad_norm": 1.4783751964569092, "learning_rate": 3.529750522048281e-06, "loss": 1.5319, "step": 8723 }, { "epoch": 0.917832719621252, "grad_norm": 1.9219422340393066, "learning_rate": 3.5207821154173093e-06, "loss": 1.6938, "step": 8724 }, { "epoch": 0.917937927406628, "grad_norm": 1.5939526557922363, "learning_rate": 3.5118249126002035e-06, "loss": 1.4352, "step": 8725 }, { "epoch": 0.9180431351920042, "grad_norm": 1.3447009325027466, "learning_rate": 3.5028789146371533e-06, "loss": 1.7133, "step": 8726 }, { "epoch": 0.9181483429773803, "grad_norm": 1.859089970588684, "learning_rate": 3.4939441225670054e-06, "loss": 1.8013, "step": 8727 }, { "epoch": 0.9182535507627565, "grad_norm": 1.8480535745620728, "learning_rate": 3.4850205374273416e-06, "loss": 1.5742, "step": 8728 }, { "epoch": 0.9183587585481325, "grad_norm": 2.163987159729004, "learning_rate": 3.476108160254443e-06, "loss": 1.2029, "step": 8729 }, { "epoch": 0.9184639663335087, "grad_norm": 1.8570356369018555, "learning_rate": 3.4672069920832493e-06, "loss": 1.558, "step": 8730 }, { "epoch": 0.9185691741188848, "grad_norm": 1.9687753915786743, "learning_rate": 3.4583170339474223e-06, "loss": 1.6714, "step": 8731 }, { "epoch": 0.9186743819042609, "grad_norm": 1.7975976467132568, "learning_rate": 3.4494382868793474e-06, "loss": 1.623, "step": 8732 }, { "epoch": 0.918779589689637, "grad_norm": 1.992436170578003, "learning_rate": 3.440570751910066e-06, "loss": 1.2232, "step": 8733 }, { "epoch": 0.9188847974750132, "grad_norm": 1.5416083335876465, "learning_rate": 3.4317144300693328e-06, "loss": 1.3667, "step": 8734 }, { "epoch": 0.9189900052603893, "grad_norm": 2.0780837535858154, "learning_rate": 3.4228693223856136e-06, "loss": 1.6205, "step": 8735 }, { "epoch": 0.9190952130457654, "grad_norm": 2.3178086280822754, "learning_rate": 3.4140354298860756e-06, "loss": 1.6387, "step": 8736 }, { "epoch": 0.9192004208311415, "grad_norm": 2.03950834274292, "learning_rate": 3.405212753596532e-06, "loss": 1.2235, "step": 8737 }, { "epoch": 0.9193056286165177, "grad_norm": 1.5267329216003418, "learning_rate": 3.3964012945415624e-06, "loss": 1.9435, "step": 8738 }, { "epoch": 0.9194108364018937, "grad_norm": 1.4271742105484009, "learning_rate": 3.3876010537444046e-06, "loss": 1.3572, "step": 8739 }, { "epoch": 0.9195160441872698, "grad_norm": 1.5185612440109253, "learning_rate": 3.3788120322269855e-06, "loss": 1.1357, "step": 8740 }, { "epoch": 0.919621251972646, "grad_norm": 2.3991973400115967, "learning_rate": 3.3700342310099773e-06, "loss": 2.0348, "step": 8741 }, { "epoch": 0.9197264597580221, "grad_norm": 1.1119378805160522, "learning_rate": 3.361267651112676e-06, "loss": 1.4776, "step": 8742 }, { "epoch": 0.9198316675433982, "grad_norm": 2.3870489597320557, "learning_rate": 3.3525122935531562e-06, "loss": 1.8226, "step": 8743 }, { "epoch": 0.9199368753287743, "grad_norm": 1.734883189201355, "learning_rate": 3.3437681593481486e-06, "loss": 1.6325, "step": 8744 }, { "epoch": 0.9200420831141505, "grad_norm": 1.6427862644195557, "learning_rate": 3.3350352495130298e-06, "loss": 1.6529, "step": 8745 }, { "epoch": 0.9201472908995265, "grad_norm": 1.838613748550415, "learning_rate": 3.326313565061978e-06, "loss": 1.3113, "step": 8746 }, { "epoch": 0.9202524986849027, "grad_norm": 2.063891887664795, "learning_rate": 3.3176031070077827e-06, "loss": 2.1088, "step": 8747 }, { "epoch": 0.9203577064702788, "grad_norm": 1.299898624420166, "learning_rate": 3.3089038763619684e-06, "loss": 1.7546, "step": 8748 }, { "epoch": 0.920462914255655, "grad_norm": 1.4041413068771362, "learning_rate": 3.30021587413476e-06, "loss": 2.0231, "step": 8749 }, { "epoch": 0.920568122041031, "grad_norm": 2.1937994956970215, "learning_rate": 3.2915391013350392e-06, "loss": 2.0115, "step": 8750 }, { "epoch": 0.9206733298264071, "grad_norm": 1.2597063779830933, "learning_rate": 3.282873558970445e-06, "loss": 1.7548, "step": 8751 }, { "epoch": 0.9207785376117833, "grad_norm": 1.6283094882965088, "learning_rate": 3.2742192480472724e-06, "loss": 1.5641, "step": 8752 }, { "epoch": 0.9208837453971593, "grad_norm": 1.535847783088684, "learning_rate": 3.2655761695704834e-06, "loss": 1.769, "step": 8753 }, { "epoch": 0.9209889531825355, "grad_norm": 1.572363018989563, "learning_rate": 3.256944324543809e-06, "loss": 1.5744, "step": 8754 }, { "epoch": 0.9210941609679116, "grad_norm": 1.8032740354537964, "learning_rate": 3.2483237139696255e-06, "loss": 1.1064, "step": 8755 }, { "epoch": 0.9211993687532878, "grad_norm": 1.5819141864776611, "learning_rate": 3.2397143388489983e-06, "loss": 1.3802, "step": 8756 }, { "epoch": 0.9213045765386638, "grad_norm": 1.1894673109054565, "learning_rate": 3.2311162001817387e-06, "loss": 1.4237, "step": 8757 }, { "epoch": 0.92140978432404, "grad_norm": 1.9379583597183228, "learning_rate": 3.2225292989662925e-06, "loss": 1.9112, "step": 8758 }, { "epoch": 0.9215149921094161, "grad_norm": 1.32359778881073, "learning_rate": 3.21395363619984e-06, "loss": 1.3942, "step": 8759 }, { "epoch": 0.9216201998947923, "grad_norm": 1.8300716876983643, "learning_rate": 3.2053892128782403e-06, "loss": 1.7214, "step": 8760 }, { "epoch": 0.9217254076801683, "grad_norm": 1.1215351819992065, "learning_rate": 3.1968360299960643e-06, "loss": 1.3853, "step": 8761 }, { "epoch": 0.9218306154655445, "grad_norm": 1.482314944267273, "learning_rate": 3.1882940885465397e-06, "loss": 1.6937, "step": 8762 }, { "epoch": 0.9219358232509206, "grad_norm": 1.5101746320724487, "learning_rate": 3.1797633895216394e-06, "loss": 1.7438, "step": 8763 }, { "epoch": 0.9220410310362966, "grad_norm": 1.2713983058929443, "learning_rate": 3.171243933911994e-06, "loss": 1.5315, "step": 8764 }, { "epoch": 0.9221462388216728, "grad_norm": 2.0249693393707275, "learning_rate": 3.1627357227069333e-06, "loss": 1.4189, "step": 8765 }, { "epoch": 0.9222514466070489, "grad_norm": 1.0391638278961182, "learning_rate": 3.154238756894512e-06, "loss": 1.8894, "step": 8766 }, { "epoch": 0.9223566543924251, "grad_norm": 1.596531629562378, "learning_rate": 3.1457530374614295e-06, "loss": 2.0511, "step": 8767 }, { "epoch": 0.9224618621778011, "grad_norm": 1.645261526107788, "learning_rate": 3.1372785653931093e-06, "loss": 1.7624, "step": 8768 }, { "epoch": 0.9225670699631773, "grad_norm": 1.3146398067474365, "learning_rate": 3.128815341673674e-06, "loss": 1.9661, "step": 8769 }, { "epoch": 0.9226722777485534, "grad_norm": 1.578999400138855, "learning_rate": 3.120363367285917e-06, "loss": 1.3778, "step": 8770 }, { "epoch": 0.9227774855339295, "grad_norm": 2.222402811050415, "learning_rate": 3.111922643211351e-06, "loss": 2.074, "step": 8771 }, { "epoch": 0.9228826933193056, "grad_norm": 1.712213158607483, "learning_rate": 3.1034931704301606e-06, "loss": 1.6251, "step": 8772 }, { "epoch": 0.9229879011046818, "grad_norm": 1.4497140645980835, "learning_rate": 3.0950749499212283e-06, "loss": 1.5102, "step": 8773 }, { "epoch": 0.9230931088900579, "grad_norm": 1.7401906251907349, "learning_rate": 3.0866679826621504e-06, "loss": 1.5702, "step": 8774 }, { "epoch": 0.923198316675434, "grad_norm": 1.3884131908416748, "learning_rate": 3.078272269629201e-06, "loss": 1.6859, "step": 8775 }, { "epoch": 0.9233035244608101, "grad_norm": 1.8712023496627808, "learning_rate": 3.0698878117973117e-06, "loss": 1.7209, "step": 8776 }, { "epoch": 0.9234087322461862, "grad_norm": 1.4813895225524902, "learning_rate": 3.0615146101401925e-06, "loss": 1.5664, "step": 8777 }, { "epoch": 0.9235139400315623, "grad_norm": 1.8341212272644043, "learning_rate": 3.053152665630166e-06, "loss": 1.4758, "step": 8778 }, { "epoch": 0.9236191478169384, "grad_norm": 2.59242582321167, "learning_rate": 3.0448019792382654e-06, "loss": 1.4894, "step": 8779 }, { "epoch": 0.9237243556023146, "grad_norm": 1.6962684392929077, "learning_rate": 3.0364625519342603e-06, "loss": 1.1454, "step": 8780 }, { "epoch": 0.9238295633876907, "grad_norm": 1.346834421157837, "learning_rate": 3.028134384686565e-06, "loss": 1.3534, "step": 8781 }, { "epoch": 0.9239347711730668, "grad_norm": 1.6630899906158447, "learning_rate": 3.0198174784622944e-06, "loss": 1.7483, "step": 8782 }, { "epoch": 0.9240399789584429, "grad_norm": 1.2288480997085571, "learning_rate": 3.0115118342272765e-06, "loss": 1.9129, "step": 8783 }, { "epoch": 0.9241451867438191, "grad_norm": 1.761152982711792, "learning_rate": 3.0032174529460165e-06, "loss": 1.4155, "step": 8784 }, { "epoch": 0.9242503945291951, "grad_norm": 1.8571745157241821, "learning_rate": 2.9949343355817003e-06, "loss": 1.5797, "step": 8785 }, { "epoch": 0.9243556023145713, "grad_norm": 1.4947131872177124, "learning_rate": 2.9866624830962366e-06, "loss": 1.5442, "step": 8786 }, { "epoch": 0.9244608100999474, "grad_norm": 1.9050381183624268, "learning_rate": 2.9784018964502114e-06, "loss": 1.0048, "step": 8787 }, { "epoch": 0.9245660178853236, "grad_norm": 1.943846583366394, "learning_rate": 2.9701525766028802e-06, "loss": 1.6949, "step": 8788 }, { "epoch": 0.9246712256706996, "grad_norm": 2.4148600101470947, "learning_rate": 2.9619145245122217e-06, "loss": 0.8479, "step": 8789 }, { "epoch": 0.9247764334560757, "grad_norm": 1.4801805019378662, "learning_rate": 2.9536877411348808e-06, "loss": 2.1227, "step": 8790 }, { "epoch": 0.9248816412414519, "grad_norm": 1.3251457214355469, "learning_rate": 2.945472227426227e-06, "loss": 1.5081, "step": 8791 }, { "epoch": 0.924986849026828, "grad_norm": 2.5353012084960938, "learning_rate": 2.9372679843402863e-06, "loss": 1.4429, "step": 8792 }, { "epoch": 0.9250920568122041, "grad_norm": 1.5721760988235474, "learning_rate": 2.9290750128297963e-06, "loss": 1.9756, "step": 8793 }, { "epoch": 0.9251972645975802, "grad_norm": 1.1288405656814575, "learning_rate": 2.9208933138461737e-06, "loss": 2.0431, "step": 8794 }, { "epoch": 0.9253024723829564, "grad_norm": 1.4272089004516602, "learning_rate": 2.9127228883395472e-06, "loss": 1.9728, "step": 8795 }, { "epoch": 0.9254076801683324, "grad_norm": 1.9231061935424805, "learning_rate": 2.904563737258692e-06, "loss": 2.0053, "step": 8796 }, { "epoch": 0.9255128879537086, "grad_norm": 1.3104902505874634, "learning_rate": 2.8964158615511383e-06, "loss": 1.5842, "step": 8797 }, { "epoch": 0.9256180957390847, "grad_norm": 1.5530825853347778, "learning_rate": 2.8882792621630406e-06, "loss": 1.1644, "step": 8798 }, { "epoch": 0.9257233035244609, "grad_norm": 1.2885637283325195, "learning_rate": 2.8801539400393097e-06, "loss": 1.7493, "step": 8799 }, { "epoch": 0.9258285113098369, "grad_norm": 1.3112657070159912, "learning_rate": 2.8720398961234907e-06, "loss": 1.2708, "step": 8800 }, { "epoch": 0.925933719095213, "grad_norm": 1.7676241397857666, "learning_rate": 2.86393713135783e-06, "loss": 1.7696, "step": 8801 }, { "epoch": 0.9260389268805892, "grad_norm": 1.611799955368042, "learning_rate": 2.8558456466832973e-06, "loss": 1.4262, "step": 8802 }, { "epoch": 0.9261441346659652, "grad_norm": 1.5839171409606934, "learning_rate": 2.8477654430395185e-06, "loss": 1.9976, "step": 8803 }, { "epoch": 0.9262493424513414, "grad_norm": 1.8299293518066406, "learning_rate": 2.839696521364821e-06, "loss": 1.5226, "step": 8804 }, { "epoch": 0.9263545502367175, "grad_norm": 2.451498508453369, "learning_rate": 2.8316388825962324e-06, "loss": 1.4629, "step": 8805 }, { "epoch": 0.9264597580220937, "grad_norm": 1.6395529508590698, "learning_rate": 2.823592527669461e-06, "loss": 1.8826, "step": 8806 }, { "epoch": 0.9265649658074697, "grad_norm": 1.9740159511566162, "learning_rate": 2.8155574575188694e-06, "loss": 1.2348, "step": 8807 }, { "epoch": 0.9266701735928459, "grad_norm": 2.034790515899658, "learning_rate": 2.8075336730775894e-06, "loss": 1.6714, "step": 8808 }, { "epoch": 0.926775381378222, "grad_norm": 1.232695460319519, "learning_rate": 2.7995211752773752e-06, "loss": 1.2372, "step": 8809 }, { "epoch": 0.9268805891635981, "grad_norm": 1.4230536222457886, "learning_rate": 2.7915199650486944e-06, "loss": 1.8896, "step": 8810 }, { "epoch": 0.9269857969489742, "grad_norm": 1.6737197637557983, "learning_rate": 2.7835300433207035e-06, "loss": 1.6089, "step": 8811 }, { "epoch": 0.9270910047343504, "grad_norm": 2.7729249000549316, "learning_rate": 2.7755514110212264e-06, "loss": 1.311, "step": 8812 }, { "epoch": 0.9271962125197265, "grad_norm": 1.7959167957305908, "learning_rate": 2.767584069076823e-06, "loss": 1.3979, "step": 8813 }, { "epoch": 0.9273014203051025, "grad_norm": 0.992779016494751, "learning_rate": 2.7596280184126965e-06, "loss": 1.7532, "step": 8814 }, { "epoch": 0.9274066280904787, "grad_norm": 1.6769089698791504, "learning_rate": 2.751683259952764e-06, "loss": 1.6832, "step": 8815 }, { "epoch": 0.9275118358758548, "grad_norm": 1.5358198881149292, "learning_rate": 2.7437497946196322e-06, "loss": 1.1027, "step": 8816 }, { "epoch": 0.9276170436612309, "grad_norm": 1.683468222618103, "learning_rate": 2.7358276233345747e-06, "loss": 1.014, "step": 8817 }, { "epoch": 0.927722251446607, "grad_norm": 1.826439619064331, "learning_rate": 2.727916747017556e-06, "loss": 1.3374, "step": 8818 }, { "epoch": 0.9278274592319832, "grad_norm": 1.3325717449188232, "learning_rate": 2.7200171665872742e-06, "loss": 1.8112, "step": 8819 }, { "epoch": 0.9279326670173593, "grad_norm": 1.434065818786621, "learning_rate": 2.7121288829610624e-06, "loss": 1.5424, "step": 8820 }, { "epoch": 0.9280378748027354, "grad_norm": 1.613349437713623, "learning_rate": 2.7042518970549546e-06, "loss": 1.3637, "step": 8821 }, { "epoch": 0.9281430825881115, "grad_norm": 1.9332239627838135, "learning_rate": 2.696386209783697e-06, "loss": 1.5076, "step": 8822 }, { "epoch": 0.9282482903734877, "grad_norm": 1.393325686454773, "learning_rate": 2.6885318220606914e-06, "loss": 1.6009, "step": 8823 }, { "epoch": 0.9283534981588638, "grad_norm": 1.6824408769607544, "learning_rate": 2.6806887347980427e-06, "loss": 1.6287, "step": 8824 }, { "epoch": 0.9284587059442398, "grad_norm": 1.7882195711135864, "learning_rate": 2.6728569489065437e-06, "loss": 2.0023, "step": 8825 }, { "epoch": 0.928563913729616, "grad_norm": 1.559647560119629, "learning_rate": 2.6650364652956894e-06, "loss": 1.3792, "step": 8826 }, { "epoch": 0.9286691215149921, "grad_norm": 1.2669860124588013, "learning_rate": 2.657227284873609e-06, "loss": 1.6591, "step": 8827 }, { "epoch": 0.9287743293003682, "grad_norm": 1.9442473649978638, "learning_rate": 2.6494294085472103e-06, "loss": 1.7604, "step": 8828 }, { "epoch": 0.9288795370857443, "grad_norm": 1.3520489931106567, "learning_rate": 2.6416428372219914e-06, "loss": 1.8279, "step": 8829 }, { "epoch": 0.9289847448711205, "grad_norm": 1.2436840534210205, "learning_rate": 2.6338675718022064e-06, "loss": 1.9452, "step": 8830 }, { "epoch": 0.9290899526564966, "grad_norm": 1.687325119972229, "learning_rate": 2.6261036131907557e-06, "loss": 1.1885, "step": 8831 }, { "epoch": 0.9291951604418727, "grad_norm": 1.489946722984314, "learning_rate": 2.61835096228924e-06, "loss": 1.9571, "step": 8832 }, { "epoch": 0.9293003682272488, "grad_norm": 1.1309078931808472, "learning_rate": 2.6106096199979614e-06, "loss": 1.4592, "step": 8833 }, { "epoch": 0.929405576012625, "grad_norm": 1.816552758216858, "learning_rate": 2.6028795872159005e-06, "loss": 1.5374, "step": 8834 }, { "epoch": 0.929510783798001, "grad_norm": 1.753360390663147, "learning_rate": 2.5951608648406955e-06, "loss": 1.5404, "step": 8835 }, { "epoch": 0.9296159915833772, "grad_norm": 1.432955265045166, "learning_rate": 2.587453453768729e-06, "loss": 1.7795, "step": 8836 }, { "epoch": 0.9297211993687533, "grad_norm": 2.5248990058898926, "learning_rate": 2.579757354895018e-06, "loss": 1.5791, "step": 8837 }, { "epoch": 0.9298264071541295, "grad_norm": 1.3202687501907349, "learning_rate": 2.5720725691132706e-06, "loss": 1.4432, "step": 8838 }, { "epoch": 0.9299316149395055, "grad_norm": 1.9619760513305664, "learning_rate": 2.564399097315928e-06, "loss": 1.7022, "step": 8839 }, { "epoch": 0.9300368227248816, "grad_norm": 1.2252382040023804, "learning_rate": 2.5567369403940776e-06, "loss": 1.579, "step": 8840 }, { "epoch": 0.9301420305102578, "grad_norm": 1.8557909727096558, "learning_rate": 2.5490860992374745e-06, "loss": 1.6796, "step": 8841 }, { "epoch": 0.9302472382956338, "grad_norm": 1.5600543022155762, "learning_rate": 2.5414465747346182e-06, "loss": 1.5379, "step": 8842 }, { "epoch": 0.93035244608101, "grad_norm": 1.3198964595794678, "learning_rate": 2.5338183677726334e-06, "loss": 1.8882, "step": 8843 }, { "epoch": 0.9304576538663861, "grad_norm": 1.934952974319458, "learning_rate": 2.526201479237389e-06, "loss": 0.9782, "step": 8844 }, { "epoch": 0.9305628616517623, "grad_norm": 1.6024317741394043, "learning_rate": 2.5185959100133883e-06, "loss": 1.7564, "step": 8845 }, { "epoch": 0.9306680694371383, "grad_norm": 1.7014966011047363, "learning_rate": 2.5110016609838473e-06, "loss": 1.5487, "step": 8846 }, { "epoch": 0.9307732772225145, "grad_norm": 1.7204549312591553, "learning_rate": 2.50341873303066e-06, "loss": 1.7045, "step": 8847 }, { "epoch": 0.9308784850078906, "grad_norm": 1.1743104457855225, "learning_rate": 2.495847127034401e-06, "loss": 1.7379, "step": 8848 }, { "epoch": 0.9309836927932666, "grad_norm": 1.450788140296936, "learning_rate": 2.4882868438743436e-06, "loss": 1.2717, "step": 8849 }, { "epoch": 0.9310889005786428, "grad_norm": 1.5399311780929565, "learning_rate": 2.480737884428441e-06, "loss": 1.3663, "step": 8850 }, { "epoch": 0.931194108364019, "grad_norm": 1.6990025043487549, "learning_rate": 2.4732002495733154e-06, "loss": 1.8612, "step": 8851 }, { "epoch": 0.9312993161493951, "grad_norm": 1.3516472578048706, "learning_rate": 2.46567394018431e-06, "loss": 1.5048, "step": 8852 }, { "epoch": 0.9314045239347711, "grad_norm": 2.059854030609131, "learning_rate": 2.458158957135415e-06, "loss": 1.6879, "step": 8853 }, { "epoch": 0.9315097317201473, "grad_norm": 1.1983468532562256, "learning_rate": 2.4506553012993093e-06, "loss": 1.7273, "step": 8854 }, { "epoch": 0.9316149395055234, "grad_norm": 1.6070380210876465, "learning_rate": 2.443162973547386e-06, "loss": 1.3611, "step": 8855 }, { "epoch": 0.9317201472908996, "grad_norm": 1.6409918069839478, "learning_rate": 2.435681974749704e-06, "loss": 1.5396, "step": 8856 }, { "epoch": 0.9318253550762756, "grad_norm": 2.1190476417541504, "learning_rate": 2.4282123057750016e-06, "loss": 1.9413, "step": 8857 }, { "epoch": 0.9319305628616518, "grad_norm": 1.454859733581543, "learning_rate": 2.4207539674907075e-06, "loss": 1.6396, "step": 8858 }, { "epoch": 0.9320357706470279, "grad_norm": 1.9500346183776855, "learning_rate": 2.413306960762929e-06, "loss": 1.0076, "step": 8859 }, { "epoch": 0.932140978432404, "grad_norm": 2.006927013397217, "learning_rate": 2.4058712864564736e-06, "loss": 1.8154, "step": 8860 }, { "epoch": 0.9322461862177801, "grad_norm": 1.5914230346679688, "learning_rate": 2.398446945434818e-06, "loss": 1.5964, "step": 8861 }, { "epoch": 0.9323513940031563, "grad_norm": 1.940948724746704, "learning_rate": 2.3910339385601168e-06, "loss": 1.8728, "step": 8862 }, { "epoch": 0.9324566017885324, "grad_norm": 1.6678147315979004, "learning_rate": 2.383632266693225e-06, "loss": 2.0333, "step": 8863 }, { "epoch": 0.9325618095739084, "grad_norm": 1.5024465322494507, "learning_rate": 2.376241930693679e-06, "loss": 1.7904, "step": 8864 }, { "epoch": 0.9326670173592846, "grad_norm": 1.273901343345642, "learning_rate": 2.368862931419702e-06, "loss": 1.3617, "step": 8865 }, { "epoch": 0.9327722251446607, "grad_norm": 1.9819680452346802, "learning_rate": 2.3614952697281534e-06, "loss": 1.5756, "step": 8866 }, { "epoch": 0.9328774329300368, "grad_norm": 1.4044946432113647, "learning_rate": 2.354138946474671e-06, "loss": 1.7253, "step": 8867 }, { "epoch": 0.9329826407154129, "grad_norm": 1.6213405132293701, "learning_rate": 2.346793962513483e-06, "loss": 1.9362, "step": 8868 }, { "epoch": 0.9330878485007891, "grad_norm": 2.6082890033721924, "learning_rate": 2.3394603186975393e-06, "loss": 1.5571, "step": 8869 }, { "epoch": 0.9331930562861652, "grad_norm": 1.7648690938949585, "learning_rate": 2.332138015878482e-06, "loss": 1.5838, "step": 8870 }, { "epoch": 0.9332982640715413, "grad_norm": 1.2517858743667603, "learning_rate": 2.324827054906631e-06, "loss": 1.4428, "step": 8871 }, { "epoch": 0.9334034718569174, "grad_norm": 1.9067564010620117, "learning_rate": 2.317527436630973e-06, "loss": 2.0886, "step": 8872 }, { "epoch": 0.9335086796422936, "grad_norm": 1.4557033777236938, "learning_rate": 2.310239161899208e-06, "loss": 1.4884, "step": 8873 }, { "epoch": 0.9336138874276696, "grad_norm": 1.6103370189666748, "learning_rate": 2.3029622315576595e-06, "loss": 1.6499, "step": 8874 }, { "epoch": 0.9337190952130457, "grad_norm": 1.6522938013076782, "learning_rate": 2.2956966464514175e-06, "loss": 1.7791, "step": 8875 }, { "epoch": 0.9338243029984219, "grad_norm": 1.3017587661743164, "learning_rate": 2.288442407424185e-06, "loss": 1.9411, "step": 8876 }, { "epoch": 0.933929510783798, "grad_norm": 1.478482961654663, "learning_rate": 2.2811995153183776e-06, "loss": 1.6778, "step": 8877 }, { "epoch": 0.9340347185691741, "grad_norm": 2.009734630584717, "learning_rate": 2.2739679709750885e-06, "loss": 1.4429, "step": 8878 }, { "epoch": 0.9341399263545502, "grad_norm": 1.2722523212432861, "learning_rate": 2.2667477752341017e-06, "loss": 1.4058, "step": 8879 }, { "epoch": 0.9342451341399264, "grad_norm": 1.124986171722412, "learning_rate": 2.2595389289338575e-06, "loss": 1.3739, "step": 8880 }, { "epoch": 0.9343503419253024, "grad_norm": 1.039442777633667, "learning_rate": 2.252341432911509e-06, "loss": 1.7472, "step": 8881 }, { "epoch": 0.9344555497106786, "grad_norm": 2.12754487991333, "learning_rate": 2.245155288002876e-06, "loss": 1.2261, "step": 8882 }, { "epoch": 0.9345607574960547, "grad_norm": 1.5434430837631226, "learning_rate": 2.2379804950424576e-06, "loss": 1.5896, "step": 8883 }, { "epoch": 0.9346659652814309, "grad_norm": 1.6226022243499756, "learning_rate": 2.2308170548634435e-06, "loss": 1.3338, "step": 8884 }, { "epoch": 0.9347711730668069, "grad_norm": 1.7707509994506836, "learning_rate": 2.2236649682977117e-06, "loss": 1.332, "step": 8885 }, { "epoch": 0.934876380852183, "grad_norm": 1.5284020900726318, "learning_rate": 2.2165242361757764e-06, "loss": 1.669, "step": 8886 }, { "epoch": 0.9349815886375592, "grad_norm": 1.250614881515503, "learning_rate": 2.2093948593268963e-06, "loss": 1.7689, "step": 8887 }, { "epoch": 0.9350867964229354, "grad_norm": 1.4621397256851196, "learning_rate": 2.202276838578976e-06, "loss": 1.5322, "step": 8888 }, { "epoch": 0.9351920042083114, "grad_norm": 1.8375461101531982, "learning_rate": 2.1951701747585982e-06, "loss": 1.6119, "step": 8889 }, { "epoch": 0.9352972119936875, "grad_norm": 1.5318689346313477, "learning_rate": 2.188074868691059e-06, "loss": 1.2107, "step": 8890 }, { "epoch": 0.9354024197790637, "grad_norm": 2.4939355850219727, "learning_rate": 2.1809909212002767e-06, "loss": 1.7044, "step": 8891 }, { "epoch": 0.9355076275644397, "grad_norm": 1.93202543258667, "learning_rate": 2.1739183331089263e-06, "loss": 1.3337, "step": 8892 }, { "epoch": 0.9356128353498159, "grad_norm": 1.680690884590149, "learning_rate": 2.166857105238307e-06, "loss": 1.9268, "step": 8893 }, { "epoch": 0.935718043135192, "grad_norm": 1.772756576538086, "learning_rate": 2.159807238408418e-06, "loss": 1.7988, "step": 8894 }, { "epoch": 0.9358232509205682, "grad_norm": 1.440395474433899, "learning_rate": 2.1527687334379266e-06, "loss": 1.2087, "step": 8895 }, { "epoch": 0.9359284587059442, "grad_norm": 1.6496853828430176, "learning_rate": 2.1457415911442013e-06, "loss": 1.5179, "step": 8896 }, { "epoch": 0.9360336664913204, "grad_norm": 1.3402870893478394, "learning_rate": 2.1387258123432673e-06, "loss": 1.7395, "step": 8897 }, { "epoch": 0.9361388742766965, "grad_norm": 1.9559837579727173, "learning_rate": 2.131721397849862e-06, "loss": 1.5187, "step": 8898 }, { "epoch": 0.9362440820620725, "grad_norm": 2.084958076477051, "learning_rate": 2.1247283484773785e-06, "loss": 1.8087, "step": 8899 }, { "epoch": 0.9363492898474487, "grad_norm": 2.734379291534424, "learning_rate": 2.1177466650379007e-06, "loss": 1.286, "step": 8900 }, { "epoch": 0.9364544976328248, "grad_norm": 1.1719292402267456, "learning_rate": 2.1107763483421805e-06, "loss": 1.8817, "step": 8901 }, { "epoch": 0.936559705418201, "grad_norm": 1.631633996963501, "learning_rate": 2.103817399199659e-06, "loss": 1.5544, "step": 8902 }, { "epoch": 0.936664913203577, "grad_norm": 2.258840799331665, "learning_rate": 2.0968698184184565e-06, "loss": 1.4009, "step": 8903 }, { "epoch": 0.9367701209889532, "grad_norm": 1.6519112586975098, "learning_rate": 2.0899336068053833e-06, "loss": 1.5555, "step": 8904 }, { "epoch": 0.9368753287743293, "grad_norm": 1.7025299072265625, "learning_rate": 2.0830087651658945e-06, "loss": 2.2402, "step": 8905 }, { "epoch": 0.9369805365597054, "grad_norm": 1.8736423254013062, "learning_rate": 2.076095294304181e-06, "loss": 1.5652, "step": 8906 }, { "epoch": 0.9370857443450815, "grad_norm": 1.4368400573730469, "learning_rate": 2.069193195023067e-06, "loss": 2.0581, "step": 8907 }, { "epoch": 0.9371909521304577, "grad_norm": 1.2674177885055542, "learning_rate": 2.0623024681240554e-06, "loss": 1.8723, "step": 8908 }, { "epoch": 0.9372961599158338, "grad_norm": 1.2145195007324219, "learning_rate": 2.0554231144073623e-06, "loss": 1.8348, "step": 8909 }, { "epoch": 0.9374013677012099, "grad_norm": 2.1379621028900146, "learning_rate": 2.04855513467187e-06, "loss": 1.5549, "step": 8910 }, { "epoch": 0.937506575486586, "grad_norm": 2.2935433387756348, "learning_rate": 2.041698529715097e-06, "loss": 0.869, "step": 8911 }, { "epoch": 0.9376117832719622, "grad_norm": 2.0212948322296143, "learning_rate": 2.034853300333328e-06, "loss": 2.2311, "step": 8912 }, { "epoch": 0.9377169910573382, "grad_norm": 1.7877557277679443, "learning_rate": 2.0280194473214497e-06, "loss": 1.214, "step": 8913 }, { "epoch": 0.9378221988427143, "grad_norm": 1.538549780845642, "learning_rate": 2.0211969714730496e-06, "loss": 1.2042, "step": 8914 }, { "epoch": 0.9379274066280905, "grad_norm": 1.2701212167739868, "learning_rate": 2.0143858735804154e-06, "loss": 1.6479, "step": 8915 }, { "epoch": 0.9380326144134666, "grad_norm": 2.006523609161377, "learning_rate": 2.007586154434493e-06, "loss": 1.9177, "step": 8916 }, { "epoch": 0.9381378221988427, "grad_norm": 1.5271292924880981, "learning_rate": 2.000797814824906e-06, "loss": 1.2413, "step": 8917 }, { "epoch": 0.9382430299842188, "grad_norm": 1.2923787832260132, "learning_rate": 1.9940208555399685e-06, "loss": 1.626, "step": 8918 }, { "epoch": 0.938348237769595, "grad_norm": 2.304880380630493, "learning_rate": 1.987255277366662e-06, "loss": 1.7644, "step": 8919 }, { "epoch": 0.9384534455549711, "grad_norm": 1.4698824882507324, "learning_rate": 1.9805010810906464e-06, "loss": 1.9222, "step": 8920 }, { "epoch": 0.9385586533403472, "grad_norm": 1.2957773208618164, "learning_rate": 1.9737582674962728e-06, "loss": 1.5484, "step": 8921 }, { "epoch": 0.9386638611257233, "grad_norm": 1.2710483074188232, "learning_rate": 1.967026837366559e-06, "loss": 1.4719, "step": 8922 }, { "epoch": 0.9387690689110995, "grad_norm": 1.4735010862350464, "learning_rate": 1.9603067914832017e-06, "loss": 1.6447, "step": 8923 }, { "epoch": 0.9388742766964755, "grad_norm": 1.5090333223342896, "learning_rate": 1.9535981306265884e-06, "loss": 1.9183, "step": 8924 }, { "epoch": 0.9389794844818516, "grad_norm": 1.4228880405426025, "learning_rate": 1.9469008555757505e-06, "loss": 1.8077, "step": 8925 }, { "epoch": 0.9390846922672278, "grad_norm": 1.4772889614105225, "learning_rate": 1.9402149671084446e-06, "loss": 1.4166, "step": 8926 }, { "epoch": 0.9391899000526039, "grad_norm": 2.280817985534668, "learning_rate": 1.9335404660010713e-06, "loss": 1.458, "step": 8927 }, { "epoch": 0.93929510783798, "grad_norm": 2.147027015686035, "learning_rate": 1.926877353028711e-06, "loss": 0.6933, "step": 8928 }, { "epoch": 0.9394003156233561, "grad_norm": 1.2121834754943848, "learning_rate": 1.9202256289651446e-06, "loss": 1.8354, "step": 8929 }, { "epoch": 0.9395055234087323, "grad_norm": 1.4258288145065308, "learning_rate": 1.913585294582798e-06, "loss": 1.588, "step": 8930 }, { "epoch": 0.9396107311941083, "grad_norm": 1.710619330406189, "learning_rate": 1.9069563506527998e-06, "loss": 1.7881, "step": 8931 }, { "epoch": 0.9397159389794845, "grad_norm": 1.781956434249878, "learning_rate": 1.9003387979449562e-06, "loss": 1.416, "step": 8932 }, { "epoch": 0.9398211467648606, "grad_norm": 2.1186389923095703, "learning_rate": 1.8937326372277408e-06, "loss": 1.2819, "step": 8933 }, { "epoch": 0.9399263545502368, "grad_norm": 1.4919960498809814, "learning_rate": 1.8871378692682851e-06, "loss": 1.6042, "step": 8934 }, { "epoch": 0.9400315623356128, "grad_norm": 1.996641755104065, "learning_rate": 1.8805544948324317e-06, "loss": 1.4017, "step": 8935 }, { "epoch": 0.940136770120989, "grad_norm": 1.87164306640625, "learning_rate": 1.87398251468468e-06, "loss": 1.7661, "step": 8936 }, { "epoch": 0.9402419779063651, "grad_norm": 1.9063903093338013, "learning_rate": 1.867421929588231e-06, "loss": 1.7494, "step": 8937 }, { "epoch": 0.9403471856917411, "grad_norm": 3.1897265911102295, "learning_rate": 1.8608727403049309e-06, "loss": 1.7943, "step": 8938 }, { "epoch": 0.9404523934771173, "grad_norm": 6.021173000335693, "learning_rate": 1.8543349475953043e-06, "loss": 1.4585, "step": 8939 }, { "epoch": 0.9405576012624934, "grad_norm": 2.0118443965911865, "learning_rate": 1.8478085522185773e-06, "loss": 1.4911, "step": 8940 }, { "epoch": 0.9406628090478696, "grad_norm": 1.1734709739685059, "learning_rate": 1.8412935549326438e-06, "loss": 1.9156, "step": 8941 }, { "epoch": 0.9407680168332456, "grad_norm": 1.911544919013977, "learning_rate": 1.834789956494043e-06, "loss": 1.7905, "step": 8942 }, { "epoch": 0.9408732246186218, "grad_norm": 1.821887731552124, "learning_rate": 1.8282977576580484e-06, "loss": 1.6208, "step": 8943 }, { "epoch": 0.9409784324039979, "grad_norm": 1.2936660051345825, "learning_rate": 1.8218169591785572e-06, "loss": 1.9121, "step": 8944 }, { "epoch": 0.941083640189374, "grad_norm": 1.6752891540527344, "learning_rate": 1.8153475618081673e-06, "loss": 1.3991, "step": 8945 }, { "epoch": 0.9411888479747501, "grad_norm": 1.817868709564209, "learning_rate": 1.8088895662981554e-06, "loss": 1.518, "step": 8946 }, { "epoch": 0.9412940557601263, "grad_norm": 1.5891584157943726, "learning_rate": 1.802442973398455e-06, "loss": 1.763, "step": 8947 }, { "epoch": 0.9413992635455024, "grad_norm": 1.1031978130340576, "learning_rate": 1.7960077838577117e-06, "loss": 1.5801, "step": 8948 }, { "epoch": 0.9415044713308784, "grad_norm": 1.214778184890747, "learning_rate": 1.7895839984231943e-06, "loss": 1.9143, "step": 8949 }, { "epoch": 0.9416096791162546, "grad_norm": 1.8901784420013428, "learning_rate": 1.7831716178408841e-06, "loss": 1.397, "step": 8950 }, { "epoch": 0.9417148869016307, "grad_norm": 1.900726079940796, "learning_rate": 1.776770642855441e-06, "loss": 1.8984, "step": 8951 }, { "epoch": 0.9418200946870069, "grad_norm": 1.5284584760665894, "learning_rate": 1.7703810742101813e-06, "loss": 1.8202, "step": 8952 }, { "epoch": 0.9419253024723829, "grad_norm": 1.6591954231262207, "learning_rate": 1.7640029126471003e-06, "loss": 1.2281, "step": 8953 }, { "epoch": 0.9420305102577591, "grad_norm": 1.8267433643341064, "learning_rate": 1.7576361589068834e-06, "loss": 1.3939, "step": 8954 }, { "epoch": 0.9421357180431352, "grad_norm": 1.8574069738388062, "learning_rate": 1.7512808137288727e-06, "loss": 1.6839, "step": 8955 }, { "epoch": 0.9422409258285113, "grad_norm": 2.1914479732513428, "learning_rate": 1.7449368778511e-06, "loss": 1.6874, "step": 8956 }, { "epoch": 0.9423461336138874, "grad_norm": 1.5957982540130615, "learning_rate": 1.7386043520102534e-06, "loss": 1.8577, "step": 8957 }, { "epoch": 0.9424513413992636, "grad_norm": 1.8510699272155762, "learning_rate": 1.732283236941723e-06, "loss": 1.8177, "step": 8958 }, { "epoch": 0.9425565491846397, "grad_norm": 1.7489219903945923, "learning_rate": 1.7259735333795545e-06, "loss": 1.9759, "step": 8959 }, { "epoch": 0.9426617569700158, "grad_norm": 1.5425418615341187, "learning_rate": 1.7196752420564622e-06, "loss": 1.5018, "step": 8960 }, { "epoch": 0.9427669647553919, "grad_norm": 1.9875203371047974, "learning_rate": 1.7133883637038494e-06, "loss": 1.3086, "step": 8961 }, { "epoch": 0.942872172540768, "grad_norm": 1.7433385848999023, "learning_rate": 1.7071128990518103e-06, "loss": 1.4756, "step": 8962 }, { "epoch": 0.9429773803261441, "grad_norm": 1.6027960777282715, "learning_rate": 1.700848848829073e-06, "loss": 2.1611, "step": 8963 }, { "epoch": 0.9430825881115202, "grad_norm": 1.8459538221359253, "learning_rate": 1.6945962137630668e-06, "loss": 1.7465, "step": 8964 }, { "epoch": 0.9431877958968964, "grad_norm": 1.179093599319458, "learning_rate": 1.6883549945798883e-06, "loss": 1.8516, "step": 8965 }, { "epoch": 0.9432930036822725, "grad_norm": 1.7608375549316406, "learning_rate": 1.6821251920043246e-06, "loss": 1.5069, "step": 8966 }, { "epoch": 0.9433982114676486, "grad_norm": 1.7923959493637085, "learning_rate": 1.675906806759786e-06, "loss": 1.3353, "step": 8967 }, { "epoch": 0.9435034192530247, "grad_norm": 1.7323315143585205, "learning_rate": 1.6696998395684394e-06, "loss": 1.7977, "step": 8968 }, { "epoch": 0.9436086270384009, "grad_norm": 1.0940277576446533, "learning_rate": 1.6635042911510413e-06, "loss": 1.8435, "step": 8969 }, { "epoch": 0.9437138348237769, "grad_norm": 1.5782438516616821, "learning_rate": 1.6573201622270828e-06, "loss": 1.428, "step": 8970 }, { "epoch": 0.9438190426091531, "grad_norm": 1.616591453552246, "learning_rate": 1.6511474535146899e-06, "loss": 1.3093, "step": 8971 }, { "epoch": 0.9439242503945292, "grad_norm": 1.872626781463623, "learning_rate": 1.6449861657306998e-06, "loss": 1.1612, "step": 8972 }, { "epoch": 0.9440294581799054, "grad_norm": 2.045409679412842, "learning_rate": 1.6388362995905848e-06, "loss": 1.7662, "step": 8973 }, { "epoch": 0.9441346659652814, "grad_norm": 1.8982219696044922, "learning_rate": 1.6326978558085182e-06, "loss": 1.5276, "step": 8974 }, { "epoch": 0.9442398737506575, "grad_norm": 2.2144486904144287, "learning_rate": 1.6265708350973296e-06, "loss": 1.3016, "step": 8975 }, { "epoch": 0.9443450815360337, "grad_norm": 1.7731724977493286, "learning_rate": 1.6204552381685278e-06, "loss": 1.884, "step": 8976 }, { "epoch": 0.9444502893214097, "grad_norm": 1.628603219985962, "learning_rate": 1.6143510657323112e-06, "loss": 1.5011, "step": 8977 }, { "epoch": 0.9445554971067859, "grad_norm": 1.250891089439392, "learning_rate": 1.6082583184975352e-06, "loss": 1.5904, "step": 8978 }, { "epoch": 0.944660704892162, "grad_norm": 1.6449693441390991, "learning_rate": 1.6021769971717116e-06, "loss": 1.9314, "step": 8979 }, { "epoch": 0.9447659126775382, "grad_norm": 1.9107930660247803, "learning_rate": 1.5961071024610752e-06, "loss": 1.5378, "step": 8980 }, { "epoch": 0.9448711204629142, "grad_norm": 1.551930546760559, "learning_rate": 1.5900486350704625e-06, "loss": 1.4893, "step": 8981 }, { "epoch": 0.9449763282482904, "grad_norm": 1.4413615465164185, "learning_rate": 1.584001595703466e-06, "loss": 1.8078, "step": 8982 }, { "epoch": 0.9450815360336665, "grad_norm": 1.8441150188446045, "learning_rate": 1.5779659850622797e-06, "loss": 1.6129, "step": 8983 }, { "epoch": 0.9451867438190427, "grad_norm": 1.5965230464935303, "learning_rate": 1.5719418038477985e-06, "loss": 2.0439, "step": 8984 }, { "epoch": 0.9452919516044187, "grad_norm": 2.328061819076538, "learning_rate": 1.5659290527596071e-06, "loss": 1.4441, "step": 8985 }, { "epoch": 0.9453971593897948, "grad_norm": 2.004664182662964, "learning_rate": 1.559927732495936e-06, "loss": 1.7041, "step": 8986 }, { "epoch": 0.945502367175171, "grad_norm": 1.5392452478408813, "learning_rate": 1.5539378437536944e-06, "loss": 1.8988, "step": 8987 }, { "epoch": 0.945607574960547, "grad_norm": 1.6413873434066772, "learning_rate": 1.5479593872284926e-06, "loss": 1.6044, "step": 8988 }, { "epoch": 0.9457127827459232, "grad_norm": 2.1332480907440186, "learning_rate": 1.5419923636145639e-06, "loss": 1.8659, "step": 8989 }, { "epoch": 0.9458179905312993, "grad_norm": 1.1487113237380981, "learning_rate": 1.536036773604843e-06, "loss": 1.6138, "step": 8990 }, { "epoch": 0.9459231983166755, "grad_norm": 1.1979764699935913, "learning_rate": 1.5300926178909435e-06, "loss": 1.684, "step": 8991 }, { "epoch": 0.9460284061020515, "grad_norm": 1.9243789911270142, "learning_rate": 1.5241598971631354e-06, "loss": 1.5863, "step": 8992 }, { "epoch": 0.9461336138874277, "grad_norm": 1.8303254842758179, "learning_rate": 1.5182386121103676e-06, "loss": 1.7, "step": 8993 }, { "epoch": 0.9462388216728038, "grad_norm": 1.7681361436843872, "learning_rate": 1.5123287634202454e-06, "loss": 1.521, "step": 8994 }, { "epoch": 0.9463440294581799, "grad_norm": 1.3203057050704956, "learning_rate": 1.5064303517790757e-06, "loss": 1.8113, "step": 8995 }, { "epoch": 0.946449237243556, "grad_norm": 1.5649878978729248, "learning_rate": 1.5005433778718213e-06, "loss": 1.4567, "step": 8996 }, { "epoch": 0.9465544450289322, "grad_norm": 1.9078657627105713, "learning_rate": 1.4946678423821248e-06, "loss": 1.6237, "step": 8997 }, { "epoch": 0.9466596528143083, "grad_norm": 1.2230523824691772, "learning_rate": 1.4888037459922622e-06, "loss": 1.6624, "step": 8998 }, { "epoch": 0.9467648605996843, "grad_norm": 2.302837371826172, "learning_rate": 1.4829510893832332e-06, "loss": 1.7364, "step": 8999 }, { "epoch": 0.9468700683850605, "grad_norm": 1.3794721364974976, "learning_rate": 1.4771098732346943e-06, "loss": 1.9772, "step": 9000 }, { "epoch": 0.9469752761704366, "grad_norm": 1.796860694885254, "learning_rate": 1.4712800982249474e-06, "loss": 1.4391, "step": 9001 }, { "epoch": 0.9470804839558127, "grad_norm": 1.5692015886306763, "learning_rate": 1.4654617650310176e-06, "loss": 1.5924, "step": 9002 }, { "epoch": 0.9471856917411888, "grad_norm": 1.5295088291168213, "learning_rate": 1.4596548743285198e-06, "loss": 1.4613, "step": 9003 }, { "epoch": 0.947290899526565, "grad_norm": 2.021333932876587, "learning_rate": 1.453859426791826e-06, "loss": 1.9719, "step": 9004 }, { "epoch": 0.9473961073119411, "grad_norm": 1.1852548122406006, "learning_rate": 1.448075423093942e-06, "loss": 1.3462, "step": 9005 }, { "epoch": 0.9475013150973172, "grad_norm": 1.628966212272644, "learning_rate": 1.4423028639065195e-06, "loss": 2.1885, "step": 9006 }, { "epoch": 0.9476065228826933, "grad_norm": 1.4825149774551392, "learning_rate": 1.4365417498999334e-06, "loss": 1.4696, "step": 9007 }, { "epoch": 0.9477117306680695, "grad_norm": 1.7557880878448486, "learning_rate": 1.430792081743182e-06, "loss": 1.3468, "step": 9008 }, { "epoch": 0.9478169384534455, "grad_norm": 1.8458571434020996, "learning_rate": 1.4250538601039642e-06, "loss": 1.804, "step": 9009 }, { "epoch": 0.9479221462388217, "grad_norm": 1.6594163179397583, "learning_rate": 1.4193270856486585e-06, "loss": 1.3729, "step": 9010 }, { "epoch": 0.9480273540241978, "grad_norm": 1.11782968044281, "learning_rate": 1.413611759042266e-06, "loss": 1.8055, "step": 9011 }, { "epoch": 0.948132561809574, "grad_norm": 1.8020600080490112, "learning_rate": 1.4079078809485002e-06, "loss": 1.6278, "step": 9012 }, { "epoch": 0.94823776959495, "grad_norm": 1.9331527948379517, "learning_rate": 1.4022154520297425e-06, "loss": 1.819, "step": 9013 }, { "epoch": 0.9483429773803261, "grad_norm": 1.2520698308944702, "learning_rate": 1.396534472947031e-06, "loss": 1.757, "step": 9014 }, { "epoch": 0.9484481851657023, "grad_norm": 1.4876186847686768, "learning_rate": 1.3908649443600707e-06, "loss": 1.5025, "step": 9015 }, { "epoch": 0.9485533929510784, "grad_norm": 1.5193793773651123, "learning_rate": 1.3852068669272688e-06, "loss": 1.8764, "step": 9016 }, { "epoch": 0.9486586007364545, "grad_norm": 1.3971233367919922, "learning_rate": 1.3795602413056442e-06, "loss": 1.3161, "step": 9017 }, { "epoch": 0.9487638085218306, "grad_norm": 1.4068975448608398, "learning_rate": 1.3739250681509497e-06, "loss": 1.2116, "step": 9018 }, { "epoch": 0.9488690163072068, "grad_norm": 1.3958295583724976, "learning_rate": 1.3683013481175621e-06, "loss": 1.4589, "step": 9019 }, { "epoch": 0.9489742240925828, "grad_norm": 1.2469528913497925, "learning_rate": 1.3626890818585591e-06, "loss": 1.808, "step": 9020 }, { "epoch": 0.949079431877959, "grad_norm": 1.113645076751709, "learning_rate": 1.3570882700256637e-06, "loss": 1.4229, "step": 9021 }, { "epoch": 0.9491846396633351, "grad_norm": 1.3229107856750488, "learning_rate": 1.351498913269289e-06, "loss": 1.8561, "step": 9022 }, { "epoch": 0.9492898474487113, "grad_norm": 1.7333238124847412, "learning_rate": 1.3459210122384934e-06, "loss": 1.3316, "step": 9023 }, { "epoch": 0.9493950552340873, "grad_norm": 1.8732205629348755, "learning_rate": 1.340354567581037e-06, "loss": 1.7108, "step": 9024 }, { "epoch": 0.9495002630194634, "grad_norm": 1.6077425479888916, "learning_rate": 1.3347995799433355e-06, "loss": 1.6701, "step": 9025 }, { "epoch": 0.9496054708048396, "grad_norm": 2.317784070968628, "learning_rate": 1.3292560499704398e-06, "loss": 1.5069, "step": 9026 }, { "epoch": 0.9497106785902156, "grad_norm": 2.149060010910034, "learning_rate": 1.3237239783061462e-06, "loss": 0.9591, "step": 9027 }, { "epoch": 0.9498158863755918, "grad_norm": 1.2673544883728027, "learning_rate": 1.3182033655928404e-06, "loss": 1.7899, "step": 9028 }, { "epoch": 0.9499210941609679, "grad_norm": 1.8225202560424805, "learning_rate": 1.3126942124716213e-06, "loss": 1.5151, "step": 9029 }, { "epoch": 0.9500263019463441, "grad_norm": 1.9195647239685059, "learning_rate": 1.3071965195822656e-06, "loss": 1.5733, "step": 9030 }, { "epoch": 0.9501315097317201, "grad_norm": 2.377620220184326, "learning_rate": 1.301710287563196e-06, "loss": 1.1683, "step": 9031 }, { "epoch": 0.9502367175170963, "grad_norm": 1.6789700984954834, "learning_rate": 1.2962355170514917e-06, "loss": 1.5605, "step": 9032 }, { "epoch": 0.9503419253024724, "grad_norm": 1.4375206232070923, "learning_rate": 1.2907722086829332e-06, "loss": 1.3032, "step": 9033 }, { "epoch": 0.9504471330878485, "grad_norm": 1.3653327226638794, "learning_rate": 1.2853203630919686e-06, "loss": 1.4516, "step": 9034 }, { "epoch": 0.9505523408732246, "grad_norm": 1.7745347023010254, "learning_rate": 1.279879980911669e-06, "loss": 1.5384, "step": 9035 }, { "epoch": 0.9506575486586007, "grad_norm": 1.7795803546905518, "learning_rate": 1.2744510627738516e-06, "loss": 1.8052, "step": 9036 }, { "epoch": 0.9507627564439769, "grad_norm": 1.5467740297317505, "learning_rate": 1.2690336093089228e-06, "loss": 1.7491, "step": 9037 }, { "epoch": 0.9508679642293529, "grad_norm": 1.2983478307724, "learning_rate": 1.263627621146013e-06, "loss": 1.6505, "step": 9038 }, { "epoch": 0.9509731720147291, "grad_norm": 2.1444427967071533, "learning_rate": 1.2582330989128977e-06, "loss": 1.3857, "step": 9039 }, { "epoch": 0.9510783798001052, "grad_norm": 1.9414992332458496, "learning_rate": 1.252850043236009e-06, "loss": 1.2304, "step": 9040 }, { "epoch": 0.9511835875854813, "grad_norm": 1.6080853939056396, "learning_rate": 1.2474784547404916e-06, "loss": 1.8033, "step": 9041 }, { "epoch": 0.9512887953708574, "grad_norm": 1.4745537042617798, "learning_rate": 1.2421183340501242e-06, "loss": 2.1338, "step": 9042 }, { "epoch": 0.9513940031562336, "grad_norm": 1.2472604513168335, "learning_rate": 1.2367696817873419e-06, "loss": 2.3056, "step": 9043 }, { "epoch": 0.9514992109416097, "grad_norm": 1.7281334400177002, "learning_rate": 1.2314324985732818e-06, "loss": 1.5415, "step": 9044 }, { "epoch": 0.9516044187269858, "grad_norm": 1.563148856163025, "learning_rate": 1.2261067850277252e-06, "loss": 1.6814, "step": 9045 }, { "epoch": 0.9517096265123619, "grad_norm": 1.6207623481750488, "learning_rate": 1.2207925417691334e-06, "loss": 1.566, "step": 9046 }, { "epoch": 0.951814834297738, "grad_norm": 1.622622013092041, "learning_rate": 1.215489769414635e-06, "loss": 1.4526, "step": 9047 }, { "epoch": 0.9519200420831142, "grad_norm": 1.5105351209640503, "learning_rate": 1.210198468580015e-06, "loss": 1.8531, "step": 9048 }, { "epoch": 0.9520252498684902, "grad_norm": 1.3942266702651978, "learning_rate": 1.2049186398797374e-06, "loss": 1.8842, "step": 9049 }, { "epoch": 0.9521304576538664, "grad_norm": 1.670791745185852, "learning_rate": 1.1996502839269453e-06, "loss": 1.9948, "step": 9050 }, { "epoch": 0.9522356654392425, "grad_norm": 1.3902188539505005, "learning_rate": 1.1943934013334047e-06, "loss": 1.091, "step": 9051 }, { "epoch": 0.9523408732246186, "grad_norm": 1.8161249160766602, "learning_rate": 1.189147992709616e-06, "loss": 2.1526, "step": 9052 }, { "epoch": 0.9524460810099947, "grad_norm": 1.8096421957015991, "learning_rate": 1.1839140586646923e-06, "loss": 0.9295, "step": 9053 }, { "epoch": 0.9525512887953709, "grad_norm": 1.7646183967590332, "learning_rate": 1.178691599806425e-06, "loss": 1.7518, "step": 9054 }, { "epoch": 0.952656496580747, "grad_norm": 1.164070963859558, "learning_rate": 1.173480616741296e-06, "loss": 1.5824, "step": 9055 }, { "epoch": 0.9527617043661231, "grad_norm": 1.876624584197998, "learning_rate": 1.168281110074443e-06, "loss": 1.4504, "step": 9056 }, { "epoch": 0.9528669121514992, "grad_norm": 1.5865232944488525, "learning_rate": 1.1630930804096495e-06, "loss": 1.5255, "step": 9057 }, { "epoch": 0.9529721199368754, "grad_norm": 1.653921127319336, "learning_rate": 1.1579165283494009e-06, "loss": 1.5343, "step": 9058 }, { "epoch": 0.9530773277222514, "grad_norm": 3.066737651824951, "learning_rate": 1.152751454494827e-06, "loss": 1.3524, "step": 9059 }, { "epoch": 0.9531825355076275, "grad_norm": 1.9732846021652222, "learning_rate": 1.1475978594457149e-06, "loss": 1.6233, "step": 9060 }, { "epoch": 0.9532877432930037, "grad_norm": 1.4492088556289673, "learning_rate": 1.1424557438005634e-06, "loss": 1.6444, "step": 9061 }, { "epoch": 0.9533929510783798, "grad_norm": 1.6626065969467163, "learning_rate": 1.1373251081565062e-06, "loss": 1.6292, "step": 9062 }, { "epoch": 0.9534981588637559, "grad_norm": 1.2074167728424072, "learning_rate": 1.132205953109311e-06, "loss": 1.9072, "step": 9063 }, { "epoch": 0.953603366649132, "grad_norm": 1.8209060430526733, "learning_rate": 1.127098279253491e-06, "loss": 1.4265, "step": 9064 }, { "epoch": 0.9537085744345082, "grad_norm": 1.6529898643493652, "learning_rate": 1.1220020871821723e-06, "loss": 1.3912, "step": 9065 }, { "epoch": 0.9538137822198842, "grad_norm": 1.3081694841384888, "learning_rate": 1.1169173774871478e-06, "loss": 1.8764, "step": 9066 }, { "epoch": 0.9539189900052604, "grad_norm": 1.5082989931106567, "learning_rate": 1.1118441507589006e-06, "loss": 1.6502, "step": 9067 }, { "epoch": 0.9540241977906365, "grad_norm": 1.7318092584609985, "learning_rate": 1.1067824075865485e-06, "loss": 1.3874, "step": 9068 }, { "epoch": 0.9541294055760127, "grad_norm": 1.9931410551071167, "learning_rate": 1.1017321485579102e-06, "loss": 2.344, "step": 9069 }, { "epoch": 0.9542346133613887, "grad_norm": 1.397017240524292, "learning_rate": 1.096693374259461e-06, "loss": 1.9131, "step": 9070 }, { "epoch": 0.9543398211467649, "grad_norm": 0.9994755983352661, "learning_rate": 1.0916660852763216e-06, "loss": 1.4483, "step": 9071 }, { "epoch": 0.954445028932141, "grad_norm": 1.567875623703003, "learning_rate": 1.0866502821923031e-06, "loss": 1.4536, "step": 9072 }, { "epoch": 0.954550236717517, "grad_norm": 1.6587287187576294, "learning_rate": 1.0816459655898726e-06, "loss": 1.8396, "step": 9073 }, { "epoch": 0.9546554445028932, "grad_norm": 1.5894458293914795, "learning_rate": 1.0766531360501654e-06, "loss": 1.3923, "step": 9074 }, { "epoch": 0.9547606522882693, "grad_norm": 1.1351405382156372, "learning_rate": 1.0716717941529841e-06, "loss": 1.543, "step": 9075 }, { "epoch": 0.9548658600736455, "grad_norm": 1.4005601406097412, "learning_rate": 1.0667019404767996e-06, "loss": 1.7229, "step": 9076 }, { "epoch": 0.9549710678590215, "grad_norm": 2.1269547939300537, "learning_rate": 1.0617435755987281e-06, "loss": 1.5129, "step": 9077 }, { "epoch": 0.9550762756443977, "grad_norm": 1.9350544214248657, "learning_rate": 1.0567967000945866e-06, "loss": 1.4607, "step": 9078 }, { "epoch": 0.9551814834297738, "grad_norm": 1.4004015922546387, "learning_rate": 1.0518613145388378e-06, "loss": 1.907, "step": 9079 }, { "epoch": 0.95528669121515, "grad_norm": 1.2579517364501953, "learning_rate": 1.04693741950459e-06, "loss": 1.4664, "step": 9080 }, { "epoch": 0.955391899000526, "grad_norm": 1.7487457990646362, "learning_rate": 1.0420250155636745e-06, "loss": 1.6682, "step": 9081 }, { "epoch": 0.9554971067859022, "grad_norm": 2.7791035175323486, "learning_rate": 1.0371241032865242e-06, "loss": 1.2912, "step": 9082 }, { "epoch": 0.9556023145712783, "grad_norm": 1.7350082397460938, "learning_rate": 1.0322346832422613e-06, "loss": 1.7033, "step": 9083 }, { "epoch": 0.9557075223566543, "grad_norm": 2.800816535949707, "learning_rate": 1.0273567559987097e-06, "loss": 1.7198, "step": 9084 }, { "epoch": 0.9558127301420305, "grad_norm": 1.4789823293685913, "learning_rate": 1.0224903221222938e-06, "loss": 1.9354, "step": 9085 }, { "epoch": 0.9559179379274066, "grad_norm": 2.029716968536377, "learning_rate": 1.0176353821781616e-06, "loss": 1.2883, "step": 9086 }, { "epoch": 0.9560231457127828, "grad_norm": 1.2375006675720215, "learning_rate": 1.0127919367301064e-06, "loss": 1.7844, "step": 9087 }, { "epoch": 0.9561283534981588, "grad_norm": 1.4403471946716309, "learning_rate": 1.0079599863405454e-06, "loss": 1.4304, "step": 9088 }, { "epoch": 0.956233561283535, "grad_norm": 1.9305444955825806, "learning_rate": 1.0031395315706183e-06, "loss": 1.041, "step": 9089 }, { "epoch": 0.9563387690689111, "grad_norm": 1.9020962715148926, "learning_rate": 9.983305729801107e-07, "loss": 1.609, "step": 9090 }, { "epoch": 0.9564439768542872, "grad_norm": 2.1255598068237305, "learning_rate": 9.93533111127476e-07, "loss": 1.4914, "step": 9091 }, { "epoch": 0.9565491846396633, "grad_norm": 2.1392078399658203, "learning_rate": 9.88747146569813e-07, "loss": 1.6591, "step": 9092 }, { "epoch": 0.9566543924250395, "grad_norm": 1.4976539611816406, "learning_rate": 9.8397267986291e-07, "loss": 1.5315, "step": 9093 }, { "epoch": 0.9567596002104156, "grad_norm": 1.4068844318389893, "learning_rate": 9.79209711561202e-07, "loss": 1.2471, "step": 9094 }, { "epoch": 0.9568648079957917, "grad_norm": 1.8085061311721802, "learning_rate": 9.744582422178127e-07, "loss": 1.3131, "step": 9095 }, { "epoch": 0.9569700157811678, "grad_norm": 1.4883860349655151, "learning_rate": 9.6971827238449e-07, "loss": 1.8711, "step": 9096 }, { "epoch": 0.957075223566544, "grad_norm": 1.6276978254318237, "learning_rate": 9.649898026117043e-07, "loss": 1.5425, "step": 9097 }, { "epoch": 0.95718043135192, "grad_norm": 1.1478794813156128, "learning_rate": 9.602728334485278e-07, "loss": 1.746, "step": 9098 }, { "epoch": 0.9572856391372961, "grad_norm": 1.3659400939941406, "learning_rate": 9.555673654427332e-07, "loss": 1.3815, "step": 9099 }, { "epoch": 0.9573908469226723, "grad_norm": 1.6303960084915161, "learning_rate": 9.508733991407615e-07, "loss": 1.6709, "step": 9100 }, { "epoch": 0.9574960547080484, "grad_norm": 1.6804249286651611, "learning_rate": 9.461909350876985e-07, "loss": 1.5344, "step": 9101 }, { "epoch": 0.9576012624934245, "grad_norm": 1.8790122270584106, "learning_rate": 9.415199738272984e-07, "loss": 1.4744, "step": 9102 }, { "epoch": 0.9577064702788006, "grad_norm": 1.4778565168380737, "learning_rate": 9.36860515902005e-07, "loss": 1.7999, "step": 9103 }, { "epoch": 0.9578116780641768, "grad_norm": 1.2392534017562866, "learning_rate": 9.322125618528854e-07, "loss": 1.7232, "step": 9104 }, { "epoch": 0.9579168858495528, "grad_norm": 1.5124746561050415, "learning_rate": 9.275761122196968e-07, "loss": 2.0347, "step": 9105 }, { "epoch": 0.958022093634929, "grad_norm": 1.9507633447647095, "learning_rate": 9.229511675408642e-07, "loss": 1.6567, "step": 9106 }, { "epoch": 0.9581273014203051, "grad_norm": 1.7960712909698486, "learning_rate": 9.183377283534578e-07, "loss": 1.7705, "step": 9107 }, { "epoch": 0.9582325092056813, "grad_norm": 2.1413114070892334, "learning_rate": 9.137357951932157e-07, "loss": 1.7895, "step": 9108 }, { "epoch": 0.9583377169910573, "grad_norm": 1.709755301475525, "learning_rate": 9.091453685945661e-07, "loss": 1.7887, "step": 9109 }, { "epoch": 0.9584429247764334, "grad_norm": 1.2714898586273193, "learning_rate": 9.045664490905492e-07, "loss": 1.8393, "step": 9110 }, { "epoch": 0.9585481325618096, "grad_norm": 1.3688026666641235, "learning_rate": 8.999990372129286e-07, "loss": 1.6004, "step": 9111 }, { "epoch": 0.9586533403471857, "grad_norm": 1.3194514513015747, "learning_rate": 8.95443133492091e-07, "loss": 1.1129, "step": 9112 }, { "epoch": 0.9587585481325618, "grad_norm": 1.4183218479156494, "learning_rate": 8.90898738457091e-07, "loss": 1.8257, "step": 9113 }, { "epoch": 0.9588637559179379, "grad_norm": 1.4666954278945923, "learning_rate": 8.863658526356622e-07, "loss": 1.6854, "step": 9114 }, { "epoch": 0.9589689637033141, "grad_norm": 1.620832085609436, "learning_rate": 8.818444765541944e-07, "loss": 1.9613, "step": 9115 }, { "epoch": 0.9590741714886901, "grad_norm": 1.858279824256897, "learning_rate": 8.773346107377456e-07, "loss": 1.7767, "step": 9116 }, { "epoch": 0.9591793792740663, "grad_norm": 1.720293402671814, "learning_rate": 8.72836255710019e-07, "loss": 1.8888, "step": 9117 }, { "epoch": 0.9592845870594424, "grad_norm": 1.7517911195755005, "learning_rate": 8.683494119934076e-07, "loss": 1.449, "step": 9118 }, { "epoch": 0.9593897948448186, "grad_norm": 0.9157357215881348, "learning_rate": 8.638740801089396e-07, "loss": 1.5477, "step": 9119 }, { "epoch": 0.9594950026301946, "grad_norm": 1.0953036546707153, "learning_rate": 8.59410260576321e-07, "loss": 1.6595, "step": 9120 }, { "epoch": 0.9596002104155708, "grad_norm": 1.7858799695968628, "learning_rate": 8.549579539139374e-07, "loss": 1.6835, "step": 9121 }, { "epoch": 0.9597054182009469, "grad_norm": 1.9369888305664062, "learning_rate": 8.505171606388085e-07, "loss": 2.1841, "step": 9122 }, { "epoch": 0.9598106259863229, "grad_norm": 1.8354744911193848, "learning_rate": 8.460878812666217e-07, "loss": 1.823, "step": 9123 }, { "epoch": 0.9599158337716991, "grad_norm": 1.8804636001586914, "learning_rate": 8.416701163117546e-07, "loss": 2.0077, "step": 9124 }, { "epoch": 0.9600210415570752, "grad_norm": 1.981581449508667, "learning_rate": 8.372638662872079e-07, "loss": 1.5237, "step": 9125 }, { "epoch": 0.9601262493424514, "grad_norm": 1.4924275875091553, "learning_rate": 8.328691317046722e-07, "loss": 1.5502, "step": 9126 }, { "epoch": 0.9602314571278274, "grad_norm": 1.8991860151290894, "learning_rate": 8.28485913074506e-07, "loss": 1.5965, "step": 9127 }, { "epoch": 0.9603366649132036, "grad_norm": 0.9699298739433289, "learning_rate": 8.24114210905691e-07, "loss": 2.2511, "step": 9128 }, { "epoch": 0.9604418726985797, "grad_norm": 1.3279988765716553, "learning_rate": 8.197540257059321e-07, "loss": 1.6017, "step": 9129 }, { "epoch": 0.9605470804839558, "grad_norm": 1.4322397708892822, "learning_rate": 8.154053579815358e-07, "loss": 1.5948, "step": 9130 }, { "epoch": 0.9606522882693319, "grad_norm": 1.4159713983535767, "learning_rate": 8.110682082375087e-07, "loss": 1.4411, "step": 9131 }, { "epoch": 0.9607574960547081, "grad_norm": 1.7567311525344849, "learning_rate": 8.067425769775039e-07, "loss": 1.3948, "step": 9132 }, { "epoch": 0.9608627038400842, "grad_norm": 2.1508755683898926, "learning_rate": 8.024284647038527e-07, "loss": 1.8868, "step": 9133 }, { "epoch": 0.9609679116254602, "grad_norm": 2.318276882171631, "learning_rate": 7.981258719175322e-07, "loss": 1.4337, "step": 9134 }, { "epoch": 0.9610731194108364, "grad_norm": 1.1780331134796143, "learning_rate": 7.938347991181982e-07, "loss": 1.6706, "step": 9135 }, { "epoch": 0.9611783271962125, "grad_norm": 2.116865634918213, "learning_rate": 7.895552468041412e-07, "loss": 1.8336, "step": 9136 }, { "epoch": 0.9612835349815886, "grad_norm": 1.3692445755004883, "learning_rate": 7.852872154723412e-07, "loss": 1.9935, "step": 9137 }, { "epoch": 0.9613887427669647, "grad_norm": 1.171465277671814, "learning_rate": 7.810307056184352e-07, "loss": 1.6881, "step": 9138 }, { "epoch": 0.9614939505523409, "grad_norm": 1.1161651611328125, "learning_rate": 7.767857177367055e-07, "loss": 1.6728, "step": 9139 }, { "epoch": 0.961599158337717, "grad_norm": 1.869456171989441, "learning_rate": 7.725522523201245e-07, "loss": 1.1257, "step": 9140 }, { "epoch": 0.9617043661230931, "grad_norm": 1.2645540237426758, "learning_rate": 7.683303098602989e-07, "loss": 1.4703, "step": 9141 }, { "epoch": 0.9618095739084692, "grad_norm": 1.4299430847167969, "learning_rate": 7.641198908475144e-07, "loss": 1.6185, "step": 9142 }, { "epoch": 0.9619147816938454, "grad_norm": 2.149198532104492, "learning_rate": 7.599209957707021e-07, "loss": 1.4881, "step": 9143 }, { "epoch": 0.9620199894792215, "grad_norm": 1.9210573434829712, "learning_rate": 7.55733625117483e-07, "loss": 2.0232, "step": 9144 }, { "epoch": 0.9621251972645976, "grad_norm": 2.284515380859375, "learning_rate": 7.515577793741124e-07, "loss": 1.4036, "step": 9145 }, { "epoch": 0.9622304050499737, "grad_norm": 1.6469871997833252, "learning_rate": 7.473934590255249e-07, "loss": 1.4179, "step": 9146 }, { "epoch": 0.9623356128353499, "grad_norm": 1.3524980545043945, "learning_rate": 7.432406645552891e-07, "loss": 1.4155, "step": 9147 }, { "epoch": 0.9624408206207259, "grad_norm": 1.9270532131195068, "learning_rate": 7.39099396445686e-07, "loss": 1.7492, "step": 9148 }, { "epoch": 0.962546028406102, "grad_norm": 1.6175739765167236, "learning_rate": 7.349696551776086e-07, "loss": 2.1136, "step": 9149 }, { "epoch": 0.9626512361914782, "grad_norm": 1.5902010202407837, "learning_rate": 7.308514412306289e-07, "loss": 1.8893, "step": 9150 }, { "epoch": 0.9627564439768543, "grad_norm": 1.2558472156524658, "learning_rate": 7.267447550829865e-07, "loss": 1.5318, "step": 9151 }, { "epoch": 0.9628616517622304, "grad_norm": 1.7991538047790527, "learning_rate": 7.226495972115776e-07, "loss": 1.2432, "step": 9152 }, { "epoch": 0.9629668595476065, "grad_norm": 1.6260236501693726, "learning_rate": 7.185659680919554e-07, "loss": 1.7901, "step": 9153 }, { "epoch": 0.9630720673329827, "grad_norm": 1.319873571395874, "learning_rate": 7.144938681983515e-07, "loss": 1.4871, "step": 9154 }, { "epoch": 0.9631772751183587, "grad_norm": 1.6436864137649536, "learning_rate": 7.104332980036211e-07, "loss": 1.2697, "step": 9155 }, { "epoch": 0.9632824829037349, "grad_norm": 1.0608686208724976, "learning_rate": 7.063842579793311e-07, "loss": 1.8403, "step": 9156 }, { "epoch": 0.963387690689111, "grad_norm": 1.5618516206741333, "learning_rate": 7.023467485956614e-07, "loss": 1.4696, "step": 9157 }, { "epoch": 0.9634928984744872, "grad_norm": 1.3680403232574463, "learning_rate": 6.983207703214811e-07, "loss": 1.6647, "step": 9158 }, { "epoch": 0.9635981062598632, "grad_norm": 2.8899571895599365, "learning_rate": 6.943063236243275e-07, "loss": 1.7808, "step": 9159 }, { "epoch": 0.9637033140452393, "grad_norm": 1.5044270753860474, "learning_rate": 6.903034089703719e-07, "loss": 1.8185, "step": 9160 }, { "epoch": 0.9638085218306155, "grad_norm": 1.38004469871521, "learning_rate": 6.863120268244649e-07, "loss": 1.6207, "step": 9161 }, { "epoch": 0.9639137296159915, "grad_norm": 2.003220558166504, "learning_rate": 6.823321776501024e-07, "loss": 1.5458, "step": 9162 }, { "epoch": 0.9640189374013677, "grad_norm": 1.4244567155838013, "learning_rate": 6.783638619094701e-07, "loss": 1.2926, "step": 9163 }, { "epoch": 0.9641241451867438, "grad_norm": 1.428270697593689, "learning_rate": 6.744070800633661e-07, "loss": 1.5582, "step": 9164 }, { "epoch": 0.96422935297212, "grad_norm": 1.64469313621521, "learning_rate": 6.704618325713119e-07, "loss": 1.7808, "step": 9165 }, { "epoch": 0.964334560757496, "grad_norm": 1.6442152261734009, "learning_rate": 6.665281198914408e-07, "loss": 1.4003, "step": 9166 }, { "epoch": 0.9644397685428722, "grad_norm": 1.406569480895996, "learning_rate": 6.626059424805542e-07, "loss": 1.4821, "step": 9167 }, { "epoch": 0.9645449763282483, "grad_norm": 1.100145936012268, "learning_rate": 6.586953007941321e-07, "loss": 1.6582, "step": 9168 }, { "epoch": 0.9646501841136244, "grad_norm": 1.651857614517212, "learning_rate": 6.547961952863002e-07, "loss": 1.7471, "step": 9169 }, { "epoch": 0.9647553918990005, "grad_norm": 1.3238495588302612, "learning_rate": 6.509086264098407e-07, "loss": 1.7772, "step": 9170 }, { "epoch": 0.9648605996843767, "grad_norm": 2.2787668704986572, "learning_rate": 6.470325946162259e-07, "loss": 1.3509, "step": 9171 }, { "epoch": 0.9649658074697528, "grad_norm": 1.7403054237365723, "learning_rate": 6.431681003555622e-07, "loss": 1.4681, "step": 9172 }, { "epoch": 0.9650710152551288, "grad_norm": 1.6274385452270508, "learning_rate": 6.393151440766021e-07, "loss": 1.6136, "step": 9173 }, { "epoch": 0.965176223040505, "grad_norm": 1.5703790187835693, "learning_rate": 6.354737262267873e-07, "loss": 1.4191, "step": 9174 }, { "epoch": 0.9652814308258811, "grad_norm": 1.5442698001861572, "learning_rate": 6.316438472522057e-07, "loss": 1.1351, "step": 9175 }, { "epoch": 0.9653866386112573, "grad_norm": 1.7261892557144165, "learning_rate": 6.278255075976125e-07, "loss": 1.1987, "step": 9176 }, { "epoch": 0.9654918463966333, "grad_norm": 1.5503300428390503, "learning_rate": 6.240187077064307e-07, "loss": 1.8204, "step": 9177 }, { "epoch": 0.9655970541820095, "grad_norm": 1.6254150867462158, "learning_rate": 6.202234480207069e-07, "loss": 1.6718, "step": 9178 }, { "epoch": 0.9657022619673856, "grad_norm": 1.0417579412460327, "learning_rate": 6.164397289811885e-07, "loss": 1.3387, "step": 9179 }, { "epoch": 0.9658074697527617, "grad_norm": 1.2805875539779663, "learning_rate": 6.126675510272572e-07, "loss": 1.2551, "step": 9180 }, { "epoch": 0.9659126775381378, "grad_norm": 1.3211545944213867, "learning_rate": 6.089069145969739e-07, "loss": 1.65, "step": 9181 }, { "epoch": 0.966017885323514, "grad_norm": 4.055902481079102, "learning_rate": 6.051578201270336e-07, "loss": 1.776, "step": 9182 }, { "epoch": 0.9661230931088901, "grad_norm": 1.3434686660766602, "learning_rate": 6.014202680528324e-07, "loss": 1.4995, "step": 9183 }, { "epoch": 0.9662283008942661, "grad_norm": 1.8617429733276367, "learning_rate": 5.976942588083678e-07, "loss": 1.7132, "step": 9184 }, { "epoch": 0.9663335086796423, "grad_norm": 1.6335538625717163, "learning_rate": 5.939797928263602e-07, "loss": 1.5556, "step": 9185 }, { "epoch": 0.9664387164650184, "grad_norm": 1.5544825792312622, "learning_rate": 5.902768705381312e-07, "loss": 1.736, "step": 9186 }, { "epoch": 0.9665439242503945, "grad_norm": 1.460477590560913, "learning_rate": 5.865854923737035e-07, "loss": 1.4154, "step": 9187 }, { "epoch": 0.9666491320357706, "grad_norm": 1.5231237411499023, "learning_rate": 5.829056587617455e-07, "loss": 1.3271, "step": 9188 }, { "epoch": 0.9667543398211468, "grad_norm": 1.2489863634109497, "learning_rate": 5.792373701295706e-07, "loss": 1.9272, "step": 9189 }, { "epoch": 0.9668595476065229, "grad_norm": 1.2746580839157104, "learning_rate": 5.755806269031827e-07, "loss": 1.4012, "step": 9190 }, { "epoch": 0.966964755391899, "grad_norm": 1.310562252998352, "learning_rate": 5.719354295072199e-07, "loss": 1.9932, "step": 9191 }, { "epoch": 0.9670699631772751, "grad_norm": 1.565401554107666, "learning_rate": 5.68301778364988e-07, "loss": 1.5477, "step": 9192 }, { "epoch": 0.9671751709626513, "grad_norm": 1.3643187284469604, "learning_rate": 5.646796738984495e-07, "loss": 1.7313, "step": 9193 }, { "epoch": 0.9672803787480273, "grad_norm": 2.277108907699585, "learning_rate": 5.610691165282234e-07, "loss": 1.4127, "step": 9194 }, { "epoch": 0.9673855865334035, "grad_norm": 1.1328575611114502, "learning_rate": 5.574701066735965e-07, "loss": 2.0227, "step": 9195 }, { "epoch": 0.9674907943187796, "grad_norm": 1.7198408842086792, "learning_rate": 5.538826447525125e-07, "loss": 1.7175, "step": 9196 }, { "epoch": 0.9675960021041558, "grad_norm": 1.514518141746521, "learning_rate": 5.503067311815713e-07, "loss": 1.512, "step": 9197 }, { "epoch": 0.9677012098895318, "grad_norm": 1.6654568910598755, "learning_rate": 5.467423663760296e-07, "loss": 1.337, "step": 9198 }, { "epoch": 0.9678064176749079, "grad_norm": 1.9775021076202393, "learning_rate": 5.431895507498008e-07, "loss": 1.1514, "step": 9199 }, { "epoch": 0.9679116254602841, "grad_norm": 1.0919164419174194, "learning_rate": 5.39648284715466e-07, "loss": 1.7102, "step": 9200 }, { "epoch": 0.9680168332456601, "grad_norm": 1.3616853952407837, "learning_rate": 5.361185686842629e-07, "loss": 1.7125, "step": 9201 }, { "epoch": 0.9681220410310363, "grad_norm": 1.843559741973877, "learning_rate": 5.326004030660747e-07, "loss": 1.1107, "step": 9202 }, { "epoch": 0.9682272488164124, "grad_norm": 1.5905053615570068, "learning_rate": 5.290937882694746e-07, "loss": 1.4774, "step": 9203 }, { "epoch": 0.9683324566017886, "grad_norm": 1.699774146080017, "learning_rate": 5.255987247016591e-07, "loss": 1.6351, "step": 9204 }, { "epoch": 0.9684376643871646, "grad_norm": 2.0290257930755615, "learning_rate": 5.221152127685036e-07, "loss": 2.2553, "step": 9205 }, { "epoch": 0.9685428721725408, "grad_norm": 2.343132972717285, "learning_rate": 5.186432528745289e-07, "loss": 0.9011, "step": 9206 }, { "epoch": 0.9686480799579169, "grad_norm": 2.2507107257843018, "learning_rate": 5.151828454229346e-07, "loss": 1.7659, "step": 9207 }, { "epoch": 0.9687532877432931, "grad_norm": 1.9188868999481201, "learning_rate": 5.117339908155549e-07, "loss": 1.9411, "step": 9208 }, { "epoch": 0.9688584955286691, "grad_norm": 1.9085512161254883, "learning_rate": 5.082966894529028e-07, "loss": 1.3046, "step": 9209 }, { "epoch": 0.9689637033140452, "grad_norm": 1.6628177165985107, "learning_rate": 5.04870941734148e-07, "loss": 1.3953, "step": 9210 }, { "epoch": 0.9690689110994214, "grad_norm": 1.5158159732818604, "learning_rate": 5.014567480570831e-07, "loss": 1.9355, "step": 9211 }, { "epoch": 0.9691741188847974, "grad_norm": 1.3943663835525513, "learning_rate": 4.980541088182133e-07, "loss": 1.5488, "step": 9212 }, { "epoch": 0.9692793266701736, "grad_norm": 1.2974984645843506, "learning_rate": 4.946630244126782e-07, "loss": 1.4926, "step": 9213 }, { "epoch": 0.9693845344555497, "grad_norm": 1.5148580074310303, "learning_rate": 4.912834952342515e-07, "loss": 1.7226, "step": 9214 }, { "epoch": 0.9694897422409259, "grad_norm": 1.8382201194763184, "learning_rate": 4.879155216753972e-07, "loss": 1.6377, "step": 9215 }, { "epoch": 0.9695949500263019, "grad_norm": 1.510947823524475, "learning_rate": 4.845591041272358e-07, "loss": 2.0674, "step": 9216 }, { "epoch": 0.9697001578116781, "grad_norm": 2.992677927017212, "learning_rate": 4.812142429795219e-07, "loss": 1.6575, "step": 9217 }, { "epoch": 0.9698053655970542, "grad_norm": 2.957728147506714, "learning_rate": 4.778809386206895e-07, "loss": 1.6637, "step": 9218 }, { "epoch": 0.9699105733824303, "grad_norm": 1.4280604124069214, "learning_rate": 4.745591914378289e-07, "loss": 1.9542, "step": 9219 }, { "epoch": 0.9700157811678064, "grad_norm": 2.1100828647613525, "learning_rate": 4.71249001816676e-07, "loss": 1.6198, "step": 9220 }, { "epoch": 0.9701209889531826, "grad_norm": 1.218092918395996, "learning_rate": 4.6795037014163436e-07, "loss": 1.2455, "step": 9221 }, { "epoch": 0.9702261967385587, "grad_norm": 1.3231292963027954, "learning_rate": 4.6466329679577536e-07, "loss": 1.4456, "step": 9222 }, { "epoch": 0.9703314045239347, "grad_norm": 1.482961893081665, "learning_rate": 4.613877821607937e-07, "loss": 2.1301, "step": 9223 }, { "epoch": 0.9704366123093109, "grad_norm": 1.3625333309173584, "learning_rate": 4.581238266170851e-07, "loss": 1.631, "step": 9224 }, { "epoch": 0.970541820094687, "grad_norm": 1.9850952625274658, "learning_rate": 4.548714305436685e-07, "loss": 1.8183, "step": 9225 }, { "epoch": 0.9706470278800631, "grad_norm": 2.0296475887298584, "learning_rate": 4.5163059431824194e-07, "loss": 1.5821, "step": 9226 }, { "epoch": 0.9707522356654392, "grad_norm": 1.2871662378311157, "learning_rate": 4.484013183171376e-07, "loss": 1.5999, "step": 9227 }, { "epoch": 0.9708574434508154, "grad_norm": 1.5916684865951538, "learning_rate": 4.4518360291538883e-07, "loss": 1.8337, "step": 9228 }, { "epoch": 0.9709626512361915, "grad_norm": 1.2991007566452026, "learning_rate": 4.4197744848663017e-07, "loss": 1.2721, "step": 9229 }, { "epoch": 0.9710678590215676, "grad_norm": 1.4421882629394531, "learning_rate": 4.3878285540319694e-07, "loss": 1.684, "step": 9230 }, { "epoch": 0.9711730668069437, "grad_norm": 1.5107342004776, "learning_rate": 4.355998240360704e-07, "loss": 1.1753, "step": 9231 }, { "epoch": 0.9712782745923199, "grad_norm": 1.8420056104660034, "learning_rate": 4.324283547548658e-07, "loss": 1.5455, "step": 9232 }, { "epoch": 0.9713834823776959, "grad_norm": 2.1014416217803955, "learning_rate": 4.2926844792789967e-07, "loss": 1.3949, "step": 9233 }, { "epoch": 0.971488690163072, "grad_norm": 1.452222466468811, "learning_rate": 4.261201039221008e-07, "loss": 1.5272, "step": 9234 }, { "epoch": 0.9715938979484482, "grad_norm": 1.7641346454620361, "learning_rate": 4.2298332310308775e-07, "loss": 1.3401, "step": 9235 }, { "epoch": 0.9716991057338243, "grad_norm": 1.8564509153366089, "learning_rate": 4.1985810583512473e-07, "loss": 1.4316, "step": 9236 }, { "epoch": 0.9718043135192004, "grad_norm": 1.6556718349456787, "learning_rate": 4.167444524811215e-07, "loss": 1.5263, "step": 9237 }, { "epoch": 0.9719095213045765, "grad_norm": 1.864583969116211, "learning_rate": 4.136423634026776e-07, "loss": 1.4365, "step": 9238 }, { "epoch": 0.9720147290899527, "grad_norm": 1.7581238746643066, "learning_rate": 4.1055183896001606e-07, "loss": 1.5028, "step": 9239 }, { "epoch": 0.9721199368753288, "grad_norm": 1.2646266222000122, "learning_rate": 4.074728795120275e-07, "loss": 1.6811, "step": 9240 }, { "epoch": 0.9722251446607049, "grad_norm": 1.6444590091705322, "learning_rate": 4.0440548541625935e-07, "loss": 1.6224, "step": 9241 }, { "epoch": 0.972330352446081, "grad_norm": 1.8099961280822754, "learning_rate": 4.013496570289155e-07, "loss": 1.5547, "step": 9242 }, { "epoch": 0.9724355602314572, "grad_norm": 2.0473053455352783, "learning_rate": 3.983053947048676e-07, "loss": 1.4913, "step": 9243 }, { "epoch": 0.9725407680168332, "grad_norm": 1.557085394859314, "learning_rate": 3.9527269879764406e-07, "loss": 1.415, "step": 9244 }, { "epoch": 0.9726459758022094, "grad_norm": 1.8768304586410522, "learning_rate": 3.9225156965939647e-07, "loss": 1.5624, "step": 9245 }, { "epoch": 0.9727511835875855, "grad_norm": 1.8402485847473145, "learning_rate": 3.892420076409886e-07, "loss": 1.712, "step": 9246 }, { "epoch": 0.9728563913729616, "grad_norm": 1.5501086711883545, "learning_rate": 3.862440130918854e-07, "loss": 1.5368, "step": 9247 }, { "epoch": 0.9729615991583377, "grad_norm": 2.000622272491455, "learning_rate": 3.832575863602528e-07, "loss": 2.0911, "step": 9248 }, { "epoch": 0.9730668069437138, "grad_norm": 1.5791133642196655, "learning_rate": 3.8028272779287997e-07, "loss": 1.7847, "step": 9249 }, { "epoch": 0.97317201472909, "grad_norm": 1.8884774446487427, "learning_rate": 3.7731943773523515e-07, "loss": 1.5126, "step": 9250 }, { "epoch": 0.973277222514466, "grad_norm": 1.8291395902633667, "learning_rate": 3.7436771653143187e-07, "loss": 1.6039, "step": 9251 }, { "epoch": 0.9733824302998422, "grad_norm": 1.446852684020996, "learning_rate": 3.7142756452425155e-07, "loss": 1.3811, "step": 9252 }, { "epoch": 0.9734876380852183, "grad_norm": 1.450920820236206, "learning_rate": 3.68498982055121e-07, "loss": 1.5193, "step": 9253 }, { "epoch": 0.9735928458705945, "grad_norm": 1.6216645240783691, "learning_rate": 3.655819694641127e-07, "loss": 1.7471, "step": 9254 }, { "epoch": 0.9736980536559705, "grad_norm": 1.7787151336669922, "learning_rate": 3.62676527089989e-07, "loss": 1.8712, "step": 9255 }, { "epoch": 0.9738032614413467, "grad_norm": 1.63424813747406, "learning_rate": 3.5978265527014666e-07, "loss": 1.8227, "step": 9256 }, { "epoch": 0.9739084692267228, "grad_norm": 2.0026700496673584, "learning_rate": 3.56900354340628e-07, "loss": 1.7547, "step": 9257 }, { "epoch": 0.9740136770120988, "grad_norm": 1.6562840938568115, "learning_rate": 3.5402962463616517e-07, "loss": 2.0525, "step": 9258 }, { "epoch": 0.974118884797475, "grad_norm": 2.0857789516448975, "learning_rate": 3.511704664901139e-07, "loss": 1.4605, "step": 9259 }, { "epoch": 0.9742240925828511, "grad_norm": 1.9922007322311401, "learning_rate": 3.483228802344973e-07, "loss": 1.4324, "step": 9260 }, { "epoch": 0.9743293003682273, "grad_norm": 2.007927417755127, "learning_rate": 3.454868661999955e-07, "loss": 1.6704, "step": 9261 }, { "epoch": 0.9744345081536033, "grad_norm": 1.3496975898742676, "learning_rate": 3.42662424715956e-07, "loss": 1.6075, "step": 9262 }, { "epoch": 0.9745397159389795, "grad_norm": 1.6474722623825073, "learning_rate": 3.39849556110361e-07, "loss": 1.8737, "step": 9263 }, { "epoch": 0.9746449237243556, "grad_norm": 1.7069952487945557, "learning_rate": 3.370482607098602e-07, "loss": 1.2898, "step": 9264 }, { "epoch": 0.9747501315097317, "grad_norm": 1.7524888515472412, "learning_rate": 3.342585388397712e-07, "loss": 1.5437, "step": 9265 }, { "epoch": 0.9748553392951078, "grad_norm": 1.8465501070022583, "learning_rate": 3.3148039082404605e-07, "loss": 1.4052, "step": 9266 }, { "epoch": 0.974960547080484, "grad_norm": 1.5890626907348633, "learning_rate": 3.2871381698529324e-07, "loss": 1.616, "step": 9267 }, { "epoch": 0.9750657548658601, "grad_norm": 1.5656459331512451, "learning_rate": 3.259588176448003e-07, "loss": 1.3757, "step": 9268 }, { "epoch": 0.9751709626512362, "grad_norm": 1.868996262550354, "learning_rate": 3.2321539312248903e-07, "loss": 1.3586, "step": 9269 }, { "epoch": 0.9752761704366123, "grad_norm": 1.7891117334365845, "learning_rate": 3.204835437369491e-07, "loss": 1.2176, "step": 9270 }, { "epoch": 0.9753813782219884, "grad_norm": 1.7648265361785889, "learning_rate": 3.177632698054156e-07, "loss": 1.7038, "step": 9271 }, { "epoch": 0.9754865860073646, "grad_norm": 1.6850988864898682, "learning_rate": 3.150545716437914e-07, "loss": 1.4181, "step": 9272 }, { "epoch": 0.9755917937927406, "grad_norm": 1.7368006706237793, "learning_rate": 3.1235744956662484e-07, "loss": 1.3408, "step": 9273 }, { "epoch": 0.9756970015781168, "grad_norm": 2.081385374069214, "learning_rate": 3.0967190388712097e-07, "loss": 1.3886, "step": 9274 }, { "epoch": 0.9758022093634929, "grad_norm": 2.454695224761963, "learning_rate": 3.0699793491715256e-07, "loss": 1.1868, "step": 9275 }, { "epoch": 0.975907417148869, "grad_norm": 1.7129137516021729, "learning_rate": 3.043355429672268e-07, "loss": 1.7567, "step": 9276 }, { "epoch": 0.9760126249342451, "grad_norm": 1.3494802713394165, "learning_rate": 3.016847283465185e-07, "loss": 1.4914, "step": 9277 }, { "epoch": 0.9761178327196213, "grad_norm": 1.3890875577926636, "learning_rate": 2.990454913628704e-07, "loss": 1.3622, "step": 9278 }, { "epoch": 0.9762230405049974, "grad_norm": 2.182384967803955, "learning_rate": 2.9641783232275955e-07, "loss": 1.4551, "step": 9279 }, { "epoch": 0.9763282482903735, "grad_norm": 1.537185788154602, "learning_rate": 2.938017515313418e-07, "loss": 1.7531, "step": 9280 }, { "epoch": 0.9764334560757496, "grad_norm": 1.466887354850769, "learning_rate": 2.9119724929239645e-07, "loss": 1.567, "step": 9281 }, { "epoch": 0.9765386638611258, "grad_norm": 1.78278648853302, "learning_rate": 2.886043259083704e-07, "loss": 1.3838, "step": 9282 }, { "epoch": 0.9766438716465018, "grad_norm": 1.6056292057037354, "learning_rate": 2.860229816803894e-07, "loss": 1.1808, "step": 9283 }, { "epoch": 0.9767490794318779, "grad_norm": 1.9775320291519165, "learning_rate": 2.834532169082138e-07, "loss": 1.6403, "step": 9284 }, { "epoch": 0.9768542872172541, "grad_norm": 1.4951421022415161, "learning_rate": 2.8089503189024926e-07, "loss": 1.6209, "step": 9285 }, { "epoch": 0.9769594950026302, "grad_norm": 2.1086246967315674, "learning_rate": 2.7834842692358033e-07, "loss": 1.6395, "step": 9286 }, { "epoch": 0.9770647027880063, "grad_norm": 1.4690982103347778, "learning_rate": 2.7581340230393717e-07, "loss": 1.6837, "step": 9287 }, { "epoch": 0.9771699105733824, "grad_norm": 1.447407603263855, "learning_rate": 2.7328995832568426e-07, "loss": 1.6867, "step": 9288 }, { "epoch": 0.9772751183587586, "grad_norm": 1.7926383018493652, "learning_rate": 2.7077809528188724e-07, "loss": 1.4406, "step": 9289 }, { "epoch": 0.9773803261441346, "grad_norm": 2.195378541946411, "learning_rate": 2.6827781346423496e-07, "loss": 1.808, "step": 9290 }, { "epoch": 0.9774855339295108, "grad_norm": 1.9407440423965454, "learning_rate": 2.657891131630619e-07, "loss": 1.362, "step": 9291 }, { "epoch": 0.9775907417148869, "grad_norm": 1.7981421947479248, "learning_rate": 2.633119946673923e-07, "loss": 1.4832, "step": 9292 }, { "epoch": 0.9776959495002631, "grad_norm": 1.5545809268951416, "learning_rate": 2.608464582648629e-07, "loss": 1.8864, "step": 9293 }, { "epoch": 0.9778011572856391, "grad_norm": 1.306902289390564, "learning_rate": 2.583925042418112e-07, "loss": 1.5875, "step": 9294 }, { "epoch": 0.9779063650710152, "grad_norm": 2.3127174377441406, "learning_rate": 2.5595013288318703e-07, "loss": 1.6263, "step": 9295 }, { "epoch": 0.9780115728563914, "grad_norm": 2.087738275527954, "learning_rate": 2.5351934447263026e-07, "loss": 1.7355, "step": 9296 }, { "epoch": 0.9781167806417674, "grad_norm": 1.412264108657837, "learning_rate": 2.5110013929241504e-07, "loss": 1.5116, "step": 9297 }, { "epoch": 0.9782219884271436, "grad_norm": 1.165013313293457, "learning_rate": 2.4869251762348333e-07, "loss": 1.421, "step": 9298 }, { "epoch": 0.9783271962125197, "grad_norm": 2.194392442703247, "learning_rate": 2.462964797454004e-07, "loss": 1.9825, "step": 9299 }, { "epoch": 0.9784324039978959, "grad_norm": 1.6436448097229004, "learning_rate": 2.4391202593643246e-07, "loss": 1.6435, "step": 9300 }, { "epoch": 0.9785376117832719, "grad_norm": 0.9663065075874329, "learning_rate": 2.4153915647348034e-07, "loss": 1.5033, "step": 9301 }, { "epoch": 0.9786428195686481, "grad_norm": 1.650208592414856, "learning_rate": 2.391778716320792e-07, "loss": 1.4037, "step": 9302 }, { "epoch": 0.9787480273540242, "grad_norm": 1.5926425457000732, "learning_rate": 2.3682817168644288e-07, "loss": 1.9914, "step": 9303 }, { "epoch": 0.9788532351394004, "grad_norm": 1.1928669214248657, "learning_rate": 2.3449005690945324e-07, "loss": 1.5487, "step": 9304 }, { "epoch": 0.9789584429247764, "grad_norm": 1.331669569015503, "learning_rate": 2.321635275726042e-07, "loss": 1.9395, "step": 9305 }, { "epoch": 0.9790636507101526, "grad_norm": 1.4218162298202515, "learning_rate": 2.2984858394607956e-07, "loss": 1.4188, "step": 9306 }, { "epoch": 0.9791688584955287, "grad_norm": 1.6478614807128906, "learning_rate": 2.275452262986977e-07, "loss": 1.7231, "step": 9307 }, { "epoch": 0.9792740662809047, "grad_norm": 1.5847270488739014, "learning_rate": 2.252534548979446e-07, "loss": 1.3288, "step": 9308 }, { "epoch": 0.9793792740662809, "grad_norm": 1.5312392711639404, "learning_rate": 2.2297327000996293e-07, "loss": 1.4311, "step": 9309 }, { "epoch": 0.979484481851657, "grad_norm": 1.6331710815429688, "learning_rate": 2.207046718995409e-07, "loss": 1.8195, "step": 9310 }, { "epoch": 0.9795896896370332, "grad_norm": 1.5755873918533325, "learning_rate": 2.1844766083011226e-07, "loss": 1.5262, "step": 9311 }, { "epoch": 0.9796948974224092, "grad_norm": 1.929260492324829, "learning_rate": 2.162022370637895e-07, "loss": 1.5236, "step": 9312 }, { "epoch": 0.9798001052077854, "grad_norm": 1.6517144441604614, "learning_rate": 2.1396840086131964e-07, "loss": 1.9677, "step": 9313 }, { "epoch": 0.9799053129931615, "grad_norm": 1.5350128412246704, "learning_rate": 2.1174615248210626e-07, "loss": 1.2628, "step": 9314 }, { "epoch": 0.9800105207785376, "grad_norm": 1.6820600032806396, "learning_rate": 2.0953549218423185e-07, "loss": 2.0707, "step": 9315 }, { "epoch": 0.9801157285639137, "grad_norm": 2.348896026611328, "learning_rate": 2.0733642022437994e-07, "loss": 1.3693, "step": 9316 }, { "epoch": 0.9802209363492899, "grad_norm": 3.21044659614563, "learning_rate": 2.0514893685795733e-07, "loss": 1.4473, "step": 9317 }, { "epoch": 0.980326144134666, "grad_norm": 1.5892785787582397, "learning_rate": 2.0297304233896087e-07, "loss": 1.5056, "step": 9318 }, { "epoch": 0.980431351920042, "grad_norm": 1.6441622972488403, "learning_rate": 2.008087369200773e-07, "loss": 1.6748, "step": 9319 }, { "epoch": 0.9805365597054182, "grad_norm": 1.501565933227539, "learning_rate": 1.9865602085265002e-07, "loss": 1.3629, "step": 9320 }, { "epoch": 0.9806417674907943, "grad_norm": 1.4179154634475708, "learning_rate": 1.9651489438666792e-07, "loss": 1.7201, "step": 9321 }, { "epoch": 0.9807469752761704, "grad_norm": 1.6609681844711304, "learning_rate": 1.943853577707544e-07, "loss": 1.7947, "step": 9322 }, { "epoch": 0.9808521830615465, "grad_norm": 1.9472432136535645, "learning_rate": 1.922674112522227e-07, "loss": 1.6767, "step": 9323 }, { "epoch": 0.9809573908469227, "grad_norm": 1.3472323417663574, "learning_rate": 1.9016105507702054e-07, "loss": 1.2528, "step": 9324 }, { "epoch": 0.9810625986322988, "grad_norm": 1.9179092645645142, "learning_rate": 1.8806628948974114e-07, "loss": 1.5153, "step": 9325 }, { "epoch": 0.9811678064176749, "grad_norm": 2.2002909183502197, "learning_rate": 1.859831147336566e-07, "loss": 1.584, "step": 9326 }, { "epoch": 0.981273014203051, "grad_norm": 2.532457113265991, "learning_rate": 1.8391153105067338e-07, "loss": 2.4041, "step": 9327 }, { "epoch": 0.9813782219884272, "grad_norm": 1.7120616436004639, "learning_rate": 1.8185153868135462e-07, "loss": 1.6406, "step": 9328 }, { "epoch": 0.9814834297738032, "grad_norm": 1.6035572290420532, "learning_rate": 1.798031378649201e-07, "loss": 1.5754, "step": 9329 }, { "epoch": 0.9815886375591794, "grad_norm": 1.2862604856491089, "learning_rate": 1.7776632883924615e-07, "loss": 1.7776, "step": 9330 }, { "epoch": 0.9816938453445555, "grad_norm": 2.551787853240967, "learning_rate": 1.7574111184086582e-07, "loss": 1.962, "step": 9331 }, { "epoch": 0.9817990531299317, "grad_norm": 1.6095662117004395, "learning_rate": 1.7372748710495768e-07, "loss": 1.6107, "step": 9332 }, { "epoch": 0.9819042609153077, "grad_norm": 2.01579213142395, "learning_rate": 1.7172545486535685e-07, "loss": 1.1388, "step": 9333 }, { "epoch": 0.9820094687006838, "grad_norm": 2.388930559158325, "learning_rate": 1.6973501535455516e-07, "loss": 1.7735, "step": 9334 }, { "epoch": 0.98211467648606, "grad_norm": 1.5731823444366455, "learning_rate": 1.6775616880368994e-07, "loss": 1.653, "step": 9335 }, { "epoch": 0.9822198842714361, "grad_norm": 1.1225214004516602, "learning_rate": 1.6578891544255514e-07, "loss": 1.8642, "step": 9336 }, { "epoch": 0.9823250920568122, "grad_norm": 1.4525314569473267, "learning_rate": 1.638332554996125e-07, "loss": 1.2603, "step": 9337 }, { "epoch": 0.9824302998421883, "grad_norm": 2.0482780933380127, "learning_rate": 1.6188918920195806e-07, "loss": 1.4663, "step": 9338 }, { "epoch": 0.9825355076275645, "grad_norm": 1.067059874534607, "learning_rate": 1.5995671677535573e-07, "loss": 1.371, "step": 9339 }, { "epoch": 0.9826407154129405, "grad_norm": 2.4665608406066895, "learning_rate": 1.5803583844421488e-07, "loss": 1.533, "step": 9340 }, { "epoch": 0.9827459231983167, "grad_norm": 1.2421523332595825, "learning_rate": 1.561265544316015e-07, "loss": 1.3759, "step": 9341 }, { "epoch": 0.9828511309836928, "grad_norm": 1.3852927684783936, "learning_rate": 1.5422886495922718e-07, "loss": 1.5905, "step": 9342 }, { "epoch": 0.982956338769069, "grad_norm": 1.9068608283996582, "learning_rate": 1.5234277024747112e-07, "loss": 1.7971, "step": 9343 }, { "epoch": 0.983061546554445, "grad_norm": 1.5108956098556519, "learning_rate": 1.5046827051536928e-07, "loss": 1.8727, "step": 9344 }, { "epoch": 0.9831667543398211, "grad_norm": 1.5862867832183838, "learning_rate": 1.4860536598058085e-07, "loss": 1.9341, "step": 9345 }, { "epoch": 0.9832719621251973, "grad_norm": 1.2081376314163208, "learning_rate": 1.4675405685944387e-07, "loss": 1.7027, "step": 9346 }, { "epoch": 0.9833771699105733, "grad_norm": 1.2375844717025757, "learning_rate": 1.4491434336696418e-07, "loss": 1.5949, "step": 9347 }, { "epoch": 0.9834823776959495, "grad_norm": 1.2393487691879272, "learning_rate": 1.430862257167598e-07, "loss": 1.7839, "step": 9348 }, { "epoch": 0.9835875854813256, "grad_norm": 2.2204091548919678, "learning_rate": 1.412697041211275e-07, "loss": 1.5249, "step": 9349 }, { "epoch": 0.9836927932667018, "grad_norm": 1.9110403060913086, "learning_rate": 1.3946477879102083e-07, "loss": 1.1106, "step": 9350 }, { "epoch": 0.9837980010520778, "grad_norm": 1.352942705154419, "learning_rate": 1.3767144993602766e-07, "loss": 1.3338, "step": 9351 }, { "epoch": 0.983903208837454, "grad_norm": 1.511385440826416, "learning_rate": 1.3588971776441472e-07, "loss": 1.8067, "step": 9352 }, { "epoch": 0.9840084166228301, "grad_norm": 1.3924455642700195, "learning_rate": 1.3411958248309431e-07, "loss": 1.8532, "step": 9353 }, { "epoch": 0.9841136244082062, "grad_norm": 1.5775933265686035, "learning_rate": 1.3236104429760199e-07, "loss": 1.602, "step": 9354 }, { "epoch": 0.9842188321935823, "grad_norm": 1.5030940771102905, "learning_rate": 1.306141034121744e-07, "loss": 1.8972, "step": 9355 }, { "epoch": 0.9843240399789585, "grad_norm": 2.3042783737182617, "learning_rate": 1.2887876002967149e-07, "loss": 1.3577, "step": 9356 }, { "epoch": 0.9844292477643346, "grad_norm": 2.0904388427734375, "learning_rate": 1.2715501435159872e-07, "loss": 1.3343, "step": 9357 }, { "epoch": 0.9845344555497106, "grad_norm": 1.2769339084625244, "learning_rate": 1.254428665781515e-07, "loss": 1.8264, "step": 9358 }, { "epoch": 0.9846396633350868, "grad_norm": 1.4355113506317139, "learning_rate": 1.2374231690813754e-07, "loss": 1.8146, "step": 9359 }, { "epoch": 0.9847448711204629, "grad_norm": 1.9717060327529907, "learning_rate": 1.2205336553904323e-07, "loss": 1.3479, "step": 9360 }, { "epoch": 0.984850078905839, "grad_norm": 1.9314119815826416, "learning_rate": 1.203760126670117e-07, "loss": 1.4207, "step": 9361 }, { "epoch": 0.9849552866912151, "grad_norm": 1.9035725593566895, "learning_rate": 1.1871025848680939e-07, "loss": 1.2061, "step": 9362 }, { "epoch": 0.9850604944765913, "grad_norm": 1.5623804330825806, "learning_rate": 1.1705610319188154e-07, "loss": 1.5218, "step": 9363 }, { "epoch": 0.9851657022619674, "grad_norm": 1.5503050088882446, "learning_rate": 1.1541354697431894e-07, "loss": 1.8319, "step": 9364 }, { "epoch": 0.9852709100473435, "grad_norm": 1.8744232654571533, "learning_rate": 1.1378259002488013e-07, "loss": 1.2055, "step": 9365 }, { "epoch": 0.9853761178327196, "grad_norm": 1.7430399656295776, "learning_rate": 1.1216323253294691e-07, "loss": 1.44, "step": 9366 }, { "epoch": 0.9854813256180958, "grad_norm": 1.298393964767456, "learning_rate": 1.1055547468658001e-07, "loss": 1.7648, "step": 9367 }, { "epoch": 0.9855865334034719, "grad_norm": 1.0370997190475464, "learning_rate": 1.089593166724634e-07, "loss": 1.4384, "step": 9368 }, { "epoch": 0.985691741188848, "grad_norm": 1.9467689990997314, "learning_rate": 1.0737475867598212e-07, "loss": 1.8683, "step": 9369 }, { "epoch": 0.9857969489742241, "grad_norm": 1.4504022598266602, "learning_rate": 1.0580180088112234e-07, "loss": 1.2465, "step": 9370 }, { "epoch": 0.9859021567596002, "grad_norm": 1.4652175903320312, "learning_rate": 1.0424044347056017e-07, "loss": 1.6443, "step": 9371 }, { "epoch": 0.9860073645449763, "grad_norm": 1.7554495334625244, "learning_rate": 1.0269068662560611e-07, "loss": 1.0831, "step": 9372 }, { "epoch": 0.9861125723303524, "grad_norm": 1.1378939151763916, "learning_rate": 1.0115253052622731e-07, "loss": 1.7837, "step": 9373 }, { "epoch": 0.9862177801157286, "grad_norm": 1.8769643306732178, "learning_rate": 9.962597535104756e-08, "loss": 1.515, "step": 9374 }, { "epoch": 0.9863229879011047, "grad_norm": 1.4734489917755127, "learning_rate": 9.811102127733618e-08, "loss": 2.1066, "step": 9375 }, { "epoch": 0.9864281956864808, "grad_norm": 1.2402914762496948, "learning_rate": 9.660766848101909e-08, "loss": 1.6076, "step": 9376 }, { "epoch": 0.9865334034718569, "grad_norm": 1.601291537284851, "learning_rate": 9.511591713668999e-08, "loss": 1.5006, "step": 9377 }, { "epoch": 0.9866386112572331, "grad_norm": 1.4172643423080444, "learning_rate": 9.363576741755476e-08, "loss": 1.2291, "step": 9378 }, { "epoch": 0.9867438190426091, "grad_norm": 1.5718390941619873, "learning_rate": 9.216721949553142e-08, "loss": 1.8789, "step": 9379 }, { "epoch": 0.9868490268279853, "grad_norm": 1.7157665491104126, "learning_rate": 9.071027354112804e-08, "loss": 1.6226, "step": 9380 }, { "epoch": 0.9869542346133614, "grad_norm": 1.4585630893707275, "learning_rate": 8.926492972355371e-08, "loss": 1.5842, "step": 9381 }, { "epoch": 0.9870594423987376, "grad_norm": 1.299195647239685, "learning_rate": 8.783118821064085e-08, "loss": 1.4625, "step": 9382 }, { "epoch": 0.9871646501841136, "grad_norm": 1.305501937866211, "learning_rate": 8.640904916888959e-08, "loss": 1.4309, "step": 9383 }, { "epoch": 0.9872698579694897, "grad_norm": 1.4798643589019775, "learning_rate": 8.499851276344561e-08, "loss": 1.4667, "step": 9384 }, { "epoch": 0.9873750657548659, "grad_norm": 1.9000688791275024, "learning_rate": 8.359957915812233e-08, "loss": 1.4393, "step": 9385 }, { "epoch": 0.9874802735402419, "grad_norm": 1.4523059129714966, "learning_rate": 8.221224851535647e-08, "loss": 1.5859, "step": 9386 }, { "epoch": 0.9875854813256181, "grad_norm": 1.9257595539093018, "learning_rate": 8.08365209962525e-08, "loss": 1.7144, "step": 9387 }, { "epoch": 0.9876906891109942, "grad_norm": 1.2338892221450806, "learning_rate": 7.94723967605937e-08, "loss": 1.5707, "step": 9388 }, { "epoch": 0.9877958968963704, "grad_norm": 1.9463906288146973, "learning_rate": 7.81198759667645e-08, "loss": 1.7662, "step": 9389 }, { "epoch": 0.9879011046817464, "grad_norm": 1.2963135242462158, "learning_rate": 7.677895877183927e-08, "loss": 1.6959, "step": 9390 }, { "epoch": 0.9880063124671226, "grad_norm": 1.3970755338668823, "learning_rate": 7.544964533153787e-08, "loss": 1.334, "step": 9391 }, { "epoch": 0.9881115202524987, "grad_norm": 1.601676344871521, "learning_rate": 7.41319358002146e-08, "loss": 1.6125, "step": 9392 }, { "epoch": 0.9882167280378747, "grad_norm": 1.3263901472091675, "learning_rate": 7.282583033091372e-08, "loss": 1.9106, "step": 9393 }, { "epoch": 0.9883219358232509, "grad_norm": 1.8151953220367432, "learning_rate": 7.15313290752917e-08, "loss": 1.8621, "step": 9394 }, { "epoch": 0.988427143608627, "grad_norm": 1.4019142389297485, "learning_rate": 7.024843218368382e-08, "loss": 1.565, "step": 9395 }, { "epoch": 0.9885323513940032, "grad_norm": 2.185393810272217, "learning_rate": 6.897713980505982e-08, "loss": 1.4799, "step": 9396 }, { "epoch": 0.9886375591793792, "grad_norm": 1.357749104499817, "learning_rate": 6.771745208705715e-08, "loss": 1.5501, "step": 9397 }, { "epoch": 0.9887427669647554, "grad_norm": 1.7001276016235352, "learning_rate": 6.646936917595881e-08, "loss": 1.5436, "step": 9398 }, { "epoch": 0.9888479747501315, "grad_norm": 2.9914774894714355, "learning_rate": 6.52328912167044e-08, "loss": 1.9068, "step": 9399 }, { "epoch": 0.9889531825355077, "grad_norm": 1.8778984546661377, "learning_rate": 6.400801835286796e-08, "loss": 1.6909, "step": 9400 }, { "epoch": 0.9890583903208837, "grad_norm": 1.316900372505188, "learning_rate": 6.279475072670238e-08, "loss": 1.4041, "step": 9401 }, { "epoch": 0.9891635981062599, "grad_norm": 2.04650616645813, "learning_rate": 6.159308847909495e-08, "loss": 1.7704, "step": 9402 }, { "epoch": 0.989268805891636, "grad_norm": 2.6405978202819824, "learning_rate": 6.040303174958961e-08, "loss": 1.8183, "step": 9403 }, { "epoch": 0.989374013677012, "grad_norm": 1.2661974430084229, "learning_rate": 5.922458067639802e-08, "loss": 2.0856, "step": 9404 }, { "epoch": 0.9894792214623882, "grad_norm": 1.692745566368103, "learning_rate": 5.805773539634407e-08, "loss": 1.5816, "step": 9405 }, { "epoch": 0.9895844292477644, "grad_norm": 1.9099050760269165, "learning_rate": 5.690249604495268e-08, "loss": 1.4334, "step": 9406 }, { "epoch": 0.9896896370331405, "grad_norm": 1.8465721607208252, "learning_rate": 5.57588627563721e-08, "loss": 1.7839, "step": 9407 }, { "epoch": 0.9897948448185165, "grad_norm": 1.444919228553772, "learning_rate": 5.46268356634072e-08, "loss": 1.5675, "step": 9408 }, { "epoch": 0.9899000526038927, "grad_norm": 1.517018437385559, "learning_rate": 5.3506414897508404e-08, "loss": 1.4653, "step": 9409 }, { "epoch": 0.9900052603892688, "grad_norm": 1.6585253477096558, "learning_rate": 5.239760058879384e-08, "loss": 2.2469, "step": 9410 }, { "epoch": 0.9901104681746449, "grad_norm": 1.9332027435302734, "learning_rate": 5.130039286602717e-08, "loss": 1.6433, "step": 9411 }, { "epoch": 0.990215675960021, "grad_norm": 1.3716708421707153, "learning_rate": 5.02147918566287e-08, "loss": 1.4928, "step": 9412 }, { "epoch": 0.9903208837453972, "grad_norm": 2.6388068199157715, "learning_rate": 4.9140797686653136e-08, "loss": 1.4978, "step": 9413 }, { "epoch": 0.9904260915307733, "grad_norm": 1.895566463470459, "learning_rate": 4.807841048082296e-08, "loss": 1.6034, "step": 9414 }, { "epoch": 0.9905312993161494, "grad_norm": 1.3241360187530518, "learning_rate": 4.702763036252833e-08, "loss": 1.7537, "step": 9415 }, { "epoch": 0.9906365071015255, "grad_norm": 2.1230876445770264, "learning_rate": 4.598845745376057e-08, "loss": 1.9085, "step": 9416 }, { "epoch": 0.9907417148869017, "grad_norm": 2.1953372955322266, "learning_rate": 4.496089187522312e-08, "loss": 1.5051, "step": 9417 }, { "epoch": 0.9908469226722777, "grad_norm": 1.0922819375991821, "learning_rate": 4.3944933746231655e-08, "loss": 1.938, "step": 9418 }, { "epoch": 0.9909521304576538, "grad_norm": 1.2908408641815186, "learning_rate": 4.294058318475846e-08, "loss": 1.4622, "step": 9419 }, { "epoch": 0.99105733824303, "grad_norm": 1.8100134134292603, "learning_rate": 4.194784030745469e-08, "loss": 1.8774, "step": 9420 }, { "epoch": 0.9911625460284061, "grad_norm": 1.8238705396652222, "learning_rate": 4.096670522959478e-08, "loss": 1.5957, "step": 9421 }, { "epoch": 0.9912677538137822, "grad_norm": 1.4968173503875732, "learning_rate": 3.999717806510983e-08, "loss": 1.6067, "step": 9422 }, { "epoch": 0.9913729615991583, "grad_norm": 1.1250231266021729, "learning_rate": 3.903925892658755e-08, "loss": 1.5794, "step": 9423 }, { "epoch": 0.9914781693845345, "grad_norm": 1.5623326301574707, "learning_rate": 3.809294792527229e-08, "loss": 1.7024, "step": 9424 }, { "epoch": 0.9915833771699105, "grad_norm": 2.0509960651397705, "learning_rate": 3.715824517106503e-08, "loss": 1.489, "step": 9425 }, { "epoch": 0.9916885849552867, "grad_norm": 1.0217515230178833, "learning_rate": 3.623515077250117e-08, "loss": 1.5209, "step": 9426 }, { "epoch": 0.9917937927406628, "grad_norm": 1.395032525062561, "learning_rate": 3.532366483677274e-08, "loss": 1.8385, "step": 9427 }, { "epoch": 0.991899000526039, "grad_norm": 1.5118807554244995, "learning_rate": 3.442378746972841e-08, "loss": 1.3888, "step": 9428 }, { "epoch": 0.992004208311415, "grad_norm": 1.96005117893219, "learning_rate": 3.3535518775873466e-08, "loss": 1.5879, "step": 9429 }, { "epoch": 0.9921094160967912, "grad_norm": 1.319014072418213, "learning_rate": 3.265885885835873e-08, "loss": 1.8016, "step": 9430 }, { "epoch": 0.9922146238821673, "grad_norm": 1.2182525396347046, "learning_rate": 3.179380781898056e-08, "loss": 1.9213, "step": 9431 }, { "epoch": 0.9923198316675435, "grad_norm": 1.6044673919677734, "learning_rate": 3.0940365758203025e-08, "loss": 1.421, "step": 9432 }, { "epoch": 0.9924250394529195, "grad_norm": 1.7028839588165283, "learning_rate": 3.009853277512464e-08, "loss": 1.4184, "step": 9433 }, { "epoch": 0.9925302472382956, "grad_norm": 1.4499717950820923, "learning_rate": 2.9268308967522752e-08, "loss": 1.8255, "step": 9434 }, { "epoch": 0.9926354550236718, "grad_norm": 2.1826236248016357, "learning_rate": 2.844969443178691e-08, "loss": 2.1099, "step": 9435 }, { "epoch": 0.9927406628090478, "grad_norm": 1.6331896781921387, "learning_rate": 2.7642689262996625e-08, "loss": 2.0769, "step": 9436 }, { "epoch": 0.992845870594424, "grad_norm": 1.0556201934814453, "learning_rate": 2.6847293554854712e-08, "loss": 1.2877, "step": 9437 }, { "epoch": 0.9929510783798001, "grad_norm": 1.582120656967163, "learning_rate": 2.6063507399731735e-08, "loss": 1.6723, "step": 9438 }, { "epoch": 0.9930562861651763, "grad_norm": 1.865082859992981, "learning_rate": 2.5291330888643772e-08, "loss": 1.612, "step": 9439 }, { "epoch": 0.9931614939505523, "grad_norm": 1.324381709098816, "learning_rate": 2.453076411127464e-08, "loss": 2.0149, "step": 9440 }, { "epoch": 0.9932667017359285, "grad_norm": 1.3037350177764893, "learning_rate": 2.378180715593148e-08, "loss": 1.6584, "step": 9441 }, { "epoch": 0.9933719095213046, "grad_norm": 2.0570385456085205, "learning_rate": 2.304446010958916e-08, "loss": 1.0094, "step": 9442 }, { "epoch": 0.9934771173066806, "grad_norm": 2.5256316661834717, "learning_rate": 2.2318723057879188e-08, "loss": 1.3119, "step": 9443 }, { "epoch": 0.9935823250920568, "grad_norm": 1.4097765684127808, "learning_rate": 2.1604596085078587e-08, "loss": 1.5828, "step": 9444 }, { "epoch": 0.9936875328774329, "grad_norm": 1.6299117803573608, "learning_rate": 2.0902079274121024e-08, "loss": 1.523, "step": 9445 }, { "epoch": 0.9937927406628091, "grad_norm": 1.7055869102478027, "learning_rate": 2.0211172706574576e-08, "loss": 1.7484, "step": 9446 }, { "epoch": 0.9938979484481851, "grad_norm": 1.4784151315689087, "learning_rate": 1.953187646268617e-08, "loss": 1.4377, "step": 9447 }, { "epoch": 0.9940031562335613, "grad_norm": 1.5295408964157104, "learning_rate": 1.886419062132605e-08, "loss": 1.6043, "step": 9448 }, { "epoch": 0.9941083640189374, "grad_norm": 1.7209928035736084, "learning_rate": 1.8208115260032187e-08, "loss": 1.236, "step": 9449 }, { "epoch": 0.9942135718043135, "grad_norm": 1.6919333934783936, "learning_rate": 1.7563650455010295e-08, "loss": 1.4099, "step": 9450 }, { "epoch": 0.9943187795896896, "grad_norm": 1.644214391708374, "learning_rate": 1.6930796281078297e-08, "loss": 1.8462, "step": 9451 }, { "epoch": 0.9944239873750658, "grad_norm": 1.6857322454452515, "learning_rate": 1.6309552811744067e-08, "loss": 1.568, "step": 9452 }, { "epoch": 0.9945291951604419, "grad_norm": 1.1969326734542847, "learning_rate": 1.569992011913879e-08, "loss": 2.0799, "step": 9453 }, { "epoch": 0.994634402945818, "grad_norm": 1.4197190999984741, "learning_rate": 1.5101898274050286e-08, "loss": 1.6347, "step": 9454 }, { "epoch": 0.9947396107311941, "grad_norm": 1.7226569652557373, "learning_rate": 1.4515487345956313e-08, "loss": 1.5217, "step": 9455 }, { "epoch": 0.9948448185165703, "grad_norm": 1.4008227586746216, "learning_rate": 1.3940687402924646e-08, "loss": 1.7268, "step": 9456 }, { "epoch": 0.9949500263019463, "grad_norm": 1.268113613128662, "learning_rate": 1.3377498511712993e-08, "loss": 1.6932, "step": 9457 }, { "epoch": 0.9950552340873224, "grad_norm": 1.930999994277954, "learning_rate": 1.2825920737724596e-08, "loss": 1.7915, "step": 9458 }, { "epoch": 0.9951604418726986, "grad_norm": 2.0582447052001953, "learning_rate": 1.2285954145008216e-08, "loss": 1.3104, "step": 9459 }, { "epoch": 0.9952656496580747, "grad_norm": 1.6370129585266113, "learning_rate": 1.1757598796280355e-08, "loss": 1.6174, "step": 9460 }, { "epoch": 0.9953708574434508, "grad_norm": 1.1519914865493774, "learning_rate": 1.1240854752880836e-08, "loss": 1.6631, "step": 9461 }, { "epoch": 0.9954760652288269, "grad_norm": 1.6220288276672363, "learning_rate": 1.0735722074828313e-08, "loss": 2.0331, "step": 9462 }, { "epoch": 0.9955812730142031, "grad_norm": 1.75868558883667, "learning_rate": 1.0242200820786974e-08, "loss": 1.8015, "step": 9463 }, { "epoch": 0.9956864807995792, "grad_norm": 1.8576023578643799, "learning_rate": 9.760291048055426e-09, "loss": 1.8266, "step": 9464 }, { "epoch": 0.9957916885849553, "grad_norm": 1.2487952709197998, "learning_rate": 9.289992812600012e-09, "loss": 2.035, "step": 9465 }, { "epoch": 0.9958968963703314, "grad_norm": 2.236783027648926, "learning_rate": 8.831306169032604e-09, "loss": 1.9039, "step": 9466 }, { "epoch": 0.9960021041557076, "grad_norm": 1.4378468990325928, "learning_rate": 8.384231170632805e-09, "loss": 1.9437, "step": 9467 }, { "epoch": 0.9961073119410836, "grad_norm": 1.5383528470993042, "learning_rate": 7.94876786929244e-09, "loss": 1.2174, "step": 9468 }, { "epoch": 0.9962125197264597, "grad_norm": 3.4968624114990234, "learning_rate": 7.52491631560437e-09, "loss": 1.4205, "step": 9469 }, { "epoch": 0.9963177275118359, "grad_norm": 1.9415160417556763, "learning_rate": 7.112676558784781e-09, "loss": 1.4909, "step": 9470 }, { "epoch": 0.996422935297212, "grad_norm": 1.2908811569213867, "learning_rate": 6.71204864669539e-09, "loss": 1.5163, "step": 9471 }, { "epoch": 0.9965281430825881, "grad_norm": 1.1630277633666992, "learning_rate": 6.3230326258656435e-09, "loss": 1.5793, "step": 9472 }, { "epoch": 0.9966333508679642, "grad_norm": 1.6427892446517944, "learning_rate": 5.945628541481619e-09, "loss": 1.774, "step": 9473 }, { "epoch": 0.9967385586533404, "grad_norm": 1.8046869039535522, "learning_rate": 5.579836437341612e-09, "loss": 1.6256, "step": 9474 }, { "epoch": 0.9968437664387164, "grad_norm": 2.09666109085083, "learning_rate": 5.225656355956066e-09, "loss": 1.5055, "step": 9475 }, { "epoch": 0.9969489742240926, "grad_norm": 1.8133103847503662, "learning_rate": 4.883088338425434e-09, "loss": 1.7931, "step": 9476 }, { "epoch": 0.9970541820094687, "grad_norm": 1.8678925037384033, "learning_rate": 4.552132424562317e-09, "loss": 1.9548, "step": 9477 }, { "epoch": 0.9971593897948449, "grad_norm": 1.6927310228347778, "learning_rate": 4.2327886527693265e-09, "loss": 1.4532, "step": 9478 }, { "epoch": 0.9972645975802209, "grad_norm": 1.3339251279830933, "learning_rate": 3.925057060150117e-09, "loss": 1.4882, "step": 9479 }, { "epoch": 0.997369805365597, "grad_norm": 2.199472188949585, "learning_rate": 3.628937682431666e-09, "loss": 1.782, "step": 9480 }, { "epoch": 0.9974750131509732, "grad_norm": 1.5956065654754639, "learning_rate": 3.344430554008682e-09, "loss": 1.7815, "step": 9481 }, { "epoch": 0.9975802209363492, "grad_norm": 1.884323000907898, "learning_rate": 3.0715357079103003e-09, "loss": 1.3084, "step": 9482 }, { "epoch": 0.9976854287217254, "grad_norm": 1.6075438261032104, "learning_rate": 2.8102531758333883e-09, "loss": 1.4152, "step": 9483 }, { "epoch": 0.9977906365071015, "grad_norm": 1.7321268320083618, "learning_rate": 2.5605829881203414e-09, "loss": 1.776, "step": 9484 }, { "epoch": 0.9978958442924777, "grad_norm": 1.7372652292251587, "learning_rate": 2.3225251737701846e-09, "loss": 1.2503, "step": 9485 }, { "epoch": 0.9980010520778537, "grad_norm": 1.7862646579742432, "learning_rate": 2.0960797604052675e-09, "loss": 1.4679, "step": 9486 }, { "epoch": 0.9981062598632299, "grad_norm": 1.7311826944351196, "learning_rate": 1.881246774348977e-09, "loss": 0.6319, "step": 9487 }, { "epoch": 0.998211467648606, "grad_norm": 2.2221319675445557, "learning_rate": 1.678026240536923e-09, "loss": 1.4301, "step": 9488 }, { "epoch": 0.9983166754339821, "grad_norm": 1.4736319780349731, "learning_rate": 1.4864181825613444e-09, "loss": 1.6957, "step": 9489 }, { "epoch": 0.9984218832193582, "grad_norm": 1.5542229413986206, "learning_rate": 1.3064226226933152e-09, "loss": 1.4633, "step": 9490 }, { "epoch": 0.9985270910047344, "grad_norm": 1.928504467010498, "learning_rate": 1.1380395818050282e-09, "loss": 1.9097, "step": 9491 }, { "epoch": 0.9986322987901105, "grad_norm": 1.4703706502914429, "learning_rate": 9.812690794808177e-10, "loss": 1.5353, "step": 9492 }, { "epoch": 0.9987375065754865, "grad_norm": 1.7381463050842285, "learning_rate": 8.361111339061367e-10, "loss": 1.562, "step": 9493 }, { "epoch": 0.9988427143608627, "grad_norm": 2.3469841480255127, "learning_rate": 7.025657619563752e-10, "loss": 1.756, "step": 9494 }, { "epoch": 0.9989479221462388, "grad_norm": 1.3420002460479736, "learning_rate": 5.806329791191445e-10, "loss": 1.4611, "step": 9495 }, { "epoch": 0.999053129931615, "grad_norm": 1.709895372390747, "learning_rate": 4.703127995608902e-10, "loss": 1.4044, "step": 9496 }, { "epoch": 0.999158337716991, "grad_norm": 1.3718494176864624, "learning_rate": 3.716052360935862e-10, "loss": 1.7666, "step": 9497 }, { "epoch": 0.9992635455023672, "grad_norm": 1.1952706575393677, "learning_rate": 2.8451030018583623e-10, "loss": 1.8885, "step": 9498 }, { "epoch": 0.9993687532877433, "grad_norm": 1.6405409574508667, "learning_rate": 2.0902800194066985e-10, "loss": 1.5163, "step": 9499 }, { "epoch": 0.9994739610731194, "grad_norm": 1.4296813011169434, "learning_rate": 1.4515835012884893e-10, "loss": 1.4754, "step": 9500 } ], "logging_steps": 1.0, "max_steps": 9505, "num_input_tokens_seen": 0, "num_train_epochs": 1, "save_steps": 100, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": false }, "attributes": {} } }, "total_flos": 4.969403554197504e+17, "train_batch_size": 4, "trial_name": null, "trial_params": null }