{ "best_metric": null, "best_model_checkpoint": null, "epoch": 1.9999107899549489, "eval_steps": 500, "global_step": 22418, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.0, "grad_norm": 479.181575370833, "learning_rate": 1.4858841010401189e-08, "loss": 3.4191, "step": 1 }, { "epoch": 0.0, "grad_norm": 444.2897708062578, "learning_rate": 2.9717682020802378e-08, "loss": 3.4145, "step": 2 }, { "epoch": 0.0, "grad_norm": 471.42589410977354, "learning_rate": 4.457652303120357e-08, "loss": 3.4776, "step": 3 }, { "epoch": 0.0, "grad_norm": 447.0952927337563, "learning_rate": 5.9435364041604756e-08, "loss": 3.3658, "step": 4 }, { "epoch": 0.0, "grad_norm": 453.6589550797721, "learning_rate": 7.429420505200595e-08, "loss": 3.4743, "step": 5 }, { "epoch": 0.0, "grad_norm": 481.06522522053183, "learning_rate": 8.915304606240714e-08, "loss": 3.4123, "step": 6 }, { "epoch": 0.0, "grad_norm": 473.5375533010743, "learning_rate": 1.0401188707280832e-07, "loss": 3.3757, "step": 7 }, { "epoch": 0.0, "grad_norm": 436.315608474856, "learning_rate": 1.1887072808320951e-07, "loss": 3.3641, "step": 8 }, { "epoch": 0.0, "grad_norm": 414.8792812382819, "learning_rate": 1.337295690936107e-07, "loss": 3.3456, "step": 9 }, { "epoch": 0.0, "grad_norm": 333.7437359925691, "learning_rate": 1.485884101040119e-07, "loss": 3.1523, "step": 10 }, { "epoch": 0.0, "grad_norm": 327.378710355412, "learning_rate": 1.6344725111441309e-07, "loss": 3.1377, "step": 11 }, { "epoch": 0.0, "grad_norm": 285.7239111115323, "learning_rate": 1.7830609212481427e-07, "loss": 3.0205, "step": 12 }, { "epoch": 0.0, "grad_norm": 280.7644842348515, "learning_rate": 1.931649331352155e-07, "loss": 2.9787, "step": 13 }, { "epoch": 0.0, "grad_norm": 142.52095750824589, "learning_rate": 2.0802377414561665e-07, "loss": 2.6684, "step": 14 }, { "epoch": 0.0, "grad_norm": 127.96254371758955, "learning_rate": 2.2288261515601786e-07, "loss": 2.6067, "step": 15 }, { "epoch": 0.0, "grad_norm": 120.73572885794177, "learning_rate": 2.3774145616641902e-07, "loss": 2.6169, "step": 16 }, { "epoch": 0.0, "grad_norm": 112.53917359409854, "learning_rate": 2.5260029717682024e-07, "loss": 2.6291, "step": 17 }, { "epoch": 0.0, "grad_norm": 92.92813910099909, "learning_rate": 2.674591381872214e-07, "loss": 2.3937, "step": 18 }, { "epoch": 0.0, "grad_norm": 54.47839605852833, "learning_rate": 2.823179791976226e-07, "loss": 2.2331, "step": 19 }, { "epoch": 0.0, "grad_norm": 47.32993965473415, "learning_rate": 2.971768202080238e-07, "loss": 2.2455, "step": 20 }, { "epoch": 0.0, "grad_norm": 48.7811356025536, "learning_rate": 3.12035661218425e-07, "loss": 2.2506, "step": 21 }, { "epoch": 0.0, "grad_norm": 44.01857855746975, "learning_rate": 3.2689450222882617e-07, "loss": 2.2508, "step": 22 }, { "epoch": 0.0, "grad_norm": 41.93631579612844, "learning_rate": 3.4175334323922736e-07, "loss": 2.2085, "step": 23 }, { "epoch": 0.0, "grad_norm": 38.30011234774115, "learning_rate": 3.5661218424962855e-07, "loss": 2.1267, "step": 24 }, { "epoch": 0.0, "grad_norm": 43.54188231181135, "learning_rate": 3.7147102526002973e-07, "loss": 2.1306, "step": 25 }, { "epoch": 0.0, "grad_norm": 52.08026321767365, "learning_rate": 3.86329866270431e-07, "loss": 2.0643, "step": 26 }, { "epoch": 0.0, "grad_norm": 46.70090747661249, "learning_rate": 4.0118870728083216e-07, "loss": 2.0185, "step": 27 }, { "epoch": 0.0, "grad_norm": 39.41131309753315, "learning_rate": 4.160475482912333e-07, "loss": 1.9262, "step": 28 }, { "epoch": 0.0, "grad_norm": 34.41968496829119, "learning_rate": 4.309063893016345e-07, "loss": 1.9676, "step": 29 }, { "epoch": 0.0, "grad_norm": 28.570605923178842, "learning_rate": 4.457652303120357e-07, "loss": 1.9105, "step": 30 }, { "epoch": 0.0, "grad_norm": 28.3141498730999, "learning_rate": 4.606240713224369e-07, "loss": 1.8976, "step": 31 }, { "epoch": 0.0, "grad_norm": 27.40745888228763, "learning_rate": 4.7548291233283804e-07, "loss": 1.8376, "step": 32 }, { "epoch": 0.0, "grad_norm": 23.918437458656047, "learning_rate": 4.903417533432393e-07, "loss": 1.8495, "step": 33 }, { "epoch": 0.0, "grad_norm": 25.05587626909775, "learning_rate": 5.052005943536405e-07, "loss": 1.7793, "step": 34 }, { "epoch": 0.0, "grad_norm": 21.955775049135106, "learning_rate": 5.200594353640417e-07, "loss": 1.7941, "step": 35 }, { "epoch": 0.0, "grad_norm": 20.231098512861866, "learning_rate": 5.349182763744428e-07, "loss": 1.7493, "step": 36 }, { "epoch": 0.0, "grad_norm": 19.279727581189874, "learning_rate": 5.49777117384844e-07, "loss": 1.8063, "step": 37 }, { "epoch": 0.0, "grad_norm": 17.966177535301533, "learning_rate": 5.646359583952452e-07, "loss": 1.7594, "step": 38 }, { "epoch": 0.0, "grad_norm": 19.846803453280362, "learning_rate": 5.794947994056464e-07, "loss": 1.6701, "step": 39 }, { "epoch": 0.0, "grad_norm": 18.30996136853287, "learning_rate": 5.943536404160476e-07, "loss": 1.6956, "step": 40 }, { "epoch": 0.0, "grad_norm": 18.010934144859558, "learning_rate": 6.092124814264488e-07, "loss": 1.7458, "step": 41 }, { "epoch": 0.0, "grad_norm": 24.168274420434788, "learning_rate": 6.2407132243685e-07, "loss": 1.682, "step": 42 }, { "epoch": 0.0, "grad_norm": 20.084756508438474, "learning_rate": 6.389301634472512e-07, "loss": 1.7363, "step": 43 }, { "epoch": 0.0, "grad_norm": 19.290902998384894, "learning_rate": 6.537890044576523e-07, "loss": 1.706, "step": 44 }, { "epoch": 0.0, "grad_norm": 15.902582389775189, "learning_rate": 6.686478454680535e-07, "loss": 1.6916, "step": 45 }, { "epoch": 0.0, "grad_norm": 13.676862017586267, "learning_rate": 6.835066864784547e-07, "loss": 1.6156, "step": 46 }, { "epoch": 0.0, "grad_norm": 13.161909789045197, "learning_rate": 6.983655274888559e-07, "loss": 1.6225, "step": 47 }, { "epoch": 0.0, "grad_norm": 17.024205871793313, "learning_rate": 7.132243684992571e-07, "loss": 1.6207, "step": 48 }, { "epoch": 0.0, "grad_norm": 15.004350315629559, "learning_rate": 7.280832095096584e-07, "loss": 1.6309, "step": 49 }, { "epoch": 0.0, "grad_norm": 13.990190132490014, "learning_rate": 7.429420505200595e-07, "loss": 1.5804, "step": 50 }, { "epoch": 0.0, "grad_norm": 18.991256193625045, "learning_rate": 7.578008915304607e-07, "loss": 1.5821, "step": 51 }, { "epoch": 0.0, "grad_norm": 15.895332622201291, "learning_rate": 7.72659732540862e-07, "loss": 1.588, "step": 52 }, { "epoch": 0.0, "grad_norm": 17.09088450280486, "learning_rate": 7.87518573551263e-07, "loss": 1.6041, "step": 53 }, { "epoch": 0.0, "grad_norm": 14.500945522403708, "learning_rate": 8.023774145616643e-07, "loss": 1.5966, "step": 54 }, { "epoch": 0.0, "grad_norm": 12.211534781116145, "learning_rate": 8.172362555720654e-07, "loss": 1.6168, "step": 55 }, { "epoch": 0.0, "grad_norm": 14.692041429656056, "learning_rate": 8.320950965824666e-07, "loss": 1.586, "step": 56 }, { "epoch": 0.01, "grad_norm": 13.001357004610455, "learning_rate": 8.469539375928679e-07, "loss": 1.5413, "step": 57 }, { "epoch": 0.01, "grad_norm": 16.978137841649104, "learning_rate": 8.61812778603269e-07, "loss": 1.5951, "step": 58 }, { "epoch": 0.01, "grad_norm": 12.751436330408085, "learning_rate": 8.766716196136702e-07, "loss": 1.5421, "step": 59 }, { "epoch": 0.01, "grad_norm": 12.004915053909961, "learning_rate": 8.915304606240714e-07, "loss": 1.5123, "step": 60 }, { "epoch": 0.01, "grad_norm": 13.906358884049599, "learning_rate": 9.063893016344725e-07, "loss": 1.5453, "step": 61 }, { "epoch": 0.01, "grad_norm": 12.409957010954496, "learning_rate": 9.212481426448738e-07, "loss": 1.4842, "step": 62 }, { "epoch": 0.01, "grad_norm": 12.864082352187994, "learning_rate": 9.36106983655275e-07, "loss": 1.4757, "step": 63 }, { "epoch": 0.01, "grad_norm": 13.701845077315442, "learning_rate": 9.509658246656761e-07, "loss": 1.4957, "step": 64 }, { "epoch": 0.01, "grad_norm": 12.713471528137951, "learning_rate": 9.658246656760775e-07, "loss": 1.5024, "step": 65 }, { "epoch": 0.01, "grad_norm": 15.993112651003695, "learning_rate": 9.806835066864786e-07, "loss": 1.564, "step": 66 }, { "epoch": 0.01, "grad_norm": 13.869519451134513, "learning_rate": 9.955423476968797e-07, "loss": 1.426, "step": 67 }, { "epoch": 0.01, "grad_norm": 14.069638413154408, "learning_rate": 1.010401188707281e-06, "loss": 1.4965, "step": 68 }, { "epoch": 0.01, "grad_norm": 15.087880279096124, "learning_rate": 1.025260029717682e-06, "loss": 1.5382, "step": 69 }, { "epoch": 0.01, "grad_norm": 13.92505532814705, "learning_rate": 1.0401188707280833e-06, "loss": 1.4644, "step": 70 }, { "epoch": 0.01, "grad_norm": 11.784591415007261, "learning_rate": 1.0549777117384844e-06, "loss": 1.4451, "step": 71 }, { "epoch": 0.01, "grad_norm": 11.459471046099068, "learning_rate": 1.0698365527488857e-06, "loss": 1.4433, "step": 72 }, { "epoch": 0.01, "grad_norm": 14.550963441072971, "learning_rate": 1.084695393759287e-06, "loss": 1.4407, "step": 73 }, { "epoch": 0.01, "grad_norm": 10.872072499299058, "learning_rate": 1.099554234769688e-06, "loss": 1.4875, "step": 74 }, { "epoch": 0.01, "grad_norm": 13.328535641926827, "learning_rate": 1.1144130757800894e-06, "loss": 1.4575, "step": 75 }, { "epoch": 0.01, "grad_norm": 13.364700899036928, "learning_rate": 1.1292719167904904e-06, "loss": 1.5537, "step": 76 }, { "epoch": 0.01, "grad_norm": 16.147898840712465, "learning_rate": 1.1441307578008915e-06, "loss": 1.482, "step": 77 }, { "epoch": 0.01, "grad_norm": 11.815008450187952, "learning_rate": 1.1589895988112928e-06, "loss": 1.3526, "step": 78 }, { "epoch": 0.01, "grad_norm": 12.05571210539922, "learning_rate": 1.173848439821694e-06, "loss": 1.4698, "step": 79 }, { "epoch": 0.01, "grad_norm": 11.163143091048799, "learning_rate": 1.1887072808320952e-06, "loss": 1.377, "step": 80 }, { "epoch": 0.01, "grad_norm": 17.568085696170478, "learning_rate": 1.2035661218424965e-06, "loss": 1.39, "step": 81 }, { "epoch": 0.01, "grad_norm": 11.806868621894173, "learning_rate": 1.2184249628528976e-06, "loss": 1.3527, "step": 82 }, { "epoch": 0.01, "grad_norm": 12.455012804839708, "learning_rate": 1.2332838038632989e-06, "loss": 1.4197, "step": 83 }, { "epoch": 0.01, "grad_norm": 25.358931898720623, "learning_rate": 1.2481426448737e-06, "loss": 1.4604, "step": 84 }, { "epoch": 0.01, "grad_norm": 12.58656447679682, "learning_rate": 1.263001485884101e-06, "loss": 1.4368, "step": 85 }, { "epoch": 0.01, "grad_norm": 15.814777797788555, "learning_rate": 1.2778603268945023e-06, "loss": 1.4281, "step": 86 }, { "epoch": 0.01, "grad_norm": 10.322638396510534, "learning_rate": 1.2927191679049036e-06, "loss": 1.3675, "step": 87 }, { "epoch": 0.01, "grad_norm": 15.134557335800913, "learning_rate": 1.3075780089153047e-06, "loss": 1.4305, "step": 88 }, { "epoch": 0.01, "grad_norm": 12.209688427686528, "learning_rate": 1.322436849925706e-06, "loss": 1.3816, "step": 89 }, { "epoch": 0.01, "grad_norm": 15.840224974971235, "learning_rate": 1.337295690936107e-06, "loss": 1.3839, "step": 90 }, { "epoch": 0.01, "grad_norm": 15.696053380271618, "learning_rate": 1.3521545319465081e-06, "loss": 1.4101, "step": 91 }, { "epoch": 0.01, "grad_norm": 10.938588514626533, "learning_rate": 1.3670133729569094e-06, "loss": 1.4081, "step": 92 }, { "epoch": 0.01, "grad_norm": 10.055815008077932, "learning_rate": 1.3818722139673107e-06, "loss": 1.3995, "step": 93 }, { "epoch": 0.01, "grad_norm": 12.915638253873388, "learning_rate": 1.3967310549777118e-06, "loss": 1.4221, "step": 94 }, { "epoch": 0.01, "grad_norm": 17.0224794717339, "learning_rate": 1.4115898959881131e-06, "loss": 1.3323, "step": 95 }, { "epoch": 0.01, "grad_norm": 9.951756075064637, "learning_rate": 1.4264487369985142e-06, "loss": 1.426, "step": 96 }, { "epoch": 0.01, "grad_norm": 18.447943115932567, "learning_rate": 1.4413075780089153e-06, "loss": 1.3683, "step": 97 }, { "epoch": 0.01, "grad_norm": 11.907615755825423, "learning_rate": 1.4561664190193168e-06, "loss": 1.3827, "step": 98 }, { "epoch": 0.01, "grad_norm": 17.527064148724257, "learning_rate": 1.4710252600297179e-06, "loss": 1.3522, "step": 99 }, { "epoch": 0.01, "grad_norm": 12.85463892424153, "learning_rate": 1.485884101040119e-06, "loss": 1.3583, "step": 100 }, { "epoch": 0.01, "grad_norm": 11.979405742834658, "learning_rate": 1.5007429420505202e-06, "loss": 1.3796, "step": 101 }, { "epoch": 0.01, "grad_norm": 14.877901544159329, "learning_rate": 1.5156017830609213e-06, "loss": 1.3206, "step": 102 }, { "epoch": 0.01, "grad_norm": 18.14171894621848, "learning_rate": 1.5304606240713226e-06, "loss": 1.3531, "step": 103 }, { "epoch": 0.01, "grad_norm": 17.139693487970053, "learning_rate": 1.545319465081724e-06, "loss": 1.3549, "step": 104 }, { "epoch": 0.01, "grad_norm": 13.643358282574313, "learning_rate": 1.560178306092125e-06, "loss": 1.3871, "step": 105 }, { "epoch": 0.01, "grad_norm": 13.558436135548048, "learning_rate": 1.575037147102526e-06, "loss": 1.3315, "step": 106 }, { "epoch": 0.01, "grad_norm": 16.7614529553977, "learning_rate": 1.5898959881129271e-06, "loss": 1.3925, "step": 107 }, { "epoch": 0.01, "grad_norm": 14.871331846237426, "learning_rate": 1.6047548291233286e-06, "loss": 1.3291, "step": 108 }, { "epoch": 0.01, "grad_norm": 14.294756570468897, "learning_rate": 1.6196136701337297e-06, "loss": 1.3492, "step": 109 }, { "epoch": 0.01, "grad_norm": 12.405533508743769, "learning_rate": 1.6344725111441308e-06, "loss": 1.2963, "step": 110 }, { "epoch": 0.01, "grad_norm": 13.61018279134456, "learning_rate": 1.649331352154532e-06, "loss": 1.3527, "step": 111 }, { "epoch": 0.01, "grad_norm": 12.821120193198384, "learning_rate": 1.6641901931649332e-06, "loss": 1.2968, "step": 112 }, { "epoch": 0.01, "grad_norm": 16.918411132600838, "learning_rate": 1.6790490341753343e-06, "loss": 1.3459, "step": 113 }, { "epoch": 0.01, "grad_norm": 14.633436837689228, "learning_rate": 1.6939078751857358e-06, "loss": 1.3415, "step": 114 }, { "epoch": 0.01, "grad_norm": 14.249921385480322, "learning_rate": 1.7087667161961369e-06, "loss": 1.2786, "step": 115 }, { "epoch": 0.01, "grad_norm": 12.025236355476027, "learning_rate": 1.723625557206538e-06, "loss": 1.3296, "step": 116 }, { "epoch": 0.01, "grad_norm": 11.901094356781028, "learning_rate": 1.7384843982169392e-06, "loss": 1.3116, "step": 117 }, { "epoch": 0.01, "grad_norm": 17.865371712168688, "learning_rate": 1.7533432392273403e-06, "loss": 1.3751, "step": 118 }, { "epoch": 0.01, "grad_norm": 17.300456403688568, "learning_rate": 1.7682020802377416e-06, "loss": 1.2831, "step": 119 }, { "epoch": 0.01, "grad_norm": 11.851391973420686, "learning_rate": 1.7830609212481429e-06, "loss": 1.3637, "step": 120 }, { "epoch": 0.01, "grad_norm": 16.204876698827395, "learning_rate": 1.797919762258544e-06, "loss": 1.3002, "step": 121 }, { "epoch": 0.01, "grad_norm": 12.392154234251004, "learning_rate": 1.812778603268945e-06, "loss": 1.3093, "step": 122 }, { "epoch": 0.01, "grad_norm": 13.759589482992947, "learning_rate": 1.8276374442793463e-06, "loss": 1.2945, "step": 123 }, { "epoch": 0.01, "grad_norm": 12.620869907217457, "learning_rate": 1.8424962852897476e-06, "loss": 1.3044, "step": 124 }, { "epoch": 0.01, "grad_norm": 13.864494310444867, "learning_rate": 1.8573551263001487e-06, "loss": 1.3197, "step": 125 }, { "epoch": 0.01, "grad_norm": 11.05485640891095, "learning_rate": 1.87221396731055e-06, "loss": 1.3127, "step": 126 }, { "epoch": 0.01, "grad_norm": 13.584014381877582, "learning_rate": 1.887072808320951e-06, "loss": 1.3062, "step": 127 }, { "epoch": 0.01, "grad_norm": 13.500868635254967, "learning_rate": 1.9019316493313522e-06, "loss": 1.3076, "step": 128 }, { "epoch": 0.01, "grad_norm": 15.806498047741435, "learning_rate": 1.9167904903417537e-06, "loss": 1.316, "step": 129 }, { "epoch": 0.01, "grad_norm": 17.9649931494059, "learning_rate": 1.931649331352155e-06, "loss": 1.2616, "step": 130 }, { "epoch": 0.01, "grad_norm": 12.855783617695623, "learning_rate": 1.946508172362556e-06, "loss": 1.3063, "step": 131 }, { "epoch": 0.01, "grad_norm": 13.529057403136159, "learning_rate": 1.961367013372957e-06, "loss": 1.2163, "step": 132 }, { "epoch": 0.01, "grad_norm": 13.777468106054982, "learning_rate": 1.976225854383358e-06, "loss": 1.2762, "step": 133 }, { "epoch": 0.01, "grad_norm": 14.24424756323446, "learning_rate": 1.9910846953937593e-06, "loss": 1.2596, "step": 134 }, { "epoch": 0.01, "grad_norm": 15.042397353824043, "learning_rate": 2.0059435364041606e-06, "loss": 1.2565, "step": 135 }, { "epoch": 0.01, "grad_norm": 17.735895883652123, "learning_rate": 2.020802377414562e-06, "loss": 1.3068, "step": 136 }, { "epoch": 0.01, "grad_norm": 13.707673602825954, "learning_rate": 2.035661218424963e-06, "loss": 1.3003, "step": 137 }, { "epoch": 0.01, "grad_norm": 17.48684971958427, "learning_rate": 2.050520059435364e-06, "loss": 1.3342, "step": 138 }, { "epoch": 0.01, "grad_norm": 13.228479960755173, "learning_rate": 2.0653789004457653e-06, "loss": 1.2495, "step": 139 }, { "epoch": 0.01, "grad_norm": 11.900078884379077, "learning_rate": 2.0802377414561666e-06, "loss": 1.2468, "step": 140 }, { "epoch": 0.01, "grad_norm": 13.47129268314176, "learning_rate": 2.095096582466568e-06, "loss": 1.296, "step": 141 }, { "epoch": 0.01, "grad_norm": 16.459660959763685, "learning_rate": 2.109955423476969e-06, "loss": 1.342, "step": 142 }, { "epoch": 0.01, "grad_norm": 13.760276648449938, "learning_rate": 2.12481426448737e-06, "loss": 1.2727, "step": 143 }, { "epoch": 0.01, "grad_norm": 12.700064552390026, "learning_rate": 2.1396731054977714e-06, "loss": 1.3235, "step": 144 }, { "epoch": 0.01, "grad_norm": 18.913751170862138, "learning_rate": 2.1545319465081727e-06, "loss": 1.2435, "step": 145 }, { "epoch": 0.01, "grad_norm": 18.305401315829588, "learning_rate": 2.169390787518574e-06, "loss": 1.2308, "step": 146 }, { "epoch": 0.01, "grad_norm": 10.771982225540329, "learning_rate": 2.184249628528975e-06, "loss": 1.2199, "step": 147 }, { "epoch": 0.01, "grad_norm": 14.979628684242488, "learning_rate": 2.199108469539376e-06, "loss": 1.2491, "step": 148 }, { "epoch": 0.01, "grad_norm": 14.787849766802376, "learning_rate": 2.213967310549777e-06, "loss": 1.2688, "step": 149 }, { "epoch": 0.01, "grad_norm": 11.918227376465481, "learning_rate": 2.2288261515601787e-06, "loss": 1.245, "step": 150 }, { "epoch": 0.01, "grad_norm": 13.170387629365358, "learning_rate": 2.2436849925705796e-06, "loss": 1.2734, "step": 151 }, { "epoch": 0.01, "grad_norm": 15.653791558432433, "learning_rate": 2.258543833580981e-06, "loss": 1.2626, "step": 152 }, { "epoch": 0.01, "grad_norm": 15.205828606638123, "learning_rate": 2.273402674591382e-06, "loss": 1.2133, "step": 153 }, { "epoch": 0.01, "grad_norm": 11.6149677828453, "learning_rate": 2.288261515601783e-06, "loss": 1.2684, "step": 154 }, { "epoch": 0.01, "grad_norm": 11.388640296290708, "learning_rate": 2.3031203566121843e-06, "loss": 1.2988, "step": 155 }, { "epoch": 0.01, "grad_norm": 13.862643638893008, "learning_rate": 2.3179791976225856e-06, "loss": 1.2823, "step": 156 }, { "epoch": 0.01, "grad_norm": 15.512149971227803, "learning_rate": 2.332838038632987e-06, "loss": 1.2087, "step": 157 }, { "epoch": 0.01, "grad_norm": 12.453080063018092, "learning_rate": 2.347696879643388e-06, "loss": 1.2138, "step": 158 }, { "epoch": 0.01, "grad_norm": 13.567949862851616, "learning_rate": 2.362555720653789e-06, "loss": 1.3151, "step": 159 }, { "epoch": 0.01, "grad_norm": 21.547222876178306, "learning_rate": 2.3774145616641904e-06, "loss": 1.2234, "step": 160 }, { "epoch": 0.01, "grad_norm": 11.251373263860538, "learning_rate": 2.3922734026745917e-06, "loss": 1.2584, "step": 161 }, { "epoch": 0.01, "grad_norm": 13.300628443048849, "learning_rate": 2.407132243684993e-06, "loss": 1.2608, "step": 162 }, { "epoch": 0.01, "grad_norm": 11.98705476437207, "learning_rate": 2.421991084695394e-06, "loss": 1.2159, "step": 163 }, { "epoch": 0.01, "grad_norm": 13.791748061089544, "learning_rate": 2.436849925705795e-06, "loss": 1.2554, "step": 164 }, { "epoch": 0.01, "grad_norm": 13.018632449496657, "learning_rate": 2.4517087667161964e-06, "loss": 1.2341, "step": 165 }, { "epoch": 0.01, "grad_norm": 15.094477050073278, "learning_rate": 2.4665676077265977e-06, "loss": 1.2332, "step": 166 }, { "epoch": 0.01, "grad_norm": 11.884350776445494, "learning_rate": 2.4814264487369986e-06, "loss": 1.2469, "step": 167 }, { "epoch": 0.01, "grad_norm": 17.193021833995882, "learning_rate": 2.4962852897474e-06, "loss": 1.3187, "step": 168 }, { "epoch": 0.02, "grad_norm": 11.961130562655457, "learning_rate": 2.511144130757801e-06, "loss": 1.2319, "step": 169 }, { "epoch": 0.02, "grad_norm": 10.993313911240913, "learning_rate": 2.526002971768202e-06, "loss": 1.2149, "step": 170 }, { "epoch": 0.02, "grad_norm": 14.047670795991726, "learning_rate": 2.5408618127786033e-06, "loss": 1.2209, "step": 171 }, { "epoch": 0.02, "grad_norm": 21.127092907882563, "learning_rate": 2.5557206537890046e-06, "loss": 1.2408, "step": 172 }, { "epoch": 0.02, "grad_norm": 14.132470723641486, "learning_rate": 2.5705794947994055e-06, "loss": 1.203, "step": 173 }, { "epoch": 0.02, "grad_norm": 12.844890485764699, "learning_rate": 2.5854383358098072e-06, "loss": 1.1984, "step": 174 }, { "epoch": 0.02, "grad_norm": 12.74978526362959, "learning_rate": 2.6002971768202085e-06, "loss": 1.1639, "step": 175 }, { "epoch": 0.02, "grad_norm": 13.815068691606902, "learning_rate": 2.6151560178306094e-06, "loss": 1.3011, "step": 176 }, { "epoch": 0.02, "grad_norm": 15.764349569674762, "learning_rate": 2.6300148588410107e-06, "loss": 1.2627, "step": 177 }, { "epoch": 0.02, "grad_norm": 14.470509417280374, "learning_rate": 2.644873699851412e-06, "loss": 1.2685, "step": 178 }, { "epoch": 0.02, "grad_norm": 16.039980543166294, "learning_rate": 2.659732540861813e-06, "loss": 1.1406, "step": 179 }, { "epoch": 0.02, "grad_norm": 12.5738469121652, "learning_rate": 2.674591381872214e-06, "loss": 1.1516, "step": 180 }, { "epoch": 0.02, "grad_norm": 18.059602492533326, "learning_rate": 2.6894502228826154e-06, "loss": 1.1666, "step": 181 }, { "epoch": 0.02, "grad_norm": 20.177117462919192, "learning_rate": 2.7043090638930163e-06, "loss": 1.2744, "step": 182 }, { "epoch": 0.02, "grad_norm": 15.536263644110688, "learning_rate": 2.7191679049034176e-06, "loss": 1.1206, "step": 183 }, { "epoch": 0.02, "grad_norm": 9.297048918515785, "learning_rate": 2.734026745913819e-06, "loss": 1.2574, "step": 184 }, { "epoch": 0.02, "grad_norm": 10.678652060311096, "learning_rate": 2.74888558692422e-06, "loss": 1.2799, "step": 185 }, { "epoch": 0.02, "grad_norm": 15.59313597860626, "learning_rate": 2.7637444279346215e-06, "loss": 1.2248, "step": 186 }, { "epoch": 0.02, "grad_norm": 13.457200260997297, "learning_rate": 2.7786032689450228e-06, "loss": 1.2024, "step": 187 }, { "epoch": 0.02, "grad_norm": 14.59239483647375, "learning_rate": 2.7934621099554236e-06, "loss": 1.1853, "step": 188 }, { "epoch": 0.02, "grad_norm": 18.672676931184043, "learning_rate": 2.808320950965825e-06, "loss": 1.2517, "step": 189 }, { "epoch": 0.02, "grad_norm": 15.29911091977755, "learning_rate": 2.8231797919762262e-06, "loss": 1.1466, "step": 190 }, { "epoch": 0.02, "grad_norm": 11.149515901303058, "learning_rate": 2.838038632986627e-06, "loss": 1.1838, "step": 191 }, { "epoch": 0.02, "grad_norm": 14.051324968932548, "learning_rate": 2.8528974739970284e-06, "loss": 1.1744, "step": 192 }, { "epoch": 0.02, "grad_norm": 14.522630864590466, "learning_rate": 2.8677563150074297e-06, "loss": 1.2072, "step": 193 }, { "epoch": 0.02, "grad_norm": 13.674067310609283, "learning_rate": 2.8826151560178305e-06, "loss": 1.2121, "step": 194 }, { "epoch": 0.02, "grad_norm": 13.782367562371006, "learning_rate": 2.8974739970282323e-06, "loss": 1.1845, "step": 195 }, { "epoch": 0.02, "grad_norm": 12.879011934178767, "learning_rate": 2.9123328380386335e-06, "loss": 1.2064, "step": 196 }, { "epoch": 0.02, "grad_norm": 11.498092970246208, "learning_rate": 2.9271916790490344e-06, "loss": 1.1234, "step": 197 }, { "epoch": 0.02, "grad_norm": 10.17586068791182, "learning_rate": 2.9420505200594357e-06, "loss": 1.2419, "step": 198 }, { "epoch": 0.02, "grad_norm": 13.004187552958408, "learning_rate": 2.956909361069837e-06, "loss": 1.2166, "step": 199 }, { "epoch": 0.02, "grad_norm": 12.192285836517392, "learning_rate": 2.971768202080238e-06, "loss": 1.212, "step": 200 }, { "epoch": 0.02, "grad_norm": 17.42181289015885, "learning_rate": 2.986627043090639e-06, "loss": 1.2487, "step": 201 }, { "epoch": 0.02, "grad_norm": 12.179845802131048, "learning_rate": 3.0014858841010405e-06, "loss": 1.1612, "step": 202 }, { "epoch": 0.02, "grad_norm": 9.600482852462585, "learning_rate": 3.0163447251114413e-06, "loss": 1.1495, "step": 203 }, { "epoch": 0.02, "grad_norm": 12.014264113015788, "learning_rate": 3.0312035661218426e-06, "loss": 1.2616, "step": 204 }, { "epoch": 0.02, "grad_norm": 13.107693551936135, "learning_rate": 3.0460624071322435e-06, "loss": 1.139, "step": 205 }, { "epoch": 0.02, "grad_norm": 11.917826535458081, "learning_rate": 3.060921248142645e-06, "loss": 1.2142, "step": 206 }, { "epoch": 0.02, "grad_norm": 11.826483910058341, "learning_rate": 3.0757800891530465e-06, "loss": 1.2569, "step": 207 }, { "epoch": 0.02, "grad_norm": 15.056638792781408, "learning_rate": 3.090638930163448e-06, "loss": 1.2198, "step": 208 }, { "epoch": 0.02, "grad_norm": 16.784629298821034, "learning_rate": 3.1054977711738487e-06, "loss": 1.1242, "step": 209 }, { "epoch": 0.02, "grad_norm": 11.040510640244115, "learning_rate": 3.12035661218425e-06, "loss": 1.1908, "step": 210 }, { "epoch": 0.02, "grad_norm": 14.314824997667564, "learning_rate": 3.135215453194651e-06, "loss": 1.2029, "step": 211 }, { "epoch": 0.02, "grad_norm": 18.09616170922017, "learning_rate": 3.150074294205052e-06, "loss": 1.2121, "step": 212 }, { "epoch": 0.02, "grad_norm": 16.781146176840643, "learning_rate": 3.1649331352154534e-06, "loss": 1.2052, "step": 213 }, { "epoch": 0.02, "grad_norm": 11.264561761688796, "learning_rate": 3.1797919762258543e-06, "loss": 1.1961, "step": 214 }, { "epoch": 0.02, "grad_norm": 14.781151549460386, "learning_rate": 3.1946508172362556e-06, "loss": 1.2406, "step": 215 }, { "epoch": 0.02, "grad_norm": 11.95473417118167, "learning_rate": 3.2095096582466573e-06, "loss": 1.1866, "step": 216 }, { "epoch": 0.02, "grad_norm": 15.16509082756982, "learning_rate": 3.2243684992570586e-06, "loss": 1.2092, "step": 217 }, { "epoch": 0.02, "grad_norm": 13.786658637688276, "learning_rate": 3.2392273402674595e-06, "loss": 1.2308, "step": 218 }, { "epoch": 0.02, "grad_norm": 15.021731858126865, "learning_rate": 3.2540861812778607e-06, "loss": 1.1464, "step": 219 }, { "epoch": 0.02, "grad_norm": 12.953107241389668, "learning_rate": 3.2689450222882616e-06, "loss": 1.1401, "step": 220 }, { "epoch": 0.02, "grad_norm": 11.081731006475007, "learning_rate": 3.283803863298663e-06, "loss": 1.2147, "step": 221 }, { "epoch": 0.02, "grad_norm": 13.20020408897566, "learning_rate": 3.298662704309064e-06, "loss": 1.1948, "step": 222 }, { "epoch": 0.02, "grad_norm": 11.476540961563462, "learning_rate": 3.313521545319465e-06, "loss": 1.1341, "step": 223 }, { "epoch": 0.02, "grad_norm": 13.357412307087126, "learning_rate": 3.3283803863298664e-06, "loss": 1.1894, "step": 224 }, { "epoch": 0.02, "grad_norm": 10.042216590979454, "learning_rate": 3.3432392273402677e-06, "loss": 1.2044, "step": 225 }, { "epoch": 0.02, "grad_norm": 9.914633679413257, "learning_rate": 3.3580980683506685e-06, "loss": 1.1855, "step": 226 }, { "epoch": 0.02, "grad_norm": 14.73825595969435, "learning_rate": 3.3729569093610702e-06, "loss": 1.1828, "step": 227 }, { "epoch": 0.02, "grad_norm": 12.519662367164644, "learning_rate": 3.3878157503714715e-06, "loss": 1.1759, "step": 228 }, { "epoch": 0.02, "grad_norm": 13.021231922314369, "learning_rate": 3.4026745913818724e-06, "loss": 1.2459, "step": 229 }, { "epoch": 0.02, "grad_norm": 13.310598451337086, "learning_rate": 3.4175334323922737e-06, "loss": 1.1945, "step": 230 }, { "epoch": 0.02, "grad_norm": 11.949457538230213, "learning_rate": 3.432392273402675e-06, "loss": 1.2102, "step": 231 }, { "epoch": 0.02, "grad_norm": 17.045710640024133, "learning_rate": 3.447251114413076e-06, "loss": 1.1379, "step": 232 }, { "epoch": 0.02, "grad_norm": 12.590841904831532, "learning_rate": 3.462109955423477e-06, "loss": 1.1811, "step": 233 }, { "epoch": 0.02, "grad_norm": 13.750607612300065, "learning_rate": 3.4769687964338785e-06, "loss": 1.1857, "step": 234 }, { "epoch": 0.02, "grad_norm": 15.011852341995404, "learning_rate": 3.4918276374442793e-06, "loss": 1.203, "step": 235 }, { "epoch": 0.02, "grad_norm": 11.39528573444235, "learning_rate": 3.5066864784546806e-06, "loss": 1.1346, "step": 236 }, { "epoch": 0.02, "grad_norm": 13.840094174504417, "learning_rate": 3.5215453194650823e-06, "loss": 1.1567, "step": 237 }, { "epoch": 0.02, "grad_norm": 14.616219974659701, "learning_rate": 3.536404160475483e-06, "loss": 1.1738, "step": 238 }, { "epoch": 0.02, "grad_norm": 15.372558864126345, "learning_rate": 3.5512630014858845e-06, "loss": 1.1568, "step": 239 }, { "epoch": 0.02, "grad_norm": 12.455412066813167, "learning_rate": 3.5661218424962858e-06, "loss": 1.1451, "step": 240 }, { "epoch": 0.02, "grad_norm": 16.910211566269286, "learning_rate": 3.5809806835066867e-06, "loss": 1.1602, "step": 241 }, { "epoch": 0.02, "grad_norm": 12.30636616567896, "learning_rate": 3.595839524517088e-06, "loss": 1.1565, "step": 242 }, { "epoch": 0.02, "grad_norm": 12.111097262677804, "learning_rate": 3.6106983655274892e-06, "loss": 1.169, "step": 243 }, { "epoch": 0.02, "grad_norm": 13.50312867862547, "learning_rate": 3.62555720653789e-06, "loss": 1.2179, "step": 244 }, { "epoch": 0.02, "grad_norm": 14.32961653214507, "learning_rate": 3.6404160475482914e-06, "loss": 1.1748, "step": 245 }, { "epoch": 0.02, "grad_norm": 10.455741519438698, "learning_rate": 3.6552748885586927e-06, "loss": 1.2008, "step": 246 }, { "epoch": 0.02, "grad_norm": 15.474695964955936, "learning_rate": 3.6701337295690936e-06, "loss": 1.1185, "step": 247 }, { "epoch": 0.02, "grad_norm": 12.393869030707698, "learning_rate": 3.6849925705794953e-06, "loss": 1.1227, "step": 248 }, { "epoch": 0.02, "grad_norm": 15.713920176980748, "learning_rate": 3.6998514115898966e-06, "loss": 1.1721, "step": 249 }, { "epoch": 0.02, "grad_norm": 10.743457450738655, "learning_rate": 3.7147102526002974e-06, "loss": 1.2703, "step": 250 }, { "epoch": 0.02, "grad_norm": 12.415390507140565, "learning_rate": 3.7295690936106987e-06, "loss": 1.1371, "step": 251 }, { "epoch": 0.02, "grad_norm": 10.65868346467697, "learning_rate": 3.7444279346211e-06, "loss": 1.1417, "step": 252 }, { "epoch": 0.02, "grad_norm": 14.358408993554516, "learning_rate": 3.759286775631501e-06, "loss": 1.1465, "step": 253 }, { "epoch": 0.02, "grad_norm": 11.829190608866632, "learning_rate": 3.774145616641902e-06, "loss": 1.2175, "step": 254 }, { "epoch": 0.02, "grad_norm": 17.185789181969838, "learning_rate": 3.7890044576523035e-06, "loss": 1.1737, "step": 255 }, { "epoch": 0.02, "grad_norm": 18.77896936569814, "learning_rate": 3.8038632986627044e-06, "loss": 1.1712, "step": 256 }, { "epoch": 0.02, "grad_norm": 16.017119240160365, "learning_rate": 3.818722139673105e-06, "loss": 1.1675, "step": 257 }, { "epoch": 0.02, "grad_norm": 13.796672333302658, "learning_rate": 3.833580980683507e-06, "loss": 1.1254, "step": 258 }, { "epoch": 0.02, "grad_norm": 13.96226868735574, "learning_rate": 3.848439821693909e-06, "loss": 1.2487, "step": 259 }, { "epoch": 0.02, "grad_norm": 13.722954246633371, "learning_rate": 3.86329866270431e-06, "loss": 1.1899, "step": 260 }, { "epoch": 0.02, "grad_norm": 12.719410386530718, "learning_rate": 3.87815750371471e-06, "loss": 1.2081, "step": 261 }, { "epoch": 0.02, "grad_norm": 14.749493269685129, "learning_rate": 3.893016344725112e-06, "loss": 1.1285, "step": 262 }, { "epoch": 0.02, "grad_norm": 15.68745324230534, "learning_rate": 3.907875185735513e-06, "loss": 1.1248, "step": 263 }, { "epoch": 0.02, "grad_norm": 9.965408803921283, "learning_rate": 3.922734026745914e-06, "loss": 1.143, "step": 264 }, { "epoch": 0.02, "grad_norm": 12.40320934750862, "learning_rate": 3.9375928677563156e-06, "loss": 1.1394, "step": 265 }, { "epoch": 0.02, "grad_norm": 13.771517992861996, "learning_rate": 3.952451708766716e-06, "loss": 1.1301, "step": 266 }, { "epoch": 0.02, "grad_norm": 12.475975059438115, "learning_rate": 3.967310549777117e-06, "loss": 1.1739, "step": 267 }, { "epoch": 0.02, "grad_norm": 9.686715681645756, "learning_rate": 3.982169390787519e-06, "loss": 1.1226, "step": 268 }, { "epoch": 0.02, "grad_norm": 11.101843245998369, "learning_rate": 3.99702823179792e-06, "loss": 1.1672, "step": 269 }, { "epoch": 0.02, "grad_norm": 10.913564075048646, "learning_rate": 4.011887072808321e-06, "loss": 1.1486, "step": 270 }, { "epoch": 0.02, "grad_norm": 16.69823654863612, "learning_rate": 4.0267459138187225e-06, "loss": 1.1327, "step": 271 }, { "epoch": 0.02, "grad_norm": 16.085969793714913, "learning_rate": 4.041604754829124e-06, "loss": 1.1343, "step": 272 }, { "epoch": 0.02, "grad_norm": 15.494657069121349, "learning_rate": 4.056463595839525e-06, "loss": 1.1289, "step": 273 }, { "epoch": 0.02, "grad_norm": 10.440177381474532, "learning_rate": 4.071322436849926e-06, "loss": 1.1858, "step": 274 }, { "epoch": 0.02, "grad_norm": 11.8334650428448, "learning_rate": 4.086181277860327e-06, "loss": 1.1907, "step": 275 }, { "epoch": 0.02, "grad_norm": 12.74151253320919, "learning_rate": 4.101040118870728e-06, "loss": 1.1603, "step": 276 }, { "epoch": 0.02, "grad_norm": 11.475525789423399, "learning_rate": 4.115898959881129e-06, "loss": 1.2043, "step": 277 }, { "epoch": 0.02, "grad_norm": 12.829299651075278, "learning_rate": 4.130757800891531e-06, "loss": 1.133, "step": 278 }, { "epoch": 0.02, "grad_norm": 17.329680635075192, "learning_rate": 4.145616641901932e-06, "loss": 1.1493, "step": 279 }, { "epoch": 0.02, "grad_norm": 14.690556911955909, "learning_rate": 4.160475482912333e-06, "loss": 1.1623, "step": 280 }, { "epoch": 0.03, "grad_norm": 13.206246265946543, "learning_rate": 4.1753343239227346e-06, "loss": 1.1234, "step": 281 }, { "epoch": 0.03, "grad_norm": 16.712358755807497, "learning_rate": 4.190193164933136e-06, "loss": 1.1051, "step": 282 }, { "epoch": 0.03, "grad_norm": 16.073673365591848, "learning_rate": 4.205052005943537e-06, "loss": 1.0933, "step": 283 }, { "epoch": 0.03, "grad_norm": 12.508584279157313, "learning_rate": 4.219910846953938e-06, "loss": 1.1764, "step": 284 }, { "epoch": 0.03, "grad_norm": 10.991423548643269, "learning_rate": 4.234769687964339e-06, "loss": 1.0992, "step": 285 }, { "epoch": 0.03, "grad_norm": 11.748056613621992, "learning_rate": 4.24962852897474e-06, "loss": 1.1243, "step": 286 }, { "epoch": 0.03, "grad_norm": 12.322306237293285, "learning_rate": 4.2644873699851415e-06, "loss": 1.1757, "step": 287 }, { "epoch": 0.03, "grad_norm": 13.988645394552838, "learning_rate": 4.279346210995543e-06, "loss": 1.1257, "step": 288 }, { "epoch": 0.03, "grad_norm": 10.736277700046065, "learning_rate": 4.294205052005943e-06, "loss": 1.1539, "step": 289 }, { "epoch": 0.03, "grad_norm": 12.002667620401283, "learning_rate": 4.309063893016345e-06, "loss": 1.0836, "step": 290 }, { "epoch": 0.03, "grad_norm": 11.808712495874166, "learning_rate": 4.323922734026747e-06, "loss": 1.09, "step": 291 }, { "epoch": 0.03, "grad_norm": 10.936288983067914, "learning_rate": 4.338781575037148e-06, "loss": 1.1336, "step": 292 }, { "epoch": 0.03, "grad_norm": 9.145686145809051, "learning_rate": 4.353640416047548e-06, "loss": 1.1435, "step": 293 }, { "epoch": 0.03, "grad_norm": 14.456789448369179, "learning_rate": 4.36849925705795e-06, "loss": 1.1687, "step": 294 }, { "epoch": 0.03, "grad_norm": 11.769863508672417, "learning_rate": 4.383358098068351e-06, "loss": 1.1446, "step": 295 }, { "epoch": 0.03, "grad_norm": 17.589132702641255, "learning_rate": 4.398216939078752e-06, "loss": 1.1772, "step": 296 }, { "epoch": 0.03, "grad_norm": 12.278357014011094, "learning_rate": 4.4130757800891536e-06, "loss": 1.1981, "step": 297 }, { "epoch": 0.03, "grad_norm": 10.20754151376717, "learning_rate": 4.427934621099554e-06, "loss": 1.114, "step": 298 }, { "epoch": 0.03, "grad_norm": 12.029876134130642, "learning_rate": 4.442793462109955e-06, "loss": 1.1267, "step": 299 }, { "epoch": 0.03, "grad_norm": 10.418290211193492, "learning_rate": 4.4576523031203574e-06, "loss": 1.1366, "step": 300 }, { "epoch": 0.03, "grad_norm": 11.003956497140376, "learning_rate": 4.472511144130759e-06, "loss": 1.0846, "step": 301 }, { "epoch": 0.03, "grad_norm": 14.175925642107874, "learning_rate": 4.487369985141159e-06, "loss": 1.1238, "step": 302 }, { "epoch": 0.03, "grad_norm": 15.182664928368428, "learning_rate": 4.5022288261515605e-06, "loss": 1.1619, "step": 303 }, { "epoch": 0.03, "grad_norm": 12.878505417883948, "learning_rate": 4.517087667161962e-06, "loss": 1.1158, "step": 304 }, { "epoch": 0.03, "grad_norm": 12.952130330860887, "learning_rate": 4.531946508172363e-06, "loss": 1.128, "step": 305 }, { "epoch": 0.03, "grad_norm": 11.773052293623806, "learning_rate": 4.546805349182764e-06, "loss": 1.1744, "step": 306 }, { "epoch": 0.03, "grad_norm": 12.331953786365242, "learning_rate": 4.561664190193165e-06, "loss": 1.0938, "step": 307 }, { "epoch": 0.03, "grad_norm": 17.665432727588627, "learning_rate": 4.576523031203566e-06, "loss": 1.1313, "step": 308 }, { "epoch": 0.03, "grad_norm": 11.84762038498788, "learning_rate": 4.591381872213967e-06, "loss": 1.1476, "step": 309 }, { "epoch": 0.03, "grad_norm": 11.600148427775235, "learning_rate": 4.606240713224369e-06, "loss": 1.1333, "step": 310 }, { "epoch": 0.03, "grad_norm": 14.501259732471242, "learning_rate": 4.62109955423477e-06, "loss": 1.1369, "step": 311 }, { "epoch": 0.03, "grad_norm": 16.55911266314763, "learning_rate": 4.635958395245171e-06, "loss": 1.1324, "step": 312 }, { "epoch": 0.03, "grad_norm": 16.4446652146636, "learning_rate": 4.6508172362555726e-06, "loss": 1.1611, "step": 313 }, { "epoch": 0.03, "grad_norm": 14.275935419911963, "learning_rate": 4.665676077265974e-06, "loss": 1.1249, "step": 314 }, { "epoch": 0.03, "grad_norm": 11.518630160559079, "learning_rate": 4.680534918276375e-06, "loss": 1.1577, "step": 315 }, { "epoch": 0.03, "grad_norm": 9.777739076374935, "learning_rate": 4.695393759286776e-06, "loss": 1.1522, "step": 316 }, { "epoch": 0.03, "grad_norm": 12.349262763923035, "learning_rate": 4.710252600297177e-06, "loss": 1.1568, "step": 317 }, { "epoch": 0.03, "grad_norm": 13.241273809687511, "learning_rate": 4.725111441307578e-06, "loss": 1.1747, "step": 318 }, { "epoch": 0.03, "grad_norm": 12.896098906136915, "learning_rate": 4.7399702823179795e-06, "loss": 1.1553, "step": 319 }, { "epoch": 0.03, "grad_norm": 9.798390363313116, "learning_rate": 4.754829123328381e-06, "loss": 1.1057, "step": 320 }, { "epoch": 0.03, "grad_norm": 11.382811173244301, "learning_rate": 4.769687964338782e-06, "loss": 1.1664, "step": 321 }, { "epoch": 0.03, "grad_norm": 11.925649919559495, "learning_rate": 4.784546805349183e-06, "loss": 1.1351, "step": 322 }, { "epoch": 0.03, "grad_norm": 12.990226126529167, "learning_rate": 4.799405646359585e-06, "loss": 1.1642, "step": 323 }, { "epoch": 0.03, "grad_norm": 13.138448205829322, "learning_rate": 4.814264487369986e-06, "loss": 1.1545, "step": 324 }, { "epoch": 0.03, "grad_norm": 11.669573960480008, "learning_rate": 4.829123328380386e-06, "loss": 1.1882, "step": 325 }, { "epoch": 0.03, "grad_norm": 11.260299621981513, "learning_rate": 4.843982169390788e-06, "loss": 1.097, "step": 326 }, { "epoch": 0.03, "grad_norm": 12.807511934158953, "learning_rate": 4.858841010401189e-06, "loss": 1.1386, "step": 327 }, { "epoch": 0.03, "grad_norm": 13.363941323925244, "learning_rate": 4.87369985141159e-06, "loss": 1.1141, "step": 328 }, { "epoch": 0.03, "grad_norm": 10.217746968184022, "learning_rate": 4.8885586924219916e-06, "loss": 1.1128, "step": 329 }, { "epoch": 0.03, "grad_norm": 12.48202238565019, "learning_rate": 4.903417533432393e-06, "loss": 1.161, "step": 330 }, { "epoch": 0.03, "grad_norm": 9.752973799420971, "learning_rate": 4.918276374442793e-06, "loss": 1.1064, "step": 331 }, { "epoch": 0.03, "grad_norm": 12.576794435696424, "learning_rate": 4.9331352154531954e-06, "loss": 1.1232, "step": 332 }, { "epoch": 0.03, "grad_norm": 11.97070575833564, "learning_rate": 4.947994056463597e-06, "loss": 1.117, "step": 333 }, { "epoch": 0.03, "grad_norm": 10.770311580299774, "learning_rate": 4.962852897473997e-06, "loss": 1.1629, "step": 334 }, { "epoch": 0.03, "grad_norm": 9.591697542732224, "learning_rate": 4.9777117384843985e-06, "loss": 1.1523, "step": 335 }, { "epoch": 0.03, "grad_norm": 12.144720072202839, "learning_rate": 4.9925705794948e-06, "loss": 1.1279, "step": 336 }, { "epoch": 0.03, "grad_norm": 11.492500918353, "learning_rate": 5.007429420505201e-06, "loss": 1.1576, "step": 337 }, { "epoch": 0.03, "grad_norm": 10.283862620597446, "learning_rate": 5.022288261515602e-06, "loss": 1.0454, "step": 338 }, { "epoch": 0.03, "grad_norm": 10.894127321918003, "learning_rate": 5.037147102526004e-06, "loss": 1.015, "step": 339 }, { "epoch": 0.03, "grad_norm": 10.881864241565623, "learning_rate": 5.052005943536404e-06, "loss": 1.1712, "step": 340 }, { "epoch": 0.03, "grad_norm": 13.812211780810685, "learning_rate": 5.066864784546805e-06, "loss": 1.1233, "step": 341 }, { "epoch": 0.03, "grad_norm": 11.664980533391345, "learning_rate": 5.081723625557207e-06, "loss": 1.1308, "step": 342 }, { "epoch": 0.03, "grad_norm": 13.219826431829576, "learning_rate": 5.096582466567608e-06, "loss": 1.1132, "step": 343 }, { "epoch": 0.03, "grad_norm": 14.366829061611822, "learning_rate": 5.111441307578009e-06, "loss": 1.1141, "step": 344 }, { "epoch": 0.03, "grad_norm": 15.252527209826669, "learning_rate": 5.12630014858841e-06, "loss": 1.0661, "step": 345 }, { "epoch": 0.03, "grad_norm": 12.001826088026796, "learning_rate": 5.141158989598811e-06, "loss": 1.127, "step": 346 }, { "epoch": 0.03, "grad_norm": 10.711817410896405, "learning_rate": 5.156017830609212e-06, "loss": 1.102, "step": 347 }, { "epoch": 0.03, "grad_norm": 11.129342182134883, "learning_rate": 5.1708766716196144e-06, "loss": 1.0366, "step": 348 }, { "epoch": 0.03, "grad_norm": 12.803651277071285, "learning_rate": 5.185735512630016e-06, "loss": 1.1489, "step": 349 }, { "epoch": 0.03, "grad_norm": 9.627983348990764, "learning_rate": 5.200594353640417e-06, "loss": 1.1832, "step": 350 }, { "epoch": 0.03, "grad_norm": 10.682764159326739, "learning_rate": 5.215453194650818e-06, "loss": 1.1601, "step": 351 }, { "epoch": 0.03, "grad_norm": 11.006312614977624, "learning_rate": 5.230312035661219e-06, "loss": 1.1131, "step": 352 }, { "epoch": 0.03, "grad_norm": 16.68769698032886, "learning_rate": 5.24517087667162e-06, "loss": 1.1326, "step": 353 }, { "epoch": 0.03, "grad_norm": 8.357079970804948, "learning_rate": 5.260029717682021e-06, "loss": 1.0554, "step": 354 }, { "epoch": 0.03, "grad_norm": 11.49207319213776, "learning_rate": 5.274888558692423e-06, "loss": 1.0998, "step": 355 }, { "epoch": 0.03, "grad_norm": 12.574872920554217, "learning_rate": 5.289747399702824e-06, "loss": 1.1145, "step": 356 }, { "epoch": 0.03, "grad_norm": 13.838195094509109, "learning_rate": 5.304606240713225e-06, "loss": 1.1592, "step": 357 }, { "epoch": 0.03, "grad_norm": 12.171830536264517, "learning_rate": 5.319465081723626e-06, "loss": 1.0931, "step": 358 }, { "epoch": 0.03, "grad_norm": 17.82660079430687, "learning_rate": 5.334323922734027e-06, "loss": 1.0801, "step": 359 }, { "epoch": 0.03, "grad_norm": 12.164632727151705, "learning_rate": 5.349182763744428e-06, "loss": 1.0847, "step": 360 }, { "epoch": 0.03, "grad_norm": 12.333894099834296, "learning_rate": 5.3640416047548295e-06, "loss": 1.0902, "step": 361 }, { "epoch": 0.03, "grad_norm": 13.118399448488406, "learning_rate": 5.378900445765231e-06, "loss": 1.1339, "step": 362 }, { "epoch": 0.03, "grad_norm": 12.872355798308535, "learning_rate": 5.393759286775631e-06, "loss": 1.0995, "step": 363 }, { "epoch": 0.03, "grad_norm": 10.901371635416284, "learning_rate": 5.408618127786033e-06, "loss": 1.1166, "step": 364 }, { "epoch": 0.03, "grad_norm": 13.570112246183713, "learning_rate": 5.423476968796434e-06, "loss": 1.1612, "step": 365 }, { "epoch": 0.03, "grad_norm": 10.577306247838742, "learning_rate": 5.438335809806835e-06, "loss": 1.1298, "step": 366 }, { "epoch": 0.03, "grad_norm": 16.016028292625748, "learning_rate": 5.4531946508172365e-06, "loss": 1.0656, "step": 367 }, { "epoch": 0.03, "grad_norm": 12.31447410430925, "learning_rate": 5.468053491827638e-06, "loss": 1.1652, "step": 368 }, { "epoch": 0.03, "grad_norm": 13.096617481720974, "learning_rate": 5.48291233283804e-06, "loss": 1.1162, "step": 369 }, { "epoch": 0.03, "grad_norm": 9.915602711290008, "learning_rate": 5.49777117384844e-06, "loss": 1.0988, "step": 370 }, { "epoch": 0.03, "grad_norm": 10.359593642520789, "learning_rate": 5.512630014858842e-06, "loss": 1.0883, "step": 371 }, { "epoch": 0.03, "grad_norm": 10.041169748367924, "learning_rate": 5.527488855869243e-06, "loss": 1.0486, "step": 372 }, { "epoch": 0.03, "grad_norm": 12.157720689523385, "learning_rate": 5.542347696879644e-06, "loss": 1.098, "step": 373 }, { "epoch": 0.03, "grad_norm": 10.989415528203331, "learning_rate": 5.5572065378900455e-06, "loss": 1.0264, "step": 374 }, { "epoch": 0.03, "grad_norm": 7.928899024622885, "learning_rate": 5.572065378900447e-06, "loss": 1.1299, "step": 375 }, { "epoch": 0.03, "grad_norm": 14.830538006019573, "learning_rate": 5.586924219910847e-06, "loss": 1.1004, "step": 376 }, { "epoch": 0.03, "grad_norm": 10.298892518913483, "learning_rate": 5.6017830609212485e-06, "loss": 1.1726, "step": 377 }, { "epoch": 0.03, "grad_norm": 9.597504879147783, "learning_rate": 5.61664190193165e-06, "loss": 1.1402, "step": 378 }, { "epoch": 0.03, "grad_norm": 12.315359962449914, "learning_rate": 5.631500742942051e-06, "loss": 1.0969, "step": 379 }, { "epoch": 0.03, "grad_norm": 11.474500926448806, "learning_rate": 5.6463595839524524e-06, "loss": 1.0829, "step": 380 }, { "epoch": 0.03, "grad_norm": 10.143376052591305, "learning_rate": 5.661218424962853e-06, "loss": 1.0613, "step": 381 }, { "epoch": 0.03, "grad_norm": 10.026457750192046, "learning_rate": 5.676077265973254e-06, "loss": 1.0886, "step": 382 }, { "epoch": 0.03, "grad_norm": 12.34071087714645, "learning_rate": 5.6909361069836555e-06, "loss": 1.1038, "step": 383 }, { "epoch": 0.03, "grad_norm": 12.087189805639952, "learning_rate": 5.705794947994057e-06, "loss": 1.1075, "step": 384 }, { "epoch": 0.03, "grad_norm": 18.182979736482697, "learning_rate": 5.720653789004458e-06, "loss": 1.1291, "step": 385 }, { "epoch": 0.03, "grad_norm": 11.850618220765881, "learning_rate": 5.735512630014859e-06, "loss": 1.147, "step": 386 }, { "epoch": 0.03, "grad_norm": 9.655433025474341, "learning_rate": 5.75037147102526e-06, "loss": 1.0633, "step": 387 }, { "epoch": 0.03, "grad_norm": 11.466120735041697, "learning_rate": 5.765230312035661e-06, "loss": 1.1737, "step": 388 }, { "epoch": 0.03, "grad_norm": 8.874822433638904, "learning_rate": 5.780089153046062e-06, "loss": 1.1781, "step": 389 }, { "epoch": 0.03, "grad_norm": 8.235618857025864, "learning_rate": 5.7949479940564645e-06, "loss": 1.1743, "step": 390 }, { "epoch": 0.03, "grad_norm": 9.672164991604085, "learning_rate": 5.809806835066866e-06, "loss": 1.0779, "step": 391 }, { "epoch": 0.03, "grad_norm": 9.214118010924405, "learning_rate": 5.824665676077267e-06, "loss": 1.1064, "step": 392 }, { "epoch": 0.04, "grad_norm": 10.186190132450484, "learning_rate": 5.839524517087668e-06, "loss": 1.1405, "step": 393 }, { "epoch": 0.04, "grad_norm": 8.544449788417802, "learning_rate": 5.854383358098069e-06, "loss": 1.1041, "step": 394 }, { "epoch": 0.04, "grad_norm": 14.711154397535067, "learning_rate": 5.86924219910847e-06, "loss": 1.1502, "step": 395 }, { "epoch": 0.04, "grad_norm": 10.694145647518669, "learning_rate": 5.884101040118871e-06, "loss": 1.0788, "step": 396 }, { "epoch": 0.04, "grad_norm": 12.940970512349567, "learning_rate": 5.898959881129273e-06, "loss": 1.092, "step": 397 }, { "epoch": 0.04, "grad_norm": 10.884556149567274, "learning_rate": 5.913818722139674e-06, "loss": 1.0877, "step": 398 }, { "epoch": 0.04, "grad_norm": 8.262113589977268, "learning_rate": 5.9286775631500745e-06, "loss": 1.1076, "step": 399 }, { "epoch": 0.04, "grad_norm": 9.51325697539055, "learning_rate": 5.943536404160476e-06, "loss": 1.0636, "step": 400 }, { "epoch": 0.04, "grad_norm": 13.446050393774, "learning_rate": 5.958395245170877e-06, "loss": 1.0826, "step": 401 }, { "epoch": 0.04, "grad_norm": 9.379205158608055, "learning_rate": 5.973254086181278e-06, "loss": 1.0193, "step": 402 }, { "epoch": 0.04, "grad_norm": 9.999284403289655, "learning_rate": 5.98811292719168e-06, "loss": 1.1267, "step": 403 }, { "epoch": 0.04, "grad_norm": 11.917812043293901, "learning_rate": 6.002971768202081e-06, "loss": 1.0823, "step": 404 }, { "epoch": 0.04, "grad_norm": 10.449861350114748, "learning_rate": 6.017830609212481e-06, "loss": 1.0346, "step": 405 }, { "epoch": 0.04, "grad_norm": 9.651418729054114, "learning_rate": 6.032689450222883e-06, "loss": 1.0463, "step": 406 }, { "epoch": 0.04, "grad_norm": 12.621962037936385, "learning_rate": 6.047548291233284e-06, "loss": 1.1162, "step": 407 }, { "epoch": 0.04, "grad_norm": 9.764463028967802, "learning_rate": 6.062407132243685e-06, "loss": 1.0848, "step": 408 }, { "epoch": 0.04, "grad_norm": 11.642134858716629, "learning_rate": 6.0772659732540865e-06, "loss": 1.0604, "step": 409 }, { "epoch": 0.04, "grad_norm": 12.631888644208388, "learning_rate": 6.092124814264487e-06, "loss": 1.0383, "step": 410 }, { "epoch": 0.04, "grad_norm": 10.927327038251738, "learning_rate": 6.10698365527489e-06, "loss": 1.0587, "step": 411 }, { "epoch": 0.04, "grad_norm": 9.813346491467549, "learning_rate": 6.12184249628529e-06, "loss": 1.0199, "step": 412 }, { "epoch": 0.04, "grad_norm": 9.976474295949616, "learning_rate": 6.136701337295692e-06, "loss": 1.0789, "step": 413 }, { "epoch": 0.04, "grad_norm": 9.218846154551258, "learning_rate": 6.151560178306093e-06, "loss": 1.085, "step": 414 }, { "epoch": 0.04, "grad_norm": 12.52370423732885, "learning_rate": 6.166419019316494e-06, "loss": 1.117, "step": 415 }, { "epoch": 0.04, "grad_norm": 13.333590316540821, "learning_rate": 6.181277860326896e-06, "loss": 1.0873, "step": 416 }, { "epoch": 0.04, "grad_norm": 13.700627178771182, "learning_rate": 6.196136701337296e-06, "loss": 1.0547, "step": 417 }, { "epoch": 0.04, "grad_norm": 10.713914393188082, "learning_rate": 6.210995542347697e-06, "loss": 1.115, "step": 418 }, { "epoch": 0.04, "grad_norm": 10.537414893672029, "learning_rate": 6.225854383358099e-06, "loss": 1.0873, "step": 419 }, { "epoch": 0.04, "grad_norm": 14.047764998940131, "learning_rate": 6.2407132243685e-06, "loss": 1.1282, "step": 420 }, { "epoch": 0.04, "grad_norm": 10.996923076730589, "learning_rate": 6.255572065378901e-06, "loss": 1.0704, "step": 421 }, { "epoch": 0.04, "grad_norm": 14.469191468092976, "learning_rate": 6.270430906389302e-06, "loss": 1.1288, "step": 422 }, { "epoch": 0.04, "grad_norm": 12.62057914801523, "learning_rate": 6.285289747399703e-06, "loss": 1.0763, "step": 423 }, { "epoch": 0.04, "grad_norm": 9.279374554299574, "learning_rate": 6.300148588410104e-06, "loss": 1.0355, "step": 424 }, { "epoch": 0.04, "grad_norm": 9.962173335077091, "learning_rate": 6.3150074294205055e-06, "loss": 1.0891, "step": 425 }, { "epoch": 0.04, "grad_norm": 12.042352441265631, "learning_rate": 6.329866270430907e-06, "loss": 1.1005, "step": 426 }, { "epoch": 0.04, "grad_norm": 10.0343757648233, "learning_rate": 6.344725111441308e-06, "loss": 1.0459, "step": 427 }, { "epoch": 0.04, "grad_norm": 11.545088790676903, "learning_rate": 6.3595839524517086e-06, "loss": 1.112, "step": 428 }, { "epoch": 0.04, "grad_norm": 15.433696255392372, "learning_rate": 6.37444279346211e-06, "loss": 1.1623, "step": 429 }, { "epoch": 0.04, "grad_norm": 12.103402093460588, "learning_rate": 6.389301634472511e-06, "loss": 1.039, "step": 430 }, { "epoch": 0.04, "grad_norm": 10.601063807102944, "learning_rate": 6.4041604754829124e-06, "loss": 1.131, "step": 431 }, { "epoch": 0.04, "grad_norm": 12.692039398190387, "learning_rate": 6.419019316493315e-06, "loss": 1.0846, "step": 432 }, { "epoch": 0.04, "grad_norm": 10.198569465131731, "learning_rate": 6.433878157503716e-06, "loss": 1.1596, "step": 433 }, { "epoch": 0.04, "grad_norm": 13.3797476064304, "learning_rate": 6.448736998514117e-06, "loss": 1.12, "step": 434 }, { "epoch": 0.04, "grad_norm": 10.063453876032748, "learning_rate": 6.463595839524518e-06, "loss": 1.1252, "step": 435 }, { "epoch": 0.04, "grad_norm": 13.038468465734933, "learning_rate": 6.478454680534919e-06, "loss": 1.0631, "step": 436 }, { "epoch": 0.04, "grad_norm": 15.035972964047915, "learning_rate": 6.49331352154532e-06, "loss": 1.0719, "step": 437 }, { "epoch": 0.04, "grad_norm": 10.133314064314307, "learning_rate": 6.5081723625557215e-06, "loss": 1.0336, "step": 438 }, { "epoch": 0.04, "grad_norm": 11.06964302939007, "learning_rate": 6.523031203566123e-06, "loss": 1.0922, "step": 439 }, { "epoch": 0.04, "grad_norm": 11.37830849371796, "learning_rate": 6.537890044576523e-06, "loss": 1.0918, "step": 440 }, { "epoch": 0.04, "grad_norm": 12.067596500017343, "learning_rate": 6.5527488855869245e-06, "loss": 1.0846, "step": 441 }, { "epoch": 0.04, "grad_norm": 10.298040611922534, "learning_rate": 6.567607726597326e-06, "loss": 1.1023, "step": 442 }, { "epoch": 0.04, "grad_norm": 9.774846456763964, "learning_rate": 6.582466567607727e-06, "loss": 1.024, "step": 443 }, { "epoch": 0.04, "grad_norm": 10.169866048657992, "learning_rate": 6.597325408618128e-06, "loss": 1.1111, "step": 444 }, { "epoch": 0.04, "grad_norm": 7.638399799046015, "learning_rate": 6.61218424962853e-06, "loss": 1.1371, "step": 445 }, { "epoch": 0.04, "grad_norm": 12.062016188197536, "learning_rate": 6.62704309063893e-06, "loss": 1.0947, "step": 446 }, { "epoch": 0.04, "grad_norm": 11.588075423302197, "learning_rate": 6.6419019316493314e-06, "loss": 1.0951, "step": 447 }, { "epoch": 0.04, "grad_norm": 9.561749521728126, "learning_rate": 6.656760772659733e-06, "loss": 1.0623, "step": 448 }, { "epoch": 0.04, "grad_norm": 12.058674443345739, "learning_rate": 6.671619613670134e-06, "loss": 1.065, "step": 449 }, { "epoch": 0.04, "grad_norm": 12.537991187831732, "learning_rate": 6.686478454680535e-06, "loss": 1.0648, "step": 450 }, { "epoch": 0.04, "grad_norm": 10.537193397979344, "learning_rate": 6.701337295690936e-06, "loss": 1.1165, "step": 451 }, { "epoch": 0.04, "grad_norm": 11.635958391067676, "learning_rate": 6.716196136701337e-06, "loss": 1.0481, "step": 452 }, { "epoch": 0.04, "grad_norm": 11.82577974690559, "learning_rate": 6.731054977711739e-06, "loss": 1.0411, "step": 453 }, { "epoch": 0.04, "grad_norm": 11.435756314570446, "learning_rate": 6.7459138187221405e-06, "loss": 1.1841, "step": 454 }, { "epoch": 0.04, "grad_norm": 10.568503818929775, "learning_rate": 6.760772659732542e-06, "loss": 1.0145, "step": 455 }, { "epoch": 0.04, "grad_norm": 10.947804702475315, "learning_rate": 6.775631500742943e-06, "loss": 1.0216, "step": 456 }, { "epoch": 0.04, "grad_norm": 12.758707295194826, "learning_rate": 6.790490341753344e-06, "loss": 1.0554, "step": 457 }, { "epoch": 0.04, "grad_norm": 8.916852313395255, "learning_rate": 6.805349182763745e-06, "loss": 1.0418, "step": 458 }, { "epoch": 0.04, "grad_norm": 10.891134571046292, "learning_rate": 6.820208023774146e-06, "loss": 1.0529, "step": 459 }, { "epoch": 0.04, "grad_norm": 12.702494665572685, "learning_rate": 6.835066864784547e-06, "loss": 1.0675, "step": 460 }, { "epoch": 0.04, "grad_norm": 12.235738744844763, "learning_rate": 6.849925705794949e-06, "loss": 1.119, "step": 461 }, { "epoch": 0.04, "grad_norm": 13.204578913554466, "learning_rate": 6.86478454680535e-06, "loss": 1.0392, "step": 462 }, { "epoch": 0.04, "grad_norm": 9.935780373604464, "learning_rate": 6.879643387815751e-06, "loss": 1.1044, "step": 463 }, { "epoch": 0.04, "grad_norm": 14.766522457944976, "learning_rate": 6.894502228826152e-06, "loss": 1.0862, "step": 464 }, { "epoch": 0.04, "grad_norm": 9.953917063614647, "learning_rate": 6.909361069836553e-06, "loss": 1.1255, "step": 465 }, { "epoch": 0.04, "grad_norm": 14.573650841571785, "learning_rate": 6.924219910846954e-06, "loss": 1.0395, "step": 466 }, { "epoch": 0.04, "grad_norm": 10.258706604637766, "learning_rate": 6.939078751857356e-06, "loss": 1.0577, "step": 467 }, { "epoch": 0.04, "grad_norm": 11.03921632844517, "learning_rate": 6.953937592867757e-06, "loss": 1.1145, "step": 468 }, { "epoch": 0.04, "grad_norm": 12.06022703305555, "learning_rate": 6.968796433878157e-06, "loss": 0.9777, "step": 469 }, { "epoch": 0.04, "grad_norm": 9.5656366673217, "learning_rate": 6.983655274888559e-06, "loss": 1.0527, "step": 470 }, { "epoch": 0.04, "grad_norm": 12.803081291319273, "learning_rate": 6.99851411589896e-06, "loss": 1.0367, "step": 471 }, { "epoch": 0.04, "grad_norm": 8.080953557823713, "learning_rate": 7.013372956909361e-06, "loss": 1.0159, "step": 472 }, { "epoch": 0.04, "grad_norm": 12.576575443761769, "learning_rate": 7.0282317979197625e-06, "loss": 1.1302, "step": 473 }, { "epoch": 0.04, "grad_norm": 10.389873208727682, "learning_rate": 7.043090638930165e-06, "loss": 1.0904, "step": 474 }, { "epoch": 0.04, "grad_norm": 11.24412578383744, "learning_rate": 7.057949479940566e-06, "loss": 1.0145, "step": 475 }, { "epoch": 0.04, "grad_norm": 8.085750076882057, "learning_rate": 7.072808320950966e-06, "loss": 1.1031, "step": 476 }, { "epoch": 0.04, "grad_norm": 11.525127650372667, "learning_rate": 7.087667161961368e-06, "loss": 1.0408, "step": 477 }, { "epoch": 0.04, "grad_norm": 12.198103636553823, "learning_rate": 7.102526002971769e-06, "loss": 1.0692, "step": 478 }, { "epoch": 0.04, "grad_norm": 10.363138094055538, "learning_rate": 7.11738484398217e-06, "loss": 1.0592, "step": 479 }, { "epoch": 0.04, "grad_norm": 9.299123155036039, "learning_rate": 7.1322436849925716e-06, "loss": 1.0844, "step": 480 }, { "epoch": 0.04, "grad_norm": 9.282021282912071, "learning_rate": 7.147102526002973e-06, "loss": 1.1159, "step": 481 }, { "epoch": 0.04, "grad_norm": 7.246026191917027, "learning_rate": 7.161961367013373e-06, "loss": 0.9831, "step": 482 }, { "epoch": 0.04, "grad_norm": 9.064639282097621, "learning_rate": 7.176820208023775e-06, "loss": 1.0781, "step": 483 }, { "epoch": 0.04, "grad_norm": 10.709223791930382, "learning_rate": 7.191679049034176e-06, "loss": 1.0965, "step": 484 }, { "epoch": 0.04, "grad_norm": 9.348270902778255, "learning_rate": 7.206537890044577e-06, "loss": 1.0928, "step": 485 }, { "epoch": 0.04, "grad_norm": 9.328008159250846, "learning_rate": 7.2213967310549785e-06, "loss": 1.0359, "step": 486 }, { "epoch": 0.04, "grad_norm": 14.345799529536906, "learning_rate": 7.236255572065379e-06, "loss": 0.9923, "step": 487 }, { "epoch": 0.04, "grad_norm": 10.287663906708264, "learning_rate": 7.25111441307578e-06, "loss": 1.0258, "step": 488 }, { "epoch": 0.04, "grad_norm": 10.10281497892318, "learning_rate": 7.2659732540861815e-06, "loss": 1.1016, "step": 489 }, { "epoch": 0.04, "grad_norm": 13.917241726250158, "learning_rate": 7.280832095096583e-06, "loss": 1.1344, "step": 490 }, { "epoch": 0.04, "grad_norm": 9.441572543164023, "learning_rate": 7.295690936106984e-06, "loss": 1.1199, "step": 491 }, { "epoch": 0.04, "grad_norm": 9.018870027566267, "learning_rate": 7.310549777117385e-06, "loss": 1.0834, "step": 492 }, { "epoch": 0.04, "grad_norm": 9.877156730788565, "learning_rate": 7.325408618127786e-06, "loss": 0.9573, "step": 493 }, { "epoch": 0.04, "grad_norm": 11.21742714100454, "learning_rate": 7.340267459138187e-06, "loss": 0.9669, "step": 494 }, { "epoch": 0.04, "grad_norm": 11.761872924756993, "learning_rate": 7.355126300148589e-06, "loss": 1.0723, "step": 495 }, { "epoch": 0.04, "grad_norm": 9.064189396129455, "learning_rate": 7.3699851411589906e-06, "loss": 1.0928, "step": 496 }, { "epoch": 0.04, "grad_norm": 15.755575959351075, "learning_rate": 7.384843982169392e-06, "loss": 1.0533, "step": 497 }, { "epoch": 0.04, "grad_norm": 8.66794328807516, "learning_rate": 7.399702823179793e-06, "loss": 1.0944, "step": 498 }, { "epoch": 0.04, "grad_norm": 11.372905042944108, "learning_rate": 7.4145616641901944e-06, "loss": 1.0713, "step": 499 }, { "epoch": 0.04, "grad_norm": 13.2502854265202, "learning_rate": 7.429420505200595e-06, "loss": 1.1844, "step": 500 }, { "epoch": 0.04, "grad_norm": 9.195471989176056, "learning_rate": 7.444279346210996e-06, "loss": 1.0479, "step": 501 }, { "epoch": 0.04, "grad_norm": 10.949909025416147, "learning_rate": 7.4591381872213975e-06, "loss": 1.1865, "step": 502 }, { "epoch": 0.04, "grad_norm": 10.252290027661497, "learning_rate": 7.473997028231799e-06, "loss": 1.0415, "step": 503 }, { "epoch": 0.04, "grad_norm": 8.893804698230767, "learning_rate": 7.4888558692422e-06, "loss": 1.1058, "step": 504 }, { "epoch": 0.05, "grad_norm": 8.41859529234815, "learning_rate": 7.5037147102526005e-06, "loss": 1.0247, "step": 505 }, { "epoch": 0.05, "grad_norm": 9.216347496885334, "learning_rate": 7.518573551263002e-06, "loss": 1.0477, "step": 506 }, { "epoch": 0.05, "grad_norm": 9.610736465901066, "learning_rate": 7.533432392273403e-06, "loss": 1.0016, "step": 507 }, { "epoch": 0.05, "grad_norm": 10.669955935585106, "learning_rate": 7.548291233283804e-06, "loss": 1.1493, "step": 508 }, { "epoch": 0.05, "grad_norm": 9.925053525678942, "learning_rate": 7.563150074294206e-06, "loss": 1.0773, "step": 509 }, { "epoch": 0.05, "grad_norm": 10.209142854481104, "learning_rate": 7.578008915304607e-06, "loss": 1.0814, "step": 510 }, { "epoch": 0.05, "grad_norm": 9.265542297306787, "learning_rate": 7.5928677563150074e-06, "loss": 1.0712, "step": 511 }, { "epoch": 0.05, "grad_norm": 10.755730234614122, "learning_rate": 7.607726597325409e-06, "loss": 1.0809, "step": 512 }, { "epoch": 0.05, "grad_norm": 11.266738188025256, "learning_rate": 7.62258543833581e-06, "loss": 1.0278, "step": 513 }, { "epoch": 0.05, "grad_norm": 9.732289132469514, "learning_rate": 7.63744427934621e-06, "loss": 1.0468, "step": 514 }, { "epoch": 0.05, "grad_norm": 10.259449102927174, "learning_rate": 7.652303120356612e-06, "loss": 1.1276, "step": 515 }, { "epoch": 0.05, "grad_norm": 10.970650616986509, "learning_rate": 7.667161961367015e-06, "loss": 1.0291, "step": 516 }, { "epoch": 0.05, "grad_norm": 11.984941535528321, "learning_rate": 7.682020802377416e-06, "loss": 1.0475, "step": 517 }, { "epoch": 0.05, "grad_norm": 8.98840673581107, "learning_rate": 7.696879643387817e-06, "loss": 1.1277, "step": 518 }, { "epoch": 0.05, "grad_norm": 9.351965589564253, "learning_rate": 7.711738484398219e-06, "loss": 1.0895, "step": 519 }, { "epoch": 0.05, "grad_norm": 10.53645337733584, "learning_rate": 7.72659732540862e-06, "loss": 1.0809, "step": 520 }, { "epoch": 0.05, "grad_norm": 13.220435955460754, "learning_rate": 7.74145616641902e-06, "loss": 1.0287, "step": 521 }, { "epoch": 0.05, "grad_norm": 11.970058713671222, "learning_rate": 7.75631500742942e-06, "loss": 1.0969, "step": 522 }, { "epoch": 0.05, "grad_norm": 13.48724598880786, "learning_rate": 7.771173848439822e-06, "loss": 1.0105, "step": 523 }, { "epoch": 0.05, "grad_norm": 10.599097087875933, "learning_rate": 7.786032689450223e-06, "loss": 1.112, "step": 524 }, { "epoch": 0.05, "grad_norm": 14.051624029583836, "learning_rate": 7.800891530460625e-06, "loss": 1.0641, "step": 525 }, { "epoch": 0.05, "grad_norm": 14.024510106001976, "learning_rate": 7.815750371471026e-06, "loss": 1.0594, "step": 526 }, { "epoch": 0.05, "grad_norm": 11.423018360409106, "learning_rate": 7.830609212481427e-06, "loss": 1.0375, "step": 527 }, { "epoch": 0.05, "grad_norm": 9.611068641736255, "learning_rate": 7.845468053491829e-06, "loss": 1.0908, "step": 528 }, { "epoch": 0.05, "grad_norm": 12.885593143256525, "learning_rate": 7.86032689450223e-06, "loss": 1.1056, "step": 529 }, { "epoch": 0.05, "grad_norm": 9.11642312546528, "learning_rate": 7.875185735512631e-06, "loss": 1.0539, "step": 530 }, { "epoch": 0.05, "grad_norm": 13.952679676317683, "learning_rate": 7.89004457652303e-06, "loss": 1.0974, "step": 531 }, { "epoch": 0.05, "grad_norm": 9.97238125579649, "learning_rate": 7.904903417533432e-06, "loss": 1.0293, "step": 532 }, { "epoch": 0.05, "grad_norm": 8.239216208918084, "learning_rate": 7.919762258543833e-06, "loss": 1.0133, "step": 533 }, { "epoch": 0.05, "grad_norm": 11.887399670671932, "learning_rate": 7.934621099554235e-06, "loss": 1.0551, "step": 534 }, { "epoch": 0.05, "grad_norm": 9.42900621886104, "learning_rate": 7.949479940564636e-06, "loss": 1.1033, "step": 535 }, { "epoch": 0.05, "grad_norm": 15.96927325842374, "learning_rate": 7.964338781575037e-06, "loss": 1.0594, "step": 536 }, { "epoch": 0.05, "grad_norm": 11.717799099126825, "learning_rate": 7.97919762258544e-06, "loss": 1.0247, "step": 537 }, { "epoch": 0.05, "grad_norm": 11.012322170488845, "learning_rate": 7.99405646359584e-06, "loss": 1.0061, "step": 538 }, { "epoch": 0.05, "grad_norm": 7.942903167029972, "learning_rate": 8.008915304606241e-06, "loss": 1.05, "step": 539 }, { "epoch": 0.05, "grad_norm": 10.690848417593793, "learning_rate": 8.023774145616642e-06, "loss": 1.0701, "step": 540 }, { "epoch": 0.05, "grad_norm": 9.782884977548475, "learning_rate": 8.038632986627044e-06, "loss": 1.0698, "step": 541 }, { "epoch": 0.05, "grad_norm": 10.58376454100326, "learning_rate": 8.053491827637445e-06, "loss": 1.0728, "step": 542 }, { "epoch": 0.05, "grad_norm": 10.99206625704471, "learning_rate": 8.068350668647846e-06, "loss": 1.0914, "step": 543 }, { "epoch": 0.05, "grad_norm": 13.849014127736218, "learning_rate": 8.083209509658248e-06, "loss": 1.0262, "step": 544 }, { "epoch": 0.05, "grad_norm": 11.343768881715658, "learning_rate": 8.098068350668649e-06, "loss": 1.0523, "step": 545 }, { "epoch": 0.05, "grad_norm": 10.164830071550107, "learning_rate": 8.11292719167905e-06, "loss": 1.0536, "step": 546 }, { "epoch": 0.05, "grad_norm": 11.664979640879006, "learning_rate": 8.127786032689451e-06, "loss": 1.0313, "step": 547 }, { "epoch": 0.05, "grad_norm": 9.77106407245154, "learning_rate": 8.142644873699853e-06, "loss": 1.1136, "step": 548 }, { "epoch": 0.05, "grad_norm": 11.62209264710672, "learning_rate": 8.157503714710252e-06, "loss": 1.0647, "step": 549 }, { "epoch": 0.05, "grad_norm": 10.186908296221132, "learning_rate": 8.172362555720654e-06, "loss": 1.0978, "step": 550 }, { "epoch": 0.05, "grad_norm": 16.362927591768752, "learning_rate": 8.187221396731055e-06, "loss": 1.0554, "step": 551 }, { "epoch": 0.05, "grad_norm": 12.142361852107289, "learning_rate": 8.202080237741456e-06, "loss": 1.0355, "step": 552 }, { "epoch": 0.05, "grad_norm": 9.902229141196612, "learning_rate": 8.216939078751858e-06, "loss": 1.1195, "step": 553 }, { "epoch": 0.05, "grad_norm": 9.760229988730577, "learning_rate": 8.231797919762259e-06, "loss": 1.0914, "step": 554 }, { "epoch": 0.05, "grad_norm": 12.379092877488281, "learning_rate": 8.24665676077266e-06, "loss": 0.9086, "step": 555 }, { "epoch": 0.05, "grad_norm": 11.573760725419241, "learning_rate": 8.261515601783061e-06, "loss": 0.9965, "step": 556 }, { "epoch": 0.05, "grad_norm": 9.878017266700095, "learning_rate": 8.276374442793463e-06, "loss": 1.0434, "step": 557 }, { "epoch": 0.05, "grad_norm": 11.977434223424188, "learning_rate": 8.291233283803864e-06, "loss": 1.0584, "step": 558 }, { "epoch": 0.05, "grad_norm": 8.894103118622146, "learning_rate": 8.306092124814265e-06, "loss": 1.0357, "step": 559 }, { "epoch": 0.05, "grad_norm": 10.846983152986644, "learning_rate": 8.320950965824667e-06, "loss": 1.1461, "step": 560 }, { "epoch": 0.05, "grad_norm": 8.7437703543386, "learning_rate": 8.335809806835068e-06, "loss": 1.0718, "step": 561 }, { "epoch": 0.05, "grad_norm": 11.083292178410538, "learning_rate": 8.350668647845469e-06, "loss": 0.9969, "step": 562 }, { "epoch": 0.05, "grad_norm": 10.258467562269697, "learning_rate": 8.36552748885587e-06, "loss": 0.9757, "step": 563 }, { "epoch": 0.05, "grad_norm": 10.530940687389796, "learning_rate": 8.380386329866272e-06, "loss": 1.0477, "step": 564 }, { "epoch": 0.05, "grad_norm": 12.44524237541492, "learning_rate": 8.395245170876673e-06, "loss": 1.1481, "step": 565 }, { "epoch": 0.05, "grad_norm": 11.595558323591767, "learning_rate": 8.410104011887074e-06, "loss": 1.0542, "step": 566 }, { "epoch": 0.05, "grad_norm": 8.83283229553203, "learning_rate": 8.424962852897474e-06, "loss": 1.089, "step": 567 }, { "epoch": 0.05, "grad_norm": 11.391334696446814, "learning_rate": 8.439821693907875e-06, "loss": 1.0068, "step": 568 }, { "epoch": 0.05, "grad_norm": 13.548702971173704, "learning_rate": 8.454680534918276e-06, "loss": 1.0674, "step": 569 }, { "epoch": 0.05, "grad_norm": 13.641426146186935, "learning_rate": 8.469539375928678e-06, "loss": 0.9898, "step": 570 }, { "epoch": 0.05, "grad_norm": 12.888977843317264, "learning_rate": 8.484398216939079e-06, "loss": 1.1028, "step": 571 }, { "epoch": 0.05, "grad_norm": 12.256762954394175, "learning_rate": 8.49925705794948e-06, "loss": 1.0672, "step": 572 }, { "epoch": 0.05, "grad_norm": 10.433856872753363, "learning_rate": 8.514115898959882e-06, "loss": 1.0657, "step": 573 }, { "epoch": 0.05, "grad_norm": 9.954871235712872, "learning_rate": 8.528974739970283e-06, "loss": 1.0413, "step": 574 }, { "epoch": 0.05, "grad_norm": 8.262491889811633, "learning_rate": 8.543833580980684e-06, "loss": 1.0713, "step": 575 }, { "epoch": 0.05, "grad_norm": 8.9496586031532, "learning_rate": 8.558692421991086e-06, "loss": 1.0109, "step": 576 }, { "epoch": 0.05, "grad_norm": 9.21498577767788, "learning_rate": 8.573551263001487e-06, "loss": 1.063, "step": 577 }, { "epoch": 0.05, "grad_norm": 10.946231435196236, "learning_rate": 8.588410104011886e-06, "loss": 1.1269, "step": 578 }, { "epoch": 0.05, "grad_norm": 8.298141586611257, "learning_rate": 8.60326894502229e-06, "loss": 1.0085, "step": 579 }, { "epoch": 0.05, "grad_norm": 10.679306259663587, "learning_rate": 8.61812778603269e-06, "loss": 1.0236, "step": 580 }, { "epoch": 0.05, "grad_norm": 11.791069919927743, "learning_rate": 8.632986627043092e-06, "loss": 1.0323, "step": 581 }, { "epoch": 0.05, "grad_norm": 10.991060292165049, "learning_rate": 8.647845468053493e-06, "loss": 1.0712, "step": 582 }, { "epoch": 0.05, "grad_norm": 15.505209032076564, "learning_rate": 8.662704309063895e-06, "loss": 1.0781, "step": 583 }, { "epoch": 0.05, "grad_norm": 10.075115246551459, "learning_rate": 8.677563150074296e-06, "loss": 1.0894, "step": 584 }, { "epoch": 0.05, "grad_norm": 12.61895286352917, "learning_rate": 8.692421991084695e-06, "loss": 1.0333, "step": 585 }, { "epoch": 0.05, "grad_norm": 9.269378367666873, "learning_rate": 8.707280832095097e-06, "loss": 1.0445, "step": 586 }, { "epoch": 0.05, "grad_norm": 11.963542249585114, "learning_rate": 8.722139673105498e-06, "loss": 1.0383, "step": 587 }, { "epoch": 0.05, "grad_norm": 7.889542963421267, "learning_rate": 8.7369985141159e-06, "loss": 0.9926, "step": 588 }, { "epoch": 0.05, "grad_norm": 14.393390392507921, "learning_rate": 8.7518573551263e-06, "loss": 1.0115, "step": 589 }, { "epoch": 0.05, "grad_norm": 9.988164169695649, "learning_rate": 8.766716196136702e-06, "loss": 1.0094, "step": 590 }, { "epoch": 0.05, "grad_norm": 9.51433748687375, "learning_rate": 8.781575037147103e-06, "loss": 1.0917, "step": 591 }, { "epoch": 0.05, "grad_norm": 10.245819635308987, "learning_rate": 8.796433878157505e-06, "loss": 1.0714, "step": 592 }, { "epoch": 0.05, "grad_norm": 10.701221215211948, "learning_rate": 8.811292719167906e-06, "loss": 1.1008, "step": 593 }, { "epoch": 0.05, "grad_norm": 11.336619668775048, "learning_rate": 8.826151560178307e-06, "loss": 1.075, "step": 594 }, { "epoch": 0.05, "grad_norm": 12.758621238138701, "learning_rate": 8.841010401188708e-06, "loss": 1.0117, "step": 595 }, { "epoch": 0.05, "grad_norm": 9.660061475354045, "learning_rate": 8.855869242199108e-06, "loss": 1.0854, "step": 596 }, { "epoch": 0.05, "grad_norm": 8.054250850552469, "learning_rate": 8.87072808320951e-06, "loss": 1.1383, "step": 597 }, { "epoch": 0.05, "grad_norm": 11.307742987532654, "learning_rate": 8.88558692421991e-06, "loss": 1.0894, "step": 598 }, { "epoch": 0.05, "grad_norm": 8.629190813633109, "learning_rate": 8.900445765230312e-06, "loss": 1.016, "step": 599 }, { "epoch": 0.05, "grad_norm": 9.475523864259122, "learning_rate": 8.915304606240715e-06, "loss": 1.0546, "step": 600 }, { "epoch": 0.05, "grad_norm": 7.846085196322862, "learning_rate": 8.930163447251116e-06, "loss": 1.0179, "step": 601 }, { "epoch": 0.05, "grad_norm": 8.955986427794002, "learning_rate": 8.945022288261517e-06, "loss": 1.0847, "step": 602 }, { "epoch": 0.05, "grad_norm": 13.644612383693392, "learning_rate": 8.959881129271917e-06, "loss": 1.0692, "step": 603 }, { "epoch": 0.05, "grad_norm": 11.17322004505467, "learning_rate": 8.974739970282318e-06, "loss": 1.0944, "step": 604 }, { "epoch": 0.05, "grad_norm": 9.024198208821508, "learning_rate": 8.98959881129272e-06, "loss": 1.0187, "step": 605 }, { "epoch": 0.05, "grad_norm": 11.366883233623614, "learning_rate": 9.004457652303121e-06, "loss": 1.0238, "step": 606 }, { "epoch": 0.05, "grad_norm": 10.91688822096214, "learning_rate": 9.019316493313522e-06, "loss": 1.0113, "step": 607 }, { "epoch": 0.05, "grad_norm": 9.139097874384309, "learning_rate": 9.034175334323924e-06, "loss": 1.0747, "step": 608 }, { "epoch": 0.05, "grad_norm": 13.15402019332059, "learning_rate": 9.049034175334325e-06, "loss": 1.0488, "step": 609 }, { "epoch": 0.05, "grad_norm": 12.412005995499799, "learning_rate": 9.063893016344726e-06, "loss": 0.989, "step": 610 }, { "epoch": 0.05, "grad_norm": 10.097195969900469, "learning_rate": 9.078751857355127e-06, "loss": 1.0094, "step": 611 }, { "epoch": 0.05, "grad_norm": 7.618506413220849, "learning_rate": 9.093610698365529e-06, "loss": 1.1212, "step": 612 }, { "epoch": 0.05, "grad_norm": 10.47134611274406, "learning_rate": 9.10846953937593e-06, "loss": 1.0212, "step": 613 }, { "epoch": 0.05, "grad_norm": 9.558707914610387, "learning_rate": 9.12332838038633e-06, "loss": 1.0465, "step": 614 }, { "epoch": 0.05, "grad_norm": 8.239846394359331, "learning_rate": 9.138187221396731e-06, "loss": 1.0336, "step": 615 }, { "epoch": 0.05, "grad_norm": 11.868584636839273, "learning_rate": 9.153046062407132e-06, "loss": 1.0395, "step": 616 }, { "epoch": 0.06, "grad_norm": 12.931118231631226, "learning_rate": 9.167904903417533e-06, "loss": 1.0187, "step": 617 }, { "epoch": 0.06, "grad_norm": 8.246990409768058, "learning_rate": 9.182763744427935e-06, "loss": 1.0288, "step": 618 }, { "epoch": 0.06, "grad_norm": 9.87647692590792, "learning_rate": 9.197622585438336e-06, "loss": 0.9611, "step": 619 }, { "epoch": 0.06, "grad_norm": 11.46081953158234, "learning_rate": 9.212481426448737e-06, "loss": 1.1024, "step": 620 }, { "epoch": 0.06, "grad_norm": 9.889556633143053, "learning_rate": 9.227340267459139e-06, "loss": 1.0969, "step": 621 }, { "epoch": 0.06, "grad_norm": 9.925013080633466, "learning_rate": 9.24219910846954e-06, "loss": 1.0522, "step": 622 }, { "epoch": 0.06, "grad_norm": 11.364528651810405, "learning_rate": 9.257057949479941e-06, "loss": 1.0758, "step": 623 }, { "epoch": 0.06, "grad_norm": 7.302806875329254, "learning_rate": 9.271916790490343e-06, "loss": 1.0135, "step": 624 }, { "epoch": 0.06, "grad_norm": 9.758099993632804, "learning_rate": 9.286775631500744e-06, "loss": 1.0677, "step": 625 }, { "epoch": 0.06, "grad_norm": 9.781732195415422, "learning_rate": 9.301634472511145e-06, "loss": 1.0507, "step": 626 }, { "epoch": 0.06, "grad_norm": 13.24137316254574, "learning_rate": 9.316493313521546e-06, "loss": 1.037, "step": 627 }, { "epoch": 0.06, "grad_norm": 9.148056374662406, "learning_rate": 9.331352154531948e-06, "loss": 1.0403, "step": 628 }, { "epoch": 0.06, "grad_norm": 11.421279834087812, "learning_rate": 9.346210995542349e-06, "loss": 0.9584, "step": 629 }, { "epoch": 0.06, "grad_norm": 9.285767992381842, "learning_rate": 9.36106983655275e-06, "loss": 1.0439, "step": 630 }, { "epoch": 0.06, "grad_norm": 14.191651208841742, "learning_rate": 9.375928677563152e-06, "loss": 1.0583, "step": 631 }, { "epoch": 0.06, "grad_norm": 11.2131632169938, "learning_rate": 9.390787518573551e-06, "loss": 1.0128, "step": 632 }, { "epoch": 0.06, "grad_norm": 9.877191523982757, "learning_rate": 9.405646359583952e-06, "loss": 1.0106, "step": 633 }, { "epoch": 0.06, "grad_norm": 13.289356409960144, "learning_rate": 9.420505200594354e-06, "loss": 1.0275, "step": 634 }, { "epoch": 0.06, "grad_norm": 9.452573514775871, "learning_rate": 9.435364041604755e-06, "loss": 0.9913, "step": 635 }, { "epoch": 0.06, "grad_norm": 13.19085334689723, "learning_rate": 9.450222882615156e-06, "loss": 0.9812, "step": 636 }, { "epoch": 0.06, "grad_norm": 14.432550063786342, "learning_rate": 9.465081723625558e-06, "loss": 1.0583, "step": 637 }, { "epoch": 0.06, "grad_norm": 9.267224293385516, "learning_rate": 9.479940564635959e-06, "loss": 0.9808, "step": 638 }, { "epoch": 0.06, "grad_norm": 11.428995053984929, "learning_rate": 9.49479940564636e-06, "loss": 1.0211, "step": 639 }, { "epoch": 0.06, "grad_norm": 10.343755396807515, "learning_rate": 9.509658246656762e-06, "loss": 0.9794, "step": 640 }, { "epoch": 0.06, "grad_norm": 10.862853573250566, "learning_rate": 9.524517087667163e-06, "loss": 0.9794, "step": 641 }, { "epoch": 0.06, "grad_norm": 9.001965834339806, "learning_rate": 9.539375928677564e-06, "loss": 1.0429, "step": 642 }, { "epoch": 0.06, "grad_norm": 10.247742617489289, "learning_rate": 9.554234769687965e-06, "loss": 1.0699, "step": 643 }, { "epoch": 0.06, "grad_norm": 10.11654038506872, "learning_rate": 9.569093610698367e-06, "loss": 1.0915, "step": 644 }, { "epoch": 0.06, "grad_norm": 9.277941921939588, "learning_rate": 9.583952451708768e-06, "loss": 0.9911, "step": 645 }, { "epoch": 0.06, "grad_norm": 8.339910802415295, "learning_rate": 9.59881129271917e-06, "loss": 1.0066, "step": 646 }, { "epoch": 0.06, "grad_norm": 9.60651880666665, "learning_rate": 9.61367013372957e-06, "loss": 0.9628, "step": 647 }, { "epoch": 0.06, "grad_norm": 10.985092683082364, "learning_rate": 9.628528974739972e-06, "loss": 1.1053, "step": 648 }, { "epoch": 0.06, "grad_norm": 10.519234349314388, "learning_rate": 9.643387815750373e-06, "loss": 1.0782, "step": 649 }, { "epoch": 0.06, "grad_norm": 9.924695852282168, "learning_rate": 9.658246656760773e-06, "loss": 0.9985, "step": 650 }, { "epoch": 0.06, "grad_norm": 7.737882818286664, "learning_rate": 9.673105497771174e-06, "loss": 0.9668, "step": 651 }, { "epoch": 0.06, "grad_norm": 9.988021742516203, "learning_rate": 9.687964338781575e-06, "loss": 0.9903, "step": 652 }, { "epoch": 0.06, "grad_norm": 9.682210911072573, "learning_rate": 9.702823179791977e-06, "loss": 1.0642, "step": 653 }, { "epoch": 0.06, "grad_norm": 10.44704879972261, "learning_rate": 9.717682020802378e-06, "loss": 1.0436, "step": 654 }, { "epoch": 0.06, "grad_norm": 8.846456270468657, "learning_rate": 9.73254086181278e-06, "loss": 1.0275, "step": 655 }, { "epoch": 0.06, "grad_norm": 10.240281836478205, "learning_rate": 9.74739970282318e-06, "loss": 1.0986, "step": 656 }, { "epoch": 0.06, "grad_norm": 11.521280315221375, "learning_rate": 9.762258543833582e-06, "loss": 1.0837, "step": 657 }, { "epoch": 0.06, "grad_norm": 7.922877500881096, "learning_rate": 9.777117384843983e-06, "loss": 1.054, "step": 658 }, { "epoch": 0.06, "grad_norm": 9.869601100798858, "learning_rate": 9.791976225854384e-06, "loss": 1.0691, "step": 659 }, { "epoch": 0.06, "grad_norm": 10.812401802226132, "learning_rate": 9.806835066864786e-06, "loss": 1.0014, "step": 660 }, { "epoch": 0.06, "grad_norm": 7.4019707347110035, "learning_rate": 9.821693907875185e-06, "loss": 1.0295, "step": 661 }, { "epoch": 0.06, "grad_norm": 9.22565453490378, "learning_rate": 9.836552748885587e-06, "loss": 0.9849, "step": 662 }, { "epoch": 0.06, "grad_norm": 8.668102218136031, "learning_rate": 9.85141158989599e-06, "loss": 1.1027, "step": 663 }, { "epoch": 0.06, "grad_norm": 8.600620178550768, "learning_rate": 9.866270430906391e-06, "loss": 1.0592, "step": 664 }, { "epoch": 0.06, "grad_norm": 8.80767286027621, "learning_rate": 9.881129271916792e-06, "loss": 1.0542, "step": 665 }, { "epoch": 0.06, "grad_norm": 13.616585560979118, "learning_rate": 9.895988112927193e-06, "loss": 1.091, "step": 666 }, { "epoch": 0.06, "grad_norm": 8.75139719210695, "learning_rate": 9.910846953937595e-06, "loss": 0.9999, "step": 667 }, { "epoch": 0.06, "grad_norm": 9.090000507237583, "learning_rate": 9.925705794947994e-06, "loss": 1.0477, "step": 668 }, { "epoch": 0.06, "grad_norm": 8.80835990273545, "learning_rate": 9.940564635958396e-06, "loss": 1.0699, "step": 669 }, { "epoch": 0.06, "grad_norm": 12.0778392045012, "learning_rate": 9.955423476968797e-06, "loss": 1.0987, "step": 670 }, { "epoch": 0.06, "grad_norm": 9.447115539065816, "learning_rate": 9.970282317979198e-06, "loss": 0.987, "step": 671 }, { "epoch": 0.06, "grad_norm": 7.6998551159311335, "learning_rate": 9.9851411589896e-06, "loss": 1.0205, "step": 672 }, { "epoch": 0.06, "grad_norm": 9.264205615981833, "learning_rate": 1e-05, "loss": 1.0043, "step": 673 }, { "epoch": 0.06, "grad_norm": 8.243592653176773, "learning_rate": 9.999999947817975e-06, "loss": 1.0488, "step": 674 }, { "epoch": 0.06, "grad_norm": 8.048871830901943, "learning_rate": 9.999999791271902e-06, "loss": 1.117, "step": 675 }, { "epoch": 0.06, "grad_norm": 10.295754683650285, "learning_rate": 9.999999530361782e-06, "loss": 0.9816, "step": 676 }, { "epoch": 0.06, "grad_norm": 9.793308101542664, "learning_rate": 9.999999165087622e-06, "loss": 0.996, "step": 677 }, { "epoch": 0.06, "grad_norm": 8.832588797559488, "learning_rate": 9.999998695449427e-06, "loss": 1.0481, "step": 678 }, { "epoch": 0.06, "grad_norm": 11.269392462989188, "learning_rate": 9.999998121447212e-06, "loss": 1.0856, "step": 679 }, { "epoch": 0.06, "grad_norm": 10.616875405224269, "learning_rate": 9.999997443080984e-06, "loss": 1.0914, "step": 680 }, { "epoch": 0.06, "grad_norm": 12.14873935462538, "learning_rate": 9.999996660350762e-06, "loss": 0.9706, "step": 681 }, { "epoch": 0.06, "grad_norm": 9.280734142264352, "learning_rate": 9.999995773256556e-06, "loss": 1.0384, "step": 682 }, { "epoch": 0.06, "grad_norm": 11.511662389933731, "learning_rate": 9.99999478179839e-06, "loss": 1.0213, "step": 683 }, { "epoch": 0.06, "grad_norm": 12.122756310484402, "learning_rate": 9.999993685976283e-06, "loss": 1.0219, "step": 684 }, { "epoch": 0.06, "grad_norm": 10.605958925750578, "learning_rate": 9.999992485790257e-06, "loss": 1.004, "step": 685 }, { "epoch": 0.06, "grad_norm": 8.00937450456748, "learning_rate": 9.999991181240337e-06, "loss": 1.0438, "step": 686 }, { "epoch": 0.06, "grad_norm": 10.773401582333547, "learning_rate": 9.999989772326551e-06, "loss": 1.0334, "step": 687 }, { "epoch": 0.06, "grad_norm": 10.199019187336722, "learning_rate": 9.99998825904893e-06, "loss": 0.9849, "step": 688 }, { "epoch": 0.06, "grad_norm": 9.591213520881503, "learning_rate": 9.999986641407503e-06, "loss": 0.9909, "step": 689 }, { "epoch": 0.06, "grad_norm": 8.855999649063298, "learning_rate": 9.999984919402304e-06, "loss": 1.0558, "step": 690 }, { "epoch": 0.06, "grad_norm": 11.609528934572811, "learning_rate": 9.99998309303337e-06, "loss": 1.1312, "step": 691 }, { "epoch": 0.06, "grad_norm": 9.25822169894325, "learning_rate": 9.999981162300737e-06, "loss": 1.1013, "step": 692 }, { "epoch": 0.06, "grad_norm": 8.473279294889037, "learning_rate": 9.999979127204449e-06, "loss": 1.0089, "step": 693 }, { "epoch": 0.06, "grad_norm": 10.801398281963607, "learning_rate": 9.999976987744546e-06, "loss": 1.0259, "step": 694 }, { "epoch": 0.06, "grad_norm": 11.02365635691635, "learning_rate": 9.999974743921075e-06, "loss": 1.0535, "step": 695 }, { "epoch": 0.06, "grad_norm": 9.14254965528287, "learning_rate": 9.999972395734078e-06, "loss": 1.1158, "step": 696 }, { "epoch": 0.06, "grad_norm": 11.058428729146204, "learning_rate": 9.999969943183609e-06, "loss": 1.0949, "step": 697 }, { "epoch": 0.06, "grad_norm": 12.54811137290809, "learning_rate": 9.999967386269715e-06, "loss": 0.9733, "step": 698 }, { "epoch": 0.06, "grad_norm": 9.42264648426438, "learning_rate": 9.999964724992454e-06, "loss": 1.0079, "step": 699 }, { "epoch": 0.06, "grad_norm": 9.351788792252671, "learning_rate": 9.999961959351878e-06, "loss": 1.065, "step": 700 }, { "epoch": 0.06, "grad_norm": 9.9013842617969, "learning_rate": 9.999959089348046e-06, "loss": 0.9206, "step": 701 }, { "epoch": 0.06, "grad_norm": 9.898082023335332, "learning_rate": 9.999956114981018e-06, "loss": 1.0439, "step": 702 }, { "epoch": 0.06, "grad_norm": 11.092520153388849, "learning_rate": 9.999953036250856e-06, "loss": 0.9985, "step": 703 }, { "epoch": 0.06, "grad_norm": 8.415759563340771, "learning_rate": 9.999949853157622e-06, "loss": 0.9652, "step": 704 }, { "epoch": 0.06, "grad_norm": 8.11054503597933, "learning_rate": 9.999946565701386e-06, "loss": 1.0073, "step": 705 }, { "epoch": 0.06, "grad_norm": 7.211414960139495, "learning_rate": 9.999943173882217e-06, "loss": 1.1216, "step": 706 }, { "epoch": 0.06, "grad_norm": 12.546861706765409, "learning_rate": 9.99993967770018e-06, "loss": 1.0782, "step": 707 }, { "epoch": 0.06, "grad_norm": 11.26090994403747, "learning_rate": 9.999936077155354e-06, "loss": 1.0074, "step": 708 }, { "epoch": 0.06, "grad_norm": 8.410344787489873, "learning_rate": 9.999932372247814e-06, "loss": 1.008, "step": 709 }, { "epoch": 0.06, "grad_norm": 8.320652444145278, "learning_rate": 9.999928562977632e-06, "loss": 0.9628, "step": 710 }, { "epoch": 0.06, "grad_norm": 12.06172427321282, "learning_rate": 9.999924649344893e-06, "loss": 1.0455, "step": 711 }, { "epoch": 0.06, "grad_norm": 16.605841455059778, "learning_rate": 9.999920631349675e-06, "loss": 1.0124, "step": 712 }, { "epoch": 0.06, "grad_norm": 10.328585359914742, "learning_rate": 9.999916508992065e-06, "loss": 1.0578, "step": 713 }, { "epoch": 0.06, "grad_norm": 10.181999002413107, "learning_rate": 9.999912282272147e-06, "loss": 1.0358, "step": 714 }, { "epoch": 0.06, "grad_norm": 7.354840183862304, "learning_rate": 9.999907951190012e-06, "loss": 0.996, "step": 715 }, { "epoch": 0.06, "grad_norm": 12.100788819220488, "learning_rate": 9.999903515745743e-06, "loss": 1.0094, "step": 716 }, { "epoch": 0.06, "grad_norm": 9.19005721409269, "learning_rate": 9.99989897593944e-06, "loss": 1.0397, "step": 717 }, { "epoch": 0.06, "grad_norm": 8.268806035840235, "learning_rate": 9.999894331771199e-06, "loss": 1.0061, "step": 718 }, { "epoch": 0.06, "grad_norm": 9.104417139463868, "learning_rate": 9.99988958324111e-06, "loss": 0.9958, "step": 719 }, { "epoch": 0.06, "grad_norm": 9.943324928367094, "learning_rate": 9.999884730349275e-06, "loss": 1.0365, "step": 720 }, { "epoch": 0.06, "grad_norm": 9.067587799986475, "learning_rate": 9.999879773095797e-06, "loss": 1.0724, "step": 721 }, { "epoch": 0.06, "grad_norm": 8.455956516572698, "learning_rate": 9.999874711480777e-06, "loss": 1.1609, "step": 722 }, { "epoch": 0.06, "grad_norm": 8.02512357440648, "learning_rate": 9.999869545504323e-06, "loss": 0.9997, "step": 723 }, { "epoch": 0.06, "grad_norm": 8.348670272927727, "learning_rate": 9.999864275166543e-06, "loss": 1.0419, "step": 724 }, { "epoch": 0.06, "grad_norm": 7.365832459444025, "learning_rate": 9.999858900467544e-06, "loss": 1.0545, "step": 725 }, { "epoch": 0.06, "grad_norm": 10.724433725506643, "learning_rate": 9.99985342140744e-06, "loss": 1.1011, "step": 726 }, { "epoch": 0.06, "grad_norm": 7.38999422106584, "learning_rate": 9.999847837986346e-06, "loss": 0.9414, "step": 727 }, { "epoch": 0.06, "grad_norm": 9.355783246357326, "learning_rate": 9.999842150204378e-06, "loss": 1.036, "step": 728 }, { "epoch": 0.07, "grad_norm": 10.500280506504824, "learning_rate": 9.999836358061654e-06, "loss": 1.0393, "step": 729 }, { "epoch": 0.07, "grad_norm": 9.87688369430259, "learning_rate": 9.999830461558294e-06, "loss": 0.9788, "step": 730 }, { "epoch": 0.07, "grad_norm": 9.487285433517528, "learning_rate": 9.999824460694426e-06, "loss": 1.0261, "step": 731 }, { "epoch": 0.07, "grad_norm": 10.278259370395507, "learning_rate": 9.99981835547017e-06, "loss": 1.0198, "step": 732 }, { "epoch": 0.07, "grad_norm": 8.353512627340146, "learning_rate": 9.999812145885655e-06, "loss": 1.0128, "step": 733 }, { "epoch": 0.07, "grad_norm": 12.194894501542754, "learning_rate": 9.999805831941011e-06, "loss": 1.0391, "step": 734 }, { "epoch": 0.07, "grad_norm": 12.278488452410807, "learning_rate": 9.99979941363637e-06, "loss": 1.0658, "step": 735 }, { "epoch": 0.07, "grad_norm": 11.7849860943382, "learning_rate": 9.999792890971867e-06, "loss": 1.112, "step": 736 }, { "epoch": 0.07, "grad_norm": 7.113042960238992, "learning_rate": 9.999786263947634e-06, "loss": 1.0553, "step": 737 }, { "epoch": 0.07, "grad_norm": 11.295082946096922, "learning_rate": 9.999779532563813e-06, "loss": 1.0102, "step": 738 }, { "epoch": 0.07, "grad_norm": 7.011148920919268, "learning_rate": 9.999772696820545e-06, "loss": 1.0312, "step": 739 }, { "epoch": 0.07, "grad_norm": 8.284219622396297, "learning_rate": 9.999765756717969e-06, "loss": 1.1135, "step": 740 }, { "epoch": 0.07, "grad_norm": 10.154364594552943, "learning_rate": 9.999758712256233e-06, "loss": 1.0912, "step": 741 }, { "epoch": 0.07, "grad_norm": 8.411272701943217, "learning_rate": 9.999751563435484e-06, "loss": 1.0212, "step": 742 }, { "epoch": 0.07, "grad_norm": 11.233692824869369, "learning_rate": 9.99974431025587e-06, "loss": 1.0986, "step": 743 }, { "epoch": 0.07, "grad_norm": 8.337589319745028, "learning_rate": 9.999736952717543e-06, "loss": 1.0235, "step": 744 }, { "epoch": 0.07, "grad_norm": 10.85153960598842, "learning_rate": 9.999729490820656e-06, "loss": 1.0359, "step": 745 }, { "epoch": 0.07, "grad_norm": 10.694478713750327, "learning_rate": 9.999721924565364e-06, "loss": 1.0541, "step": 746 }, { "epoch": 0.07, "grad_norm": 7.384072501678186, "learning_rate": 9.999714253951828e-06, "loss": 1.0083, "step": 747 }, { "epoch": 0.07, "grad_norm": 7.872780624083503, "learning_rate": 9.999706478980204e-06, "loss": 0.9994, "step": 748 }, { "epoch": 0.07, "grad_norm": 7.931982827221065, "learning_rate": 9.999698599650658e-06, "loss": 1.002, "step": 749 }, { "epoch": 0.07, "grad_norm": 12.545860358133144, "learning_rate": 9.999690615963353e-06, "loss": 1.0254, "step": 750 }, { "epoch": 0.07, "grad_norm": 11.453842638810778, "learning_rate": 9.999682527918456e-06, "loss": 0.9963, "step": 751 }, { "epoch": 0.07, "grad_norm": 7.064690516333482, "learning_rate": 9.999674335516134e-06, "loss": 1.0396, "step": 752 }, { "epoch": 0.07, "grad_norm": 9.622988431681986, "learning_rate": 9.999666038756559e-06, "loss": 1.0164, "step": 753 }, { "epoch": 0.07, "grad_norm": 7.651091173457328, "learning_rate": 9.999657637639905e-06, "loss": 0.969, "step": 754 }, { "epoch": 0.07, "grad_norm": 13.024906087390393, "learning_rate": 9.999649132166346e-06, "loss": 1.0161, "step": 755 }, { "epoch": 0.07, "grad_norm": 7.093115796782326, "learning_rate": 9.999640522336063e-06, "loss": 0.9684, "step": 756 }, { "epoch": 0.07, "grad_norm": 12.654367805598046, "learning_rate": 9.999631808149232e-06, "loss": 0.9895, "step": 757 }, { "epoch": 0.07, "grad_norm": 9.795490344263026, "learning_rate": 9.999622989606035e-06, "loss": 1.0439, "step": 758 }, { "epoch": 0.07, "grad_norm": 10.57024042655462, "learning_rate": 9.999614066706658e-06, "loss": 1.0264, "step": 759 }, { "epoch": 0.07, "grad_norm": 10.140255222045084, "learning_rate": 9.999605039451287e-06, "loss": 1.0599, "step": 760 }, { "epoch": 0.07, "grad_norm": 12.054863774305609, "learning_rate": 9.999595907840109e-06, "loss": 1.0146, "step": 761 }, { "epoch": 0.07, "grad_norm": 10.042345375129704, "learning_rate": 9.999586671873313e-06, "loss": 1.0749, "step": 762 }, { "epoch": 0.07, "grad_norm": 9.213088850421277, "learning_rate": 9.999577331551098e-06, "loss": 1.049, "step": 763 }, { "epoch": 0.07, "grad_norm": 9.598328543746009, "learning_rate": 9.999567886873653e-06, "loss": 1.0024, "step": 764 }, { "epoch": 0.07, "grad_norm": 8.016189945664944, "learning_rate": 9.999558337841178e-06, "loss": 0.9824, "step": 765 }, { "epoch": 0.07, "grad_norm": 9.658363974695595, "learning_rate": 9.99954868445387e-06, "loss": 1.0759, "step": 766 }, { "epoch": 0.07, "grad_norm": 10.540760676020042, "learning_rate": 9.999538926711934e-06, "loss": 0.931, "step": 767 }, { "epoch": 0.07, "grad_norm": 9.116209465766385, "learning_rate": 9.999529064615572e-06, "loss": 0.9968, "step": 768 }, { "epoch": 0.07, "grad_norm": 9.765539189998801, "learning_rate": 9.999519098164988e-06, "loss": 0.9805, "step": 769 }, { "epoch": 0.07, "grad_norm": 11.129203744777987, "learning_rate": 9.999509027360392e-06, "loss": 0.9941, "step": 770 }, { "epoch": 0.07, "grad_norm": 9.199921314915212, "learning_rate": 9.999498852201994e-06, "loss": 0.9723, "step": 771 }, { "epoch": 0.07, "grad_norm": 7.65799959388264, "learning_rate": 9.999488572690006e-06, "loss": 1.0301, "step": 772 }, { "epoch": 0.07, "grad_norm": 9.474276731752648, "learning_rate": 9.999478188824643e-06, "loss": 0.9889, "step": 773 }, { "epoch": 0.07, "grad_norm": 10.516014951082246, "learning_rate": 9.99946770060612e-06, "loss": 0.9742, "step": 774 }, { "epoch": 0.07, "grad_norm": 10.131651491301703, "learning_rate": 9.99945710803466e-06, "loss": 1.006, "step": 775 }, { "epoch": 0.07, "grad_norm": 8.597963201989241, "learning_rate": 9.999446411110479e-06, "loss": 0.9896, "step": 776 }, { "epoch": 0.07, "grad_norm": 7.892245534996906, "learning_rate": 9.999435609833805e-06, "loss": 1.0729, "step": 777 }, { "epoch": 0.07, "grad_norm": 9.54211489802198, "learning_rate": 9.99942470420486e-06, "loss": 1.0505, "step": 778 }, { "epoch": 0.07, "grad_norm": 11.403306720055143, "learning_rate": 9.999413694223874e-06, "loss": 1.1076, "step": 779 }, { "epoch": 0.07, "grad_norm": 8.110372965685114, "learning_rate": 9.999402579891073e-06, "loss": 1.0224, "step": 780 }, { "epoch": 0.07, "grad_norm": 8.650474474171203, "learning_rate": 9.999391361206694e-06, "loss": 0.9987, "step": 781 }, { "epoch": 0.07, "grad_norm": 8.842257697809902, "learning_rate": 9.999380038170967e-06, "loss": 1.0388, "step": 782 }, { "epoch": 0.07, "grad_norm": 10.956807976120622, "learning_rate": 9.999368610784132e-06, "loss": 0.9939, "step": 783 }, { "epoch": 0.07, "grad_norm": 9.823582455309587, "learning_rate": 9.999357079046426e-06, "loss": 1.0487, "step": 784 }, { "epoch": 0.07, "grad_norm": 12.690994313193038, "learning_rate": 9.999345442958088e-06, "loss": 1.0757, "step": 785 }, { "epoch": 0.07, "grad_norm": 9.154045596999365, "learning_rate": 9.999333702519363e-06, "loss": 0.9684, "step": 786 }, { "epoch": 0.07, "grad_norm": 10.605625628572112, "learning_rate": 9.999321857730494e-06, "loss": 1.0364, "step": 787 }, { "epoch": 0.07, "grad_norm": 7.150052320692485, "learning_rate": 9.99930990859173e-06, "loss": 1.0275, "step": 788 }, { "epoch": 0.07, "grad_norm": 11.44157761578933, "learning_rate": 9.99929785510332e-06, "loss": 0.9377, "step": 789 }, { "epoch": 0.07, "grad_norm": 7.182123953158637, "learning_rate": 9.999285697265518e-06, "loss": 1.0636, "step": 790 }, { "epoch": 0.07, "grad_norm": 6.996675277542571, "learning_rate": 9.999273435078572e-06, "loss": 1.0844, "step": 791 }, { "epoch": 0.07, "grad_norm": 8.47072888980035, "learning_rate": 9.999261068542742e-06, "loss": 0.9886, "step": 792 }, { "epoch": 0.07, "grad_norm": 9.004159944319575, "learning_rate": 9.999248597658286e-06, "loss": 1.0563, "step": 793 }, { "epoch": 0.07, "grad_norm": 7.730669225740804, "learning_rate": 9.999236022425464e-06, "loss": 0.9809, "step": 794 }, { "epoch": 0.07, "grad_norm": 6.866075027213889, "learning_rate": 9.999223342844538e-06, "loss": 1.0083, "step": 795 }, { "epoch": 0.07, "grad_norm": 11.101345986166228, "learning_rate": 9.999210558915773e-06, "loss": 1.0798, "step": 796 }, { "epoch": 0.07, "grad_norm": 13.14234019942694, "learning_rate": 9.999197670639436e-06, "loss": 1.0139, "step": 797 }, { "epoch": 0.07, "grad_norm": 9.938183447367406, "learning_rate": 9.999184678015793e-06, "loss": 1.0893, "step": 798 }, { "epoch": 0.07, "grad_norm": 7.531737180382077, "learning_rate": 9.999171581045121e-06, "loss": 1.0059, "step": 799 }, { "epoch": 0.07, "grad_norm": 12.890705828986743, "learning_rate": 9.99915837972769e-06, "loss": 0.9439, "step": 800 }, { "epoch": 0.07, "grad_norm": 10.074308649026603, "learning_rate": 9.999145074063773e-06, "loss": 1.0173, "step": 801 }, { "epoch": 0.07, "grad_norm": 9.111397352114071, "learning_rate": 9.999131664053655e-06, "loss": 0.9925, "step": 802 }, { "epoch": 0.07, "grad_norm": 11.45684403739392, "learning_rate": 9.999118149697608e-06, "loss": 1.0679, "step": 803 }, { "epoch": 0.07, "grad_norm": 9.75158959833569, "learning_rate": 9.999104530995918e-06, "loss": 0.9635, "step": 804 }, { "epoch": 0.07, "grad_norm": 7.6376594499753985, "learning_rate": 9.99909080794887e-06, "loss": 1.0223, "step": 805 }, { "epoch": 0.07, "grad_norm": 10.735355647193114, "learning_rate": 9.999076980556748e-06, "loss": 1.0069, "step": 806 }, { "epoch": 0.07, "grad_norm": 9.637124064114406, "learning_rate": 9.999063048819841e-06, "loss": 1.0236, "step": 807 }, { "epoch": 0.07, "grad_norm": 8.989035332072238, "learning_rate": 9.999049012738441e-06, "loss": 1.018, "step": 808 }, { "epoch": 0.07, "grad_norm": 9.853823401855315, "learning_rate": 9.999034872312842e-06, "loss": 0.988, "step": 809 }, { "epoch": 0.07, "grad_norm": 11.727369710415346, "learning_rate": 9.999020627543338e-06, "loss": 0.9607, "step": 810 }, { "epoch": 0.07, "grad_norm": 8.661500215412708, "learning_rate": 9.999006278430226e-06, "loss": 1.0492, "step": 811 }, { "epoch": 0.07, "grad_norm": 10.531914949504957, "learning_rate": 9.998991824973804e-06, "loss": 0.9437, "step": 812 }, { "epoch": 0.07, "grad_norm": 9.155739567576108, "learning_rate": 9.998977267174375e-06, "loss": 1.0163, "step": 813 }, { "epoch": 0.07, "grad_norm": 8.5295814885244, "learning_rate": 9.998962605032246e-06, "loss": 1.0019, "step": 814 }, { "epoch": 0.07, "grad_norm": 9.066040399773762, "learning_rate": 9.998947838547717e-06, "loss": 0.9334, "step": 815 }, { "epoch": 0.07, "grad_norm": 8.294368526879245, "learning_rate": 9.9989329677211e-06, "loss": 1.015, "step": 816 }, { "epoch": 0.07, "grad_norm": 10.381955374479535, "learning_rate": 9.998917992552706e-06, "loss": 1.0307, "step": 817 }, { "epoch": 0.07, "grad_norm": 10.237152025811778, "learning_rate": 9.998902913042845e-06, "loss": 0.9688, "step": 818 }, { "epoch": 0.07, "grad_norm": 9.765137113143844, "learning_rate": 9.998887729191836e-06, "loss": 0.9356, "step": 819 }, { "epoch": 0.07, "grad_norm": 7.225831107915645, "learning_rate": 9.99887244099999e-06, "loss": 1.0306, "step": 820 }, { "epoch": 0.07, "grad_norm": 7.0452868425090065, "learning_rate": 9.99885704846763e-06, "loss": 0.9511, "step": 821 }, { "epoch": 0.07, "grad_norm": 9.225563280331507, "learning_rate": 9.998841551595076e-06, "loss": 1.0141, "step": 822 }, { "epoch": 0.07, "grad_norm": 9.733521065070352, "learning_rate": 9.998825950382654e-06, "loss": 0.9283, "step": 823 }, { "epoch": 0.07, "grad_norm": 7.167960354591541, "learning_rate": 9.998810244830685e-06, "loss": 0.9941, "step": 824 }, { "epoch": 0.07, "grad_norm": 9.078901240952447, "learning_rate": 9.9987944349395e-06, "loss": 0.9588, "step": 825 }, { "epoch": 0.07, "grad_norm": 9.475670378710102, "learning_rate": 9.99877852070943e-06, "loss": 1.0018, "step": 826 }, { "epoch": 0.07, "grad_norm": 10.574197989227159, "learning_rate": 9.998762502140802e-06, "loss": 0.9755, "step": 827 }, { "epoch": 0.07, "grad_norm": 11.825195363997548, "learning_rate": 9.998746379233958e-06, "loss": 0.9232, "step": 828 }, { "epoch": 0.07, "grad_norm": 8.24960207617306, "learning_rate": 9.998730151989227e-06, "loss": 0.9687, "step": 829 }, { "epoch": 0.07, "grad_norm": 8.130660165568324, "learning_rate": 9.998713820406953e-06, "loss": 1.0033, "step": 830 }, { "epoch": 0.07, "grad_norm": 7.801358023593332, "learning_rate": 9.998697384487475e-06, "loss": 1.0019, "step": 831 }, { "epoch": 0.07, "grad_norm": 8.516486898398215, "learning_rate": 9.998680844231135e-06, "loss": 1.0064, "step": 832 }, { "epoch": 0.07, "grad_norm": 8.091949697729644, "learning_rate": 9.998664199638279e-06, "loss": 0.9589, "step": 833 }, { "epoch": 0.07, "grad_norm": 8.00163035050054, "learning_rate": 9.998647450709257e-06, "loss": 0.9503, "step": 834 }, { "epoch": 0.07, "grad_norm": 6.4881404341583195, "learning_rate": 9.998630597444415e-06, "loss": 0.972, "step": 835 }, { "epoch": 0.07, "grad_norm": 11.784424487511476, "learning_rate": 9.998613639844104e-06, "loss": 0.9762, "step": 836 }, { "epoch": 0.07, "grad_norm": 9.640234309768429, "learning_rate": 9.998596577908684e-06, "loss": 0.9857, "step": 837 }, { "epoch": 0.07, "grad_norm": 9.326280822648059, "learning_rate": 9.998579411638506e-06, "loss": 1.1463, "step": 838 }, { "epoch": 0.07, "grad_norm": 9.079300819160272, "learning_rate": 9.998562141033927e-06, "loss": 1.051, "step": 839 }, { "epoch": 0.07, "grad_norm": 6.831602318197288, "learning_rate": 9.998544766095312e-06, "loss": 0.9534, "step": 840 }, { "epoch": 0.08, "grad_norm": 8.875888004271978, "learning_rate": 9.998527286823021e-06, "loss": 0.9565, "step": 841 }, { "epoch": 0.08, "grad_norm": 7.037118217640445, "learning_rate": 9.99850970321742e-06, "loss": 0.9761, "step": 842 }, { "epoch": 0.08, "grad_norm": 11.234180849248652, "learning_rate": 9.998492015278874e-06, "loss": 1.032, "step": 843 }, { "epoch": 0.08, "grad_norm": 8.162553028475706, "learning_rate": 9.998474223007756e-06, "loss": 1.0046, "step": 844 }, { "epoch": 0.08, "grad_norm": 9.006100919616056, "learning_rate": 9.998456326404433e-06, "loss": 0.9509, "step": 845 }, { "epoch": 0.08, "grad_norm": 8.278200612900712, "learning_rate": 9.998438325469282e-06, "loss": 1.0242, "step": 846 }, { "epoch": 0.08, "grad_norm": 10.707781213665642, "learning_rate": 9.998420220202678e-06, "loss": 0.9787, "step": 847 }, { "epoch": 0.08, "grad_norm": 8.155020684922716, "learning_rate": 9.998402010604998e-06, "loss": 1.0536, "step": 848 }, { "epoch": 0.08, "grad_norm": 7.321189351701641, "learning_rate": 9.998383696676621e-06, "loss": 1.0116, "step": 849 }, { "epoch": 0.08, "grad_norm": 8.436086532330405, "learning_rate": 9.998365278417929e-06, "loss": 1.0244, "step": 850 }, { "epoch": 0.08, "grad_norm": 11.062238640879263, "learning_rate": 9.998346755829311e-06, "loss": 1.0074, "step": 851 }, { "epoch": 0.08, "grad_norm": 8.365784590412884, "learning_rate": 9.99832812891115e-06, "loss": 1.0249, "step": 852 }, { "epoch": 0.08, "grad_norm": 8.993906303087318, "learning_rate": 9.998309397663836e-06, "loss": 1.0291, "step": 853 }, { "epoch": 0.08, "grad_norm": 9.959739139175088, "learning_rate": 9.998290562087758e-06, "loss": 1.0051, "step": 854 }, { "epoch": 0.08, "grad_norm": 9.877176139961323, "learning_rate": 9.998271622183313e-06, "loss": 0.9566, "step": 855 }, { "epoch": 0.08, "grad_norm": 9.657500736562499, "learning_rate": 9.998252577950891e-06, "loss": 0.9721, "step": 856 }, { "epoch": 0.08, "grad_norm": 10.279961408586907, "learning_rate": 9.998233429390893e-06, "loss": 0.9345, "step": 857 }, { "epoch": 0.08, "grad_norm": 7.6492197981166505, "learning_rate": 9.99821417650372e-06, "loss": 0.995, "step": 858 }, { "epoch": 0.08, "grad_norm": 7.207463039174929, "learning_rate": 9.998194819289771e-06, "loss": 1.0159, "step": 859 }, { "epoch": 0.08, "grad_norm": 9.417261353648795, "learning_rate": 9.998175357749451e-06, "loss": 1.0428, "step": 860 }, { "epoch": 0.08, "grad_norm": 8.03448636351223, "learning_rate": 9.998155791883166e-06, "loss": 0.971, "step": 861 }, { "epoch": 0.08, "grad_norm": 8.536641322969025, "learning_rate": 9.998136121691325e-06, "loss": 0.9238, "step": 862 }, { "epoch": 0.08, "grad_norm": 7.227101715895324, "learning_rate": 9.998116347174336e-06, "loss": 1.0306, "step": 863 }, { "epoch": 0.08, "grad_norm": 6.727707075132388, "learning_rate": 9.998096468332617e-06, "loss": 1.0229, "step": 864 }, { "epoch": 0.08, "grad_norm": 10.915024409991847, "learning_rate": 9.998076485166579e-06, "loss": 1.0229, "step": 865 }, { "epoch": 0.08, "grad_norm": 12.062271159379488, "learning_rate": 9.998056397676639e-06, "loss": 1.0375, "step": 866 }, { "epoch": 0.08, "grad_norm": 9.055285114291404, "learning_rate": 9.998036205863218e-06, "loss": 0.9396, "step": 867 }, { "epoch": 0.08, "grad_norm": 8.718616704882963, "learning_rate": 9.998015909726737e-06, "loss": 0.9402, "step": 868 }, { "epoch": 0.08, "grad_norm": 9.97907063207494, "learning_rate": 9.997995509267619e-06, "loss": 0.9725, "step": 869 }, { "epoch": 0.08, "grad_norm": 8.458216468025302, "learning_rate": 9.99797500448629e-06, "loss": 0.9991, "step": 870 }, { "epoch": 0.08, "grad_norm": 12.035208430367854, "learning_rate": 9.997954395383178e-06, "loss": 0.954, "step": 871 }, { "epoch": 0.08, "grad_norm": 8.0667790566975, "learning_rate": 9.997933681958714e-06, "loss": 0.9475, "step": 872 }, { "epoch": 0.08, "grad_norm": 9.78850028611461, "learning_rate": 9.997912864213327e-06, "loss": 0.9803, "step": 873 }, { "epoch": 0.08, "grad_norm": 8.761347585011826, "learning_rate": 9.99789194214746e-06, "loss": 0.9012, "step": 874 }, { "epoch": 0.08, "grad_norm": 7.212051971180096, "learning_rate": 9.997870915761539e-06, "loss": 1.0111, "step": 875 }, { "epoch": 0.08, "grad_norm": 8.956251670008232, "learning_rate": 9.997849785056009e-06, "loss": 0.9894, "step": 876 }, { "epoch": 0.08, "grad_norm": 9.261481843759254, "learning_rate": 9.99782855003131e-06, "loss": 0.9911, "step": 877 }, { "epoch": 0.08, "grad_norm": 6.400987475573911, "learning_rate": 9.997807210687884e-06, "loss": 1.0265, "step": 878 }, { "epoch": 0.08, "grad_norm": 7.06650923897517, "learning_rate": 9.997785767026181e-06, "loss": 0.9558, "step": 879 }, { "epoch": 0.08, "grad_norm": 15.783081219499563, "learning_rate": 9.997764219046641e-06, "loss": 1.0335, "step": 880 }, { "epoch": 0.08, "grad_norm": 9.489237644381799, "learning_rate": 9.99774256674972e-06, "loss": 0.9928, "step": 881 }, { "epoch": 0.08, "grad_norm": 9.93508368946788, "learning_rate": 9.997720810135867e-06, "loss": 1.0215, "step": 882 }, { "epoch": 0.08, "grad_norm": 8.858578107029446, "learning_rate": 9.997698949205536e-06, "loss": 0.9998, "step": 883 }, { "epoch": 0.08, "grad_norm": 8.351754877748318, "learning_rate": 9.997676983959188e-06, "loss": 1.0109, "step": 884 }, { "epoch": 0.08, "grad_norm": 10.09382991863049, "learning_rate": 9.997654914397274e-06, "loss": 0.9956, "step": 885 }, { "epoch": 0.08, "grad_norm": 10.776792049680456, "learning_rate": 9.99763274052026e-06, "loss": 1.0228, "step": 886 }, { "epoch": 0.08, "grad_norm": 9.450315848583875, "learning_rate": 9.997610462328607e-06, "loss": 0.9731, "step": 887 }, { "epoch": 0.08, "grad_norm": 7.799075565391173, "learning_rate": 9.997588079822778e-06, "loss": 0.9824, "step": 888 }, { "epoch": 0.08, "grad_norm": 8.147937211235977, "learning_rate": 9.997565593003245e-06, "loss": 0.9904, "step": 889 }, { "epoch": 0.08, "grad_norm": 7.941964904194568, "learning_rate": 9.997543001870474e-06, "loss": 0.9687, "step": 890 }, { "epoch": 0.08, "grad_norm": 8.19685619005091, "learning_rate": 9.997520306424936e-06, "loss": 0.9839, "step": 891 }, { "epoch": 0.08, "grad_norm": 8.280410608907395, "learning_rate": 9.997497506667108e-06, "loss": 0.9352, "step": 892 }, { "epoch": 0.08, "grad_norm": 7.931891914920883, "learning_rate": 9.997474602597463e-06, "loss": 1.0402, "step": 893 }, { "epoch": 0.08, "grad_norm": 10.541957455184061, "learning_rate": 9.99745159421648e-06, "loss": 1.0096, "step": 894 }, { "epoch": 0.08, "grad_norm": 9.482572192689705, "learning_rate": 9.997428481524639e-06, "loss": 1.008, "step": 895 }, { "epoch": 0.08, "grad_norm": 7.772837817630702, "learning_rate": 9.997405264522423e-06, "loss": 1.0143, "step": 896 }, { "epoch": 0.08, "grad_norm": 9.793025104270116, "learning_rate": 9.997381943210316e-06, "loss": 0.9671, "step": 897 }, { "epoch": 0.08, "grad_norm": 11.701773131723733, "learning_rate": 9.997358517588805e-06, "loss": 0.9655, "step": 898 }, { "epoch": 0.08, "grad_norm": 9.727101413057323, "learning_rate": 9.99733498765838e-06, "loss": 0.9884, "step": 899 }, { "epoch": 0.08, "grad_norm": 9.093472865831096, "learning_rate": 9.99731135341953e-06, "loss": 1.0063, "step": 900 }, { "epoch": 0.08, "grad_norm": 9.88420339791517, "learning_rate": 9.997287614872749e-06, "loss": 1.0019, "step": 901 }, { "epoch": 0.08, "grad_norm": 7.872546322008035, "learning_rate": 9.997263772018533e-06, "loss": 0.9245, "step": 902 }, { "epoch": 0.08, "grad_norm": 11.015538360963271, "learning_rate": 9.99723982485738e-06, "loss": 1.0939, "step": 903 }, { "epoch": 0.08, "grad_norm": 10.590160878618287, "learning_rate": 9.99721577338979e-06, "loss": 0.9454, "step": 904 }, { "epoch": 0.08, "grad_norm": 11.36996702539662, "learning_rate": 9.997191617616264e-06, "loss": 0.9516, "step": 905 }, { "epoch": 0.08, "grad_norm": 10.611965931153284, "learning_rate": 9.997167357537306e-06, "loss": 0.9522, "step": 906 }, { "epoch": 0.08, "grad_norm": 8.380929859210196, "learning_rate": 9.997142993153423e-06, "loss": 0.9539, "step": 907 }, { "epoch": 0.08, "grad_norm": 8.362117930071422, "learning_rate": 9.997118524465126e-06, "loss": 0.9432, "step": 908 }, { "epoch": 0.08, "grad_norm": 8.629403569424733, "learning_rate": 9.99709395147292e-06, "loss": 0.9524, "step": 909 }, { "epoch": 0.08, "grad_norm": 8.784298503072442, "learning_rate": 9.997069274177323e-06, "loss": 1.0483, "step": 910 }, { "epoch": 0.08, "grad_norm": 8.08787003949281, "learning_rate": 9.997044492578847e-06, "loss": 0.9112, "step": 911 }, { "epoch": 0.08, "grad_norm": 10.536530726793412, "learning_rate": 9.997019606678014e-06, "loss": 0.9864, "step": 912 }, { "epoch": 0.08, "grad_norm": 8.023570588211143, "learning_rate": 9.996994616475336e-06, "loss": 0.99, "step": 913 }, { "epoch": 0.08, "grad_norm": 9.34347507234705, "learning_rate": 9.996969521971341e-06, "loss": 0.9845, "step": 914 }, { "epoch": 0.08, "grad_norm": 12.132376942691971, "learning_rate": 9.996944323166549e-06, "loss": 1.0304, "step": 915 }, { "epoch": 0.08, "grad_norm": 9.210421513737613, "learning_rate": 9.996919020061489e-06, "loss": 0.9442, "step": 916 }, { "epoch": 0.08, "grad_norm": 8.70461688922444, "learning_rate": 9.996893612656684e-06, "loss": 0.9708, "step": 917 }, { "epoch": 0.08, "grad_norm": 7.270826096425844, "learning_rate": 9.996868100952671e-06, "loss": 0.9563, "step": 918 }, { "epoch": 0.08, "grad_norm": 8.41158915972332, "learning_rate": 9.99684248494998e-06, "loss": 1.0305, "step": 919 }, { "epoch": 0.08, "grad_norm": 8.527136962549797, "learning_rate": 9.996816764649143e-06, "loss": 1.0226, "step": 920 }, { "epoch": 0.08, "grad_norm": 9.923163325254967, "learning_rate": 9.9967909400507e-06, "loss": 0.9901, "step": 921 }, { "epoch": 0.08, "grad_norm": 8.489260378612245, "learning_rate": 9.996765011155187e-06, "loss": 0.9287, "step": 922 }, { "epoch": 0.08, "grad_norm": 8.730763089215504, "learning_rate": 9.996738977963149e-06, "loss": 0.959, "step": 923 }, { "epoch": 0.08, "grad_norm": 8.570788783522387, "learning_rate": 9.996712840475127e-06, "loss": 1.0125, "step": 924 }, { "epoch": 0.08, "grad_norm": 8.914867656292998, "learning_rate": 9.996686598691668e-06, "loss": 1.018, "step": 925 }, { "epoch": 0.08, "grad_norm": 9.012072153148338, "learning_rate": 9.996660252613317e-06, "loss": 1.0536, "step": 926 }, { "epoch": 0.08, "grad_norm": 8.628037688052148, "learning_rate": 9.996633802240627e-06, "loss": 0.9862, "step": 927 }, { "epoch": 0.08, "grad_norm": 10.599329495026513, "learning_rate": 9.996607247574148e-06, "loss": 1.048, "step": 928 }, { "epoch": 0.08, "grad_norm": 8.06679572032971, "learning_rate": 9.996580588614437e-06, "loss": 1.0354, "step": 929 }, { "epoch": 0.08, "grad_norm": 10.748633377020838, "learning_rate": 9.996553825362046e-06, "loss": 1.0221, "step": 930 }, { "epoch": 0.08, "grad_norm": 7.100222886869438, "learning_rate": 9.996526957817538e-06, "loss": 1.0547, "step": 931 }, { "epoch": 0.08, "grad_norm": 10.878659049340408, "learning_rate": 9.99649998598147e-06, "loss": 1.003, "step": 932 }, { "epoch": 0.08, "grad_norm": 17.56335431343529, "learning_rate": 9.99647290985441e-06, "loss": 0.956, "step": 933 }, { "epoch": 0.08, "grad_norm": 10.07651413400778, "learning_rate": 9.996445729436918e-06, "loss": 0.9476, "step": 934 }, { "epoch": 0.08, "grad_norm": 10.709840946161437, "learning_rate": 9.996418444729562e-06, "loss": 0.9497, "step": 935 }, { "epoch": 0.08, "grad_norm": 6.487039521473529, "learning_rate": 9.996391055732916e-06, "loss": 0.9808, "step": 936 }, { "epoch": 0.08, "grad_norm": 7.5830926198785775, "learning_rate": 9.996363562447548e-06, "loss": 1.0021, "step": 937 }, { "epoch": 0.08, "grad_norm": 8.176799916541752, "learning_rate": 9.996335964874031e-06, "loss": 1.0003, "step": 938 }, { "epoch": 0.08, "grad_norm": 8.134740072491587, "learning_rate": 9.996308263012944e-06, "loss": 0.9795, "step": 939 }, { "epoch": 0.08, "grad_norm": 8.219140081754393, "learning_rate": 9.996280456864862e-06, "loss": 0.9343, "step": 940 }, { "epoch": 0.08, "grad_norm": 7.7292352429680955, "learning_rate": 9.99625254643037e-06, "loss": 0.9485, "step": 941 }, { "epoch": 0.08, "grad_norm": 7.851053910202773, "learning_rate": 9.996224531710045e-06, "loss": 0.95, "step": 942 }, { "epoch": 0.08, "grad_norm": 7.910862164118473, "learning_rate": 9.996196412704476e-06, "loss": 0.8952, "step": 943 }, { "epoch": 0.08, "grad_norm": 8.638752770057254, "learning_rate": 9.996168189414246e-06, "loss": 1.0271, "step": 944 }, { "epoch": 0.08, "grad_norm": 8.735001452753258, "learning_rate": 9.996139861839948e-06, "loss": 0.9979, "step": 945 }, { "epoch": 0.08, "grad_norm": 8.47907691906831, "learning_rate": 9.996111429982173e-06, "loss": 0.9693, "step": 946 }, { "epoch": 0.08, "grad_norm": 11.569904804829509, "learning_rate": 9.996082893841512e-06, "loss": 1.009, "step": 947 }, { "epoch": 0.08, "grad_norm": 6.591808252095188, "learning_rate": 9.996054253418561e-06, "loss": 1.0038, "step": 948 }, { "epoch": 0.08, "grad_norm": 7.845054822540564, "learning_rate": 9.99602550871392e-06, "loss": 1.0059, "step": 949 }, { "epoch": 0.08, "grad_norm": 9.457469461067877, "learning_rate": 9.995996659728187e-06, "loss": 0.9651, "step": 950 }, { "epoch": 0.08, "grad_norm": 8.309180622295747, "learning_rate": 9.995967706461965e-06, "loss": 1.0216, "step": 951 }, { "epoch": 0.08, "grad_norm": 8.206846805290429, "learning_rate": 9.995938648915858e-06, "loss": 1.0205, "step": 952 }, { "epoch": 0.09, "grad_norm": 7.567442708562054, "learning_rate": 9.995909487090472e-06, "loss": 1.0346, "step": 953 }, { "epoch": 0.09, "grad_norm": 10.906991452894864, "learning_rate": 9.995880220986417e-06, "loss": 1.0321, "step": 954 }, { "epoch": 0.09, "grad_norm": 11.974302840235083, "learning_rate": 9.995850850604304e-06, "loss": 1.0519, "step": 955 }, { "epoch": 0.09, "grad_norm": 7.6733007572190814, "learning_rate": 9.995821375944744e-06, "loss": 0.9607, "step": 956 }, { "epoch": 0.09, "grad_norm": 8.33287573867397, "learning_rate": 9.995791797008354e-06, "loss": 0.9939, "step": 957 }, { "epoch": 0.09, "grad_norm": 7.334985897257653, "learning_rate": 9.99576211379575e-06, "loss": 1.062, "step": 958 }, { "epoch": 0.09, "grad_norm": 8.495972847324932, "learning_rate": 9.995732326307555e-06, "loss": 1.0429, "step": 959 }, { "epoch": 0.09, "grad_norm": 10.054117600563726, "learning_rate": 9.995702434544386e-06, "loss": 0.9841, "step": 960 }, { "epoch": 0.09, "grad_norm": 7.332252818361094, "learning_rate": 9.99567243850687e-06, "loss": 1.0064, "step": 961 }, { "epoch": 0.09, "grad_norm": 7.669724934460178, "learning_rate": 9.99564233819563e-06, "loss": 0.9798, "step": 962 }, { "epoch": 0.09, "grad_norm": 8.879399023141424, "learning_rate": 9.995612133611298e-06, "loss": 0.9366, "step": 963 }, { "epoch": 0.09, "grad_norm": 8.055374751446326, "learning_rate": 9.995581824754503e-06, "loss": 0.9377, "step": 964 }, { "epoch": 0.09, "grad_norm": 7.889184047874464, "learning_rate": 9.995551411625877e-06, "loss": 0.9907, "step": 965 }, { "epoch": 0.09, "grad_norm": 6.858808648356668, "learning_rate": 9.995520894226056e-06, "loss": 0.9629, "step": 966 }, { "epoch": 0.09, "grad_norm": 7.342147609876978, "learning_rate": 9.995490272555677e-06, "loss": 0.934, "step": 967 }, { "epoch": 0.09, "grad_norm": 11.16091061706455, "learning_rate": 9.995459546615376e-06, "loss": 1.0047, "step": 968 }, { "epoch": 0.09, "grad_norm": 7.295426492436262, "learning_rate": 9.995428716405799e-06, "loss": 0.9277, "step": 969 }, { "epoch": 0.09, "grad_norm": 9.415259242933013, "learning_rate": 9.995397781927587e-06, "loss": 0.9295, "step": 970 }, { "epoch": 0.09, "grad_norm": 10.646342171555547, "learning_rate": 9.995366743181385e-06, "loss": 0.9686, "step": 971 }, { "epoch": 0.09, "grad_norm": 6.972979944484932, "learning_rate": 9.995335600167844e-06, "loss": 0.9972, "step": 972 }, { "epoch": 0.09, "grad_norm": 12.108019848529077, "learning_rate": 9.995304352887609e-06, "loss": 0.9574, "step": 973 }, { "epoch": 0.09, "grad_norm": 7.096087381507601, "learning_rate": 9.995273001341338e-06, "loss": 0.9558, "step": 974 }, { "epoch": 0.09, "grad_norm": 10.197171999669669, "learning_rate": 9.995241545529682e-06, "loss": 0.9846, "step": 975 }, { "epoch": 0.09, "grad_norm": 7.256180287677908, "learning_rate": 9.995209985453296e-06, "loss": 0.9579, "step": 976 }, { "epoch": 0.09, "grad_norm": 6.724295615278792, "learning_rate": 9.995178321112843e-06, "loss": 0.9875, "step": 977 }, { "epoch": 0.09, "grad_norm": 9.340082744011708, "learning_rate": 9.995146552508981e-06, "loss": 0.971, "step": 978 }, { "epoch": 0.09, "grad_norm": 8.404431209629506, "learning_rate": 9.995114679642374e-06, "loss": 1.0256, "step": 979 }, { "epoch": 0.09, "grad_norm": 7.6436059677923085, "learning_rate": 9.995082702513688e-06, "loss": 0.986, "step": 980 }, { "epoch": 0.09, "grad_norm": 8.109019610112588, "learning_rate": 9.995050621123589e-06, "loss": 0.9629, "step": 981 }, { "epoch": 0.09, "grad_norm": 11.196770569742844, "learning_rate": 9.995018435472746e-06, "loss": 0.9324, "step": 982 }, { "epoch": 0.09, "grad_norm": 10.048181166353656, "learning_rate": 9.994986145561833e-06, "loss": 0.9582, "step": 983 }, { "epoch": 0.09, "grad_norm": 10.95397075644332, "learning_rate": 9.994953751391525e-06, "loss": 1.0406, "step": 984 }, { "epoch": 0.09, "grad_norm": 8.048399361843256, "learning_rate": 9.994921252962492e-06, "loss": 0.9553, "step": 985 }, { "epoch": 0.09, "grad_norm": 8.34785946920711, "learning_rate": 9.99488865027542e-06, "loss": 1.0136, "step": 986 }, { "epoch": 0.09, "grad_norm": 9.318231643650801, "learning_rate": 9.994855943330986e-06, "loss": 1.001, "step": 987 }, { "epoch": 0.09, "grad_norm": 9.306159909055284, "learning_rate": 9.994823132129871e-06, "loss": 0.9336, "step": 988 }, { "epoch": 0.09, "grad_norm": 7.280946909804913, "learning_rate": 9.994790216672763e-06, "loss": 0.9473, "step": 989 }, { "epoch": 0.09, "grad_norm": 8.316331266857857, "learning_rate": 9.994757196960348e-06, "loss": 0.938, "step": 990 }, { "epoch": 0.09, "grad_norm": 7.323957963735286, "learning_rate": 9.994724072993313e-06, "loss": 0.9509, "step": 991 }, { "epoch": 0.09, "grad_norm": 10.201511993466811, "learning_rate": 9.994690844772352e-06, "loss": 0.9711, "step": 992 }, { "epoch": 0.09, "grad_norm": 9.087015668346059, "learning_rate": 9.994657512298158e-06, "loss": 1.0163, "step": 993 }, { "epoch": 0.09, "grad_norm": 8.12000217774927, "learning_rate": 9.994624075571426e-06, "loss": 0.9576, "step": 994 }, { "epoch": 0.09, "grad_norm": 6.591411847605315, "learning_rate": 9.994590534592854e-06, "loss": 0.9179, "step": 995 }, { "epoch": 0.09, "grad_norm": 14.8839240075901, "learning_rate": 9.994556889363145e-06, "loss": 0.938, "step": 996 }, { "epoch": 0.09, "grad_norm": 10.344409484816145, "learning_rate": 9.994523139882997e-06, "loss": 0.9651, "step": 997 }, { "epoch": 0.09, "grad_norm": 8.154111922309497, "learning_rate": 9.994489286153116e-06, "loss": 0.9531, "step": 998 }, { "epoch": 0.09, "grad_norm": 9.514055631924581, "learning_rate": 9.994455328174209e-06, "loss": 0.9537, "step": 999 }, { "epoch": 0.09, "grad_norm": 7.484204683552593, "learning_rate": 9.994421265946984e-06, "loss": 0.9449, "step": 1000 }, { "epoch": 0.09, "grad_norm": 9.124542686530342, "learning_rate": 9.994387099472156e-06, "loss": 0.9988, "step": 1001 }, { "epoch": 0.09, "grad_norm": 8.258402495003683, "learning_rate": 9.994352828750431e-06, "loss": 1.0371, "step": 1002 }, { "epoch": 0.09, "grad_norm": 7.658579577391695, "learning_rate": 9.99431845378253e-06, "loss": 0.9525, "step": 1003 }, { "epoch": 0.09, "grad_norm": 9.680866866367063, "learning_rate": 9.99428397456917e-06, "loss": 0.9774, "step": 1004 }, { "epoch": 0.09, "grad_norm": 8.478438877493712, "learning_rate": 9.994249391111067e-06, "loss": 0.9784, "step": 1005 }, { "epoch": 0.09, "grad_norm": 8.534440059744917, "learning_rate": 9.994214703408946e-06, "loss": 0.9398, "step": 1006 }, { "epoch": 0.09, "grad_norm": 7.180300218505205, "learning_rate": 9.99417991146353e-06, "loss": 0.9068, "step": 1007 }, { "epoch": 0.09, "grad_norm": 9.076194451869712, "learning_rate": 9.994145015275544e-06, "loss": 0.9686, "step": 1008 }, { "epoch": 0.09, "grad_norm": 8.10532080872228, "learning_rate": 9.994110014845722e-06, "loss": 1.0107, "step": 1009 }, { "epoch": 0.09, "grad_norm": 7.3995517598974585, "learning_rate": 9.994074910174787e-06, "loss": 0.9759, "step": 1010 }, { "epoch": 0.09, "grad_norm": 8.207171532098636, "learning_rate": 9.994039701263476e-06, "loss": 1.0228, "step": 1011 }, { "epoch": 0.09, "grad_norm": 8.610920064319663, "learning_rate": 9.994004388112521e-06, "loss": 0.972, "step": 1012 }, { "epoch": 0.09, "grad_norm": 7.6119914533557695, "learning_rate": 9.993968970722663e-06, "loss": 0.9694, "step": 1013 }, { "epoch": 0.09, "grad_norm": 7.43786462970772, "learning_rate": 9.99393344909464e-06, "loss": 0.9366, "step": 1014 }, { "epoch": 0.09, "grad_norm": 6.794334023099357, "learning_rate": 9.993897823229193e-06, "loss": 0.9848, "step": 1015 }, { "epoch": 0.09, "grad_norm": 8.690814839424235, "learning_rate": 9.993862093127063e-06, "loss": 0.9278, "step": 1016 }, { "epoch": 0.09, "grad_norm": 9.19692281914169, "learning_rate": 9.993826258789e-06, "loss": 0.9573, "step": 1017 }, { "epoch": 0.09, "grad_norm": 10.571044230516712, "learning_rate": 9.99379032021575e-06, "loss": 0.9935, "step": 1018 }, { "epoch": 0.09, "grad_norm": 10.11484207188465, "learning_rate": 9.993754277408062e-06, "loss": 0.9836, "step": 1019 }, { "epoch": 0.09, "grad_norm": 6.663368628584689, "learning_rate": 9.993718130366692e-06, "loss": 1.0417, "step": 1020 }, { "epoch": 0.09, "grad_norm": 10.060058159335902, "learning_rate": 9.993681879092389e-06, "loss": 0.9284, "step": 1021 }, { "epoch": 0.09, "grad_norm": 7.331192381249381, "learning_rate": 9.993645523585915e-06, "loss": 1.0282, "step": 1022 }, { "epoch": 0.09, "grad_norm": 9.125837177813874, "learning_rate": 9.993609063848027e-06, "loss": 0.9738, "step": 1023 }, { "epoch": 0.09, "grad_norm": 9.684131653502822, "learning_rate": 9.993572499879484e-06, "loss": 1.0177, "step": 1024 }, { "epoch": 0.09, "grad_norm": 9.127310264994257, "learning_rate": 9.993535831681052e-06, "loss": 0.9843, "step": 1025 }, { "epoch": 0.09, "grad_norm": 9.378210588730779, "learning_rate": 9.993499059253494e-06, "loss": 0.9597, "step": 1026 }, { "epoch": 0.09, "grad_norm": 6.6256205508552135, "learning_rate": 9.99346218259758e-06, "loss": 0.9693, "step": 1027 }, { "epoch": 0.09, "grad_norm": 8.23502643876236, "learning_rate": 9.993425201714078e-06, "loss": 0.9693, "step": 1028 }, { "epoch": 0.09, "grad_norm": 8.378916986998767, "learning_rate": 9.993388116603759e-06, "loss": 0.9604, "step": 1029 }, { "epoch": 0.09, "grad_norm": 8.902511319087857, "learning_rate": 9.9933509272674e-06, "loss": 0.9816, "step": 1030 }, { "epoch": 0.09, "grad_norm": 8.096033612758523, "learning_rate": 9.993313633705775e-06, "loss": 0.978, "step": 1031 }, { "epoch": 0.09, "grad_norm": 9.160736057075026, "learning_rate": 9.993276235919663e-06, "loss": 0.9933, "step": 1032 }, { "epoch": 0.09, "grad_norm": 9.77309957280692, "learning_rate": 9.993238733909844e-06, "loss": 0.8811, "step": 1033 }, { "epoch": 0.09, "grad_norm": 11.631498981222252, "learning_rate": 9.993201127677104e-06, "loss": 1.0114, "step": 1034 }, { "epoch": 0.09, "grad_norm": 7.087523316616698, "learning_rate": 9.993163417222223e-06, "loss": 0.9418, "step": 1035 }, { "epoch": 0.09, "grad_norm": 14.897457268990928, "learning_rate": 9.99312560254599e-06, "loss": 1.0099, "step": 1036 }, { "epoch": 0.09, "grad_norm": 9.818179185956172, "learning_rate": 9.993087683649196e-06, "loss": 0.9763, "step": 1037 }, { "epoch": 0.09, "grad_norm": 8.787630368835707, "learning_rate": 9.993049660532633e-06, "loss": 1.0357, "step": 1038 }, { "epoch": 0.09, "grad_norm": 10.424558187525257, "learning_rate": 9.99301153319709e-06, "loss": 1.0173, "step": 1039 }, { "epoch": 0.09, "grad_norm": 6.820221460440834, "learning_rate": 9.992973301643368e-06, "loss": 0.9546, "step": 1040 }, { "epoch": 0.09, "grad_norm": 11.194490458945435, "learning_rate": 9.992934965872261e-06, "loss": 0.9475, "step": 1041 }, { "epoch": 0.09, "grad_norm": 8.17858339549725, "learning_rate": 9.992896525884572e-06, "loss": 0.9834, "step": 1042 }, { "epoch": 0.09, "grad_norm": 8.872104660303865, "learning_rate": 9.9928579816811e-06, "loss": 0.9579, "step": 1043 }, { "epoch": 0.09, "grad_norm": 8.064506078442822, "learning_rate": 9.992819333262656e-06, "loss": 1.0656, "step": 1044 }, { "epoch": 0.09, "grad_norm": 7.092237797117757, "learning_rate": 9.992780580630039e-06, "loss": 1.0115, "step": 1045 }, { "epoch": 0.09, "grad_norm": 10.514881133196614, "learning_rate": 9.992741723784064e-06, "loss": 0.9526, "step": 1046 }, { "epoch": 0.09, "grad_norm": 6.7504479707131715, "learning_rate": 9.992702762725538e-06, "loss": 0.974, "step": 1047 }, { "epoch": 0.09, "grad_norm": 10.972734810693535, "learning_rate": 9.992663697455274e-06, "loss": 1.0129, "step": 1048 }, { "epoch": 0.09, "grad_norm": 9.894894067139228, "learning_rate": 9.99262452797409e-06, "loss": 1.0223, "step": 1049 }, { "epoch": 0.09, "grad_norm": 7.5192801905960955, "learning_rate": 9.992585254282805e-06, "loss": 0.9669, "step": 1050 }, { "epoch": 0.09, "grad_norm": 7.483212836508495, "learning_rate": 9.992545876382235e-06, "loss": 0.9536, "step": 1051 }, { "epoch": 0.09, "grad_norm": 7.309198741285574, "learning_rate": 9.992506394273203e-06, "loss": 0.9619, "step": 1052 }, { "epoch": 0.09, "grad_norm": 8.238895653840148, "learning_rate": 9.992466807956534e-06, "loss": 0.9521, "step": 1053 }, { "epoch": 0.09, "grad_norm": 9.249532184885036, "learning_rate": 9.992427117433053e-06, "loss": 1.0104, "step": 1054 }, { "epoch": 0.09, "grad_norm": 9.469745764211813, "learning_rate": 9.99238732270359e-06, "loss": 1.0179, "step": 1055 }, { "epoch": 0.09, "grad_norm": 8.005452879324764, "learning_rate": 9.992347423768974e-06, "loss": 0.937, "step": 1056 }, { "epoch": 0.09, "grad_norm": 6.357685359828577, "learning_rate": 9.992307420630038e-06, "loss": 1.0259, "step": 1057 }, { "epoch": 0.09, "grad_norm": 6.991609413542076, "learning_rate": 9.992267313287618e-06, "loss": 0.9328, "step": 1058 }, { "epoch": 0.09, "grad_norm": 11.439572687771475, "learning_rate": 9.992227101742551e-06, "loss": 0.9814, "step": 1059 }, { "epoch": 0.09, "grad_norm": 6.40603838700476, "learning_rate": 9.992186785995678e-06, "loss": 1.0265, "step": 1060 }, { "epoch": 0.09, "grad_norm": 7.647546453756519, "learning_rate": 9.992146366047834e-06, "loss": 0.9508, "step": 1061 }, { "epoch": 0.09, "grad_norm": 9.542083744297042, "learning_rate": 9.99210584189987e-06, "loss": 0.9441, "step": 1062 }, { "epoch": 0.09, "grad_norm": 9.753833685031594, "learning_rate": 9.992065213552628e-06, "loss": 0.9985, "step": 1063 }, { "epoch": 0.09, "grad_norm": 9.222084465789717, "learning_rate": 9.992024481006959e-06, "loss": 0.9937, "step": 1064 }, { "epoch": 0.1, "grad_norm": 8.385555053065362, "learning_rate": 9.99198364426371e-06, "loss": 0.9857, "step": 1065 }, { "epoch": 0.1, "grad_norm": 7.005047827689797, "learning_rate": 9.991942703323736e-06, "loss": 0.9752, "step": 1066 }, { "epoch": 0.1, "grad_norm": 8.364364180372897, "learning_rate": 9.991901658187888e-06, "loss": 0.9678, "step": 1067 }, { "epoch": 0.1, "grad_norm": 6.994366622104884, "learning_rate": 9.991860508857027e-06, "loss": 0.9869, "step": 1068 }, { "epoch": 0.1, "grad_norm": 9.131050189862517, "learning_rate": 9.991819255332009e-06, "loss": 0.9861, "step": 1069 }, { "epoch": 0.1, "grad_norm": 7.835108516660802, "learning_rate": 9.991777897613696e-06, "loss": 0.9401, "step": 1070 }, { "epoch": 0.1, "grad_norm": 8.945187528534511, "learning_rate": 9.991736435702951e-06, "loss": 0.9114, "step": 1071 }, { "epoch": 0.1, "grad_norm": 7.285703586762205, "learning_rate": 9.99169486960064e-06, "loss": 1.0164, "step": 1072 }, { "epoch": 0.1, "grad_norm": 7.307220579657872, "learning_rate": 9.99165319930763e-06, "loss": 0.9134, "step": 1073 }, { "epoch": 0.1, "grad_norm": 9.768162382944341, "learning_rate": 9.991611424824792e-06, "loss": 1.0036, "step": 1074 }, { "epoch": 0.1, "grad_norm": 7.325942651797224, "learning_rate": 9.991569546152995e-06, "loss": 0.9295, "step": 1075 }, { "epoch": 0.1, "grad_norm": 6.961607266135269, "learning_rate": 9.991527563293117e-06, "loss": 0.9372, "step": 1076 }, { "epoch": 0.1, "grad_norm": 8.300526537914138, "learning_rate": 9.99148547624603e-06, "loss": 1.0138, "step": 1077 }, { "epoch": 0.1, "grad_norm": 7.158509872853647, "learning_rate": 9.991443285012616e-06, "loss": 0.9964, "step": 1078 }, { "epoch": 0.1, "grad_norm": 7.1548880254718785, "learning_rate": 9.991400989593756e-06, "loss": 0.9657, "step": 1079 }, { "epoch": 0.1, "grad_norm": 9.58402681642553, "learning_rate": 9.991358589990329e-06, "loss": 0.997, "step": 1080 }, { "epoch": 0.1, "grad_norm": 8.911764367428903, "learning_rate": 9.991316086203225e-06, "loss": 1.0246, "step": 1081 }, { "epoch": 0.1, "grad_norm": 5.916636586964335, "learning_rate": 9.991273478233325e-06, "loss": 0.9299, "step": 1082 }, { "epoch": 0.1, "grad_norm": 8.265002041438526, "learning_rate": 9.991230766081526e-06, "loss": 0.9729, "step": 1083 }, { "epoch": 0.1, "grad_norm": 8.26210632416989, "learning_rate": 9.991187949748712e-06, "loss": 0.9798, "step": 1084 }, { "epoch": 0.1, "grad_norm": 9.08383816682079, "learning_rate": 9.99114502923578e-06, "loss": 0.9042, "step": 1085 }, { "epoch": 0.1, "grad_norm": 6.969261617575683, "learning_rate": 9.991102004543629e-06, "loss": 0.9507, "step": 1086 }, { "epoch": 0.1, "grad_norm": 9.233904234480644, "learning_rate": 9.991058875673152e-06, "loss": 0.9114, "step": 1087 }, { "epoch": 0.1, "grad_norm": 7.4482507053481966, "learning_rate": 9.99101564262525e-06, "loss": 0.9864, "step": 1088 }, { "epoch": 0.1, "grad_norm": 7.837422677259048, "learning_rate": 9.990972305400827e-06, "loss": 0.9562, "step": 1089 }, { "epoch": 0.1, "grad_norm": 7.491255786958795, "learning_rate": 9.99092886400079e-06, "loss": 0.9381, "step": 1090 }, { "epoch": 0.1, "grad_norm": 8.347360729507525, "learning_rate": 9.99088531842604e-06, "loss": 0.9955, "step": 1091 }, { "epoch": 0.1, "grad_norm": 9.186263516867967, "learning_rate": 9.990841668677492e-06, "loss": 1.0879, "step": 1092 }, { "epoch": 0.1, "grad_norm": 9.602142781683266, "learning_rate": 9.99079791475605e-06, "loss": 0.9565, "step": 1093 }, { "epoch": 0.1, "grad_norm": 8.913049088861985, "learning_rate": 9.990754056662633e-06, "loss": 0.9948, "step": 1094 }, { "epoch": 0.1, "grad_norm": 9.924370182133304, "learning_rate": 9.990710094398156e-06, "loss": 0.9707, "step": 1095 }, { "epoch": 0.1, "grad_norm": 9.194990054620504, "learning_rate": 9.990666027963532e-06, "loss": 0.9848, "step": 1096 }, { "epoch": 0.1, "grad_norm": 6.5567424319790435, "learning_rate": 9.990621857359686e-06, "loss": 1.0207, "step": 1097 }, { "epoch": 0.1, "grad_norm": 7.133354823971637, "learning_rate": 9.99057758258754e-06, "loss": 1.0312, "step": 1098 }, { "epoch": 0.1, "grad_norm": 12.10945516692436, "learning_rate": 9.990533203648012e-06, "loss": 0.9866, "step": 1099 }, { "epoch": 0.1, "grad_norm": 7.2079617724930305, "learning_rate": 9.990488720542035e-06, "loss": 0.976, "step": 1100 }, { "epoch": 0.1, "grad_norm": 6.926600534418078, "learning_rate": 9.990444133270533e-06, "loss": 0.9907, "step": 1101 }, { "epoch": 0.1, "grad_norm": 8.115293934503965, "learning_rate": 9.990399441834441e-06, "loss": 1.0513, "step": 1102 }, { "epoch": 0.1, "grad_norm": 8.551324131734745, "learning_rate": 9.99035464623469e-06, "loss": 1.0111, "step": 1103 }, { "epoch": 0.1, "grad_norm": 9.63877848176835, "learning_rate": 9.990309746472211e-06, "loss": 0.9567, "step": 1104 }, { "epoch": 0.1, "grad_norm": 8.898827206499991, "learning_rate": 9.990264742547947e-06, "loss": 1.0162, "step": 1105 }, { "epoch": 0.1, "grad_norm": 6.631221936025121, "learning_rate": 9.990219634462834e-06, "loss": 0.9557, "step": 1106 }, { "epoch": 0.1, "grad_norm": 10.954341859754555, "learning_rate": 9.990174422217815e-06, "loss": 0.9339, "step": 1107 }, { "epoch": 0.1, "grad_norm": 6.929891941545815, "learning_rate": 9.990129105813835e-06, "loss": 0.9533, "step": 1108 }, { "epoch": 0.1, "grad_norm": 9.194863491065385, "learning_rate": 9.990083685251836e-06, "loss": 0.9565, "step": 1109 }, { "epoch": 0.1, "grad_norm": 8.4403315885118, "learning_rate": 9.990038160532767e-06, "loss": 1.01, "step": 1110 }, { "epoch": 0.1, "grad_norm": 8.65077841893017, "learning_rate": 9.989992531657582e-06, "loss": 0.9383, "step": 1111 }, { "epoch": 0.1, "grad_norm": 8.899303213142547, "learning_rate": 9.98994679862723e-06, "loss": 0.9888, "step": 1112 }, { "epoch": 0.1, "grad_norm": 7.434933395462747, "learning_rate": 9.989900961442666e-06, "loss": 1.0015, "step": 1113 }, { "epoch": 0.1, "grad_norm": 8.532893412512905, "learning_rate": 9.989855020104848e-06, "loss": 1.0047, "step": 1114 }, { "epoch": 0.1, "grad_norm": 10.300686901885404, "learning_rate": 9.989808974614733e-06, "loss": 0.9721, "step": 1115 }, { "epoch": 0.1, "grad_norm": 7.685013457120647, "learning_rate": 9.989762824973284e-06, "loss": 0.9542, "step": 1116 }, { "epoch": 0.1, "grad_norm": 7.2956331083394526, "learning_rate": 9.989716571181463e-06, "loss": 0.9746, "step": 1117 }, { "epoch": 0.1, "grad_norm": 12.977407482464809, "learning_rate": 9.989670213240234e-06, "loss": 1.0154, "step": 1118 }, { "epoch": 0.1, "grad_norm": 9.630693281254286, "learning_rate": 9.989623751150569e-06, "loss": 1.0001, "step": 1119 }, { "epoch": 0.1, "grad_norm": 7.365617509049695, "learning_rate": 9.989577184913436e-06, "loss": 0.9486, "step": 1120 }, { "epoch": 0.1, "grad_norm": 9.278454299311035, "learning_rate": 9.989530514529803e-06, "loss": 0.9455, "step": 1121 }, { "epoch": 0.1, "grad_norm": 7.405380323066382, "learning_rate": 9.98948374000065e-06, "loss": 0.9442, "step": 1122 }, { "epoch": 0.1, "grad_norm": 8.301957194625947, "learning_rate": 9.989436861326948e-06, "loss": 1.025, "step": 1123 }, { "epoch": 0.1, "grad_norm": 8.828943429354764, "learning_rate": 9.98938987850968e-06, "loss": 0.9831, "step": 1124 }, { "epoch": 0.1, "grad_norm": 7.989599638233167, "learning_rate": 9.989342791549827e-06, "loss": 0.9351, "step": 1125 }, { "epoch": 0.1, "grad_norm": 7.085517422343752, "learning_rate": 9.989295600448367e-06, "loss": 0.9816, "step": 1126 }, { "epoch": 0.1, "grad_norm": 9.156246058101658, "learning_rate": 9.989248305206288e-06, "loss": 0.908, "step": 1127 }, { "epoch": 0.1, "grad_norm": 7.165876708522559, "learning_rate": 9.989200905824578e-06, "loss": 0.9664, "step": 1128 }, { "epoch": 0.1, "grad_norm": 10.521264460833743, "learning_rate": 9.989153402304223e-06, "loss": 0.9988, "step": 1129 }, { "epoch": 0.1, "grad_norm": 12.531573057344238, "learning_rate": 9.989105794646219e-06, "loss": 0.9731, "step": 1130 }, { "epoch": 0.1, "grad_norm": 7.102079991088457, "learning_rate": 9.989058082851558e-06, "loss": 0.9432, "step": 1131 }, { "epoch": 0.1, "grad_norm": 9.467146681329364, "learning_rate": 9.989010266921233e-06, "loss": 0.9925, "step": 1132 }, { "epoch": 0.1, "grad_norm": 8.637783861205074, "learning_rate": 9.988962346856246e-06, "loss": 0.9592, "step": 1133 }, { "epoch": 0.1, "grad_norm": 7.326447599078782, "learning_rate": 9.988914322657594e-06, "loss": 0.9377, "step": 1134 }, { "epoch": 0.1, "grad_norm": 8.545377702360122, "learning_rate": 9.988866194326284e-06, "loss": 0.8623, "step": 1135 }, { "epoch": 0.1, "grad_norm": 9.314916041169402, "learning_rate": 9.988817961863315e-06, "loss": 0.9273, "step": 1136 }, { "epoch": 0.1, "grad_norm": 6.606474257623923, "learning_rate": 9.988769625269698e-06, "loss": 0.9468, "step": 1137 }, { "epoch": 0.1, "grad_norm": 6.8801302965003845, "learning_rate": 9.98872118454644e-06, "loss": 0.9928, "step": 1138 }, { "epoch": 0.1, "grad_norm": 6.025325171025866, "learning_rate": 9.98867263969455e-06, "loss": 0.9994, "step": 1139 }, { "epoch": 0.1, "grad_norm": 7.507025610536579, "learning_rate": 9.988623990715047e-06, "loss": 1.0042, "step": 1140 }, { "epoch": 0.1, "grad_norm": 7.793646076210973, "learning_rate": 9.988575237608942e-06, "loss": 0.9272, "step": 1141 }, { "epoch": 0.1, "grad_norm": 8.4003827006897, "learning_rate": 9.988526380377252e-06, "loss": 0.9636, "step": 1142 }, { "epoch": 0.1, "grad_norm": 8.743702067695908, "learning_rate": 9.988477419021e-06, "loss": 0.9635, "step": 1143 }, { "epoch": 0.1, "grad_norm": 7.514234122875846, "learning_rate": 9.988428353541207e-06, "loss": 0.9296, "step": 1144 }, { "epoch": 0.1, "grad_norm": 6.8210409063994195, "learning_rate": 9.988379183938897e-06, "loss": 0.929, "step": 1145 }, { "epoch": 0.1, "grad_norm": 7.4074527685742835, "learning_rate": 9.988329910215095e-06, "loss": 0.9513, "step": 1146 }, { "epoch": 0.1, "grad_norm": 10.054495137599615, "learning_rate": 9.988280532370828e-06, "loss": 0.9424, "step": 1147 }, { "epoch": 0.1, "grad_norm": 7.722207552765884, "learning_rate": 9.988231050407134e-06, "loss": 0.8753, "step": 1148 }, { "epoch": 0.1, "grad_norm": 7.486262518441393, "learning_rate": 9.988181464325037e-06, "loss": 0.9144, "step": 1149 }, { "epoch": 0.1, "grad_norm": 10.72661355953756, "learning_rate": 9.988131774125577e-06, "loss": 0.9415, "step": 1150 }, { "epoch": 0.1, "grad_norm": 8.29639097222841, "learning_rate": 9.98808197980979e-06, "loss": 0.938, "step": 1151 }, { "epoch": 0.1, "grad_norm": 7.7972277802652155, "learning_rate": 9.988032081378715e-06, "loss": 1.0217, "step": 1152 }, { "epoch": 0.1, "grad_norm": 8.85497993379049, "learning_rate": 9.987982078833394e-06, "loss": 0.9604, "step": 1153 }, { "epoch": 0.1, "grad_norm": 6.1269828936951445, "learning_rate": 9.987931972174872e-06, "loss": 0.9681, "step": 1154 }, { "epoch": 0.1, "grad_norm": 8.702938777086002, "learning_rate": 9.987881761404192e-06, "loss": 0.9376, "step": 1155 }, { "epoch": 0.1, "grad_norm": 8.707591098429688, "learning_rate": 9.987831446522404e-06, "loss": 0.9367, "step": 1156 }, { "epoch": 0.1, "grad_norm": 9.559553583996403, "learning_rate": 9.987781027530557e-06, "loss": 1.0019, "step": 1157 }, { "epoch": 0.1, "grad_norm": 6.505387412356555, "learning_rate": 9.987730504429704e-06, "loss": 1.0041, "step": 1158 }, { "epoch": 0.1, "grad_norm": 8.20425924440906, "learning_rate": 9.987679877220902e-06, "loss": 0.9574, "step": 1159 }, { "epoch": 0.1, "grad_norm": 6.672025359563292, "learning_rate": 9.987629145905203e-06, "loss": 0.9325, "step": 1160 }, { "epoch": 0.1, "grad_norm": 7.92358605095803, "learning_rate": 9.98757831048367e-06, "loss": 0.9326, "step": 1161 }, { "epoch": 0.1, "grad_norm": 7.1888861635689745, "learning_rate": 9.987527370957362e-06, "loss": 0.9666, "step": 1162 }, { "epoch": 0.1, "grad_norm": 10.089329785158665, "learning_rate": 9.98747632732734e-06, "loss": 0.9748, "step": 1163 }, { "epoch": 0.1, "grad_norm": 8.620397293353196, "learning_rate": 9.987425179594675e-06, "loss": 0.9464, "step": 1164 }, { "epoch": 0.1, "grad_norm": 5.869897310980044, "learning_rate": 9.987373927760432e-06, "loss": 0.9556, "step": 1165 }, { "epoch": 0.1, "grad_norm": 7.894540920338943, "learning_rate": 9.987322571825678e-06, "loss": 0.8911, "step": 1166 }, { "epoch": 0.1, "grad_norm": 6.4162187240772575, "learning_rate": 9.987271111791489e-06, "loss": 1.0573, "step": 1167 }, { "epoch": 0.1, "grad_norm": 7.881055160147344, "learning_rate": 9.987219547658937e-06, "loss": 0.9865, "step": 1168 }, { "epoch": 0.1, "grad_norm": 6.5866986148995, "learning_rate": 9.987167879429097e-06, "loss": 0.8932, "step": 1169 }, { "epoch": 0.1, "grad_norm": 11.757663189525715, "learning_rate": 9.987116107103052e-06, "loss": 0.97, "step": 1170 }, { "epoch": 0.1, "grad_norm": 6.529205159834894, "learning_rate": 9.98706423068188e-06, "loss": 0.8876, "step": 1171 }, { "epoch": 0.1, "grad_norm": 9.436513016982534, "learning_rate": 9.987012250166662e-06, "loss": 0.9587, "step": 1172 }, { "epoch": 0.1, "grad_norm": 7.099617321458918, "learning_rate": 9.986960165558487e-06, "loss": 0.9109, "step": 1173 }, { "epoch": 0.1, "grad_norm": 7.49854323931392, "learning_rate": 9.986907976858436e-06, "loss": 0.9095, "step": 1174 }, { "epoch": 0.1, "grad_norm": 9.001144446369654, "learning_rate": 9.986855684067604e-06, "loss": 0.9766, "step": 1175 }, { "epoch": 0.1, "grad_norm": 8.379721310870202, "learning_rate": 9.986803287187082e-06, "loss": 0.973, "step": 1176 }, { "epoch": 0.11, "grad_norm": 8.510805098844974, "learning_rate": 9.986750786217961e-06, "loss": 0.9332, "step": 1177 }, { "epoch": 0.11, "grad_norm": 9.41910804398586, "learning_rate": 9.986698181161338e-06, "loss": 1.0484, "step": 1178 }, { "epoch": 0.11, "grad_norm": 6.2917170055154035, "learning_rate": 9.986645472018312e-06, "loss": 0.9096, "step": 1179 }, { "epoch": 0.11, "grad_norm": 12.894847404801544, "learning_rate": 9.986592658789982e-06, "loss": 0.948, "step": 1180 }, { "epoch": 0.11, "grad_norm": 7.57405215872667, "learning_rate": 9.98653974147745e-06, "loss": 0.9816, "step": 1181 }, { "epoch": 0.11, "grad_norm": 8.267550264745159, "learning_rate": 9.986486720081821e-06, "loss": 0.8923, "step": 1182 }, { "epoch": 0.11, "grad_norm": 8.622565694635163, "learning_rate": 9.986433594604202e-06, "loss": 0.9461, "step": 1183 }, { "epoch": 0.11, "grad_norm": 8.152602289372348, "learning_rate": 9.986380365045703e-06, "loss": 0.9147, "step": 1184 }, { "epoch": 0.11, "grad_norm": 9.115420942282809, "learning_rate": 9.986327031407433e-06, "loss": 0.9088, "step": 1185 }, { "epoch": 0.11, "grad_norm": 7.863773129733878, "learning_rate": 9.986273593690506e-06, "loss": 1.0022, "step": 1186 }, { "epoch": 0.11, "grad_norm": 8.456631837738072, "learning_rate": 9.986220051896039e-06, "loss": 0.9723, "step": 1187 }, { "epoch": 0.11, "grad_norm": 5.633662348171267, "learning_rate": 9.986166406025146e-06, "loss": 0.8882, "step": 1188 }, { "epoch": 0.11, "grad_norm": 7.769937035758776, "learning_rate": 9.98611265607895e-06, "loss": 0.941, "step": 1189 }, { "epoch": 0.11, "grad_norm": 9.569560007246109, "learning_rate": 9.986058802058572e-06, "loss": 0.9329, "step": 1190 }, { "epoch": 0.11, "grad_norm": 9.419436645120479, "learning_rate": 9.986004843965134e-06, "loss": 0.9194, "step": 1191 }, { "epoch": 0.11, "grad_norm": 9.641382482363346, "learning_rate": 9.985950781799766e-06, "loss": 0.9732, "step": 1192 }, { "epoch": 0.11, "grad_norm": 7.498753855881156, "learning_rate": 9.985896615563593e-06, "loss": 0.9586, "step": 1193 }, { "epoch": 0.11, "grad_norm": 9.074609922092675, "learning_rate": 9.985842345257748e-06, "loss": 0.937, "step": 1194 }, { "epoch": 0.11, "grad_norm": 10.705637069858234, "learning_rate": 9.985787970883363e-06, "loss": 1.0124, "step": 1195 }, { "epoch": 0.11, "grad_norm": 12.639772993029775, "learning_rate": 9.985733492441571e-06, "loss": 1.0221, "step": 1196 }, { "epoch": 0.11, "grad_norm": 9.252758959482215, "learning_rate": 9.985678909933514e-06, "loss": 0.9501, "step": 1197 }, { "epoch": 0.11, "grad_norm": 6.64825405339732, "learning_rate": 9.985624223360327e-06, "loss": 0.9106, "step": 1198 }, { "epoch": 0.11, "grad_norm": 7.109733217191102, "learning_rate": 9.98556943272315e-06, "loss": 0.9475, "step": 1199 }, { "epoch": 0.11, "grad_norm": 7.213791084877371, "learning_rate": 9.985514538023134e-06, "loss": 1.0137, "step": 1200 }, { "epoch": 0.11, "grad_norm": 9.045456399696434, "learning_rate": 9.985459539261417e-06, "loss": 0.9416, "step": 1201 }, { "epoch": 0.11, "grad_norm": 8.335525722020186, "learning_rate": 9.98540443643915e-06, "loss": 0.9938, "step": 1202 }, { "epoch": 0.11, "grad_norm": 8.304753291064538, "learning_rate": 9.985349229557484e-06, "loss": 0.9436, "step": 1203 }, { "epoch": 0.11, "grad_norm": 7.522355577872712, "learning_rate": 9.985293918617572e-06, "loss": 0.9849, "step": 1204 }, { "epoch": 0.11, "grad_norm": 9.162614462254137, "learning_rate": 9.985238503620565e-06, "loss": 0.8583, "step": 1205 }, { "epoch": 0.11, "grad_norm": 7.111004371572633, "learning_rate": 9.985182984567621e-06, "loss": 0.9467, "step": 1206 }, { "epoch": 0.11, "grad_norm": 7.065856884940847, "learning_rate": 9.985127361459902e-06, "loss": 0.9344, "step": 1207 }, { "epoch": 0.11, "grad_norm": 8.605132651039941, "learning_rate": 9.985071634298564e-06, "loss": 0.9179, "step": 1208 }, { "epoch": 0.11, "grad_norm": 9.353934107653108, "learning_rate": 9.985015803084774e-06, "loss": 0.9645, "step": 1209 }, { "epoch": 0.11, "grad_norm": 9.432296894031921, "learning_rate": 9.984959867819696e-06, "loss": 0.9286, "step": 1210 }, { "epoch": 0.11, "grad_norm": 7.401606631111568, "learning_rate": 9.9849038285045e-06, "loss": 0.9698, "step": 1211 }, { "epoch": 0.11, "grad_norm": 13.193834510115998, "learning_rate": 9.984847685140351e-06, "loss": 1.0228, "step": 1212 }, { "epoch": 0.11, "grad_norm": 8.940343155388753, "learning_rate": 9.984791437728423e-06, "loss": 1.0495, "step": 1213 }, { "epoch": 0.11, "grad_norm": 7.028310604630057, "learning_rate": 9.984735086269889e-06, "loss": 0.8997, "step": 1214 }, { "epoch": 0.11, "grad_norm": 8.26749580704144, "learning_rate": 9.98467863076593e-06, "loss": 0.9317, "step": 1215 }, { "epoch": 0.11, "grad_norm": 6.22083249056773, "learning_rate": 9.984622071217718e-06, "loss": 0.9859, "step": 1216 }, { "epoch": 0.11, "grad_norm": 6.19080625515515, "learning_rate": 9.984565407626439e-06, "loss": 0.939, "step": 1217 }, { "epoch": 0.11, "grad_norm": 8.47814206451388, "learning_rate": 9.984508639993271e-06, "loss": 1.0215, "step": 1218 }, { "epoch": 0.11, "grad_norm": 8.01660523838273, "learning_rate": 9.9844517683194e-06, "loss": 0.9814, "step": 1219 }, { "epoch": 0.11, "grad_norm": 11.732328774115322, "learning_rate": 9.984394792606018e-06, "loss": 0.9729, "step": 1220 }, { "epoch": 0.11, "grad_norm": 7.314766515585315, "learning_rate": 9.984337712854307e-06, "loss": 0.9282, "step": 1221 }, { "epoch": 0.11, "grad_norm": 7.978357107338396, "learning_rate": 9.984280529065462e-06, "loss": 0.9217, "step": 1222 }, { "epoch": 0.11, "grad_norm": 7.130992415129934, "learning_rate": 9.984223241240678e-06, "loss": 0.9311, "step": 1223 }, { "epoch": 0.11, "grad_norm": 7.520005998889693, "learning_rate": 9.984165849381149e-06, "loss": 1.023, "step": 1224 }, { "epoch": 0.11, "grad_norm": 12.930753635384601, "learning_rate": 9.98410835348807e-06, "loss": 1.0149, "step": 1225 }, { "epoch": 0.11, "grad_norm": 9.051072034871773, "learning_rate": 9.984050753562648e-06, "loss": 1.0296, "step": 1226 }, { "epoch": 0.11, "grad_norm": 6.37327872859455, "learning_rate": 9.98399304960608e-06, "loss": 0.9703, "step": 1227 }, { "epoch": 0.11, "grad_norm": 7.508041502599887, "learning_rate": 9.98393524161957e-06, "loss": 1.003, "step": 1228 }, { "epoch": 0.11, "grad_norm": 8.663663508143976, "learning_rate": 9.98387732960433e-06, "loss": 0.9686, "step": 1229 }, { "epoch": 0.11, "grad_norm": 9.243696429590202, "learning_rate": 9.983819313561563e-06, "loss": 0.9904, "step": 1230 }, { "epoch": 0.11, "grad_norm": 8.170779312599446, "learning_rate": 9.983761193492482e-06, "loss": 0.9254, "step": 1231 }, { "epoch": 0.11, "grad_norm": 8.463172699851384, "learning_rate": 9.983702969398301e-06, "loss": 0.9612, "step": 1232 }, { "epoch": 0.11, "grad_norm": 8.011931850030773, "learning_rate": 9.983644641280234e-06, "loss": 0.9266, "step": 1233 }, { "epoch": 0.11, "grad_norm": 6.98027260352625, "learning_rate": 9.9835862091395e-06, "loss": 1.0495, "step": 1234 }, { "epoch": 0.11, "grad_norm": 7.458429028401512, "learning_rate": 9.983527672977318e-06, "loss": 1.0018, "step": 1235 }, { "epoch": 0.11, "grad_norm": 8.088941194700503, "learning_rate": 9.983469032794909e-06, "loss": 0.9319, "step": 1236 }, { "epoch": 0.11, "grad_norm": 8.861913589131415, "learning_rate": 9.983410288593497e-06, "loss": 0.9352, "step": 1237 }, { "epoch": 0.11, "grad_norm": 11.688252377321225, "learning_rate": 9.983351440374309e-06, "loss": 0.9319, "step": 1238 }, { "epoch": 0.11, "grad_norm": 8.18913119948747, "learning_rate": 9.983292488138573e-06, "loss": 0.8321, "step": 1239 }, { "epoch": 0.11, "grad_norm": 7.418702550186432, "learning_rate": 9.98323343188752e-06, "loss": 0.9929, "step": 1240 }, { "epoch": 0.11, "grad_norm": 7.8987301047324, "learning_rate": 9.983174271622382e-06, "loss": 0.9516, "step": 1241 }, { "epoch": 0.11, "grad_norm": 9.651851622040894, "learning_rate": 9.983115007344394e-06, "loss": 1.004, "step": 1242 }, { "epoch": 0.11, "grad_norm": 8.559996202245957, "learning_rate": 9.983055639054793e-06, "loss": 0.9011, "step": 1243 }, { "epoch": 0.11, "grad_norm": 15.212841122125589, "learning_rate": 9.982996166754819e-06, "loss": 1.02, "step": 1244 }, { "epoch": 0.11, "grad_norm": 6.906746773294869, "learning_rate": 9.982936590445711e-06, "loss": 0.9467, "step": 1245 }, { "epoch": 0.11, "grad_norm": 8.258063866301292, "learning_rate": 9.982876910128716e-06, "loss": 0.8862, "step": 1246 }, { "epoch": 0.11, "grad_norm": 8.20897419131393, "learning_rate": 9.982817125805076e-06, "loss": 1.0499, "step": 1247 }, { "epoch": 0.11, "grad_norm": 8.93139806355661, "learning_rate": 9.982757237476042e-06, "loss": 0.9842, "step": 1248 }, { "epoch": 0.11, "grad_norm": 8.474674055697095, "learning_rate": 9.982697245142863e-06, "loss": 0.9798, "step": 1249 }, { "epoch": 0.11, "grad_norm": 9.69138732584315, "learning_rate": 9.98263714880679e-06, "loss": 0.9866, "step": 1250 }, { "epoch": 0.11, "grad_norm": 8.121282559565715, "learning_rate": 9.982576948469079e-06, "loss": 0.9382, "step": 1251 }, { "epoch": 0.11, "grad_norm": 6.193833491172351, "learning_rate": 9.982516644130984e-06, "loss": 0.9638, "step": 1252 }, { "epoch": 0.11, "grad_norm": 9.747625268753172, "learning_rate": 9.982456235793767e-06, "loss": 1.0191, "step": 1253 }, { "epoch": 0.11, "grad_norm": 7.898614454753023, "learning_rate": 9.982395723458688e-06, "loss": 0.9298, "step": 1254 }, { "epoch": 0.11, "grad_norm": 6.4215107898928405, "learning_rate": 9.982335107127008e-06, "loss": 0.9039, "step": 1255 }, { "epoch": 0.11, "grad_norm": 7.286866785737138, "learning_rate": 9.982274386799996e-06, "loss": 0.9811, "step": 1256 }, { "epoch": 0.11, "grad_norm": 8.136200755864387, "learning_rate": 9.982213562478915e-06, "loss": 0.9247, "step": 1257 }, { "epoch": 0.11, "grad_norm": 6.743672423386007, "learning_rate": 9.982152634165037e-06, "loss": 1.0043, "step": 1258 }, { "epoch": 0.11, "grad_norm": 7.5032247859144325, "learning_rate": 9.982091601859634e-06, "loss": 1.0311, "step": 1259 }, { "epoch": 0.11, "grad_norm": 8.066509189847997, "learning_rate": 9.982030465563981e-06, "loss": 0.9259, "step": 1260 }, { "epoch": 0.11, "grad_norm": 8.550812327149089, "learning_rate": 9.981969225279351e-06, "loss": 0.8951, "step": 1261 }, { "epoch": 0.11, "grad_norm": 8.270534072059098, "learning_rate": 9.981907881007023e-06, "loss": 0.9649, "step": 1262 }, { "epoch": 0.11, "grad_norm": 7.96328432718407, "learning_rate": 9.98184643274828e-06, "loss": 1.0126, "step": 1263 }, { "epoch": 0.11, "grad_norm": 8.504304250387007, "learning_rate": 9.981784880504402e-06, "loss": 0.9901, "step": 1264 }, { "epoch": 0.11, "grad_norm": 9.739176256622612, "learning_rate": 9.981723224276673e-06, "loss": 0.9448, "step": 1265 }, { "epoch": 0.11, "grad_norm": 9.694588704503909, "learning_rate": 9.981661464066383e-06, "loss": 1.0384, "step": 1266 }, { "epoch": 0.11, "grad_norm": 8.077271730312509, "learning_rate": 9.98159959987482e-06, "loss": 0.9606, "step": 1267 }, { "epoch": 0.11, "grad_norm": 9.057522544848359, "learning_rate": 9.981537631703273e-06, "loss": 0.9693, "step": 1268 }, { "epoch": 0.11, "grad_norm": 13.543450741620813, "learning_rate": 9.981475559553039e-06, "loss": 0.9713, "step": 1269 }, { "epoch": 0.11, "grad_norm": 8.691933529154625, "learning_rate": 9.981413383425411e-06, "loss": 0.9754, "step": 1270 }, { "epoch": 0.11, "grad_norm": 9.75677864545581, "learning_rate": 9.981351103321689e-06, "loss": 0.9403, "step": 1271 }, { "epoch": 0.11, "grad_norm": 8.838569790617246, "learning_rate": 9.981288719243171e-06, "loss": 0.9842, "step": 1272 }, { "epoch": 0.11, "grad_norm": 7.62095799062756, "learning_rate": 9.98122623119116e-06, "loss": 0.9548, "step": 1273 }, { "epoch": 0.11, "grad_norm": 7.209254676221442, "learning_rate": 9.981163639166958e-06, "loss": 0.9037, "step": 1274 }, { "epoch": 0.11, "grad_norm": 6.849038535512064, "learning_rate": 9.981100943171876e-06, "loss": 0.9603, "step": 1275 }, { "epoch": 0.11, "grad_norm": 13.734334579243923, "learning_rate": 9.98103814320722e-06, "loss": 1.0152, "step": 1276 }, { "epoch": 0.11, "grad_norm": 6.669537240922854, "learning_rate": 9.9809752392743e-06, "loss": 0.9393, "step": 1277 }, { "epoch": 0.11, "grad_norm": 8.448754496315086, "learning_rate": 9.980912231374431e-06, "loss": 0.9366, "step": 1278 }, { "epoch": 0.11, "grad_norm": 10.907337796447488, "learning_rate": 9.980849119508927e-06, "loss": 0.9386, "step": 1279 }, { "epoch": 0.11, "grad_norm": 8.965369016016487, "learning_rate": 9.980785903679106e-06, "loss": 0.9068, "step": 1280 }, { "epoch": 0.11, "grad_norm": 7.549956437511614, "learning_rate": 9.980722583886286e-06, "loss": 0.969, "step": 1281 }, { "epoch": 0.11, "grad_norm": 7.743954925855676, "learning_rate": 9.98065916013179e-06, "loss": 0.9311, "step": 1282 }, { "epoch": 0.11, "grad_norm": 8.401469494581274, "learning_rate": 9.980595632416942e-06, "loss": 0.9175, "step": 1283 }, { "epoch": 0.11, "grad_norm": 9.101171664745694, "learning_rate": 9.980532000743066e-06, "loss": 0.8783, "step": 1284 }, { "epoch": 0.11, "grad_norm": 7.890382506358185, "learning_rate": 9.980468265111493e-06, "loss": 0.9905, "step": 1285 }, { "epoch": 0.11, "grad_norm": 7.259861453659001, "learning_rate": 9.980404425523552e-06, "loss": 0.9246, "step": 1286 }, { "epoch": 0.11, "grad_norm": 5.155485946474958, "learning_rate": 9.980340481980574e-06, "loss": 0.9348, "step": 1287 }, { "epoch": 0.11, "grad_norm": 7.221275704072247, "learning_rate": 9.980276434483898e-06, "loss": 0.9977, "step": 1288 }, { "epoch": 0.11, "grad_norm": 8.677039877850333, "learning_rate": 9.980212283034856e-06, "loss": 0.9292, "step": 1289 }, { "epoch": 0.12, "grad_norm": 9.709714414439395, "learning_rate": 9.980148027634791e-06, "loss": 0.9038, "step": 1290 }, { "epoch": 0.12, "grad_norm": 6.797949864259427, "learning_rate": 9.98008366828504e-06, "loss": 0.906, "step": 1291 }, { "epoch": 0.12, "grad_norm": 7.085238797674527, "learning_rate": 9.98001920498695e-06, "loss": 0.9509, "step": 1292 }, { "epoch": 0.12, "grad_norm": 9.656538276521108, "learning_rate": 9.979954637741865e-06, "loss": 0.9772, "step": 1293 }, { "epoch": 0.12, "grad_norm": 8.5257275472863, "learning_rate": 9.97988996655113e-06, "loss": 0.9157, "step": 1294 }, { "epoch": 0.12, "grad_norm": 7.264016564336823, "learning_rate": 9.979825191416101e-06, "loss": 0.9801, "step": 1295 }, { "epoch": 0.12, "grad_norm": 9.51206960813078, "learning_rate": 9.979760312338127e-06, "loss": 0.9331, "step": 1296 }, { "epoch": 0.12, "grad_norm": 9.585030904735335, "learning_rate": 9.97969532931856e-06, "loss": 0.9002, "step": 1297 }, { "epoch": 0.12, "grad_norm": 9.865124609143631, "learning_rate": 9.97963024235876e-06, "loss": 0.9378, "step": 1298 }, { "epoch": 0.12, "grad_norm": 9.140483739434602, "learning_rate": 9.979565051460083e-06, "loss": 0.9278, "step": 1299 }, { "epoch": 0.12, "grad_norm": 7.210233560495285, "learning_rate": 9.97949975662389e-06, "loss": 0.9754, "step": 1300 }, { "epoch": 0.12, "grad_norm": 9.524364412868774, "learning_rate": 9.979434357851545e-06, "loss": 0.9832, "step": 1301 }, { "epoch": 0.12, "grad_norm": 7.205446824122753, "learning_rate": 9.979368855144413e-06, "loss": 0.9434, "step": 1302 }, { "epoch": 0.12, "grad_norm": 8.07719120486523, "learning_rate": 9.97930324850386e-06, "loss": 0.9433, "step": 1303 }, { "epoch": 0.12, "grad_norm": 7.311973327765198, "learning_rate": 9.979237537931257e-06, "loss": 0.9017, "step": 1304 }, { "epoch": 0.12, "grad_norm": 6.8831141599387555, "learning_rate": 9.979171723427975e-06, "loss": 0.9155, "step": 1305 }, { "epoch": 0.12, "grad_norm": 10.79220419173369, "learning_rate": 9.979105804995388e-06, "loss": 0.8773, "step": 1306 }, { "epoch": 0.12, "grad_norm": 7.1573271876080184, "learning_rate": 9.97903978263487e-06, "loss": 0.9072, "step": 1307 }, { "epoch": 0.12, "grad_norm": 11.310367111165748, "learning_rate": 9.978973656347801e-06, "loss": 0.9492, "step": 1308 }, { "epoch": 0.12, "grad_norm": 6.283892069314969, "learning_rate": 9.97890742613556e-06, "loss": 0.9458, "step": 1309 }, { "epoch": 0.12, "grad_norm": 6.16561713730703, "learning_rate": 9.97884109199953e-06, "loss": 0.9782, "step": 1310 }, { "epoch": 0.12, "grad_norm": 7.827234941328474, "learning_rate": 9.978774653941096e-06, "loss": 0.8969, "step": 1311 }, { "epoch": 0.12, "grad_norm": 9.786008277869296, "learning_rate": 9.978708111961645e-06, "loss": 1.0277, "step": 1312 }, { "epoch": 0.12, "grad_norm": 9.758791677964346, "learning_rate": 9.978641466062566e-06, "loss": 0.9825, "step": 1313 }, { "epoch": 0.12, "grad_norm": 6.572434089479751, "learning_rate": 9.978574716245248e-06, "loss": 1.0085, "step": 1314 }, { "epoch": 0.12, "grad_norm": 6.93791478341313, "learning_rate": 9.978507862511087e-06, "loss": 0.9712, "step": 1315 }, { "epoch": 0.12, "grad_norm": 7.01848007793686, "learning_rate": 9.978440904861474e-06, "loss": 0.9305, "step": 1316 }, { "epoch": 0.12, "grad_norm": 9.315182319147407, "learning_rate": 9.978373843297814e-06, "loss": 0.9261, "step": 1317 }, { "epoch": 0.12, "grad_norm": 7.371892337072602, "learning_rate": 9.9783066778215e-06, "loss": 0.9379, "step": 1318 }, { "epoch": 0.12, "grad_norm": 8.55806994215191, "learning_rate": 9.978239408433936e-06, "loss": 0.8601, "step": 1319 }, { "epoch": 0.12, "grad_norm": 11.011020774129388, "learning_rate": 9.978172035136528e-06, "loss": 0.9355, "step": 1320 }, { "epoch": 0.12, "grad_norm": 7.810276556662753, "learning_rate": 9.97810455793068e-06, "loss": 0.9866, "step": 1321 }, { "epoch": 0.12, "grad_norm": 8.426495676264583, "learning_rate": 9.978036976817802e-06, "loss": 0.8887, "step": 1322 }, { "epoch": 0.12, "grad_norm": 6.084751337251286, "learning_rate": 9.977969291799303e-06, "loss": 0.9734, "step": 1323 }, { "epoch": 0.12, "grad_norm": 8.481151930344522, "learning_rate": 9.977901502876597e-06, "loss": 0.9483, "step": 1324 }, { "epoch": 0.12, "grad_norm": 7.121779377136498, "learning_rate": 9.977833610051097e-06, "loss": 0.8844, "step": 1325 }, { "epoch": 0.12, "grad_norm": 8.883010817170145, "learning_rate": 9.977765613324225e-06, "loss": 0.967, "step": 1326 }, { "epoch": 0.12, "grad_norm": 6.701406343132003, "learning_rate": 9.977697512697394e-06, "loss": 0.983, "step": 1327 }, { "epoch": 0.12, "grad_norm": 8.83416916297677, "learning_rate": 9.97762930817203e-06, "loss": 0.9384, "step": 1328 }, { "epoch": 0.12, "grad_norm": 10.10298692444333, "learning_rate": 9.977560999749553e-06, "loss": 0.9397, "step": 1329 }, { "epoch": 0.12, "grad_norm": 8.10547203011475, "learning_rate": 9.977492587431391e-06, "loss": 0.9897, "step": 1330 }, { "epoch": 0.12, "grad_norm": 9.005537602418642, "learning_rate": 9.977424071218975e-06, "loss": 0.9393, "step": 1331 }, { "epoch": 0.12, "grad_norm": 9.031776983725786, "learning_rate": 9.97735545111373e-06, "loss": 0.8998, "step": 1332 }, { "epoch": 0.12, "grad_norm": 7.546304655292217, "learning_rate": 9.977286727117089e-06, "loss": 1.0029, "step": 1333 }, { "epoch": 0.12, "grad_norm": 8.607986167446821, "learning_rate": 9.977217899230488e-06, "loss": 0.9635, "step": 1334 }, { "epoch": 0.12, "grad_norm": 7.2417322055113456, "learning_rate": 9.977148967455365e-06, "loss": 0.9524, "step": 1335 }, { "epoch": 0.12, "grad_norm": 9.766439290215281, "learning_rate": 9.977079931793156e-06, "loss": 0.965, "step": 1336 }, { "epoch": 0.12, "grad_norm": 9.259837233298295, "learning_rate": 9.977010792245303e-06, "loss": 0.9461, "step": 1337 }, { "epoch": 0.12, "grad_norm": 7.581119162621205, "learning_rate": 9.97694154881325e-06, "loss": 0.9406, "step": 1338 }, { "epoch": 0.12, "grad_norm": 8.629874706365678, "learning_rate": 9.97687220149844e-06, "loss": 0.9116, "step": 1339 }, { "epoch": 0.12, "grad_norm": 9.023039510668053, "learning_rate": 9.976802750302323e-06, "loss": 0.9557, "step": 1340 }, { "epoch": 0.12, "grad_norm": 7.350161042865411, "learning_rate": 9.976733195226347e-06, "loss": 0.8974, "step": 1341 }, { "epoch": 0.12, "grad_norm": 9.428181666680423, "learning_rate": 9.976663536271964e-06, "loss": 0.979, "step": 1342 }, { "epoch": 0.12, "grad_norm": 7.4830350675783, "learning_rate": 9.976593773440629e-06, "loss": 0.9363, "step": 1343 }, { "epoch": 0.12, "grad_norm": 7.2872145602477, "learning_rate": 9.976523906733798e-06, "loss": 0.9839, "step": 1344 }, { "epoch": 0.12, "grad_norm": 7.4572992459170235, "learning_rate": 9.976453936152927e-06, "loss": 0.9499, "step": 1345 }, { "epoch": 0.12, "grad_norm": 7.568250627843918, "learning_rate": 9.976383861699482e-06, "loss": 0.9154, "step": 1346 }, { "epoch": 0.12, "grad_norm": 8.787210614509, "learning_rate": 9.976313683374918e-06, "loss": 0.9099, "step": 1347 }, { "epoch": 0.12, "grad_norm": 9.668733736831303, "learning_rate": 9.976243401180708e-06, "loss": 0.8941, "step": 1348 }, { "epoch": 0.12, "grad_norm": 8.67169873013758, "learning_rate": 9.97617301511831e-06, "loss": 0.9313, "step": 1349 }, { "epoch": 0.12, "grad_norm": 9.338279321264206, "learning_rate": 9.976102525189202e-06, "loss": 0.9329, "step": 1350 }, { "epoch": 0.12, "grad_norm": 7.153217958801919, "learning_rate": 9.97603193139485e-06, "loss": 0.9762, "step": 1351 }, { "epoch": 0.12, "grad_norm": 7.445429429087869, "learning_rate": 9.975961233736727e-06, "loss": 0.9491, "step": 1352 }, { "epoch": 0.12, "grad_norm": 6.861502126813018, "learning_rate": 9.975890432216312e-06, "loss": 0.897, "step": 1353 }, { "epoch": 0.12, "grad_norm": 7.032212918560607, "learning_rate": 9.97581952683508e-06, "loss": 1.0228, "step": 1354 }, { "epoch": 0.12, "grad_norm": 6.712342143413182, "learning_rate": 9.975748517594515e-06, "loss": 0.9654, "step": 1355 }, { "epoch": 0.12, "grad_norm": 9.665911040455217, "learning_rate": 9.975677404496093e-06, "loss": 1.0147, "step": 1356 }, { "epoch": 0.12, "grad_norm": 6.9408618294008395, "learning_rate": 9.975606187541305e-06, "loss": 0.9692, "step": 1357 }, { "epoch": 0.12, "grad_norm": 9.08487712606646, "learning_rate": 9.975534866731631e-06, "loss": 0.9315, "step": 1358 }, { "epoch": 0.12, "grad_norm": 7.54238476281645, "learning_rate": 9.975463442068565e-06, "loss": 1.0039, "step": 1359 }, { "epoch": 0.12, "grad_norm": 6.142342608252962, "learning_rate": 9.975391913553595e-06, "loss": 0.9865, "step": 1360 }, { "epoch": 0.12, "grad_norm": 7.272058921016833, "learning_rate": 9.975320281188215e-06, "loss": 1.0235, "step": 1361 }, { "epoch": 0.12, "grad_norm": 6.41415030257654, "learning_rate": 9.975248544973919e-06, "loss": 0.9616, "step": 1362 }, { "epoch": 0.12, "grad_norm": 12.859808225315065, "learning_rate": 9.975176704912206e-06, "loss": 0.8953, "step": 1363 }, { "epoch": 0.12, "grad_norm": 7.225301879452562, "learning_rate": 9.975104761004574e-06, "loss": 0.9341, "step": 1364 }, { "epoch": 0.12, "grad_norm": 6.676258468386849, "learning_rate": 9.975032713252527e-06, "loss": 0.8688, "step": 1365 }, { "epoch": 0.12, "grad_norm": 7.4963208564464745, "learning_rate": 9.974960561657567e-06, "loss": 0.8833, "step": 1366 }, { "epoch": 0.12, "grad_norm": 9.238044518885136, "learning_rate": 9.9748883062212e-06, "loss": 0.9369, "step": 1367 }, { "epoch": 0.12, "grad_norm": 7.378147187315362, "learning_rate": 9.974815946944933e-06, "loss": 0.9544, "step": 1368 }, { "epoch": 0.12, "grad_norm": 6.3452465843525605, "learning_rate": 9.97474348383028e-06, "loss": 0.9083, "step": 1369 }, { "epoch": 0.12, "grad_norm": 8.527799805884163, "learning_rate": 9.97467091687875e-06, "loss": 0.9858, "step": 1370 }, { "epoch": 0.12, "grad_norm": 7.089482221535015, "learning_rate": 9.974598246091859e-06, "loss": 0.9156, "step": 1371 }, { "epoch": 0.12, "grad_norm": 10.045131168120767, "learning_rate": 9.974525471471125e-06, "loss": 0.99, "step": 1372 }, { "epoch": 0.12, "grad_norm": 9.11140258072261, "learning_rate": 9.974452593018064e-06, "loss": 0.9926, "step": 1373 }, { "epoch": 0.12, "grad_norm": 7.026847242409577, "learning_rate": 9.974379610734199e-06, "loss": 0.9397, "step": 1374 }, { "epoch": 0.12, "grad_norm": 7.599693175612462, "learning_rate": 9.974306524621054e-06, "loss": 0.9533, "step": 1375 }, { "epoch": 0.12, "grad_norm": 6.312198526422322, "learning_rate": 9.974233334680155e-06, "loss": 0.9174, "step": 1376 }, { "epoch": 0.12, "grad_norm": 10.070508827867345, "learning_rate": 9.974160040913025e-06, "loss": 0.9779, "step": 1377 }, { "epoch": 0.12, "grad_norm": 8.718744347070423, "learning_rate": 9.974086643321202e-06, "loss": 0.941, "step": 1378 }, { "epoch": 0.12, "grad_norm": 7.752182428921662, "learning_rate": 9.97401314190621e-06, "loss": 0.9223, "step": 1379 }, { "epoch": 0.12, "grad_norm": 7.888891295129369, "learning_rate": 9.973939536669589e-06, "loss": 0.9426, "step": 1380 }, { "epoch": 0.12, "grad_norm": 7.936479854797045, "learning_rate": 9.973865827612869e-06, "loss": 0.9073, "step": 1381 }, { "epoch": 0.12, "grad_norm": 7.536103427839438, "learning_rate": 9.973792014737596e-06, "loss": 0.9239, "step": 1382 }, { "epoch": 0.12, "grad_norm": 9.534424925651377, "learning_rate": 9.973718098045307e-06, "loss": 0.8731, "step": 1383 }, { "epoch": 0.12, "grad_norm": 8.395004677931334, "learning_rate": 9.973644077537543e-06, "loss": 0.972, "step": 1384 }, { "epoch": 0.12, "grad_norm": 8.062003110464344, "learning_rate": 9.973569953215853e-06, "loss": 0.9652, "step": 1385 }, { "epoch": 0.12, "grad_norm": 8.61661016289107, "learning_rate": 9.973495725081782e-06, "loss": 0.9774, "step": 1386 }, { "epoch": 0.12, "grad_norm": 9.225084141819947, "learning_rate": 9.973421393136877e-06, "loss": 0.8932, "step": 1387 }, { "epoch": 0.12, "grad_norm": 7.174980563139468, "learning_rate": 9.973346957382694e-06, "loss": 0.9222, "step": 1388 }, { "epoch": 0.12, "grad_norm": 10.493626974999602, "learning_rate": 9.973272417820785e-06, "loss": 0.9373, "step": 1389 }, { "epoch": 0.12, "grad_norm": 7.748764182554102, "learning_rate": 9.973197774452705e-06, "loss": 0.9231, "step": 1390 }, { "epoch": 0.12, "grad_norm": 10.271093409145028, "learning_rate": 9.973123027280013e-06, "loss": 0.8543, "step": 1391 }, { "epoch": 0.12, "grad_norm": 8.436258019637284, "learning_rate": 9.973048176304267e-06, "loss": 0.8816, "step": 1392 }, { "epoch": 0.12, "grad_norm": 8.422706784420768, "learning_rate": 9.972973221527034e-06, "loss": 0.9169, "step": 1393 }, { "epoch": 0.12, "grad_norm": 8.92028105455892, "learning_rate": 9.972898162949873e-06, "loss": 1.0274, "step": 1394 }, { "epoch": 0.12, "grad_norm": 6.786742245681121, "learning_rate": 9.972823000574356e-06, "loss": 0.9764, "step": 1395 }, { "epoch": 0.12, "grad_norm": 5.2748910591766975, "learning_rate": 9.972747734402047e-06, "loss": 0.9217, "step": 1396 }, { "epoch": 0.12, "grad_norm": 6.456598129428456, "learning_rate": 9.97267236443452e-06, "loss": 0.874, "step": 1397 }, { "epoch": 0.12, "grad_norm": 7.560215297114147, "learning_rate": 9.972596890673348e-06, "loss": 0.9331, "step": 1398 }, { "epoch": 0.12, "grad_norm": 7.382915011542906, "learning_rate": 9.972521313120104e-06, "loss": 0.8948, "step": 1399 }, { "epoch": 0.12, "grad_norm": 6.8005493625211395, "learning_rate": 9.972445631776369e-06, "loss": 0.9445, "step": 1400 }, { "epoch": 0.12, "grad_norm": 9.193045312205014, "learning_rate": 9.972369846643718e-06, "loss": 0.9673, "step": 1401 }, { "epoch": 0.13, "grad_norm": 9.489185159993385, "learning_rate": 9.972293957723738e-06, "loss": 0.964, "step": 1402 }, { "epoch": 0.13, "grad_norm": 9.124306280574945, "learning_rate": 9.97221796501801e-06, "loss": 0.964, "step": 1403 }, { "epoch": 0.13, "grad_norm": 9.014632279641974, "learning_rate": 9.972141868528122e-06, "loss": 0.9243, "step": 1404 }, { "epoch": 0.13, "grad_norm": 7.00376633299006, "learning_rate": 9.972065668255661e-06, "loss": 0.9367, "step": 1405 }, { "epoch": 0.13, "grad_norm": 8.608011079071936, "learning_rate": 9.971989364202218e-06, "loss": 0.9971, "step": 1406 }, { "epoch": 0.13, "grad_norm": 7.539719960602782, "learning_rate": 9.971912956369384e-06, "loss": 0.9041, "step": 1407 }, { "epoch": 0.13, "grad_norm": 7.581733229210309, "learning_rate": 9.971836444758757e-06, "loss": 0.9185, "step": 1408 }, { "epoch": 0.13, "grad_norm": 7.591490397915814, "learning_rate": 9.971759829371932e-06, "loss": 0.8614, "step": 1409 }, { "epoch": 0.13, "grad_norm": 7.85093714956467, "learning_rate": 9.971683110210508e-06, "loss": 0.9125, "step": 1410 }, { "epoch": 0.13, "grad_norm": 9.793398650791511, "learning_rate": 9.971606287276087e-06, "loss": 0.9388, "step": 1411 }, { "epoch": 0.13, "grad_norm": 8.042171884680501, "learning_rate": 9.971529360570274e-06, "loss": 0.9215, "step": 1412 }, { "epoch": 0.13, "grad_norm": 8.764365488674578, "learning_rate": 9.97145233009467e-06, "loss": 0.8917, "step": 1413 }, { "epoch": 0.13, "grad_norm": 7.278725636110836, "learning_rate": 9.971375195850888e-06, "loss": 0.9283, "step": 1414 }, { "epoch": 0.13, "grad_norm": 9.308213058636161, "learning_rate": 9.971297957840536e-06, "loss": 1.0105, "step": 1415 }, { "epoch": 0.13, "grad_norm": 6.1044021065483465, "learning_rate": 9.971220616065225e-06, "loss": 0.8685, "step": 1416 }, { "epoch": 0.13, "grad_norm": 8.24260011826315, "learning_rate": 9.971143170526571e-06, "loss": 0.9139, "step": 1417 }, { "epoch": 0.13, "grad_norm": 6.910762315653145, "learning_rate": 9.97106562122619e-06, "loss": 0.9503, "step": 1418 }, { "epoch": 0.13, "grad_norm": 7.875247249659718, "learning_rate": 9.9709879681657e-06, "loss": 0.9025, "step": 1419 }, { "epoch": 0.13, "grad_norm": 8.101539209121077, "learning_rate": 9.970910211346723e-06, "loss": 0.9489, "step": 1420 }, { "epoch": 0.13, "grad_norm": 6.794269038346682, "learning_rate": 9.970832350770882e-06, "loss": 0.9511, "step": 1421 }, { "epoch": 0.13, "grad_norm": 8.259308646964794, "learning_rate": 9.9707543864398e-06, "loss": 0.8548, "step": 1422 }, { "epoch": 0.13, "grad_norm": 9.07995805562377, "learning_rate": 9.970676318355108e-06, "loss": 0.9039, "step": 1423 }, { "epoch": 0.13, "grad_norm": 9.334089375380962, "learning_rate": 9.970598146518431e-06, "loss": 0.938, "step": 1424 }, { "epoch": 0.13, "grad_norm": 10.429980010464089, "learning_rate": 9.970519870931405e-06, "loss": 0.9694, "step": 1425 }, { "epoch": 0.13, "grad_norm": 6.517376976859172, "learning_rate": 9.970441491595662e-06, "loss": 0.9931, "step": 1426 }, { "epoch": 0.13, "grad_norm": 9.15408167098142, "learning_rate": 9.970363008512836e-06, "loss": 0.9004, "step": 1427 }, { "epoch": 0.13, "grad_norm": 8.870985848401764, "learning_rate": 9.97028442168457e-06, "loss": 0.9558, "step": 1428 }, { "epoch": 0.13, "grad_norm": 8.458755578375824, "learning_rate": 9.970205731112499e-06, "loss": 0.9081, "step": 1429 }, { "epoch": 0.13, "grad_norm": 6.695345012298683, "learning_rate": 9.97012693679827e-06, "loss": 0.9563, "step": 1430 }, { "epoch": 0.13, "grad_norm": 5.929847616312393, "learning_rate": 9.970048038743523e-06, "loss": 0.873, "step": 1431 }, { "epoch": 0.13, "grad_norm": 6.089376367160769, "learning_rate": 9.96996903694991e-06, "loss": 0.8906, "step": 1432 }, { "epoch": 0.13, "grad_norm": 8.106364765616947, "learning_rate": 9.969889931419076e-06, "loss": 0.9084, "step": 1433 }, { "epoch": 0.13, "grad_norm": 5.977237367091928, "learning_rate": 9.969810722152672e-06, "loss": 0.9273, "step": 1434 }, { "epoch": 0.13, "grad_norm": 6.461515941192997, "learning_rate": 9.969731409152357e-06, "loss": 0.9659, "step": 1435 }, { "epoch": 0.13, "grad_norm": 8.092915242835346, "learning_rate": 9.96965199241978e-06, "loss": 0.9358, "step": 1436 }, { "epoch": 0.13, "grad_norm": 7.873865237724646, "learning_rate": 9.969572471956601e-06, "loss": 0.9388, "step": 1437 }, { "epoch": 0.13, "grad_norm": 8.160620534612267, "learning_rate": 9.969492847764479e-06, "loss": 0.9129, "step": 1438 }, { "epoch": 0.13, "grad_norm": 7.8410005413856245, "learning_rate": 9.969413119845078e-06, "loss": 0.9585, "step": 1439 }, { "epoch": 0.13, "grad_norm": 5.915623791878984, "learning_rate": 9.96933328820006e-06, "loss": 0.9305, "step": 1440 }, { "epoch": 0.13, "grad_norm": 8.66773988434155, "learning_rate": 9.969253352831092e-06, "loss": 0.9756, "step": 1441 }, { "epoch": 0.13, "grad_norm": 8.575876506793106, "learning_rate": 9.969173313739845e-06, "loss": 0.9462, "step": 1442 }, { "epoch": 0.13, "grad_norm": 7.320628445842298, "learning_rate": 9.969093170927986e-06, "loss": 0.946, "step": 1443 }, { "epoch": 0.13, "grad_norm": 7.3208368325851705, "learning_rate": 9.96901292439719e-06, "loss": 0.9195, "step": 1444 }, { "epoch": 0.13, "grad_norm": 6.521625704810516, "learning_rate": 9.96893257414913e-06, "loss": 0.886, "step": 1445 }, { "epoch": 0.13, "grad_norm": 7.529356995429448, "learning_rate": 9.968852120185484e-06, "loss": 0.8844, "step": 1446 }, { "epoch": 0.13, "grad_norm": 9.469608780727466, "learning_rate": 9.968771562507934e-06, "loss": 0.9914, "step": 1447 }, { "epoch": 0.13, "grad_norm": 8.505761818307187, "learning_rate": 9.968690901118155e-06, "loss": 0.9024, "step": 1448 }, { "epoch": 0.13, "grad_norm": 6.558244684036303, "learning_rate": 9.968610136017839e-06, "loss": 0.9485, "step": 1449 }, { "epoch": 0.13, "grad_norm": 9.265197907561143, "learning_rate": 9.968529267208665e-06, "loss": 0.9706, "step": 1450 }, { "epoch": 0.13, "grad_norm": 7.933033012533065, "learning_rate": 9.968448294692323e-06, "loss": 0.9901, "step": 1451 }, { "epoch": 0.13, "grad_norm": 9.101708079944538, "learning_rate": 9.968367218470504e-06, "loss": 0.8873, "step": 1452 }, { "epoch": 0.13, "grad_norm": 5.865341625333042, "learning_rate": 9.9682860385449e-06, "loss": 0.9304, "step": 1453 }, { "epoch": 0.13, "grad_norm": 7.279785009111402, "learning_rate": 9.968204754917206e-06, "loss": 0.8705, "step": 1454 }, { "epoch": 0.13, "grad_norm": 6.683729958828887, "learning_rate": 9.968123367589117e-06, "loss": 0.9749, "step": 1455 }, { "epoch": 0.13, "grad_norm": 9.15217496545845, "learning_rate": 9.968041876562334e-06, "loss": 0.9322, "step": 1456 }, { "epoch": 0.13, "grad_norm": 8.565612783489023, "learning_rate": 9.967960281838555e-06, "loss": 0.9083, "step": 1457 }, { "epoch": 0.13, "grad_norm": 7.068144033561165, "learning_rate": 9.967878583419487e-06, "loss": 0.9526, "step": 1458 }, { "epoch": 0.13, "grad_norm": 6.569752827626449, "learning_rate": 9.967796781306832e-06, "loss": 0.9701, "step": 1459 }, { "epoch": 0.13, "grad_norm": 8.894134901307497, "learning_rate": 9.967714875502298e-06, "loss": 0.8823, "step": 1460 }, { "epoch": 0.13, "grad_norm": 8.615266358061982, "learning_rate": 9.967632866007595e-06, "loss": 0.8834, "step": 1461 }, { "epoch": 0.13, "grad_norm": 6.317984399215114, "learning_rate": 9.967550752824436e-06, "loss": 0.8883, "step": 1462 }, { "epoch": 0.13, "grad_norm": 8.417259047434122, "learning_rate": 9.967468535954532e-06, "loss": 0.9067, "step": 1463 }, { "epoch": 0.13, "grad_norm": 6.577994419642581, "learning_rate": 9.967386215399601e-06, "loss": 0.8895, "step": 1464 }, { "epoch": 0.13, "grad_norm": 8.889024129548776, "learning_rate": 9.967303791161362e-06, "loss": 0.9679, "step": 1465 }, { "epoch": 0.13, "grad_norm": 8.705117004352962, "learning_rate": 9.967221263241535e-06, "loss": 1.0101, "step": 1466 }, { "epoch": 0.13, "grad_norm": 7.457160401140723, "learning_rate": 9.967138631641842e-06, "loss": 0.9524, "step": 1467 }, { "epoch": 0.13, "grad_norm": 8.614198265902633, "learning_rate": 9.967055896364007e-06, "loss": 0.9569, "step": 1468 }, { "epoch": 0.13, "grad_norm": 8.710900416824499, "learning_rate": 9.96697305740976e-06, "loss": 0.97, "step": 1469 }, { "epoch": 0.13, "grad_norm": 6.457975005706524, "learning_rate": 9.966890114780825e-06, "loss": 0.9078, "step": 1470 }, { "epoch": 0.13, "grad_norm": 6.701785151874144, "learning_rate": 9.966807068478938e-06, "loss": 0.9453, "step": 1471 }, { "epoch": 0.13, "grad_norm": 9.662837493129242, "learning_rate": 9.96672391850583e-06, "loss": 1.0557, "step": 1472 }, { "epoch": 0.13, "grad_norm": 8.984656379870067, "learning_rate": 9.966640664863236e-06, "loss": 1.043, "step": 1473 }, { "epoch": 0.13, "grad_norm": 8.265190386228888, "learning_rate": 9.966557307552897e-06, "loss": 0.9516, "step": 1474 }, { "epoch": 0.13, "grad_norm": 8.631319048933607, "learning_rate": 9.96647384657655e-06, "loss": 0.9557, "step": 1475 }, { "epoch": 0.13, "grad_norm": 6.672550121272586, "learning_rate": 9.966390281935939e-06, "loss": 0.9713, "step": 1476 }, { "epoch": 0.13, "grad_norm": 8.479244067560822, "learning_rate": 9.966306613632805e-06, "loss": 0.9785, "step": 1477 }, { "epoch": 0.13, "grad_norm": 10.245573691690058, "learning_rate": 9.966222841668897e-06, "loss": 0.8925, "step": 1478 }, { "epoch": 0.13, "grad_norm": 7.057061889179519, "learning_rate": 9.966138966045964e-06, "loss": 0.8694, "step": 1479 }, { "epoch": 0.13, "grad_norm": 10.76239027831692, "learning_rate": 9.966054986765754e-06, "loss": 0.9517, "step": 1480 }, { "epoch": 0.13, "grad_norm": 9.840527642161367, "learning_rate": 9.965970903830022e-06, "loss": 0.9371, "step": 1481 }, { "epoch": 0.13, "grad_norm": 7.334577670001729, "learning_rate": 9.965886717240524e-06, "loss": 0.9405, "step": 1482 }, { "epoch": 0.13, "grad_norm": 8.809989489007398, "learning_rate": 9.965802426999015e-06, "loss": 0.9381, "step": 1483 }, { "epoch": 0.13, "grad_norm": 14.77528387800605, "learning_rate": 9.965718033107256e-06, "loss": 0.9434, "step": 1484 }, { "epoch": 0.13, "grad_norm": 10.273239490917932, "learning_rate": 9.965633535567007e-06, "loss": 0.9476, "step": 1485 }, { "epoch": 0.13, "grad_norm": 6.756966387729549, "learning_rate": 9.965548934380033e-06, "loss": 0.9606, "step": 1486 }, { "epoch": 0.13, "grad_norm": 9.99997759198704, "learning_rate": 9.965464229548098e-06, "loss": 0.9341, "step": 1487 }, { "epoch": 0.13, "grad_norm": 7.731272228521683, "learning_rate": 9.965379421072974e-06, "loss": 1.0007, "step": 1488 }, { "epoch": 0.13, "grad_norm": 6.662229386857797, "learning_rate": 9.965294508956427e-06, "loss": 0.9046, "step": 1489 }, { "epoch": 0.13, "grad_norm": 6.988912540664809, "learning_rate": 9.96520949320023e-06, "loss": 0.9646, "step": 1490 }, { "epoch": 0.13, "grad_norm": 7.2273173756626425, "learning_rate": 9.96512437380616e-06, "loss": 0.95, "step": 1491 }, { "epoch": 0.13, "grad_norm": 7.8307106945305, "learning_rate": 9.965039150775992e-06, "loss": 0.977, "step": 1492 }, { "epoch": 0.13, "grad_norm": 6.750281797996829, "learning_rate": 9.964953824111502e-06, "loss": 0.9012, "step": 1493 }, { "epoch": 0.13, "grad_norm": 8.494776625352568, "learning_rate": 9.964868393814478e-06, "loss": 0.8817, "step": 1494 }, { "epoch": 0.13, "grad_norm": 6.487876752870589, "learning_rate": 9.964782859886698e-06, "loss": 0.9434, "step": 1495 }, { "epoch": 0.13, "grad_norm": 8.89335746750403, "learning_rate": 9.964697222329948e-06, "loss": 0.8869, "step": 1496 }, { "epoch": 0.13, "grad_norm": 6.9075575136972835, "learning_rate": 9.964611481146017e-06, "loss": 0.9677, "step": 1497 }, { "epoch": 0.13, "grad_norm": 8.111916567288766, "learning_rate": 9.964525636336693e-06, "loss": 0.9889, "step": 1498 }, { "epoch": 0.13, "grad_norm": 8.095137933468349, "learning_rate": 9.964439687903767e-06, "loss": 0.9234, "step": 1499 }, { "epoch": 0.13, "grad_norm": 6.816236430818839, "learning_rate": 9.964353635849035e-06, "loss": 0.9633, "step": 1500 }, { "epoch": 0.13, "grad_norm": 6.886778649423889, "learning_rate": 9.964267480174294e-06, "loss": 0.861, "step": 1501 }, { "epoch": 0.13, "grad_norm": 7.545404470312661, "learning_rate": 9.964181220881342e-06, "loss": 0.9093, "step": 1502 }, { "epoch": 0.13, "grad_norm": 7.9030707338731645, "learning_rate": 9.964094857971975e-06, "loss": 0.931, "step": 1503 }, { "epoch": 0.13, "grad_norm": 7.171015532527012, "learning_rate": 9.964008391448001e-06, "loss": 0.9152, "step": 1504 }, { "epoch": 0.13, "grad_norm": 6.51890505584007, "learning_rate": 9.963921821311222e-06, "loss": 0.9268, "step": 1505 }, { "epoch": 0.13, "grad_norm": 7.367841449567285, "learning_rate": 9.963835147563448e-06, "loss": 0.8673, "step": 1506 }, { "epoch": 0.13, "grad_norm": 7.984130528331638, "learning_rate": 9.963748370206484e-06, "loss": 0.9503, "step": 1507 }, { "epoch": 0.13, "grad_norm": 7.815962981905127, "learning_rate": 9.963661489242142e-06, "loss": 0.9149, "step": 1508 }, { "epoch": 0.13, "grad_norm": 9.695890120393516, "learning_rate": 9.963574504672238e-06, "loss": 0.9357, "step": 1509 }, { "epoch": 0.13, "grad_norm": 9.71650306074139, "learning_rate": 9.963487416498587e-06, "loss": 0.9137, "step": 1510 }, { "epoch": 0.13, "grad_norm": 8.198996380186198, "learning_rate": 9.963400224723006e-06, "loss": 0.8921, "step": 1511 }, { "epoch": 0.13, "grad_norm": 7.4371197132936935, "learning_rate": 9.963312929347314e-06, "loss": 0.9446, "step": 1512 }, { "epoch": 0.13, "grad_norm": 9.496143406043688, "learning_rate": 9.963225530373334e-06, "loss": 0.9714, "step": 1513 }, { "epoch": 0.14, "grad_norm": 7.211123153090494, "learning_rate": 9.96313802780289e-06, "loss": 0.9901, "step": 1514 }, { "epoch": 0.14, "grad_norm": 7.777507095592017, "learning_rate": 9.96305042163781e-06, "loss": 0.9115, "step": 1515 }, { "epoch": 0.14, "grad_norm": 6.694649435461505, "learning_rate": 9.96296271187992e-06, "loss": 0.8718, "step": 1516 }, { "epoch": 0.14, "grad_norm": 10.029009928319788, "learning_rate": 9.962874898531054e-06, "loss": 0.965, "step": 1517 }, { "epoch": 0.14, "grad_norm": 7.118782686602256, "learning_rate": 9.96278698159304e-06, "loss": 0.9467, "step": 1518 }, { "epoch": 0.14, "grad_norm": 5.734848318165714, "learning_rate": 9.962698961067719e-06, "loss": 0.9685, "step": 1519 }, { "epoch": 0.14, "grad_norm": 6.714221434940324, "learning_rate": 9.962610836956925e-06, "loss": 0.9952, "step": 1520 }, { "epoch": 0.14, "grad_norm": 7.56942945370318, "learning_rate": 9.962522609262496e-06, "loss": 0.94, "step": 1521 }, { "epoch": 0.14, "grad_norm": 5.943309027910723, "learning_rate": 9.962434277986277e-06, "loss": 0.8906, "step": 1522 }, { "epoch": 0.14, "grad_norm": 5.881272689063244, "learning_rate": 9.962345843130108e-06, "loss": 0.9553, "step": 1523 }, { "epoch": 0.14, "grad_norm": 6.293231607912387, "learning_rate": 9.962257304695839e-06, "loss": 0.8877, "step": 1524 }, { "epoch": 0.14, "grad_norm": 9.061176286345948, "learning_rate": 9.962168662685315e-06, "loss": 0.9676, "step": 1525 }, { "epoch": 0.14, "grad_norm": 7.091752423691399, "learning_rate": 9.962079917100387e-06, "loss": 0.944, "step": 1526 }, { "epoch": 0.14, "grad_norm": 5.810813299229871, "learning_rate": 9.961991067942909e-06, "loss": 0.9197, "step": 1527 }, { "epoch": 0.14, "grad_norm": 6.867019895092896, "learning_rate": 9.961902115214732e-06, "loss": 0.9805, "step": 1528 }, { "epoch": 0.14, "grad_norm": 6.905705460898345, "learning_rate": 9.961813058917715e-06, "loss": 0.9057, "step": 1529 }, { "epoch": 0.14, "grad_norm": 7.238106648905651, "learning_rate": 9.961723899053717e-06, "loss": 0.9803, "step": 1530 }, { "epoch": 0.14, "grad_norm": 8.30321980059641, "learning_rate": 9.9616346356246e-06, "loss": 0.9365, "step": 1531 }, { "epoch": 0.14, "grad_norm": 6.5463167300035705, "learning_rate": 9.961545268632223e-06, "loss": 0.9197, "step": 1532 }, { "epoch": 0.14, "grad_norm": 7.462861864393142, "learning_rate": 9.961455798078456e-06, "loss": 0.8785, "step": 1533 }, { "epoch": 0.14, "grad_norm": 9.272348750390195, "learning_rate": 9.961366223965164e-06, "loss": 0.971, "step": 1534 }, { "epoch": 0.14, "grad_norm": 6.29975833566798, "learning_rate": 9.961276546294218e-06, "loss": 0.972, "step": 1535 }, { "epoch": 0.14, "grad_norm": 8.155416331614127, "learning_rate": 9.961186765067487e-06, "loss": 0.8926, "step": 1536 }, { "epoch": 0.14, "grad_norm": 6.467988968228203, "learning_rate": 9.961096880286848e-06, "loss": 0.9183, "step": 1537 }, { "epoch": 0.14, "grad_norm": 8.436221445441653, "learning_rate": 9.961006891954179e-06, "loss": 0.9927, "step": 1538 }, { "epoch": 0.14, "grad_norm": 8.895549255783218, "learning_rate": 9.960916800071354e-06, "loss": 0.8846, "step": 1539 }, { "epoch": 0.14, "grad_norm": 7.088360463767982, "learning_rate": 9.960826604640252e-06, "loss": 0.8782, "step": 1540 }, { "epoch": 0.14, "grad_norm": 7.346497818674137, "learning_rate": 9.960736305662762e-06, "loss": 0.9009, "step": 1541 }, { "epoch": 0.14, "grad_norm": 8.46873268186375, "learning_rate": 9.960645903140764e-06, "loss": 0.8629, "step": 1542 }, { "epoch": 0.14, "grad_norm": 6.3758426789104945, "learning_rate": 9.960555397076148e-06, "loss": 0.9068, "step": 1543 }, { "epoch": 0.14, "grad_norm": 8.960273536470398, "learning_rate": 9.9604647874708e-06, "loss": 0.8841, "step": 1544 }, { "epoch": 0.14, "grad_norm": 9.14881031273148, "learning_rate": 9.960374074326614e-06, "loss": 0.9864, "step": 1545 }, { "epoch": 0.14, "grad_norm": 8.44895562633785, "learning_rate": 9.96028325764548e-06, "loss": 0.9877, "step": 1546 }, { "epoch": 0.14, "grad_norm": 8.462630464251822, "learning_rate": 9.960192337429298e-06, "loss": 0.9334, "step": 1547 }, { "epoch": 0.14, "grad_norm": 5.710264637556267, "learning_rate": 9.960101313679962e-06, "loss": 0.859, "step": 1548 }, { "epoch": 0.14, "grad_norm": 9.826475716263829, "learning_rate": 9.960010186399377e-06, "loss": 0.8924, "step": 1549 }, { "epoch": 0.14, "grad_norm": 5.9226946013126245, "learning_rate": 9.959918955589437e-06, "loss": 0.9228, "step": 1550 }, { "epoch": 0.14, "grad_norm": 7.614143607064812, "learning_rate": 9.959827621252054e-06, "loss": 0.9732, "step": 1551 }, { "epoch": 0.14, "grad_norm": 6.815543882227329, "learning_rate": 9.959736183389129e-06, "loss": 0.8627, "step": 1552 }, { "epoch": 0.14, "grad_norm": 7.609969470958643, "learning_rate": 9.959644642002575e-06, "loss": 0.8736, "step": 1553 }, { "epoch": 0.14, "grad_norm": 9.742106783433274, "learning_rate": 9.959552997094298e-06, "loss": 0.9201, "step": 1554 }, { "epoch": 0.14, "grad_norm": 6.525962518450011, "learning_rate": 9.959461248666215e-06, "loss": 0.9146, "step": 1555 }, { "epoch": 0.14, "grad_norm": 8.625844750777356, "learning_rate": 9.959369396720239e-06, "loss": 0.8562, "step": 1556 }, { "epoch": 0.14, "grad_norm": 7.161582689846505, "learning_rate": 9.959277441258289e-06, "loss": 0.8629, "step": 1557 }, { "epoch": 0.14, "grad_norm": 7.400325484654861, "learning_rate": 9.959185382282281e-06, "loss": 0.9723, "step": 1558 }, { "epoch": 0.14, "grad_norm": 9.769015325508441, "learning_rate": 9.95909321979414e-06, "loss": 0.9574, "step": 1559 }, { "epoch": 0.14, "grad_norm": 6.232166618886888, "learning_rate": 9.959000953795788e-06, "loss": 0.8452, "step": 1560 }, { "epoch": 0.14, "grad_norm": 6.346072118665611, "learning_rate": 9.95890858428915e-06, "loss": 0.9311, "step": 1561 }, { "epoch": 0.14, "grad_norm": 7.191176639869863, "learning_rate": 9.958816111276157e-06, "loss": 0.9526, "step": 1562 }, { "epoch": 0.14, "grad_norm": 9.030434895306083, "learning_rate": 9.958723534758737e-06, "loss": 0.9506, "step": 1563 }, { "epoch": 0.14, "grad_norm": 8.984700349716613, "learning_rate": 9.958630854738823e-06, "loss": 0.8884, "step": 1564 }, { "epoch": 0.14, "grad_norm": 6.896457587783347, "learning_rate": 9.958538071218346e-06, "loss": 0.8595, "step": 1565 }, { "epoch": 0.14, "grad_norm": 8.416638824952019, "learning_rate": 9.95844518419925e-06, "loss": 0.9243, "step": 1566 }, { "epoch": 0.14, "grad_norm": 9.452565730530738, "learning_rate": 9.958352193683467e-06, "loss": 0.9087, "step": 1567 }, { "epoch": 0.14, "grad_norm": 8.530453787778004, "learning_rate": 9.95825909967294e-06, "loss": 0.9537, "step": 1568 }, { "epoch": 0.14, "grad_norm": 7.062685225886606, "learning_rate": 9.958165902169615e-06, "loss": 0.9331, "step": 1569 }, { "epoch": 0.14, "grad_norm": 7.958940142387215, "learning_rate": 9.958072601175433e-06, "loss": 0.9014, "step": 1570 }, { "epoch": 0.14, "grad_norm": 7.319760098709922, "learning_rate": 9.957979196692344e-06, "loss": 0.9265, "step": 1571 }, { "epoch": 0.14, "grad_norm": 8.174085759901779, "learning_rate": 9.957885688722297e-06, "loss": 0.948, "step": 1572 }, { "epoch": 0.14, "grad_norm": 7.944361662258861, "learning_rate": 9.957792077267246e-06, "loss": 0.9295, "step": 1573 }, { "epoch": 0.14, "grad_norm": 11.250885120584293, "learning_rate": 9.95769836232914e-06, "loss": 0.9107, "step": 1574 }, { "epoch": 0.14, "grad_norm": 6.821025433340639, "learning_rate": 9.957604543909937e-06, "loss": 0.8638, "step": 1575 }, { "epoch": 0.14, "grad_norm": 10.005432621058242, "learning_rate": 9.957510622011599e-06, "loss": 0.9291, "step": 1576 }, { "epoch": 0.14, "grad_norm": 9.235087986577309, "learning_rate": 9.957416596636082e-06, "loss": 0.9453, "step": 1577 }, { "epoch": 0.14, "grad_norm": 8.853508811337052, "learning_rate": 9.95732246778535e-06, "loss": 0.9367, "step": 1578 }, { "epoch": 0.14, "grad_norm": 6.582689286431996, "learning_rate": 9.95722823546137e-06, "loss": 0.872, "step": 1579 }, { "epoch": 0.14, "grad_norm": 7.088243063544556, "learning_rate": 9.957133899666104e-06, "loss": 0.9022, "step": 1580 }, { "epoch": 0.14, "grad_norm": 6.681169994464124, "learning_rate": 9.957039460401526e-06, "loss": 0.9655, "step": 1581 }, { "epoch": 0.14, "grad_norm": 7.365774528830001, "learning_rate": 9.956944917669603e-06, "loss": 0.9053, "step": 1582 }, { "epoch": 0.14, "grad_norm": 6.232399000410088, "learning_rate": 9.95685027147231e-06, "loss": 0.9203, "step": 1583 }, { "epoch": 0.14, "grad_norm": 7.22323845760724, "learning_rate": 9.956755521811624e-06, "loss": 0.9181, "step": 1584 }, { "epoch": 0.14, "grad_norm": 6.215650437765172, "learning_rate": 9.956660668689525e-06, "loss": 0.9024, "step": 1585 }, { "epoch": 0.14, "grad_norm": 9.012589820645012, "learning_rate": 9.956565712107985e-06, "loss": 0.916, "step": 1586 }, { "epoch": 0.14, "grad_norm": 5.745181256208862, "learning_rate": 9.956470652068992e-06, "loss": 0.8777, "step": 1587 }, { "epoch": 0.14, "grad_norm": 8.111189612432613, "learning_rate": 9.956375488574528e-06, "loss": 0.9405, "step": 1588 }, { "epoch": 0.14, "grad_norm": 8.456324170831776, "learning_rate": 9.956280221626579e-06, "loss": 0.9381, "step": 1589 }, { "epoch": 0.14, "grad_norm": 7.12957436599661, "learning_rate": 9.956184851227137e-06, "loss": 0.8379, "step": 1590 }, { "epoch": 0.14, "grad_norm": 9.791346112392386, "learning_rate": 9.956089377378189e-06, "loss": 0.9704, "step": 1591 }, { "epoch": 0.14, "grad_norm": 5.2236583279503375, "learning_rate": 9.955993800081726e-06, "loss": 0.9037, "step": 1592 }, { "epoch": 0.14, "grad_norm": 7.4382925624230385, "learning_rate": 9.955898119339751e-06, "loss": 0.8835, "step": 1593 }, { "epoch": 0.14, "grad_norm": 7.788875846954354, "learning_rate": 9.955802335154252e-06, "loss": 0.8519, "step": 1594 }, { "epoch": 0.14, "grad_norm": 6.904733605378186, "learning_rate": 9.955706447527234e-06, "loss": 0.951, "step": 1595 }, { "epoch": 0.14, "grad_norm": 7.530877175412536, "learning_rate": 9.955610456460696e-06, "loss": 0.8934, "step": 1596 }, { "epoch": 0.14, "grad_norm": 7.848030672654852, "learning_rate": 9.955514361956645e-06, "loss": 0.9058, "step": 1597 }, { "epoch": 0.14, "grad_norm": 7.269765210311887, "learning_rate": 9.95541816401708e-06, "loss": 0.9655, "step": 1598 }, { "epoch": 0.14, "grad_norm": 7.572324435623046, "learning_rate": 9.955321862644015e-06, "loss": 0.8627, "step": 1599 }, { "epoch": 0.14, "grad_norm": 10.211032360126362, "learning_rate": 9.955225457839457e-06, "loss": 0.9562, "step": 1600 }, { "epoch": 0.14, "grad_norm": 7.100305627570337, "learning_rate": 9.955128949605422e-06, "loss": 0.934, "step": 1601 }, { "epoch": 0.14, "grad_norm": 7.836818277027663, "learning_rate": 9.95503233794392e-06, "loss": 0.8672, "step": 1602 }, { "epoch": 0.14, "grad_norm": 7.878640492697954, "learning_rate": 9.95493562285697e-06, "loss": 0.8796, "step": 1603 }, { "epoch": 0.14, "grad_norm": 7.748222446868447, "learning_rate": 9.954838804346589e-06, "loss": 0.9703, "step": 1604 }, { "epoch": 0.14, "grad_norm": 9.893872397135734, "learning_rate": 9.954741882414798e-06, "loss": 0.9156, "step": 1605 }, { "epoch": 0.14, "grad_norm": 7.324674758949067, "learning_rate": 9.954644857063621e-06, "loss": 0.9503, "step": 1606 }, { "epoch": 0.14, "grad_norm": 9.588664667772502, "learning_rate": 9.954547728295086e-06, "loss": 0.9713, "step": 1607 }, { "epoch": 0.14, "grad_norm": 11.109978895096539, "learning_rate": 9.954450496111215e-06, "loss": 0.8839, "step": 1608 }, { "epoch": 0.14, "grad_norm": 8.403282203101645, "learning_rate": 9.954353160514042e-06, "loss": 0.8763, "step": 1609 }, { "epoch": 0.14, "grad_norm": 11.536474803876654, "learning_rate": 9.954255721505594e-06, "loss": 0.922, "step": 1610 }, { "epoch": 0.14, "grad_norm": 10.042886649749127, "learning_rate": 9.954158179087909e-06, "loss": 0.9614, "step": 1611 }, { "epoch": 0.14, "grad_norm": 10.278404577916737, "learning_rate": 9.954060533263022e-06, "loss": 0.9188, "step": 1612 }, { "epoch": 0.14, "grad_norm": 7.570848072092514, "learning_rate": 9.953962784032969e-06, "loss": 0.8783, "step": 1613 }, { "epoch": 0.14, "grad_norm": 7.404222508147017, "learning_rate": 9.953864931399792e-06, "loss": 0.9393, "step": 1614 }, { "epoch": 0.14, "grad_norm": 8.592356539149444, "learning_rate": 9.953766975365533e-06, "loss": 0.9946, "step": 1615 }, { "epoch": 0.14, "grad_norm": 7.780505305779121, "learning_rate": 9.953668915932239e-06, "loss": 0.8763, "step": 1616 }, { "epoch": 0.14, "grad_norm": 7.711717506942648, "learning_rate": 9.953570753101954e-06, "loss": 0.9725, "step": 1617 }, { "epoch": 0.14, "grad_norm": 8.401967868599291, "learning_rate": 9.953472486876726e-06, "loss": 0.9136, "step": 1618 }, { "epoch": 0.14, "grad_norm": 6.884798886408169, "learning_rate": 9.953374117258609e-06, "loss": 0.9631, "step": 1619 }, { "epoch": 0.14, "grad_norm": 7.628192231018277, "learning_rate": 9.953275644249656e-06, "loss": 0.8703, "step": 1620 }, { "epoch": 0.14, "grad_norm": 7.97171141187945, "learning_rate": 9.95317706785192e-06, "loss": 0.9298, "step": 1621 }, { "epoch": 0.14, "grad_norm": 6.962987921613148, "learning_rate": 9.95307838806746e-06, "loss": 0.9245, "step": 1622 }, { "epoch": 0.14, "grad_norm": 8.918746859730616, "learning_rate": 9.952979604898336e-06, "loss": 0.8748, "step": 1623 }, { "epoch": 0.14, "grad_norm": 7.088328665839549, "learning_rate": 9.952880718346609e-06, "loss": 0.9175, "step": 1624 }, { "epoch": 0.14, "grad_norm": 9.934004194409862, "learning_rate": 9.952781728414345e-06, "loss": 0.9133, "step": 1625 }, { "epoch": 0.15, "grad_norm": 8.295567832750349, "learning_rate": 9.952682635103608e-06, "loss": 0.929, "step": 1626 }, { "epoch": 0.15, "grad_norm": 9.612628122857485, "learning_rate": 9.952583438416468e-06, "loss": 0.9739, "step": 1627 }, { "epoch": 0.15, "grad_norm": 7.07815200572916, "learning_rate": 9.952484138354993e-06, "loss": 0.8954, "step": 1628 }, { "epoch": 0.15, "grad_norm": 8.107446932202551, "learning_rate": 9.952384734921258e-06, "loss": 0.9275, "step": 1629 }, { "epoch": 0.15, "grad_norm": 4.755773832239196, "learning_rate": 9.952285228117338e-06, "loss": 0.9083, "step": 1630 }, { "epoch": 0.15, "grad_norm": 6.598739022702816, "learning_rate": 9.952185617945309e-06, "loss": 0.9246, "step": 1631 }, { "epoch": 0.15, "grad_norm": 7.660094529278459, "learning_rate": 9.952085904407249e-06, "loss": 0.8872, "step": 1632 }, { "epoch": 0.15, "grad_norm": 5.5431509215149015, "learning_rate": 9.951986087505243e-06, "loss": 0.9102, "step": 1633 }, { "epoch": 0.15, "grad_norm": 10.867852850266456, "learning_rate": 9.95188616724137e-06, "loss": 0.9218, "step": 1634 }, { "epoch": 0.15, "grad_norm": 7.141830856404113, "learning_rate": 9.95178614361772e-06, "loss": 0.8333, "step": 1635 }, { "epoch": 0.15, "grad_norm": 6.618740354305997, "learning_rate": 9.951686016636376e-06, "loss": 1.0055, "step": 1636 }, { "epoch": 0.15, "grad_norm": 8.580377414884051, "learning_rate": 9.951585786299432e-06, "loss": 0.8796, "step": 1637 }, { "epoch": 0.15, "grad_norm": 11.313401299055668, "learning_rate": 9.951485452608977e-06, "loss": 0.8887, "step": 1638 }, { "epoch": 0.15, "grad_norm": 6.019186887040177, "learning_rate": 9.951385015567107e-06, "loss": 0.8666, "step": 1639 }, { "epoch": 0.15, "grad_norm": 8.257089896155504, "learning_rate": 9.951284475175919e-06, "loss": 0.9352, "step": 1640 }, { "epoch": 0.15, "grad_norm": 8.206331596020261, "learning_rate": 9.951183831437511e-06, "loss": 0.9336, "step": 1641 }, { "epoch": 0.15, "grad_norm": 11.0335023142606, "learning_rate": 9.951083084353982e-06, "loss": 0.9119, "step": 1642 }, { "epoch": 0.15, "grad_norm": 8.787881356412708, "learning_rate": 9.950982233927438e-06, "loss": 0.8965, "step": 1643 }, { "epoch": 0.15, "grad_norm": 7.534586493182818, "learning_rate": 9.950881280159982e-06, "loss": 0.8833, "step": 1644 }, { "epoch": 0.15, "grad_norm": 6.297411512513467, "learning_rate": 9.950780223053721e-06, "loss": 0.8603, "step": 1645 }, { "epoch": 0.15, "grad_norm": 6.530969771725447, "learning_rate": 9.950679062610765e-06, "loss": 0.9355, "step": 1646 }, { "epoch": 0.15, "grad_norm": 6.438774427964296, "learning_rate": 9.950577798833225e-06, "loss": 0.9604, "step": 1647 }, { "epoch": 0.15, "grad_norm": 7.57751516707807, "learning_rate": 9.950476431723215e-06, "loss": 0.9704, "step": 1648 }, { "epoch": 0.15, "grad_norm": 8.060286671113943, "learning_rate": 9.950374961282851e-06, "loss": 0.8842, "step": 1649 }, { "epoch": 0.15, "grad_norm": 9.465632701451721, "learning_rate": 9.950273387514251e-06, "loss": 0.9383, "step": 1650 }, { "epoch": 0.15, "grad_norm": 9.394179935959775, "learning_rate": 9.950171710419536e-06, "loss": 0.8442, "step": 1651 }, { "epoch": 0.15, "grad_norm": 7.395904798073917, "learning_rate": 9.950069930000826e-06, "loss": 0.9532, "step": 1652 }, { "epoch": 0.15, "grad_norm": 7.385200628482545, "learning_rate": 9.949968046260247e-06, "loss": 0.9614, "step": 1653 }, { "epoch": 0.15, "grad_norm": 10.814686917170802, "learning_rate": 9.949866059199924e-06, "loss": 0.9036, "step": 1654 }, { "epoch": 0.15, "grad_norm": 7.8487332424652365, "learning_rate": 9.949763968821988e-06, "loss": 0.901, "step": 1655 }, { "epoch": 0.15, "grad_norm": 10.025135776580573, "learning_rate": 9.94966177512857e-06, "loss": 0.9594, "step": 1656 }, { "epoch": 0.15, "grad_norm": 6.62001875585656, "learning_rate": 9.949559478121803e-06, "loss": 0.944, "step": 1657 }, { "epoch": 0.15, "grad_norm": 9.11919480741434, "learning_rate": 9.949457077803818e-06, "loss": 0.946, "step": 1658 }, { "epoch": 0.15, "grad_norm": 8.915650163655938, "learning_rate": 9.949354574176758e-06, "loss": 0.8696, "step": 1659 }, { "epoch": 0.15, "grad_norm": 8.091343227222517, "learning_rate": 9.949251967242758e-06, "loss": 0.9585, "step": 1660 }, { "epoch": 0.15, "grad_norm": 8.6338414515675, "learning_rate": 9.949149257003962e-06, "loss": 0.9497, "step": 1661 }, { "epoch": 0.15, "grad_norm": 6.9403887689123485, "learning_rate": 9.949046443462516e-06, "loss": 0.874, "step": 1662 }, { "epoch": 0.15, "grad_norm": 7.033071413391915, "learning_rate": 9.948943526620563e-06, "loss": 0.9472, "step": 1663 }, { "epoch": 0.15, "grad_norm": 7.879510918371288, "learning_rate": 9.94884050648025e-06, "loss": 0.8576, "step": 1664 }, { "epoch": 0.15, "grad_norm": 5.553250545870903, "learning_rate": 9.948737383043731e-06, "loss": 0.9467, "step": 1665 }, { "epoch": 0.15, "grad_norm": 9.298021151931284, "learning_rate": 9.948634156313155e-06, "loss": 0.9417, "step": 1666 }, { "epoch": 0.15, "grad_norm": 8.679930869260088, "learning_rate": 9.948530826290681e-06, "loss": 0.9032, "step": 1667 }, { "epoch": 0.15, "grad_norm": 8.67615368403085, "learning_rate": 9.948427392978461e-06, "loss": 0.8964, "step": 1668 }, { "epoch": 0.15, "grad_norm": 8.14373243843311, "learning_rate": 9.948323856378657e-06, "loss": 0.9366, "step": 1669 }, { "epoch": 0.15, "grad_norm": 7.46552524471624, "learning_rate": 9.94822021649343e-06, "loss": 0.9038, "step": 1670 }, { "epoch": 0.15, "grad_norm": 7.266437386319904, "learning_rate": 9.948116473324941e-06, "loss": 0.9006, "step": 1671 }, { "epoch": 0.15, "grad_norm": 6.898731506656267, "learning_rate": 9.948012626875357e-06, "loss": 1.0095, "step": 1672 }, { "epoch": 0.15, "grad_norm": 6.3668682307498905, "learning_rate": 9.947908677146844e-06, "loss": 0.8666, "step": 1673 }, { "epoch": 0.15, "grad_norm": 7.4966152024561685, "learning_rate": 9.947804624141574e-06, "loss": 0.9298, "step": 1674 }, { "epoch": 0.15, "grad_norm": 6.603147347741379, "learning_rate": 9.94770046786172e-06, "loss": 0.952, "step": 1675 }, { "epoch": 0.15, "grad_norm": 8.520447343137617, "learning_rate": 9.947596208309451e-06, "loss": 0.8984, "step": 1676 }, { "epoch": 0.15, "grad_norm": 8.041433439476444, "learning_rate": 9.947491845486949e-06, "loss": 1.0086, "step": 1677 }, { "epoch": 0.15, "grad_norm": 6.548263325205756, "learning_rate": 9.947387379396388e-06, "loss": 0.9611, "step": 1678 }, { "epoch": 0.15, "grad_norm": 7.376305733970497, "learning_rate": 9.947282810039952e-06, "loss": 0.8724, "step": 1679 }, { "epoch": 0.15, "grad_norm": 7.88473640583211, "learning_rate": 9.947178137419818e-06, "loss": 0.9513, "step": 1680 }, { "epoch": 0.15, "grad_norm": 10.15334400772932, "learning_rate": 9.947073361538178e-06, "loss": 0.968, "step": 1681 }, { "epoch": 0.15, "grad_norm": 7.5419835993865325, "learning_rate": 9.946968482397212e-06, "loss": 0.9032, "step": 1682 }, { "epoch": 0.15, "grad_norm": 6.556415358310082, "learning_rate": 9.946863499999115e-06, "loss": 0.9255, "step": 1683 }, { "epoch": 0.15, "grad_norm": 6.045787642531568, "learning_rate": 9.946758414346076e-06, "loss": 0.9172, "step": 1684 }, { "epoch": 0.15, "grad_norm": 7.18339175995122, "learning_rate": 9.946653225440288e-06, "loss": 0.9054, "step": 1685 }, { "epoch": 0.15, "grad_norm": 7.257034861838449, "learning_rate": 9.946547933283946e-06, "loss": 0.883, "step": 1686 }, { "epoch": 0.15, "grad_norm": 8.018608651371892, "learning_rate": 9.94644253787925e-06, "loss": 0.889, "step": 1687 }, { "epoch": 0.15, "grad_norm": 8.033089154995128, "learning_rate": 9.946337039228398e-06, "loss": 0.9152, "step": 1688 }, { "epoch": 0.15, "grad_norm": 9.024412305894637, "learning_rate": 9.946231437333591e-06, "loss": 0.936, "step": 1689 }, { "epoch": 0.15, "grad_norm": 7.980561338240078, "learning_rate": 9.946125732197038e-06, "loss": 0.9654, "step": 1690 }, { "epoch": 0.15, "grad_norm": 7.847790590680537, "learning_rate": 9.94601992382094e-06, "loss": 0.9473, "step": 1691 }, { "epoch": 0.15, "grad_norm": 7.869862480320677, "learning_rate": 9.945914012207507e-06, "loss": 0.935, "step": 1692 }, { "epoch": 0.15, "grad_norm": 7.387289337860115, "learning_rate": 9.945807997358952e-06, "loss": 0.9288, "step": 1693 }, { "epoch": 0.15, "grad_norm": 7.852107908778715, "learning_rate": 9.945701879277487e-06, "loss": 0.9103, "step": 1694 }, { "epoch": 0.15, "grad_norm": 11.069794500256458, "learning_rate": 9.945595657965325e-06, "loss": 0.914, "step": 1695 }, { "epoch": 0.15, "grad_norm": 8.100434649482077, "learning_rate": 9.945489333424684e-06, "loss": 0.9481, "step": 1696 }, { "epoch": 0.15, "grad_norm": 9.102953555515983, "learning_rate": 9.945382905657784e-06, "loss": 0.8937, "step": 1697 }, { "epoch": 0.15, "grad_norm": 7.859250402005038, "learning_rate": 9.945276374666846e-06, "loss": 0.8706, "step": 1698 }, { "epoch": 0.15, "grad_norm": 8.170524301478496, "learning_rate": 9.945169740454093e-06, "loss": 0.9892, "step": 1699 }, { "epoch": 0.15, "grad_norm": 6.60665034666195, "learning_rate": 9.945063003021752e-06, "loss": 0.873, "step": 1700 }, { "epoch": 0.15, "grad_norm": 7.153761801261673, "learning_rate": 9.94495616237205e-06, "loss": 0.9607, "step": 1701 }, { "epoch": 0.15, "grad_norm": 8.845610578743642, "learning_rate": 9.944849218507218e-06, "loss": 0.9081, "step": 1702 }, { "epoch": 0.15, "grad_norm": 5.996019886605271, "learning_rate": 9.944742171429487e-06, "loss": 0.9512, "step": 1703 }, { "epoch": 0.15, "grad_norm": 8.652224529152166, "learning_rate": 9.944635021141092e-06, "loss": 0.9158, "step": 1704 }, { "epoch": 0.15, "grad_norm": 8.286106931876958, "learning_rate": 9.94452776764427e-06, "loss": 0.8864, "step": 1705 }, { "epoch": 0.15, "grad_norm": 6.082729459693538, "learning_rate": 9.944420410941259e-06, "loss": 0.904, "step": 1706 }, { "epoch": 0.15, "grad_norm": 6.524322349595408, "learning_rate": 9.9443129510343e-06, "loss": 0.907, "step": 1707 }, { "epoch": 0.15, "grad_norm": 8.135708507307406, "learning_rate": 9.944205387925636e-06, "loss": 0.9169, "step": 1708 }, { "epoch": 0.15, "grad_norm": 7.396832705431355, "learning_rate": 9.944097721617512e-06, "loss": 0.9884, "step": 1709 }, { "epoch": 0.15, "grad_norm": 10.013758210589762, "learning_rate": 9.943989952112175e-06, "loss": 0.9621, "step": 1710 }, { "epoch": 0.15, "grad_norm": 9.870494631658731, "learning_rate": 9.943882079411875e-06, "loss": 0.9017, "step": 1711 }, { "epoch": 0.15, "grad_norm": 8.036521946912087, "learning_rate": 9.943774103518865e-06, "loss": 1.0531, "step": 1712 }, { "epoch": 0.15, "grad_norm": 6.687615130350311, "learning_rate": 9.943666024435396e-06, "loss": 0.9067, "step": 1713 }, { "epoch": 0.15, "grad_norm": 10.441340833719739, "learning_rate": 9.943557842163724e-06, "loss": 0.8934, "step": 1714 }, { "epoch": 0.15, "grad_norm": 10.11224638978164, "learning_rate": 9.94344955670611e-06, "loss": 0.9045, "step": 1715 }, { "epoch": 0.15, "grad_norm": 7.713886352289906, "learning_rate": 9.943341168064814e-06, "loss": 0.8881, "step": 1716 }, { "epoch": 0.15, "grad_norm": 7.085050871851386, "learning_rate": 9.943232676242093e-06, "loss": 0.9092, "step": 1717 }, { "epoch": 0.15, "grad_norm": 6.7501780656307435, "learning_rate": 9.943124081240218e-06, "loss": 0.935, "step": 1718 }, { "epoch": 0.15, "grad_norm": 7.249628563876324, "learning_rate": 9.943015383061452e-06, "loss": 0.9152, "step": 1719 }, { "epoch": 0.15, "grad_norm": 7.110192158442351, "learning_rate": 9.942906581708065e-06, "loss": 0.8407, "step": 1720 }, { "epoch": 0.15, "grad_norm": 8.091247509880226, "learning_rate": 9.94279767718233e-06, "loss": 0.885, "step": 1721 }, { "epoch": 0.15, "grad_norm": 5.643072655517878, "learning_rate": 9.942688669486516e-06, "loss": 0.9057, "step": 1722 }, { "epoch": 0.15, "grad_norm": 6.203657265005804, "learning_rate": 9.9425795586229e-06, "loss": 0.9195, "step": 1723 }, { "epoch": 0.15, "grad_norm": 6.382953068547134, "learning_rate": 9.94247034459376e-06, "loss": 0.9226, "step": 1724 }, { "epoch": 0.15, "grad_norm": 7.928244709776375, "learning_rate": 9.942361027401377e-06, "loss": 0.9165, "step": 1725 }, { "epoch": 0.15, "grad_norm": 6.173564018957629, "learning_rate": 9.942251607048028e-06, "loss": 0.9384, "step": 1726 }, { "epoch": 0.15, "grad_norm": 7.517349645697361, "learning_rate": 9.942142083536001e-06, "loss": 0.8512, "step": 1727 }, { "epoch": 0.15, "grad_norm": 8.593011436539596, "learning_rate": 9.942032456867583e-06, "loss": 0.85, "step": 1728 }, { "epoch": 0.15, "grad_norm": 7.031208553051318, "learning_rate": 9.94192272704506e-06, "loss": 0.9005, "step": 1729 }, { "epoch": 0.15, "grad_norm": 8.393408853043171, "learning_rate": 9.941812894070721e-06, "loss": 0.9366, "step": 1730 }, { "epoch": 0.15, "grad_norm": 8.143431683291103, "learning_rate": 9.941702957946862e-06, "loss": 0.8755, "step": 1731 }, { "epoch": 0.15, "grad_norm": 7.165204469012367, "learning_rate": 9.941592918675776e-06, "loss": 0.9885, "step": 1732 }, { "epoch": 0.15, "grad_norm": 7.859869829834116, "learning_rate": 9.941482776259759e-06, "loss": 0.9207, "step": 1733 }, { "epoch": 0.15, "grad_norm": 10.075930869996785, "learning_rate": 9.94137253070111e-06, "loss": 0.9505, "step": 1734 }, { "epoch": 0.15, "grad_norm": 8.739330753387975, "learning_rate": 9.941262182002132e-06, "loss": 0.8816, "step": 1735 }, { "epoch": 0.15, "grad_norm": 7.316348404513737, "learning_rate": 9.941151730165126e-06, "loss": 0.8949, "step": 1736 }, { "epoch": 0.15, "grad_norm": 6.6237514632247185, "learning_rate": 9.941041175192398e-06, "loss": 0.9201, "step": 1737 }, { "epoch": 0.16, "grad_norm": 7.3332960734469035, "learning_rate": 9.940930517086257e-06, "loss": 0.9457, "step": 1738 }, { "epoch": 0.16, "grad_norm": 6.472547554711808, "learning_rate": 9.940819755849014e-06, "loss": 0.8611, "step": 1739 }, { "epoch": 0.16, "grad_norm": 6.210100322273938, "learning_rate": 9.940708891482976e-06, "loss": 0.9845, "step": 1740 }, { "epoch": 0.16, "grad_norm": 10.003344880230083, "learning_rate": 9.94059792399046e-06, "loss": 0.8689, "step": 1741 }, { "epoch": 0.16, "grad_norm": 11.720424237016914, "learning_rate": 9.940486853373784e-06, "loss": 0.8544, "step": 1742 }, { "epoch": 0.16, "grad_norm": 7.494506023182325, "learning_rate": 9.940375679635263e-06, "loss": 0.909, "step": 1743 }, { "epoch": 0.16, "grad_norm": 6.929783731476001, "learning_rate": 9.94026440277722e-06, "loss": 0.9753, "step": 1744 }, { "epoch": 0.16, "grad_norm": 8.088974893253955, "learning_rate": 9.940153022801974e-06, "loss": 1.0166, "step": 1745 }, { "epoch": 0.16, "grad_norm": 7.177373254500309, "learning_rate": 9.940041539711856e-06, "loss": 0.921, "step": 1746 }, { "epoch": 0.16, "grad_norm": 6.915353698236475, "learning_rate": 9.939929953509187e-06, "loss": 0.9182, "step": 1747 }, { "epoch": 0.16, "grad_norm": 9.147677884431397, "learning_rate": 9.939818264196298e-06, "loss": 0.9032, "step": 1748 }, { "epoch": 0.16, "grad_norm": 8.888899738992613, "learning_rate": 9.939706471775522e-06, "loss": 1.0015, "step": 1749 }, { "epoch": 0.16, "grad_norm": 7.057088963816157, "learning_rate": 9.939594576249191e-06, "loss": 0.9629, "step": 1750 }, { "epoch": 0.16, "grad_norm": 8.70960381311623, "learning_rate": 9.939482577619642e-06, "loss": 0.8987, "step": 1751 }, { "epoch": 0.16, "grad_norm": 8.920418830419786, "learning_rate": 9.93937047588921e-06, "loss": 0.9154, "step": 1752 }, { "epoch": 0.16, "grad_norm": 6.834139175094775, "learning_rate": 9.939258271060237e-06, "loss": 0.9288, "step": 1753 }, { "epoch": 0.16, "grad_norm": 6.671382658879792, "learning_rate": 9.939145963135065e-06, "loss": 0.9393, "step": 1754 }, { "epoch": 0.16, "grad_norm": 7.996948101096514, "learning_rate": 9.939033552116036e-06, "loss": 0.8532, "step": 1755 }, { "epoch": 0.16, "grad_norm": 9.643435811130972, "learning_rate": 9.938921038005497e-06, "loss": 0.975, "step": 1756 }, { "epoch": 0.16, "grad_norm": 6.506197130723651, "learning_rate": 9.9388084208058e-06, "loss": 0.8805, "step": 1757 }, { "epoch": 0.16, "grad_norm": 7.293251260862998, "learning_rate": 9.938695700519292e-06, "loss": 0.9772, "step": 1758 }, { "epoch": 0.16, "grad_norm": 5.1752799019602245, "learning_rate": 9.938582877148327e-06, "loss": 0.9273, "step": 1759 }, { "epoch": 0.16, "grad_norm": 5.321742424173082, "learning_rate": 9.938469950695258e-06, "loss": 0.9175, "step": 1760 }, { "epoch": 0.16, "grad_norm": 7.832853998502766, "learning_rate": 9.938356921162446e-06, "loss": 0.9648, "step": 1761 }, { "epoch": 0.16, "grad_norm": 7.38881770548638, "learning_rate": 9.938243788552246e-06, "loss": 0.8909, "step": 1762 }, { "epoch": 0.16, "grad_norm": 5.878616423231119, "learning_rate": 9.938130552867025e-06, "loss": 0.9131, "step": 1763 }, { "epoch": 0.16, "grad_norm": 10.222039142772411, "learning_rate": 9.93801721410914e-06, "loss": 0.9377, "step": 1764 }, { "epoch": 0.16, "grad_norm": 6.6978143321121255, "learning_rate": 9.937903772280962e-06, "loss": 0.9057, "step": 1765 }, { "epoch": 0.16, "grad_norm": 8.649518989820997, "learning_rate": 9.937790227384854e-06, "loss": 0.8694, "step": 1766 }, { "epoch": 0.16, "grad_norm": 8.010692958809571, "learning_rate": 9.93767657942319e-06, "loss": 0.9183, "step": 1767 }, { "epoch": 0.16, "grad_norm": 9.173506448218816, "learning_rate": 9.93756282839834e-06, "loss": 0.8333, "step": 1768 }, { "epoch": 0.16, "grad_norm": 9.38566092052211, "learning_rate": 9.93744897431268e-06, "loss": 0.9245, "step": 1769 }, { "epoch": 0.16, "grad_norm": 8.083496806377413, "learning_rate": 9.937335017168585e-06, "loss": 0.924, "step": 1770 }, { "epoch": 0.16, "grad_norm": 12.676180583987946, "learning_rate": 9.937220956968434e-06, "loss": 0.9565, "step": 1771 }, { "epoch": 0.16, "grad_norm": 7.848290000227272, "learning_rate": 9.937106793714608e-06, "loss": 0.9675, "step": 1772 }, { "epoch": 0.16, "grad_norm": 9.360101372723747, "learning_rate": 9.93699252740949e-06, "loss": 0.8847, "step": 1773 }, { "epoch": 0.16, "grad_norm": 7.219843094041831, "learning_rate": 9.936878158055463e-06, "loss": 0.924, "step": 1774 }, { "epoch": 0.16, "grad_norm": 8.280625889415871, "learning_rate": 9.936763685654917e-06, "loss": 0.9191, "step": 1775 }, { "epoch": 0.16, "grad_norm": 6.257874447531805, "learning_rate": 9.936649110210243e-06, "loss": 0.9347, "step": 1776 }, { "epoch": 0.16, "grad_norm": 7.466223301226329, "learning_rate": 9.936534431723828e-06, "loss": 0.8684, "step": 1777 }, { "epoch": 0.16, "grad_norm": 8.070416857126578, "learning_rate": 9.936419650198066e-06, "loss": 0.9398, "step": 1778 }, { "epoch": 0.16, "grad_norm": 7.051678861781091, "learning_rate": 9.936304765635355e-06, "loss": 0.9605, "step": 1779 }, { "epoch": 0.16, "grad_norm": 7.848459080243438, "learning_rate": 9.936189778038095e-06, "loss": 0.8847, "step": 1780 }, { "epoch": 0.16, "grad_norm": 7.798703419517447, "learning_rate": 9.93607468740868e-06, "loss": 0.9223, "step": 1781 }, { "epoch": 0.16, "grad_norm": 6.280889318500933, "learning_rate": 9.935959493749519e-06, "loss": 0.9165, "step": 1782 }, { "epoch": 0.16, "grad_norm": 7.952845267276942, "learning_rate": 9.93584419706301e-06, "loss": 0.8565, "step": 1783 }, { "epoch": 0.16, "grad_norm": 7.115539773139738, "learning_rate": 9.935728797351565e-06, "loss": 0.8972, "step": 1784 }, { "epoch": 0.16, "grad_norm": 8.772440394887406, "learning_rate": 9.935613294617589e-06, "loss": 0.9451, "step": 1785 }, { "epoch": 0.16, "grad_norm": 8.900180483950491, "learning_rate": 9.935497688863495e-06, "loss": 0.8711, "step": 1786 }, { "epoch": 0.16, "grad_norm": 10.058514416182714, "learning_rate": 9.935381980091696e-06, "loss": 0.8858, "step": 1787 }, { "epoch": 0.16, "grad_norm": 7.74635600102307, "learning_rate": 9.935266168304605e-06, "loss": 0.9124, "step": 1788 }, { "epoch": 0.16, "grad_norm": 8.543819299755475, "learning_rate": 9.935150253504641e-06, "loss": 0.9816, "step": 1789 }, { "epoch": 0.16, "grad_norm": 6.71051336583126, "learning_rate": 9.935034235694224e-06, "loss": 0.9064, "step": 1790 }, { "epoch": 0.16, "grad_norm": 7.157460895670773, "learning_rate": 9.934918114875775e-06, "loss": 0.9326, "step": 1791 }, { "epoch": 0.16, "grad_norm": 7.982347172311036, "learning_rate": 9.934801891051718e-06, "loss": 0.9103, "step": 1792 }, { "epoch": 0.16, "grad_norm": 7.761216800206074, "learning_rate": 9.934685564224476e-06, "loss": 0.9176, "step": 1793 }, { "epoch": 0.16, "grad_norm": 15.761723177451964, "learning_rate": 9.934569134396481e-06, "loss": 0.8476, "step": 1794 }, { "epoch": 0.16, "grad_norm": 7.887121635298372, "learning_rate": 9.934452601570163e-06, "loss": 0.985, "step": 1795 }, { "epoch": 0.16, "grad_norm": 7.551831083264154, "learning_rate": 9.934335965747952e-06, "loss": 1.0044, "step": 1796 }, { "epoch": 0.16, "grad_norm": 8.545950066831496, "learning_rate": 9.934219226932284e-06, "loss": 0.8444, "step": 1797 }, { "epoch": 0.16, "grad_norm": 6.984871545978071, "learning_rate": 9.934102385125597e-06, "loss": 0.9725, "step": 1798 }, { "epoch": 0.16, "grad_norm": 8.00606220749479, "learning_rate": 9.933985440330326e-06, "loss": 0.9604, "step": 1799 }, { "epoch": 0.16, "grad_norm": 9.63832991000302, "learning_rate": 9.933868392548916e-06, "loss": 0.8603, "step": 1800 }, { "epoch": 0.16, "grad_norm": 8.396207497244717, "learning_rate": 9.933751241783807e-06, "loss": 0.952, "step": 1801 }, { "epoch": 0.16, "grad_norm": 5.680716023126336, "learning_rate": 9.933633988037446e-06, "loss": 0.9466, "step": 1802 }, { "epoch": 0.16, "grad_norm": 6.573240544586446, "learning_rate": 9.93351663131228e-06, "loss": 0.8866, "step": 1803 }, { "epoch": 0.16, "grad_norm": 5.6730128401360025, "learning_rate": 9.933399171610757e-06, "loss": 0.8836, "step": 1804 }, { "epoch": 0.16, "grad_norm": 8.126686317194952, "learning_rate": 9.933281608935332e-06, "loss": 0.8863, "step": 1805 }, { "epoch": 0.16, "grad_norm": 10.110240002277981, "learning_rate": 9.933163943288456e-06, "loss": 1.0113, "step": 1806 }, { "epoch": 0.16, "grad_norm": 7.273503558260192, "learning_rate": 9.933046174672587e-06, "loss": 0.9297, "step": 1807 }, { "epoch": 0.16, "grad_norm": 7.235582858420247, "learning_rate": 9.932928303090182e-06, "loss": 0.9864, "step": 1808 }, { "epoch": 0.16, "grad_norm": 8.816108483676732, "learning_rate": 9.932810328543702e-06, "loss": 0.8795, "step": 1809 }, { "epoch": 0.16, "grad_norm": 7.876782758717439, "learning_rate": 9.932692251035609e-06, "loss": 0.9412, "step": 1810 }, { "epoch": 0.16, "grad_norm": 5.404128718579789, "learning_rate": 9.932574070568366e-06, "loss": 0.9329, "step": 1811 }, { "epoch": 0.16, "grad_norm": 6.709091974940604, "learning_rate": 9.932455787144444e-06, "loss": 0.9167, "step": 1812 }, { "epoch": 0.16, "grad_norm": 6.364741106400842, "learning_rate": 9.932337400766308e-06, "loss": 0.8257, "step": 1813 }, { "epoch": 0.16, "grad_norm": 10.94237148338265, "learning_rate": 9.93221891143643e-06, "loss": 0.957, "step": 1814 }, { "epoch": 0.16, "grad_norm": 9.160605191826752, "learning_rate": 9.932100319157286e-06, "loss": 0.908, "step": 1815 }, { "epoch": 0.16, "grad_norm": 7.191472244948973, "learning_rate": 9.931981623931346e-06, "loss": 0.9232, "step": 1816 }, { "epoch": 0.16, "grad_norm": 6.550798514247578, "learning_rate": 9.931862825761092e-06, "loss": 0.9624, "step": 1817 }, { "epoch": 0.16, "grad_norm": 6.70307094046196, "learning_rate": 9.931743924649e-06, "loss": 0.9524, "step": 1818 }, { "epoch": 0.16, "grad_norm": 6.8865632625895925, "learning_rate": 9.931624920597556e-06, "loss": 0.9107, "step": 1819 }, { "epoch": 0.16, "grad_norm": 8.418871189570078, "learning_rate": 9.93150581360924e-06, "loss": 0.8916, "step": 1820 }, { "epoch": 0.16, "grad_norm": 7.514776149416491, "learning_rate": 9.931386603686544e-06, "loss": 0.9075, "step": 1821 }, { "epoch": 0.16, "grad_norm": 6.185962051438692, "learning_rate": 9.931267290831948e-06, "loss": 0.8462, "step": 1822 }, { "epoch": 0.16, "grad_norm": 6.341480267939516, "learning_rate": 9.931147875047948e-06, "loss": 0.8568, "step": 1823 }, { "epoch": 0.16, "grad_norm": 8.776046214120354, "learning_rate": 9.931028356337035e-06, "loss": 0.9167, "step": 1824 }, { "epoch": 0.16, "grad_norm": 8.943871078296796, "learning_rate": 9.930908734701705e-06, "loss": 0.8797, "step": 1825 }, { "epoch": 0.16, "grad_norm": 6.050885725217994, "learning_rate": 9.930789010144453e-06, "loss": 0.8178, "step": 1826 }, { "epoch": 0.16, "grad_norm": 6.788748465974695, "learning_rate": 9.930669182667778e-06, "loss": 0.8805, "step": 1827 }, { "epoch": 0.16, "grad_norm": 6.500139943187679, "learning_rate": 9.930549252274182e-06, "loss": 0.9023, "step": 1828 }, { "epoch": 0.16, "grad_norm": 6.285187248469305, "learning_rate": 9.930429218966169e-06, "loss": 0.8776, "step": 1829 }, { "epoch": 0.16, "grad_norm": 9.700379898311727, "learning_rate": 9.930309082746244e-06, "loss": 0.9606, "step": 1830 }, { "epoch": 0.16, "grad_norm": 7.677150029203911, "learning_rate": 9.930188843616913e-06, "loss": 0.8879, "step": 1831 }, { "epoch": 0.16, "grad_norm": 8.769367180082794, "learning_rate": 9.930068501580686e-06, "loss": 0.9091, "step": 1832 }, { "epoch": 0.16, "grad_norm": 8.950325365742314, "learning_rate": 9.929948056640076e-06, "loss": 0.8786, "step": 1833 }, { "epoch": 0.16, "grad_norm": 7.977420432526979, "learning_rate": 9.929827508797599e-06, "loss": 0.8691, "step": 1834 }, { "epoch": 0.16, "grad_norm": 7.459009814426587, "learning_rate": 9.929706858055765e-06, "loss": 0.8706, "step": 1835 }, { "epoch": 0.16, "grad_norm": 8.833810649229092, "learning_rate": 9.9295861044171e-06, "loss": 0.8892, "step": 1836 }, { "epoch": 0.16, "grad_norm": 8.677945432990281, "learning_rate": 9.929465247884119e-06, "loss": 0.9004, "step": 1837 }, { "epoch": 0.16, "grad_norm": 7.4757094775664825, "learning_rate": 9.929344288459346e-06, "loss": 0.8786, "step": 1838 }, { "epoch": 0.16, "grad_norm": 6.759768845458339, "learning_rate": 9.929223226145306e-06, "loss": 0.8523, "step": 1839 }, { "epoch": 0.16, "grad_norm": 7.289840448734264, "learning_rate": 9.929102060944525e-06, "loss": 0.8634, "step": 1840 }, { "epoch": 0.16, "grad_norm": 7.4787952227336145, "learning_rate": 9.928980792859535e-06, "loss": 0.9583, "step": 1841 }, { "epoch": 0.16, "grad_norm": 6.382608806931879, "learning_rate": 9.928859421892864e-06, "loss": 0.9281, "step": 1842 }, { "epoch": 0.16, "grad_norm": 7.794575501427869, "learning_rate": 9.928737948047047e-06, "loss": 0.8746, "step": 1843 }, { "epoch": 0.16, "grad_norm": 8.042826174845755, "learning_rate": 9.92861637132462e-06, "loss": 0.8881, "step": 1844 }, { "epoch": 0.16, "grad_norm": 6.8871961000326, "learning_rate": 9.928494691728118e-06, "loss": 0.8743, "step": 1845 }, { "epoch": 0.16, "grad_norm": 7.2265327253727065, "learning_rate": 9.928372909260085e-06, "loss": 0.9684, "step": 1846 }, { "epoch": 0.16, "grad_norm": 8.097632110478237, "learning_rate": 9.928251023923059e-06, "loss": 0.9012, "step": 1847 }, { "epoch": 0.16, "grad_norm": 10.435580718285049, "learning_rate": 9.928129035719584e-06, "loss": 0.9562, "step": 1848 }, { "epoch": 0.16, "grad_norm": 7.387948799425206, "learning_rate": 9.92800694465221e-06, "loss": 0.9084, "step": 1849 }, { "epoch": 0.17, "grad_norm": 8.767053791447127, "learning_rate": 9.927884750723482e-06, "loss": 0.922, "step": 1850 }, { "epoch": 0.17, "grad_norm": 6.744215818627964, "learning_rate": 9.927762453935954e-06, "loss": 0.9167, "step": 1851 }, { "epoch": 0.17, "grad_norm": 7.576825531094879, "learning_rate": 9.927640054292173e-06, "loss": 0.878, "step": 1852 }, { "epoch": 0.17, "grad_norm": 6.96017431411553, "learning_rate": 9.927517551794698e-06, "loss": 0.901, "step": 1853 }, { "epoch": 0.17, "grad_norm": 6.011882537527502, "learning_rate": 9.927394946446087e-06, "loss": 0.899, "step": 1854 }, { "epoch": 0.17, "grad_norm": 6.51646628272277, "learning_rate": 9.927272238248897e-06, "loss": 0.9859, "step": 1855 }, { "epoch": 0.17, "grad_norm": 7.490810594206654, "learning_rate": 9.92714942720569e-06, "loss": 0.9102, "step": 1856 }, { "epoch": 0.17, "grad_norm": 11.666435860647674, "learning_rate": 9.927026513319026e-06, "loss": 0.9393, "step": 1857 }, { "epoch": 0.17, "grad_norm": 6.17489488136651, "learning_rate": 9.926903496591476e-06, "loss": 0.9371, "step": 1858 }, { "epoch": 0.17, "grad_norm": 7.111503436322434, "learning_rate": 9.926780377025605e-06, "loss": 0.907, "step": 1859 }, { "epoch": 0.17, "grad_norm": 7.334113458524133, "learning_rate": 9.926657154623982e-06, "loss": 0.9594, "step": 1860 }, { "epoch": 0.17, "grad_norm": 6.951764168478322, "learning_rate": 9.92653382938918e-06, "loss": 0.8548, "step": 1861 }, { "epoch": 0.17, "grad_norm": 8.56410017346514, "learning_rate": 9.926410401323774e-06, "loss": 0.9511, "step": 1862 }, { "epoch": 0.17, "grad_norm": 6.154232913908849, "learning_rate": 9.92628687043034e-06, "loss": 0.9197, "step": 1863 }, { "epoch": 0.17, "grad_norm": 7.092390652436593, "learning_rate": 9.926163236711457e-06, "loss": 0.9148, "step": 1864 }, { "epoch": 0.17, "grad_norm": 6.142839655365062, "learning_rate": 9.926039500169702e-06, "loss": 0.9152, "step": 1865 }, { "epoch": 0.17, "grad_norm": 11.439279821569576, "learning_rate": 9.925915660807662e-06, "loss": 0.8971, "step": 1866 }, { "epoch": 0.17, "grad_norm": 7.363104876581712, "learning_rate": 9.925791718627919e-06, "loss": 0.9054, "step": 1867 }, { "epoch": 0.17, "grad_norm": 8.101905569038976, "learning_rate": 9.925667673633062e-06, "loss": 0.9095, "step": 1868 }, { "epoch": 0.17, "grad_norm": 9.517695386863341, "learning_rate": 9.925543525825678e-06, "loss": 0.8681, "step": 1869 }, { "epoch": 0.17, "grad_norm": 8.528612035711372, "learning_rate": 9.92541927520836e-06, "loss": 0.9151, "step": 1870 }, { "epoch": 0.17, "grad_norm": 7.596313011679014, "learning_rate": 9.925294921783703e-06, "loss": 0.8948, "step": 1871 }, { "epoch": 0.17, "grad_norm": 6.986210726300345, "learning_rate": 9.9251704655543e-06, "loss": 0.8871, "step": 1872 }, { "epoch": 0.17, "grad_norm": 8.07407802467284, "learning_rate": 9.925045906522748e-06, "loss": 0.9302, "step": 1873 }, { "epoch": 0.17, "grad_norm": 7.450319992669869, "learning_rate": 9.92492124469165e-06, "loss": 0.9303, "step": 1874 }, { "epoch": 0.17, "grad_norm": 11.568036017642704, "learning_rate": 9.924796480063605e-06, "loss": 0.9168, "step": 1875 }, { "epoch": 0.17, "grad_norm": 9.134079275798095, "learning_rate": 9.924671612641222e-06, "loss": 0.8691, "step": 1876 }, { "epoch": 0.17, "grad_norm": 7.475980810739225, "learning_rate": 9.9245466424271e-06, "loss": 0.893, "step": 1877 }, { "epoch": 0.17, "grad_norm": 8.401800382519264, "learning_rate": 9.924421569423852e-06, "loss": 0.8866, "step": 1878 }, { "epoch": 0.17, "grad_norm": 11.791024924165079, "learning_rate": 9.92429639363409e-06, "loss": 0.9232, "step": 1879 }, { "epoch": 0.17, "grad_norm": 7.038795508962569, "learning_rate": 9.924171115060423e-06, "loss": 0.8589, "step": 1880 }, { "epoch": 0.17, "grad_norm": 7.786422823953024, "learning_rate": 9.924045733705467e-06, "loss": 0.8787, "step": 1881 }, { "epoch": 0.17, "grad_norm": 6.958520736653043, "learning_rate": 9.923920249571842e-06, "loss": 0.9571, "step": 1882 }, { "epoch": 0.17, "grad_norm": 7.700412820806753, "learning_rate": 9.923794662662166e-06, "loss": 0.8854, "step": 1883 }, { "epoch": 0.17, "grad_norm": 7.265128094951468, "learning_rate": 9.923668972979056e-06, "loss": 0.9124, "step": 1884 }, { "epoch": 0.17, "grad_norm": 7.228660306821579, "learning_rate": 9.92354318052514e-06, "loss": 0.8981, "step": 1885 }, { "epoch": 0.17, "grad_norm": 8.405093126120839, "learning_rate": 9.923417285303043e-06, "loss": 0.864, "step": 1886 }, { "epoch": 0.17, "grad_norm": 6.873532244534752, "learning_rate": 9.923291287315392e-06, "loss": 0.8779, "step": 1887 }, { "epoch": 0.17, "grad_norm": 5.178450270825086, "learning_rate": 9.923165186564817e-06, "loss": 0.8535, "step": 1888 }, { "epoch": 0.17, "grad_norm": 6.9617608829561135, "learning_rate": 9.92303898305395e-06, "loss": 0.8498, "step": 1889 }, { "epoch": 0.17, "grad_norm": 6.959421014875193, "learning_rate": 9.922912676785428e-06, "loss": 0.867, "step": 1890 }, { "epoch": 0.17, "grad_norm": 6.916917540446551, "learning_rate": 9.922786267761881e-06, "loss": 0.961, "step": 1891 }, { "epoch": 0.17, "grad_norm": 9.029072540901147, "learning_rate": 9.922659755985954e-06, "loss": 0.9609, "step": 1892 }, { "epoch": 0.17, "grad_norm": 8.071687110048709, "learning_rate": 9.922533141460285e-06, "loss": 0.8697, "step": 1893 }, { "epoch": 0.17, "grad_norm": 6.638039774159249, "learning_rate": 9.922406424187515e-06, "loss": 0.9471, "step": 1894 }, { "epoch": 0.17, "grad_norm": 9.45587084533974, "learning_rate": 9.922279604170293e-06, "loss": 0.9574, "step": 1895 }, { "epoch": 0.17, "grad_norm": 6.290059975080132, "learning_rate": 9.922152681411262e-06, "loss": 0.9148, "step": 1896 }, { "epoch": 0.17, "grad_norm": 8.181058830881105, "learning_rate": 9.922025655913074e-06, "loss": 0.8844, "step": 1897 }, { "epoch": 0.17, "grad_norm": 7.321380880243945, "learning_rate": 9.92189852767838e-06, "loss": 0.8734, "step": 1898 }, { "epoch": 0.17, "grad_norm": 7.760217216419728, "learning_rate": 9.921771296709832e-06, "loss": 0.9711, "step": 1899 }, { "epoch": 0.17, "grad_norm": 8.055354464722107, "learning_rate": 9.921643963010085e-06, "loss": 0.8882, "step": 1900 }, { "epoch": 0.17, "grad_norm": 7.836520447813757, "learning_rate": 9.921516526581803e-06, "loss": 0.8764, "step": 1901 }, { "epoch": 0.17, "grad_norm": 7.32581548456697, "learning_rate": 9.921388987427639e-06, "loss": 0.9239, "step": 1902 }, { "epoch": 0.17, "grad_norm": 9.254615236010352, "learning_rate": 9.921261345550256e-06, "loss": 0.8584, "step": 1903 }, { "epoch": 0.17, "grad_norm": 7.779936389155573, "learning_rate": 9.921133600952321e-06, "loss": 0.8893, "step": 1904 }, { "epoch": 0.17, "grad_norm": 7.627847045567178, "learning_rate": 9.9210057536365e-06, "loss": 0.9519, "step": 1905 }, { "epoch": 0.17, "grad_norm": 7.9604796296887566, "learning_rate": 9.92087780360546e-06, "loss": 0.8856, "step": 1906 }, { "epoch": 0.17, "grad_norm": 6.244701130694521, "learning_rate": 9.920749750861874e-06, "loss": 0.8529, "step": 1907 }, { "epoch": 0.17, "grad_norm": 6.491034716598624, "learning_rate": 9.92062159540841e-06, "loss": 0.9026, "step": 1908 }, { "epoch": 0.17, "grad_norm": 7.120924476461528, "learning_rate": 9.920493337247749e-06, "loss": 0.9109, "step": 1909 }, { "epoch": 0.17, "grad_norm": 6.43822864710141, "learning_rate": 9.920364976382563e-06, "loss": 0.8757, "step": 1910 }, { "epoch": 0.17, "grad_norm": 8.4086210836062, "learning_rate": 9.920236512815536e-06, "loss": 0.8666, "step": 1911 }, { "epoch": 0.17, "grad_norm": 5.518059251736505, "learning_rate": 9.920107946549345e-06, "loss": 0.8669, "step": 1912 }, { "epoch": 0.17, "grad_norm": 5.202682029561717, "learning_rate": 9.919979277586677e-06, "loss": 0.8566, "step": 1913 }, { "epoch": 0.17, "grad_norm": 6.6661192463369074, "learning_rate": 9.919850505930216e-06, "loss": 0.9361, "step": 1914 }, { "epoch": 0.17, "grad_norm": 7.815692049731296, "learning_rate": 9.919721631582648e-06, "loss": 0.9425, "step": 1915 }, { "epoch": 0.17, "grad_norm": 7.697392440671447, "learning_rate": 9.919592654546665e-06, "loss": 0.8469, "step": 1916 }, { "epoch": 0.17, "grad_norm": 10.025702154733576, "learning_rate": 9.919463574824962e-06, "loss": 0.8919, "step": 1917 }, { "epoch": 0.17, "grad_norm": 5.783534581188596, "learning_rate": 9.919334392420229e-06, "loss": 0.9603, "step": 1918 }, { "epoch": 0.17, "grad_norm": 8.123191100170658, "learning_rate": 9.919205107335163e-06, "loss": 0.9162, "step": 1919 }, { "epoch": 0.17, "grad_norm": 7.040096213134516, "learning_rate": 9.919075719572464e-06, "loss": 0.941, "step": 1920 }, { "epoch": 0.17, "grad_norm": 8.509762656935784, "learning_rate": 9.918946229134831e-06, "loss": 0.8527, "step": 1921 }, { "epoch": 0.17, "grad_norm": 7.696737954557049, "learning_rate": 9.918816636024968e-06, "loss": 0.8829, "step": 1922 }, { "epoch": 0.17, "grad_norm": 8.567100658400628, "learning_rate": 9.918686940245581e-06, "loss": 0.8782, "step": 1923 }, { "epoch": 0.17, "grad_norm": 9.351512861535879, "learning_rate": 9.918557141799374e-06, "loss": 0.9349, "step": 1924 }, { "epoch": 0.17, "grad_norm": 9.42830355070083, "learning_rate": 9.918427240689059e-06, "loss": 0.89, "step": 1925 }, { "epoch": 0.17, "grad_norm": 5.876763625626068, "learning_rate": 9.918297236917346e-06, "loss": 0.9172, "step": 1926 }, { "epoch": 0.17, "grad_norm": 7.597408151383452, "learning_rate": 9.918167130486951e-06, "loss": 0.9205, "step": 1927 }, { "epoch": 0.17, "grad_norm": 7.369350234967008, "learning_rate": 9.918036921400587e-06, "loss": 0.9008, "step": 1928 }, { "epoch": 0.17, "grad_norm": 7.173214508315758, "learning_rate": 9.917906609660971e-06, "loss": 0.8335, "step": 1929 }, { "epoch": 0.17, "grad_norm": 8.040322187744046, "learning_rate": 9.917776195270825e-06, "loss": 0.9115, "step": 1930 }, { "epoch": 0.17, "grad_norm": 6.740263102581305, "learning_rate": 9.917645678232872e-06, "loss": 0.8874, "step": 1931 }, { "epoch": 0.17, "grad_norm": 8.705295795198285, "learning_rate": 9.917515058549833e-06, "loss": 0.9233, "step": 1932 }, { "epoch": 0.17, "grad_norm": 7.773984162868728, "learning_rate": 9.917384336224438e-06, "loss": 0.9101, "step": 1933 }, { "epoch": 0.17, "grad_norm": 7.6283725286476525, "learning_rate": 9.917253511259414e-06, "loss": 0.9035, "step": 1934 }, { "epoch": 0.17, "grad_norm": 5.654361041352256, "learning_rate": 9.91712258365749e-06, "loss": 0.9339, "step": 1935 }, { "epoch": 0.17, "grad_norm": 6.832164352205454, "learning_rate": 9.916991553421403e-06, "loss": 0.9133, "step": 1936 }, { "epoch": 0.17, "grad_norm": 5.96423653404186, "learning_rate": 9.916860420553881e-06, "loss": 0.8127, "step": 1937 }, { "epoch": 0.17, "grad_norm": 8.713232759298602, "learning_rate": 9.91672918505767e-06, "loss": 0.9149, "step": 1938 }, { "epoch": 0.17, "grad_norm": 7.684821338614426, "learning_rate": 9.916597846935502e-06, "loss": 0.8879, "step": 1939 }, { "epoch": 0.17, "grad_norm": 7.967809386412753, "learning_rate": 9.916466406190121e-06, "loss": 0.8744, "step": 1940 }, { "epoch": 0.17, "grad_norm": 8.667198955023833, "learning_rate": 9.916334862824272e-06, "loss": 0.9152, "step": 1941 }, { "epoch": 0.17, "grad_norm": 9.135679782415234, "learning_rate": 9.9162032168407e-06, "loss": 0.9429, "step": 1942 }, { "epoch": 0.17, "grad_norm": 7.910917507211552, "learning_rate": 9.916071468242152e-06, "loss": 0.9173, "step": 1943 }, { "epoch": 0.17, "grad_norm": 6.439059393930484, "learning_rate": 9.915939617031375e-06, "loss": 0.9142, "step": 1944 }, { "epoch": 0.17, "grad_norm": 8.371772749151809, "learning_rate": 9.915807663211128e-06, "loss": 0.9456, "step": 1945 }, { "epoch": 0.17, "grad_norm": 8.800079047212957, "learning_rate": 9.915675606784158e-06, "loss": 0.9046, "step": 1946 }, { "epoch": 0.17, "grad_norm": 8.0617666201185, "learning_rate": 9.915543447753228e-06, "loss": 0.8866, "step": 1947 }, { "epoch": 0.17, "grad_norm": 7.452949616337805, "learning_rate": 9.915411186121092e-06, "loss": 0.9017, "step": 1948 }, { "epoch": 0.17, "grad_norm": 6.805725858739714, "learning_rate": 9.915278821890512e-06, "loss": 0.9173, "step": 1949 }, { "epoch": 0.17, "grad_norm": 7.286654577296225, "learning_rate": 9.915146355064252e-06, "loss": 0.9681, "step": 1950 }, { "epoch": 0.17, "grad_norm": 9.061557601425344, "learning_rate": 9.915013785645073e-06, "loss": 0.9548, "step": 1951 }, { "epoch": 0.17, "grad_norm": 8.08752451587432, "learning_rate": 9.914881113635748e-06, "loss": 0.8549, "step": 1952 }, { "epoch": 0.17, "grad_norm": 6.83571411592925, "learning_rate": 9.914748339039042e-06, "loss": 0.8799, "step": 1953 }, { "epoch": 0.17, "grad_norm": 5.740089928393752, "learning_rate": 9.914615461857728e-06, "loss": 0.8745, "step": 1954 }, { "epoch": 0.17, "grad_norm": 7.094155507273022, "learning_rate": 9.91448248209458e-06, "loss": 0.8806, "step": 1955 }, { "epoch": 0.17, "grad_norm": 5.408256607257896, "learning_rate": 9.914349399752368e-06, "loss": 0.9337, "step": 1956 }, { "epoch": 0.17, "grad_norm": 6.007507891567109, "learning_rate": 9.914216214833879e-06, "loss": 0.9435, "step": 1957 }, { "epoch": 0.17, "grad_norm": 6.403628454508832, "learning_rate": 9.914082927341887e-06, "loss": 0.9746, "step": 1958 }, { "epoch": 0.17, "grad_norm": 8.25357044320242, "learning_rate": 9.913949537279176e-06, "loss": 0.8987, "step": 1959 }, { "epoch": 0.17, "grad_norm": 6.621670635165892, "learning_rate": 9.91381604464853e-06, "loss": 0.8934, "step": 1960 }, { "epoch": 0.17, "grad_norm": 8.358817982254543, "learning_rate": 9.913682449452735e-06, "loss": 0.9296, "step": 1961 }, { "epoch": 0.18, "grad_norm": 8.525071767820268, "learning_rate": 9.913548751694578e-06, "loss": 0.8718, "step": 1962 }, { "epoch": 0.18, "grad_norm": 6.529690337827119, "learning_rate": 9.913414951376853e-06, "loss": 0.8605, "step": 1963 }, { "epoch": 0.18, "grad_norm": 6.496647781045088, "learning_rate": 9.91328104850235e-06, "loss": 0.8859, "step": 1964 }, { "epoch": 0.18, "grad_norm": 7.8734741299726005, "learning_rate": 9.913147043073867e-06, "loss": 0.8372, "step": 1965 }, { "epoch": 0.18, "grad_norm": 8.153102687426184, "learning_rate": 9.913012935094197e-06, "loss": 0.9078, "step": 1966 }, { "epoch": 0.18, "grad_norm": 10.119948417401723, "learning_rate": 9.912878724566141e-06, "loss": 0.8316, "step": 1967 }, { "epoch": 0.18, "grad_norm": 5.7170432941270795, "learning_rate": 9.912744411492503e-06, "loss": 0.9331, "step": 1968 }, { "epoch": 0.18, "grad_norm": 8.42798279232322, "learning_rate": 9.912609995876084e-06, "loss": 0.8632, "step": 1969 }, { "epoch": 0.18, "grad_norm": 7.21176447176481, "learning_rate": 9.912475477719688e-06, "loss": 0.873, "step": 1970 }, { "epoch": 0.18, "grad_norm": 7.799431989132538, "learning_rate": 9.912340857026124e-06, "loss": 0.9177, "step": 1971 }, { "epoch": 0.18, "grad_norm": 5.737126987515644, "learning_rate": 9.912206133798205e-06, "loss": 0.9201, "step": 1972 }, { "epoch": 0.18, "grad_norm": 9.166344749335119, "learning_rate": 9.912071308038737e-06, "loss": 0.9408, "step": 1973 }, { "epoch": 0.18, "grad_norm": 6.995732501781593, "learning_rate": 9.911936379750541e-06, "loss": 0.8739, "step": 1974 }, { "epoch": 0.18, "grad_norm": 7.2185525870880705, "learning_rate": 9.911801348936429e-06, "loss": 0.8424, "step": 1975 }, { "epoch": 0.18, "grad_norm": 7.318702610216175, "learning_rate": 9.91166621559922e-06, "loss": 0.926, "step": 1976 }, { "epoch": 0.18, "grad_norm": 7.235385437874636, "learning_rate": 9.911530979741736e-06, "loss": 0.8553, "step": 1977 }, { "epoch": 0.18, "grad_norm": 7.009170187282513, "learning_rate": 9.9113956413668e-06, "loss": 0.9196, "step": 1978 }, { "epoch": 0.18, "grad_norm": 8.239851164289183, "learning_rate": 9.911260200477234e-06, "loss": 0.9056, "step": 1979 }, { "epoch": 0.18, "grad_norm": 5.936474483853389, "learning_rate": 9.911124657075866e-06, "loss": 0.8739, "step": 1980 }, { "epoch": 0.18, "grad_norm": 5.723968955679734, "learning_rate": 9.910989011165526e-06, "loss": 0.8838, "step": 1981 }, { "epoch": 0.18, "grad_norm": 7.638872907467477, "learning_rate": 9.910853262749048e-06, "loss": 0.9227, "step": 1982 }, { "epoch": 0.18, "grad_norm": 11.803980294848449, "learning_rate": 9.91071741182926e-06, "loss": 0.967, "step": 1983 }, { "epoch": 0.18, "grad_norm": 6.463895986294813, "learning_rate": 9.910581458409e-06, "loss": 0.8865, "step": 1984 }, { "epoch": 0.18, "grad_norm": 8.071897909744786, "learning_rate": 9.910445402491107e-06, "loss": 0.9097, "step": 1985 }, { "epoch": 0.18, "grad_norm": 7.921383347447673, "learning_rate": 9.91030924407842e-06, "loss": 0.9381, "step": 1986 }, { "epoch": 0.18, "grad_norm": 8.330717435511584, "learning_rate": 9.910172983173782e-06, "loss": 0.9083, "step": 1987 }, { "epoch": 0.18, "grad_norm": 7.89734972069986, "learning_rate": 9.910036619780035e-06, "loss": 0.9549, "step": 1988 }, { "epoch": 0.18, "grad_norm": 6.854682930859191, "learning_rate": 9.909900153900027e-06, "loss": 0.9571, "step": 1989 }, { "epoch": 0.18, "grad_norm": 8.037619282926741, "learning_rate": 9.909763585536605e-06, "loss": 0.9013, "step": 1990 }, { "epoch": 0.18, "grad_norm": 6.749472341718931, "learning_rate": 9.909626914692621e-06, "loss": 0.9237, "step": 1991 }, { "epoch": 0.18, "grad_norm": 6.36227730819618, "learning_rate": 9.909490141370927e-06, "loss": 0.8414, "step": 1992 }, { "epoch": 0.18, "grad_norm": 7.126348501517217, "learning_rate": 9.909353265574377e-06, "loss": 0.8621, "step": 1993 }, { "epoch": 0.18, "grad_norm": 6.85785431612523, "learning_rate": 9.909216287305829e-06, "loss": 0.9117, "step": 1994 }, { "epoch": 0.18, "grad_norm": 7.573583223064552, "learning_rate": 9.909079206568143e-06, "loss": 0.9056, "step": 1995 }, { "epoch": 0.18, "grad_norm": 5.592444379852677, "learning_rate": 9.908942023364178e-06, "loss": 0.8578, "step": 1996 }, { "epoch": 0.18, "grad_norm": 7.490470127359818, "learning_rate": 9.9088047376968e-06, "loss": 0.8741, "step": 1997 }, { "epoch": 0.18, "grad_norm": 6.005209099826573, "learning_rate": 9.908667349568873e-06, "loss": 0.9036, "step": 1998 }, { "epoch": 0.18, "grad_norm": 7.433089662182811, "learning_rate": 9.908529858983265e-06, "loss": 0.9159, "step": 1999 }, { "epoch": 0.18, "grad_norm": 8.491019034307945, "learning_rate": 9.908392265942844e-06, "loss": 0.8495, "step": 2000 }, { "epoch": 0.18, "grad_norm": 7.796467806634283, "learning_rate": 9.908254570450485e-06, "loss": 0.8601, "step": 2001 }, { "epoch": 0.18, "grad_norm": 7.70479080609709, "learning_rate": 9.90811677250906e-06, "loss": 0.9967, "step": 2002 }, { "epoch": 0.18, "grad_norm": 6.5418846906631805, "learning_rate": 9.907978872121447e-06, "loss": 0.8818, "step": 2003 }, { "epoch": 0.18, "grad_norm": 8.573150870365698, "learning_rate": 9.907840869290523e-06, "loss": 0.9338, "step": 2004 }, { "epoch": 0.18, "grad_norm": 6.162570029168391, "learning_rate": 9.907702764019169e-06, "loss": 0.9462, "step": 2005 }, { "epoch": 0.18, "grad_norm": 7.200153322542364, "learning_rate": 9.907564556310267e-06, "loss": 0.9661, "step": 2006 }, { "epoch": 0.18, "grad_norm": 8.275668764211943, "learning_rate": 9.907426246166703e-06, "loss": 0.9807, "step": 2007 }, { "epoch": 0.18, "grad_norm": 10.138663621820154, "learning_rate": 9.907287833591361e-06, "loss": 0.9129, "step": 2008 }, { "epoch": 0.18, "grad_norm": 11.888579991935643, "learning_rate": 9.907149318587134e-06, "loss": 0.9234, "step": 2009 }, { "epoch": 0.18, "grad_norm": 5.959712115797687, "learning_rate": 9.907010701156912e-06, "loss": 0.8569, "step": 2010 }, { "epoch": 0.18, "grad_norm": 7.450134610293375, "learning_rate": 9.906871981303587e-06, "loss": 0.8922, "step": 2011 }, { "epoch": 0.18, "grad_norm": 9.853139847198163, "learning_rate": 9.906733159030054e-06, "loss": 0.9756, "step": 2012 }, { "epoch": 0.18, "grad_norm": 6.747838577177867, "learning_rate": 9.906594234339215e-06, "loss": 0.9275, "step": 2013 }, { "epoch": 0.18, "grad_norm": 6.925840277332531, "learning_rate": 9.906455207233966e-06, "loss": 0.9427, "step": 2014 }, { "epoch": 0.18, "grad_norm": 7.376629033433445, "learning_rate": 9.906316077717207e-06, "loss": 0.9468, "step": 2015 }, { "epoch": 0.18, "grad_norm": 7.312578615230302, "learning_rate": 9.906176845791847e-06, "loss": 0.8663, "step": 2016 }, { "epoch": 0.18, "grad_norm": 6.096714087856581, "learning_rate": 9.906037511460788e-06, "loss": 0.8374, "step": 2017 }, { "epoch": 0.18, "grad_norm": 9.1116856303445, "learning_rate": 9.905898074726941e-06, "loss": 0.9269, "step": 2018 }, { "epoch": 0.18, "grad_norm": 10.539006284849428, "learning_rate": 9.905758535593215e-06, "loss": 0.9391, "step": 2019 }, { "epoch": 0.18, "grad_norm": 9.864912662322192, "learning_rate": 9.905618894062524e-06, "loss": 0.9289, "step": 2020 }, { "epoch": 0.18, "grad_norm": 8.126719407594258, "learning_rate": 9.90547915013778e-06, "loss": 0.9773, "step": 2021 }, { "epoch": 0.18, "grad_norm": 6.871764505797942, "learning_rate": 9.905339303821902e-06, "loss": 0.9296, "step": 2022 }, { "epoch": 0.18, "grad_norm": 8.55487488643203, "learning_rate": 9.905199355117808e-06, "loss": 0.9293, "step": 2023 }, { "epoch": 0.18, "grad_norm": 7.734065897471375, "learning_rate": 9.905059304028421e-06, "loss": 0.8571, "step": 2024 }, { "epoch": 0.18, "grad_norm": 6.752441005141632, "learning_rate": 9.904919150556662e-06, "loss": 0.877, "step": 2025 }, { "epoch": 0.18, "grad_norm": 7.27453726764212, "learning_rate": 9.904778894705458e-06, "loss": 0.8732, "step": 2026 }, { "epoch": 0.18, "grad_norm": 7.210247528261339, "learning_rate": 9.904638536477735e-06, "loss": 0.9052, "step": 2027 }, { "epoch": 0.18, "grad_norm": 6.017030642411388, "learning_rate": 9.904498075876425e-06, "loss": 0.8758, "step": 2028 }, { "epoch": 0.18, "grad_norm": 7.879683816183592, "learning_rate": 9.904357512904457e-06, "loss": 0.9831, "step": 2029 }, { "epoch": 0.18, "grad_norm": 7.5354084234922505, "learning_rate": 9.904216847564767e-06, "loss": 0.9267, "step": 2030 }, { "epoch": 0.18, "grad_norm": 8.199546629510897, "learning_rate": 9.904076079860289e-06, "loss": 0.9482, "step": 2031 }, { "epoch": 0.18, "grad_norm": 7.4984542204914275, "learning_rate": 9.903935209793965e-06, "loss": 0.9528, "step": 2032 }, { "epoch": 0.18, "grad_norm": 6.236196516210283, "learning_rate": 9.90379423736873e-06, "loss": 0.9108, "step": 2033 }, { "epoch": 0.18, "grad_norm": 8.885510703640033, "learning_rate": 9.903653162587532e-06, "loss": 0.9105, "step": 2034 }, { "epoch": 0.18, "grad_norm": 5.536666493085037, "learning_rate": 9.903511985453312e-06, "loss": 0.8732, "step": 2035 }, { "epoch": 0.18, "grad_norm": 7.969015012190011, "learning_rate": 9.903370705969017e-06, "loss": 0.9261, "step": 2036 }, { "epoch": 0.18, "grad_norm": 9.47660247675216, "learning_rate": 9.9032293241376e-06, "loss": 0.9095, "step": 2037 }, { "epoch": 0.18, "grad_norm": 7.072236113857404, "learning_rate": 9.903087839962004e-06, "loss": 0.9449, "step": 2038 }, { "epoch": 0.18, "grad_norm": 5.991617589878235, "learning_rate": 9.90294625344519e-06, "loss": 0.9071, "step": 2039 }, { "epoch": 0.18, "grad_norm": 7.970823421571839, "learning_rate": 9.90280456459011e-06, "loss": 0.8834, "step": 2040 }, { "epoch": 0.18, "grad_norm": 9.34619354321064, "learning_rate": 9.902662773399721e-06, "loss": 0.9302, "step": 2041 }, { "epoch": 0.18, "grad_norm": 8.748297086636995, "learning_rate": 9.902520879876984e-06, "loss": 0.9265, "step": 2042 }, { "epoch": 0.18, "grad_norm": 5.9719637613149334, "learning_rate": 9.90237888402486e-06, "loss": 0.926, "step": 2043 }, { "epoch": 0.18, "grad_norm": 9.0074753028014, "learning_rate": 9.902236785846311e-06, "loss": 0.9674, "step": 2044 }, { "epoch": 0.18, "grad_norm": 6.62824528180294, "learning_rate": 9.902094585344307e-06, "loss": 0.8141, "step": 2045 }, { "epoch": 0.18, "grad_norm": 7.425252219921024, "learning_rate": 9.901952282521812e-06, "loss": 0.8711, "step": 2046 }, { "epoch": 0.18, "grad_norm": 7.015712142735464, "learning_rate": 9.901809877381798e-06, "loss": 0.8657, "step": 2047 }, { "epoch": 0.18, "grad_norm": 9.15660922824645, "learning_rate": 9.901667369927239e-06, "loss": 0.8774, "step": 2048 }, { "epoch": 0.18, "grad_norm": 7.703872341778651, "learning_rate": 9.901524760161108e-06, "loss": 0.8865, "step": 2049 }, { "epoch": 0.18, "grad_norm": 6.891815699843743, "learning_rate": 9.901382048086383e-06, "loss": 0.8812, "step": 2050 }, { "epoch": 0.18, "grad_norm": 7.149839929928816, "learning_rate": 9.901239233706039e-06, "loss": 0.9314, "step": 2051 }, { "epoch": 0.18, "grad_norm": 5.190037027773378, "learning_rate": 9.901096317023061e-06, "loss": 0.8652, "step": 2052 }, { "epoch": 0.18, "grad_norm": 9.121987516799603, "learning_rate": 9.90095329804043e-06, "loss": 0.897, "step": 2053 }, { "epoch": 0.18, "grad_norm": 6.665772977942403, "learning_rate": 9.900810176761133e-06, "loss": 0.9101, "step": 2054 }, { "epoch": 0.18, "grad_norm": 9.051203965137995, "learning_rate": 9.900666953188155e-06, "loss": 0.8974, "step": 2055 }, { "epoch": 0.18, "grad_norm": 6.436286918207099, "learning_rate": 9.900523627324487e-06, "loss": 0.8581, "step": 2056 }, { "epoch": 0.18, "grad_norm": 7.1388914101264245, "learning_rate": 9.90038019917312e-06, "loss": 0.9535, "step": 2057 }, { "epoch": 0.18, "grad_norm": 9.609413528777043, "learning_rate": 9.90023666873705e-06, "loss": 0.9623, "step": 2058 }, { "epoch": 0.18, "grad_norm": 7.663332567122199, "learning_rate": 9.900093036019267e-06, "loss": 0.8389, "step": 2059 }, { "epoch": 0.18, "grad_norm": 7.133643706749837, "learning_rate": 9.899949301022776e-06, "loss": 0.861, "step": 2060 }, { "epoch": 0.18, "grad_norm": 6.681505045115308, "learning_rate": 9.89980546375057e-06, "loss": 0.8626, "step": 2061 }, { "epoch": 0.18, "grad_norm": 8.60572856806792, "learning_rate": 9.89966152420566e-06, "loss": 0.9483, "step": 2062 }, { "epoch": 0.18, "grad_norm": 8.445110119790856, "learning_rate": 9.899517482391044e-06, "loss": 0.8858, "step": 2063 }, { "epoch": 0.18, "grad_norm": 8.11196567009811, "learning_rate": 9.89937333830973e-06, "loss": 0.9117, "step": 2064 }, { "epoch": 0.18, "grad_norm": 8.971045567996113, "learning_rate": 9.899229091964728e-06, "loss": 0.872, "step": 2065 }, { "epoch": 0.18, "grad_norm": 9.249976026800834, "learning_rate": 9.899084743359048e-06, "loss": 0.8023, "step": 2066 }, { "epoch": 0.18, "grad_norm": 7.769827169558951, "learning_rate": 9.898940292495702e-06, "loss": 0.9284, "step": 2067 }, { "epoch": 0.18, "grad_norm": 9.054915461818512, "learning_rate": 9.898795739377706e-06, "loss": 0.9074, "step": 2068 }, { "epoch": 0.18, "grad_norm": 6.515340806440426, "learning_rate": 9.898651084008077e-06, "loss": 0.8703, "step": 2069 }, { "epoch": 0.18, "grad_norm": 6.118855344860218, "learning_rate": 9.898506326389835e-06, "loss": 0.9071, "step": 2070 }, { "epoch": 0.18, "grad_norm": 8.324322347367508, "learning_rate": 9.898361466526e-06, "loss": 0.9011, "step": 2071 }, { "epoch": 0.18, "grad_norm": 9.86070396653971, "learning_rate": 9.898216504419597e-06, "loss": 0.8726, "step": 2072 }, { "epoch": 0.18, "grad_norm": 9.10279621006575, "learning_rate": 9.89807144007365e-06, "loss": 0.8909, "step": 2073 }, { "epoch": 0.19, "grad_norm": 5.561204581920791, "learning_rate": 9.897926273491192e-06, "loss": 0.8182, "step": 2074 }, { "epoch": 0.19, "grad_norm": 6.7401041820770455, "learning_rate": 9.897781004675246e-06, "loss": 0.9465, "step": 2075 }, { "epoch": 0.19, "grad_norm": 7.741613920593604, "learning_rate": 9.897635633628848e-06, "loss": 0.8711, "step": 2076 }, { "epoch": 0.19, "grad_norm": 6.626644066524354, "learning_rate": 9.897490160355033e-06, "loss": 0.8134, "step": 2077 }, { "epoch": 0.19, "grad_norm": 8.96638494102516, "learning_rate": 9.897344584856836e-06, "loss": 0.8571, "step": 2078 }, { "epoch": 0.19, "grad_norm": 6.19122059989182, "learning_rate": 9.897198907137296e-06, "loss": 0.8474, "step": 2079 }, { "epoch": 0.19, "grad_norm": 6.91946256933639, "learning_rate": 9.897053127199451e-06, "loss": 0.88, "step": 2080 }, { "epoch": 0.19, "grad_norm": 8.177540433386215, "learning_rate": 9.89690724504635e-06, "loss": 0.8844, "step": 2081 }, { "epoch": 0.19, "grad_norm": 7.200296555131909, "learning_rate": 9.896761260681032e-06, "loss": 0.9088, "step": 2082 }, { "epoch": 0.19, "grad_norm": 6.598899446246424, "learning_rate": 9.896615174106547e-06, "loss": 0.8758, "step": 2083 }, { "epoch": 0.19, "grad_norm": 6.437407016073574, "learning_rate": 9.896468985325943e-06, "loss": 0.9642, "step": 2084 }, { "epoch": 0.19, "grad_norm": 9.559428525089809, "learning_rate": 9.896322694342272e-06, "loss": 0.8794, "step": 2085 }, { "epoch": 0.19, "grad_norm": 9.501938759191598, "learning_rate": 9.896176301158588e-06, "loss": 0.9369, "step": 2086 }, { "epoch": 0.19, "grad_norm": 9.06374219742869, "learning_rate": 9.896029805777947e-06, "loss": 0.8526, "step": 2087 }, { "epoch": 0.19, "grad_norm": 5.2985857634015625, "learning_rate": 9.895883208203405e-06, "loss": 0.9294, "step": 2088 }, { "epoch": 0.19, "grad_norm": 7.847330226151448, "learning_rate": 9.895736508438023e-06, "loss": 0.9444, "step": 2089 }, { "epoch": 0.19, "grad_norm": 5.913330447806181, "learning_rate": 9.895589706484862e-06, "loss": 0.8688, "step": 2090 }, { "epoch": 0.19, "grad_norm": 8.5434023244214, "learning_rate": 9.895442802346989e-06, "loss": 0.9602, "step": 2091 }, { "epoch": 0.19, "grad_norm": 7.380144999360028, "learning_rate": 9.895295796027467e-06, "loss": 0.9024, "step": 2092 }, { "epoch": 0.19, "grad_norm": 6.828883164145811, "learning_rate": 9.895148687529366e-06, "loss": 0.987, "step": 2093 }, { "epoch": 0.19, "grad_norm": 8.863717358786092, "learning_rate": 9.895001476855757e-06, "loss": 0.8637, "step": 2094 }, { "epoch": 0.19, "grad_norm": 7.712884095518957, "learning_rate": 9.894854164009712e-06, "loss": 0.9229, "step": 2095 }, { "epoch": 0.19, "grad_norm": 6.757259126770476, "learning_rate": 9.894706748994305e-06, "loss": 0.8584, "step": 2096 }, { "epoch": 0.19, "grad_norm": 6.655174773921718, "learning_rate": 9.894559231812614e-06, "loss": 0.8641, "step": 2097 }, { "epoch": 0.19, "grad_norm": 7.75962201639118, "learning_rate": 9.894411612467718e-06, "loss": 0.8842, "step": 2098 }, { "epoch": 0.19, "grad_norm": 8.252388476535, "learning_rate": 9.894263890962698e-06, "loss": 0.8703, "step": 2099 }, { "epoch": 0.19, "grad_norm": 6.600037800806456, "learning_rate": 9.894116067300638e-06, "loss": 0.8975, "step": 2100 }, { "epoch": 0.19, "grad_norm": 7.234022941048635, "learning_rate": 9.893968141484622e-06, "loss": 0.8408, "step": 2101 }, { "epoch": 0.19, "grad_norm": 7.368395755375345, "learning_rate": 9.893820113517741e-06, "loss": 0.9435, "step": 2102 }, { "epoch": 0.19, "grad_norm": 6.0418403116653465, "learning_rate": 9.89367198340308e-06, "loss": 0.9464, "step": 2103 }, { "epoch": 0.19, "grad_norm": 6.972165412324308, "learning_rate": 9.893523751143734e-06, "loss": 0.9034, "step": 2104 }, { "epoch": 0.19, "grad_norm": 6.8138784584021685, "learning_rate": 9.893375416742796e-06, "loss": 0.9416, "step": 2105 }, { "epoch": 0.19, "grad_norm": 6.655779242186405, "learning_rate": 9.893226980203364e-06, "loss": 0.9926, "step": 2106 }, { "epoch": 0.19, "grad_norm": 6.290522556032731, "learning_rate": 9.893078441528534e-06, "loss": 0.8797, "step": 2107 }, { "epoch": 0.19, "grad_norm": 6.845787361183861, "learning_rate": 9.892929800721406e-06, "loss": 0.8894, "step": 2108 }, { "epoch": 0.19, "grad_norm": 8.462588431889658, "learning_rate": 9.892781057785087e-06, "loss": 1.0165, "step": 2109 }, { "epoch": 0.19, "grad_norm": 7.179273219130319, "learning_rate": 9.892632212722675e-06, "loss": 0.8053, "step": 2110 }, { "epoch": 0.19, "grad_norm": 7.470727697484738, "learning_rate": 9.892483265537281e-06, "loss": 0.9681, "step": 2111 }, { "epoch": 0.19, "grad_norm": 7.80868888632017, "learning_rate": 9.892334216232014e-06, "loss": 0.8349, "step": 2112 }, { "epoch": 0.19, "grad_norm": 7.694398933292079, "learning_rate": 9.892185064809983e-06, "loss": 0.8856, "step": 2113 }, { "epoch": 0.19, "grad_norm": 9.265585010813266, "learning_rate": 9.892035811274303e-06, "loss": 0.9229, "step": 2114 }, { "epoch": 0.19, "grad_norm": 6.466503587655123, "learning_rate": 9.89188645562809e-06, "loss": 0.9661, "step": 2115 }, { "epoch": 0.19, "grad_norm": 7.785581204707679, "learning_rate": 9.891736997874459e-06, "loss": 0.8511, "step": 2116 }, { "epoch": 0.19, "grad_norm": 6.443857673535124, "learning_rate": 9.891587438016532e-06, "loss": 0.8652, "step": 2117 }, { "epoch": 0.19, "grad_norm": 8.747148904847142, "learning_rate": 9.891437776057429e-06, "loss": 0.9349, "step": 2118 }, { "epoch": 0.19, "grad_norm": 7.870466674492571, "learning_rate": 9.891288012000273e-06, "loss": 0.837, "step": 2119 }, { "epoch": 0.19, "grad_norm": 6.7642043858497285, "learning_rate": 9.891138145848194e-06, "loss": 0.9366, "step": 2120 }, { "epoch": 0.19, "grad_norm": 7.867599078039895, "learning_rate": 9.890988177604316e-06, "loss": 0.8828, "step": 2121 }, { "epoch": 0.19, "grad_norm": 7.914739682600121, "learning_rate": 9.890838107271772e-06, "loss": 0.8688, "step": 2122 }, { "epoch": 0.19, "grad_norm": 8.049210777665678, "learning_rate": 9.890687934853692e-06, "loss": 0.8339, "step": 2123 }, { "epoch": 0.19, "grad_norm": 7.9928549104300295, "learning_rate": 9.890537660353212e-06, "loss": 0.933, "step": 2124 }, { "epoch": 0.19, "grad_norm": 7.844406610574125, "learning_rate": 9.89038728377347e-06, "loss": 0.9076, "step": 2125 }, { "epoch": 0.19, "grad_norm": 6.069294992950967, "learning_rate": 9.8902368051176e-06, "loss": 0.8815, "step": 2126 }, { "epoch": 0.19, "grad_norm": 7.686428466737412, "learning_rate": 9.89008622438875e-06, "loss": 0.9631, "step": 2127 }, { "epoch": 0.19, "grad_norm": 5.765179628805529, "learning_rate": 9.889935541590057e-06, "loss": 0.9622, "step": 2128 }, { "epoch": 0.19, "grad_norm": 7.096095191872828, "learning_rate": 9.88978475672467e-06, "loss": 0.8716, "step": 2129 }, { "epoch": 0.19, "grad_norm": 6.277889721067508, "learning_rate": 9.889633869795732e-06, "loss": 0.9709, "step": 2130 }, { "epoch": 0.19, "grad_norm": 7.485231384189294, "learning_rate": 9.889482880806397e-06, "loss": 0.883, "step": 2131 }, { "epoch": 0.19, "grad_norm": 6.640639423598375, "learning_rate": 9.889331789759813e-06, "loss": 0.8509, "step": 2132 }, { "epoch": 0.19, "grad_norm": 7.041268397296485, "learning_rate": 9.889180596659138e-06, "loss": 0.8243, "step": 2133 }, { "epoch": 0.19, "grad_norm": 5.46378729526833, "learning_rate": 9.889029301507523e-06, "loss": 0.8272, "step": 2134 }, { "epoch": 0.19, "grad_norm": 9.039869892098624, "learning_rate": 9.88887790430813e-06, "loss": 0.858, "step": 2135 }, { "epoch": 0.19, "grad_norm": 6.744955153364569, "learning_rate": 9.888726405064115e-06, "loss": 0.9023, "step": 2136 }, { "epoch": 0.19, "grad_norm": 6.446687239903378, "learning_rate": 9.888574803778643e-06, "loss": 1.0018, "step": 2137 }, { "epoch": 0.19, "grad_norm": 8.802978045479446, "learning_rate": 9.888423100454878e-06, "loss": 0.8648, "step": 2138 }, { "epoch": 0.19, "grad_norm": 7.535546873831142, "learning_rate": 9.888271295095986e-06, "loss": 0.8977, "step": 2139 }, { "epoch": 0.19, "grad_norm": 6.64942107163137, "learning_rate": 9.888119387705137e-06, "loss": 0.8712, "step": 2140 }, { "epoch": 0.19, "grad_norm": 7.770252942979096, "learning_rate": 9.8879673782855e-06, "loss": 0.9388, "step": 2141 }, { "epoch": 0.19, "grad_norm": 7.400889092018879, "learning_rate": 9.887815266840249e-06, "loss": 0.8299, "step": 2142 }, { "epoch": 0.19, "grad_norm": 6.377049640217959, "learning_rate": 9.887663053372556e-06, "loss": 0.8922, "step": 2143 }, { "epoch": 0.19, "grad_norm": 7.544174219673894, "learning_rate": 9.887510737885604e-06, "loss": 0.9496, "step": 2144 }, { "epoch": 0.19, "grad_norm": 7.741848711883488, "learning_rate": 9.887358320382565e-06, "loss": 0.898, "step": 2145 }, { "epoch": 0.19, "grad_norm": 6.368122258264475, "learning_rate": 9.887205800866627e-06, "loss": 0.9581, "step": 2146 }, { "epoch": 0.19, "grad_norm": 7.8257323440612065, "learning_rate": 9.88705317934097e-06, "loss": 0.8568, "step": 2147 }, { "epoch": 0.19, "grad_norm": 7.082414128052759, "learning_rate": 9.886900455808782e-06, "loss": 0.8704, "step": 2148 }, { "epoch": 0.19, "grad_norm": 7.675016533105091, "learning_rate": 9.886747630273245e-06, "loss": 0.8913, "step": 2149 }, { "epoch": 0.19, "grad_norm": 6.537220658871868, "learning_rate": 9.886594702737556e-06, "loss": 0.9538, "step": 2150 }, { "epoch": 0.19, "grad_norm": 5.710399039726077, "learning_rate": 9.886441673204903e-06, "loss": 0.9754, "step": 2151 }, { "epoch": 0.19, "grad_norm": 7.489406292314961, "learning_rate": 9.886288541678482e-06, "loss": 0.8807, "step": 2152 }, { "epoch": 0.19, "grad_norm": 7.294134460396027, "learning_rate": 9.886135308161488e-06, "loss": 0.9366, "step": 2153 }, { "epoch": 0.19, "grad_norm": 5.406496967819656, "learning_rate": 9.88598197265712e-06, "loss": 0.8774, "step": 2154 }, { "epoch": 0.19, "grad_norm": 5.606938911490862, "learning_rate": 9.885828535168578e-06, "loss": 0.9536, "step": 2155 }, { "epoch": 0.19, "grad_norm": 6.277691223444727, "learning_rate": 9.885674995699066e-06, "loss": 0.8955, "step": 2156 }, { "epoch": 0.19, "grad_norm": 5.805208067656203, "learning_rate": 9.885521354251789e-06, "loss": 0.8612, "step": 2157 }, { "epoch": 0.19, "grad_norm": 7.625502571504582, "learning_rate": 9.88536761082995e-06, "loss": 0.9223, "step": 2158 }, { "epoch": 0.19, "grad_norm": 10.150758315523154, "learning_rate": 9.885213765436763e-06, "loss": 0.9585, "step": 2159 }, { "epoch": 0.19, "grad_norm": 6.3938814667532915, "learning_rate": 9.885059818075436e-06, "loss": 0.9673, "step": 2160 }, { "epoch": 0.19, "grad_norm": 7.14903165097625, "learning_rate": 9.884905768749184e-06, "loss": 0.8109, "step": 2161 }, { "epoch": 0.19, "grad_norm": 8.508600408396909, "learning_rate": 9.884751617461221e-06, "loss": 0.8988, "step": 2162 }, { "epoch": 0.19, "grad_norm": 6.807297131318559, "learning_rate": 9.884597364214766e-06, "loss": 0.8734, "step": 2163 }, { "epoch": 0.19, "grad_norm": 7.14949747545713, "learning_rate": 9.884443009013038e-06, "loss": 0.872, "step": 2164 }, { "epoch": 0.19, "grad_norm": 8.254711629111561, "learning_rate": 9.88428855185926e-06, "loss": 0.923, "step": 2165 }, { "epoch": 0.19, "grad_norm": 8.699474827610734, "learning_rate": 9.884133992756652e-06, "loss": 0.9219, "step": 2166 }, { "epoch": 0.19, "grad_norm": 7.134220953608768, "learning_rate": 9.883979331708446e-06, "loss": 0.8951, "step": 2167 }, { "epoch": 0.19, "grad_norm": 5.345800617949756, "learning_rate": 9.883824568717866e-06, "loss": 0.8778, "step": 2168 }, { "epoch": 0.19, "grad_norm": 5.0584090883792125, "learning_rate": 9.883669703788143e-06, "loss": 0.8432, "step": 2169 }, { "epoch": 0.19, "grad_norm": 6.028768942564992, "learning_rate": 9.88351473692251e-06, "loss": 0.8579, "step": 2170 }, { "epoch": 0.19, "grad_norm": 6.27762074564474, "learning_rate": 9.883359668124204e-06, "loss": 0.9159, "step": 2171 }, { "epoch": 0.19, "grad_norm": 7.448728168998883, "learning_rate": 9.883204497396457e-06, "loss": 0.9268, "step": 2172 }, { "epoch": 0.19, "grad_norm": 6.9566970553127705, "learning_rate": 9.88304922474251e-06, "loss": 0.8428, "step": 2173 }, { "epoch": 0.19, "grad_norm": 6.607499974286011, "learning_rate": 9.882893850165606e-06, "loss": 0.8769, "step": 2174 }, { "epoch": 0.19, "grad_norm": 6.7372746282982545, "learning_rate": 9.882738373668985e-06, "loss": 0.8981, "step": 2175 }, { "epoch": 0.19, "grad_norm": 10.580840921300068, "learning_rate": 9.882582795255893e-06, "loss": 0.9398, "step": 2176 }, { "epoch": 0.19, "grad_norm": 7.062027287772669, "learning_rate": 9.882427114929578e-06, "loss": 0.9006, "step": 2177 }, { "epoch": 0.19, "grad_norm": 6.881971178763336, "learning_rate": 9.882271332693289e-06, "loss": 0.9347, "step": 2178 }, { "epoch": 0.19, "grad_norm": 6.547705587533227, "learning_rate": 9.882115448550279e-06, "loss": 0.9444, "step": 2179 }, { "epoch": 0.19, "grad_norm": 7.820382408708565, "learning_rate": 9.8819594625038e-06, "loss": 0.9551, "step": 2180 }, { "epoch": 0.19, "grad_norm": 9.484085078846652, "learning_rate": 9.881803374557108e-06, "loss": 0.9276, "step": 2181 }, { "epoch": 0.19, "grad_norm": 6.839967762015463, "learning_rate": 9.881647184713463e-06, "loss": 0.8716, "step": 2182 }, { "epoch": 0.19, "grad_norm": 9.631715673908413, "learning_rate": 9.881490892976124e-06, "loss": 0.9389, "step": 2183 }, { "epoch": 0.19, "grad_norm": 6.4860422554074475, "learning_rate": 9.881334499348351e-06, "loss": 0.9412, "step": 2184 }, { "epoch": 0.19, "grad_norm": 9.149991143304295, "learning_rate": 9.881178003833411e-06, "loss": 0.9031, "step": 2185 }, { "epoch": 0.2, "grad_norm": 5.728348032008197, "learning_rate": 9.88102140643457e-06, "loss": 0.9259, "step": 2186 }, { "epoch": 0.2, "grad_norm": 6.712095869398096, "learning_rate": 9.880864707155096e-06, "loss": 0.9238, "step": 2187 }, { "epoch": 0.2, "grad_norm": 10.571304371004494, "learning_rate": 9.88070790599826e-06, "loss": 0.8822, "step": 2188 }, { "epoch": 0.2, "grad_norm": 10.19300980735663, "learning_rate": 9.880551002967335e-06, "loss": 0.9791, "step": 2189 }, { "epoch": 0.2, "grad_norm": 6.105738584884674, "learning_rate": 9.880393998065597e-06, "loss": 0.8527, "step": 2190 }, { "epoch": 0.2, "grad_norm": 9.755124440734525, "learning_rate": 9.880236891296322e-06, "loss": 0.8786, "step": 2191 }, { "epoch": 0.2, "grad_norm": 7.571558688477606, "learning_rate": 9.880079682662788e-06, "loss": 0.8487, "step": 2192 }, { "epoch": 0.2, "grad_norm": 5.051867144947555, "learning_rate": 9.87992237216828e-06, "loss": 0.9366, "step": 2193 }, { "epoch": 0.2, "grad_norm": 6.027739182740559, "learning_rate": 9.879764959816077e-06, "loss": 0.8638, "step": 2194 }, { "epoch": 0.2, "grad_norm": 8.70729429903281, "learning_rate": 9.87960744560947e-06, "loss": 0.8388, "step": 2195 }, { "epoch": 0.2, "grad_norm": 7.847061740492873, "learning_rate": 9.87944982955174e-06, "loss": 0.8157, "step": 2196 }, { "epoch": 0.2, "grad_norm": 6.438635538772719, "learning_rate": 9.879292111646183e-06, "loss": 0.83, "step": 2197 }, { "epoch": 0.2, "grad_norm": 7.678539337539831, "learning_rate": 9.879134291896088e-06, "loss": 0.915, "step": 2198 }, { "epoch": 0.2, "grad_norm": 5.218580299337293, "learning_rate": 9.87897637030475e-06, "loss": 0.8906, "step": 2199 }, { "epoch": 0.2, "grad_norm": 5.723026803415765, "learning_rate": 9.878818346875466e-06, "loss": 0.8795, "step": 2200 }, { "epoch": 0.2, "grad_norm": 10.205048138024566, "learning_rate": 9.878660221611532e-06, "loss": 0.9256, "step": 2201 }, { "epoch": 0.2, "grad_norm": 8.284635257029652, "learning_rate": 9.87850199451625e-06, "loss": 0.9108, "step": 2202 }, { "epoch": 0.2, "grad_norm": 6.243702500146266, "learning_rate": 9.878343665592922e-06, "loss": 0.8748, "step": 2203 }, { "epoch": 0.2, "grad_norm": 5.874647352544806, "learning_rate": 9.878185234844855e-06, "loss": 0.8273, "step": 2204 }, { "epoch": 0.2, "grad_norm": 6.790231257953075, "learning_rate": 9.878026702275354e-06, "loss": 0.922, "step": 2205 }, { "epoch": 0.2, "grad_norm": 8.985939415363083, "learning_rate": 9.877868067887727e-06, "loss": 0.8432, "step": 2206 }, { "epoch": 0.2, "grad_norm": 8.316780932979622, "learning_rate": 9.877709331685288e-06, "loss": 0.8201, "step": 2207 }, { "epoch": 0.2, "grad_norm": 8.478992198331348, "learning_rate": 9.877550493671347e-06, "loss": 0.9743, "step": 2208 }, { "epoch": 0.2, "grad_norm": 6.319210115539912, "learning_rate": 9.877391553849223e-06, "loss": 0.9195, "step": 2209 }, { "epoch": 0.2, "grad_norm": 6.523042745267375, "learning_rate": 9.87723251222223e-06, "loss": 0.8636, "step": 2210 }, { "epoch": 0.2, "grad_norm": 6.30172163689355, "learning_rate": 9.87707336879369e-06, "loss": 0.8823, "step": 2211 }, { "epoch": 0.2, "grad_norm": 5.4152119859920225, "learning_rate": 9.876914123566923e-06, "loss": 0.8591, "step": 2212 }, { "epoch": 0.2, "grad_norm": 7.794097256103962, "learning_rate": 9.876754776545257e-06, "loss": 0.929, "step": 2213 }, { "epoch": 0.2, "grad_norm": 7.194967269187306, "learning_rate": 9.876595327732012e-06, "loss": 0.9421, "step": 2214 }, { "epoch": 0.2, "grad_norm": 12.682541896666878, "learning_rate": 9.87643577713052e-06, "loss": 0.9453, "step": 2215 }, { "epoch": 0.2, "grad_norm": 5.923221544727557, "learning_rate": 9.87627612474411e-06, "loss": 0.9197, "step": 2216 }, { "epoch": 0.2, "grad_norm": 7.086520597046269, "learning_rate": 9.876116370576115e-06, "loss": 0.8686, "step": 2217 }, { "epoch": 0.2, "grad_norm": 4.76674044382957, "learning_rate": 9.87595651462987e-06, "loss": 0.8878, "step": 2218 }, { "epoch": 0.2, "grad_norm": 8.645363320544467, "learning_rate": 9.87579655690871e-06, "loss": 0.8841, "step": 2219 }, { "epoch": 0.2, "grad_norm": 6.7774046981462766, "learning_rate": 9.875636497415974e-06, "loss": 0.9761, "step": 2220 }, { "epoch": 0.2, "grad_norm": 5.80285561159722, "learning_rate": 9.875476336155005e-06, "loss": 0.8793, "step": 2221 }, { "epoch": 0.2, "grad_norm": 6.378222649200347, "learning_rate": 9.875316073129144e-06, "loss": 0.8684, "step": 2222 }, { "epoch": 0.2, "grad_norm": 7.778738359668561, "learning_rate": 9.875155708341738e-06, "loss": 0.8933, "step": 2223 }, { "epoch": 0.2, "grad_norm": 7.962168090409637, "learning_rate": 9.874995241796132e-06, "loss": 0.8927, "step": 2224 }, { "epoch": 0.2, "grad_norm": 6.945287664505354, "learning_rate": 9.874834673495675e-06, "loss": 0.889, "step": 2225 }, { "epoch": 0.2, "grad_norm": 5.427987104552881, "learning_rate": 9.874674003443722e-06, "loss": 0.8549, "step": 2226 }, { "epoch": 0.2, "grad_norm": 8.220279481633401, "learning_rate": 9.874513231643623e-06, "loss": 0.933, "step": 2227 }, { "epoch": 0.2, "grad_norm": 9.86431107617849, "learning_rate": 9.874352358098737e-06, "loss": 0.8341, "step": 2228 }, { "epoch": 0.2, "grad_norm": 6.708657613156528, "learning_rate": 9.874191382812419e-06, "loss": 0.8966, "step": 2229 }, { "epoch": 0.2, "grad_norm": 8.23692879973121, "learning_rate": 9.87403030578803e-06, "loss": 0.7692, "step": 2230 }, { "epoch": 0.2, "grad_norm": 9.195870018735967, "learning_rate": 9.873869127028932e-06, "loss": 0.8913, "step": 2231 }, { "epoch": 0.2, "grad_norm": 5.880687846018852, "learning_rate": 9.873707846538491e-06, "loss": 0.8821, "step": 2232 }, { "epoch": 0.2, "grad_norm": 7.624580216039018, "learning_rate": 9.87354646432007e-06, "loss": 0.9226, "step": 2233 }, { "epoch": 0.2, "grad_norm": 7.443765826626277, "learning_rate": 9.87338498037704e-06, "loss": 0.9148, "step": 2234 }, { "epoch": 0.2, "grad_norm": 7.426419461742298, "learning_rate": 9.873223394712773e-06, "loss": 0.8732, "step": 2235 }, { "epoch": 0.2, "grad_norm": 7.3629416874023965, "learning_rate": 9.873061707330638e-06, "loss": 0.872, "step": 2236 }, { "epoch": 0.2, "grad_norm": 6.953473113117662, "learning_rate": 9.872899918234014e-06, "loss": 0.8608, "step": 2237 }, { "epoch": 0.2, "grad_norm": 10.80696203720073, "learning_rate": 9.872738027426271e-06, "loss": 0.9278, "step": 2238 }, { "epoch": 0.2, "grad_norm": 6.86226730184155, "learning_rate": 9.872576034910797e-06, "loss": 0.8677, "step": 2239 }, { "epoch": 0.2, "grad_norm": 7.112985929760328, "learning_rate": 9.872413940690969e-06, "loss": 0.9453, "step": 2240 }, { "epoch": 0.2, "grad_norm": 8.374576933699547, "learning_rate": 9.872251744770168e-06, "loss": 0.8816, "step": 2241 }, { "epoch": 0.2, "grad_norm": 6.247891040010777, "learning_rate": 9.872089447151783e-06, "loss": 0.8528, "step": 2242 }, { "epoch": 0.2, "grad_norm": 8.151375900206258, "learning_rate": 9.871927047839201e-06, "loss": 0.9067, "step": 2243 }, { "epoch": 0.2, "grad_norm": 5.994878186926854, "learning_rate": 9.87176454683581e-06, "loss": 0.8545, "step": 2244 }, { "epoch": 0.2, "grad_norm": 7.293832263666268, "learning_rate": 9.871601944145003e-06, "loss": 0.9417, "step": 2245 }, { "epoch": 0.2, "grad_norm": 8.743929839839472, "learning_rate": 9.871439239770175e-06, "loss": 0.9278, "step": 2246 }, { "epoch": 0.2, "grad_norm": 6.685281764469025, "learning_rate": 9.87127643371472e-06, "loss": 0.8916, "step": 2247 }, { "epoch": 0.2, "grad_norm": 5.3551939559078345, "learning_rate": 9.871113525982039e-06, "loss": 0.9586, "step": 2248 }, { "epoch": 0.2, "grad_norm": 5.941544694458453, "learning_rate": 9.87095051657553e-06, "loss": 0.8422, "step": 2249 }, { "epoch": 0.2, "grad_norm": 7.824241286080517, "learning_rate": 9.870787405498597e-06, "loss": 0.8767, "step": 2250 }, { "epoch": 0.2, "grad_norm": 5.437603587946523, "learning_rate": 9.870624192754643e-06, "loss": 0.9015, "step": 2251 }, { "epoch": 0.2, "grad_norm": 9.834935537081883, "learning_rate": 9.870460878347075e-06, "loss": 0.8989, "step": 2252 }, { "epoch": 0.2, "grad_norm": 12.089752743817384, "learning_rate": 9.870297462279303e-06, "loss": 0.8842, "step": 2253 }, { "epoch": 0.2, "grad_norm": 7.596727508081665, "learning_rate": 9.870133944554736e-06, "loss": 0.8994, "step": 2254 }, { "epoch": 0.2, "grad_norm": 7.238669569988212, "learning_rate": 9.86997032517679e-06, "loss": 0.9366, "step": 2255 }, { "epoch": 0.2, "grad_norm": 6.215271913908399, "learning_rate": 9.869806604148877e-06, "loss": 0.9364, "step": 2256 }, { "epoch": 0.2, "grad_norm": 6.839483743832036, "learning_rate": 9.869642781474416e-06, "loss": 0.8987, "step": 2257 }, { "epoch": 0.2, "grad_norm": 7.585629507724072, "learning_rate": 9.869478857156826e-06, "loss": 0.8227, "step": 2258 }, { "epoch": 0.2, "grad_norm": 6.445490723768435, "learning_rate": 9.86931483119953e-06, "loss": 0.9727, "step": 2259 }, { "epoch": 0.2, "grad_norm": 8.842141317364588, "learning_rate": 9.869150703605951e-06, "loss": 0.8395, "step": 2260 }, { "epoch": 0.2, "grad_norm": 8.565304485910724, "learning_rate": 9.868986474379515e-06, "loss": 0.853, "step": 2261 }, { "epoch": 0.2, "grad_norm": 6.505063752897994, "learning_rate": 9.868822143523646e-06, "loss": 0.9191, "step": 2262 }, { "epoch": 0.2, "grad_norm": 6.331840313137623, "learning_rate": 9.868657711041779e-06, "loss": 0.87, "step": 2263 }, { "epoch": 0.2, "grad_norm": 7.739395347720917, "learning_rate": 9.868493176937346e-06, "loss": 0.87, "step": 2264 }, { "epoch": 0.2, "grad_norm": 5.684875764632902, "learning_rate": 9.868328541213777e-06, "loss": 0.8766, "step": 2265 }, { "epoch": 0.2, "grad_norm": 7.48164243796936, "learning_rate": 9.868163803874513e-06, "loss": 0.9624, "step": 2266 }, { "epoch": 0.2, "grad_norm": 6.950332654875624, "learning_rate": 9.867998964922988e-06, "loss": 0.9211, "step": 2267 }, { "epoch": 0.2, "grad_norm": 6.5554362496870295, "learning_rate": 9.867834024362648e-06, "loss": 0.8844, "step": 2268 }, { "epoch": 0.2, "grad_norm": 9.269038094449606, "learning_rate": 9.867668982196933e-06, "loss": 0.8367, "step": 2269 }, { "epoch": 0.2, "grad_norm": 7.834102163569689, "learning_rate": 9.867503838429288e-06, "loss": 0.933, "step": 2270 }, { "epoch": 0.2, "grad_norm": 7.591729639804242, "learning_rate": 9.867338593063159e-06, "loss": 0.8488, "step": 2271 }, { "epoch": 0.2, "grad_norm": 7.468734554423874, "learning_rate": 9.867173246101998e-06, "loss": 0.8288, "step": 2272 }, { "epoch": 0.2, "grad_norm": 5.488849722618598, "learning_rate": 9.867007797549253e-06, "loss": 0.8692, "step": 2273 }, { "epoch": 0.2, "grad_norm": 8.253206815655089, "learning_rate": 9.866842247408378e-06, "loss": 1.0104, "step": 2274 }, { "epoch": 0.2, "grad_norm": 6.901837273070414, "learning_rate": 9.86667659568283e-06, "loss": 0.9066, "step": 2275 }, { "epoch": 0.2, "grad_norm": 6.118116528833679, "learning_rate": 9.866510842376067e-06, "loss": 0.9258, "step": 2276 }, { "epoch": 0.2, "grad_norm": 7.073684622602918, "learning_rate": 9.866344987491546e-06, "loss": 0.8807, "step": 2277 }, { "epoch": 0.2, "grad_norm": 7.852905260073221, "learning_rate": 9.866179031032732e-06, "loss": 0.926, "step": 2278 }, { "epoch": 0.2, "grad_norm": 6.307703647075966, "learning_rate": 9.866012973003087e-06, "loss": 0.9008, "step": 2279 }, { "epoch": 0.2, "grad_norm": 8.34877679475249, "learning_rate": 9.865846813406077e-06, "loss": 0.9561, "step": 2280 }, { "epoch": 0.2, "grad_norm": 6.607567225978684, "learning_rate": 9.865680552245171e-06, "loss": 0.8679, "step": 2281 }, { "epoch": 0.2, "grad_norm": 6.797125335745823, "learning_rate": 9.86551418952384e-06, "loss": 0.923, "step": 2282 }, { "epoch": 0.2, "grad_norm": 7.243659696785783, "learning_rate": 9.865347725245553e-06, "loss": 0.8806, "step": 2283 }, { "epoch": 0.2, "grad_norm": 6.088980520523614, "learning_rate": 9.86518115941379e-06, "loss": 0.889, "step": 2284 }, { "epoch": 0.2, "grad_norm": 6.594255558188656, "learning_rate": 9.865014492032023e-06, "loss": 0.9395, "step": 2285 }, { "epoch": 0.2, "grad_norm": 8.923838005335186, "learning_rate": 9.864847723103734e-06, "loss": 0.8839, "step": 2286 }, { "epoch": 0.2, "grad_norm": 6.215290184471627, "learning_rate": 9.864680852632402e-06, "loss": 0.9064, "step": 2287 }, { "epoch": 0.2, "grad_norm": 5.043336711044405, "learning_rate": 9.864513880621511e-06, "loss": 0.8744, "step": 2288 }, { "epoch": 0.2, "grad_norm": 6.975456576005333, "learning_rate": 9.864346807074545e-06, "loss": 0.9212, "step": 2289 }, { "epoch": 0.2, "grad_norm": 7.201425782180013, "learning_rate": 9.864179631994994e-06, "loss": 0.879, "step": 2290 }, { "epoch": 0.2, "grad_norm": 5.805802528193533, "learning_rate": 9.864012355386344e-06, "loss": 0.8696, "step": 2291 }, { "epoch": 0.2, "grad_norm": 5.647285538209215, "learning_rate": 9.863844977252088e-06, "loss": 0.8434, "step": 2292 }, { "epoch": 0.2, "grad_norm": 8.497824376359516, "learning_rate": 9.863677497595721e-06, "loss": 0.8404, "step": 2293 }, { "epoch": 0.2, "grad_norm": 5.770101581188881, "learning_rate": 9.863509916420737e-06, "loss": 0.9012, "step": 2294 }, { "epoch": 0.2, "grad_norm": 5.858052908037306, "learning_rate": 9.863342233730633e-06, "loss": 0.8735, "step": 2295 }, { "epoch": 0.2, "grad_norm": 7.601306347319077, "learning_rate": 9.863174449528911e-06, "loss": 0.9107, "step": 2296 }, { "epoch": 0.2, "grad_norm": 6.515911380543793, "learning_rate": 9.863006563819074e-06, "loss": 0.9695, "step": 2297 }, { "epoch": 0.21, "grad_norm": 7.378236506281609, "learning_rate": 9.862838576604623e-06, "loss": 0.8766, "step": 2298 }, { "epoch": 0.21, "grad_norm": 7.2850415714147605, "learning_rate": 9.862670487889068e-06, "loss": 0.8497, "step": 2299 }, { "epoch": 0.21, "grad_norm": 8.00133660861966, "learning_rate": 9.862502297675914e-06, "loss": 0.9888, "step": 2300 }, { "epoch": 0.21, "grad_norm": 8.720446607959591, "learning_rate": 9.862334005968675e-06, "loss": 0.7964, "step": 2301 }, { "epoch": 0.21, "grad_norm": 5.863406747388093, "learning_rate": 9.86216561277086e-06, "loss": 0.9389, "step": 2302 }, { "epoch": 0.21, "grad_norm": 6.559430427816366, "learning_rate": 9.861997118085986e-06, "loss": 0.914, "step": 2303 }, { "epoch": 0.21, "grad_norm": 7.08174428874434, "learning_rate": 9.861828521917571e-06, "loss": 0.8788, "step": 2304 }, { "epoch": 0.21, "grad_norm": 6.517843411158368, "learning_rate": 9.861659824269132e-06, "loss": 0.898, "step": 2305 }, { "epoch": 0.21, "grad_norm": 7.417034101187277, "learning_rate": 9.861491025144192e-06, "loss": 0.8925, "step": 2306 }, { "epoch": 0.21, "grad_norm": 7.609235991404573, "learning_rate": 9.861322124546273e-06, "loss": 0.8763, "step": 2307 }, { "epoch": 0.21, "grad_norm": 7.2076753982263915, "learning_rate": 9.861153122478899e-06, "loss": 0.941, "step": 2308 }, { "epoch": 0.21, "grad_norm": 7.070982118579626, "learning_rate": 9.8609840189456e-06, "loss": 0.8778, "step": 2309 }, { "epoch": 0.21, "grad_norm": 7.84940231916242, "learning_rate": 9.860814813949906e-06, "loss": 0.89, "step": 2310 }, { "epoch": 0.21, "grad_norm": 8.599023406140649, "learning_rate": 9.860645507495348e-06, "loss": 0.8955, "step": 2311 }, { "epoch": 0.21, "grad_norm": 7.44699036513979, "learning_rate": 9.86047609958546e-06, "loss": 0.8653, "step": 2312 }, { "epoch": 0.21, "grad_norm": 8.59756759125582, "learning_rate": 9.860306590223777e-06, "loss": 0.9121, "step": 2313 }, { "epoch": 0.21, "grad_norm": 6.290210945038987, "learning_rate": 9.860136979413835e-06, "loss": 0.9132, "step": 2314 }, { "epoch": 0.21, "grad_norm": 8.090764215514604, "learning_rate": 9.85996726715918e-06, "loss": 0.8752, "step": 2315 }, { "epoch": 0.21, "grad_norm": 8.090169904229091, "learning_rate": 9.85979745346335e-06, "loss": 0.8866, "step": 2316 }, { "epoch": 0.21, "grad_norm": 9.398285054298242, "learning_rate": 9.859627538329894e-06, "loss": 0.8913, "step": 2317 }, { "epoch": 0.21, "grad_norm": 5.713513640234962, "learning_rate": 9.859457521762351e-06, "loss": 0.8637, "step": 2318 }, { "epoch": 0.21, "grad_norm": 6.953153002490357, "learning_rate": 9.859287403764279e-06, "loss": 0.8472, "step": 2319 }, { "epoch": 0.21, "grad_norm": 6.60179677540859, "learning_rate": 9.859117184339221e-06, "loss": 0.9034, "step": 2320 }, { "epoch": 0.21, "grad_norm": 7.55700092714277, "learning_rate": 9.858946863490735e-06, "loss": 0.8437, "step": 2321 }, { "epoch": 0.21, "grad_norm": 7.24452216268417, "learning_rate": 9.858776441222373e-06, "loss": 0.9415, "step": 2322 }, { "epoch": 0.21, "grad_norm": 7.724300016411335, "learning_rate": 9.858605917537694e-06, "loss": 0.8893, "step": 2323 }, { "epoch": 0.21, "grad_norm": 8.120602713324038, "learning_rate": 9.858435292440256e-06, "loss": 0.8686, "step": 2324 }, { "epoch": 0.21, "grad_norm": 11.871696064010134, "learning_rate": 9.85826456593362e-06, "loss": 0.84, "step": 2325 }, { "epoch": 0.21, "grad_norm": 8.276460010330153, "learning_rate": 9.85809373802135e-06, "loss": 0.865, "step": 2326 }, { "epoch": 0.21, "grad_norm": 9.790236814581444, "learning_rate": 9.857922808707015e-06, "loss": 0.9164, "step": 2327 }, { "epoch": 0.21, "grad_norm": 6.519435116248819, "learning_rate": 9.85775177799418e-06, "loss": 0.8836, "step": 2328 }, { "epoch": 0.21, "grad_norm": 8.309724469387918, "learning_rate": 9.857580645886413e-06, "loss": 0.8735, "step": 2329 }, { "epoch": 0.21, "grad_norm": 7.527244808701618, "learning_rate": 9.857409412387289e-06, "loss": 0.9415, "step": 2330 }, { "epoch": 0.21, "grad_norm": 5.838146635962811, "learning_rate": 9.857238077500382e-06, "loss": 0.878, "step": 2331 }, { "epoch": 0.21, "grad_norm": 8.486866181644194, "learning_rate": 9.857066641229265e-06, "loss": 0.8108, "step": 2332 }, { "epoch": 0.21, "grad_norm": 7.013700059765665, "learning_rate": 9.85689510357752e-06, "loss": 0.8447, "step": 2333 }, { "epoch": 0.21, "grad_norm": 6.393662713874419, "learning_rate": 9.856723464548726e-06, "loss": 0.9248, "step": 2334 }, { "epoch": 0.21, "grad_norm": 7.297802720681074, "learning_rate": 9.856551724146465e-06, "loss": 0.8656, "step": 2335 }, { "epoch": 0.21, "grad_norm": 6.328783292218154, "learning_rate": 9.856379882374325e-06, "loss": 0.8233, "step": 2336 }, { "epoch": 0.21, "grad_norm": 6.248698933116842, "learning_rate": 9.856207939235888e-06, "loss": 0.842, "step": 2337 }, { "epoch": 0.21, "grad_norm": 6.389323020738942, "learning_rate": 9.856035894734745e-06, "loss": 0.8504, "step": 2338 }, { "epoch": 0.21, "grad_norm": 7.5183856190796545, "learning_rate": 9.855863748874487e-06, "loss": 0.8724, "step": 2339 }, { "epoch": 0.21, "grad_norm": 6.697486409810685, "learning_rate": 9.85569150165871e-06, "loss": 0.8624, "step": 2340 }, { "epoch": 0.21, "grad_norm": 6.588459575494302, "learning_rate": 9.855519153091005e-06, "loss": 0.8555, "step": 2341 }, { "epoch": 0.21, "grad_norm": 6.969295164169597, "learning_rate": 9.85534670317497e-06, "loss": 0.9064, "step": 2342 }, { "epoch": 0.21, "grad_norm": 5.585261426821832, "learning_rate": 9.855174151914206e-06, "loss": 0.9023, "step": 2343 }, { "epoch": 0.21, "grad_norm": 7.389555040968531, "learning_rate": 9.855001499312315e-06, "loss": 0.8672, "step": 2344 }, { "epoch": 0.21, "grad_norm": 9.33887385783266, "learning_rate": 9.8548287453729e-06, "loss": 0.9304, "step": 2345 }, { "epoch": 0.21, "grad_norm": 9.596423200870415, "learning_rate": 9.854655890099568e-06, "loss": 0.8495, "step": 2346 }, { "epoch": 0.21, "grad_norm": 7.912411501791836, "learning_rate": 9.854482933495923e-06, "loss": 0.8585, "step": 2347 }, { "epoch": 0.21, "grad_norm": 7.482246003695389, "learning_rate": 9.85430987556558e-06, "loss": 0.9013, "step": 2348 }, { "epoch": 0.21, "grad_norm": 7.152525189787622, "learning_rate": 9.854136716312148e-06, "loss": 0.8419, "step": 2349 }, { "epoch": 0.21, "grad_norm": 7.588827866549158, "learning_rate": 9.853963455739243e-06, "loss": 0.9493, "step": 2350 }, { "epoch": 0.21, "grad_norm": 7.421296248802825, "learning_rate": 9.853790093850481e-06, "loss": 0.9337, "step": 2351 }, { "epoch": 0.21, "grad_norm": 6.967737978635837, "learning_rate": 9.85361663064948e-06, "loss": 0.9358, "step": 2352 }, { "epoch": 0.21, "grad_norm": 6.307409848223405, "learning_rate": 9.853443066139862e-06, "loss": 0.8627, "step": 2353 }, { "epoch": 0.21, "grad_norm": 6.620241099807115, "learning_rate": 9.853269400325246e-06, "loss": 0.9337, "step": 2354 }, { "epoch": 0.21, "grad_norm": 5.123702760883563, "learning_rate": 9.853095633209265e-06, "loss": 0.8905, "step": 2355 }, { "epoch": 0.21, "grad_norm": 6.7320168156037195, "learning_rate": 9.852921764795536e-06, "loss": 0.9049, "step": 2356 }, { "epoch": 0.21, "grad_norm": 7.864477566092781, "learning_rate": 9.852747795087695e-06, "loss": 0.8907, "step": 2357 }, { "epoch": 0.21, "grad_norm": 8.05457904005513, "learning_rate": 9.85257372408937e-06, "loss": 0.9348, "step": 2358 }, { "epoch": 0.21, "grad_norm": 8.008355357702138, "learning_rate": 9.852399551804196e-06, "loss": 0.8791, "step": 2359 }, { "epoch": 0.21, "grad_norm": 7.945923748720724, "learning_rate": 9.852225278235807e-06, "loss": 0.8668, "step": 2360 }, { "epoch": 0.21, "grad_norm": 6.083010185422739, "learning_rate": 9.852050903387845e-06, "loss": 0.8906, "step": 2361 }, { "epoch": 0.21, "grad_norm": 5.6338293926415846, "learning_rate": 9.851876427263943e-06, "loss": 0.9641, "step": 2362 }, { "epoch": 0.21, "grad_norm": 6.476598708579082, "learning_rate": 9.851701849867745e-06, "loss": 0.8635, "step": 2363 }, { "epoch": 0.21, "grad_norm": 10.309861143195437, "learning_rate": 9.8515271712029e-06, "loss": 0.9672, "step": 2364 }, { "epoch": 0.21, "grad_norm": 7.1919677163005, "learning_rate": 9.851352391273045e-06, "loss": 0.8999, "step": 2365 }, { "epoch": 0.21, "grad_norm": 9.406522748379095, "learning_rate": 9.851177510081837e-06, "loss": 0.8999, "step": 2366 }, { "epoch": 0.21, "grad_norm": 7.133980021133775, "learning_rate": 9.85100252763292e-06, "loss": 0.8974, "step": 2367 }, { "epoch": 0.21, "grad_norm": 7.707877506771987, "learning_rate": 9.850827443929952e-06, "loss": 0.836, "step": 2368 }, { "epoch": 0.21, "grad_norm": 7.215576168305376, "learning_rate": 9.85065225897658e-06, "loss": 0.8822, "step": 2369 }, { "epoch": 0.21, "grad_norm": 6.490967098980666, "learning_rate": 9.850476972776468e-06, "loss": 0.8416, "step": 2370 }, { "epoch": 0.21, "grad_norm": 7.57736643156538, "learning_rate": 9.85030158533327e-06, "loss": 0.9106, "step": 2371 }, { "epoch": 0.21, "grad_norm": 7.720586686561496, "learning_rate": 9.850126096650649e-06, "loss": 0.8353, "step": 2372 }, { "epoch": 0.21, "grad_norm": 5.212036649452139, "learning_rate": 9.849950506732267e-06, "loss": 0.9115, "step": 2373 }, { "epoch": 0.21, "grad_norm": 8.728007018885922, "learning_rate": 9.84977481558179e-06, "loss": 0.9988, "step": 2374 }, { "epoch": 0.21, "grad_norm": 8.214722973818981, "learning_rate": 9.849599023202884e-06, "loss": 0.9061, "step": 2375 }, { "epoch": 0.21, "grad_norm": 10.238561102953476, "learning_rate": 9.849423129599218e-06, "loss": 0.8355, "step": 2376 }, { "epoch": 0.21, "grad_norm": 6.0032912330738855, "learning_rate": 9.849247134774466e-06, "loss": 0.8361, "step": 2377 }, { "epoch": 0.21, "grad_norm": 5.898534246692978, "learning_rate": 9.849071038732299e-06, "loss": 0.8906, "step": 2378 }, { "epoch": 0.21, "grad_norm": 7.205994358332342, "learning_rate": 9.848894841476393e-06, "loss": 0.843, "step": 2379 }, { "epoch": 0.21, "grad_norm": 7.017239443393275, "learning_rate": 9.848718543010426e-06, "loss": 0.9253, "step": 2380 }, { "epoch": 0.21, "grad_norm": 5.9489185874866415, "learning_rate": 9.848542143338079e-06, "loss": 0.9244, "step": 2381 }, { "epoch": 0.21, "grad_norm": 8.321721353637983, "learning_rate": 9.848365642463032e-06, "loss": 0.8595, "step": 2382 }, { "epoch": 0.21, "grad_norm": 6.321452559370103, "learning_rate": 9.848189040388969e-06, "loss": 0.8652, "step": 2383 }, { "epoch": 0.21, "grad_norm": 6.43385239585459, "learning_rate": 9.84801233711958e-06, "loss": 0.8651, "step": 2384 }, { "epoch": 0.21, "grad_norm": 7.421417950961274, "learning_rate": 9.847835532658547e-06, "loss": 0.8866, "step": 2385 }, { "epoch": 0.21, "grad_norm": 7.930191201254984, "learning_rate": 9.847658627009567e-06, "loss": 0.9224, "step": 2386 }, { "epoch": 0.21, "grad_norm": 5.479871714192704, "learning_rate": 9.847481620176326e-06, "loss": 0.9511, "step": 2387 }, { "epoch": 0.21, "grad_norm": 7.348485518203908, "learning_rate": 9.847304512162524e-06, "loss": 0.9154, "step": 2388 }, { "epoch": 0.21, "grad_norm": 7.552225781575768, "learning_rate": 9.847127302971855e-06, "loss": 0.9318, "step": 2389 }, { "epoch": 0.21, "grad_norm": 8.932647221276676, "learning_rate": 9.84694999260802e-06, "loss": 0.8355, "step": 2390 }, { "epoch": 0.21, "grad_norm": 8.616187633565632, "learning_rate": 9.846772581074717e-06, "loss": 0.8511, "step": 2391 }, { "epoch": 0.21, "grad_norm": 6.322110534721389, "learning_rate": 9.846595068375653e-06, "loss": 0.9187, "step": 2392 }, { "epoch": 0.21, "grad_norm": 6.663968550547319, "learning_rate": 9.846417454514528e-06, "loss": 0.9032, "step": 2393 }, { "epoch": 0.21, "grad_norm": 6.915759084476773, "learning_rate": 9.846239739495054e-06, "loss": 0.8325, "step": 2394 }, { "epoch": 0.21, "grad_norm": 7.060553029584971, "learning_rate": 9.846061923320938e-06, "loss": 0.9411, "step": 2395 }, { "epoch": 0.21, "grad_norm": 7.58227686056441, "learning_rate": 9.845884005995892e-06, "loss": 0.8313, "step": 2396 }, { "epoch": 0.21, "grad_norm": 6.823665099351213, "learning_rate": 9.845705987523628e-06, "loss": 0.9829, "step": 2397 }, { "epoch": 0.21, "grad_norm": 6.908340471231027, "learning_rate": 9.845527867907866e-06, "loss": 0.9222, "step": 2398 }, { "epoch": 0.21, "grad_norm": 7.368443202961965, "learning_rate": 9.845349647152321e-06, "loss": 0.9127, "step": 2399 }, { "epoch": 0.21, "grad_norm": 6.9412490731285645, "learning_rate": 9.845171325260712e-06, "loss": 0.8938, "step": 2400 }, { "epoch": 0.21, "grad_norm": 9.60580638752615, "learning_rate": 9.844992902236762e-06, "loss": 0.9556, "step": 2401 }, { "epoch": 0.21, "grad_norm": 7.001069830718047, "learning_rate": 9.844814378084196e-06, "loss": 0.8143, "step": 2402 }, { "epoch": 0.21, "grad_norm": 5.473625257422621, "learning_rate": 9.844635752806739e-06, "loss": 0.8793, "step": 2403 }, { "epoch": 0.21, "grad_norm": 7.703567722146185, "learning_rate": 9.844457026408123e-06, "loss": 0.7995, "step": 2404 }, { "epoch": 0.21, "grad_norm": 7.172092345863533, "learning_rate": 9.844278198892074e-06, "loss": 0.9213, "step": 2405 }, { "epoch": 0.21, "grad_norm": 7.602670517612148, "learning_rate": 9.844099270262327e-06, "loss": 0.8602, "step": 2406 }, { "epoch": 0.21, "grad_norm": 6.4743631989026955, "learning_rate": 9.843920240522616e-06, "loss": 0.945, "step": 2407 }, { "epoch": 0.21, "grad_norm": 5.651098485186191, "learning_rate": 9.843741109676676e-06, "loss": 0.8964, "step": 2408 }, { "epoch": 0.21, "grad_norm": 7.607664017096932, "learning_rate": 9.843561877728249e-06, "loss": 0.9342, "step": 2409 }, { "epoch": 0.21, "grad_norm": 7.377297393276695, "learning_rate": 9.843382544681076e-06, "loss": 0.8614, "step": 2410 }, { "epoch": 0.22, "grad_norm": 8.419120132774292, "learning_rate": 9.8432031105389e-06, "loss": 0.9777, "step": 2411 }, { "epoch": 0.22, "grad_norm": 6.603376204394441, "learning_rate": 9.843023575305464e-06, "loss": 0.8744, "step": 2412 }, { "epoch": 0.22, "grad_norm": 7.411525569016264, "learning_rate": 9.842843938984517e-06, "loss": 0.9244, "step": 2413 }, { "epoch": 0.22, "grad_norm": 7.294141609221271, "learning_rate": 9.842664201579807e-06, "loss": 0.9068, "step": 2414 }, { "epoch": 0.22, "grad_norm": 5.864759344218264, "learning_rate": 9.84248436309509e-06, "loss": 0.8957, "step": 2415 }, { "epoch": 0.22, "grad_norm": 8.393404092070174, "learning_rate": 9.842304423534116e-06, "loss": 0.9784, "step": 2416 }, { "epoch": 0.22, "grad_norm": 8.363562667370635, "learning_rate": 9.84212438290064e-06, "loss": 0.8925, "step": 2417 }, { "epoch": 0.22, "grad_norm": 7.286418924483991, "learning_rate": 9.841944241198422e-06, "loss": 0.8934, "step": 2418 }, { "epoch": 0.22, "grad_norm": 6.684811563871887, "learning_rate": 9.841763998431223e-06, "loss": 0.8286, "step": 2419 }, { "epoch": 0.22, "grad_norm": 8.392467924845583, "learning_rate": 9.841583654602802e-06, "loss": 0.938, "step": 2420 }, { "epoch": 0.22, "grad_norm": 5.797361711277601, "learning_rate": 9.841403209716927e-06, "loss": 0.9516, "step": 2421 }, { "epoch": 0.22, "grad_norm": 7.645003866515064, "learning_rate": 9.84122266377736e-06, "loss": 0.8603, "step": 2422 }, { "epoch": 0.22, "grad_norm": 5.857645763346228, "learning_rate": 9.841042016787876e-06, "loss": 0.9517, "step": 2423 }, { "epoch": 0.22, "grad_norm": 9.965922994671066, "learning_rate": 9.840861268752239e-06, "loss": 0.9255, "step": 2424 }, { "epoch": 0.22, "grad_norm": 7.78194304641037, "learning_rate": 9.840680419674225e-06, "loss": 0.9601, "step": 2425 }, { "epoch": 0.22, "grad_norm": 7.670116584123574, "learning_rate": 9.840499469557607e-06, "loss": 0.9175, "step": 2426 }, { "epoch": 0.22, "grad_norm": 11.173469189065774, "learning_rate": 9.840318418406163e-06, "loss": 0.9089, "step": 2427 }, { "epoch": 0.22, "grad_norm": 7.975093799401943, "learning_rate": 9.840137266223672e-06, "loss": 0.8303, "step": 2428 }, { "epoch": 0.22, "grad_norm": 6.367420553662351, "learning_rate": 9.839956013013918e-06, "loss": 0.8765, "step": 2429 }, { "epoch": 0.22, "grad_norm": 7.31729886906235, "learning_rate": 9.83977465878068e-06, "loss": 0.8527, "step": 2430 }, { "epoch": 0.22, "grad_norm": 6.746994675722921, "learning_rate": 9.839593203527744e-06, "loss": 0.8682, "step": 2431 }, { "epoch": 0.22, "grad_norm": 6.6223095883821825, "learning_rate": 9.839411647258898e-06, "loss": 0.8793, "step": 2432 }, { "epoch": 0.22, "grad_norm": 6.073531451814592, "learning_rate": 9.839229989977934e-06, "loss": 0.8131, "step": 2433 }, { "epoch": 0.22, "grad_norm": 8.331787145535653, "learning_rate": 9.839048231688642e-06, "loss": 0.8983, "step": 2434 }, { "epoch": 0.22, "grad_norm": 7.701679796902026, "learning_rate": 9.838866372394813e-06, "loss": 0.8606, "step": 2435 }, { "epoch": 0.22, "grad_norm": 6.6042890135484535, "learning_rate": 9.838684412100247e-06, "loss": 0.8402, "step": 2436 }, { "epoch": 0.22, "grad_norm": 6.491210350277201, "learning_rate": 9.83850235080874e-06, "loss": 0.8741, "step": 2437 }, { "epoch": 0.22, "grad_norm": 8.451974819796494, "learning_rate": 9.838320188524093e-06, "loss": 0.8717, "step": 2438 }, { "epoch": 0.22, "grad_norm": 7.513248423056612, "learning_rate": 9.838137925250107e-06, "loss": 0.9001, "step": 2439 }, { "epoch": 0.22, "grad_norm": 5.707903076990253, "learning_rate": 9.837955560990588e-06, "loss": 0.9079, "step": 2440 }, { "epoch": 0.22, "grad_norm": 7.1758720395564, "learning_rate": 9.837773095749342e-06, "loss": 0.8675, "step": 2441 }, { "epoch": 0.22, "grad_norm": 6.115664614432461, "learning_rate": 9.837590529530175e-06, "loss": 0.903, "step": 2442 }, { "epoch": 0.22, "grad_norm": 7.81203520424122, "learning_rate": 9.837407862336902e-06, "loss": 0.918, "step": 2443 }, { "epoch": 0.22, "grad_norm": 9.71382787349826, "learning_rate": 9.837225094173333e-06, "loss": 0.8824, "step": 2444 }, { "epoch": 0.22, "grad_norm": 7.62204743760053, "learning_rate": 9.837042225043284e-06, "loss": 0.918, "step": 2445 }, { "epoch": 0.22, "grad_norm": 9.469075192759506, "learning_rate": 9.83685925495057e-06, "loss": 0.9045, "step": 2446 }, { "epoch": 0.22, "grad_norm": 6.91407905973321, "learning_rate": 9.836676183899013e-06, "loss": 0.9078, "step": 2447 }, { "epoch": 0.22, "grad_norm": 7.3392280493779385, "learning_rate": 9.836493011892431e-06, "loss": 0.9031, "step": 2448 }, { "epoch": 0.22, "grad_norm": 7.201237849198963, "learning_rate": 9.83630973893465e-06, "loss": 0.8927, "step": 2449 }, { "epoch": 0.22, "grad_norm": 7.563751850673228, "learning_rate": 9.836126365029497e-06, "loss": 0.874, "step": 2450 }, { "epoch": 0.22, "grad_norm": 10.97997151589386, "learning_rate": 9.835942890180794e-06, "loss": 0.9763, "step": 2451 }, { "epoch": 0.22, "grad_norm": 7.80124493789533, "learning_rate": 9.835759314392375e-06, "loss": 0.8825, "step": 2452 }, { "epoch": 0.22, "grad_norm": 6.357952232284233, "learning_rate": 9.83557563766807e-06, "loss": 0.861, "step": 2453 }, { "epoch": 0.22, "grad_norm": 6.2537024621687305, "learning_rate": 9.835391860011714e-06, "loss": 0.9099, "step": 2454 }, { "epoch": 0.22, "grad_norm": 7.17319107758288, "learning_rate": 9.835207981427141e-06, "loss": 0.9895, "step": 2455 }, { "epoch": 0.22, "grad_norm": 6.350517457214161, "learning_rate": 9.835024001918191e-06, "loss": 0.945, "step": 2456 }, { "epoch": 0.22, "grad_norm": 6.228178870297306, "learning_rate": 9.834839921488703e-06, "loss": 0.8597, "step": 2457 }, { "epoch": 0.22, "grad_norm": 8.357418884027526, "learning_rate": 9.83465574014252e-06, "loss": 0.9634, "step": 2458 }, { "epoch": 0.22, "grad_norm": 7.818749598397317, "learning_rate": 9.834471457883488e-06, "loss": 0.8788, "step": 2459 }, { "epoch": 0.22, "grad_norm": 6.446346598378385, "learning_rate": 9.83428707471545e-06, "loss": 0.8553, "step": 2460 }, { "epoch": 0.22, "grad_norm": 9.159967057807451, "learning_rate": 9.834102590642256e-06, "loss": 0.9249, "step": 2461 }, { "epoch": 0.22, "grad_norm": 5.677027363856248, "learning_rate": 9.833918005667757e-06, "loss": 0.8261, "step": 2462 }, { "epoch": 0.22, "grad_norm": 5.190222324147908, "learning_rate": 9.833733319795806e-06, "loss": 0.9071, "step": 2463 }, { "epoch": 0.22, "grad_norm": 6.594179097248613, "learning_rate": 9.833548533030257e-06, "loss": 0.841, "step": 2464 }, { "epoch": 0.22, "grad_norm": 6.6490695878304775, "learning_rate": 9.833363645374969e-06, "loss": 0.905, "step": 2465 }, { "epoch": 0.22, "grad_norm": 6.2799054099002545, "learning_rate": 9.833178656833799e-06, "loss": 0.9011, "step": 2466 }, { "epoch": 0.22, "grad_norm": 6.962959359357749, "learning_rate": 9.832993567410608e-06, "loss": 0.9411, "step": 2467 }, { "epoch": 0.22, "grad_norm": 7.062598066473613, "learning_rate": 9.832808377109262e-06, "loss": 0.9132, "step": 2468 }, { "epoch": 0.22, "grad_norm": 9.334217609800442, "learning_rate": 9.832623085933623e-06, "loss": 0.8755, "step": 2469 }, { "epoch": 0.22, "grad_norm": 5.626308702056708, "learning_rate": 9.832437693887561e-06, "loss": 0.9012, "step": 2470 }, { "epoch": 0.22, "grad_norm": 9.030800981080466, "learning_rate": 9.832252200974946e-06, "loss": 0.9034, "step": 2471 }, { "epoch": 0.22, "grad_norm": 6.60333883872574, "learning_rate": 9.832066607199648e-06, "loss": 0.9011, "step": 2472 }, { "epoch": 0.22, "grad_norm": 6.358043820446671, "learning_rate": 9.831880912565544e-06, "loss": 0.8921, "step": 2473 }, { "epoch": 0.22, "grad_norm": 6.879178905798572, "learning_rate": 9.831695117076503e-06, "loss": 0.8321, "step": 2474 }, { "epoch": 0.22, "grad_norm": 8.63503572166374, "learning_rate": 9.831509220736412e-06, "loss": 0.8897, "step": 2475 }, { "epoch": 0.22, "grad_norm": 5.991945284442245, "learning_rate": 9.831323223549145e-06, "loss": 0.8978, "step": 2476 }, { "epoch": 0.22, "grad_norm": 8.994498663596056, "learning_rate": 9.831137125518586e-06, "loss": 0.86, "step": 2477 }, { "epoch": 0.22, "grad_norm": 8.969385962828241, "learning_rate": 9.83095092664862e-06, "loss": 0.8995, "step": 2478 }, { "epoch": 0.22, "grad_norm": 5.291466166656589, "learning_rate": 9.830764626943132e-06, "loss": 0.8826, "step": 2479 }, { "epoch": 0.22, "grad_norm": 7.636571497043233, "learning_rate": 9.830578226406012e-06, "loss": 0.9473, "step": 2480 }, { "epoch": 0.22, "grad_norm": 5.450213770577966, "learning_rate": 9.83039172504115e-06, "loss": 0.8736, "step": 2481 }, { "epoch": 0.22, "grad_norm": 7.4496827588255075, "learning_rate": 9.83020512285244e-06, "loss": 0.9009, "step": 2482 }, { "epoch": 0.22, "grad_norm": 5.5642542517204365, "learning_rate": 9.830018419843776e-06, "loss": 0.8865, "step": 2483 }, { "epoch": 0.22, "grad_norm": 7.368464394009108, "learning_rate": 9.829831616019055e-06, "loss": 0.8515, "step": 2484 }, { "epoch": 0.22, "grad_norm": 7.76275007670052, "learning_rate": 9.829644711382176e-06, "loss": 0.9078, "step": 2485 }, { "epoch": 0.22, "grad_norm": 6.791807037677918, "learning_rate": 9.82945770593704e-06, "loss": 0.9295, "step": 2486 }, { "epoch": 0.22, "grad_norm": 5.708693862665198, "learning_rate": 9.82927059968755e-06, "loss": 0.8558, "step": 2487 }, { "epoch": 0.22, "grad_norm": 9.705755020783277, "learning_rate": 9.829083392637614e-06, "loss": 0.8693, "step": 2488 }, { "epoch": 0.22, "grad_norm": 7.692614907472564, "learning_rate": 9.828896084791138e-06, "loss": 0.8798, "step": 2489 }, { "epoch": 0.22, "grad_norm": 6.458214927027219, "learning_rate": 9.828708676152029e-06, "loss": 0.8439, "step": 2490 }, { "epoch": 0.22, "grad_norm": 6.627640823818022, "learning_rate": 9.828521166724202e-06, "loss": 0.8025, "step": 2491 }, { "epoch": 0.22, "grad_norm": 5.643857323903263, "learning_rate": 9.82833355651157e-06, "loss": 0.9572, "step": 2492 }, { "epoch": 0.22, "grad_norm": 7.81601116765085, "learning_rate": 9.828145845518049e-06, "loss": 0.9413, "step": 2493 }, { "epoch": 0.22, "grad_norm": 7.706585895183364, "learning_rate": 9.827958033747558e-06, "loss": 0.9232, "step": 2494 }, { "epoch": 0.22, "grad_norm": 6.75340033346612, "learning_rate": 9.827770121204015e-06, "loss": 0.8642, "step": 2495 }, { "epoch": 0.22, "grad_norm": 6.819261753264862, "learning_rate": 9.827582107891343e-06, "loss": 0.8666, "step": 2496 }, { "epoch": 0.22, "grad_norm": 8.454850974444113, "learning_rate": 9.827393993813468e-06, "loss": 0.906, "step": 2497 }, { "epoch": 0.22, "grad_norm": 7.222603574783349, "learning_rate": 9.827205778974314e-06, "loss": 0.922, "step": 2498 }, { "epoch": 0.22, "grad_norm": 6.524701386095603, "learning_rate": 9.827017463377811e-06, "loss": 0.886, "step": 2499 }, { "epoch": 0.22, "grad_norm": 8.906055580114247, "learning_rate": 9.826829047027889e-06, "loss": 0.8452, "step": 2500 }, { "epoch": 0.22, "grad_norm": 6.7685239490543205, "learning_rate": 9.826640529928482e-06, "loss": 0.8872, "step": 2501 }, { "epoch": 0.22, "grad_norm": 6.4790939034817105, "learning_rate": 9.826451912083523e-06, "loss": 0.9619, "step": 2502 }, { "epoch": 0.22, "grad_norm": 7.640321274877171, "learning_rate": 9.826263193496952e-06, "loss": 0.9404, "step": 2503 }, { "epoch": 0.22, "grad_norm": 7.222953721968597, "learning_rate": 9.826074374172704e-06, "loss": 0.8954, "step": 2504 }, { "epoch": 0.22, "grad_norm": 7.052759430257341, "learning_rate": 9.825885454114725e-06, "loss": 0.8968, "step": 2505 }, { "epoch": 0.22, "grad_norm": 6.210615536856999, "learning_rate": 9.825696433326954e-06, "loss": 0.9112, "step": 2506 }, { "epoch": 0.22, "grad_norm": 5.771478610895155, "learning_rate": 9.825507311813338e-06, "loss": 0.8803, "step": 2507 }, { "epoch": 0.22, "grad_norm": 8.680176138918098, "learning_rate": 9.825318089577825e-06, "loss": 0.8536, "step": 2508 }, { "epoch": 0.22, "grad_norm": 7.723865795817744, "learning_rate": 9.825128766624364e-06, "loss": 0.8565, "step": 2509 }, { "epoch": 0.22, "grad_norm": 7.8827621710566405, "learning_rate": 9.824939342956906e-06, "loss": 0.7971, "step": 2510 }, { "epoch": 0.22, "grad_norm": 5.585113517542912, "learning_rate": 9.824749818579406e-06, "loss": 0.8714, "step": 2511 }, { "epoch": 0.22, "grad_norm": 7.323047083924022, "learning_rate": 9.82456019349582e-06, "loss": 0.8322, "step": 2512 }, { "epoch": 0.22, "grad_norm": 6.448253358987684, "learning_rate": 9.824370467710106e-06, "loss": 0.9071, "step": 2513 }, { "epoch": 0.22, "grad_norm": 6.712828164131581, "learning_rate": 9.824180641226223e-06, "loss": 0.8673, "step": 2514 }, { "epoch": 0.22, "grad_norm": 7.380255011357968, "learning_rate": 9.823990714048135e-06, "loss": 0.8615, "step": 2515 }, { "epoch": 0.22, "grad_norm": 6.167816480732198, "learning_rate": 9.823800686179802e-06, "loss": 0.8581, "step": 2516 }, { "epoch": 0.22, "grad_norm": 7.545902614413698, "learning_rate": 9.823610557625199e-06, "loss": 0.8593, "step": 2517 }, { "epoch": 0.22, "grad_norm": 4.896918339137562, "learning_rate": 9.823420328388285e-06, "loss": 0.8843, "step": 2518 }, { "epoch": 0.22, "grad_norm": 6.571439345385356, "learning_rate": 9.823229998473037e-06, "loss": 0.8905, "step": 2519 }, { "epoch": 0.22, "grad_norm": 5.966851577638757, "learning_rate": 9.823039567883423e-06, "loss": 0.8712, "step": 2520 }, { "epoch": 0.22, "grad_norm": 6.354245738473049, "learning_rate": 9.82284903662342e-06, "loss": 0.9063, "step": 2521 }, { "epoch": 0.22, "grad_norm": 7.097093077716542, "learning_rate": 9.822658404697008e-06, "loss": 0.8213, "step": 2522 }, { "epoch": 0.23, "grad_norm": 5.351510651658792, "learning_rate": 9.822467672108163e-06, "loss": 0.8582, "step": 2523 }, { "epoch": 0.23, "grad_norm": 6.64901844746443, "learning_rate": 9.822276838860865e-06, "loss": 0.896, "step": 2524 }, { "epoch": 0.23, "grad_norm": 6.832522642817914, "learning_rate": 9.822085904959097e-06, "loss": 0.9471, "step": 2525 }, { "epoch": 0.23, "grad_norm": 6.21945147643285, "learning_rate": 9.821894870406849e-06, "loss": 0.901, "step": 2526 }, { "epoch": 0.23, "grad_norm": 7.322050982253022, "learning_rate": 9.821703735208104e-06, "loss": 0.8344, "step": 2527 }, { "epoch": 0.23, "grad_norm": 6.884219257854892, "learning_rate": 9.821512499366855e-06, "loss": 0.8756, "step": 2528 }, { "epoch": 0.23, "grad_norm": 7.69454009346205, "learning_rate": 9.821321162887088e-06, "loss": 0.8584, "step": 2529 }, { "epoch": 0.23, "grad_norm": 7.380347594797289, "learning_rate": 9.821129725772802e-06, "loss": 0.8452, "step": 2530 }, { "epoch": 0.23, "grad_norm": 5.821666943433089, "learning_rate": 9.82093818802799e-06, "loss": 0.9732, "step": 2531 }, { "epoch": 0.23, "grad_norm": 5.624631746290377, "learning_rate": 9.820746549656654e-06, "loss": 0.8656, "step": 2532 }, { "epoch": 0.23, "grad_norm": 5.929477238217664, "learning_rate": 9.820554810662789e-06, "loss": 0.8849, "step": 2533 }, { "epoch": 0.23, "grad_norm": 8.766988580938753, "learning_rate": 9.820362971050398e-06, "loss": 0.911, "step": 2534 }, { "epoch": 0.23, "grad_norm": 8.476071076943576, "learning_rate": 9.820171030823489e-06, "loss": 0.8603, "step": 2535 }, { "epoch": 0.23, "grad_norm": 7.794647936409016, "learning_rate": 9.819978989986063e-06, "loss": 0.8607, "step": 2536 }, { "epoch": 0.23, "grad_norm": 13.034955927417442, "learning_rate": 9.819786848542132e-06, "loss": 0.9288, "step": 2537 }, { "epoch": 0.23, "grad_norm": 5.929182732321872, "learning_rate": 9.819594606495707e-06, "loss": 0.9203, "step": 2538 }, { "epoch": 0.23, "grad_norm": 8.546972455933007, "learning_rate": 9.819402263850799e-06, "loss": 0.8315, "step": 2539 }, { "epoch": 0.23, "grad_norm": 5.8591333477435885, "learning_rate": 9.819209820611422e-06, "loss": 0.8535, "step": 2540 }, { "epoch": 0.23, "grad_norm": 6.2851734647998905, "learning_rate": 9.819017276781595e-06, "loss": 0.9678, "step": 2541 }, { "epoch": 0.23, "grad_norm": 8.836935165628057, "learning_rate": 9.818824632365334e-06, "loss": 0.9209, "step": 2542 }, { "epoch": 0.23, "grad_norm": 6.107025889817976, "learning_rate": 9.818631887366663e-06, "loss": 0.8734, "step": 2543 }, { "epoch": 0.23, "grad_norm": 6.87517105528627, "learning_rate": 9.818439041789604e-06, "loss": 0.9327, "step": 2544 }, { "epoch": 0.23, "grad_norm": 7.303846576535831, "learning_rate": 9.818246095638183e-06, "loss": 0.8514, "step": 2545 }, { "epoch": 0.23, "grad_norm": 7.643394571077277, "learning_rate": 9.818053048916425e-06, "loss": 0.8517, "step": 2546 }, { "epoch": 0.23, "grad_norm": 8.769305203148667, "learning_rate": 9.817859901628362e-06, "loss": 0.8678, "step": 2547 }, { "epoch": 0.23, "grad_norm": 8.56597133725013, "learning_rate": 9.817666653778024e-06, "loss": 0.8451, "step": 2548 }, { "epoch": 0.23, "grad_norm": 6.671769038065682, "learning_rate": 9.817473305369447e-06, "loss": 0.9007, "step": 2549 }, { "epoch": 0.23, "grad_norm": 7.544861851518432, "learning_rate": 9.817279856406664e-06, "loss": 0.8377, "step": 2550 }, { "epoch": 0.23, "grad_norm": 7.183065178571264, "learning_rate": 9.817086306893711e-06, "loss": 0.9428, "step": 2551 }, { "epoch": 0.23, "grad_norm": 5.713328026630656, "learning_rate": 9.816892656834632e-06, "loss": 0.86, "step": 2552 }, { "epoch": 0.23, "grad_norm": 5.759402543524686, "learning_rate": 9.816698906233469e-06, "loss": 0.9151, "step": 2553 }, { "epoch": 0.23, "grad_norm": 6.369250187237468, "learning_rate": 9.816505055094264e-06, "loss": 0.8986, "step": 2554 }, { "epoch": 0.23, "grad_norm": 8.911550838012886, "learning_rate": 9.816311103421064e-06, "loss": 0.8695, "step": 2555 }, { "epoch": 0.23, "grad_norm": 8.33847535883093, "learning_rate": 9.816117051217916e-06, "loss": 0.9601, "step": 2556 }, { "epoch": 0.23, "grad_norm": 6.519861875299529, "learning_rate": 9.815922898488873e-06, "loss": 0.9094, "step": 2557 }, { "epoch": 0.23, "grad_norm": 5.616197266089012, "learning_rate": 9.815728645237984e-06, "loss": 0.9196, "step": 2558 }, { "epoch": 0.23, "grad_norm": 4.986200245995738, "learning_rate": 9.815534291469308e-06, "loss": 0.8413, "step": 2559 }, { "epoch": 0.23, "grad_norm": 7.231001733878972, "learning_rate": 9.8153398371869e-06, "loss": 0.9118, "step": 2560 }, { "epoch": 0.23, "grad_norm": 6.176490814786114, "learning_rate": 9.815145282394818e-06, "loss": 0.8986, "step": 2561 }, { "epoch": 0.23, "grad_norm": 7.599662407369803, "learning_rate": 9.814950627097122e-06, "loss": 0.9204, "step": 2562 }, { "epoch": 0.23, "grad_norm": 7.3010769285434485, "learning_rate": 9.814755871297876e-06, "loss": 0.8472, "step": 2563 }, { "epoch": 0.23, "grad_norm": 5.8709532051061455, "learning_rate": 9.814561015001147e-06, "loss": 0.855, "step": 2564 }, { "epoch": 0.23, "grad_norm": 6.223104587320485, "learning_rate": 9.814366058211e-06, "loss": 0.8136, "step": 2565 }, { "epoch": 0.23, "grad_norm": 6.046472206734374, "learning_rate": 9.814171000931503e-06, "loss": 0.8438, "step": 2566 }, { "epoch": 0.23, "grad_norm": 5.739132864667485, "learning_rate": 9.813975843166733e-06, "loss": 0.8658, "step": 2567 }, { "epoch": 0.23, "grad_norm": 7.488078623989257, "learning_rate": 9.813780584920756e-06, "loss": 0.8744, "step": 2568 }, { "epoch": 0.23, "grad_norm": 7.361184877142465, "learning_rate": 9.813585226197653e-06, "loss": 0.8485, "step": 2569 }, { "epoch": 0.23, "grad_norm": 7.46610938607044, "learning_rate": 9.8133897670015e-06, "loss": 0.9022, "step": 2570 }, { "epoch": 0.23, "grad_norm": 7.590913314453891, "learning_rate": 9.813194207336376e-06, "loss": 0.8614, "step": 2571 }, { "epoch": 0.23, "grad_norm": 8.177536435166868, "learning_rate": 9.812998547206364e-06, "loss": 0.8668, "step": 2572 }, { "epoch": 0.23, "grad_norm": 8.152119205764954, "learning_rate": 9.812802786615547e-06, "loss": 0.8415, "step": 2573 }, { "epoch": 0.23, "grad_norm": 6.212690089721803, "learning_rate": 9.812606925568014e-06, "loss": 0.8598, "step": 2574 }, { "epoch": 0.23, "grad_norm": 7.5190849763482195, "learning_rate": 9.812410964067849e-06, "loss": 0.8702, "step": 2575 }, { "epoch": 0.23, "grad_norm": 6.860538443925113, "learning_rate": 9.812214902119144e-06, "loss": 0.8569, "step": 2576 }, { "epoch": 0.23, "grad_norm": 6.378638385637407, "learning_rate": 9.812018739725992e-06, "loss": 0.8168, "step": 2577 }, { "epoch": 0.23, "grad_norm": 6.139503053603262, "learning_rate": 9.811822476892487e-06, "loss": 0.8871, "step": 2578 }, { "epoch": 0.23, "grad_norm": 8.073651723359445, "learning_rate": 9.811626113622725e-06, "loss": 0.8613, "step": 2579 }, { "epoch": 0.23, "grad_norm": 7.096354137737452, "learning_rate": 9.811429649920805e-06, "loss": 0.846, "step": 2580 }, { "epoch": 0.23, "grad_norm": 6.945653633157951, "learning_rate": 9.811233085790828e-06, "loss": 0.8549, "step": 2581 }, { "epoch": 0.23, "grad_norm": 7.730098357178783, "learning_rate": 9.811036421236898e-06, "loss": 0.8608, "step": 2582 }, { "epoch": 0.23, "grad_norm": 12.411209091442236, "learning_rate": 9.810839656263117e-06, "loss": 0.8887, "step": 2583 }, { "epoch": 0.23, "grad_norm": 5.875537394301156, "learning_rate": 9.810642790873594e-06, "loss": 0.8624, "step": 2584 }, { "epoch": 0.23, "grad_norm": 6.278468831963581, "learning_rate": 9.810445825072439e-06, "loss": 0.8895, "step": 2585 }, { "epoch": 0.23, "grad_norm": 7.425862400237917, "learning_rate": 9.810248758863762e-06, "loss": 0.8461, "step": 2586 }, { "epoch": 0.23, "grad_norm": 7.059628710133865, "learning_rate": 9.810051592251676e-06, "loss": 0.9032, "step": 2587 }, { "epoch": 0.23, "grad_norm": 6.418639503816613, "learning_rate": 9.809854325240297e-06, "loss": 0.8826, "step": 2588 }, { "epoch": 0.23, "grad_norm": 5.588726826963997, "learning_rate": 9.809656957833742e-06, "loss": 0.8609, "step": 2589 }, { "epoch": 0.23, "grad_norm": 7.3305575338036295, "learning_rate": 9.80945949003613e-06, "loss": 0.8426, "step": 2590 }, { "epoch": 0.23, "grad_norm": 7.316967537884525, "learning_rate": 9.809261921851585e-06, "loss": 0.8085, "step": 2591 }, { "epoch": 0.23, "grad_norm": 7.232819523369361, "learning_rate": 9.809064253284231e-06, "loss": 0.8456, "step": 2592 }, { "epoch": 0.23, "grad_norm": 5.464288723592693, "learning_rate": 9.80886648433819e-06, "loss": 0.8624, "step": 2593 }, { "epoch": 0.23, "grad_norm": 8.258335931845298, "learning_rate": 9.808668615017593e-06, "loss": 0.8529, "step": 2594 }, { "epoch": 0.23, "grad_norm": 6.262585936482656, "learning_rate": 9.808470645326568e-06, "loss": 0.8651, "step": 2595 }, { "epoch": 0.23, "grad_norm": 6.832751364670795, "learning_rate": 9.808272575269252e-06, "loss": 0.9293, "step": 2596 }, { "epoch": 0.23, "grad_norm": 8.292768028024154, "learning_rate": 9.808074404849774e-06, "loss": 0.8698, "step": 2597 }, { "epoch": 0.23, "grad_norm": 5.3081936280850694, "learning_rate": 9.807876134072272e-06, "loss": 0.9037, "step": 2598 }, { "epoch": 0.23, "grad_norm": 7.091178689188524, "learning_rate": 9.807677762940886e-06, "loss": 0.9077, "step": 2599 }, { "epoch": 0.23, "grad_norm": 6.618092923512377, "learning_rate": 9.807479291459754e-06, "loss": 0.8939, "step": 2600 }, { "epoch": 0.23, "grad_norm": 6.361214936520273, "learning_rate": 9.807280719633021e-06, "loss": 0.8382, "step": 2601 }, { "epoch": 0.23, "grad_norm": 5.25733728611795, "learning_rate": 9.80708204746483e-06, "loss": 0.9136, "step": 2602 }, { "epoch": 0.23, "grad_norm": 9.183134623700713, "learning_rate": 9.806883274959329e-06, "loss": 0.9757, "step": 2603 }, { "epoch": 0.23, "grad_norm": 5.569039585987264, "learning_rate": 9.806684402120665e-06, "loss": 0.8842, "step": 2604 }, { "epoch": 0.23, "grad_norm": 8.149575956592454, "learning_rate": 9.806485428952993e-06, "loss": 0.8912, "step": 2605 }, { "epoch": 0.23, "grad_norm": 7.59626676209339, "learning_rate": 9.806286355460463e-06, "loss": 0.878, "step": 2606 }, { "epoch": 0.23, "grad_norm": 6.400864836100951, "learning_rate": 9.80608718164723e-06, "loss": 0.9485, "step": 2607 }, { "epoch": 0.23, "grad_norm": 6.187529097026416, "learning_rate": 9.805887907517452e-06, "loss": 0.928, "step": 2608 }, { "epoch": 0.23, "grad_norm": 6.908417138784767, "learning_rate": 9.80568853307529e-06, "loss": 0.8766, "step": 2609 }, { "epoch": 0.23, "grad_norm": 7.836648206219623, "learning_rate": 9.805489058324904e-06, "loss": 1.0171, "step": 2610 }, { "epoch": 0.23, "grad_norm": 5.341021600055184, "learning_rate": 9.805289483270458e-06, "loss": 0.9318, "step": 2611 }, { "epoch": 0.23, "grad_norm": 9.067583487890701, "learning_rate": 9.805089807916116e-06, "loss": 0.9393, "step": 2612 }, { "epoch": 0.23, "grad_norm": 8.979457899459106, "learning_rate": 9.804890032266047e-06, "loss": 0.8221, "step": 2613 }, { "epoch": 0.23, "grad_norm": 6.699905005815041, "learning_rate": 9.804690156324424e-06, "loss": 0.9275, "step": 2614 }, { "epoch": 0.23, "grad_norm": 6.645119830924037, "learning_rate": 9.804490180095413e-06, "loss": 0.9306, "step": 2615 }, { "epoch": 0.23, "grad_norm": 7.261927394111703, "learning_rate": 9.804290103583193e-06, "loss": 0.8423, "step": 2616 }, { "epoch": 0.23, "grad_norm": 5.848621009335286, "learning_rate": 9.804089926791938e-06, "loss": 0.8989, "step": 2617 }, { "epoch": 0.23, "grad_norm": 6.031191080038291, "learning_rate": 9.803889649725825e-06, "loss": 0.919, "step": 2618 }, { "epoch": 0.23, "grad_norm": 6.090313306875797, "learning_rate": 9.803689272389037e-06, "loss": 0.8684, "step": 2619 }, { "epoch": 0.23, "grad_norm": 8.451350574654388, "learning_rate": 9.803488794785755e-06, "loss": 0.8575, "step": 2620 }, { "epoch": 0.23, "grad_norm": 7.585424331822806, "learning_rate": 9.803288216920166e-06, "loss": 0.8975, "step": 2621 }, { "epoch": 0.23, "grad_norm": 6.30978483139431, "learning_rate": 9.803087538796452e-06, "loss": 0.9466, "step": 2622 }, { "epoch": 0.23, "grad_norm": 8.610123823099812, "learning_rate": 9.802886760418804e-06, "loss": 0.8632, "step": 2623 }, { "epoch": 0.23, "grad_norm": 8.624445997142006, "learning_rate": 9.802685881791414e-06, "loss": 0.8971, "step": 2624 }, { "epoch": 0.23, "grad_norm": 5.478622766159374, "learning_rate": 9.802484902918474e-06, "loss": 0.9145, "step": 2625 }, { "epoch": 0.23, "grad_norm": 7.989950189467666, "learning_rate": 9.80228382380418e-06, "loss": 0.8691, "step": 2626 }, { "epoch": 0.23, "grad_norm": 7.354638344772864, "learning_rate": 9.802082644452725e-06, "loss": 0.8851, "step": 2627 }, { "epoch": 0.23, "grad_norm": 6.861692463708176, "learning_rate": 9.801881364868315e-06, "loss": 0.9797, "step": 2628 }, { "epoch": 0.23, "grad_norm": 5.843793005529265, "learning_rate": 9.801679985055145e-06, "loss": 0.8421, "step": 2629 }, { "epoch": 0.23, "grad_norm": 8.470252478830073, "learning_rate": 9.80147850501742e-06, "loss": 0.8698, "step": 2630 }, { "epoch": 0.23, "grad_norm": 6.096335299421257, "learning_rate": 9.801276924759348e-06, "loss": 0.8901, "step": 2631 }, { "epoch": 0.23, "grad_norm": 6.255654868884059, "learning_rate": 9.801075244285135e-06, "loss": 0.793, "step": 2632 }, { "epoch": 0.23, "grad_norm": 6.893767657643325, "learning_rate": 9.80087346359899e-06, "loss": 0.8226, "step": 2633 }, { "epoch": 0.23, "grad_norm": 6.231606526332115, "learning_rate": 9.800671582705125e-06, "loss": 0.8213, "step": 2634 }, { "epoch": 0.24, "grad_norm": 7.646276226033292, "learning_rate": 9.800469601607754e-06, "loss": 0.8475, "step": 2635 }, { "epoch": 0.24, "grad_norm": 5.584779456636542, "learning_rate": 9.800267520311092e-06, "loss": 0.8084, "step": 2636 }, { "epoch": 0.24, "grad_norm": 6.421095949206507, "learning_rate": 9.80006533881936e-06, "loss": 0.8584, "step": 2637 }, { "epoch": 0.24, "grad_norm": 6.26842197190103, "learning_rate": 9.799863057136775e-06, "loss": 0.9493, "step": 2638 }, { "epoch": 0.24, "grad_norm": 7.598106942769635, "learning_rate": 9.79966067526756e-06, "loss": 0.8817, "step": 2639 }, { "epoch": 0.24, "grad_norm": 6.582426853178686, "learning_rate": 9.799458193215939e-06, "loss": 0.8687, "step": 2640 }, { "epoch": 0.24, "grad_norm": 8.723467923519511, "learning_rate": 9.79925561098614e-06, "loss": 0.8629, "step": 2641 }, { "epoch": 0.24, "grad_norm": 6.359909125188462, "learning_rate": 9.799052928582389e-06, "loss": 0.9083, "step": 2642 }, { "epoch": 0.24, "grad_norm": 10.572358281853177, "learning_rate": 9.798850146008919e-06, "loss": 0.9806, "step": 2643 }, { "epoch": 0.24, "grad_norm": 7.129108012280406, "learning_rate": 9.798647263269961e-06, "loss": 0.8456, "step": 2644 }, { "epoch": 0.24, "grad_norm": 6.745890779515605, "learning_rate": 9.79844428036975e-06, "loss": 0.8613, "step": 2645 }, { "epoch": 0.24, "grad_norm": 8.49312904313298, "learning_rate": 9.798241197312523e-06, "loss": 0.8598, "step": 2646 }, { "epoch": 0.24, "grad_norm": 6.9349781806210204, "learning_rate": 9.79803801410252e-06, "loss": 0.8599, "step": 2647 }, { "epoch": 0.24, "grad_norm": 6.938414145996964, "learning_rate": 9.79783473074398e-06, "loss": 0.9016, "step": 2648 }, { "epoch": 0.24, "grad_norm": 8.495506309611578, "learning_rate": 9.797631347241147e-06, "loss": 0.8173, "step": 2649 }, { "epoch": 0.24, "grad_norm": 5.426418747841631, "learning_rate": 9.797427863598268e-06, "loss": 0.8708, "step": 2650 }, { "epoch": 0.24, "grad_norm": 8.922290423735427, "learning_rate": 9.797224279819588e-06, "loss": 0.8007, "step": 2651 }, { "epoch": 0.24, "grad_norm": 7.7449455059954175, "learning_rate": 9.797020595909355e-06, "loss": 0.8657, "step": 2652 }, { "epoch": 0.24, "grad_norm": 6.506792026897186, "learning_rate": 9.796816811871823e-06, "loss": 0.8237, "step": 2653 }, { "epoch": 0.24, "grad_norm": 6.266965115449891, "learning_rate": 9.796612927711246e-06, "loss": 0.8685, "step": 2654 }, { "epoch": 0.24, "grad_norm": 6.66052021092432, "learning_rate": 9.796408943431878e-06, "loss": 0.9259, "step": 2655 }, { "epoch": 0.24, "grad_norm": 6.451909861199742, "learning_rate": 9.796204859037976e-06, "loss": 0.9007, "step": 2656 }, { "epoch": 0.24, "grad_norm": 6.227135945791931, "learning_rate": 9.796000674533804e-06, "loss": 0.8658, "step": 2657 }, { "epoch": 0.24, "grad_norm": 6.8040110429027845, "learning_rate": 9.795796389923617e-06, "loss": 0.8565, "step": 2658 }, { "epoch": 0.24, "grad_norm": 6.183202440628008, "learning_rate": 9.795592005211687e-06, "loss": 0.9607, "step": 2659 }, { "epoch": 0.24, "grad_norm": 7.110464677544873, "learning_rate": 9.795387520402273e-06, "loss": 0.8597, "step": 2660 }, { "epoch": 0.24, "grad_norm": 9.261121158660702, "learning_rate": 9.795182935499648e-06, "loss": 0.9278, "step": 2661 }, { "epoch": 0.24, "grad_norm": 7.268610342040886, "learning_rate": 9.79497825050808e-06, "loss": 0.8848, "step": 2662 }, { "epoch": 0.24, "grad_norm": 6.904880851410976, "learning_rate": 9.794773465431842e-06, "loss": 0.8289, "step": 2663 }, { "epoch": 0.24, "grad_norm": 6.189922282603601, "learning_rate": 9.794568580275208e-06, "loss": 0.8855, "step": 2664 }, { "epoch": 0.24, "grad_norm": 7.3203383759399525, "learning_rate": 9.794363595042456e-06, "loss": 0.9353, "step": 2665 }, { "epoch": 0.24, "grad_norm": 7.1641003428207695, "learning_rate": 9.794158509737862e-06, "loss": 0.8294, "step": 2666 }, { "epoch": 0.24, "grad_norm": 5.88655136872046, "learning_rate": 9.793953324365708e-06, "loss": 0.8735, "step": 2667 }, { "epoch": 0.24, "grad_norm": 6.685943365598724, "learning_rate": 9.793748038930277e-06, "loss": 0.845, "step": 2668 }, { "epoch": 0.24, "grad_norm": 6.1844555274771835, "learning_rate": 9.793542653435854e-06, "loss": 0.8235, "step": 2669 }, { "epoch": 0.24, "grad_norm": 5.164089872041784, "learning_rate": 9.793337167886727e-06, "loss": 0.8756, "step": 2670 }, { "epoch": 0.24, "grad_norm": 6.748076456797995, "learning_rate": 9.793131582287183e-06, "loss": 0.8723, "step": 2671 }, { "epoch": 0.24, "grad_norm": 5.849169786675935, "learning_rate": 9.792925896641513e-06, "loss": 0.8937, "step": 2672 }, { "epoch": 0.24, "grad_norm": 8.199732243766565, "learning_rate": 9.792720110954013e-06, "loss": 0.8583, "step": 2673 }, { "epoch": 0.24, "grad_norm": 7.962879215965455, "learning_rate": 9.792514225228974e-06, "loss": 0.9036, "step": 2674 }, { "epoch": 0.24, "grad_norm": 5.166743027870251, "learning_rate": 9.792308239470698e-06, "loss": 0.8507, "step": 2675 }, { "epoch": 0.24, "grad_norm": 5.648807479850959, "learning_rate": 9.792102153683482e-06, "loss": 0.9224, "step": 2676 }, { "epoch": 0.24, "grad_norm": 7.716276880362775, "learning_rate": 9.79189596787163e-06, "loss": 0.8991, "step": 2677 }, { "epoch": 0.24, "grad_norm": 7.447549417463951, "learning_rate": 9.79168968203944e-06, "loss": 0.9025, "step": 2678 }, { "epoch": 0.24, "grad_norm": 5.975676791685224, "learning_rate": 9.791483296191224e-06, "loss": 0.8399, "step": 2679 }, { "epoch": 0.24, "grad_norm": 5.318259501697101, "learning_rate": 9.791276810331287e-06, "loss": 0.881, "step": 2680 }, { "epoch": 0.24, "grad_norm": 7.615465788680345, "learning_rate": 9.791070224463939e-06, "loss": 0.8976, "step": 2681 }, { "epoch": 0.24, "grad_norm": 7.242169511125475, "learning_rate": 9.790863538593492e-06, "loss": 0.81, "step": 2682 }, { "epoch": 0.24, "grad_norm": 5.979957733070584, "learning_rate": 9.79065675272426e-06, "loss": 0.8491, "step": 2683 }, { "epoch": 0.24, "grad_norm": 7.007512967026943, "learning_rate": 9.790449866860561e-06, "loss": 0.8925, "step": 2684 }, { "epoch": 0.24, "grad_norm": 7.415212261644758, "learning_rate": 9.790242881006713e-06, "loss": 0.876, "step": 2685 }, { "epoch": 0.24, "grad_norm": 7.393105366067362, "learning_rate": 9.790035795167033e-06, "loss": 0.9045, "step": 2686 }, { "epoch": 0.24, "grad_norm": 7.706083333124812, "learning_rate": 9.789828609345847e-06, "loss": 0.8598, "step": 2687 }, { "epoch": 0.24, "grad_norm": 7.157613252724487, "learning_rate": 9.789621323547478e-06, "loss": 0.9179, "step": 2688 }, { "epoch": 0.24, "grad_norm": 7.66661546407446, "learning_rate": 9.789413937776253e-06, "loss": 0.9678, "step": 2689 }, { "epoch": 0.24, "grad_norm": 6.198605828395586, "learning_rate": 9.789206452036499e-06, "loss": 0.9159, "step": 2690 }, { "epoch": 0.24, "grad_norm": 5.874026038134801, "learning_rate": 9.78899886633255e-06, "loss": 0.931, "step": 2691 }, { "epoch": 0.24, "grad_norm": 8.766153563797758, "learning_rate": 9.788791180668738e-06, "loss": 0.8778, "step": 2692 }, { "epoch": 0.24, "grad_norm": 6.5845312914360985, "learning_rate": 9.788583395049399e-06, "loss": 0.8815, "step": 2693 }, { "epoch": 0.24, "grad_norm": 7.0238100520426086, "learning_rate": 9.788375509478865e-06, "loss": 0.8491, "step": 2694 }, { "epoch": 0.24, "grad_norm": 6.376318353056826, "learning_rate": 9.78816752396148e-06, "loss": 0.8317, "step": 2695 }, { "epoch": 0.24, "grad_norm": 6.786249922582333, "learning_rate": 9.787959438501584e-06, "loss": 0.8783, "step": 2696 }, { "epoch": 0.24, "grad_norm": 6.837715439027034, "learning_rate": 9.787751253103518e-06, "loss": 0.92, "step": 2697 }, { "epoch": 0.24, "grad_norm": 7.0183746791063415, "learning_rate": 9.787542967771633e-06, "loss": 0.9987, "step": 2698 }, { "epoch": 0.24, "grad_norm": 6.502893365765567, "learning_rate": 9.78733458251027e-06, "loss": 0.9345, "step": 2699 }, { "epoch": 0.24, "grad_norm": 7.862912350946509, "learning_rate": 9.787126097323785e-06, "loss": 0.851, "step": 2700 }, { "epoch": 0.24, "grad_norm": 6.21344499833905, "learning_rate": 9.786917512216523e-06, "loss": 0.8796, "step": 2701 }, { "epoch": 0.24, "grad_norm": 8.747656206489461, "learning_rate": 9.786708827192843e-06, "loss": 0.9877, "step": 2702 }, { "epoch": 0.24, "grad_norm": 8.652939921240534, "learning_rate": 9.786500042257098e-06, "loss": 0.8393, "step": 2703 }, { "epoch": 0.24, "grad_norm": 5.949785792937308, "learning_rate": 9.786291157413648e-06, "loss": 0.8889, "step": 2704 }, { "epoch": 0.24, "grad_norm": 7.27546917660922, "learning_rate": 9.786082172666852e-06, "loss": 0.8526, "step": 2705 }, { "epoch": 0.24, "grad_norm": 6.66508193119292, "learning_rate": 9.785873088021072e-06, "loss": 0.8913, "step": 2706 }, { "epoch": 0.24, "grad_norm": 7.575185278857841, "learning_rate": 9.785663903480672e-06, "loss": 0.9224, "step": 2707 }, { "epoch": 0.24, "grad_norm": 6.788937651779318, "learning_rate": 9.785454619050017e-06, "loss": 0.8804, "step": 2708 }, { "epoch": 0.24, "grad_norm": 6.373237261785669, "learning_rate": 9.785245234733477e-06, "loss": 0.9642, "step": 2709 }, { "epoch": 0.24, "grad_norm": 5.764861027808113, "learning_rate": 9.785035750535423e-06, "loss": 0.9576, "step": 2710 }, { "epoch": 0.24, "grad_norm": 8.542975894528062, "learning_rate": 9.784826166460227e-06, "loss": 0.8365, "step": 2711 }, { "epoch": 0.24, "grad_norm": 6.3224079906286725, "learning_rate": 9.784616482512264e-06, "loss": 0.8863, "step": 2712 }, { "epoch": 0.24, "grad_norm": 6.236532906928701, "learning_rate": 9.784406698695908e-06, "loss": 0.855, "step": 2713 }, { "epoch": 0.24, "grad_norm": 8.765668075678219, "learning_rate": 9.784196815015542e-06, "loss": 0.8568, "step": 2714 }, { "epoch": 0.24, "grad_norm": 8.583453015972575, "learning_rate": 9.783986831475542e-06, "loss": 0.9707, "step": 2715 }, { "epoch": 0.24, "grad_norm": 5.694189645864043, "learning_rate": 9.783776748080296e-06, "loss": 0.8477, "step": 2716 }, { "epoch": 0.24, "grad_norm": 5.876151234173226, "learning_rate": 9.783566564834186e-06, "loss": 0.9393, "step": 2717 }, { "epoch": 0.24, "grad_norm": 8.809707056928803, "learning_rate": 9.783356281741601e-06, "loss": 0.8605, "step": 2718 }, { "epoch": 0.24, "grad_norm": 7.376235027172749, "learning_rate": 9.783145898806927e-06, "loss": 0.8857, "step": 2719 }, { "epoch": 0.24, "grad_norm": 6.372740084125681, "learning_rate": 9.782935416034559e-06, "loss": 0.8796, "step": 2720 }, { "epoch": 0.24, "grad_norm": 7.738844992370631, "learning_rate": 9.782724833428888e-06, "loss": 0.8866, "step": 2721 }, { "epoch": 0.24, "grad_norm": 5.798585374060727, "learning_rate": 9.782514150994309e-06, "loss": 0.9193, "step": 2722 }, { "epoch": 0.24, "grad_norm": 7.131842571200875, "learning_rate": 9.782303368735222e-06, "loss": 0.9224, "step": 2723 }, { "epoch": 0.24, "grad_norm": 6.983329925546443, "learning_rate": 9.782092486656024e-06, "loss": 0.9616, "step": 2724 }, { "epoch": 0.24, "grad_norm": 7.731805214562907, "learning_rate": 9.78188150476112e-06, "loss": 0.9403, "step": 2725 }, { "epoch": 0.24, "grad_norm": 6.858053441161775, "learning_rate": 9.781670423054908e-06, "loss": 0.8966, "step": 2726 }, { "epoch": 0.24, "grad_norm": 6.362925974161827, "learning_rate": 9.781459241541801e-06, "loss": 0.8146, "step": 2727 }, { "epoch": 0.24, "grad_norm": 6.461919282286607, "learning_rate": 9.781247960226203e-06, "loss": 0.8671, "step": 2728 }, { "epoch": 0.24, "grad_norm": 7.801718089682048, "learning_rate": 9.781036579112523e-06, "loss": 0.9381, "step": 2729 }, { "epoch": 0.24, "grad_norm": 8.393150717870906, "learning_rate": 9.780825098205176e-06, "loss": 0.931, "step": 2730 }, { "epoch": 0.24, "grad_norm": 7.602922873403676, "learning_rate": 9.780613517508575e-06, "loss": 0.8977, "step": 2731 }, { "epoch": 0.24, "grad_norm": 7.773538450331096, "learning_rate": 9.780401837027137e-06, "loss": 0.8216, "step": 2732 }, { "epoch": 0.24, "grad_norm": 8.229063704059266, "learning_rate": 9.780190056765278e-06, "loss": 0.8506, "step": 2733 }, { "epoch": 0.24, "grad_norm": 6.647809568840821, "learning_rate": 9.77997817672742e-06, "loss": 0.9387, "step": 2734 }, { "epoch": 0.24, "grad_norm": 5.868684241754241, "learning_rate": 9.779766196917985e-06, "loss": 0.8534, "step": 2735 }, { "epoch": 0.24, "grad_norm": 6.559750276543947, "learning_rate": 9.7795541173414e-06, "loss": 0.846, "step": 2736 }, { "epoch": 0.24, "grad_norm": 6.644125206206668, "learning_rate": 9.779341938002089e-06, "loss": 0.8436, "step": 2737 }, { "epoch": 0.24, "grad_norm": 6.246963962046901, "learning_rate": 9.779129658904482e-06, "loss": 0.8486, "step": 2738 }, { "epoch": 0.24, "grad_norm": 6.5637500453940145, "learning_rate": 9.778917280053008e-06, "loss": 0.8525, "step": 2739 }, { "epoch": 0.24, "grad_norm": 5.846542709214897, "learning_rate": 9.778704801452103e-06, "loss": 0.971, "step": 2740 }, { "epoch": 0.24, "grad_norm": 8.136444226598629, "learning_rate": 9.7784922231062e-06, "loss": 0.8691, "step": 2741 }, { "epoch": 0.24, "grad_norm": 7.294131028760153, "learning_rate": 9.778279545019737e-06, "loss": 0.8523, "step": 2742 }, { "epoch": 0.24, "grad_norm": 4.148059570591415, "learning_rate": 9.778066767197152e-06, "loss": 0.8954, "step": 2743 }, { "epoch": 0.24, "grad_norm": 8.730837875508556, "learning_rate": 9.777853889642887e-06, "loss": 0.8469, "step": 2744 }, { "epoch": 0.24, "grad_norm": 6.359756233134474, "learning_rate": 9.777640912361385e-06, "loss": 0.9201, "step": 2745 }, { "epoch": 0.24, "grad_norm": 6.776477293125902, "learning_rate": 9.777427835357094e-06, "loss": 0.8253, "step": 2746 }, { "epoch": 0.25, "grad_norm": 6.995346132849461, "learning_rate": 9.777214658634458e-06, "loss": 0.8858, "step": 2747 }, { "epoch": 0.25, "grad_norm": 8.574556061867451, "learning_rate": 9.777001382197928e-06, "loss": 0.8739, "step": 2748 }, { "epoch": 0.25, "grad_norm": 6.779615281579619, "learning_rate": 9.776788006051954e-06, "loss": 0.8557, "step": 2749 }, { "epoch": 0.25, "grad_norm": 8.44790723757806, "learning_rate": 9.776574530200993e-06, "loss": 0.8574, "step": 2750 }, { "epoch": 0.25, "grad_norm": 7.755302360797564, "learning_rate": 9.776360954649499e-06, "loss": 0.8773, "step": 2751 }, { "epoch": 0.25, "grad_norm": 5.500506408185759, "learning_rate": 9.77614727940193e-06, "loss": 0.8729, "step": 2752 }, { "epoch": 0.25, "grad_norm": 5.267226467781265, "learning_rate": 9.775933504462746e-06, "loss": 0.8627, "step": 2753 }, { "epoch": 0.25, "grad_norm": 6.142928676266129, "learning_rate": 9.775719629836409e-06, "loss": 0.8825, "step": 2754 }, { "epoch": 0.25, "grad_norm": 7.349355114397092, "learning_rate": 9.775505655527384e-06, "loss": 0.8724, "step": 2755 }, { "epoch": 0.25, "grad_norm": 7.837113862706781, "learning_rate": 9.775291581540136e-06, "loss": 0.8468, "step": 2756 }, { "epoch": 0.25, "grad_norm": 7.398738499103372, "learning_rate": 9.775077407879135e-06, "loss": 0.9375, "step": 2757 }, { "epoch": 0.25, "grad_norm": 4.58356398752839, "learning_rate": 9.774863134548848e-06, "loss": 0.919, "step": 2758 }, { "epoch": 0.25, "grad_norm": 6.298146369914531, "learning_rate": 9.774648761553753e-06, "loss": 0.8664, "step": 2759 }, { "epoch": 0.25, "grad_norm": 6.991561455952531, "learning_rate": 9.77443428889832e-06, "loss": 0.8798, "step": 2760 }, { "epoch": 0.25, "grad_norm": 7.468540874355288, "learning_rate": 9.774219716587026e-06, "loss": 0.7828, "step": 2761 }, { "epoch": 0.25, "grad_norm": 7.5287219830061805, "learning_rate": 9.774005044624352e-06, "loss": 0.873, "step": 2762 }, { "epoch": 0.25, "grad_norm": 7.0892975595159635, "learning_rate": 9.773790273014777e-06, "loss": 0.8349, "step": 2763 }, { "epoch": 0.25, "grad_norm": 5.9526311630527875, "learning_rate": 9.773575401762784e-06, "loss": 0.904, "step": 2764 }, { "epoch": 0.25, "grad_norm": 8.322204692848214, "learning_rate": 9.77336043087286e-06, "loss": 0.9067, "step": 2765 }, { "epoch": 0.25, "grad_norm": 6.36718290578751, "learning_rate": 9.77314536034949e-06, "loss": 0.9257, "step": 2766 }, { "epoch": 0.25, "grad_norm": 7.317669747563812, "learning_rate": 9.772930190197164e-06, "loss": 0.9467, "step": 2767 }, { "epoch": 0.25, "grad_norm": 6.700300870616958, "learning_rate": 9.772714920420372e-06, "loss": 0.9372, "step": 2768 }, { "epoch": 0.25, "grad_norm": 5.5670995001177435, "learning_rate": 9.772499551023608e-06, "loss": 0.8368, "step": 2769 }, { "epoch": 0.25, "grad_norm": 5.9836405995199256, "learning_rate": 9.772284082011367e-06, "loss": 0.7823, "step": 2770 }, { "epoch": 0.25, "grad_norm": 6.999009212941442, "learning_rate": 9.772068513388148e-06, "loss": 0.8855, "step": 2771 }, { "epoch": 0.25, "grad_norm": 7.099438849203958, "learning_rate": 9.771852845158449e-06, "loss": 0.8386, "step": 2772 }, { "epoch": 0.25, "grad_norm": 6.672111585642031, "learning_rate": 9.771637077326772e-06, "loss": 0.8992, "step": 2773 }, { "epoch": 0.25, "grad_norm": 5.791653225875142, "learning_rate": 9.771421209897622e-06, "loss": 0.8826, "step": 2774 }, { "epoch": 0.25, "grad_norm": 6.0946047182331595, "learning_rate": 9.7712052428755e-06, "loss": 0.8019, "step": 2775 }, { "epoch": 0.25, "grad_norm": 7.645489027390587, "learning_rate": 9.770989176264922e-06, "loss": 0.8698, "step": 2776 }, { "epoch": 0.25, "grad_norm": 7.6865389850730255, "learning_rate": 9.77077301007039e-06, "loss": 0.786, "step": 2777 }, { "epoch": 0.25, "grad_norm": 8.493061422314831, "learning_rate": 9.77055674429642e-06, "loss": 0.8842, "step": 2778 }, { "epoch": 0.25, "grad_norm": 4.980537048741069, "learning_rate": 9.770340378947526e-06, "loss": 0.8748, "step": 2779 }, { "epoch": 0.25, "grad_norm": 6.925270711819601, "learning_rate": 9.770123914028221e-06, "loss": 0.844, "step": 2780 }, { "epoch": 0.25, "grad_norm": 5.873106982715956, "learning_rate": 9.769907349543028e-06, "loss": 0.8852, "step": 2781 }, { "epoch": 0.25, "grad_norm": 4.784975302325801, "learning_rate": 9.769690685496466e-06, "loss": 0.862, "step": 2782 }, { "epoch": 0.25, "grad_norm": 5.599531993031499, "learning_rate": 9.769473921893055e-06, "loss": 0.9496, "step": 2783 }, { "epoch": 0.25, "grad_norm": 7.6051755742430815, "learning_rate": 9.76925705873732e-06, "loss": 0.9054, "step": 2784 }, { "epoch": 0.25, "grad_norm": 5.92768668168519, "learning_rate": 9.769040096033789e-06, "loss": 0.8312, "step": 2785 }, { "epoch": 0.25, "grad_norm": 7.076069387073558, "learning_rate": 9.76882303378699e-06, "loss": 0.8656, "step": 2786 }, { "epoch": 0.25, "grad_norm": 6.343827471498975, "learning_rate": 9.768605872001456e-06, "loss": 0.9515, "step": 2787 }, { "epoch": 0.25, "grad_norm": 7.980252648333873, "learning_rate": 9.768388610681714e-06, "loss": 0.8992, "step": 2788 }, { "epoch": 0.25, "grad_norm": 6.226357920448103, "learning_rate": 9.768171249832303e-06, "loss": 0.8617, "step": 2789 }, { "epoch": 0.25, "grad_norm": 5.800945881532723, "learning_rate": 9.76795378945776e-06, "loss": 0.9122, "step": 2790 }, { "epoch": 0.25, "grad_norm": 9.905904372245397, "learning_rate": 9.767736229562624e-06, "loss": 0.9085, "step": 2791 }, { "epoch": 0.25, "grad_norm": 7.901053884306244, "learning_rate": 9.767518570151435e-06, "loss": 0.854, "step": 2792 }, { "epoch": 0.25, "grad_norm": 5.989533017615994, "learning_rate": 9.767300811228736e-06, "loss": 0.8354, "step": 2793 }, { "epoch": 0.25, "grad_norm": 8.453291011593855, "learning_rate": 9.767082952799072e-06, "loss": 0.8895, "step": 2794 }, { "epoch": 0.25, "grad_norm": 5.473722835903454, "learning_rate": 9.766864994866993e-06, "loss": 0.8713, "step": 2795 }, { "epoch": 0.25, "grad_norm": 8.602226396793602, "learning_rate": 9.766646937437045e-06, "loss": 0.9775, "step": 2796 }, { "epoch": 0.25, "grad_norm": 10.045253860252014, "learning_rate": 9.766428780513781e-06, "loss": 0.9078, "step": 2797 }, { "epoch": 0.25, "grad_norm": 6.6039178957952664, "learning_rate": 9.766210524101755e-06, "loss": 0.8625, "step": 2798 }, { "epoch": 0.25, "grad_norm": 7.714226728403699, "learning_rate": 9.765992168205523e-06, "loss": 0.8221, "step": 2799 }, { "epoch": 0.25, "grad_norm": 7.013580469644222, "learning_rate": 9.76577371282964e-06, "loss": 0.9081, "step": 2800 }, { "epoch": 0.25, "grad_norm": 6.817975849668641, "learning_rate": 9.76555515797867e-06, "loss": 0.8279, "step": 2801 }, { "epoch": 0.25, "grad_norm": 5.5432454934953865, "learning_rate": 9.76533650365717e-06, "loss": 0.8664, "step": 2802 }, { "epoch": 0.25, "grad_norm": 6.004204562818076, "learning_rate": 9.765117749869709e-06, "loss": 0.8008, "step": 2803 }, { "epoch": 0.25, "grad_norm": 5.932230656094097, "learning_rate": 9.764898896620848e-06, "loss": 0.8336, "step": 2804 }, { "epoch": 0.25, "grad_norm": 6.810491546622725, "learning_rate": 9.764679943915159e-06, "loss": 0.8977, "step": 2805 }, { "epoch": 0.25, "grad_norm": 6.735327187310551, "learning_rate": 9.764460891757208e-06, "loss": 0.8214, "step": 2806 }, { "epoch": 0.25, "grad_norm": 7.850600389140342, "learning_rate": 9.764241740151573e-06, "loss": 0.8575, "step": 2807 }, { "epoch": 0.25, "grad_norm": 5.586181325088457, "learning_rate": 9.764022489102824e-06, "loss": 0.918, "step": 2808 }, { "epoch": 0.25, "grad_norm": 6.7699324345170675, "learning_rate": 9.76380313861554e-06, "loss": 0.8617, "step": 2809 }, { "epoch": 0.25, "grad_norm": 6.944715026188692, "learning_rate": 9.763583688694296e-06, "loss": 0.8427, "step": 2810 }, { "epoch": 0.25, "grad_norm": 5.576624296391786, "learning_rate": 9.763364139343674e-06, "loss": 0.9615, "step": 2811 }, { "epoch": 0.25, "grad_norm": 7.969423724425987, "learning_rate": 9.763144490568259e-06, "loss": 0.8437, "step": 2812 }, { "epoch": 0.25, "grad_norm": 7.496369947213672, "learning_rate": 9.762924742372633e-06, "loss": 0.8955, "step": 2813 }, { "epoch": 0.25, "grad_norm": 6.933321967741968, "learning_rate": 9.762704894761385e-06, "loss": 0.8868, "step": 2814 }, { "epoch": 0.25, "grad_norm": 11.896269551557472, "learning_rate": 9.762484947739101e-06, "loss": 0.8659, "step": 2815 }, { "epoch": 0.25, "grad_norm": 6.49834176504775, "learning_rate": 9.762264901310374e-06, "loss": 0.9035, "step": 2816 }, { "epoch": 0.25, "grad_norm": 6.148391319083141, "learning_rate": 9.762044755479796e-06, "loss": 0.8264, "step": 2817 }, { "epoch": 0.25, "grad_norm": 8.238339876336386, "learning_rate": 9.761824510251962e-06, "loss": 0.9057, "step": 2818 }, { "epoch": 0.25, "grad_norm": 6.970052578710862, "learning_rate": 9.761604165631471e-06, "loss": 0.9477, "step": 2819 }, { "epoch": 0.25, "grad_norm": 8.687872467926363, "learning_rate": 9.761383721622919e-06, "loss": 0.9206, "step": 2820 }, { "epoch": 0.25, "grad_norm": 5.915848235167734, "learning_rate": 9.76116317823091e-06, "loss": 0.9, "step": 2821 }, { "epoch": 0.25, "grad_norm": 6.068579697066319, "learning_rate": 9.760942535460046e-06, "loss": 0.9263, "step": 2822 }, { "epoch": 0.25, "grad_norm": 8.2731666311295, "learning_rate": 9.760721793314933e-06, "loss": 0.9102, "step": 2823 }, { "epoch": 0.25, "grad_norm": 7.035729977822004, "learning_rate": 9.76050095180018e-06, "loss": 0.8776, "step": 2824 }, { "epoch": 0.25, "grad_norm": 6.501370770699194, "learning_rate": 9.760280010920393e-06, "loss": 0.9212, "step": 2825 }, { "epoch": 0.25, "grad_norm": 7.379689991471207, "learning_rate": 9.760058970680186e-06, "loss": 0.8326, "step": 2826 }, { "epoch": 0.25, "grad_norm": 9.397104606522134, "learning_rate": 9.759837831084173e-06, "loss": 0.913, "step": 2827 }, { "epoch": 0.25, "grad_norm": 5.842582420718361, "learning_rate": 9.759616592136968e-06, "loss": 0.8716, "step": 2828 }, { "epoch": 0.25, "grad_norm": 8.322911114899028, "learning_rate": 9.75939525384319e-06, "loss": 0.8255, "step": 2829 }, { "epoch": 0.25, "grad_norm": 9.106312041272632, "learning_rate": 9.75917381620746e-06, "loss": 0.9267, "step": 2830 }, { "epoch": 0.25, "grad_norm": 6.59906955614898, "learning_rate": 9.758952279234399e-06, "loss": 0.8449, "step": 2831 }, { "epoch": 0.25, "grad_norm": 8.455592523900059, "learning_rate": 9.758730642928631e-06, "loss": 0.8447, "step": 2832 }, { "epoch": 0.25, "grad_norm": 5.020823342756915, "learning_rate": 9.758508907294781e-06, "loss": 0.862, "step": 2833 }, { "epoch": 0.25, "grad_norm": 6.433792988007039, "learning_rate": 9.75828707233748e-06, "loss": 0.9121, "step": 2834 }, { "epoch": 0.25, "grad_norm": 5.05886167094975, "learning_rate": 9.758065138061356e-06, "loss": 0.9358, "step": 2835 }, { "epoch": 0.25, "grad_norm": 6.188982499603427, "learning_rate": 9.757843104471042e-06, "loss": 0.8718, "step": 2836 }, { "epoch": 0.25, "grad_norm": 8.60841006301467, "learning_rate": 9.757620971571174e-06, "loss": 0.8494, "step": 2837 }, { "epoch": 0.25, "grad_norm": 10.83398398502046, "learning_rate": 9.757398739366386e-06, "loss": 0.9197, "step": 2838 }, { "epoch": 0.25, "grad_norm": 6.808321032052221, "learning_rate": 9.757176407861319e-06, "loss": 0.8221, "step": 2839 }, { "epoch": 0.25, "grad_norm": 5.773297689805632, "learning_rate": 9.756953977060612e-06, "loss": 0.9085, "step": 2840 }, { "epoch": 0.25, "grad_norm": 6.069904897518965, "learning_rate": 9.756731446968907e-06, "loss": 0.9257, "step": 2841 }, { "epoch": 0.25, "grad_norm": 7.965376229778286, "learning_rate": 9.756508817590854e-06, "loss": 0.828, "step": 2842 }, { "epoch": 0.25, "grad_norm": 7.257811111312696, "learning_rate": 9.756286088931092e-06, "loss": 0.8145, "step": 2843 }, { "epoch": 0.25, "grad_norm": 6.643831391214958, "learning_rate": 9.756063260994275e-06, "loss": 0.8877, "step": 2844 }, { "epoch": 0.25, "grad_norm": 7.087507987376631, "learning_rate": 9.755840333785054e-06, "loss": 0.8683, "step": 2845 }, { "epoch": 0.25, "grad_norm": 6.26934372742727, "learning_rate": 9.75561730730808e-06, "loss": 0.8762, "step": 2846 }, { "epoch": 0.25, "grad_norm": 7.7785316511649185, "learning_rate": 9.755394181568009e-06, "loss": 0.9274, "step": 2847 }, { "epoch": 0.25, "grad_norm": 6.67807526834406, "learning_rate": 9.7551709565695e-06, "loss": 0.9736, "step": 2848 }, { "epoch": 0.25, "grad_norm": 5.681002116778516, "learning_rate": 9.75494763231721e-06, "loss": 0.8239, "step": 2849 }, { "epoch": 0.25, "grad_norm": 6.855690226134151, "learning_rate": 9.754724208815803e-06, "loss": 0.8363, "step": 2850 }, { "epoch": 0.25, "grad_norm": 7.74102483362881, "learning_rate": 9.75450068606994e-06, "loss": 0.9335, "step": 2851 }, { "epoch": 0.25, "grad_norm": 6.670280530294297, "learning_rate": 9.754277064084285e-06, "loss": 0.8493, "step": 2852 }, { "epoch": 0.25, "grad_norm": 5.718225502667886, "learning_rate": 9.75405334286351e-06, "loss": 0.8894, "step": 2853 }, { "epoch": 0.25, "grad_norm": 7.074470864918526, "learning_rate": 9.753829522412282e-06, "loss": 0.8212, "step": 2854 }, { "epoch": 0.25, "grad_norm": 7.383515466506689, "learning_rate": 9.753605602735274e-06, "loss": 0.8817, "step": 2855 }, { "epoch": 0.25, "grad_norm": 5.395338639125125, "learning_rate": 9.75338158383716e-06, "loss": 0.9058, "step": 2856 }, { "epoch": 0.25, "grad_norm": 9.420861052455948, "learning_rate": 9.753157465722613e-06, "loss": 0.9086, "step": 2857 }, { "epoch": 0.25, "grad_norm": 8.38096713872705, "learning_rate": 9.752933248396315e-06, "loss": 0.8873, "step": 2858 }, { "epoch": 0.26, "grad_norm": 6.417842873070168, "learning_rate": 9.752708931862944e-06, "loss": 0.8344, "step": 2859 }, { "epoch": 0.26, "grad_norm": 6.369877953894997, "learning_rate": 9.752484516127182e-06, "loss": 0.9023, "step": 2860 }, { "epoch": 0.26, "grad_norm": 5.470930717282282, "learning_rate": 9.752260001193714e-06, "loss": 0.889, "step": 2861 }, { "epoch": 0.26, "grad_norm": 7.624514304144448, "learning_rate": 9.752035387067224e-06, "loss": 0.8687, "step": 2862 }, { "epoch": 0.26, "grad_norm": 5.697291337858547, "learning_rate": 9.751810673752403e-06, "loss": 0.8582, "step": 2863 }, { "epoch": 0.26, "grad_norm": 8.198871372912354, "learning_rate": 9.75158586125394e-06, "loss": 0.8397, "step": 2864 }, { "epoch": 0.26, "grad_norm": 6.1966499117576745, "learning_rate": 9.75136094957653e-06, "loss": 0.8914, "step": 2865 }, { "epoch": 0.26, "grad_norm": 7.076035933448314, "learning_rate": 9.751135938724863e-06, "loss": 0.8432, "step": 2866 }, { "epoch": 0.26, "grad_norm": 5.7321615396185805, "learning_rate": 9.75091082870364e-06, "loss": 0.8619, "step": 2867 }, { "epoch": 0.26, "grad_norm": 10.236162654072077, "learning_rate": 9.750685619517557e-06, "loss": 0.8831, "step": 2868 }, { "epoch": 0.26, "grad_norm": 6.0368359989912825, "learning_rate": 9.750460311171316e-06, "loss": 0.8882, "step": 2869 }, { "epoch": 0.26, "grad_norm": 5.396985931217516, "learning_rate": 9.750234903669619e-06, "loss": 0.9123, "step": 2870 }, { "epoch": 0.26, "grad_norm": 7.263403710716351, "learning_rate": 9.750009397017171e-06, "loss": 0.8641, "step": 2871 }, { "epoch": 0.26, "grad_norm": 6.314063925399342, "learning_rate": 9.749783791218678e-06, "loss": 0.8502, "step": 2872 }, { "epoch": 0.26, "grad_norm": 7.322122493961188, "learning_rate": 9.749558086278851e-06, "loss": 0.9168, "step": 2873 }, { "epoch": 0.26, "grad_norm": 5.62407953546833, "learning_rate": 9.749332282202402e-06, "loss": 0.8678, "step": 2874 }, { "epoch": 0.26, "grad_norm": 6.0998585526245375, "learning_rate": 9.749106378994041e-06, "loss": 0.8696, "step": 2875 }, { "epoch": 0.26, "grad_norm": 6.4229610528251255, "learning_rate": 9.748880376658485e-06, "loss": 0.8164, "step": 2876 }, { "epoch": 0.26, "grad_norm": 7.381250291008569, "learning_rate": 9.74865427520045e-06, "loss": 0.9883, "step": 2877 }, { "epoch": 0.26, "grad_norm": 6.363132886885148, "learning_rate": 9.748428074624658e-06, "loss": 0.8567, "step": 2878 }, { "epoch": 0.26, "grad_norm": 6.620487746608921, "learning_rate": 9.748201774935827e-06, "loss": 0.8501, "step": 2879 }, { "epoch": 0.26, "grad_norm": 7.827970743734044, "learning_rate": 9.747975376138686e-06, "loss": 0.8584, "step": 2880 }, { "epoch": 0.26, "grad_norm": 5.523694046270023, "learning_rate": 9.747748878237952e-06, "loss": 0.867, "step": 2881 }, { "epoch": 0.26, "grad_norm": 7.3878216838003175, "learning_rate": 9.747522281238362e-06, "loss": 0.8426, "step": 2882 }, { "epoch": 0.26, "grad_norm": 8.137851769640685, "learning_rate": 9.747295585144638e-06, "loss": 0.868, "step": 2883 }, { "epoch": 0.26, "grad_norm": 6.343942854787575, "learning_rate": 9.747068789961517e-06, "loss": 0.8851, "step": 2884 }, { "epoch": 0.26, "grad_norm": 6.0798125758845565, "learning_rate": 9.74684189569373e-06, "loss": 0.9381, "step": 2885 }, { "epoch": 0.26, "grad_norm": 6.14861414292764, "learning_rate": 9.746614902346014e-06, "loss": 0.9195, "step": 2886 }, { "epoch": 0.26, "grad_norm": 6.888916749207589, "learning_rate": 9.746387809923107e-06, "loss": 0.7806, "step": 2887 }, { "epoch": 0.26, "grad_norm": 6.851349533372259, "learning_rate": 9.746160618429748e-06, "loss": 0.8738, "step": 2888 }, { "epoch": 0.26, "grad_norm": 6.264630920492703, "learning_rate": 9.745933327870682e-06, "loss": 0.8982, "step": 2889 }, { "epoch": 0.26, "grad_norm": 7.7055387728737506, "learning_rate": 9.74570593825065e-06, "loss": 0.8632, "step": 2890 }, { "epoch": 0.26, "grad_norm": 5.932900363596224, "learning_rate": 9.745478449574399e-06, "loss": 0.8282, "step": 2891 }, { "epoch": 0.26, "grad_norm": 6.0599422807706285, "learning_rate": 9.745250861846677e-06, "loss": 0.8605, "step": 2892 }, { "epoch": 0.26, "grad_norm": 6.405757695219403, "learning_rate": 9.745023175072237e-06, "loss": 0.8511, "step": 2893 }, { "epoch": 0.26, "grad_norm": 6.610856757394888, "learning_rate": 9.744795389255829e-06, "loss": 0.8605, "step": 2894 }, { "epoch": 0.26, "grad_norm": 6.941718061509259, "learning_rate": 9.744567504402207e-06, "loss": 0.9416, "step": 2895 }, { "epoch": 0.26, "grad_norm": 5.955518524405605, "learning_rate": 9.744339520516131e-06, "loss": 0.926, "step": 2896 }, { "epoch": 0.26, "grad_norm": 5.77032982974119, "learning_rate": 9.744111437602358e-06, "loss": 0.896, "step": 2897 }, { "epoch": 0.26, "grad_norm": 6.082696542555093, "learning_rate": 9.743883255665645e-06, "loss": 0.8159, "step": 2898 }, { "epoch": 0.26, "grad_norm": 6.779114541081023, "learning_rate": 9.743654974710759e-06, "loss": 0.9015, "step": 2899 }, { "epoch": 0.26, "grad_norm": 6.983439664921714, "learning_rate": 9.743426594742464e-06, "loss": 0.7906, "step": 2900 }, { "epoch": 0.26, "grad_norm": 7.193667111972571, "learning_rate": 9.743198115765529e-06, "loss": 0.8952, "step": 2901 }, { "epoch": 0.26, "grad_norm": 7.141342924566357, "learning_rate": 9.742969537784718e-06, "loss": 0.8941, "step": 2902 }, { "epoch": 0.26, "grad_norm": 5.628371712677069, "learning_rate": 9.742740860804805e-06, "loss": 0.833, "step": 2903 }, { "epoch": 0.26, "grad_norm": 6.388684274794961, "learning_rate": 9.742512084830565e-06, "loss": 0.8006, "step": 2904 }, { "epoch": 0.26, "grad_norm": 8.672479523925197, "learning_rate": 9.74228320986677e-06, "loss": 0.8271, "step": 2905 }, { "epoch": 0.26, "grad_norm": 5.091641752497882, "learning_rate": 9.742054235918197e-06, "loss": 0.8692, "step": 2906 }, { "epoch": 0.26, "grad_norm": 7.1540331560805335, "learning_rate": 9.74182516298963e-06, "loss": 0.9671, "step": 2907 }, { "epoch": 0.26, "grad_norm": 8.056251956754588, "learning_rate": 9.741595991085843e-06, "loss": 0.9516, "step": 2908 }, { "epoch": 0.26, "grad_norm": 8.867196236690921, "learning_rate": 9.741366720211627e-06, "loss": 0.8891, "step": 2909 }, { "epoch": 0.26, "grad_norm": 6.126255083200468, "learning_rate": 9.741137350371762e-06, "loss": 0.8397, "step": 2910 }, { "epoch": 0.26, "grad_norm": 7.015000204456142, "learning_rate": 9.740907881571039e-06, "loss": 0.8738, "step": 2911 }, { "epoch": 0.26, "grad_norm": 7.147275929650224, "learning_rate": 9.740678313814244e-06, "loss": 0.8892, "step": 2912 }, { "epoch": 0.26, "grad_norm": 7.807774428538608, "learning_rate": 9.740448647106174e-06, "loss": 0.899, "step": 2913 }, { "epoch": 0.26, "grad_norm": 10.828103234112147, "learning_rate": 9.740218881451619e-06, "loss": 0.7848, "step": 2914 }, { "epoch": 0.26, "grad_norm": 6.119381502937452, "learning_rate": 9.739989016855376e-06, "loss": 0.8818, "step": 2915 }, { "epoch": 0.26, "grad_norm": 6.960927044661801, "learning_rate": 9.739759053322243e-06, "loss": 0.9317, "step": 2916 }, { "epoch": 0.26, "grad_norm": 7.328862698144004, "learning_rate": 9.73952899085702e-06, "loss": 0.8599, "step": 2917 }, { "epoch": 0.26, "grad_norm": 6.019691564053099, "learning_rate": 9.739298829464506e-06, "loss": 0.9338, "step": 2918 }, { "epoch": 0.26, "grad_norm": 6.761833974412195, "learning_rate": 9.73906856914951e-06, "loss": 0.8485, "step": 2919 }, { "epoch": 0.26, "grad_norm": 8.208942206094564, "learning_rate": 9.738838209916836e-06, "loss": 0.8938, "step": 2920 }, { "epoch": 0.26, "grad_norm": 9.308792645723306, "learning_rate": 9.738607751771291e-06, "loss": 0.8737, "step": 2921 }, { "epoch": 0.26, "grad_norm": 7.055625211557587, "learning_rate": 9.738377194717687e-06, "loss": 0.8509, "step": 2922 }, { "epoch": 0.26, "grad_norm": 9.437071853954981, "learning_rate": 9.738146538760837e-06, "loss": 0.8952, "step": 2923 }, { "epoch": 0.26, "grad_norm": 5.979620390703249, "learning_rate": 9.737915783905553e-06, "loss": 0.886, "step": 2924 }, { "epoch": 0.26, "grad_norm": 5.264675802428782, "learning_rate": 9.737684930156654e-06, "loss": 0.861, "step": 2925 }, { "epoch": 0.26, "grad_norm": 7.97857322062904, "learning_rate": 9.737453977518957e-06, "loss": 0.9481, "step": 2926 }, { "epoch": 0.26, "grad_norm": 6.204513346029063, "learning_rate": 9.737222925997283e-06, "loss": 0.8769, "step": 2927 }, { "epoch": 0.26, "grad_norm": 6.403017162918406, "learning_rate": 9.736991775596456e-06, "loss": 0.8891, "step": 2928 }, { "epoch": 0.26, "grad_norm": 5.442809919887207, "learning_rate": 9.736760526321296e-06, "loss": 0.9208, "step": 2929 }, { "epoch": 0.26, "grad_norm": 5.763889608189788, "learning_rate": 9.736529178176636e-06, "loss": 0.8936, "step": 2930 }, { "epoch": 0.26, "grad_norm": 7.562682083101366, "learning_rate": 9.736297731167304e-06, "loss": 0.8586, "step": 2931 }, { "epoch": 0.26, "grad_norm": 6.888473653598032, "learning_rate": 9.736066185298125e-06, "loss": 0.8179, "step": 2932 }, { "epoch": 0.26, "grad_norm": 5.88260428310222, "learning_rate": 9.735834540573939e-06, "loss": 0.815, "step": 2933 }, { "epoch": 0.26, "grad_norm": 7.267593251941493, "learning_rate": 9.735602796999578e-06, "loss": 0.9035, "step": 2934 }, { "epoch": 0.26, "grad_norm": 6.3493494664780155, "learning_rate": 9.73537095457988e-06, "loss": 0.8981, "step": 2935 }, { "epoch": 0.26, "grad_norm": 5.909088475938786, "learning_rate": 9.73513901331968e-06, "loss": 0.8694, "step": 2936 }, { "epoch": 0.26, "grad_norm": 6.625050847153191, "learning_rate": 9.734906973223826e-06, "loss": 0.8215, "step": 2937 }, { "epoch": 0.26, "grad_norm": 6.5072111731740065, "learning_rate": 9.734674834297157e-06, "loss": 0.8131, "step": 2938 }, { "epoch": 0.26, "grad_norm": 8.90791703021879, "learning_rate": 9.734442596544521e-06, "loss": 0.8875, "step": 2939 }, { "epoch": 0.26, "grad_norm": 8.348527307421849, "learning_rate": 9.734210259970764e-06, "loss": 0.9028, "step": 2940 }, { "epoch": 0.26, "grad_norm": 5.362296515600365, "learning_rate": 9.733977824580734e-06, "loss": 0.8756, "step": 2941 }, { "epoch": 0.26, "grad_norm": 5.605867000641782, "learning_rate": 9.733745290379286e-06, "loss": 0.8215, "step": 2942 }, { "epoch": 0.26, "grad_norm": 7.627514498670066, "learning_rate": 9.73351265737127e-06, "loss": 0.8336, "step": 2943 }, { "epoch": 0.26, "grad_norm": 7.282500455703085, "learning_rate": 9.733279925561544e-06, "loss": 0.8803, "step": 2944 }, { "epoch": 0.26, "grad_norm": 7.22762304400222, "learning_rate": 9.733047094954967e-06, "loss": 0.9233, "step": 2945 }, { "epoch": 0.26, "grad_norm": 5.347144310677438, "learning_rate": 9.732814165556394e-06, "loss": 0.8143, "step": 2946 }, { "epoch": 0.26, "grad_norm": 6.699640097327885, "learning_rate": 9.73258113737069e-06, "loss": 0.8497, "step": 2947 }, { "epoch": 0.26, "grad_norm": 5.0083609867977374, "learning_rate": 9.732348010402723e-06, "loss": 0.8671, "step": 2948 }, { "epoch": 0.26, "grad_norm": 7.03530284541389, "learning_rate": 9.732114784657352e-06, "loss": 0.8384, "step": 2949 }, { "epoch": 0.26, "grad_norm": 5.216601275165391, "learning_rate": 9.731881460139448e-06, "loss": 0.9267, "step": 2950 }, { "epoch": 0.26, "grad_norm": 7.275725666034609, "learning_rate": 9.731648036853882e-06, "loss": 0.8775, "step": 2951 }, { "epoch": 0.26, "grad_norm": 7.600833917916779, "learning_rate": 9.731414514805525e-06, "loss": 0.8959, "step": 2952 }, { "epoch": 0.26, "grad_norm": 5.962874566436721, "learning_rate": 9.731180893999252e-06, "loss": 0.8608, "step": 2953 }, { "epoch": 0.26, "grad_norm": 5.975862917741863, "learning_rate": 9.73094717443994e-06, "loss": 0.8517, "step": 2954 }, { "epoch": 0.26, "grad_norm": 7.308199314459953, "learning_rate": 9.730713356132464e-06, "loss": 0.8908, "step": 2955 }, { "epoch": 0.26, "grad_norm": 4.9865638546773505, "learning_rate": 9.730479439081707e-06, "loss": 0.8101, "step": 2956 }, { "epoch": 0.26, "grad_norm": 7.1604430967937684, "learning_rate": 9.730245423292552e-06, "loss": 0.8953, "step": 2957 }, { "epoch": 0.26, "grad_norm": 7.7685934922296935, "learning_rate": 9.730011308769883e-06, "loss": 0.9055, "step": 2958 }, { "epoch": 0.26, "grad_norm": 5.924664523432183, "learning_rate": 9.729777095518586e-06, "loss": 0.8468, "step": 2959 }, { "epoch": 0.26, "grad_norm": 7.030315269337679, "learning_rate": 9.72954278354355e-06, "loss": 0.8628, "step": 2960 }, { "epoch": 0.26, "grad_norm": 5.599489734621938, "learning_rate": 9.729308372849667e-06, "loss": 0.8848, "step": 2961 }, { "epoch": 0.26, "grad_norm": 8.954298784004244, "learning_rate": 9.729073863441828e-06, "loss": 0.9052, "step": 2962 }, { "epoch": 0.26, "grad_norm": 7.327207387368721, "learning_rate": 9.728839255324928e-06, "loss": 0.8665, "step": 2963 }, { "epoch": 0.26, "grad_norm": 7.813953664625126, "learning_rate": 9.728604548503865e-06, "loss": 0.8904, "step": 2964 }, { "epoch": 0.26, "grad_norm": 7.192390636695897, "learning_rate": 9.728369742983539e-06, "loss": 0.7919, "step": 2965 }, { "epoch": 0.26, "grad_norm": 4.73410436949795, "learning_rate": 9.728134838768847e-06, "loss": 0.8438, "step": 2966 }, { "epoch": 0.26, "grad_norm": 6.266903500290609, "learning_rate": 9.727899835864697e-06, "loss": 0.8595, "step": 2967 }, { "epoch": 0.26, "grad_norm": 6.435451006971222, "learning_rate": 9.72766473427599e-06, "loss": 0.9297, "step": 2968 }, { "epoch": 0.26, "grad_norm": 6.826049793218305, "learning_rate": 9.727429534007638e-06, "loss": 0.8379, "step": 2969 }, { "epoch": 0.26, "grad_norm": 6.376219537423548, "learning_rate": 9.727194235064544e-06, "loss": 0.7575, "step": 2970 }, { "epoch": 0.27, "grad_norm": 6.52114784233041, "learning_rate": 9.726958837451624e-06, "loss": 0.9139, "step": 2971 }, { "epoch": 0.27, "grad_norm": 9.652068023351346, "learning_rate": 9.726723341173791e-06, "loss": 0.8638, "step": 2972 }, { "epoch": 0.27, "grad_norm": 6.942138630915804, "learning_rate": 9.726487746235957e-06, "loss": 0.8446, "step": 2973 }, { "epoch": 0.27, "grad_norm": 6.174683412330826, "learning_rate": 9.726252052643046e-06, "loss": 0.7962, "step": 2974 }, { "epoch": 0.27, "grad_norm": 6.744745658971282, "learning_rate": 9.726016260399971e-06, "loss": 0.879, "step": 2975 }, { "epoch": 0.27, "grad_norm": 7.416093318737307, "learning_rate": 9.725780369511657e-06, "loss": 0.7969, "step": 2976 }, { "epoch": 0.27, "grad_norm": 6.8958048999102255, "learning_rate": 9.725544379983028e-06, "loss": 0.8167, "step": 2977 }, { "epoch": 0.27, "grad_norm": 6.997307011734548, "learning_rate": 9.725308291819007e-06, "loss": 0.8657, "step": 2978 }, { "epoch": 0.27, "grad_norm": 6.663988356169776, "learning_rate": 9.725072105024524e-06, "loss": 0.8249, "step": 2979 }, { "epoch": 0.27, "grad_norm": 8.772938558538891, "learning_rate": 9.724835819604509e-06, "loss": 0.8484, "step": 2980 }, { "epoch": 0.27, "grad_norm": 6.17723222878287, "learning_rate": 9.724599435563893e-06, "loss": 0.7995, "step": 2981 }, { "epoch": 0.27, "grad_norm": 8.188131035102536, "learning_rate": 9.724362952907611e-06, "loss": 0.9012, "step": 2982 }, { "epoch": 0.27, "grad_norm": 5.601561459703495, "learning_rate": 9.724126371640598e-06, "loss": 0.846, "step": 2983 }, { "epoch": 0.27, "grad_norm": 7.339322277599898, "learning_rate": 9.723889691767793e-06, "loss": 0.9109, "step": 2984 }, { "epoch": 0.27, "grad_norm": 6.890204103196911, "learning_rate": 9.723652913294138e-06, "loss": 0.8524, "step": 2985 }, { "epoch": 0.27, "grad_norm": 7.417764764532755, "learning_rate": 9.723416036224571e-06, "loss": 0.9152, "step": 2986 }, { "epoch": 0.27, "grad_norm": 6.830040889304214, "learning_rate": 9.72317906056404e-06, "loss": 0.88, "step": 2987 }, { "epoch": 0.27, "grad_norm": 8.35729496556518, "learning_rate": 9.722941986317487e-06, "loss": 0.8499, "step": 2988 }, { "epoch": 0.27, "grad_norm": 5.819337618132686, "learning_rate": 9.722704813489863e-06, "loss": 0.8945, "step": 2989 }, { "epoch": 0.27, "grad_norm": 5.624524072529956, "learning_rate": 9.722467542086121e-06, "loss": 0.8033, "step": 2990 }, { "epoch": 0.27, "grad_norm": 5.739670631633869, "learning_rate": 9.722230172111208e-06, "loss": 0.8676, "step": 2991 }, { "epoch": 0.27, "grad_norm": 5.705037771502696, "learning_rate": 9.721992703570085e-06, "loss": 0.9331, "step": 2992 }, { "epoch": 0.27, "grad_norm": 6.72512206209416, "learning_rate": 9.721755136467703e-06, "loss": 0.8404, "step": 2993 }, { "epoch": 0.27, "grad_norm": 7.81601356479582, "learning_rate": 9.721517470809025e-06, "loss": 0.8172, "step": 2994 }, { "epoch": 0.27, "grad_norm": 6.738913689577057, "learning_rate": 9.721279706599008e-06, "loss": 0.893, "step": 2995 }, { "epoch": 0.27, "grad_norm": 4.559604397382546, "learning_rate": 9.721041843842617e-06, "loss": 0.8801, "step": 2996 }, { "epoch": 0.27, "grad_norm": 6.289574030331662, "learning_rate": 9.720803882544817e-06, "loss": 0.849, "step": 2997 }, { "epoch": 0.27, "grad_norm": 7.295826900174799, "learning_rate": 9.720565822710573e-06, "loss": 0.8815, "step": 2998 }, { "epoch": 0.27, "grad_norm": 8.378654818142778, "learning_rate": 9.720327664344857e-06, "loss": 0.8586, "step": 2999 }, { "epoch": 0.27, "grad_norm": 6.009031717970705, "learning_rate": 9.720089407452638e-06, "loss": 0.8706, "step": 3000 }, { "epoch": 0.27, "grad_norm": 7.247181007463583, "learning_rate": 9.71985105203889e-06, "loss": 0.8761, "step": 3001 }, { "epoch": 0.27, "grad_norm": 6.392037019565254, "learning_rate": 9.719612598108586e-06, "loss": 0.8636, "step": 3002 }, { "epoch": 0.27, "grad_norm": 6.502236968620264, "learning_rate": 9.719374045666705e-06, "loss": 0.8564, "step": 3003 }, { "epoch": 0.27, "grad_norm": 6.804786698984423, "learning_rate": 9.719135394718226e-06, "loss": 0.9188, "step": 3004 }, { "epoch": 0.27, "grad_norm": 5.798845885189531, "learning_rate": 9.71889664526813e-06, "loss": 0.8413, "step": 3005 }, { "epoch": 0.27, "grad_norm": 6.751671984973259, "learning_rate": 9.718657797321403e-06, "loss": 0.8483, "step": 3006 }, { "epoch": 0.27, "grad_norm": 6.657856568841324, "learning_rate": 9.718418850883026e-06, "loss": 0.8498, "step": 3007 }, { "epoch": 0.27, "grad_norm": 8.309178735831084, "learning_rate": 9.718179805957989e-06, "loss": 0.9353, "step": 3008 }, { "epoch": 0.27, "grad_norm": 4.960153357440854, "learning_rate": 9.717940662551282e-06, "loss": 0.8651, "step": 3009 }, { "epoch": 0.27, "grad_norm": 6.721039105715273, "learning_rate": 9.717701420667896e-06, "loss": 0.8622, "step": 3010 }, { "epoch": 0.27, "grad_norm": 8.109305585256216, "learning_rate": 9.717462080312822e-06, "loss": 0.9345, "step": 3011 }, { "epoch": 0.27, "grad_norm": 6.576706305775325, "learning_rate": 9.71722264149106e-06, "loss": 0.8873, "step": 3012 }, { "epoch": 0.27, "grad_norm": 7.083828073709991, "learning_rate": 9.716983104207606e-06, "loss": 0.8397, "step": 3013 }, { "epoch": 0.27, "grad_norm": 7.432312790722551, "learning_rate": 9.71674346846746e-06, "loss": 0.9376, "step": 3014 }, { "epoch": 0.27, "grad_norm": 6.289393904218191, "learning_rate": 9.716503734275622e-06, "loss": 0.8611, "step": 3015 }, { "epoch": 0.27, "grad_norm": 4.913699490982757, "learning_rate": 9.7162639016371e-06, "loss": 0.8188, "step": 3016 }, { "epoch": 0.27, "grad_norm": 7.401470100349791, "learning_rate": 9.716023970556896e-06, "loss": 0.9262, "step": 3017 }, { "epoch": 0.27, "grad_norm": 6.737600280104129, "learning_rate": 9.715783941040021e-06, "loss": 0.8469, "step": 3018 }, { "epoch": 0.27, "grad_norm": 8.35588559436862, "learning_rate": 9.715543813091481e-06, "loss": 0.8515, "step": 3019 }, { "epoch": 0.27, "grad_norm": 5.888578498117389, "learning_rate": 9.715303586716292e-06, "loss": 0.8081, "step": 3020 }, { "epoch": 0.27, "grad_norm": 5.083627670957956, "learning_rate": 9.715063261919467e-06, "loss": 0.8631, "step": 3021 }, { "epoch": 0.27, "grad_norm": 6.4877558159340385, "learning_rate": 9.714822838706021e-06, "loss": 0.8517, "step": 3022 }, { "epoch": 0.27, "grad_norm": 6.423370863219031, "learning_rate": 9.714582317080976e-06, "loss": 0.9301, "step": 3023 }, { "epoch": 0.27, "grad_norm": 7.077866947948512, "learning_rate": 9.714341697049348e-06, "loss": 0.8866, "step": 3024 }, { "epoch": 0.27, "grad_norm": 6.7058037974219165, "learning_rate": 9.714100978616162e-06, "loss": 0.8504, "step": 3025 }, { "epoch": 0.27, "grad_norm": 7.126247431815928, "learning_rate": 9.713860161786441e-06, "loss": 0.8418, "step": 3026 }, { "epoch": 0.27, "grad_norm": 6.12274760390638, "learning_rate": 9.713619246565214e-06, "loss": 0.8604, "step": 3027 }, { "epoch": 0.27, "grad_norm": 7.1009013808003365, "learning_rate": 9.713378232957507e-06, "loss": 0.9589, "step": 3028 }, { "epoch": 0.27, "grad_norm": 5.294754227005092, "learning_rate": 9.713137120968353e-06, "loss": 0.8648, "step": 3029 }, { "epoch": 0.27, "grad_norm": 6.72011645450055, "learning_rate": 9.71289591060278e-06, "loss": 0.7517, "step": 3030 }, { "epoch": 0.27, "grad_norm": 8.324603758460063, "learning_rate": 9.712654601865827e-06, "loss": 0.821, "step": 3031 }, { "epoch": 0.27, "grad_norm": 7.30609037253442, "learning_rate": 9.712413194762532e-06, "loss": 0.9233, "step": 3032 }, { "epoch": 0.27, "grad_norm": 6.864252397438357, "learning_rate": 9.712171689297929e-06, "loss": 0.932, "step": 3033 }, { "epoch": 0.27, "grad_norm": 5.828146196493721, "learning_rate": 9.71193008547706e-06, "loss": 0.8605, "step": 3034 }, { "epoch": 0.27, "grad_norm": 7.226277686181857, "learning_rate": 9.711688383304973e-06, "loss": 0.8937, "step": 3035 }, { "epoch": 0.27, "grad_norm": 7.763712090779456, "learning_rate": 9.711446582786709e-06, "loss": 0.7996, "step": 3036 }, { "epoch": 0.27, "grad_norm": 6.323801297513233, "learning_rate": 9.711204683927314e-06, "loss": 0.8789, "step": 3037 }, { "epoch": 0.27, "grad_norm": 6.392012742147735, "learning_rate": 9.71096268673184e-06, "loss": 0.8255, "step": 3038 }, { "epoch": 0.27, "grad_norm": 6.506087504924075, "learning_rate": 9.710720591205336e-06, "loss": 0.8745, "step": 3039 }, { "epoch": 0.27, "grad_norm": 6.925964866910463, "learning_rate": 9.710478397352855e-06, "loss": 0.846, "step": 3040 }, { "epoch": 0.27, "grad_norm": 8.91284595079221, "learning_rate": 9.710236105179455e-06, "loss": 0.869, "step": 3041 }, { "epoch": 0.27, "grad_norm": 7.0869896512749735, "learning_rate": 9.70999371469019e-06, "loss": 0.921, "step": 3042 }, { "epoch": 0.27, "grad_norm": 8.494163483001028, "learning_rate": 9.709751225890122e-06, "loss": 0.9077, "step": 3043 }, { "epoch": 0.27, "grad_norm": 8.2843385399789, "learning_rate": 9.709508638784311e-06, "loss": 0.859, "step": 3044 }, { "epoch": 0.27, "grad_norm": 8.530931226316387, "learning_rate": 9.70926595337782e-06, "loss": 0.8734, "step": 3045 }, { "epoch": 0.27, "grad_norm": 7.581384372045062, "learning_rate": 9.709023169675717e-06, "loss": 0.9032, "step": 3046 }, { "epoch": 0.27, "grad_norm": 6.549839701851494, "learning_rate": 9.708780287683066e-06, "loss": 0.8154, "step": 3047 }, { "epoch": 0.27, "grad_norm": 5.937033524548889, "learning_rate": 9.70853730740494e-06, "loss": 0.8937, "step": 3048 }, { "epoch": 0.27, "grad_norm": 5.5795631402627635, "learning_rate": 9.708294228846408e-06, "loss": 0.902, "step": 3049 }, { "epoch": 0.27, "grad_norm": 6.402778556155768, "learning_rate": 9.708051052012546e-06, "loss": 0.8409, "step": 3050 }, { "epoch": 0.27, "grad_norm": 8.196068560860864, "learning_rate": 9.70780777690843e-06, "loss": 0.9531, "step": 3051 }, { "epoch": 0.27, "grad_norm": 6.893785059208222, "learning_rate": 9.707564403539134e-06, "loss": 0.874, "step": 3052 }, { "epoch": 0.27, "grad_norm": 6.09095217779118, "learning_rate": 9.707320931909742e-06, "loss": 0.8364, "step": 3053 }, { "epoch": 0.27, "grad_norm": 7.657812298400147, "learning_rate": 9.707077362025334e-06, "loss": 0.9116, "step": 3054 }, { "epoch": 0.27, "grad_norm": 6.825614940690465, "learning_rate": 9.706833693890993e-06, "loss": 0.8039, "step": 3055 }, { "epoch": 0.27, "grad_norm": 5.944091454606696, "learning_rate": 9.706589927511809e-06, "loss": 0.9097, "step": 3056 }, { "epoch": 0.27, "grad_norm": 7.713767355178581, "learning_rate": 9.706346062892867e-06, "loss": 0.8914, "step": 3057 }, { "epoch": 0.27, "grad_norm": 5.254164684885612, "learning_rate": 9.706102100039257e-06, "loss": 0.8299, "step": 3058 }, { "epoch": 0.27, "grad_norm": 5.470055035643572, "learning_rate": 9.705858038956072e-06, "loss": 0.8037, "step": 3059 }, { "epoch": 0.27, "grad_norm": 5.782994962006137, "learning_rate": 9.705613879648404e-06, "loss": 0.8406, "step": 3060 }, { "epoch": 0.27, "grad_norm": 7.674539505744804, "learning_rate": 9.705369622121353e-06, "loss": 0.8432, "step": 3061 }, { "epoch": 0.27, "grad_norm": 8.667483778679136, "learning_rate": 9.705125266380017e-06, "loss": 0.7977, "step": 3062 }, { "epoch": 0.27, "grad_norm": 6.317012052566255, "learning_rate": 9.704880812429494e-06, "loss": 0.8351, "step": 3063 }, { "epoch": 0.27, "grad_norm": 6.776689852676243, "learning_rate": 9.704636260274887e-06, "loss": 0.848, "step": 3064 }, { "epoch": 0.27, "grad_norm": 6.279969165424993, "learning_rate": 9.704391609921302e-06, "loss": 0.8754, "step": 3065 }, { "epoch": 0.27, "grad_norm": 7.121376733645529, "learning_rate": 9.704146861373844e-06, "loss": 0.8667, "step": 3066 }, { "epoch": 0.27, "grad_norm": 7.466646014938607, "learning_rate": 9.703902014637623e-06, "loss": 0.8281, "step": 3067 }, { "epoch": 0.27, "grad_norm": 5.54825064934803, "learning_rate": 9.703657069717748e-06, "loss": 0.8476, "step": 3068 }, { "epoch": 0.27, "grad_norm": 6.438999694595895, "learning_rate": 9.703412026619332e-06, "loss": 0.881, "step": 3069 }, { "epoch": 0.27, "grad_norm": 7.793220747208214, "learning_rate": 9.703166885347492e-06, "loss": 0.98, "step": 3070 }, { "epoch": 0.27, "grad_norm": 6.9333955266146585, "learning_rate": 9.702921645907341e-06, "loss": 0.7855, "step": 3071 }, { "epoch": 0.27, "grad_norm": 6.538153711101955, "learning_rate": 9.702676308304001e-06, "loss": 0.8254, "step": 3072 }, { "epoch": 0.27, "grad_norm": 7.011271229283828, "learning_rate": 9.702430872542592e-06, "loss": 0.8777, "step": 3073 }, { "epoch": 0.27, "grad_norm": 6.732567330834858, "learning_rate": 9.702185338628237e-06, "loss": 0.7949, "step": 3074 }, { "epoch": 0.27, "grad_norm": 6.248069808278189, "learning_rate": 9.70193970656606e-06, "loss": 0.8518, "step": 3075 }, { "epoch": 0.27, "grad_norm": 8.312674565886622, "learning_rate": 9.701693976361188e-06, "loss": 0.8338, "step": 3076 }, { "epoch": 0.27, "grad_norm": 7.922212125874724, "learning_rate": 9.701448148018751e-06, "loss": 0.9342, "step": 3077 }, { "epoch": 0.27, "grad_norm": 6.5156878344543605, "learning_rate": 9.701202221543881e-06, "loss": 0.8259, "step": 3078 }, { "epoch": 0.27, "grad_norm": 7.028429782770404, "learning_rate": 9.70095619694171e-06, "loss": 0.8851, "step": 3079 }, { "epoch": 0.27, "grad_norm": 7.1805829835697725, "learning_rate": 9.700710074217372e-06, "loss": 0.8366, "step": 3080 }, { "epoch": 0.27, "grad_norm": 5.8203835597586835, "learning_rate": 9.700463853376006e-06, "loss": 0.8549, "step": 3081 }, { "epoch": 0.27, "grad_norm": 5.706546882775098, "learning_rate": 9.700217534422752e-06, "loss": 0.9334, "step": 3082 }, { "epoch": 0.28, "grad_norm": 8.0726992502605, "learning_rate": 9.69997111736275e-06, "loss": 0.8607, "step": 3083 }, { "epoch": 0.28, "grad_norm": 5.968332838288017, "learning_rate": 9.699724602201142e-06, "loss": 0.8509, "step": 3084 }, { "epoch": 0.28, "grad_norm": 6.393826786263297, "learning_rate": 9.699477988943077e-06, "loss": 0.9329, "step": 3085 }, { "epoch": 0.28, "grad_norm": 8.109300297285262, "learning_rate": 9.6992312775937e-06, "loss": 0.8247, "step": 3086 }, { "epoch": 0.28, "grad_norm": 5.50054179465883, "learning_rate": 9.698984468158162e-06, "loss": 0.8819, "step": 3087 }, { "epoch": 0.28, "grad_norm": 6.394411122300468, "learning_rate": 9.698737560641613e-06, "loss": 0.9489, "step": 3088 }, { "epoch": 0.28, "grad_norm": 5.407657738506801, "learning_rate": 9.698490555049208e-06, "loss": 0.8444, "step": 3089 }, { "epoch": 0.28, "grad_norm": 7.273921730997827, "learning_rate": 9.6982434513861e-06, "loss": 0.8846, "step": 3090 }, { "epoch": 0.28, "grad_norm": 7.542645993087722, "learning_rate": 9.69799624965745e-06, "loss": 0.8066, "step": 3091 }, { "epoch": 0.28, "grad_norm": 8.059994910208939, "learning_rate": 9.697748949868419e-06, "loss": 0.8113, "step": 3092 }, { "epoch": 0.28, "grad_norm": 4.966063802944681, "learning_rate": 9.697501552024165e-06, "loss": 0.8055, "step": 3093 }, { "epoch": 0.28, "grad_norm": 7.60635306931067, "learning_rate": 9.697254056129852e-06, "loss": 0.9323, "step": 3094 }, { "epoch": 0.28, "grad_norm": 5.215033859835412, "learning_rate": 9.69700646219065e-06, "loss": 0.9075, "step": 3095 }, { "epoch": 0.28, "grad_norm": 7.950879798369478, "learning_rate": 9.696758770211723e-06, "loss": 0.8296, "step": 3096 }, { "epoch": 0.28, "grad_norm": 5.480255455704876, "learning_rate": 9.696510980198243e-06, "loss": 0.8272, "step": 3097 }, { "epoch": 0.28, "grad_norm": 7.210900774178484, "learning_rate": 9.69626309215538e-06, "loss": 0.826, "step": 3098 }, { "epoch": 0.28, "grad_norm": 8.818406427982387, "learning_rate": 9.696015106088311e-06, "loss": 0.8786, "step": 3099 }, { "epoch": 0.28, "grad_norm": 6.888962304077095, "learning_rate": 9.69576702200221e-06, "loss": 0.8406, "step": 3100 }, { "epoch": 0.28, "grad_norm": 6.758322634006026, "learning_rate": 9.695518839902258e-06, "loss": 0.8815, "step": 3101 }, { "epoch": 0.28, "grad_norm": 6.041040975960407, "learning_rate": 9.69527055979363e-06, "loss": 0.8772, "step": 3102 }, { "epoch": 0.28, "grad_norm": 7.493073765176016, "learning_rate": 9.695022181681514e-06, "loss": 0.8644, "step": 3103 }, { "epoch": 0.28, "grad_norm": 6.398889080429567, "learning_rate": 9.69477370557109e-06, "loss": 0.8232, "step": 3104 }, { "epoch": 0.28, "grad_norm": 8.314973260921656, "learning_rate": 9.694525131467547e-06, "loss": 0.9314, "step": 3105 }, { "epoch": 0.28, "grad_norm": 7.406315483476398, "learning_rate": 9.694276459376075e-06, "loss": 0.8469, "step": 3106 }, { "epoch": 0.28, "grad_norm": 7.825182870472401, "learning_rate": 9.69402768930186e-06, "loss": 0.9465, "step": 3107 }, { "epoch": 0.28, "grad_norm": 6.497608041587692, "learning_rate": 9.693778821250096e-06, "loss": 0.8122, "step": 3108 }, { "epoch": 0.28, "grad_norm": 5.887614620199217, "learning_rate": 9.693529855225977e-06, "loss": 0.8395, "step": 3109 }, { "epoch": 0.28, "grad_norm": 7.302208079550322, "learning_rate": 9.693280791234706e-06, "loss": 0.8805, "step": 3110 }, { "epoch": 0.28, "grad_norm": 6.25617539695755, "learning_rate": 9.693031629281473e-06, "loss": 0.8314, "step": 3111 }, { "epoch": 0.28, "grad_norm": 7.970438627994429, "learning_rate": 9.692782369371482e-06, "loss": 0.8578, "step": 3112 }, { "epoch": 0.28, "grad_norm": 6.058051769939529, "learning_rate": 9.692533011509939e-06, "loss": 0.8264, "step": 3113 }, { "epoch": 0.28, "grad_norm": 5.337832567677477, "learning_rate": 9.692283555702044e-06, "loss": 0.8891, "step": 3114 }, { "epoch": 0.28, "grad_norm": 6.437364920677783, "learning_rate": 9.692034001953005e-06, "loss": 0.8312, "step": 3115 }, { "epoch": 0.28, "grad_norm": 7.835990075673858, "learning_rate": 9.691784350268031e-06, "loss": 0.8553, "step": 3116 }, { "epoch": 0.28, "grad_norm": 5.380677278052266, "learning_rate": 9.691534600652335e-06, "loss": 0.816, "step": 3117 }, { "epoch": 0.28, "grad_norm": 6.787092320460224, "learning_rate": 9.69128475311113e-06, "loss": 0.8289, "step": 3118 }, { "epoch": 0.28, "grad_norm": 7.790469621322249, "learning_rate": 9.691034807649626e-06, "loss": 0.9361, "step": 3119 }, { "epoch": 0.28, "grad_norm": 5.920477493960096, "learning_rate": 9.690784764273046e-06, "loss": 0.8681, "step": 3120 }, { "epoch": 0.28, "grad_norm": 7.308046893441458, "learning_rate": 9.690534622986606e-06, "loss": 0.9, "step": 3121 }, { "epoch": 0.28, "grad_norm": 6.318040749637306, "learning_rate": 9.690284383795528e-06, "loss": 0.8635, "step": 3122 }, { "epoch": 0.28, "grad_norm": 6.6210665613162325, "learning_rate": 9.690034046705034e-06, "loss": 0.8076, "step": 3123 }, { "epoch": 0.28, "grad_norm": 6.945470680369091, "learning_rate": 9.689783611720352e-06, "loss": 0.8656, "step": 3124 }, { "epoch": 0.28, "grad_norm": 6.474235876074848, "learning_rate": 9.689533078846707e-06, "loss": 0.9374, "step": 3125 }, { "epoch": 0.28, "grad_norm": 7.175672950706755, "learning_rate": 9.68928244808933e-06, "loss": 0.8464, "step": 3126 }, { "epoch": 0.28, "grad_norm": 7.636105792392971, "learning_rate": 9.689031719453448e-06, "loss": 0.879, "step": 3127 }, { "epoch": 0.28, "grad_norm": 6.585011262553224, "learning_rate": 9.688780892944301e-06, "loss": 0.9177, "step": 3128 }, { "epoch": 0.28, "grad_norm": 7.704417870731716, "learning_rate": 9.688529968567118e-06, "loss": 0.8571, "step": 3129 }, { "epoch": 0.28, "grad_norm": 5.415263129546187, "learning_rate": 9.688278946327142e-06, "loss": 0.7728, "step": 3130 }, { "epoch": 0.28, "grad_norm": 7.278537664929393, "learning_rate": 9.68802782622961e-06, "loss": 0.8681, "step": 3131 }, { "epoch": 0.28, "grad_norm": 6.769998127805124, "learning_rate": 9.687776608279762e-06, "loss": 0.9236, "step": 3132 }, { "epoch": 0.28, "grad_norm": 6.268969041925272, "learning_rate": 9.687525292482845e-06, "loss": 0.8591, "step": 3133 }, { "epoch": 0.28, "grad_norm": 6.929442309200846, "learning_rate": 9.687273878844104e-06, "loss": 0.8484, "step": 3134 }, { "epoch": 0.28, "grad_norm": 6.5056650256862225, "learning_rate": 9.687022367368783e-06, "loss": 0.8376, "step": 3135 }, { "epoch": 0.28, "grad_norm": 6.158105730098482, "learning_rate": 9.686770758062136e-06, "loss": 0.8718, "step": 3136 }, { "epoch": 0.28, "grad_norm": 6.393676039939339, "learning_rate": 9.686519050929413e-06, "loss": 0.9134, "step": 3137 }, { "epoch": 0.28, "grad_norm": 6.055929185913235, "learning_rate": 9.68626724597587e-06, "loss": 0.8547, "step": 3138 }, { "epoch": 0.28, "grad_norm": 7.834701431409927, "learning_rate": 9.686015343206757e-06, "loss": 0.8531, "step": 3139 }, { "epoch": 0.28, "grad_norm": 6.415379673532301, "learning_rate": 9.68576334262734e-06, "loss": 0.9031, "step": 3140 }, { "epoch": 0.28, "grad_norm": 6.504060836737989, "learning_rate": 9.685511244242871e-06, "loss": 0.8711, "step": 3141 }, { "epoch": 0.28, "grad_norm": 7.460845434511139, "learning_rate": 9.685259048058618e-06, "loss": 0.8177, "step": 3142 }, { "epoch": 0.28, "grad_norm": 6.384491072062221, "learning_rate": 9.685006754079842e-06, "loss": 0.8618, "step": 3143 }, { "epoch": 0.28, "grad_norm": 7.6375318072833025, "learning_rate": 9.684754362311811e-06, "loss": 0.8295, "step": 3144 }, { "epoch": 0.28, "grad_norm": 6.15981836815742, "learning_rate": 9.684501872759792e-06, "loss": 0.8692, "step": 3145 }, { "epoch": 0.28, "grad_norm": 8.043785083444916, "learning_rate": 9.684249285429055e-06, "loss": 0.7807, "step": 3146 }, { "epoch": 0.28, "grad_norm": 6.75189633989718, "learning_rate": 9.683996600324874e-06, "loss": 0.8626, "step": 3147 }, { "epoch": 0.28, "grad_norm": 6.568543420948107, "learning_rate": 9.68374381745252e-06, "loss": 0.8825, "step": 3148 }, { "epoch": 0.28, "grad_norm": 6.263052597374487, "learning_rate": 9.68349093681727e-06, "loss": 0.8815, "step": 3149 }, { "epoch": 0.28, "grad_norm": 5.067041652148552, "learning_rate": 9.683237958424406e-06, "loss": 0.8472, "step": 3150 }, { "epoch": 0.28, "grad_norm": 7.245109202971791, "learning_rate": 9.682984882279204e-06, "loss": 0.9488, "step": 3151 }, { "epoch": 0.28, "grad_norm": 6.022896106761805, "learning_rate": 9.682731708386948e-06, "loss": 0.8679, "step": 3152 }, { "epoch": 0.28, "grad_norm": 9.92315186851741, "learning_rate": 9.682478436752924e-06, "loss": 0.8758, "step": 3153 }, { "epoch": 0.28, "grad_norm": 6.454874714822032, "learning_rate": 9.682225067382417e-06, "loss": 0.8318, "step": 3154 }, { "epoch": 0.28, "grad_norm": 7.754173493299103, "learning_rate": 9.681971600280714e-06, "loss": 0.9687, "step": 3155 }, { "epoch": 0.28, "grad_norm": 6.1088758266358125, "learning_rate": 9.681718035453108e-06, "loss": 0.9343, "step": 3156 }, { "epoch": 0.28, "grad_norm": 6.258531043098854, "learning_rate": 9.681464372904892e-06, "loss": 0.8337, "step": 3157 }, { "epoch": 0.28, "grad_norm": 8.148949817233103, "learning_rate": 9.681210612641359e-06, "loss": 0.8502, "step": 3158 }, { "epoch": 0.28, "grad_norm": 6.130661953832419, "learning_rate": 9.680956754667805e-06, "loss": 0.8584, "step": 3159 }, { "epoch": 0.28, "grad_norm": 8.965400212516986, "learning_rate": 9.680702798989529e-06, "loss": 0.8964, "step": 3160 }, { "epoch": 0.28, "grad_norm": 6.8014045043185645, "learning_rate": 9.680448745611835e-06, "loss": 0.8632, "step": 3161 }, { "epoch": 0.28, "grad_norm": 6.252974689431325, "learning_rate": 9.680194594540021e-06, "loss": 0.8341, "step": 3162 }, { "epoch": 0.28, "grad_norm": 5.216286371943972, "learning_rate": 9.679940345779398e-06, "loss": 0.9139, "step": 3163 }, { "epoch": 0.28, "grad_norm": 5.57946937979855, "learning_rate": 9.679685999335265e-06, "loss": 0.8534, "step": 3164 }, { "epoch": 0.28, "grad_norm": 6.985706025320577, "learning_rate": 9.679431555212938e-06, "loss": 0.7925, "step": 3165 }, { "epoch": 0.28, "grad_norm": 7.804563194858407, "learning_rate": 9.679177013417724e-06, "loss": 0.8916, "step": 3166 }, { "epoch": 0.28, "grad_norm": 7.555506626811849, "learning_rate": 9.678922373954938e-06, "loss": 0.8665, "step": 3167 }, { "epoch": 0.28, "grad_norm": 4.949576602526726, "learning_rate": 9.678667636829892e-06, "loss": 0.885, "step": 3168 }, { "epoch": 0.28, "grad_norm": 7.292594944687893, "learning_rate": 9.678412802047907e-06, "loss": 0.8441, "step": 3169 }, { "epoch": 0.28, "grad_norm": 6.860988032981282, "learning_rate": 9.678157869614299e-06, "loss": 0.8866, "step": 3170 }, { "epoch": 0.28, "grad_norm": 6.408207106265668, "learning_rate": 9.677902839534391e-06, "loss": 0.8996, "step": 3171 }, { "epoch": 0.28, "grad_norm": 4.988207734842082, "learning_rate": 9.677647711813508e-06, "loss": 0.9256, "step": 3172 }, { "epoch": 0.28, "grad_norm": 7.809379842703858, "learning_rate": 9.677392486456969e-06, "loss": 0.8721, "step": 3173 }, { "epoch": 0.28, "grad_norm": 5.943788628379881, "learning_rate": 9.677137163470107e-06, "loss": 0.8829, "step": 3174 }, { "epoch": 0.28, "grad_norm": 6.384666078039953, "learning_rate": 9.676881742858249e-06, "loss": 0.8813, "step": 3175 }, { "epoch": 0.28, "grad_norm": 7.474707675153014, "learning_rate": 9.676626224626728e-06, "loss": 0.9031, "step": 3176 }, { "epoch": 0.28, "grad_norm": 10.913266601187164, "learning_rate": 9.676370608780876e-06, "loss": 0.835, "step": 3177 }, { "epoch": 0.28, "grad_norm": 7.3687479560024425, "learning_rate": 9.676114895326028e-06, "loss": 0.9084, "step": 3178 }, { "epoch": 0.28, "grad_norm": 8.437121011407891, "learning_rate": 9.675859084267521e-06, "loss": 0.8293, "step": 3179 }, { "epoch": 0.28, "grad_norm": 5.751903686782551, "learning_rate": 9.675603175610697e-06, "loss": 0.8889, "step": 3180 }, { "epoch": 0.28, "grad_norm": 5.989918664110538, "learning_rate": 9.675347169360896e-06, "loss": 0.8153, "step": 3181 }, { "epoch": 0.28, "grad_norm": 7.675388371990813, "learning_rate": 9.67509106552346e-06, "loss": 0.8343, "step": 3182 }, { "epoch": 0.28, "grad_norm": 6.486240394544817, "learning_rate": 9.67483486410374e-06, "loss": 0.896, "step": 3183 }, { "epoch": 0.28, "grad_norm": 9.53227553846843, "learning_rate": 9.674578565107077e-06, "loss": 0.8414, "step": 3184 }, { "epoch": 0.28, "grad_norm": 5.656020192978489, "learning_rate": 9.674322168538825e-06, "loss": 0.8995, "step": 3185 }, { "epoch": 0.28, "grad_norm": 8.274969651396205, "learning_rate": 9.674065674404333e-06, "loss": 0.8966, "step": 3186 }, { "epoch": 0.28, "grad_norm": 5.899276773606116, "learning_rate": 9.673809082708956e-06, "loss": 0.9166, "step": 3187 }, { "epoch": 0.28, "grad_norm": 7.771447449385443, "learning_rate": 9.673552393458051e-06, "loss": 0.8239, "step": 3188 }, { "epoch": 0.28, "grad_norm": 6.601009897481193, "learning_rate": 9.673295606656975e-06, "loss": 0.9053, "step": 3189 }, { "epoch": 0.28, "grad_norm": 8.421944188060367, "learning_rate": 9.673038722311086e-06, "loss": 0.9374, "step": 3190 }, { "epoch": 0.28, "grad_norm": 6.309512076353031, "learning_rate": 9.672781740425748e-06, "loss": 0.8751, "step": 3191 }, { "epoch": 0.28, "grad_norm": 5.256443469602372, "learning_rate": 9.672524661006325e-06, "loss": 0.8961, "step": 3192 }, { "epoch": 0.28, "grad_norm": 6.7702887809561965, "learning_rate": 9.672267484058183e-06, "loss": 0.8463, "step": 3193 }, { "epoch": 0.28, "grad_norm": 4.5743138489856845, "learning_rate": 9.672010209586687e-06, "loss": 0.8855, "step": 3194 }, { "epoch": 0.29, "grad_norm": 6.832818103543249, "learning_rate": 9.67175283759721e-06, "loss": 0.8285, "step": 3195 }, { "epoch": 0.29, "grad_norm": 5.865656925945503, "learning_rate": 9.671495368095125e-06, "loss": 0.8645, "step": 3196 }, { "epoch": 0.29, "grad_norm": 5.735240508132219, "learning_rate": 9.671237801085804e-06, "loss": 0.8669, "step": 3197 }, { "epoch": 0.29, "grad_norm": 8.05606042808752, "learning_rate": 9.670980136574623e-06, "loss": 0.8491, "step": 3198 }, { "epoch": 0.29, "grad_norm": 6.728683492847892, "learning_rate": 9.670722374566962e-06, "loss": 0.8156, "step": 3199 }, { "epoch": 0.29, "grad_norm": 6.765082700843074, "learning_rate": 9.670464515068199e-06, "loss": 0.8497, "step": 3200 }, { "epoch": 0.29, "grad_norm": 7.160938816513407, "learning_rate": 9.67020655808372e-06, "loss": 0.9139, "step": 3201 }, { "epoch": 0.29, "grad_norm": 5.439873484383166, "learning_rate": 9.669948503618904e-06, "loss": 0.8755, "step": 3202 }, { "epoch": 0.29, "grad_norm": 6.467998032568611, "learning_rate": 9.66969035167914e-06, "loss": 0.8342, "step": 3203 }, { "epoch": 0.29, "grad_norm": 6.315771730196795, "learning_rate": 9.669432102269818e-06, "loss": 0.9044, "step": 3204 }, { "epoch": 0.29, "grad_norm": 5.095019958915507, "learning_rate": 9.669173755396325e-06, "loss": 0.8882, "step": 3205 }, { "epoch": 0.29, "grad_norm": 7.079103404471578, "learning_rate": 9.668915311064055e-06, "loss": 0.8498, "step": 3206 }, { "epoch": 0.29, "grad_norm": 6.512489273438903, "learning_rate": 9.668656769278403e-06, "loss": 0.8556, "step": 3207 }, { "epoch": 0.29, "grad_norm": 6.63484293921342, "learning_rate": 9.668398130044767e-06, "loss": 0.8854, "step": 3208 }, { "epoch": 0.29, "grad_norm": 5.660146844284628, "learning_rate": 9.668139393368543e-06, "loss": 0.8833, "step": 3209 }, { "epoch": 0.29, "grad_norm": 5.632639072263363, "learning_rate": 9.667880559255131e-06, "loss": 0.7798, "step": 3210 }, { "epoch": 0.29, "grad_norm": 6.162952336637108, "learning_rate": 9.667621627709935e-06, "loss": 0.8469, "step": 3211 }, { "epoch": 0.29, "grad_norm": 6.019741071547217, "learning_rate": 9.667362598738362e-06, "loss": 0.8546, "step": 3212 }, { "epoch": 0.29, "grad_norm": 8.595872600380877, "learning_rate": 9.667103472345813e-06, "loss": 0.9477, "step": 3213 }, { "epoch": 0.29, "grad_norm": 6.808182812482724, "learning_rate": 9.6668442485377e-06, "loss": 0.8089, "step": 3214 }, { "epoch": 0.29, "grad_norm": 8.373317893705277, "learning_rate": 9.666584927319434e-06, "loss": 0.9004, "step": 3215 }, { "epoch": 0.29, "grad_norm": 7.440600601312156, "learning_rate": 9.666325508696429e-06, "loss": 0.9576, "step": 3216 }, { "epoch": 0.29, "grad_norm": 8.698930879546522, "learning_rate": 9.666065992674096e-06, "loss": 0.7462, "step": 3217 }, { "epoch": 0.29, "grad_norm": 5.39945366945253, "learning_rate": 9.665806379257853e-06, "loss": 0.8152, "step": 3218 }, { "epoch": 0.29, "grad_norm": 5.117147210582255, "learning_rate": 9.665546668453123e-06, "loss": 0.8506, "step": 3219 }, { "epoch": 0.29, "grad_norm": 6.120538698544707, "learning_rate": 9.665286860265322e-06, "loss": 0.7932, "step": 3220 }, { "epoch": 0.29, "grad_norm": 5.836145542175159, "learning_rate": 9.665026954699874e-06, "loss": 0.9218, "step": 3221 }, { "epoch": 0.29, "grad_norm": 6.9838692142903565, "learning_rate": 9.664766951762206e-06, "loss": 0.778, "step": 3222 }, { "epoch": 0.29, "grad_norm": 5.121592358941899, "learning_rate": 9.664506851457742e-06, "loss": 0.8784, "step": 3223 }, { "epoch": 0.29, "grad_norm": 7.982416383203774, "learning_rate": 9.664246653791913e-06, "loss": 0.8628, "step": 3224 }, { "epoch": 0.29, "grad_norm": 7.095868318260642, "learning_rate": 9.66398635877015e-06, "loss": 0.8858, "step": 3225 }, { "epoch": 0.29, "grad_norm": 6.001861291180722, "learning_rate": 9.663725966397886e-06, "loss": 0.9059, "step": 3226 }, { "epoch": 0.29, "grad_norm": 7.1564442362735665, "learning_rate": 9.663465476680555e-06, "loss": 0.9026, "step": 3227 }, { "epoch": 0.29, "grad_norm": 5.828267874227665, "learning_rate": 9.663204889623595e-06, "loss": 0.884, "step": 3228 }, { "epoch": 0.29, "grad_norm": 6.719416698457259, "learning_rate": 9.662944205232447e-06, "loss": 0.8864, "step": 3229 }, { "epoch": 0.29, "grad_norm": 6.592579387418979, "learning_rate": 9.662683423512548e-06, "loss": 0.8789, "step": 3230 }, { "epoch": 0.29, "grad_norm": 6.70215959677774, "learning_rate": 9.662422544469346e-06, "loss": 0.8698, "step": 3231 }, { "epoch": 0.29, "grad_norm": 6.2826388946792004, "learning_rate": 9.662161568108281e-06, "loss": 0.842, "step": 3232 }, { "epoch": 0.29, "grad_norm": 5.27689763267672, "learning_rate": 9.661900494434805e-06, "loss": 0.8781, "step": 3233 }, { "epoch": 0.29, "grad_norm": 7.704530347439892, "learning_rate": 9.661639323454367e-06, "loss": 0.8746, "step": 3234 }, { "epoch": 0.29, "grad_norm": 8.540422916435023, "learning_rate": 9.661378055172413e-06, "loss": 0.9256, "step": 3235 }, { "epoch": 0.29, "grad_norm": 5.706133546919614, "learning_rate": 9.661116689594403e-06, "loss": 0.8956, "step": 3236 }, { "epoch": 0.29, "grad_norm": 7.879708412698, "learning_rate": 9.66085522672579e-06, "loss": 0.8957, "step": 3237 }, { "epoch": 0.29, "grad_norm": 7.587427742006504, "learning_rate": 9.660593666572032e-06, "loss": 0.8406, "step": 3238 }, { "epoch": 0.29, "grad_norm": 6.882327532801281, "learning_rate": 9.660332009138585e-06, "loss": 0.8733, "step": 3239 }, { "epoch": 0.29, "grad_norm": 6.485961387937878, "learning_rate": 9.660070254430914e-06, "loss": 0.8527, "step": 3240 }, { "epoch": 0.29, "grad_norm": 6.159733893175498, "learning_rate": 9.659808402454483e-06, "loss": 0.8797, "step": 3241 }, { "epoch": 0.29, "grad_norm": 7.8419444495901685, "learning_rate": 9.659546453214757e-06, "loss": 0.8529, "step": 3242 }, { "epoch": 0.29, "grad_norm": 6.341351250475801, "learning_rate": 9.6592844067172e-06, "loss": 0.9046, "step": 3243 }, { "epoch": 0.29, "grad_norm": 7.6091916582488315, "learning_rate": 9.659022262967288e-06, "loss": 0.928, "step": 3244 }, { "epoch": 0.29, "grad_norm": 5.675894952145962, "learning_rate": 9.658760021970487e-06, "loss": 0.8248, "step": 3245 }, { "epoch": 0.29, "grad_norm": 6.669820888433391, "learning_rate": 9.658497683732274e-06, "loss": 0.8474, "step": 3246 }, { "epoch": 0.29, "grad_norm": 5.731281506261774, "learning_rate": 9.658235248258122e-06, "loss": 0.8812, "step": 3247 }, { "epoch": 0.29, "grad_norm": 7.581281009637838, "learning_rate": 9.657972715553515e-06, "loss": 0.834, "step": 3248 }, { "epoch": 0.29, "grad_norm": 6.9748642136000605, "learning_rate": 9.657710085623924e-06, "loss": 0.7966, "step": 3249 }, { "epoch": 0.29, "grad_norm": 5.633024215553253, "learning_rate": 9.657447358474837e-06, "loss": 0.8935, "step": 3250 }, { "epoch": 0.29, "grad_norm": 7.891154672980486, "learning_rate": 9.657184534111737e-06, "loss": 0.9034, "step": 3251 }, { "epoch": 0.29, "grad_norm": 7.539125200335768, "learning_rate": 9.656921612540107e-06, "loss": 0.8857, "step": 3252 }, { "epoch": 0.29, "grad_norm": 6.153059015627392, "learning_rate": 9.656658593765438e-06, "loss": 0.8992, "step": 3253 }, { "epoch": 0.29, "grad_norm": 6.748770334691978, "learning_rate": 9.656395477793218e-06, "loss": 0.843, "step": 3254 }, { "epoch": 0.29, "grad_norm": 5.600675452495197, "learning_rate": 9.65613226462894e-06, "loss": 0.8954, "step": 3255 }, { "epoch": 0.29, "grad_norm": 5.725131329293039, "learning_rate": 9.655868954278099e-06, "loss": 0.8785, "step": 3256 }, { "epoch": 0.29, "grad_norm": 9.469562012035917, "learning_rate": 9.655605546746188e-06, "loss": 0.8538, "step": 3257 }, { "epoch": 0.29, "grad_norm": 8.521536526411392, "learning_rate": 9.655342042038706e-06, "loss": 0.9058, "step": 3258 }, { "epoch": 0.29, "grad_norm": 6.069242427430499, "learning_rate": 9.655078440161157e-06, "loss": 0.8818, "step": 3259 }, { "epoch": 0.29, "grad_norm": 5.31136303426663, "learning_rate": 9.654814741119037e-06, "loss": 0.8661, "step": 3260 }, { "epoch": 0.29, "grad_norm": 6.498782823953539, "learning_rate": 9.654550944917856e-06, "loss": 0.8503, "step": 3261 }, { "epoch": 0.29, "grad_norm": 7.301957183058016, "learning_rate": 9.654287051563115e-06, "loss": 0.8726, "step": 3262 }, { "epoch": 0.29, "grad_norm": 7.738990029288649, "learning_rate": 9.654023061060324e-06, "loss": 0.8516, "step": 3263 }, { "epoch": 0.29, "grad_norm": 8.654299354941623, "learning_rate": 9.653758973414995e-06, "loss": 0.8309, "step": 3264 }, { "epoch": 0.29, "grad_norm": 7.411897694986788, "learning_rate": 9.65349478863264e-06, "loss": 0.8122, "step": 3265 }, { "epoch": 0.29, "grad_norm": 11.102965441242938, "learning_rate": 9.653230506718771e-06, "loss": 0.8418, "step": 3266 }, { "epoch": 0.29, "grad_norm": 6.730850119903953, "learning_rate": 9.652966127678906e-06, "loss": 0.8319, "step": 3267 }, { "epoch": 0.29, "grad_norm": 5.7429308842034335, "learning_rate": 9.652701651518564e-06, "loss": 0.799, "step": 3268 }, { "epoch": 0.29, "grad_norm": 9.32940240849665, "learning_rate": 9.652437078243261e-06, "loss": 0.8482, "step": 3269 }, { "epoch": 0.29, "grad_norm": 6.507926132588859, "learning_rate": 9.652172407858525e-06, "loss": 0.8795, "step": 3270 }, { "epoch": 0.29, "grad_norm": 6.327345818068146, "learning_rate": 9.651907640369877e-06, "loss": 0.8139, "step": 3271 }, { "epoch": 0.29, "grad_norm": 8.22565246792412, "learning_rate": 9.651642775782846e-06, "loss": 0.8435, "step": 3272 }, { "epoch": 0.29, "grad_norm": 7.393530691249871, "learning_rate": 9.651377814102958e-06, "loss": 0.9151, "step": 3273 }, { "epoch": 0.29, "grad_norm": 7.922593234843766, "learning_rate": 9.651112755335745e-06, "loss": 0.8923, "step": 3274 }, { "epoch": 0.29, "grad_norm": 7.950403544262727, "learning_rate": 9.65084759948674e-06, "loss": 0.8568, "step": 3275 }, { "epoch": 0.29, "grad_norm": 4.751361811284917, "learning_rate": 9.650582346561475e-06, "loss": 0.8195, "step": 3276 }, { "epoch": 0.29, "grad_norm": 6.636788899586379, "learning_rate": 9.650316996565488e-06, "loss": 0.8931, "step": 3277 }, { "epoch": 0.29, "grad_norm": 6.689404616876132, "learning_rate": 9.650051549504317e-06, "loss": 0.8665, "step": 3278 }, { "epoch": 0.29, "grad_norm": 6.803147688962341, "learning_rate": 9.649786005383506e-06, "loss": 0.8516, "step": 3279 }, { "epoch": 0.29, "grad_norm": 7.042305161005341, "learning_rate": 9.649520364208591e-06, "loss": 0.883, "step": 3280 }, { "epoch": 0.29, "grad_norm": 5.993775653036826, "learning_rate": 9.649254625985126e-06, "loss": 0.8943, "step": 3281 }, { "epoch": 0.29, "grad_norm": 6.384939201549452, "learning_rate": 9.648988790718647e-06, "loss": 0.8525, "step": 3282 }, { "epoch": 0.29, "grad_norm": 8.01121459257376, "learning_rate": 9.648722858414712e-06, "loss": 0.8279, "step": 3283 }, { "epoch": 0.29, "grad_norm": 7.318889196851436, "learning_rate": 9.648456829078866e-06, "loss": 0.8841, "step": 3284 }, { "epoch": 0.29, "grad_norm": 6.081013441060796, "learning_rate": 9.648190702716664e-06, "loss": 0.8501, "step": 3285 }, { "epoch": 0.29, "grad_norm": 8.808064504684433, "learning_rate": 9.647924479333661e-06, "loss": 0.8574, "step": 3286 }, { "epoch": 0.29, "grad_norm": 8.902832492827207, "learning_rate": 9.647658158935412e-06, "loss": 0.9167, "step": 3287 }, { "epoch": 0.29, "grad_norm": 4.723950472690446, "learning_rate": 9.64739174152748e-06, "loss": 0.8002, "step": 3288 }, { "epoch": 0.29, "grad_norm": 6.875158382660294, "learning_rate": 9.64712522711542e-06, "loss": 0.9007, "step": 3289 }, { "epoch": 0.29, "grad_norm": 8.067225216481924, "learning_rate": 9.646858615704798e-06, "loss": 0.9136, "step": 3290 }, { "epoch": 0.29, "grad_norm": 7.213739952198843, "learning_rate": 9.64659190730118e-06, "loss": 0.8295, "step": 3291 }, { "epoch": 0.29, "grad_norm": 7.218594232852929, "learning_rate": 9.646325101910132e-06, "loss": 0.7895, "step": 3292 }, { "epoch": 0.29, "grad_norm": 6.311624435328392, "learning_rate": 9.646058199537221e-06, "loss": 0.8508, "step": 3293 }, { "epoch": 0.29, "grad_norm": 6.814927968197268, "learning_rate": 9.645791200188021e-06, "loss": 0.8651, "step": 3294 }, { "epoch": 0.29, "grad_norm": 6.466737169763425, "learning_rate": 9.645524103868105e-06, "loss": 0.7959, "step": 3295 }, { "epoch": 0.29, "grad_norm": 6.340550626756768, "learning_rate": 9.645256910583045e-06, "loss": 0.7638, "step": 3296 }, { "epoch": 0.29, "grad_norm": 5.5786509406678, "learning_rate": 9.644989620338421e-06, "loss": 0.7974, "step": 3297 }, { "epoch": 0.29, "grad_norm": 7.799530810684751, "learning_rate": 9.644722233139811e-06, "loss": 0.7909, "step": 3298 }, { "epoch": 0.29, "grad_norm": 5.406752796406822, "learning_rate": 9.644454748992795e-06, "loss": 0.8319, "step": 3299 }, { "epoch": 0.29, "grad_norm": 6.84689073541873, "learning_rate": 9.644187167902958e-06, "loss": 0.7951, "step": 3300 }, { "epoch": 0.29, "grad_norm": 5.757869804849618, "learning_rate": 9.643919489875885e-06, "loss": 0.8163, "step": 3301 }, { "epoch": 0.29, "grad_norm": 6.152601867040951, "learning_rate": 9.643651714917161e-06, "loss": 0.7986, "step": 3302 }, { "epoch": 0.29, "grad_norm": 5.972063663911529, "learning_rate": 9.643383843032378e-06, "loss": 0.9055, "step": 3303 }, { "epoch": 0.29, "grad_norm": 7.3052503304995975, "learning_rate": 9.643115874227127e-06, "loss": 0.842, "step": 3304 }, { "epoch": 0.29, "grad_norm": 6.469812637890127, "learning_rate": 9.642847808507e-06, "loss": 0.8654, "step": 3305 }, { "epoch": 0.29, "grad_norm": 5.546746591786719, "learning_rate": 9.642579645877592e-06, "loss": 0.8524, "step": 3306 }, { "epoch": 0.3, "grad_norm": 6.533281032679706, "learning_rate": 9.642311386344501e-06, "loss": 0.888, "step": 3307 }, { "epoch": 0.3, "grad_norm": 5.643224084104299, "learning_rate": 9.642043029913327e-06, "loss": 0.8424, "step": 3308 }, { "epoch": 0.3, "grad_norm": 6.7130529852478285, "learning_rate": 9.641774576589671e-06, "loss": 0.7892, "step": 3309 }, { "epoch": 0.3, "grad_norm": 8.896989878956347, "learning_rate": 9.641506026379136e-06, "loss": 0.8847, "step": 3310 }, { "epoch": 0.3, "grad_norm": 8.041873234756183, "learning_rate": 9.641237379287325e-06, "loss": 0.9059, "step": 3311 }, { "epoch": 0.3, "grad_norm": 6.258076931982257, "learning_rate": 9.64096863531985e-06, "loss": 0.884, "step": 3312 }, { "epoch": 0.3, "grad_norm": 6.843383546446361, "learning_rate": 9.640699794482318e-06, "loss": 0.839, "step": 3313 }, { "epoch": 0.3, "grad_norm": 5.419672283985424, "learning_rate": 9.64043085678034e-06, "loss": 0.8385, "step": 3314 }, { "epoch": 0.3, "grad_norm": 5.627641947321581, "learning_rate": 9.64016182221953e-06, "loss": 0.8717, "step": 3315 }, { "epoch": 0.3, "grad_norm": 6.624852147561525, "learning_rate": 9.639892690805506e-06, "loss": 0.8068, "step": 3316 }, { "epoch": 0.3, "grad_norm": 6.864658873493239, "learning_rate": 9.63962346254388e-06, "loss": 0.8386, "step": 3317 }, { "epoch": 0.3, "grad_norm": 6.7338139283483915, "learning_rate": 9.639354137440278e-06, "loss": 0.8599, "step": 3318 }, { "epoch": 0.3, "grad_norm": 4.758086011234576, "learning_rate": 9.639084715500316e-06, "loss": 0.7661, "step": 3319 }, { "epoch": 0.3, "grad_norm": 4.852930876868596, "learning_rate": 9.63881519672962e-06, "loss": 0.8511, "step": 3320 }, { "epoch": 0.3, "grad_norm": 6.9736862068141425, "learning_rate": 9.638545581133819e-06, "loss": 0.8602, "step": 3321 }, { "epoch": 0.3, "grad_norm": 11.82276474569912, "learning_rate": 9.638275868718534e-06, "loss": 0.8198, "step": 3322 }, { "epoch": 0.3, "grad_norm": 5.771761479629931, "learning_rate": 9.638006059489397e-06, "loss": 0.8255, "step": 3323 }, { "epoch": 0.3, "grad_norm": 6.279806030623002, "learning_rate": 9.637736153452044e-06, "loss": 0.8593, "step": 3324 }, { "epoch": 0.3, "grad_norm": 6.737409577622434, "learning_rate": 9.637466150612103e-06, "loss": 0.8456, "step": 3325 }, { "epoch": 0.3, "grad_norm": 5.9483347844199566, "learning_rate": 9.637196050975212e-06, "loss": 0.8414, "step": 3326 }, { "epoch": 0.3, "grad_norm": 6.389414333049423, "learning_rate": 9.63692585454701e-06, "loss": 0.8499, "step": 3327 }, { "epoch": 0.3, "grad_norm": 6.556196115502422, "learning_rate": 9.636655561333134e-06, "loss": 0.8956, "step": 3328 }, { "epoch": 0.3, "grad_norm": 6.65732393623127, "learning_rate": 9.63638517133923e-06, "loss": 0.8797, "step": 3329 }, { "epoch": 0.3, "grad_norm": 8.022036509560413, "learning_rate": 9.636114684570936e-06, "loss": 0.9197, "step": 3330 }, { "epoch": 0.3, "grad_norm": 7.540375557093509, "learning_rate": 9.635844101033902e-06, "loss": 0.8004, "step": 3331 }, { "epoch": 0.3, "grad_norm": 5.312068524439566, "learning_rate": 9.635573420733775e-06, "loss": 0.8872, "step": 3332 }, { "epoch": 0.3, "grad_norm": 6.17558454806183, "learning_rate": 9.635302643676203e-06, "loss": 0.8655, "step": 3333 }, { "epoch": 0.3, "grad_norm": 6.654042195661868, "learning_rate": 9.635031769866841e-06, "loss": 0.9031, "step": 3334 }, { "epoch": 0.3, "grad_norm": 7.2121930830064835, "learning_rate": 9.63476079931134e-06, "loss": 0.8693, "step": 3335 }, { "epoch": 0.3, "grad_norm": 7.5531078763406425, "learning_rate": 9.63448973201536e-06, "loss": 0.8366, "step": 3336 }, { "epoch": 0.3, "grad_norm": 5.567827311141023, "learning_rate": 9.634218567984554e-06, "loss": 0.8829, "step": 3337 }, { "epoch": 0.3, "grad_norm": 5.963964174311849, "learning_rate": 9.633947307224584e-06, "loss": 0.7813, "step": 3338 }, { "epoch": 0.3, "grad_norm": 6.331547233935041, "learning_rate": 9.633675949741112e-06, "loss": 0.892, "step": 3339 }, { "epoch": 0.3, "grad_norm": 6.068419907924525, "learning_rate": 9.633404495539805e-06, "loss": 0.8276, "step": 3340 }, { "epoch": 0.3, "grad_norm": 9.291975998275534, "learning_rate": 9.633132944626322e-06, "loss": 0.8635, "step": 3341 }, { "epoch": 0.3, "grad_norm": 7.026333863081252, "learning_rate": 9.632861297006337e-06, "loss": 0.8926, "step": 3342 }, { "epoch": 0.3, "grad_norm": 5.659741342317169, "learning_rate": 9.63258955268552e-06, "loss": 0.8415, "step": 3343 }, { "epoch": 0.3, "grad_norm": 8.7727945925817, "learning_rate": 9.63231771166954e-06, "loss": 0.9476, "step": 3344 }, { "epoch": 0.3, "grad_norm": 6.265032495582461, "learning_rate": 9.632045773964072e-06, "loss": 0.84, "step": 3345 }, { "epoch": 0.3, "grad_norm": 6.930967365961253, "learning_rate": 9.631773739574793e-06, "loss": 0.7982, "step": 3346 }, { "epoch": 0.3, "grad_norm": 7.790109417668356, "learning_rate": 9.631501608507379e-06, "loss": 0.822, "step": 3347 }, { "epoch": 0.3, "grad_norm": 7.21076479784875, "learning_rate": 9.631229380767513e-06, "loss": 0.8805, "step": 3348 }, { "epoch": 0.3, "grad_norm": 6.664679373058769, "learning_rate": 9.630957056360875e-06, "loss": 0.8874, "step": 3349 }, { "epoch": 0.3, "grad_norm": 7.191654599892978, "learning_rate": 9.63068463529315e-06, "loss": 0.8527, "step": 3350 }, { "epoch": 0.3, "grad_norm": 7.928166168223627, "learning_rate": 9.630412117570028e-06, "loss": 0.8199, "step": 3351 }, { "epoch": 0.3, "grad_norm": 8.700043591230171, "learning_rate": 9.63013950319719e-06, "loss": 0.8265, "step": 3352 }, { "epoch": 0.3, "grad_norm": 6.966426912297472, "learning_rate": 9.62986679218033e-06, "loss": 0.8032, "step": 3353 }, { "epoch": 0.3, "grad_norm": 5.595881450642672, "learning_rate": 9.62959398452514e-06, "loss": 0.8813, "step": 3354 }, { "epoch": 0.3, "grad_norm": 7.642856834016924, "learning_rate": 9.629321080237315e-06, "loss": 0.8582, "step": 3355 }, { "epoch": 0.3, "grad_norm": 5.772016770497053, "learning_rate": 9.629048079322549e-06, "loss": 0.9213, "step": 3356 }, { "epoch": 0.3, "grad_norm": 7.440615603525215, "learning_rate": 9.628774981786544e-06, "loss": 0.9015, "step": 3357 }, { "epoch": 0.3, "grad_norm": 5.592831518786836, "learning_rate": 9.628501787634996e-06, "loss": 0.8974, "step": 3358 }, { "epoch": 0.3, "grad_norm": 7.278952815241524, "learning_rate": 9.62822849687361e-06, "loss": 0.8932, "step": 3359 }, { "epoch": 0.3, "grad_norm": 8.259218372906293, "learning_rate": 9.627955109508089e-06, "loss": 0.8614, "step": 3360 }, { "epoch": 0.3, "grad_norm": 10.030660706226964, "learning_rate": 9.627681625544142e-06, "loss": 0.8497, "step": 3361 }, { "epoch": 0.3, "grad_norm": 5.7346740908553775, "learning_rate": 9.627408044987474e-06, "loss": 0.9084, "step": 3362 }, { "epoch": 0.3, "grad_norm": 6.470426279547836, "learning_rate": 9.627134367843798e-06, "loss": 0.8905, "step": 3363 }, { "epoch": 0.3, "grad_norm": 6.146327641158245, "learning_rate": 9.626860594118826e-06, "loss": 0.8707, "step": 3364 }, { "epoch": 0.3, "grad_norm": 5.543680580533257, "learning_rate": 9.62658672381827e-06, "loss": 0.9132, "step": 3365 }, { "epoch": 0.3, "grad_norm": 7.744839366650756, "learning_rate": 9.626312756947848e-06, "loss": 0.8274, "step": 3366 }, { "epoch": 0.3, "grad_norm": 6.231240320506806, "learning_rate": 9.62603869351328e-06, "loss": 0.8854, "step": 3367 }, { "epoch": 0.3, "grad_norm": 6.161760805925736, "learning_rate": 9.625764533520285e-06, "loss": 1.0146, "step": 3368 }, { "epoch": 0.3, "grad_norm": 7.2850656129913824, "learning_rate": 9.625490276974585e-06, "loss": 0.8114, "step": 3369 }, { "epoch": 0.3, "grad_norm": 7.175369543935384, "learning_rate": 9.625215923881905e-06, "loss": 0.7775, "step": 3370 }, { "epoch": 0.3, "grad_norm": 6.800946895838132, "learning_rate": 9.624941474247973e-06, "loss": 0.8201, "step": 3371 }, { "epoch": 0.3, "grad_norm": 7.55102228187858, "learning_rate": 9.624666928078515e-06, "loss": 0.9652, "step": 3372 }, { "epoch": 0.3, "grad_norm": 10.55409484785092, "learning_rate": 9.624392285379263e-06, "loss": 0.8431, "step": 3373 }, { "epoch": 0.3, "grad_norm": 7.620122896290645, "learning_rate": 9.62411754615595e-06, "loss": 0.9095, "step": 3374 }, { "epoch": 0.3, "grad_norm": 5.473874117005483, "learning_rate": 9.623842710414308e-06, "loss": 0.8687, "step": 3375 }, { "epoch": 0.3, "grad_norm": 6.52348434570083, "learning_rate": 9.623567778160077e-06, "loss": 0.8433, "step": 3376 }, { "epoch": 0.3, "grad_norm": 6.0668878966377715, "learning_rate": 9.623292749398993e-06, "loss": 0.8711, "step": 3377 }, { "epoch": 0.3, "grad_norm": 7.470576058597409, "learning_rate": 9.623017624136799e-06, "loss": 0.9129, "step": 3378 }, { "epoch": 0.3, "grad_norm": 7.683444069021747, "learning_rate": 9.622742402379234e-06, "loss": 0.8524, "step": 3379 }, { "epoch": 0.3, "grad_norm": 7.615756000338311, "learning_rate": 9.622467084132048e-06, "loss": 0.8258, "step": 3380 }, { "epoch": 0.3, "grad_norm": 6.043361183688054, "learning_rate": 9.622191669400983e-06, "loss": 0.8662, "step": 3381 }, { "epoch": 0.3, "grad_norm": 6.268358447106379, "learning_rate": 9.621916158191789e-06, "loss": 0.8419, "step": 3382 }, { "epoch": 0.3, "grad_norm": 8.450451396666109, "learning_rate": 9.621640550510218e-06, "loss": 0.7574, "step": 3383 }, { "epoch": 0.3, "grad_norm": 6.992651651470137, "learning_rate": 9.621364846362023e-06, "loss": 0.8132, "step": 3384 }, { "epoch": 0.3, "grad_norm": 5.277339903337183, "learning_rate": 9.621089045752954e-06, "loss": 0.78, "step": 3385 }, { "epoch": 0.3, "grad_norm": 6.768864616516342, "learning_rate": 9.620813148688773e-06, "loss": 0.8698, "step": 3386 }, { "epoch": 0.3, "grad_norm": 6.972507706359331, "learning_rate": 9.620537155175238e-06, "loss": 0.8485, "step": 3387 }, { "epoch": 0.3, "grad_norm": 8.719492046496953, "learning_rate": 9.620261065218106e-06, "loss": 0.8954, "step": 3388 }, { "epoch": 0.3, "grad_norm": 4.9614397086320725, "learning_rate": 9.619984878823145e-06, "loss": 0.8315, "step": 3389 }, { "epoch": 0.3, "grad_norm": 10.946851612448748, "learning_rate": 9.619708595996116e-06, "loss": 0.8787, "step": 3390 }, { "epoch": 0.3, "grad_norm": 5.541238550805727, "learning_rate": 9.619432216742787e-06, "loss": 0.8401, "step": 3391 }, { "epoch": 0.3, "grad_norm": 7.428930741635411, "learning_rate": 9.619155741068929e-06, "loss": 0.9019, "step": 3392 }, { "epoch": 0.3, "grad_norm": 6.7662750656522475, "learning_rate": 9.618879168980308e-06, "loss": 0.872, "step": 3393 }, { "epoch": 0.3, "grad_norm": 5.824038273913719, "learning_rate": 9.6186025004827e-06, "loss": 0.8158, "step": 3394 }, { "epoch": 0.3, "grad_norm": 6.0066369173077625, "learning_rate": 9.61832573558188e-06, "loss": 0.8632, "step": 3395 }, { "epoch": 0.3, "grad_norm": 6.918052776994631, "learning_rate": 9.618048874283623e-06, "loss": 0.9108, "step": 3396 }, { "epoch": 0.3, "grad_norm": 7.850860855466185, "learning_rate": 9.61777191659371e-06, "loss": 0.8424, "step": 3397 }, { "epoch": 0.3, "grad_norm": 5.748728067665785, "learning_rate": 9.617494862517922e-06, "loss": 0.8977, "step": 3398 }, { "epoch": 0.3, "grad_norm": 5.642096426084905, "learning_rate": 9.61721771206204e-06, "loss": 0.8856, "step": 3399 }, { "epoch": 0.3, "grad_norm": 8.487060401058544, "learning_rate": 9.61694046523185e-06, "loss": 0.9016, "step": 3400 }, { "epoch": 0.3, "grad_norm": 6.191498690134695, "learning_rate": 9.616663122033139e-06, "loss": 0.8338, "step": 3401 }, { "epoch": 0.3, "grad_norm": 4.58416574174315, "learning_rate": 9.616385682471693e-06, "loss": 0.7772, "step": 3402 }, { "epoch": 0.3, "grad_norm": 4.332135260966037, "learning_rate": 9.61610814655331e-06, "loss": 0.8161, "step": 3403 }, { "epoch": 0.3, "grad_norm": 5.917316674591485, "learning_rate": 9.615830514283776e-06, "loss": 0.8997, "step": 3404 }, { "epoch": 0.3, "grad_norm": 9.39296016235748, "learning_rate": 9.615552785668888e-06, "loss": 0.9031, "step": 3405 }, { "epoch": 0.3, "grad_norm": 5.513347492140642, "learning_rate": 9.615274960714445e-06, "loss": 0.8885, "step": 3406 }, { "epoch": 0.3, "grad_norm": 7.051032938346747, "learning_rate": 9.614997039426243e-06, "loss": 0.8879, "step": 3407 }, { "epoch": 0.3, "grad_norm": 8.106094900155611, "learning_rate": 9.614719021810085e-06, "loss": 0.8076, "step": 3408 }, { "epoch": 0.3, "grad_norm": 7.55306556161794, "learning_rate": 9.614440907871773e-06, "loss": 0.7935, "step": 3409 }, { "epoch": 0.3, "grad_norm": 6.625618214025119, "learning_rate": 9.614162697617116e-06, "loss": 0.8569, "step": 3410 }, { "epoch": 0.3, "grad_norm": 4.773199754625026, "learning_rate": 9.613884391051912e-06, "loss": 0.9128, "step": 3411 }, { "epoch": 0.3, "grad_norm": 6.535228390514244, "learning_rate": 9.61360598818198e-06, "loss": 0.7988, "step": 3412 }, { "epoch": 0.3, "grad_norm": 8.293915425492607, "learning_rate": 9.613327489013124e-06, "loss": 0.859, "step": 3413 }, { "epoch": 0.3, "grad_norm": 5.548229995068532, "learning_rate": 9.61304889355116e-06, "loss": 0.7892, "step": 3414 }, { "epoch": 0.3, "grad_norm": 7.282683917853358, "learning_rate": 9.612770201801905e-06, "loss": 0.8482, "step": 3415 }, { "epoch": 0.3, "grad_norm": 6.9507552079744555, "learning_rate": 9.612491413771172e-06, "loss": 0.8861, "step": 3416 }, { "epoch": 0.3, "grad_norm": 5.657796515927321, "learning_rate": 9.612212529464781e-06, "loss": 0.8761, "step": 3417 }, { "epoch": 0.3, "grad_norm": 4.971418912424004, "learning_rate": 9.611933548888557e-06, "loss": 0.9099, "step": 3418 }, { "epoch": 0.31, "grad_norm": 5.173698598289152, "learning_rate": 9.611654472048318e-06, "loss": 0.8989, "step": 3419 }, { "epoch": 0.31, "grad_norm": 6.121841069723469, "learning_rate": 9.611375298949892e-06, "loss": 0.8276, "step": 3420 }, { "epoch": 0.31, "grad_norm": 8.114944892276505, "learning_rate": 9.611096029599105e-06, "loss": 0.8687, "step": 3421 }, { "epoch": 0.31, "grad_norm": 6.048883322055748, "learning_rate": 9.610816664001785e-06, "loss": 0.8776, "step": 3422 }, { "epoch": 0.31, "grad_norm": 5.384000117718312, "learning_rate": 9.610537202163766e-06, "loss": 0.8181, "step": 3423 }, { "epoch": 0.31, "grad_norm": 6.225639309710435, "learning_rate": 9.610257644090881e-06, "loss": 0.902, "step": 3424 }, { "epoch": 0.31, "grad_norm": 7.227230702748475, "learning_rate": 9.609977989788963e-06, "loss": 0.8019, "step": 3425 }, { "epoch": 0.31, "grad_norm": 6.163592062823973, "learning_rate": 9.60969823926385e-06, "loss": 0.8099, "step": 3426 }, { "epoch": 0.31, "grad_norm": 8.001670570950575, "learning_rate": 9.60941839252138e-06, "loss": 0.771, "step": 3427 }, { "epoch": 0.31, "grad_norm": 6.895884706004711, "learning_rate": 9.609138449567397e-06, "loss": 0.8404, "step": 3428 }, { "epoch": 0.31, "grad_norm": 7.269332821667848, "learning_rate": 9.608858410407743e-06, "loss": 0.8495, "step": 3429 }, { "epoch": 0.31, "grad_norm": 6.792041287272132, "learning_rate": 9.608578275048262e-06, "loss": 0.8602, "step": 3430 }, { "epoch": 0.31, "grad_norm": 7.380643228161576, "learning_rate": 9.608298043494803e-06, "loss": 0.8435, "step": 3431 }, { "epoch": 0.31, "grad_norm": 6.229900574347504, "learning_rate": 9.608017715753213e-06, "loss": 0.7892, "step": 3432 }, { "epoch": 0.31, "grad_norm": 5.171037572613418, "learning_rate": 9.607737291829346e-06, "loss": 0.8797, "step": 3433 }, { "epoch": 0.31, "grad_norm": 5.751272082337137, "learning_rate": 9.607456771729054e-06, "loss": 0.7965, "step": 3434 }, { "epoch": 0.31, "grad_norm": 6.905273700507282, "learning_rate": 9.60717615545819e-06, "loss": 0.9135, "step": 3435 }, { "epoch": 0.31, "grad_norm": 8.56859321769154, "learning_rate": 9.606895443022615e-06, "loss": 0.8424, "step": 3436 }, { "epoch": 0.31, "grad_norm": 6.794086058787489, "learning_rate": 9.606614634428186e-06, "loss": 0.8683, "step": 3437 }, { "epoch": 0.31, "grad_norm": 6.079769686661954, "learning_rate": 9.606333729680766e-06, "loss": 0.8742, "step": 3438 }, { "epoch": 0.31, "grad_norm": 5.970446584919136, "learning_rate": 9.606052728786216e-06, "loss": 0.9455, "step": 3439 }, { "epoch": 0.31, "grad_norm": 6.342584215525004, "learning_rate": 9.605771631750402e-06, "loss": 0.9009, "step": 3440 }, { "epoch": 0.31, "grad_norm": 5.3417769737565965, "learning_rate": 9.605490438579192e-06, "loss": 0.8446, "step": 3441 }, { "epoch": 0.31, "grad_norm": 5.950888431712724, "learning_rate": 9.605209149278456e-06, "loss": 0.8703, "step": 3442 }, { "epoch": 0.31, "grad_norm": 7.591338590840988, "learning_rate": 9.604927763854063e-06, "loss": 0.8676, "step": 3443 }, { "epoch": 0.31, "grad_norm": 7.438493641844304, "learning_rate": 9.604646282311887e-06, "loss": 0.8816, "step": 3444 }, { "epoch": 0.31, "grad_norm": 5.725969056923775, "learning_rate": 9.604364704657806e-06, "loss": 0.8778, "step": 3445 }, { "epoch": 0.31, "grad_norm": 8.543729130245039, "learning_rate": 9.604083030897694e-06, "loss": 0.8939, "step": 3446 }, { "epoch": 0.31, "grad_norm": 7.499654552495176, "learning_rate": 9.603801261037432e-06, "loss": 0.7936, "step": 3447 }, { "epoch": 0.31, "grad_norm": 5.748152230023837, "learning_rate": 9.603519395082898e-06, "loss": 0.8896, "step": 3448 }, { "epoch": 0.31, "grad_norm": 9.781860167701735, "learning_rate": 9.603237433039981e-06, "loss": 0.9457, "step": 3449 }, { "epoch": 0.31, "grad_norm": 5.956923969345379, "learning_rate": 9.602955374914563e-06, "loss": 0.9545, "step": 3450 }, { "epoch": 0.31, "grad_norm": 6.412439508867281, "learning_rate": 9.602673220712532e-06, "loss": 0.8682, "step": 3451 }, { "epoch": 0.31, "grad_norm": 7.689709767197531, "learning_rate": 9.602390970439777e-06, "loss": 0.9145, "step": 3452 }, { "epoch": 0.31, "grad_norm": 6.846444710629745, "learning_rate": 9.60210862410219e-06, "loss": 0.7755, "step": 3453 }, { "epoch": 0.31, "grad_norm": 7.06589211119371, "learning_rate": 9.601826181705662e-06, "loss": 0.8784, "step": 3454 }, { "epoch": 0.31, "grad_norm": 6.4469908659287345, "learning_rate": 9.601543643256092e-06, "loss": 0.8999, "step": 3455 }, { "epoch": 0.31, "grad_norm": 6.8900432115264225, "learning_rate": 9.601261008759377e-06, "loss": 0.8767, "step": 3456 }, { "epoch": 0.31, "grad_norm": 7.689289012532748, "learning_rate": 9.600978278221412e-06, "loss": 0.8465, "step": 3457 }, { "epoch": 0.31, "grad_norm": 6.352669505701338, "learning_rate": 9.600695451648104e-06, "loss": 0.9459, "step": 3458 }, { "epoch": 0.31, "grad_norm": 6.720708121195964, "learning_rate": 9.60041252904535e-06, "loss": 0.802, "step": 3459 }, { "epoch": 0.31, "grad_norm": 6.288847250301714, "learning_rate": 9.600129510419063e-06, "loss": 0.8834, "step": 3460 }, { "epoch": 0.31, "grad_norm": 6.298504657229926, "learning_rate": 9.599846395775145e-06, "loss": 0.8282, "step": 3461 }, { "epoch": 0.31, "grad_norm": 5.901691860386791, "learning_rate": 9.599563185119509e-06, "loss": 0.8662, "step": 3462 }, { "epoch": 0.31, "grad_norm": 8.305908656057918, "learning_rate": 9.599279878458062e-06, "loss": 0.8779, "step": 3463 }, { "epoch": 0.31, "grad_norm": 7.470330919245296, "learning_rate": 9.59899647579672e-06, "loss": 0.8925, "step": 3464 }, { "epoch": 0.31, "grad_norm": 6.041278595778447, "learning_rate": 9.598712977141399e-06, "loss": 0.8543, "step": 3465 }, { "epoch": 0.31, "grad_norm": 6.772698524322452, "learning_rate": 9.598429382498016e-06, "loss": 0.8951, "step": 3466 }, { "epoch": 0.31, "grad_norm": 7.363549125634146, "learning_rate": 9.598145691872488e-06, "loss": 0.8617, "step": 3467 }, { "epoch": 0.31, "grad_norm": 6.013020003109283, "learning_rate": 9.59786190527074e-06, "loss": 0.8716, "step": 3468 }, { "epoch": 0.31, "grad_norm": 5.276986824280207, "learning_rate": 9.597578022698695e-06, "loss": 0.9078, "step": 3469 }, { "epoch": 0.31, "grad_norm": 5.899679535170715, "learning_rate": 9.597294044162276e-06, "loss": 0.841, "step": 3470 }, { "epoch": 0.31, "grad_norm": 5.917423273737217, "learning_rate": 9.597009969667412e-06, "loss": 0.8716, "step": 3471 }, { "epoch": 0.31, "grad_norm": 10.93843433813374, "learning_rate": 9.59672579922003e-06, "loss": 0.8852, "step": 3472 }, { "epoch": 0.31, "grad_norm": 8.781614828753245, "learning_rate": 9.596441532826066e-06, "loss": 0.8981, "step": 3473 }, { "epoch": 0.31, "grad_norm": 10.721843403661758, "learning_rate": 9.596157170491451e-06, "loss": 0.8888, "step": 3474 }, { "epoch": 0.31, "grad_norm": 9.009056533090284, "learning_rate": 9.59587271222212e-06, "loss": 0.9025, "step": 3475 }, { "epoch": 0.31, "grad_norm": 5.540411852855542, "learning_rate": 9.595588158024012e-06, "loss": 0.8564, "step": 3476 }, { "epoch": 0.31, "grad_norm": 10.481661549739771, "learning_rate": 9.595303507903064e-06, "loss": 0.8731, "step": 3477 }, { "epoch": 0.31, "grad_norm": 5.9004372436782, "learning_rate": 9.595018761865217e-06, "loss": 0.8173, "step": 3478 }, { "epoch": 0.31, "grad_norm": 5.3169315298902795, "learning_rate": 9.594733919916419e-06, "loss": 0.8674, "step": 3479 }, { "epoch": 0.31, "grad_norm": 5.712873194231846, "learning_rate": 9.594448982062614e-06, "loss": 0.9338, "step": 3480 }, { "epoch": 0.31, "grad_norm": 8.935733517204914, "learning_rate": 9.594163948309745e-06, "loss": 0.9382, "step": 3481 }, { "epoch": 0.31, "grad_norm": 4.4088970447402, "learning_rate": 9.593878818663767e-06, "loss": 0.8779, "step": 3482 }, { "epoch": 0.31, "grad_norm": 5.686290836717649, "learning_rate": 9.593593593130627e-06, "loss": 0.8311, "step": 3483 }, { "epoch": 0.31, "grad_norm": 7.493249908380524, "learning_rate": 9.593308271716283e-06, "loss": 0.8745, "step": 3484 }, { "epoch": 0.31, "grad_norm": 7.034599733549633, "learning_rate": 9.593022854426686e-06, "loss": 0.8647, "step": 3485 }, { "epoch": 0.31, "grad_norm": 8.431445491480448, "learning_rate": 9.592737341267796e-06, "loss": 0.8671, "step": 3486 }, { "epoch": 0.31, "grad_norm": 5.143320709843091, "learning_rate": 9.59245173224557e-06, "loss": 0.8548, "step": 3487 }, { "epoch": 0.31, "grad_norm": 7.892591827684415, "learning_rate": 9.592166027365974e-06, "loss": 0.8646, "step": 3488 }, { "epoch": 0.31, "grad_norm": 5.62270628164334, "learning_rate": 9.591880226634969e-06, "loss": 0.8788, "step": 3489 }, { "epoch": 0.31, "grad_norm": 5.567567247826698, "learning_rate": 9.591594330058517e-06, "loss": 0.8644, "step": 3490 }, { "epoch": 0.31, "grad_norm": 5.2697946448700055, "learning_rate": 9.591308337642592e-06, "loss": 0.8208, "step": 3491 }, { "epoch": 0.31, "grad_norm": 6.135293718266525, "learning_rate": 9.591022249393161e-06, "loss": 0.8274, "step": 3492 }, { "epoch": 0.31, "grad_norm": 7.051272736692218, "learning_rate": 9.590736065316193e-06, "loss": 0.8949, "step": 3493 }, { "epoch": 0.31, "grad_norm": 8.83517831624625, "learning_rate": 9.590449785417665e-06, "loss": 0.8228, "step": 3494 }, { "epoch": 0.31, "grad_norm": 11.528558542606188, "learning_rate": 9.590163409703547e-06, "loss": 0.8224, "step": 3495 }, { "epoch": 0.31, "grad_norm": 8.23527683112531, "learning_rate": 9.589876938179824e-06, "loss": 0.8564, "step": 3496 }, { "epoch": 0.31, "grad_norm": 7.773637246293891, "learning_rate": 9.58959037085247e-06, "loss": 0.8302, "step": 3497 }, { "epoch": 0.31, "grad_norm": 6.226228439480646, "learning_rate": 9.589303707727466e-06, "loss": 0.8571, "step": 3498 }, { "epoch": 0.31, "grad_norm": 6.064724556539138, "learning_rate": 9.5890169488108e-06, "loss": 0.8032, "step": 3499 }, { "epoch": 0.31, "grad_norm": 7.066756078546219, "learning_rate": 9.588730094108453e-06, "loss": 0.9226, "step": 3500 }, { "epoch": 0.31, "grad_norm": 7.561285042621237, "learning_rate": 9.588443143626415e-06, "loss": 0.9036, "step": 3501 }, { "epoch": 0.31, "grad_norm": 7.0780105970633365, "learning_rate": 9.588156097370676e-06, "loss": 0.86, "step": 3502 }, { "epoch": 0.31, "grad_norm": 6.370910609109139, "learning_rate": 9.587868955347225e-06, "loss": 0.8648, "step": 3503 }, { "epoch": 0.31, "grad_norm": 5.685044625824857, "learning_rate": 9.58758171756206e-06, "loss": 0.8796, "step": 3504 }, { "epoch": 0.31, "grad_norm": 7.454638883754281, "learning_rate": 9.58729438402117e-06, "loss": 0.7542, "step": 3505 }, { "epoch": 0.31, "grad_norm": 7.207949946284218, "learning_rate": 9.587006954730557e-06, "loss": 0.8774, "step": 3506 }, { "epoch": 0.31, "grad_norm": 7.18903139943031, "learning_rate": 9.586719429696219e-06, "loss": 0.8676, "step": 3507 }, { "epoch": 0.31, "grad_norm": 6.407761451978195, "learning_rate": 9.586431808924157e-06, "loss": 0.8257, "step": 3508 }, { "epoch": 0.31, "grad_norm": 6.6814902192315815, "learning_rate": 9.586144092420375e-06, "loss": 0.8532, "step": 3509 }, { "epoch": 0.31, "grad_norm": 6.065459929800284, "learning_rate": 9.58585628019088e-06, "loss": 0.8995, "step": 3510 }, { "epoch": 0.31, "grad_norm": 6.349327067724237, "learning_rate": 9.585568372241677e-06, "loss": 0.8536, "step": 3511 }, { "epoch": 0.31, "grad_norm": 5.116770283818196, "learning_rate": 9.585280368578777e-06, "loss": 0.7847, "step": 3512 }, { "epoch": 0.31, "grad_norm": 6.447525895038339, "learning_rate": 9.584992269208191e-06, "loss": 0.876, "step": 3513 }, { "epoch": 0.31, "grad_norm": 6.160070681561613, "learning_rate": 9.584704074135931e-06, "loss": 0.8786, "step": 3514 }, { "epoch": 0.31, "grad_norm": 5.991508798298955, "learning_rate": 9.584415783368015e-06, "loss": 0.8439, "step": 3515 }, { "epoch": 0.31, "grad_norm": 6.078877639677518, "learning_rate": 9.584127396910458e-06, "loss": 0.8369, "step": 3516 }, { "epoch": 0.31, "grad_norm": 5.986532875413086, "learning_rate": 9.583838914769283e-06, "loss": 0.8342, "step": 3517 }, { "epoch": 0.31, "grad_norm": 7.734577723806855, "learning_rate": 9.583550336950507e-06, "loss": 0.8001, "step": 3518 }, { "epoch": 0.31, "grad_norm": 6.078435101495751, "learning_rate": 9.583261663460157e-06, "loss": 0.9061, "step": 3519 }, { "epoch": 0.31, "grad_norm": 6.104943835138198, "learning_rate": 9.582972894304255e-06, "loss": 0.8464, "step": 3520 }, { "epoch": 0.31, "grad_norm": 6.760774102832016, "learning_rate": 9.582684029488832e-06, "loss": 0.8567, "step": 3521 }, { "epoch": 0.31, "grad_norm": 7.577183366463439, "learning_rate": 9.582395069019915e-06, "loss": 0.8804, "step": 3522 }, { "epoch": 0.31, "grad_norm": 5.083501739213018, "learning_rate": 9.582106012903536e-06, "loss": 0.9069, "step": 3523 }, { "epoch": 0.31, "grad_norm": 5.178221645753052, "learning_rate": 9.58181686114573e-06, "loss": 0.8398, "step": 3524 }, { "epoch": 0.31, "grad_norm": 5.657797459968471, "learning_rate": 9.58152761375253e-06, "loss": 0.8762, "step": 3525 }, { "epoch": 0.31, "grad_norm": 6.0415751617880105, "learning_rate": 9.581238270729976e-06, "loss": 0.8285, "step": 3526 }, { "epoch": 0.31, "grad_norm": 5.658327797583851, "learning_rate": 9.580948832084103e-06, "loss": 0.8062, "step": 3527 }, { "epoch": 0.31, "grad_norm": 7.398126484986254, "learning_rate": 9.580659297820958e-06, "loss": 0.801, "step": 3528 }, { "epoch": 0.31, "grad_norm": 7.335328780702546, "learning_rate": 9.580369667946578e-06, "loss": 0.8987, "step": 3529 }, { "epoch": 0.31, "grad_norm": 7.775932283323086, "learning_rate": 9.580079942467015e-06, "loss": 0.8799, "step": 3530 }, { "epoch": 0.32, "grad_norm": 8.02179323497502, "learning_rate": 9.579790121388312e-06, "loss": 0.8606, "step": 3531 }, { "epoch": 0.32, "grad_norm": 6.448164874991882, "learning_rate": 9.579500204716522e-06, "loss": 0.878, "step": 3532 }, { "epoch": 0.32, "grad_norm": 9.465945541540695, "learning_rate": 9.579210192457694e-06, "loss": 0.8331, "step": 3533 }, { "epoch": 0.32, "grad_norm": 5.092231595299681, "learning_rate": 9.578920084617879e-06, "loss": 0.8322, "step": 3534 }, { "epoch": 0.32, "grad_norm": 7.960821820322847, "learning_rate": 9.578629881203137e-06, "loss": 0.8479, "step": 3535 }, { "epoch": 0.32, "grad_norm": 6.185038301974877, "learning_rate": 9.57833958221952e-06, "loss": 0.8046, "step": 3536 }, { "epoch": 0.32, "grad_norm": 5.913534608168803, "learning_rate": 9.578049187673093e-06, "loss": 0.8453, "step": 3537 }, { "epoch": 0.32, "grad_norm": 7.117089176969306, "learning_rate": 9.577758697569916e-06, "loss": 0.8244, "step": 3538 }, { "epoch": 0.32, "grad_norm": 5.8022435831414265, "learning_rate": 9.577468111916051e-06, "loss": 0.7794, "step": 3539 }, { "epoch": 0.32, "grad_norm": 5.7594133363091276, "learning_rate": 9.577177430717562e-06, "loss": 0.8762, "step": 3540 }, { "epoch": 0.32, "grad_norm": 9.365907390698078, "learning_rate": 9.57688665398052e-06, "loss": 0.8585, "step": 3541 }, { "epoch": 0.32, "grad_norm": 8.228988452911693, "learning_rate": 9.576595781710992e-06, "loss": 0.8735, "step": 3542 }, { "epoch": 0.32, "grad_norm": 9.117395832446263, "learning_rate": 9.576304813915048e-06, "loss": 0.8257, "step": 3543 }, { "epoch": 0.32, "grad_norm": 5.7967095319313255, "learning_rate": 9.576013750598763e-06, "loss": 0.7856, "step": 3544 }, { "epoch": 0.32, "grad_norm": 7.879915063171007, "learning_rate": 9.575722591768212e-06, "loss": 0.8825, "step": 3545 }, { "epoch": 0.32, "grad_norm": 6.127377578624026, "learning_rate": 9.575431337429476e-06, "loss": 0.8597, "step": 3546 }, { "epoch": 0.32, "grad_norm": 7.494929632308291, "learning_rate": 9.575139987588628e-06, "loss": 0.8052, "step": 3547 }, { "epoch": 0.32, "grad_norm": 6.071826386100276, "learning_rate": 9.574848542251751e-06, "loss": 0.8832, "step": 3548 }, { "epoch": 0.32, "grad_norm": 5.909086387468175, "learning_rate": 9.574557001424932e-06, "loss": 0.8079, "step": 3549 }, { "epoch": 0.32, "grad_norm": 5.543895788913973, "learning_rate": 9.574265365114253e-06, "loss": 0.9742, "step": 3550 }, { "epoch": 0.32, "grad_norm": 10.261507511817912, "learning_rate": 9.5739736333258e-06, "loss": 0.8194, "step": 3551 }, { "epoch": 0.32, "grad_norm": 6.178597692573573, "learning_rate": 9.573681806065666e-06, "loss": 0.8389, "step": 3552 }, { "epoch": 0.32, "grad_norm": 6.136957961370826, "learning_rate": 9.573389883339943e-06, "loss": 0.8928, "step": 3553 }, { "epoch": 0.32, "grad_norm": 5.103130609146673, "learning_rate": 9.573097865154717e-06, "loss": 0.7965, "step": 3554 }, { "epoch": 0.32, "grad_norm": 7.531100683969125, "learning_rate": 9.57280575151609e-06, "loss": 0.9365, "step": 3555 }, { "epoch": 0.32, "grad_norm": 5.438707807336985, "learning_rate": 9.572513542430158e-06, "loss": 0.9404, "step": 3556 }, { "epoch": 0.32, "grad_norm": 12.510956189121323, "learning_rate": 9.572221237903019e-06, "loss": 0.9042, "step": 3557 }, { "epoch": 0.32, "grad_norm": 6.71511973466415, "learning_rate": 9.571928837940774e-06, "loss": 0.8138, "step": 3558 }, { "epoch": 0.32, "grad_norm": 5.530865009003993, "learning_rate": 9.571636342549528e-06, "loss": 0.8626, "step": 3559 }, { "epoch": 0.32, "grad_norm": 5.3570619655240455, "learning_rate": 9.571343751735385e-06, "loss": 0.8249, "step": 3560 }, { "epoch": 0.32, "grad_norm": 5.5808934796384735, "learning_rate": 9.571051065504451e-06, "loss": 0.8639, "step": 3561 }, { "epoch": 0.32, "grad_norm": 6.5879357594156325, "learning_rate": 9.570758283862838e-06, "loss": 0.8361, "step": 3562 }, { "epoch": 0.32, "grad_norm": 6.434025515774722, "learning_rate": 9.570465406816653e-06, "loss": 0.8593, "step": 3563 }, { "epoch": 0.32, "grad_norm": 7.080492888510507, "learning_rate": 9.570172434372014e-06, "loss": 0.8955, "step": 3564 }, { "epoch": 0.32, "grad_norm": 5.817578576454439, "learning_rate": 9.569879366535033e-06, "loss": 0.9307, "step": 3565 }, { "epoch": 0.32, "grad_norm": 6.4998311905496235, "learning_rate": 9.56958620331183e-06, "loss": 0.7963, "step": 3566 }, { "epoch": 0.32, "grad_norm": 6.9537449062458885, "learning_rate": 9.56929294470852e-06, "loss": 0.931, "step": 3567 }, { "epoch": 0.32, "grad_norm": 8.349300873220086, "learning_rate": 9.568999590731226e-06, "loss": 0.8261, "step": 3568 }, { "epoch": 0.32, "grad_norm": 5.821407050283483, "learning_rate": 9.568706141386072e-06, "loss": 0.9506, "step": 3569 }, { "epoch": 0.32, "grad_norm": 7.383183807424078, "learning_rate": 9.568412596679184e-06, "loss": 0.894, "step": 3570 }, { "epoch": 0.32, "grad_norm": 6.541549333089124, "learning_rate": 9.568118956616688e-06, "loss": 0.8499, "step": 3571 }, { "epoch": 0.32, "grad_norm": 6.981076109066628, "learning_rate": 9.567825221204711e-06, "loss": 0.8281, "step": 3572 }, { "epoch": 0.32, "grad_norm": 11.476418726736789, "learning_rate": 9.567531390449387e-06, "loss": 0.872, "step": 3573 }, { "epoch": 0.32, "grad_norm": 5.82834843583265, "learning_rate": 9.567237464356847e-06, "loss": 0.8483, "step": 3574 }, { "epoch": 0.32, "grad_norm": 8.752497297451695, "learning_rate": 9.566943442933228e-06, "loss": 0.8874, "step": 3575 }, { "epoch": 0.32, "grad_norm": 6.661386617341038, "learning_rate": 9.566649326184665e-06, "loss": 0.8321, "step": 3576 }, { "epoch": 0.32, "grad_norm": 9.408791812347566, "learning_rate": 9.566355114117299e-06, "loss": 0.8781, "step": 3577 }, { "epoch": 0.32, "grad_norm": 6.105298346332253, "learning_rate": 9.56606080673727e-06, "loss": 0.8238, "step": 3578 }, { "epoch": 0.32, "grad_norm": 6.3069398204274565, "learning_rate": 9.56576640405072e-06, "loss": 0.8696, "step": 3579 }, { "epoch": 0.32, "grad_norm": 6.785982028185541, "learning_rate": 9.565471906063797e-06, "loss": 0.889, "step": 3580 }, { "epoch": 0.32, "grad_norm": 7.628102150510248, "learning_rate": 9.565177312782645e-06, "loss": 0.8361, "step": 3581 }, { "epoch": 0.32, "grad_norm": 7.254412522562947, "learning_rate": 9.564882624213414e-06, "loss": 0.9199, "step": 3582 }, { "epoch": 0.32, "grad_norm": 6.811736623323117, "learning_rate": 9.564587840362255e-06, "loss": 0.7547, "step": 3583 }, { "epoch": 0.32, "grad_norm": 5.902416913932424, "learning_rate": 9.564292961235322e-06, "loss": 0.9145, "step": 3584 }, { "epoch": 0.32, "grad_norm": 7.710749511392299, "learning_rate": 9.56399798683877e-06, "loss": 0.8359, "step": 3585 }, { "epoch": 0.32, "grad_norm": 5.317837393414583, "learning_rate": 9.563702917178753e-06, "loss": 0.82, "step": 3586 }, { "epoch": 0.32, "grad_norm": 6.379340663770807, "learning_rate": 9.563407752261432e-06, "loss": 0.8441, "step": 3587 }, { "epoch": 0.32, "grad_norm": 12.964687228627342, "learning_rate": 9.56311249209297e-06, "loss": 0.9542, "step": 3588 }, { "epoch": 0.32, "grad_norm": 4.874905957816876, "learning_rate": 9.562817136679526e-06, "loss": 0.7692, "step": 3589 }, { "epoch": 0.32, "grad_norm": 9.550593555768234, "learning_rate": 9.562521686027266e-06, "loss": 0.8194, "step": 3590 }, { "epoch": 0.32, "grad_norm": 6.340986892956327, "learning_rate": 9.56222614014236e-06, "loss": 0.8947, "step": 3591 }, { "epoch": 0.32, "grad_norm": 5.444066094587974, "learning_rate": 9.561930499030973e-06, "loss": 0.9147, "step": 3592 }, { "epoch": 0.32, "grad_norm": 5.535042506238342, "learning_rate": 9.561634762699277e-06, "loss": 0.8043, "step": 3593 }, { "epoch": 0.32, "grad_norm": 6.351937957246336, "learning_rate": 9.561338931153446e-06, "loss": 0.8672, "step": 3594 }, { "epoch": 0.32, "grad_norm": 5.730848813544495, "learning_rate": 9.561043004399653e-06, "loss": 0.8443, "step": 3595 }, { "epoch": 0.32, "grad_norm": 7.256581815710109, "learning_rate": 9.560746982444075e-06, "loss": 0.9205, "step": 3596 }, { "epoch": 0.32, "grad_norm": 5.545959349596419, "learning_rate": 9.560450865292895e-06, "loss": 0.8474, "step": 3597 }, { "epoch": 0.32, "grad_norm": 5.3934775079334045, "learning_rate": 9.560154652952288e-06, "loss": 0.8384, "step": 3598 }, { "epoch": 0.32, "grad_norm": 5.750547701404012, "learning_rate": 9.559858345428438e-06, "loss": 0.8978, "step": 3599 }, { "epoch": 0.32, "grad_norm": 6.317195834458686, "learning_rate": 9.559561942727534e-06, "loss": 0.9396, "step": 3600 }, { "epoch": 0.32, "grad_norm": 6.285283119095217, "learning_rate": 9.559265444855757e-06, "loss": 0.8576, "step": 3601 }, { "epoch": 0.32, "grad_norm": 6.50972893020817, "learning_rate": 9.558968851819299e-06, "loss": 0.8267, "step": 3602 }, { "epoch": 0.32, "grad_norm": 7.35375508649762, "learning_rate": 9.558672163624352e-06, "loss": 0.8351, "step": 3603 }, { "epoch": 0.32, "grad_norm": 6.357580069865971, "learning_rate": 9.558375380277105e-06, "loss": 0.872, "step": 3604 }, { "epoch": 0.32, "grad_norm": 6.516381986570741, "learning_rate": 9.558078501783754e-06, "loss": 0.8076, "step": 3605 }, { "epoch": 0.32, "grad_norm": 5.53746657289533, "learning_rate": 9.557781528150498e-06, "loss": 0.8097, "step": 3606 }, { "epoch": 0.32, "grad_norm": 7.145415457239077, "learning_rate": 9.557484459383534e-06, "loss": 0.9258, "step": 3607 }, { "epoch": 0.32, "grad_norm": 7.018001378222648, "learning_rate": 9.557187295489061e-06, "loss": 0.8673, "step": 3608 }, { "epoch": 0.32, "grad_norm": 8.102729899828129, "learning_rate": 9.556890036473285e-06, "loss": 0.7436, "step": 3609 }, { "epoch": 0.32, "grad_norm": 5.739893018175834, "learning_rate": 9.55659268234241e-06, "loss": 0.9437, "step": 3610 }, { "epoch": 0.32, "grad_norm": 7.11421551326104, "learning_rate": 9.556295233102639e-06, "loss": 0.8122, "step": 3611 }, { "epoch": 0.32, "grad_norm": 5.011533995494063, "learning_rate": 9.555997688760184e-06, "loss": 0.7962, "step": 3612 }, { "epoch": 0.32, "grad_norm": 6.955880461251037, "learning_rate": 9.555700049321255e-06, "loss": 0.8828, "step": 3613 }, { "epoch": 0.32, "grad_norm": 7.694181517246303, "learning_rate": 9.555402314792064e-06, "loss": 0.8835, "step": 3614 }, { "epoch": 0.32, "grad_norm": 8.690225477390795, "learning_rate": 9.555104485178826e-06, "loss": 0.8627, "step": 3615 }, { "epoch": 0.32, "grad_norm": 4.840025375498851, "learning_rate": 9.554806560487758e-06, "loss": 0.8871, "step": 3616 }, { "epoch": 0.32, "grad_norm": 5.505621260247724, "learning_rate": 9.554508540725076e-06, "loss": 0.8452, "step": 3617 }, { "epoch": 0.32, "grad_norm": 6.76189597737123, "learning_rate": 9.554210425897004e-06, "loss": 0.9474, "step": 3618 }, { "epoch": 0.32, "grad_norm": 8.190951763224067, "learning_rate": 9.553912216009762e-06, "loss": 0.7859, "step": 3619 }, { "epoch": 0.32, "grad_norm": 6.380855987495822, "learning_rate": 9.553613911069575e-06, "loss": 0.8881, "step": 3620 }, { "epoch": 0.32, "grad_norm": 7.0780666386543505, "learning_rate": 9.553315511082672e-06, "loss": 0.8641, "step": 3621 }, { "epoch": 0.32, "grad_norm": 6.357535309015798, "learning_rate": 9.553017016055277e-06, "loss": 0.8591, "step": 3622 }, { "epoch": 0.32, "grad_norm": 6.9332422059054775, "learning_rate": 9.552718425993623e-06, "loss": 0.852, "step": 3623 }, { "epoch": 0.32, "grad_norm": 6.566532311851289, "learning_rate": 9.552419740903945e-06, "loss": 0.8524, "step": 3624 }, { "epoch": 0.32, "grad_norm": 6.242189253068217, "learning_rate": 9.55212096079247e-06, "loss": 0.8566, "step": 3625 }, { "epoch": 0.32, "grad_norm": 6.614162139022515, "learning_rate": 9.551822085665442e-06, "loss": 0.8595, "step": 3626 }, { "epoch": 0.32, "grad_norm": 7.3168672333218625, "learning_rate": 9.551523115529095e-06, "loss": 0.8494, "step": 3627 }, { "epoch": 0.32, "grad_norm": 6.214722240220039, "learning_rate": 9.551224050389671e-06, "loss": 0.8829, "step": 3628 }, { "epoch": 0.32, "grad_norm": 5.39593903947895, "learning_rate": 9.550924890253414e-06, "loss": 0.8124, "step": 3629 }, { "epoch": 0.32, "grad_norm": 5.202938227182617, "learning_rate": 9.550625635126564e-06, "loss": 0.9282, "step": 3630 }, { "epoch": 0.32, "grad_norm": 5.7481690130726095, "learning_rate": 9.55032628501537e-06, "loss": 0.8321, "step": 3631 }, { "epoch": 0.32, "grad_norm": 6.916545475208054, "learning_rate": 9.55002683992608e-06, "loss": 0.8357, "step": 3632 }, { "epoch": 0.32, "grad_norm": 5.613298054409408, "learning_rate": 9.549727299864944e-06, "loss": 0.8546, "step": 3633 }, { "epoch": 0.32, "grad_norm": 5.96381170049042, "learning_rate": 9.549427664838213e-06, "loss": 0.9277, "step": 3634 }, { "epoch": 0.32, "grad_norm": 7.012144668974965, "learning_rate": 9.549127934852144e-06, "loss": 0.869, "step": 3635 }, { "epoch": 0.32, "grad_norm": 5.41290605306973, "learning_rate": 9.548828109912993e-06, "loss": 0.8657, "step": 3636 }, { "epoch": 0.32, "grad_norm": 6.849696741824696, "learning_rate": 9.548528190027015e-06, "loss": 0.8095, "step": 3637 }, { "epoch": 0.32, "grad_norm": 6.918520928842044, "learning_rate": 9.548228175200474e-06, "loss": 0.9114, "step": 3638 }, { "epoch": 0.32, "grad_norm": 5.904793029193729, "learning_rate": 9.547928065439628e-06, "loss": 0.8066, "step": 3639 }, { "epoch": 0.32, "grad_norm": 6.943050747839899, "learning_rate": 9.547627860750744e-06, "loss": 0.8553, "step": 3640 }, { "epoch": 0.32, "grad_norm": 6.536200899930148, "learning_rate": 9.547327561140088e-06, "loss": 0.8643, "step": 3641 }, { "epoch": 0.32, "grad_norm": 8.791893664429239, "learning_rate": 9.547027166613928e-06, "loss": 0.9466, "step": 3642 }, { "epoch": 0.32, "grad_norm": 6.012767063053217, "learning_rate": 9.546726677178532e-06, "loss": 0.8739, "step": 3643 }, { "epoch": 0.33, "grad_norm": 5.877191253181346, "learning_rate": 9.546426092840176e-06, "loss": 0.7888, "step": 3644 }, { "epoch": 0.33, "grad_norm": 6.872271780293323, "learning_rate": 9.546125413605131e-06, "loss": 0.9118, "step": 3645 }, { "epoch": 0.33, "grad_norm": 6.588376972617259, "learning_rate": 9.545824639479675e-06, "loss": 0.8647, "step": 3646 }, { "epoch": 0.33, "grad_norm": 5.931808483366734, "learning_rate": 9.545523770470083e-06, "loss": 0.8781, "step": 3647 }, { "epoch": 0.33, "grad_norm": 7.184685313949505, "learning_rate": 9.545222806582636e-06, "loss": 0.8876, "step": 3648 }, { "epoch": 0.33, "grad_norm": 4.895307968677732, "learning_rate": 9.54492174782362e-06, "loss": 0.8829, "step": 3649 }, { "epoch": 0.33, "grad_norm": 7.968562103586833, "learning_rate": 9.544620594199314e-06, "loss": 0.8332, "step": 3650 }, { "epoch": 0.33, "grad_norm": 6.707689824882312, "learning_rate": 9.544319345716006e-06, "loss": 0.8467, "step": 3651 }, { "epoch": 0.33, "grad_norm": 6.023819707374252, "learning_rate": 9.544018002379983e-06, "loss": 0.8958, "step": 3652 }, { "epoch": 0.33, "grad_norm": 6.007150097353445, "learning_rate": 9.543716564197537e-06, "loss": 0.8053, "step": 3653 }, { "epoch": 0.33, "grad_norm": 6.5800666245311605, "learning_rate": 9.543415031174955e-06, "loss": 0.8399, "step": 3654 }, { "epoch": 0.33, "grad_norm": 5.701517811843559, "learning_rate": 9.543113403318535e-06, "loss": 0.9074, "step": 3655 }, { "epoch": 0.33, "grad_norm": 5.725607151905608, "learning_rate": 9.542811680634575e-06, "loss": 0.8839, "step": 3656 }, { "epoch": 0.33, "grad_norm": 5.344530155934459, "learning_rate": 9.542509863129367e-06, "loss": 0.8068, "step": 3657 }, { "epoch": 0.33, "grad_norm": 5.829513063575374, "learning_rate": 9.542207950809214e-06, "loss": 0.8008, "step": 3658 }, { "epoch": 0.33, "grad_norm": 6.813158808999898, "learning_rate": 9.541905943680417e-06, "loss": 0.861, "step": 3659 }, { "epoch": 0.33, "grad_norm": 8.338683059159795, "learning_rate": 9.541603841749281e-06, "loss": 0.8966, "step": 3660 }, { "epoch": 0.33, "grad_norm": 8.209107833960832, "learning_rate": 9.54130164502211e-06, "loss": 0.9108, "step": 3661 }, { "epoch": 0.33, "grad_norm": 6.904739315771883, "learning_rate": 9.540999353505214e-06, "loss": 0.8716, "step": 3662 }, { "epoch": 0.33, "grad_norm": 6.298362237321713, "learning_rate": 9.540696967204899e-06, "loss": 0.7907, "step": 3663 }, { "epoch": 0.33, "grad_norm": 6.6745802384293285, "learning_rate": 9.54039448612748e-06, "loss": 0.8326, "step": 3664 }, { "epoch": 0.33, "grad_norm": 5.608860018078317, "learning_rate": 9.540091910279268e-06, "loss": 0.8998, "step": 3665 }, { "epoch": 0.33, "grad_norm": 8.010272846414624, "learning_rate": 9.539789239666581e-06, "loss": 0.9593, "step": 3666 }, { "epoch": 0.33, "grad_norm": 5.238395071988777, "learning_rate": 9.539486474295735e-06, "loss": 0.8193, "step": 3667 }, { "epoch": 0.33, "grad_norm": 6.4991890676154975, "learning_rate": 9.539183614173053e-06, "loss": 0.8596, "step": 3668 }, { "epoch": 0.33, "grad_norm": 6.2773830361348315, "learning_rate": 9.538880659304852e-06, "loss": 0.8184, "step": 3669 }, { "epoch": 0.33, "grad_norm": 5.959664184711099, "learning_rate": 9.538577609697455e-06, "loss": 0.8946, "step": 3670 }, { "epoch": 0.33, "grad_norm": 6.554411934066708, "learning_rate": 9.538274465357193e-06, "loss": 0.847, "step": 3671 }, { "epoch": 0.33, "grad_norm": 10.087580267241716, "learning_rate": 9.537971226290388e-06, "loss": 0.9063, "step": 3672 }, { "epoch": 0.33, "grad_norm": 6.175930450932842, "learning_rate": 9.537667892503372e-06, "loss": 0.8998, "step": 3673 }, { "epoch": 0.33, "grad_norm": 6.761344346556277, "learning_rate": 9.537364464002476e-06, "loss": 0.8898, "step": 3674 }, { "epoch": 0.33, "grad_norm": 9.022367507919409, "learning_rate": 9.537060940794034e-06, "loss": 0.7735, "step": 3675 }, { "epoch": 0.33, "grad_norm": 6.071179829433664, "learning_rate": 9.53675732288438e-06, "loss": 0.8973, "step": 3676 }, { "epoch": 0.33, "grad_norm": 8.003399852503, "learning_rate": 9.536453610279855e-06, "loss": 0.8321, "step": 3677 }, { "epoch": 0.33, "grad_norm": 6.27473447232985, "learning_rate": 9.536149802986791e-06, "loss": 0.8484, "step": 3678 }, { "epoch": 0.33, "grad_norm": 10.299095992301412, "learning_rate": 9.535845901011537e-06, "loss": 0.8339, "step": 3679 }, { "epoch": 0.33, "grad_norm": 6.410998409590012, "learning_rate": 9.535541904360433e-06, "loss": 0.8886, "step": 3680 }, { "epoch": 0.33, "grad_norm": 5.617820983313239, "learning_rate": 9.535237813039824e-06, "loss": 0.8071, "step": 3681 }, { "epoch": 0.33, "grad_norm": 8.10709690413607, "learning_rate": 9.534933627056058e-06, "loss": 0.7827, "step": 3682 }, { "epoch": 0.33, "grad_norm": 7.0568007676573155, "learning_rate": 9.534629346415482e-06, "loss": 0.8318, "step": 3683 }, { "epoch": 0.33, "grad_norm": 8.26450949763047, "learning_rate": 9.534324971124452e-06, "loss": 0.9278, "step": 3684 }, { "epoch": 0.33, "grad_norm": 6.243416457858234, "learning_rate": 9.534020501189316e-06, "loss": 0.8398, "step": 3685 }, { "epoch": 0.33, "grad_norm": 5.454239366548421, "learning_rate": 9.533715936616432e-06, "loss": 0.7901, "step": 3686 }, { "epoch": 0.33, "grad_norm": 6.541788799792835, "learning_rate": 9.533411277412156e-06, "loss": 0.8215, "step": 3687 }, { "epoch": 0.33, "grad_norm": 6.814785213599591, "learning_rate": 9.533106523582849e-06, "loss": 0.8822, "step": 3688 }, { "epoch": 0.33, "grad_norm": 6.551371584659849, "learning_rate": 9.532801675134869e-06, "loss": 0.8078, "step": 3689 }, { "epoch": 0.33, "grad_norm": 5.370040477074983, "learning_rate": 9.53249673207458e-06, "loss": 0.7974, "step": 3690 }, { "epoch": 0.33, "grad_norm": 5.129265484975013, "learning_rate": 9.532191694408351e-06, "loss": 0.8853, "step": 3691 }, { "epoch": 0.33, "grad_norm": 10.629380055947278, "learning_rate": 9.531886562142542e-06, "loss": 0.8701, "step": 3692 }, { "epoch": 0.33, "grad_norm": 5.101324212214007, "learning_rate": 9.531581335283528e-06, "loss": 0.8469, "step": 3693 }, { "epoch": 0.33, "grad_norm": 7.032133990008763, "learning_rate": 9.531276013837678e-06, "loss": 0.829, "step": 3694 }, { "epoch": 0.33, "grad_norm": 5.761453891895764, "learning_rate": 9.530970597811365e-06, "loss": 0.8449, "step": 3695 }, { "epoch": 0.33, "grad_norm": 8.210846345511031, "learning_rate": 9.530665087210962e-06, "loss": 0.8748, "step": 3696 }, { "epoch": 0.33, "grad_norm": 6.896821472840307, "learning_rate": 9.530359482042846e-06, "loss": 0.8706, "step": 3697 }, { "epoch": 0.33, "grad_norm": 6.947541877224841, "learning_rate": 9.5300537823134e-06, "loss": 0.8099, "step": 3698 }, { "epoch": 0.33, "grad_norm": 6.278245781553129, "learning_rate": 9.529747988029e-06, "loss": 0.8913, "step": 3699 }, { "epoch": 0.33, "grad_norm": 8.262844261150006, "learning_rate": 9.52944209919603e-06, "loss": 0.9055, "step": 3700 }, { "epoch": 0.33, "grad_norm": 9.792405740057614, "learning_rate": 9.529136115820878e-06, "loss": 0.8476, "step": 3701 }, { "epoch": 0.33, "grad_norm": 8.33032553613948, "learning_rate": 9.528830037909927e-06, "loss": 0.8378, "step": 3702 }, { "epoch": 0.33, "grad_norm": 9.868030506736863, "learning_rate": 9.528523865469566e-06, "loss": 0.8506, "step": 3703 }, { "epoch": 0.33, "grad_norm": 6.425065562257276, "learning_rate": 9.528217598506189e-06, "loss": 0.7908, "step": 3704 }, { "epoch": 0.33, "grad_norm": 6.855087504312761, "learning_rate": 9.527911237026184e-06, "loss": 0.8505, "step": 3705 }, { "epoch": 0.33, "grad_norm": 5.4114711282143695, "learning_rate": 9.52760478103595e-06, "loss": 0.892, "step": 3706 }, { "epoch": 0.33, "grad_norm": 5.934746273380926, "learning_rate": 9.527298230541879e-06, "loss": 0.8119, "step": 3707 }, { "epoch": 0.33, "grad_norm": 6.485316085226549, "learning_rate": 9.526991585550372e-06, "loss": 0.828, "step": 3708 }, { "epoch": 0.33, "grad_norm": 5.047054290539383, "learning_rate": 9.52668484606783e-06, "loss": 0.8175, "step": 3709 }, { "epoch": 0.33, "grad_norm": 5.604450584497912, "learning_rate": 9.526378012100657e-06, "loss": 0.8588, "step": 3710 }, { "epoch": 0.33, "grad_norm": 8.529257932331767, "learning_rate": 9.526071083655255e-06, "loss": 0.8614, "step": 3711 }, { "epoch": 0.33, "grad_norm": 7.183911066760248, "learning_rate": 9.52576406073803e-06, "loss": 0.8601, "step": 3712 }, { "epoch": 0.33, "grad_norm": 6.370669633130773, "learning_rate": 9.525456943355393e-06, "loss": 0.8721, "step": 3713 }, { "epoch": 0.33, "grad_norm": 6.027485464209958, "learning_rate": 9.525149731513752e-06, "loss": 0.8801, "step": 3714 }, { "epoch": 0.33, "grad_norm": 5.84320924935301, "learning_rate": 9.524842425219519e-06, "loss": 0.9022, "step": 3715 }, { "epoch": 0.33, "grad_norm": 4.030887965649005, "learning_rate": 9.52453502447911e-06, "loss": 0.7968, "step": 3716 }, { "epoch": 0.33, "grad_norm": 6.86208440511362, "learning_rate": 9.524227529298942e-06, "loss": 0.8382, "step": 3717 }, { "epoch": 0.33, "grad_norm": 7.380756114451333, "learning_rate": 9.523919939685433e-06, "loss": 0.9008, "step": 3718 }, { "epoch": 0.33, "grad_norm": 4.970636895730147, "learning_rate": 9.523612255645e-06, "loss": 0.8659, "step": 3719 }, { "epoch": 0.33, "grad_norm": 9.035561723833426, "learning_rate": 9.52330447718407e-06, "loss": 0.8289, "step": 3720 }, { "epoch": 0.33, "grad_norm": 6.576031975913975, "learning_rate": 9.522996604309065e-06, "loss": 0.8906, "step": 3721 }, { "epoch": 0.33, "grad_norm": 5.126273131961999, "learning_rate": 9.52268863702641e-06, "loss": 0.8673, "step": 3722 }, { "epoch": 0.33, "grad_norm": 8.989672233155252, "learning_rate": 9.522380575342534e-06, "loss": 0.8657, "step": 3723 }, { "epoch": 0.33, "grad_norm": 5.739046111690475, "learning_rate": 9.522072419263867e-06, "loss": 0.7641, "step": 3724 }, { "epoch": 0.33, "grad_norm": 6.077397089351855, "learning_rate": 9.521764168796842e-06, "loss": 0.8907, "step": 3725 }, { "epoch": 0.33, "grad_norm": 5.106916185813112, "learning_rate": 9.521455823947893e-06, "loss": 0.8885, "step": 3726 }, { "epoch": 0.33, "grad_norm": 7.12483961363923, "learning_rate": 9.521147384723455e-06, "loss": 0.8842, "step": 3727 }, { "epoch": 0.33, "grad_norm": 5.309374569211711, "learning_rate": 9.520838851129965e-06, "loss": 0.8781, "step": 3728 }, { "epoch": 0.33, "grad_norm": 11.05057475917475, "learning_rate": 9.520530223173868e-06, "loss": 0.8296, "step": 3729 }, { "epoch": 0.33, "grad_norm": 6.67937120316523, "learning_rate": 9.5202215008616e-06, "loss": 0.8343, "step": 3730 }, { "epoch": 0.33, "grad_norm": 7.934933139457643, "learning_rate": 9.519912684199608e-06, "loss": 0.8484, "step": 3731 }, { "epoch": 0.33, "grad_norm": 5.875917074405749, "learning_rate": 9.519603773194335e-06, "loss": 0.8276, "step": 3732 }, { "epoch": 0.33, "grad_norm": 6.326086630867792, "learning_rate": 9.519294767852234e-06, "loss": 0.8375, "step": 3733 }, { "epoch": 0.33, "grad_norm": 6.813928287659874, "learning_rate": 9.51898566817975e-06, "loss": 0.8669, "step": 3734 }, { "epoch": 0.33, "grad_norm": 4.6909444791732255, "learning_rate": 9.518676474183337e-06, "loss": 0.8502, "step": 3735 }, { "epoch": 0.33, "grad_norm": 5.506939653731695, "learning_rate": 9.518367185869449e-06, "loss": 0.8095, "step": 3736 }, { "epoch": 0.33, "grad_norm": 5.884874803178075, "learning_rate": 9.518057803244541e-06, "loss": 0.9271, "step": 3737 }, { "epoch": 0.33, "grad_norm": 8.489366685503393, "learning_rate": 9.517748326315069e-06, "loss": 0.874, "step": 3738 }, { "epoch": 0.33, "grad_norm": 5.234189803602368, "learning_rate": 9.517438755087496e-06, "loss": 0.7762, "step": 3739 }, { "epoch": 0.33, "grad_norm": 7.5263246865537, "learning_rate": 9.517129089568283e-06, "loss": 0.8799, "step": 3740 }, { "epoch": 0.33, "grad_norm": 7.03703581512222, "learning_rate": 9.516819329763891e-06, "loss": 0.8676, "step": 3741 }, { "epoch": 0.33, "grad_norm": 6.086825876688838, "learning_rate": 9.516509475680787e-06, "loss": 0.8225, "step": 3742 }, { "epoch": 0.33, "grad_norm": 8.138970682632438, "learning_rate": 9.516199527325438e-06, "loss": 0.8124, "step": 3743 }, { "epoch": 0.33, "grad_norm": 8.41225374783552, "learning_rate": 9.515889484704317e-06, "loss": 0.899, "step": 3744 }, { "epoch": 0.33, "grad_norm": 5.560385359151254, "learning_rate": 9.51557934782389e-06, "loss": 0.9314, "step": 3745 }, { "epoch": 0.33, "grad_norm": 7.247378635443559, "learning_rate": 9.515269116690634e-06, "loss": 0.8463, "step": 3746 }, { "epoch": 0.33, "grad_norm": 5.461447238197859, "learning_rate": 9.514958791311023e-06, "loss": 0.7991, "step": 3747 }, { "epoch": 0.33, "grad_norm": 6.183569608367944, "learning_rate": 9.514648371691537e-06, "loss": 0.8631, "step": 3748 }, { "epoch": 0.33, "grad_norm": 6.8211217104102175, "learning_rate": 9.51433785783865e-06, "loss": 0.898, "step": 3749 }, { "epoch": 0.33, "grad_norm": 6.014793092797788, "learning_rate": 9.514027249758848e-06, "loss": 0.887, "step": 3750 }, { "epoch": 0.33, "grad_norm": 7.481200404598645, "learning_rate": 9.513716547458614e-06, "loss": 0.8796, "step": 3751 }, { "epoch": 0.33, "grad_norm": 6.769288352484387, "learning_rate": 9.513405750944428e-06, "loss": 0.828, "step": 3752 }, { "epoch": 0.33, "grad_norm": 6.358787818373338, "learning_rate": 9.513094860222784e-06, "loss": 0.8256, "step": 3753 }, { "epoch": 0.33, "grad_norm": 7.153201421312751, "learning_rate": 9.51278387530017e-06, "loss": 0.8475, "step": 3754 }, { "epoch": 0.33, "grad_norm": 7.191054295442497, "learning_rate": 9.512472796183072e-06, "loss": 0.8489, "step": 3755 }, { "epoch": 0.34, "grad_norm": 6.615478994081103, "learning_rate": 9.512161622877989e-06, "loss": 0.8439, "step": 3756 }, { "epoch": 0.34, "grad_norm": 5.952598738946193, "learning_rate": 9.511850355391412e-06, "loss": 0.8245, "step": 3757 }, { "epoch": 0.34, "grad_norm": 6.652171037892268, "learning_rate": 9.51153899372984e-06, "loss": 0.8267, "step": 3758 }, { "epoch": 0.34, "grad_norm": 5.846863457111111, "learning_rate": 9.511227537899773e-06, "loss": 0.8539, "step": 3759 }, { "epoch": 0.34, "grad_norm": 6.273774034812369, "learning_rate": 9.510915987907708e-06, "loss": 0.8751, "step": 3760 }, { "epoch": 0.34, "grad_norm": 7.9542678378893585, "learning_rate": 9.510604343760152e-06, "loss": 0.8645, "step": 3761 }, { "epoch": 0.34, "grad_norm": 5.461339285576556, "learning_rate": 9.510292605463609e-06, "loss": 0.8497, "step": 3762 }, { "epoch": 0.34, "grad_norm": 6.050102990424352, "learning_rate": 9.509980773024583e-06, "loss": 0.8516, "step": 3763 }, { "epoch": 0.34, "grad_norm": 4.951712935379132, "learning_rate": 9.509668846449586e-06, "loss": 0.8364, "step": 3764 }, { "epoch": 0.34, "grad_norm": 8.404082498973393, "learning_rate": 9.50935682574513e-06, "loss": 0.8997, "step": 3765 }, { "epoch": 0.34, "grad_norm": 6.514381336972579, "learning_rate": 9.509044710917723e-06, "loss": 0.8305, "step": 3766 }, { "epoch": 0.34, "grad_norm": 6.397477389880263, "learning_rate": 9.508732501973883e-06, "loss": 0.871, "step": 3767 }, { "epoch": 0.34, "grad_norm": 4.895512096232699, "learning_rate": 9.508420198920128e-06, "loss": 0.7947, "step": 3768 }, { "epoch": 0.34, "grad_norm": 6.188782382227938, "learning_rate": 9.508107801762972e-06, "loss": 0.8238, "step": 3769 }, { "epoch": 0.34, "grad_norm": 6.368487492040881, "learning_rate": 9.507795310508938e-06, "loss": 0.7848, "step": 3770 }, { "epoch": 0.34, "grad_norm": 7.368776565094237, "learning_rate": 9.50748272516455e-06, "loss": 0.9058, "step": 3771 }, { "epoch": 0.34, "grad_norm": 6.112407427607459, "learning_rate": 9.50717004573633e-06, "loss": 0.8901, "step": 3772 }, { "epoch": 0.34, "grad_norm": 6.798450863140684, "learning_rate": 9.506857272230808e-06, "loss": 0.9335, "step": 3773 }, { "epoch": 0.34, "grad_norm": 6.792386108033275, "learning_rate": 9.50654440465451e-06, "loss": 0.9043, "step": 3774 }, { "epoch": 0.34, "grad_norm": 6.875447161592311, "learning_rate": 9.506231443013966e-06, "loss": 0.833, "step": 3775 }, { "epoch": 0.34, "grad_norm": 5.70724910875346, "learning_rate": 9.50591838731571e-06, "loss": 0.9261, "step": 3776 }, { "epoch": 0.34, "grad_norm": 5.035179483827681, "learning_rate": 9.505605237566273e-06, "loss": 0.857, "step": 3777 }, { "epoch": 0.34, "grad_norm": 5.681895221381081, "learning_rate": 9.505291993772195e-06, "loss": 0.9033, "step": 3778 }, { "epoch": 0.34, "grad_norm": 7.59832825554872, "learning_rate": 9.504978655940013e-06, "loss": 0.8348, "step": 3779 }, { "epoch": 0.34, "grad_norm": 6.95465567398358, "learning_rate": 9.504665224076268e-06, "loss": 0.8337, "step": 3780 }, { "epoch": 0.34, "grad_norm": 6.780315846969243, "learning_rate": 9.504351698187503e-06, "loss": 0.8737, "step": 3781 }, { "epoch": 0.34, "grad_norm": 6.620895034451064, "learning_rate": 9.504038078280257e-06, "loss": 0.8567, "step": 3782 }, { "epoch": 0.34, "grad_norm": 7.054188871599694, "learning_rate": 9.503724364361083e-06, "loss": 0.8781, "step": 3783 }, { "epoch": 0.34, "grad_norm": 8.477316506862064, "learning_rate": 9.503410556436523e-06, "loss": 0.7884, "step": 3784 }, { "epoch": 0.34, "grad_norm": 6.511885021916745, "learning_rate": 9.503096654513132e-06, "loss": 0.827, "step": 3785 }, { "epoch": 0.34, "grad_norm": 4.898500794400286, "learning_rate": 9.50278265859746e-06, "loss": 0.8118, "step": 3786 }, { "epoch": 0.34, "grad_norm": 5.776420018689311, "learning_rate": 9.502468568696059e-06, "loss": 0.8617, "step": 3787 }, { "epoch": 0.34, "grad_norm": 6.3048458265796015, "learning_rate": 9.50215438481549e-06, "loss": 0.7822, "step": 3788 }, { "epoch": 0.34, "grad_norm": 5.5657949447840185, "learning_rate": 9.501840106962306e-06, "loss": 0.7977, "step": 3789 }, { "epoch": 0.34, "grad_norm": 5.728054767775824, "learning_rate": 9.501525735143068e-06, "loss": 0.8375, "step": 3790 }, { "epoch": 0.34, "grad_norm": 5.457081430010932, "learning_rate": 9.50121126936434e-06, "loss": 0.8477, "step": 3791 }, { "epoch": 0.34, "grad_norm": 5.0782433011071335, "learning_rate": 9.500896709632682e-06, "loss": 0.8529, "step": 3792 }, { "epoch": 0.34, "grad_norm": 6.7552304511393215, "learning_rate": 9.500582055954664e-06, "loss": 0.8775, "step": 3793 }, { "epoch": 0.34, "grad_norm": 4.92949003381526, "learning_rate": 9.50026730833685e-06, "loss": 0.8454, "step": 3794 }, { "epoch": 0.34, "grad_norm": 5.422065188794628, "learning_rate": 9.499952466785813e-06, "loss": 0.8162, "step": 3795 }, { "epoch": 0.34, "grad_norm": 8.169358958917492, "learning_rate": 9.49963753130812e-06, "loss": 0.8635, "step": 3796 }, { "epoch": 0.34, "grad_norm": 5.513608321270964, "learning_rate": 9.499322501910351e-06, "loss": 0.8637, "step": 3797 }, { "epoch": 0.34, "grad_norm": 7.110536145709867, "learning_rate": 9.499007378599076e-06, "loss": 0.8116, "step": 3798 }, { "epoch": 0.34, "grad_norm": 11.534080314436258, "learning_rate": 9.498692161380877e-06, "loss": 0.8809, "step": 3799 }, { "epoch": 0.34, "grad_norm": 5.608406478039386, "learning_rate": 9.49837685026233e-06, "loss": 0.938, "step": 3800 }, { "epoch": 0.34, "grad_norm": 8.037031172188538, "learning_rate": 9.498061445250017e-06, "loss": 0.8658, "step": 3801 }, { "epoch": 0.34, "grad_norm": 8.032756886062943, "learning_rate": 9.497745946350524e-06, "loss": 0.821, "step": 3802 }, { "epoch": 0.34, "grad_norm": 6.360978976709623, "learning_rate": 9.497430353570431e-06, "loss": 0.8721, "step": 3803 }, { "epoch": 0.34, "grad_norm": 7.339875951814523, "learning_rate": 9.49711466691633e-06, "loss": 0.8387, "step": 3804 }, { "epoch": 0.34, "grad_norm": 6.333001569965068, "learning_rate": 9.496798886394809e-06, "loss": 0.8053, "step": 3805 }, { "epoch": 0.34, "grad_norm": 6.210439779830197, "learning_rate": 9.496483012012458e-06, "loss": 0.766, "step": 3806 }, { "epoch": 0.34, "grad_norm": 4.373852368358451, "learning_rate": 9.496167043775873e-06, "loss": 0.8654, "step": 3807 }, { "epoch": 0.34, "grad_norm": 5.889944431966112, "learning_rate": 9.495850981691645e-06, "loss": 0.8893, "step": 3808 }, { "epoch": 0.34, "grad_norm": 4.911679220822672, "learning_rate": 9.495534825766376e-06, "loss": 0.8482, "step": 3809 }, { "epoch": 0.34, "grad_norm": 6.322465434979383, "learning_rate": 9.495218576006663e-06, "loss": 0.859, "step": 3810 }, { "epoch": 0.34, "grad_norm": 5.413139185272793, "learning_rate": 9.494902232419103e-06, "loss": 0.7437, "step": 3811 }, { "epoch": 0.34, "grad_norm": 9.590724933742544, "learning_rate": 9.494585795010304e-06, "loss": 0.8508, "step": 3812 }, { "epoch": 0.34, "grad_norm": 6.965268361289983, "learning_rate": 9.494269263786871e-06, "loss": 0.8621, "step": 3813 }, { "epoch": 0.34, "grad_norm": 6.8286335956164255, "learning_rate": 9.493952638755409e-06, "loss": 0.896, "step": 3814 }, { "epoch": 0.34, "grad_norm": 5.972433183208948, "learning_rate": 9.493635919922529e-06, "loss": 0.8459, "step": 3815 }, { "epoch": 0.34, "grad_norm": 4.904377352205877, "learning_rate": 9.493319107294835e-06, "loss": 0.8647, "step": 3816 }, { "epoch": 0.34, "grad_norm": 7.1348445320086675, "learning_rate": 9.49300220087895e-06, "loss": 0.8368, "step": 3817 }, { "epoch": 0.34, "grad_norm": 6.758166138928909, "learning_rate": 9.49268520068148e-06, "loss": 0.8088, "step": 3818 }, { "epoch": 0.34, "grad_norm": 5.646194225752188, "learning_rate": 9.492368106709046e-06, "loss": 0.8055, "step": 3819 }, { "epoch": 0.34, "grad_norm": 7.034540016930402, "learning_rate": 9.492050918968267e-06, "loss": 0.8128, "step": 3820 }, { "epoch": 0.34, "grad_norm": 6.297212488908784, "learning_rate": 9.491733637465764e-06, "loss": 0.852, "step": 3821 }, { "epoch": 0.34, "grad_norm": 6.368007037607074, "learning_rate": 9.491416262208155e-06, "loss": 0.8101, "step": 3822 }, { "epoch": 0.34, "grad_norm": 4.9641992071244685, "learning_rate": 9.491098793202068e-06, "loss": 0.8775, "step": 3823 }, { "epoch": 0.34, "grad_norm": 6.755214162181776, "learning_rate": 9.49078123045413e-06, "loss": 0.9137, "step": 3824 }, { "epoch": 0.34, "grad_norm": 6.266613587890417, "learning_rate": 9.490463573970969e-06, "loss": 0.8324, "step": 3825 }, { "epoch": 0.34, "grad_norm": 6.430524432709173, "learning_rate": 9.490145823759213e-06, "loss": 0.8731, "step": 3826 }, { "epoch": 0.34, "grad_norm": 5.379278897926894, "learning_rate": 9.489827979825496e-06, "loss": 0.8396, "step": 3827 }, { "epoch": 0.34, "grad_norm": 6.044732234523021, "learning_rate": 9.489510042176456e-06, "loss": 0.8541, "step": 3828 }, { "epoch": 0.34, "grad_norm": 7.209323934333235, "learning_rate": 9.489192010818722e-06, "loss": 0.8351, "step": 3829 }, { "epoch": 0.34, "grad_norm": 6.394154234455542, "learning_rate": 9.488873885758937e-06, "loss": 0.9217, "step": 3830 }, { "epoch": 0.34, "grad_norm": 5.441841477215282, "learning_rate": 9.48855566700374e-06, "loss": 0.8094, "step": 3831 }, { "epoch": 0.34, "grad_norm": 6.442577976455668, "learning_rate": 9.488237354559775e-06, "loss": 0.8283, "step": 3832 }, { "epoch": 0.34, "grad_norm": 7.3672605070953345, "learning_rate": 9.487918948433683e-06, "loss": 0.8289, "step": 3833 }, { "epoch": 0.34, "grad_norm": 5.257439576060358, "learning_rate": 9.48760044863211e-06, "loss": 0.9499, "step": 3834 }, { "epoch": 0.34, "grad_norm": 6.734184023413846, "learning_rate": 9.487281855161707e-06, "loss": 0.8488, "step": 3835 }, { "epoch": 0.34, "grad_norm": 5.130516886262024, "learning_rate": 9.486963168029122e-06, "loss": 0.8018, "step": 3836 }, { "epoch": 0.34, "grad_norm": 6.437399026959671, "learning_rate": 9.486644387241006e-06, "loss": 0.8625, "step": 3837 }, { "epoch": 0.34, "grad_norm": 10.136175968527017, "learning_rate": 9.486325512804015e-06, "loss": 0.8726, "step": 3838 }, { "epoch": 0.34, "grad_norm": 5.3728614915417126, "learning_rate": 9.486006544724805e-06, "loss": 0.9571, "step": 3839 }, { "epoch": 0.34, "grad_norm": 6.081438764102822, "learning_rate": 9.48568748301003e-06, "loss": 0.8895, "step": 3840 }, { "epoch": 0.34, "grad_norm": 6.208663225690133, "learning_rate": 9.485368327666354e-06, "loss": 0.8454, "step": 3841 }, { "epoch": 0.34, "grad_norm": 7.539375474706128, "learning_rate": 9.485049078700437e-06, "loss": 0.8759, "step": 3842 }, { "epoch": 0.34, "grad_norm": 6.8562878039738, "learning_rate": 9.484729736118943e-06, "loss": 0.8098, "step": 3843 }, { "epoch": 0.34, "grad_norm": 5.524471618434453, "learning_rate": 9.484410299928535e-06, "loss": 0.8821, "step": 3844 }, { "epoch": 0.34, "grad_norm": 5.670055824816685, "learning_rate": 9.484090770135885e-06, "loss": 0.7858, "step": 3845 }, { "epoch": 0.34, "grad_norm": 4.684532640585301, "learning_rate": 9.483771146747658e-06, "loss": 0.8081, "step": 3846 }, { "epoch": 0.34, "grad_norm": 6.216452262121781, "learning_rate": 9.483451429770529e-06, "loss": 0.8153, "step": 3847 }, { "epoch": 0.34, "grad_norm": 5.599131626390742, "learning_rate": 9.48313161921117e-06, "loss": 0.7956, "step": 3848 }, { "epoch": 0.34, "grad_norm": 6.409077961262756, "learning_rate": 9.482811715076256e-06, "loss": 0.8395, "step": 3849 }, { "epoch": 0.34, "grad_norm": 6.107259224834636, "learning_rate": 9.482491717372464e-06, "loss": 0.8102, "step": 3850 }, { "epoch": 0.34, "grad_norm": 6.236455514075973, "learning_rate": 9.482171626106476e-06, "loss": 0.7661, "step": 3851 }, { "epoch": 0.34, "grad_norm": 5.133823257787381, "learning_rate": 9.481851441284967e-06, "loss": 0.8189, "step": 3852 }, { "epoch": 0.34, "grad_norm": 7.418793699895365, "learning_rate": 9.481531162914628e-06, "loss": 0.8624, "step": 3853 }, { "epoch": 0.34, "grad_norm": 7.352080205285083, "learning_rate": 9.48121079100214e-06, "loss": 0.876, "step": 3854 }, { "epoch": 0.34, "grad_norm": 7.229177211555696, "learning_rate": 9.48089032555419e-06, "loss": 0.8822, "step": 3855 }, { "epoch": 0.34, "grad_norm": 6.648797124318408, "learning_rate": 9.480569766577466e-06, "loss": 0.929, "step": 3856 }, { "epoch": 0.34, "grad_norm": 7.090931849841327, "learning_rate": 9.480249114078662e-06, "loss": 0.8021, "step": 3857 }, { "epoch": 0.34, "grad_norm": 4.858880982888204, "learning_rate": 9.47992836806447e-06, "loss": 0.8663, "step": 3858 }, { "epoch": 0.34, "grad_norm": 7.321479211496744, "learning_rate": 9.479607528541582e-06, "loss": 0.8924, "step": 3859 }, { "epoch": 0.34, "grad_norm": 6.222967023981757, "learning_rate": 9.479286595516698e-06, "loss": 0.898, "step": 3860 }, { "epoch": 0.34, "grad_norm": 6.285195753771031, "learning_rate": 9.478965568996516e-06, "loss": 0.9163, "step": 3861 }, { "epoch": 0.34, "grad_norm": 7.754916364535729, "learning_rate": 9.478644448987737e-06, "loss": 0.8194, "step": 3862 }, { "epoch": 0.34, "grad_norm": 6.656211267799222, "learning_rate": 9.478323235497062e-06, "loss": 0.8295, "step": 3863 }, { "epoch": 0.34, "grad_norm": 5.502014022138221, "learning_rate": 9.478001928531197e-06, "loss": 0.8849, "step": 3864 }, { "epoch": 0.34, "grad_norm": 7.495767554558469, "learning_rate": 9.47768052809685e-06, "loss": 0.8495, "step": 3865 }, { "epoch": 0.34, "grad_norm": 6.243738926776204, "learning_rate": 9.477359034200727e-06, "loss": 0.8424, "step": 3866 }, { "epoch": 0.34, "grad_norm": 7.6702341436533885, "learning_rate": 9.477037446849538e-06, "loss": 0.848, "step": 3867 }, { "epoch": 0.35, "grad_norm": 3.887446484326033, "learning_rate": 9.476715766049997e-06, "loss": 0.9434, "step": 3868 }, { "epoch": 0.35, "grad_norm": 4.944062244856191, "learning_rate": 9.47639399180882e-06, "loss": 0.8621, "step": 3869 }, { "epoch": 0.35, "grad_norm": 6.306804907096448, "learning_rate": 9.47607212413272e-06, "loss": 0.8789, "step": 3870 }, { "epoch": 0.35, "grad_norm": 6.449146271247235, "learning_rate": 9.47575016302842e-06, "loss": 0.8102, "step": 3871 }, { "epoch": 0.35, "grad_norm": 7.773554480955309, "learning_rate": 9.475428108502633e-06, "loss": 0.8819, "step": 3872 }, { "epoch": 0.35, "grad_norm": 5.612145213437585, "learning_rate": 9.475105960562088e-06, "loss": 0.8216, "step": 3873 }, { "epoch": 0.35, "grad_norm": 6.4768192578436246, "learning_rate": 9.474783719213505e-06, "loss": 0.8365, "step": 3874 }, { "epoch": 0.35, "grad_norm": 6.837452456051207, "learning_rate": 9.474461384463612e-06, "loss": 0.9091, "step": 3875 }, { "epoch": 0.35, "grad_norm": 5.640339434296615, "learning_rate": 9.474138956319136e-06, "loss": 0.8162, "step": 3876 }, { "epoch": 0.35, "grad_norm": 7.486134291606943, "learning_rate": 9.473816434786807e-06, "loss": 0.8462, "step": 3877 }, { "epoch": 0.35, "grad_norm": 7.628469275086714, "learning_rate": 9.473493819873358e-06, "loss": 0.8338, "step": 3878 }, { "epoch": 0.35, "grad_norm": 7.088135000964081, "learning_rate": 9.473171111585522e-06, "loss": 0.8548, "step": 3879 }, { "epoch": 0.35, "grad_norm": 5.168878844813929, "learning_rate": 9.472848309930036e-06, "loss": 0.8006, "step": 3880 }, { "epoch": 0.35, "grad_norm": 5.953254628962278, "learning_rate": 9.472525414913637e-06, "loss": 0.7236, "step": 3881 }, { "epoch": 0.35, "grad_norm": 6.1051727031882175, "learning_rate": 9.472202426543064e-06, "loss": 0.8983, "step": 3882 }, { "epoch": 0.35, "grad_norm": 6.693922117372038, "learning_rate": 9.471879344825059e-06, "loss": 0.8917, "step": 3883 }, { "epoch": 0.35, "grad_norm": 6.8261310338093315, "learning_rate": 9.471556169766367e-06, "loss": 0.8016, "step": 3884 }, { "epoch": 0.35, "grad_norm": 7.847072377642449, "learning_rate": 9.471232901373732e-06, "loss": 0.834, "step": 3885 }, { "epoch": 0.35, "grad_norm": 6.269041846266168, "learning_rate": 9.4709095396539e-06, "loss": 0.8539, "step": 3886 }, { "epoch": 0.35, "grad_norm": 8.407160949736248, "learning_rate": 9.470586084613627e-06, "loss": 0.9528, "step": 3887 }, { "epoch": 0.35, "grad_norm": 6.625429327017089, "learning_rate": 9.470262536259655e-06, "loss": 0.8272, "step": 3888 }, { "epoch": 0.35, "grad_norm": 8.137311813709639, "learning_rate": 9.469938894598744e-06, "loss": 0.9183, "step": 3889 }, { "epoch": 0.35, "grad_norm": 7.106720971149278, "learning_rate": 9.46961515963765e-06, "loss": 0.8345, "step": 3890 }, { "epoch": 0.35, "grad_norm": 5.230130636976884, "learning_rate": 9.469291331383126e-06, "loss": 0.8055, "step": 3891 }, { "epoch": 0.35, "grad_norm": 6.681413279294933, "learning_rate": 9.468967409841933e-06, "loss": 0.8317, "step": 3892 }, { "epoch": 0.35, "grad_norm": 7.987934213115087, "learning_rate": 9.46864339502083e-06, "loss": 0.8281, "step": 3893 }, { "epoch": 0.35, "grad_norm": 7.518269841764804, "learning_rate": 9.468319286926586e-06, "loss": 0.8164, "step": 3894 }, { "epoch": 0.35, "grad_norm": 7.076778144447679, "learning_rate": 9.46799508556596e-06, "loss": 0.8665, "step": 3895 }, { "epoch": 0.35, "grad_norm": 7.287434071373366, "learning_rate": 9.467670790945723e-06, "loss": 0.8614, "step": 3896 }, { "epoch": 0.35, "grad_norm": 7.466698001605577, "learning_rate": 9.467346403072641e-06, "loss": 0.8689, "step": 3897 }, { "epoch": 0.35, "grad_norm": 6.052599027321478, "learning_rate": 9.467021921953488e-06, "loss": 0.8313, "step": 3898 }, { "epoch": 0.35, "grad_norm": 5.099035528307151, "learning_rate": 9.466697347595032e-06, "loss": 0.8008, "step": 3899 }, { "epoch": 0.35, "grad_norm": 4.508056946263018, "learning_rate": 9.466372680004052e-06, "loss": 0.9037, "step": 3900 }, { "epoch": 0.35, "grad_norm": 6.152639120302343, "learning_rate": 9.466047919187324e-06, "loss": 0.8671, "step": 3901 }, { "epoch": 0.35, "grad_norm": 8.185106288406503, "learning_rate": 9.465723065151626e-06, "loss": 0.8937, "step": 3902 }, { "epoch": 0.35, "grad_norm": 7.584100976154333, "learning_rate": 9.465398117903739e-06, "loss": 0.8267, "step": 3903 }, { "epoch": 0.35, "grad_norm": 5.74324567754363, "learning_rate": 9.465073077450444e-06, "loss": 0.8947, "step": 3904 }, { "epoch": 0.35, "grad_norm": 6.440556881118943, "learning_rate": 9.46474794379853e-06, "loss": 0.8921, "step": 3905 }, { "epoch": 0.35, "grad_norm": 8.67447912555166, "learning_rate": 9.464422716954777e-06, "loss": 0.8318, "step": 3906 }, { "epoch": 0.35, "grad_norm": 5.617937706789082, "learning_rate": 9.464097396925978e-06, "loss": 0.8725, "step": 3907 }, { "epoch": 0.35, "grad_norm": 5.557031987121199, "learning_rate": 9.463771983718923e-06, "loss": 0.8042, "step": 3908 }, { "epoch": 0.35, "grad_norm": 8.404633023299303, "learning_rate": 9.4634464773404e-06, "loss": 0.8732, "step": 3909 }, { "epoch": 0.35, "grad_norm": 6.138552905280347, "learning_rate": 9.46312087779721e-06, "loss": 0.8243, "step": 3910 }, { "epoch": 0.35, "grad_norm": 6.836074439865496, "learning_rate": 9.462795185096145e-06, "loss": 0.8562, "step": 3911 }, { "epoch": 0.35, "grad_norm": 4.96404453043018, "learning_rate": 9.462469399244002e-06, "loss": 0.8596, "step": 3912 }, { "epoch": 0.35, "grad_norm": 5.787811332256712, "learning_rate": 9.462143520247587e-06, "loss": 0.8963, "step": 3913 }, { "epoch": 0.35, "grad_norm": 8.384375595573509, "learning_rate": 9.461817548113695e-06, "loss": 0.8843, "step": 3914 }, { "epoch": 0.35, "grad_norm": 8.678717487563077, "learning_rate": 9.461491482849132e-06, "loss": 0.9014, "step": 3915 }, { "epoch": 0.35, "grad_norm": 6.321754818890408, "learning_rate": 9.461165324460706e-06, "loss": 0.8253, "step": 3916 }, { "epoch": 0.35, "grad_norm": 4.6736905745726345, "learning_rate": 9.460839072955225e-06, "loss": 0.8508, "step": 3917 }, { "epoch": 0.35, "grad_norm": 6.73069128183645, "learning_rate": 9.460512728339494e-06, "loss": 0.7263, "step": 3918 }, { "epoch": 0.35, "grad_norm": 8.319258225568124, "learning_rate": 9.460186290620333e-06, "loss": 0.7863, "step": 3919 }, { "epoch": 0.35, "grad_norm": 10.123839665898322, "learning_rate": 9.459859759804548e-06, "loss": 0.8575, "step": 3920 }, { "epoch": 0.35, "grad_norm": 8.056663942780128, "learning_rate": 9.459533135898955e-06, "loss": 0.8744, "step": 3921 }, { "epoch": 0.35, "grad_norm": 8.082430694296736, "learning_rate": 9.459206418910379e-06, "loss": 0.8321, "step": 3922 }, { "epoch": 0.35, "grad_norm": 5.376350513831872, "learning_rate": 9.45887960884563e-06, "loss": 0.8779, "step": 3923 }, { "epoch": 0.35, "grad_norm": 7.290170522772144, "learning_rate": 9.458552705711538e-06, "loss": 0.9171, "step": 3924 }, { "epoch": 0.35, "grad_norm": 5.835131439946, "learning_rate": 9.45822570951492e-06, "loss": 0.8194, "step": 3925 }, { "epoch": 0.35, "grad_norm": 5.287700501614024, "learning_rate": 9.457898620262604e-06, "loss": 0.8075, "step": 3926 }, { "epoch": 0.35, "grad_norm": 7.262028027939857, "learning_rate": 9.457571437961417e-06, "loss": 0.8757, "step": 3927 }, { "epoch": 0.35, "grad_norm": 5.714173363732083, "learning_rate": 9.457244162618186e-06, "loss": 0.9068, "step": 3928 }, { "epoch": 0.35, "grad_norm": 6.989295916553715, "learning_rate": 9.456916794239748e-06, "loss": 0.8778, "step": 3929 }, { "epoch": 0.35, "grad_norm": 8.014936835365715, "learning_rate": 9.45658933283293e-06, "loss": 0.8642, "step": 3930 }, { "epoch": 0.35, "grad_norm": 5.29945266045268, "learning_rate": 9.45626177840457e-06, "loss": 0.8858, "step": 3931 }, { "epoch": 0.35, "grad_norm": 5.513904953646336, "learning_rate": 9.455934130961506e-06, "loss": 0.8666, "step": 3932 }, { "epoch": 0.35, "grad_norm": 8.26905056966033, "learning_rate": 9.455606390510574e-06, "loss": 0.82, "step": 3933 }, { "epoch": 0.35, "grad_norm": 6.328995088873731, "learning_rate": 9.455278557058616e-06, "loss": 0.7873, "step": 3934 }, { "epoch": 0.35, "grad_norm": 6.427441203792534, "learning_rate": 9.454950630612473e-06, "loss": 0.8561, "step": 3935 }, { "epoch": 0.35, "grad_norm": 6.406495305097177, "learning_rate": 9.454622611178996e-06, "loss": 0.8611, "step": 3936 }, { "epoch": 0.35, "grad_norm": 6.815865845149242, "learning_rate": 9.454294498765024e-06, "loss": 0.8997, "step": 3937 }, { "epoch": 0.35, "grad_norm": 7.390519770857665, "learning_rate": 9.453966293377412e-06, "loss": 0.861, "step": 3938 }, { "epoch": 0.35, "grad_norm": 4.614263355867397, "learning_rate": 9.453637995023007e-06, "loss": 0.8336, "step": 3939 }, { "epoch": 0.35, "grad_norm": 5.894388570662001, "learning_rate": 9.45330960370866e-06, "loss": 0.875, "step": 3940 }, { "epoch": 0.35, "grad_norm": 6.3401102991867475, "learning_rate": 9.45298111944123e-06, "loss": 0.8091, "step": 3941 }, { "epoch": 0.35, "grad_norm": 4.8320690873332985, "learning_rate": 9.452652542227569e-06, "loss": 0.8453, "step": 3942 }, { "epoch": 0.35, "grad_norm": 8.21498563699384, "learning_rate": 9.452323872074538e-06, "loss": 0.8932, "step": 3943 }, { "epoch": 0.35, "grad_norm": 6.047918737543299, "learning_rate": 9.451995108988996e-06, "loss": 0.9228, "step": 3944 }, { "epoch": 0.35, "grad_norm": 5.504261842911873, "learning_rate": 9.451666252977808e-06, "loss": 0.7907, "step": 3945 }, { "epoch": 0.35, "grad_norm": 6.9333248734641595, "learning_rate": 9.451337304047835e-06, "loss": 0.7688, "step": 3946 }, { "epoch": 0.35, "grad_norm": 7.676185204890987, "learning_rate": 9.451008262205944e-06, "loss": 0.92, "step": 3947 }, { "epoch": 0.35, "grad_norm": 6.737546669856385, "learning_rate": 9.450679127459002e-06, "loss": 0.874, "step": 3948 }, { "epoch": 0.35, "grad_norm": 5.889043653571835, "learning_rate": 9.45034989981388e-06, "loss": 0.8584, "step": 3949 }, { "epoch": 0.35, "grad_norm": 7.959899715404131, "learning_rate": 9.45002057927745e-06, "loss": 0.8355, "step": 3950 }, { "epoch": 0.35, "grad_norm": 6.180190793483549, "learning_rate": 9.449691165856587e-06, "loss": 0.9, "step": 3951 }, { "epoch": 0.35, "grad_norm": 8.196966054626714, "learning_rate": 9.449361659558166e-06, "loss": 0.8395, "step": 3952 }, { "epoch": 0.35, "grad_norm": 6.0892171164671725, "learning_rate": 9.449032060389063e-06, "loss": 0.7685, "step": 3953 }, { "epoch": 0.35, "grad_norm": 6.997038607116231, "learning_rate": 9.448702368356159e-06, "loss": 0.9043, "step": 3954 }, { "epoch": 0.35, "grad_norm": 5.907717472019158, "learning_rate": 9.448372583466336e-06, "loss": 0.8638, "step": 3955 }, { "epoch": 0.35, "grad_norm": 5.744117114891816, "learning_rate": 9.448042705726478e-06, "loss": 0.7931, "step": 3956 }, { "epoch": 0.35, "grad_norm": 5.635957525537869, "learning_rate": 9.447712735143467e-06, "loss": 0.7995, "step": 3957 }, { "epoch": 0.35, "grad_norm": 4.7305436574088855, "learning_rate": 9.447382671724194e-06, "loss": 0.8262, "step": 3958 }, { "epoch": 0.35, "grad_norm": 6.946360872762408, "learning_rate": 9.447052515475548e-06, "loss": 0.7889, "step": 3959 }, { "epoch": 0.35, "grad_norm": 5.627178413902884, "learning_rate": 9.446722266404419e-06, "loss": 0.7874, "step": 3960 }, { "epoch": 0.35, "grad_norm": 7.272772597654644, "learning_rate": 9.446391924517702e-06, "loss": 0.8981, "step": 3961 }, { "epoch": 0.35, "grad_norm": 8.220749775988565, "learning_rate": 9.44606148982229e-06, "loss": 0.9034, "step": 3962 }, { "epoch": 0.35, "grad_norm": 7.63553930795546, "learning_rate": 9.445730962325082e-06, "loss": 0.8283, "step": 3963 }, { "epoch": 0.35, "grad_norm": 6.188851486320986, "learning_rate": 9.445400342032974e-06, "loss": 0.8134, "step": 3964 }, { "epoch": 0.35, "grad_norm": 5.7029971490227815, "learning_rate": 9.44506962895287e-06, "loss": 0.7972, "step": 3965 }, { "epoch": 0.35, "grad_norm": 7.737148247603911, "learning_rate": 9.444738823091673e-06, "loss": 0.7919, "step": 3966 }, { "epoch": 0.35, "grad_norm": 5.838934210180715, "learning_rate": 9.444407924456287e-06, "loss": 0.754, "step": 3967 }, { "epoch": 0.35, "grad_norm": 7.621725259075146, "learning_rate": 9.444076933053618e-06, "loss": 0.8708, "step": 3968 }, { "epoch": 0.35, "grad_norm": 5.884161656451834, "learning_rate": 9.443745848890576e-06, "loss": 0.8689, "step": 3969 }, { "epoch": 0.35, "grad_norm": 6.542759466017919, "learning_rate": 9.44341467197407e-06, "loss": 0.7931, "step": 3970 }, { "epoch": 0.35, "grad_norm": 5.85715058140632, "learning_rate": 9.443083402311015e-06, "loss": 0.8654, "step": 3971 }, { "epoch": 0.35, "grad_norm": 5.9354989976947055, "learning_rate": 9.442752039908322e-06, "loss": 0.8734, "step": 3972 }, { "epoch": 0.35, "grad_norm": 7.448150035195055, "learning_rate": 9.442420584772913e-06, "loss": 0.7504, "step": 3973 }, { "epoch": 0.35, "grad_norm": 6.916869412370929, "learning_rate": 9.4420890369117e-06, "loss": 0.7766, "step": 3974 }, { "epoch": 0.35, "grad_norm": 7.92938572394969, "learning_rate": 9.441757396331607e-06, "loss": 0.9213, "step": 3975 }, { "epoch": 0.35, "grad_norm": 7.119004313595765, "learning_rate": 9.441425663039557e-06, "loss": 0.8697, "step": 3976 }, { "epoch": 0.35, "grad_norm": 8.179017902798371, "learning_rate": 9.441093837042472e-06, "loss": 0.8843, "step": 3977 }, { "epoch": 0.35, "grad_norm": 8.548589110546544, "learning_rate": 9.440761918347278e-06, "loss": 0.8762, "step": 3978 }, { "epoch": 0.35, "grad_norm": 6.470191206036765, "learning_rate": 9.440429906960906e-06, "loss": 0.7797, "step": 3979 }, { "epoch": 0.36, "grad_norm": 6.519880221685968, "learning_rate": 9.440097802890282e-06, "loss": 0.8563, "step": 3980 }, { "epoch": 0.36, "grad_norm": 8.349072442323493, "learning_rate": 9.439765606142342e-06, "loss": 0.7744, "step": 3981 }, { "epoch": 0.36, "grad_norm": 6.743323057944755, "learning_rate": 9.439433316724017e-06, "loss": 0.8231, "step": 3982 }, { "epoch": 0.36, "grad_norm": 8.57929494929673, "learning_rate": 9.439100934642242e-06, "loss": 0.8499, "step": 3983 }, { "epoch": 0.36, "grad_norm": 5.68304987096151, "learning_rate": 9.43876845990396e-06, "loss": 0.9037, "step": 3984 }, { "epoch": 0.36, "grad_norm": 8.666602331399481, "learning_rate": 9.438435892516104e-06, "loss": 0.9182, "step": 3985 }, { "epoch": 0.36, "grad_norm": 6.606894555627416, "learning_rate": 9.438103232485621e-06, "loss": 0.8489, "step": 3986 }, { "epoch": 0.36, "grad_norm": 7.336517150725684, "learning_rate": 9.43777047981945e-06, "loss": 0.8058, "step": 3987 }, { "epoch": 0.36, "grad_norm": 5.555900508916189, "learning_rate": 9.43743763452454e-06, "loss": 0.8181, "step": 3988 }, { "epoch": 0.36, "grad_norm": 7.5296009184470405, "learning_rate": 9.437104696607837e-06, "loss": 0.8611, "step": 3989 }, { "epoch": 0.36, "grad_norm": 7.498124953347789, "learning_rate": 9.436771666076293e-06, "loss": 0.9013, "step": 3990 }, { "epoch": 0.36, "grad_norm": 7.418509529091909, "learning_rate": 9.436438542936853e-06, "loss": 0.8241, "step": 3991 }, { "epoch": 0.36, "grad_norm": 8.539649129308645, "learning_rate": 9.436105327196476e-06, "loss": 0.7957, "step": 3992 }, { "epoch": 0.36, "grad_norm": 7.391126348787983, "learning_rate": 9.435772018862115e-06, "loss": 0.8031, "step": 3993 }, { "epoch": 0.36, "grad_norm": 4.796072302541556, "learning_rate": 9.435438617940727e-06, "loss": 0.891, "step": 3994 }, { "epoch": 0.36, "grad_norm": 5.561878494604155, "learning_rate": 9.435105124439271e-06, "loss": 0.8645, "step": 3995 }, { "epoch": 0.36, "grad_norm": 6.807892638957509, "learning_rate": 9.43477153836471e-06, "loss": 0.8148, "step": 3996 }, { "epoch": 0.36, "grad_norm": 5.892666251783934, "learning_rate": 9.434437859724003e-06, "loss": 0.778, "step": 3997 }, { "epoch": 0.36, "grad_norm": 7.745435229355464, "learning_rate": 9.434104088524119e-06, "loss": 0.8569, "step": 3998 }, { "epoch": 0.36, "grad_norm": 6.745898461485092, "learning_rate": 9.433770224772021e-06, "loss": 0.847, "step": 3999 }, { "epoch": 0.36, "grad_norm": 5.32879181917969, "learning_rate": 9.433436268474679e-06, "loss": 0.8067, "step": 4000 }, { "epoch": 0.36, "grad_norm": 5.519874437644737, "learning_rate": 9.433102219639065e-06, "loss": 0.8879, "step": 4001 }, { "epoch": 0.36, "grad_norm": 8.67992441824841, "learning_rate": 9.43276807827215e-06, "loss": 0.8189, "step": 4002 }, { "epoch": 0.36, "grad_norm": 7.824068570876983, "learning_rate": 9.432433844380909e-06, "loss": 0.9759, "step": 4003 }, { "epoch": 0.36, "grad_norm": 5.506669151432578, "learning_rate": 9.432099517972319e-06, "loss": 0.8657, "step": 4004 }, { "epoch": 0.36, "grad_norm": 7.191633773161874, "learning_rate": 9.431765099053357e-06, "loss": 0.8149, "step": 4005 }, { "epoch": 0.36, "grad_norm": 8.751026432575095, "learning_rate": 9.431430587631006e-06, "loss": 0.8386, "step": 4006 }, { "epoch": 0.36, "grad_norm": 5.0664615573512215, "learning_rate": 9.431095983712243e-06, "loss": 0.9023, "step": 4007 }, { "epoch": 0.36, "grad_norm": 7.298937982726953, "learning_rate": 9.430761287304056e-06, "loss": 0.8731, "step": 4008 }, { "epoch": 0.36, "grad_norm": 6.456315490546447, "learning_rate": 9.430426498413432e-06, "loss": 0.8445, "step": 4009 }, { "epoch": 0.36, "grad_norm": 7.823697748346717, "learning_rate": 9.430091617047356e-06, "loss": 0.8736, "step": 4010 }, { "epoch": 0.36, "grad_norm": 6.311627957312553, "learning_rate": 9.429756643212819e-06, "loss": 0.9419, "step": 4011 }, { "epoch": 0.36, "grad_norm": 6.228108486166968, "learning_rate": 9.429421576916814e-06, "loss": 0.876, "step": 4012 }, { "epoch": 0.36, "grad_norm": 9.277530322099935, "learning_rate": 9.429086418166333e-06, "loss": 0.9067, "step": 4013 }, { "epoch": 0.36, "grad_norm": 6.259770464309468, "learning_rate": 9.428751166968372e-06, "loss": 0.8577, "step": 4014 }, { "epoch": 0.36, "grad_norm": 5.447599638900323, "learning_rate": 9.428415823329932e-06, "loss": 0.9179, "step": 4015 }, { "epoch": 0.36, "grad_norm": 5.647898893221981, "learning_rate": 9.428080387258008e-06, "loss": 0.8631, "step": 4016 }, { "epoch": 0.36, "grad_norm": 6.188434513448295, "learning_rate": 9.427744858759602e-06, "loss": 0.8974, "step": 4017 }, { "epoch": 0.36, "grad_norm": 6.448741924141476, "learning_rate": 9.42740923784172e-06, "loss": 0.8599, "step": 4018 }, { "epoch": 0.36, "grad_norm": 7.343073688701032, "learning_rate": 9.427073524511366e-06, "loss": 0.8388, "step": 4019 }, { "epoch": 0.36, "grad_norm": 6.589558677677655, "learning_rate": 9.426737718775547e-06, "loss": 0.9158, "step": 4020 }, { "epoch": 0.36, "grad_norm": 6.982896349748364, "learning_rate": 9.426401820641274e-06, "loss": 0.8615, "step": 4021 }, { "epoch": 0.36, "grad_norm": 6.239567514540737, "learning_rate": 9.426065830115553e-06, "loss": 0.7917, "step": 4022 }, { "epoch": 0.36, "grad_norm": 5.7573849156429775, "learning_rate": 9.425729747205404e-06, "loss": 0.8345, "step": 4023 }, { "epoch": 0.36, "grad_norm": 5.801016334765673, "learning_rate": 9.425393571917837e-06, "loss": 0.8447, "step": 4024 }, { "epoch": 0.36, "grad_norm": 6.48706002201493, "learning_rate": 9.425057304259872e-06, "loss": 0.8647, "step": 4025 }, { "epoch": 0.36, "grad_norm": 4.438639425551503, "learning_rate": 9.424720944238527e-06, "loss": 0.8553, "step": 4026 }, { "epoch": 0.36, "grad_norm": 7.309202031210394, "learning_rate": 9.424384491860821e-06, "loss": 0.8567, "step": 4027 }, { "epoch": 0.36, "grad_norm": 5.143033970027757, "learning_rate": 9.424047947133778e-06, "loss": 0.8256, "step": 4028 }, { "epoch": 0.36, "grad_norm": 6.966334445423384, "learning_rate": 9.423711310064423e-06, "loss": 0.8199, "step": 4029 }, { "epoch": 0.36, "grad_norm": 7.412412153084456, "learning_rate": 9.42337458065978e-06, "loss": 0.8771, "step": 4030 }, { "epoch": 0.36, "grad_norm": 7.244566547495396, "learning_rate": 9.423037758926883e-06, "loss": 0.7986, "step": 4031 }, { "epoch": 0.36, "grad_norm": 5.963461234991387, "learning_rate": 9.422700844872758e-06, "loss": 0.7998, "step": 4032 }, { "epoch": 0.36, "grad_norm": 6.978142898110764, "learning_rate": 9.422363838504439e-06, "loss": 0.7275, "step": 4033 }, { "epoch": 0.36, "grad_norm": 6.482714158481245, "learning_rate": 9.422026739828958e-06, "loss": 0.7616, "step": 4034 }, { "epoch": 0.36, "grad_norm": 6.2370892832236615, "learning_rate": 9.421689548853352e-06, "loss": 0.8748, "step": 4035 }, { "epoch": 0.36, "grad_norm": 6.445613961593261, "learning_rate": 9.421352265584663e-06, "loss": 0.7895, "step": 4036 }, { "epoch": 0.36, "grad_norm": 7.249561297585087, "learning_rate": 9.421014890029925e-06, "loss": 0.9042, "step": 4037 }, { "epoch": 0.36, "grad_norm": 5.246486934713767, "learning_rate": 9.420677422196185e-06, "loss": 0.8928, "step": 4038 }, { "epoch": 0.36, "grad_norm": 5.7804784037967245, "learning_rate": 9.420339862090485e-06, "loss": 0.8648, "step": 4039 }, { "epoch": 0.36, "grad_norm": 5.61254421645711, "learning_rate": 9.42000220971987e-06, "loss": 0.7611, "step": 4040 }, { "epoch": 0.36, "grad_norm": 4.504121654781611, "learning_rate": 9.419664465091389e-06, "loss": 0.8925, "step": 4041 }, { "epoch": 0.36, "grad_norm": 5.844011620663812, "learning_rate": 9.419326628212091e-06, "loss": 0.8382, "step": 4042 }, { "epoch": 0.36, "grad_norm": 6.360136452343533, "learning_rate": 9.418988699089029e-06, "loss": 0.8777, "step": 4043 }, { "epoch": 0.36, "grad_norm": 4.808976088887901, "learning_rate": 9.418650677729254e-06, "loss": 0.7933, "step": 4044 }, { "epoch": 0.36, "grad_norm": 6.748157457116361, "learning_rate": 9.418312564139822e-06, "loss": 0.9138, "step": 4045 }, { "epoch": 0.36, "grad_norm": 8.350534290483683, "learning_rate": 9.417974358327792e-06, "loss": 0.842, "step": 4046 }, { "epoch": 0.36, "grad_norm": 6.6169275080348084, "learning_rate": 9.417636060300224e-06, "loss": 0.8184, "step": 4047 }, { "epoch": 0.36, "grad_norm": 6.0848210829061795, "learning_rate": 9.417297670064175e-06, "loss": 0.8271, "step": 4048 }, { "epoch": 0.36, "grad_norm": 6.3900307463238155, "learning_rate": 9.416959187626713e-06, "loss": 0.8424, "step": 4049 }, { "epoch": 0.36, "grad_norm": 7.368493337680685, "learning_rate": 9.4166206129949e-06, "loss": 0.8527, "step": 4050 }, { "epoch": 0.36, "grad_norm": 6.149691808202422, "learning_rate": 9.416281946175804e-06, "loss": 0.8665, "step": 4051 }, { "epoch": 0.36, "grad_norm": 7.025585590090299, "learning_rate": 9.415943187176494e-06, "loss": 0.8529, "step": 4052 }, { "epoch": 0.36, "grad_norm": 6.295905421631316, "learning_rate": 9.415604336004041e-06, "loss": 0.8373, "step": 4053 }, { "epoch": 0.36, "grad_norm": 4.83039467234084, "learning_rate": 9.415265392665516e-06, "loss": 0.8526, "step": 4054 }, { "epoch": 0.36, "grad_norm": 5.1389834791789095, "learning_rate": 9.414926357167998e-06, "loss": 0.84, "step": 4055 }, { "epoch": 0.36, "grad_norm": 7.944072022279057, "learning_rate": 9.414587229518557e-06, "loss": 0.8312, "step": 4056 }, { "epoch": 0.36, "grad_norm": 5.4928633158159546, "learning_rate": 9.414248009724279e-06, "loss": 0.8684, "step": 4057 }, { "epoch": 0.36, "grad_norm": 7.21331563458297, "learning_rate": 9.413908697792237e-06, "loss": 0.8358, "step": 4058 }, { "epoch": 0.36, "grad_norm": 6.346525617381498, "learning_rate": 9.413569293729521e-06, "loss": 0.8518, "step": 4059 }, { "epoch": 0.36, "grad_norm": 7.254646524543499, "learning_rate": 9.41322979754321e-06, "loss": 0.8294, "step": 4060 }, { "epoch": 0.36, "grad_norm": 5.780296557434875, "learning_rate": 9.412890209240393e-06, "loss": 0.7869, "step": 4061 }, { "epoch": 0.36, "grad_norm": 6.078705324756505, "learning_rate": 9.412550528828155e-06, "loss": 0.8016, "step": 4062 }, { "epoch": 0.36, "grad_norm": 5.929555352585771, "learning_rate": 9.41221075631359e-06, "loss": 0.8271, "step": 4063 }, { "epoch": 0.36, "grad_norm": 8.569512272471407, "learning_rate": 9.411870891703786e-06, "loss": 0.9015, "step": 4064 }, { "epoch": 0.36, "grad_norm": 5.639980599077865, "learning_rate": 9.41153093500584e-06, "loss": 0.7614, "step": 4065 }, { "epoch": 0.36, "grad_norm": 5.884095968615489, "learning_rate": 9.411190886226847e-06, "loss": 0.8509, "step": 4066 }, { "epoch": 0.36, "grad_norm": 8.621115716612975, "learning_rate": 9.410850745373903e-06, "loss": 0.8876, "step": 4067 }, { "epoch": 0.36, "grad_norm": 5.925257320880341, "learning_rate": 9.410510512454111e-06, "loss": 0.8913, "step": 4068 }, { "epoch": 0.36, "grad_norm": 4.981394047076977, "learning_rate": 9.41017018747457e-06, "loss": 0.8078, "step": 4069 }, { "epoch": 0.36, "grad_norm": 6.225170007359369, "learning_rate": 9.409829770442385e-06, "loss": 0.855, "step": 4070 }, { "epoch": 0.36, "grad_norm": 4.583389322637002, "learning_rate": 9.409489261364662e-06, "loss": 0.7814, "step": 4071 }, { "epoch": 0.36, "grad_norm": 5.6206346601098165, "learning_rate": 9.409148660248505e-06, "loss": 0.9044, "step": 4072 }, { "epoch": 0.36, "grad_norm": 5.848326359575929, "learning_rate": 9.408807967101028e-06, "loss": 0.8813, "step": 4073 }, { "epoch": 0.36, "grad_norm": 5.23416491812388, "learning_rate": 9.408467181929338e-06, "loss": 0.8466, "step": 4074 }, { "epoch": 0.36, "grad_norm": 5.19406282070563, "learning_rate": 9.40812630474055e-06, "loss": 0.8192, "step": 4075 }, { "epoch": 0.36, "grad_norm": 5.619306814918696, "learning_rate": 9.407785335541779e-06, "loss": 0.9269, "step": 4076 }, { "epoch": 0.36, "grad_norm": 6.048357645297921, "learning_rate": 9.407444274340142e-06, "loss": 0.794, "step": 4077 }, { "epoch": 0.36, "grad_norm": 6.761295287532734, "learning_rate": 9.40710312114276e-06, "loss": 0.834, "step": 4078 }, { "epoch": 0.36, "grad_norm": 6.38922467757638, "learning_rate": 9.406761875956748e-06, "loss": 0.7574, "step": 4079 }, { "epoch": 0.36, "grad_norm": 5.251972770379315, "learning_rate": 9.406420538789237e-06, "loss": 0.8266, "step": 4080 }, { "epoch": 0.36, "grad_norm": 6.671402205661649, "learning_rate": 9.406079109647343e-06, "loss": 0.8307, "step": 4081 }, { "epoch": 0.36, "grad_norm": 6.665099628578525, "learning_rate": 9.405737588538199e-06, "loss": 0.9054, "step": 4082 }, { "epoch": 0.36, "grad_norm": 5.750251520967442, "learning_rate": 9.405395975468932e-06, "loss": 0.8494, "step": 4083 }, { "epoch": 0.36, "grad_norm": 8.748818871388774, "learning_rate": 9.40505427044667e-06, "loss": 0.834, "step": 4084 }, { "epoch": 0.36, "grad_norm": 5.8094259853288985, "learning_rate": 9.404712473478547e-06, "loss": 0.8103, "step": 4085 }, { "epoch": 0.36, "grad_norm": 7.088633839119641, "learning_rate": 9.404370584571698e-06, "loss": 0.8552, "step": 4086 }, { "epoch": 0.36, "grad_norm": 7.900635407596523, "learning_rate": 9.404028603733257e-06, "loss": 0.8337, "step": 4087 }, { "epoch": 0.36, "grad_norm": 7.581933057237395, "learning_rate": 9.403686530970365e-06, "loss": 0.8087, "step": 4088 }, { "epoch": 0.36, "grad_norm": 6.894869788675074, "learning_rate": 9.403344366290161e-06, "loss": 0.8538, "step": 4089 }, { "epoch": 0.36, "grad_norm": 5.177469179533167, "learning_rate": 9.403002109699786e-06, "loss": 0.8438, "step": 4090 }, { "epoch": 0.36, "grad_norm": 6.699913014225009, "learning_rate": 9.402659761206384e-06, "loss": 0.8601, "step": 4091 }, { "epoch": 0.37, "grad_norm": 4.139045172089933, "learning_rate": 9.4023173208171e-06, "loss": 0.8223, "step": 4092 }, { "epoch": 0.37, "grad_norm": 5.527026136808844, "learning_rate": 9.401974788539086e-06, "loss": 0.8087, "step": 4093 }, { "epoch": 0.37, "grad_norm": 6.538049406997626, "learning_rate": 9.401632164379485e-06, "loss": 0.8609, "step": 4094 }, { "epoch": 0.37, "grad_norm": 7.216153341222045, "learning_rate": 9.401289448345455e-06, "loss": 0.8869, "step": 4095 }, { "epoch": 0.37, "grad_norm": 6.49700990414356, "learning_rate": 9.400946640444143e-06, "loss": 0.8449, "step": 4096 }, { "epoch": 0.37, "grad_norm": 5.325892000681776, "learning_rate": 9.400603740682712e-06, "loss": 0.7727, "step": 4097 }, { "epoch": 0.37, "grad_norm": 5.632258695197307, "learning_rate": 9.400260749068311e-06, "loss": 0.8814, "step": 4098 }, { "epoch": 0.37, "grad_norm": 6.0745058348040635, "learning_rate": 9.399917665608107e-06, "loss": 0.8581, "step": 4099 }, { "epoch": 0.37, "grad_norm": 7.171725720041989, "learning_rate": 9.399574490309254e-06, "loss": 0.9053, "step": 4100 }, { "epoch": 0.37, "grad_norm": 6.93356504083452, "learning_rate": 9.399231223178919e-06, "loss": 0.8308, "step": 4101 }, { "epoch": 0.37, "grad_norm": 6.915945308995971, "learning_rate": 9.398887864224269e-06, "loss": 0.8152, "step": 4102 }, { "epoch": 0.37, "grad_norm": 5.553260573458361, "learning_rate": 9.398544413452465e-06, "loss": 0.8271, "step": 4103 }, { "epoch": 0.37, "grad_norm": 5.042196083697854, "learning_rate": 9.398200870870681e-06, "loss": 0.8612, "step": 4104 }, { "epoch": 0.37, "grad_norm": 6.623886831015347, "learning_rate": 9.397857236486086e-06, "loss": 0.9203, "step": 4105 }, { "epoch": 0.37, "grad_norm": 7.770118194982789, "learning_rate": 9.397513510305851e-06, "loss": 0.8103, "step": 4106 }, { "epoch": 0.37, "grad_norm": 6.751010577671964, "learning_rate": 9.397169692337154e-06, "loss": 0.7862, "step": 4107 }, { "epoch": 0.37, "grad_norm": 7.6282650471613, "learning_rate": 9.396825782587167e-06, "loss": 0.7739, "step": 4108 }, { "epoch": 0.37, "grad_norm": 7.663303535640229, "learning_rate": 9.396481781063072e-06, "loss": 0.8669, "step": 4109 }, { "epoch": 0.37, "grad_norm": 7.617495589582204, "learning_rate": 9.396137687772047e-06, "loss": 0.8446, "step": 4110 }, { "epoch": 0.37, "grad_norm": 8.088133707224628, "learning_rate": 9.395793502721276e-06, "loss": 0.79, "step": 4111 }, { "epoch": 0.37, "grad_norm": 4.534911311471697, "learning_rate": 9.395449225917941e-06, "loss": 0.8703, "step": 4112 }, { "epoch": 0.37, "grad_norm": 5.879024837690329, "learning_rate": 9.395104857369231e-06, "loss": 0.8699, "step": 4113 }, { "epoch": 0.37, "grad_norm": 5.872429245706731, "learning_rate": 9.39476039708233e-06, "loss": 0.8548, "step": 4114 }, { "epoch": 0.37, "grad_norm": 7.3200802740680055, "learning_rate": 9.394415845064431e-06, "loss": 0.8499, "step": 4115 }, { "epoch": 0.37, "grad_norm": 5.334078476928942, "learning_rate": 9.394071201322726e-06, "loss": 0.8258, "step": 4116 }, { "epoch": 0.37, "grad_norm": 6.804075622351396, "learning_rate": 9.393726465864405e-06, "loss": 0.8286, "step": 4117 }, { "epoch": 0.37, "grad_norm": 5.226510886778001, "learning_rate": 9.39338163869667e-06, "loss": 0.9365, "step": 4118 }, { "epoch": 0.37, "grad_norm": 5.384944577881193, "learning_rate": 9.393036719826711e-06, "loss": 0.8641, "step": 4119 }, { "epoch": 0.37, "grad_norm": 5.682514794258588, "learning_rate": 9.392691709261733e-06, "loss": 0.8529, "step": 4120 }, { "epoch": 0.37, "grad_norm": 7.400989719999163, "learning_rate": 9.392346607008935e-06, "loss": 0.8787, "step": 4121 }, { "epoch": 0.37, "grad_norm": 3.8691947120610255, "learning_rate": 9.39200141307552e-06, "loss": 0.8184, "step": 4122 }, { "epoch": 0.37, "grad_norm": 5.778652408382469, "learning_rate": 9.391656127468696e-06, "loss": 0.8352, "step": 4123 }, { "epoch": 0.37, "grad_norm": 6.602605217082588, "learning_rate": 9.391310750195665e-06, "loss": 0.8523, "step": 4124 }, { "epoch": 0.37, "grad_norm": 6.186921807978088, "learning_rate": 9.39096528126364e-06, "loss": 0.8481, "step": 4125 }, { "epoch": 0.37, "grad_norm": 6.326505091953193, "learning_rate": 9.390619720679833e-06, "loss": 0.9144, "step": 4126 }, { "epoch": 0.37, "grad_norm": 6.597502591238907, "learning_rate": 9.39027406845145e-06, "loss": 0.8127, "step": 4127 }, { "epoch": 0.37, "grad_norm": 5.692323823014313, "learning_rate": 9.389928324585713e-06, "loss": 0.8472, "step": 4128 }, { "epoch": 0.37, "grad_norm": 8.46676575268346, "learning_rate": 9.389582489089837e-06, "loss": 0.886, "step": 4129 }, { "epoch": 0.37, "grad_norm": 6.6212253638091285, "learning_rate": 9.38923656197104e-06, "loss": 0.8725, "step": 4130 }, { "epoch": 0.37, "grad_norm": 5.6110314410435524, "learning_rate": 9.388890543236539e-06, "loss": 0.8518, "step": 4131 }, { "epoch": 0.37, "grad_norm": 5.54843948671427, "learning_rate": 9.388544432893562e-06, "loss": 0.8602, "step": 4132 }, { "epoch": 0.37, "grad_norm": 5.683441603359437, "learning_rate": 9.38819823094933e-06, "loss": 0.8373, "step": 4133 }, { "epoch": 0.37, "grad_norm": 7.527798564132173, "learning_rate": 9.38785193741107e-06, "loss": 0.8314, "step": 4134 }, { "epoch": 0.37, "grad_norm": 8.43295574119505, "learning_rate": 9.387505552286008e-06, "loss": 0.8477, "step": 4135 }, { "epoch": 0.37, "grad_norm": 6.575464182979885, "learning_rate": 9.387159075581378e-06, "loss": 0.8621, "step": 4136 }, { "epoch": 0.37, "grad_norm": 6.490905909549163, "learning_rate": 9.38681250730441e-06, "loss": 0.7674, "step": 4137 }, { "epoch": 0.37, "grad_norm": 6.69286738294564, "learning_rate": 9.386465847462339e-06, "loss": 0.8374, "step": 4138 }, { "epoch": 0.37, "grad_norm": 6.083088493133638, "learning_rate": 9.386119096062396e-06, "loss": 0.8673, "step": 4139 }, { "epoch": 0.37, "grad_norm": 6.305075502074553, "learning_rate": 9.385772253111826e-06, "loss": 0.8933, "step": 4140 }, { "epoch": 0.37, "grad_norm": 6.746566164748403, "learning_rate": 9.385425318617862e-06, "loss": 0.8592, "step": 4141 }, { "epoch": 0.37, "grad_norm": 8.103635165033205, "learning_rate": 9.38507829258775e-06, "loss": 0.8032, "step": 4142 }, { "epoch": 0.37, "grad_norm": 6.069928405029586, "learning_rate": 9.384731175028733e-06, "loss": 0.9222, "step": 4143 }, { "epoch": 0.37, "grad_norm": 6.3643556074590455, "learning_rate": 9.384383965948055e-06, "loss": 0.8492, "step": 4144 }, { "epoch": 0.37, "grad_norm": 5.3395836253426445, "learning_rate": 9.384036665352961e-06, "loss": 0.8336, "step": 4145 }, { "epoch": 0.37, "grad_norm": 5.763296186313993, "learning_rate": 9.383689273250702e-06, "loss": 0.8538, "step": 4146 }, { "epoch": 0.37, "grad_norm": 5.31832533707095, "learning_rate": 9.383341789648533e-06, "loss": 0.8369, "step": 4147 }, { "epoch": 0.37, "grad_norm": 6.280557918984372, "learning_rate": 9.3829942145537e-06, "loss": 0.8608, "step": 4148 }, { "epoch": 0.37, "grad_norm": 6.063728548077029, "learning_rate": 9.382646547973464e-06, "loss": 0.8458, "step": 4149 }, { "epoch": 0.37, "grad_norm": 5.523396393634446, "learning_rate": 9.382298789915079e-06, "loss": 0.8528, "step": 4150 }, { "epoch": 0.37, "grad_norm": 4.54100705711264, "learning_rate": 9.381950940385803e-06, "loss": 0.7553, "step": 4151 }, { "epoch": 0.37, "grad_norm": 6.2961408456632615, "learning_rate": 9.381602999392896e-06, "loss": 0.7686, "step": 4152 }, { "epoch": 0.37, "grad_norm": 7.749566103104944, "learning_rate": 9.381254966943624e-06, "loss": 0.8518, "step": 4153 }, { "epoch": 0.37, "grad_norm": 7.125573305467597, "learning_rate": 9.380906843045248e-06, "loss": 0.7937, "step": 4154 }, { "epoch": 0.37, "grad_norm": 5.895048481282751, "learning_rate": 9.380558627705036e-06, "loss": 0.863, "step": 4155 }, { "epoch": 0.37, "grad_norm": 5.5717905072356, "learning_rate": 9.380210320930256e-06, "loss": 0.8986, "step": 4156 }, { "epoch": 0.37, "grad_norm": 5.637090765060546, "learning_rate": 9.37986192272818e-06, "loss": 0.8917, "step": 4157 }, { "epoch": 0.37, "grad_norm": 6.993530843801003, "learning_rate": 9.379513433106075e-06, "loss": 0.8378, "step": 4158 }, { "epoch": 0.37, "grad_norm": 8.117855409699876, "learning_rate": 9.379164852071221e-06, "loss": 0.8441, "step": 4159 }, { "epoch": 0.37, "grad_norm": 4.857117890667359, "learning_rate": 9.378816179630888e-06, "loss": 0.8554, "step": 4160 }, { "epoch": 0.37, "grad_norm": 5.9806133568479085, "learning_rate": 9.378467415792358e-06, "loss": 0.8996, "step": 4161 }, { "epoch": 0.37, "grad_norm": 5.727875819859236, "learning_rate": 9.37811856056291e-06, "loss": 0.8374, "step": 4162 }, { "epoch": 0.37, "grad_norm": 5.784603310787402, "learning_rate": 9.377769613949825e-06, "loss": 0.8768, "step": 4163 }, { "epoch": 0.37, "grad_norm": 5.848503059586507, "learning_rate": 9.377420575960385e-06, "loss": 0.8472, "step": 4164 }, { "epoch": 0.37, "grad_norm": 5.68936620822105, "learning_rate": 9.377071446601878e-06, "loss": 0.858, "step": 4165 }, { "epoch": 0.37, "grad_norm": 5.114862406439199, "learning_rate": 9.376722225881591e-06, "loss": 0.8396, "step": 4166 }, { "epoch": 0.37, "grad_norm": 7.875364863901053, "learning_rate": 9.37637291380681e-06, "loss": 0.827, "step": 4167 }, { "epoch": 0.37, "grad_norm": 6.702120842212274, "learning_rate": 9.376023510384831e-06, "loss": 0.8458, "step": 4168 }, { "epoch": 0.37, "grad_norm": 6.518755538146358, "learning_rate": 9.375674015622941e-06, "loss": 0.8253, "step": 4169 }, { "epoch": 0.37, "grad_norm": 6.631253760323815, "learning_rate": 9.375324429528443e-06, "loss": 0.8653, "step": 4170 }, { "epoch": 0.37, "grad_norm": 5.338345343457585, "learning_rate": 9.374974752108626e-06, "loss": 0.8447, "step": 4171 }, { "epoch": 0.37, "grad_norm": 7.257998550546608, "learning_rate": 9.374624983370795e-06, "loss": 0.8086, "step": 4172 }, { "epoch": 0.37, "grad_norm": 5.429756906077807, "learning_rate": 9.374275123322245e-06, "loss": 0.9058, "step": 4173 }, { "epoch": 0.37, "grad_norm": 9.044870236958722, "learning_rate": 9.373925171970282e-06, "loss": 0.8593, "step": 4174 }, { "epoch": 0.37, "grad_norm": 4.63642419578049, "learning_rate": 9.37357512932221e-06, "loss": 0.8725, "step": 4175 }, { "epoch": 0.37, "grad_norm": 4.879325162366704, "learning_rate": 9.373224995385336e-06, "loss": 0.8629, "step": 4176 }, { "epoch": 0.37, "grad_norm": 8.335036251444922, "learning_rate": 9.372874770166966e-06, "loss": 0.9211, "step": 4177 }, { "epoch": 0.37, "grad_norm": 7.044634635740906, "learning_rate": 9.372524453674412e-06, "loss": 0.8561, "step": 4178 }, { "epoch": 0.37, "grad_norm": 8.265686366951105, "learning_rate": 9.372174045914987e-06, "loss": 0.8514, "step": 4179 }, { "epoch": 0.37, "grad_norm": 6.678080930912243, "learning_rate": 9.371823546896001e-06, "loss": 0.7979, "step": 4180 }, { "epoch": 0.37, "grad_norm": 7.751116784504044, "learning_rate": 9.371472956624775e-06, "loss": 0.8439, "step": 4181 }, { "epoch": 0.37, "grad_norm": 7.4584734358888225, "learning_rate": 9.371122275108625e-06, "loss": 0.8732, "step": 4182 }, { "epoch": 0.37, "grad_norm": 4.73503212634114, "learning_rate": 9.370771502354867e-06, "loss": 0.8559, "step": 4183 }, { "epoch": 0.37, "grad_norm": 6.374986659000164, "learning_rate": 9.370420638370828e-06, "loss": 0.8539, "step": 4184 }, { "epoch": 0.37, "grad_norm": 6.418519779362382, "learning_rate": 9.37006968316383e-06, "loss": 0.8911, "step": 4185 }, { "epoch": 0.37, "grad_norm": 5.285369366086445, "learning_rate": 9.369718636741196e-06, "loss": 0.8948, "step": 4186 }, { "epoch": 0.37, "grad_norm": 7.6924506792129534, "learning_rate": 9.369367499110254e-06, "loss": 0.8408, "step": 4187 }, { "epoch": 0.37, "grad_norm": 7.5927187294461005, "learning_rate": 9.369016270278335e-06, "loss": 0.9031, "step": 4188 }, { "epoch": 0.37, "grad_norm": 5.379870611375347, "learning_rate": 9.36866495025277e-06, "loss": 0.8172, "step": 4189 }, { "epoch": 0.37, "grad_norm": 6.06319823238156, "learning_rate": 9.368313539040892e-06, "loss": 0.848, "step": 4190 }, { "epoch": 0.37, "grad_norm": 6.815531155517509, "learning_rate": 9.367962036650035e-06, "loss": 0.8042, "step": 4191 }, { "epoch": 0.37, "grad_norm": 6.597975106976378, "learning_rate": 9.367610443087536e-06, "loss": 0.8064, "step": 4192 }, { "epoch": 0.37, "grad_norm": 5.293436914751377, "learning_rate": 9.367258758360733e-06, "loss": 0.9103, "step": 4193 }, { "epoch": 0.37, "grad_norm": 4.99316343831238, "learning_rate": 9.36690698247697e-06, "loss": 0.856, "step": 4194 }, { "epoch": 0.37, "grad_norm": 7.911543975374293, "learning_rate": 9.366555115443584e-06, "loss": 0.7736, "step": 4195 }, { "epoch": 0.37, "grad_norm": 5.9393088792338515, "learning_rate": 9.366203157267923e-06, "loss": 0.7731, "step": 4196 }, { "epoch": 0.37, "grad_norm": 5.927321662756173, "learning_rate": 9.365851107957333e-06, "loss": 0.8444, "step": 4197 }, { "epoch": 0.37, "grad_norm": 7.885770132108312, "learning_rate": 9.365498967519164e-06, "loss": 0.9403, "step": 4198 }, { "epoch": 0.37, "grad_norm": 6.069652030586314, "learning_rate": 9.365146735960763e-06, "loss": 0.844, "step": 4199 }, { "epoch": 0.37, "grad_norm": 5.60124942388124, "learning_rate": 9.364794413289483e-06, "loss": 0.7784, "step": 4200 }, { "epoch": 0.37, "grad_norm": 6.163774498350081, "learning_rate": 9.36444199951268e-06, "loss": 0.8196, "step": 4201 }, { "epoch": 0.37, "grad_norm": 9.682753938417639, "learning_rate": 9.364089494637707e-06, "loss": 0.9063, "step": 4202 }, { "epoch": 0.37, "grad_norm": 5.540770108217958, "learning_rate": 9.363736898671923e-06, "loss": 0.8683, "step": 4203 }, { "epoch": 0.38, "grad_norm": 7.811097890585324, "learning_rate": 9.363384211622688e-06, "loss": 0.8357, "step": 4204 }, { "epoch": 0.38, "grad_norm": 6.910705659166817, "learning_rate": 9.363031433497362e-06, "loss": 0.8115, "step": 4205 }, { "epoch": 0.38, "grad_norm": 6.365238193088776, "learning_rate": 9.362678564303311e-06, "loss": 0.7909, "step": 4206 }, { "epoch": 0.38, "grad_norm": 6.3510476844499015, "learning_rate": 9.362325604047899e-06, "loss": 0.801, "step": 4207 }, { "epoch": 0.38, "grad_norm": 6.856115252931735, "learning_rate": 9.361972552738495e-06, "loss": 0.8856, "step": 4208 }, { "epoch": 0.38, "grad_norm": 5.416956022113308, "learning_rate": 9.361619410382463e-06, "loss": 0.8956, "step": 4209 }, { "epoch": 0.38, "grad_norm": 4.823312070477005, "learning_rate": 9.36126617698718e-06, "loss": 0.886, "step": 4210 }, { "epoch": 0.38, "grad_norm": 6.367144832749693, "learning_rate": 9.360912852560015e-06, "loss": 0.8743, "step": 4211 }, { "epoch": 0.38, "grad_norm": 6.644403520906778, "learning_rate": 9.360559437108346e-06, "loss": 0.7568, "step": 4212 }, { "epoch": 0.38, "grad_norm": 5.332275888202728, "learning_rate": 9.360205930639547e-06, "loss": 0.8229, "step": 4213 }, { "epoch": 0.38, "grad_norm": 6.886918816473951, "learning_rate": 9.359852333161e-06, "loss": 0.8362, "step": 4214 }, { "epoch": 0.38, "grad_norm": 5.834393949065181, "learning_rate": 9.35949864468008e-06, "loss": 0.8072, "step": 4215 }, { "epoch": 0.38, "grad_norm": 7.330326880041444, "learning_rate": 9.359144865204176e-06, "loss": 0.8598, "step": 4216 }, { "epoch": 0.38, "grad_norm": 5.788126886359555, "learning_rate": 9.358790994740669e-06, "loss": 0.8957, "step": 4217 }, { "epoch": 0.38, "grad_norm": 6.014165574322836, "learning_rate": 9.358437033296943e-06, "loss": 0.8507, "step": 4218 }, { "epoch": 0.38, "grad_norm": 6.049513476981598, "learning_rate": 9.358082980880392e-06, "loss": 0.8163, "step": 4219 }, { "epoch": 0.38, "grad_norm": 5.395693523525062, "learning_rate": 9.3577288374984e-06, "loss": 0.8007, "step": 4220 }, { "epoch": 0.38, "grad_norm": 5.105039626943591, "learning_rate": 9.357374603158364e-06, "loss": 0.8219, "step": 4221 }, { "epoch": 0.38, "grad_norm": 8.223965727975415, "learning_rate": 9.357020277867674e-06, "loss": 0.8846, "step": 4222 }, { "epoch": 0.38, "grad_norm": 9.295629358665192, "learning_rate": 9.35666586163373e-06, "loss": 0.7818, "step": 4223 }, { "epoch": 0.38, "grad_norm": 5.713351597904885, "learning_rate": 9.356311354463924e-06, "loss": 0.7903, "step": 4224 }, { "epoch": 0.38, "grad_norm": 6.617096689184785, "learning_rate": 9.35595675636566e-06, "loss": 0.8682, "step": 4225 }, { "epoch": 0.38, "grad_norm": 4.785333182216809, "learning_rate": 9.355602067346337e-06, "loss": 0.8409, "step": 4226 }, { "epoch": 0.38, "grad_norm": 7.566145019164834, "learning_rate": 9.35524728741336e-06, "loss": 0.7906, "step": 4227 }, { "epoch": 0.38, "grad_norm": 5.136943298837495, "learning_rate": 9.354892416574135e-06, "loss": 0.7747, "step": 4228 }, { "epoch": 0.38, "grad_norm": 7.553953512226192, "learning_rate": 9.354537454836067e-06, "loss": 0.873, "step": 4229 }, { "epoch": 0.38, "grad_norm": 7.311252365644838, "learning_rate": 9.354182402206566e-06, "loss": 0.8036, "step": 4230 }, { "epoch": 0.38, "grad_norm": 6.097597660662817, "learning_rate": 9.353827258693043e-06, "loss": 0.8583, "step": 4231 }, { "epoch": 0.38, "grad_norm": 7.739314986290021, "learning_rate": 9.35347202430291e-06, "loss": 0.8858, "step": 4232 }, { "epoch": 0.38, "grad_norm": 6.313571533153005, "learning_rate": 9.353116699043583e-06, "loss": 0.8058, "step": 4233 }, { "epoch": 0.38, "grad_norm": 6.805854935996429, "learning_rate": 9.352761282922479e-06, "loss": 0.8168, "step": 4234 }, { "epoch": 0.38, "grad_norm": 6.535529316215074, "learning_rate": 9.352405775947014e-06, "loss": 0.8235, "step": 4235 }, { "epoch": 0.38, "grad_norm": 6.476044590396591, "learning_rate": 9.35205017812461e-06, "loss": 0.9101, "step": 4236 }, { "epoch": 0.38, "grad_norm": 5.445937184544161, "learning_rate": 9.351694489462689e-06, "loss": 0.8733, "step": 4237 }, { "epoch": 0.38, "grad_norm": 6.061095670128932, "learning_rate": 9.351338709968676e-06, "loss": 0.8919, "step": 4238 }, { "epoch": 0.38, "grad_norm": 4.906015308536657, "learning_rate": 9.350982839649998e-06, "loss": 0.8164, "step": 4239 }, { "epoch": 0.38, "grad_norm": 5.881305757583553, "learning_rate": 9.35062687851408e-06, "loss": 0.858, "step": 4240 }, { "epoch": 0.38, "grad_norm": 6.185309417685991, "learning_rate": 9.350270826568356e-06, "loss": 0.8383, "step": 4241 }, { "epoch": 0.38, "grad_norm": 7.127411191575992, "learning_rate": 9.349914683820252e-06, "loss": 0.7821, "step": 4242 }, { "epoch": 0.38, "grad_norm": 5.714765347150255, "learning_rate": 9.349558450277207e-06, "loss": 0.8355, "step": 4243 }, { "epoch": 0.38, "grad_norm": 6.391600149573789, "learning_rate": 9.349202125946656e-06, "loss": 0.8765, "step": 4244 }, { "epoch": 0.38, "grad_norm": 6.341236911514885, "learning_rate": 9.348845710836034e-06, "loss": 0.832, "step": 4245 }, { "epoch": 0.38, "grad_norm": 6.162026130439689, "learning_rate": 9.348489204952781e-06, "loss": 0.8597, "step": 4246 }, { "epoch": 0.38, "grad_norm": 6.671547479688158, "learning_rate": 9.348132608304338e-06, "loss": 0.9192, "step": 4247 }, { "epoch": 0.38, "grad_norm": 4.456145263968649, "learning_rate": 9.347775920898153e-06, "loss": 0.8443, "step": 4248 }, { "epoch": 0.38, "grad_norm": 7.5237172892436135, "learning_rate": 9.347419142741664e-06, "loss": 0.9078, "step": 4249 }, { "epoch": 0.38, "grad_norm": 5.847702686183809, "learning_rate": 9.347062273842321e-06, "loss": 0.8252, "step": 4250 }, { "epoch": 0.38, "grad_norm": 6.7965487743060455, "learning_rate": 9.346705314207573e-06, "loss": 0.8386, "step": 4251 }, { "epoch": 0.38, "grad_norm": 5.3759110355259905, "learning_rate": 9.346348263844873e-06, "loss": 0.783, "step": 4252 }, { "epoch": 0.38, "grad_norm": 6.0118384620454135, "learning_rate": 9.345991122761668e-06, "loss": 0.8703, "step": 4253 }, { "epoch": 0.38, "grad_norm": 5.5725085086027235, "learning_rate": 9.345633890965418e-06, "loss": 0.7744, "step": 4254 }, { "epoch": 0.38, "grad_norm": 6.374924460635833, "learning_rate": 9.345276568463577e-06, "loss": 0.8529, "step": 4255 }, { "epoch": 0.38, "grad_norm": 6.572340610802262, "learning_rate": 9.344919155263604e-06, "loss": 0.8405, "step": 4256 }, { "epoch": 0.38, "grad_norm": 7.314437163312313, "learning_rate": 9.344561651372958e-06, "loss": 0.9246, "step": 4257 }, { "epoch": 0.38, "grad_norm": 7.093939518182437, "learning_rate": 9.344204056799102e-06, "loss": 0.8543, "step": 4258 }, { "epoch": 0.38, "grad_norm": 6.179530133294514, "learning_rate": 9.3438463715495e-06, "loss": 0.7699, "step": 4259 }, { "epoch": 0.38, "grad_norm": 7.762505166855348, "learning_rate": 9.343488595631619e-06, "loss": 0.873, "step": 4260 }, { "epoch": 0.38, "grad_norm": 6.963042856942487, "learning_rate": 9.343130729052926e-06, "loss": 0.8564, "step": 4261 }, { "epoch": 0.38, "grad_norm": 5.641218894450112, "learning_rate": 9.342772771820887e-06, "loss": 0.8399, "step": 4262 }, { "epoch": 0.38, "grad_norm": 6.689875145638253, "learning_rate": 9.342414723942979e-06, "loss": 0.8565, "step": 4263 }, { "epoch": 0.38, "grad_norm": 6.056474673237054, "learning_rate": 9.342056585426674e-06, "loss": 0.82, "step": 4264 }, { "epoch": 0.38, "grad_norm": 7.240532086506664, "learning_rate": 9.341698356279446e-06, "loss": 0.842, "step": 4265 }, { "epoch": 0.38, "grad_norm": 6.169032573799889, "learning_rate": 9.341340036508773e-06, "loss": 0.8565, "step": 4266 }, { "epoch": 0.38, "grad_norm": 8.497585143575701, "learning_rate": 9.340981626122134e-06, "loss": 0.8313, "step": 4267 }, { "epoch": 0.38, "grad_norm": 6.77263547684289, "learning_rate": 9.34062312512701e-06, "loss": 0.9087, "step": 4268 }, { "epoch": 0.38, "grad_norm": 6.118237699528283, "learning_rate": 9.340264533530886e-06, "loss": 0.8626, "step": 4269 }, { "epoch": 0.38, "grad_norm": 7.7859350277594075, "learning_rate": 9.339905851341242e-06, "loss": 0.8797, "step": 4270 }, { "epoch": 0.38, "grad_norm": 5.600349599057544, "learning_rate": 9.339547078565568e-06, "loss": 0.7786, "step": 4271 }, { "epoch": 0.38, "grad_norm": 6.449801960611563, "learning_rate": 9.339188215211352e-06, "loss": 0.6884, "step": 4272 }, { "epoch": 0.38, "grad_norm": 6.375935239878161, "learning_rate": 9.338829261286087e-06, "loss": 0.7891, "step": 4273 }, { "epoch": 0.38, "grad_norm": 5.063388020342288, "learning_rate": 9.338470216797261e-06, "loss": 0.7923, "step": 4274 }, { "epoch": 0.38, "grad_norm": 5.40814049296346, "learning_rate": 9.33811108175237e-06, "loss": 0.8538, "step": 4275 }, { "epoch": 0.38, "grad_norm": 7.597402610994749, "learning_rate": 9.337751856158912e-06, "loss": 0.8646, "step": 4276 }, { "epoch": 0.38, "grad_norm": 5.888521865219829, "learning_rate": 9.337392540024383e-06, "loss": 0.8589, "step": 4277 }, { "epoch": 0.38, "grad_norm": 7.206648754962669, "learning_rate": 9.337033133356284e-06, "loss": 0.7939, "step": 4278 }, { "epoch": 0.38, "grad_norm": 7.1704856561570125, "learning_rate": 9.336673636162116e-06, "loss": 0.8381, "step": 4279 }, { "epoch": 0.38, "grad_norm": 5.885888324496254, "learning_rate": 9.336314048449382e-06, "loss": 0.852, "step": 4280 }, { "epoch": 0.38, "grad_norm": 5.086169991644454, "learning_rate": 9.33595437022559e-06, "loss": 0.8292, "step": 4281 }, { "epoch": 0.38, "grad_norm": 6.308688149146375, "learning_rate": 9.335594601498246e-06, "loss": 0.8078, "step": 4282 }, { "epoch": 0.38, "grad_norm": 5.5636153519785125, "learning_rate": 9.335234742274858e-06, "loss": 0.8634, "step": 4283 }, { "epoch": 0.38, "grad_norm": 7.936106150752359, "learning_rate": 9.33487479256294e-06, "loss": 0.7766, "step": 4284 }, { "epoch": 0.38, "grad_norm": 6.848401017570743, "learning_rate": 9.334514752370004e-06, "loss": 0.9126, "step": 4285 }, { "epoch": 0.38, "grad_norm": 5.231825795867669, "learning_rate": 9.334154621703564e-06, "loss": 0.8721, "step": 4286 }, { "epoch": 0.38, "grad_norm": 6.718384398197964, "learning_rate": 9.33379440057114e-06, "loss": 0.8391, "step": 4287 }, { "epoch": 0.38, "grad_norm": 5.0032220738051, "learning_rate": 9.333434088980246e-06, "loss": 0.8704, "step": 4288 }, { "epoch": 0.38, "grad_norm": 8.375546595725357, "learning_rate": 9.333073686938406e-06, "loss": 0.8605, "step": 4289 }, { "epoch": 0.38, "grad_norm": 8.090490395201389, "learning_rate": 9.332713194453142e-06, "loss": 0.9013, "step": 4290 }, { "epoch": 0.38, "grad_norm": 5.389158355548935, "learning_rate": 9.332352611531982e-06, "loss": 0.7937, "step": 4291 }, { "epoch": 0.38, "grad_norm": 7.2912037056374475, "learning_rate": 9.331991938182444e-06, "loss": 0.8264, "step": 4292 }, { "epoch": 0.38, "grad_norm": 6.029326445907776, "learning_rate": 9.331631174412063e-06, "loss": 0.8329, "step": 4293 }, { "epoch": 0.38, "grad_norm": 5.159995059805666, "learning_rate": 9.331270320228368e-06, "loss": 0.9023, "step": 4294 }, { "epoch": 0.38, "grad_norm": 6.964686225193782, "learning_rate": 9.33090937563889e-06, "loss": 0.793, "step": 4295 }, { "epoch": 0.38, "grad_norm": 7.699528193617971, "learning_rate": 9.330548340651164e-06, "loss": 0.8727, "step": 4296 }, { "epoch": 0.38, "grad_norm": 6.169470136001731, "learning_rate": 9.330187215272724e-06, "loss": 0.7635, "step": 4297 }, { "epoch": 0.38, "grad_norm": 6.384229365473055, "learning_rate": 9.329825999511109e-06, "loss": 0.7864, "step": 4298 }, { "epoch": 0.38, "grad_norm": 5.642744545924761, "learning_rate": 9.32946469337386e-06, "loss": 0.799, "step": 4299 }, { "epoch": 0.38, "grad_norm": 6.817878872478266, "learning_rate": 9.329103296868514e-06, "loss": 0.82, "step": 4300 }, { "epoch": 0.38, "grad_norm": 8.036888595957784, "learning_rate": 9.32874181000262e-06, "loss": 0.9031, "step": 4301 }, { "epoch": 0.38, "grad_norm": 8.256451480673778, "learning_rate": 9.32838023278372e-06, "loss": 0.772, "step": 4302 }, { "epoch": 0.38, "grad_norm": 6.384401119957896, "learning_rate": 9.32801856521936e-06, "loss": 0.8683, "step": 4303 }, { "epoch": 0.38, "grad_norm": 6.145436329663048, "learning_rate": 9.327656807317092e-06, "loss": 0.8643, "step": 4304 }, { "epoch": 0.38, "grad_norm": 5.7588488749905205, "learning_rate": 9.327294959084466e-06, "loss": 0.8561, "step": 4305 }, { "epoch": 0.38, "grad_norm": 6.74710087129823, "learning_rate": 9.326933020529032e-06, "loss": 0.8589, "step": 4306 }, { "epoch": 0.38, "grad_norm": 4.576226253192541, "learning_rate": 9.326570991658349e-06, "loss": 0.8775, "step": 4307 }, { "epoch": 0.38, "grad_norm": 5.686530594321312, "learning_rate": 9.326208872479971e-06, "loss": 0.8109, "step": 4308 }, { "epoch": 0.38, "grad_norm": 6.691535447222282, "learning_rate": 9.325846663001457e-06, "loss": 0.8279, "step": 4309 }, { "epoch": 0.38, "grad_norm": 8.436218900351195, "learning_rate": 9.325484363230368e-06, "loss": 0.7762, "step": 4310 }, { "epoch": 0.38, "grad_norm": 6.414517665682313, "learning_rate": 9.325121973174265e-06, "loss": 0.8566, "step": 4311 }, { "epoch": 0.38, "grad_norm": 7.173297352186554, "learning_rate": 9.324759492840713e-06, "loss": 0.8444, "step": 4312 }, { "epoch": 0.38, "grad_norm": 4.97605195714034, "learning_rate": 9.324396922237278e-06, "loss": 0.8218, "step": 4313 }, { "epoch": 0.38, "grad_norm": 5.60891932114542, "learning_rate": 9.324034261371527e-06, "loss": 0.8226, "step": 4314 }, { "epoch": 0.38, "grad_norm": 5.574912900920701, "learning_rate": 9.323671510251029e-06, "loss": 0.8378, "step": 4315 }, { "epoch": 0.39, "grad_norm": 7.385025247785694, "learning_rate": 9.32330866888336e-06, "loss": 0.8643, "step": 4316 }, { "epoch": 0.39, "grad_norm": 7.458643993504901, "learning_rate": 9.322945737276088e-06, "loss": 0.8851, "step": 4317 }, { "epoch": 0.39, "grad_norm": 5.63581409716872, "learning_rate": 9.322582715436789e-06, "loss": 0.7675, "step": 4318 }, { "epoch": 0.39, "grad_norm": 5.529800985273648, "learning_rate": 9.322219603373046e-06, "loss": 0.9076, "step": 4319 }, { "epoch": 0.39, "grad_norm": 7.047900649789184, "learning_rate": 9.321856401092433e-06, "loss": 0.772, "step": 4320 }, { "epoch": 0.39, "grad_norm": 6.308892532142045, "learning_rate": 9.321493108602532e-06, "loss": 0.8343, "step": 4321 }, { "epoch": 0.39, "grad_norm": 6.581800803850134, "learning_rate": 9.321129725910926e-06, "loss": 0.8932, "step": 4322 }, { "epoch": 0.39, "grad_norm": 7.689896762525324, "learning_rate": 9.3207662530252e-06, "loss": 0.8637, "step": 4323 }, { "epoch": 0.39, "grad_norm": 5.77560223405044, "learning_rate": 9.320402689952942e-06, "loss": 0.8444, "step": 4324 }, { "epoch": 0.39, "grad_norm": 5.734211083351347, "learning_rate": 9.320039036701738e-06, "loss": 0.8892, "step": 4325 }, { "epoch": 0.39, "grad_norm": 8.039113570848922, "learning_rate": 9.31967529327918e-06, "loss": 0.9226, "step": 4326 }, { "epoch": 0.39, "grad_norm": 6.542494568331635, "learning_rate": 9.319311459692862e-06, "loss": 0.8151, "step": 4327 }, { "epoch": 0.39, "grad_norm": 6.673367677320942, "learning_rate": 9.318947535950375e-06, "loss": 0.8323, "step": 4328 }, { "epoch": 0.39, "grad_norm": 5.940795820063644, "learning_rate": 9.318583522059317e-06, "loss": 0.8207, "step": 4329 }, { "epoch": 0.39, "grad_norm": 8.505982963422932, "learning_rate": 9.318219418027284e-06, "loss": 0.8379, "step": 4330 }, { "epoch": 0.39, "grad_norm": 5.208320585427727, "learning_rate": 9.317855223861878e-06, "loss": 0.8008, "step": 4331 }, { "epoch": 0.39, "grad_norm": 5.507675363730609, "learning_rate": 9.317490939570702e-06, "loss": 0.818, "step": 4332 }, { "epoch": 0.39, "grad_norm": 5.03228554788471, "learning_rate": 9.317126565161357e-06, "loss": 0.8385, "step": 4333 }, { "epoch": 0.39, "grad_norm": 5.505075197345783, "learning_rate": 9.316762100641448e-06, "loss": 0.8563, "step": 4334 }, { "epoch": 0.39, "grad_norm": 7.991044786583493, "learning_rate": 9.316397546018583e-06, "loss": 0.8769, "step": 4335 }, { "epoch": 0.39, "grad_norm": 6.417279339127916, "learning_rate": 9.316032901300374e-06, "loss": 0.843, "step": 4336 }, { "epoch": 0.39, "grad_norm": 6.122843203492062, "learning_rate": 9.31566816649443e-06, "loss": 0.8008, "step": 4337 }, { "epoch": 0.39, "grad_norm": 8.032643581133637, "learning_rate": 9.315303341608364e-06, "loss": 0.8663, "step": 4338 }, { "epoch": 0.39, "grad_norm": 7.51247204316075, "learning_rate": 9.31493842664979e-06, "loss": 0.8238, "step": 4339 }, { "epoch": 0.39, "grad_norm": 4.936236067160519, "learning_rate": 9.314573421626326e-06, "loss": 0.7919, "step": 4340 }, { "epoch": 0.39, "grad_norm": 5.746648446059553, "learning_rate": 9.314208326545592e-06, "loss": 0.7591, "step": 4341 }, { "epoch": 0.39, "grad_norm": 6.506814486233366, "learning_rate": 9.313843141415206e-06, "loss": 0.9274, "step": 4342 }, { "epoch": 0.39, "grad_norm": 6.436127351951089, "learning_rate": 9.313477866242792e-06, "loss": 0.8371, "step": 4343 }, { "epoch": 0.39, "grad_norm": 6.911529607741707, "learning_rate": 9.313112501035975e-06, "loss": 0.8038, "step": 4344 }, { "epoch": 0.39, "grad_norm": 6.492835974289569, "learning_rate": 9.312747045802377e-06, "loss": 0.8218, "step": 4345 }, { "epoch": 0.39, "grad_norm": 5.7365613150107535, "learning_rate": 9.312381500549631e-06, "loss": 0.8155, "step": 4346 }, { "epoch": 0.39, "grad_norm": 6.2837704003185895, "learning_rate": 9.312015865285366e-06, "loss": 0.8441, "step": 4347 }, { "epoch": 0.39, "grad_norm": 7.129655183365047, "learning_rate": 9.311650140017212e-06, "loss": 0.8218, "step": 4348 }, { "epoch": 0.39, "grad_norm": 5.9815294611422365, "learning_rate": 9.311284324752804e-06, "loss": 0.847, "step": 4349 }, { "epoch": 0.39, "grad_norm": 6.668906164597301, "learning_rate": 9.310918419499777e-06, "loss": 0.8542, "step": 4350 }, { "epoch": 0.39, "grad_norm": 4.9057448491530415, "learning_rate": 9.310552424265768e-06, "loss": 0.7865, "step": 4351 }, { "epoch": 0.39, "grad_norm": 7.351692926996722, "learning_rate": 9.31018633905842e-06, "loss": 0.9269, "step": 4352 }, { "epoch": 0.39, "grad_norm": 6.283725995153035, "learning_rate": 9.309820163885368e-06, "loss": 0.8742, "step": 4353 }, { "epoch": 0.39, "grad_norm": 7.5898157233488535, "learning_rate": 9.30945389875426e-06, "loss": 0.8158, "step": 4354 }, { "epoch": 0.39, "grad_norm": 6.233919442897969, "learning_rate": 9.309087543672739e-06, "loss": 0.8496, "step": 4355 }, { "epoch": 0.39, "grad_norm": 5.752994429182826, "learning_rate": 9.308721098648452e-06, "loss": 0.847, "step": 4356 }, { "epoch": 0.39, "grad_norm": 5.539894713204919, "learning_rate": 9.308354563689049e-06, "loss": 0.8668, "step": 4357 }, { "epoch": 0.39, "grad_norm": 10.909235142424608, "learning_rate": 9.307987938802178e-06, "loss": 0.8044, "step": 4358 }, { "epoch": 0.39, "grad_norm": 6.78464534932656, "learning_rate": 9.307621223995496e-06, "loss": 0.8215, "step": 4359 }, { "epoch": 0.39, "grad_norm": 4.798815352983658, "learning_rate": 9.307254419276653e-06, "loss": 0.8615, "step": 4360 }, { "epoch": 0.39, "grad_norm": 5.527291973213971, "learning_rate": 9.306887524653305e-06, "loss": 0.9004, "step": 4361 }, { "epoch": 0.39, "grad_norm": 5.10565406406722, "learning_rate": 9.306520540133113e-06, "loss": 0.8408, "step": 4362 }, { "epoch": 0.39, "grad_norm": 5.552091232066093, "learning_rate": 9.306153465723736e-06, "loss": 0.8908, "step": 4363 }, { "epoch": 0.39, "grad_norm": 7.830602917196874, "learning_rate": 9.305786301432836e-06, "loss": 0.7946, "step": 4364 }, { "epoch": 0.39, "grad_norm": 7.394372912242018, "learning_rate": 9.305419047268077e-06, "loss": 0.8586, "step": 4365 }, { "epoch": 0.39, "grad_norm": 5.49090933406075, "learning_rate": 9.305051703237122e-06, "loss": 0.8654, "step": 4366 }, { "epoch": 0.39, "grad_norm": 6.622587190197653, "learning_rate": 9.304684269347643e-06, "loss": 0.8211, "step": 4367 }, { "epoch": 0.39, "grad_norm": 4.694005044050268, "learning_rate": 9.304316745607307e-06, "loss": 0.8051, "step": 4368 }, { "epoch": 0.39, "grad_norm": 5.736987060905926, "learning_rate": 9.303949132023781e-06, "loss": 0.8127, "step": 4369 }, { "epoch": 0.39, "grad_norm": 6.711658469118172, "learning_rate": 9.303581428604746e-06, "loss": 0.8265, "step": 4370 }, { "epoch": 0.39, "grad_norm": 6.149537768698715, "learning_rate": 9.303213635357874e-06, "loss": 0.8095, "step": 4371 }, { "epoch": 0.39, "grad_norm": 7.5844938939618745, "learning_rate": 9.30284575229084e-06, "loss": 0.8464, "step": 4372 }, { "epoch": 0.39, "grad_norm": 6.523607577629111, "learning_rate": 9.302477779411324e-06, "loss": 0.86, "step": 4373 }, { "epoch": 0.39, "grad_norm": 6.705816058207198, "learning_rate": 9.302109716727006e-06, "loss": 0.8674, "step": 4374 }, { "epoch": 0.39, "grad_norm": 6.842880076022102, "learning_rate": 9.30174156424557e-06, "loss": 0.8109, "step": 4375 }, { "epoch": 0.39, "grad_norm": 6.0834085074500175, "learning_rate": 9.3013733219747e-06, "loss": 0.8089, "step": 4376 }, { "epoch": 0.39, "grad_norm": 6.610893446462115, "learning_rate": 9.30100498992208e-06, "loss": 0.8902, "step": 4377 }, { "epoch": 0.39, "grad_norm": 6.0217759314467605, "learning_rate": 9.300636568095403e-06, "loss": 0.8605, "step": 4378 }, { "epoch": 0.39, "grad_norm": 6.357175366880541, "learning_rate": 9.300268056502353e-06, "loss": 0.7841, "step": 4379 }, { "epoch": 0.39, "grad_norm": 6.996411991537311, "learning_rate": 9.299899455150624e-06, "loss": 0.836, "step": 4380 }, { "epoch": 0.39, "grad_norm": 5.170984270639248, "learning_rate": 9.299530764047913e-06, "loss": 0.8013, "step": 4381 }, { "epoch": 0.39, "grad_norm": 6.4043245632517465, "learning_rate": 9.299161983201912e-06, "loss": 0.8962, "step": 4382 }, { "epoch": 0.39, "grad_norm": 5.2353953653265854, "learning_rate": 9.29879311262032e-06, "loss": 0.829, "step": 4383 }, { "epoch": 0.39, "grad_norm": 6.634029896596374, "learning_rate": 9.298424152310837e-06, "loss": 0.8072, "step": 4384 }, { "epoch": 0.39, "grad_norm": 6.631814599037949, "learning_rate": 9.29805510228116e-06, "loss": 0.8519, "step": 4385 }, { "epoch": 0.39, "grad_norm": 5.797499165907972, "learning_rate": 9.297685962538998e-06, "loss": 0.8534, "step": 4386 }, { "epoch": 0.39, "grad_norm": 4.791608975422356, "learning_rate": 9.297316733092053e-06, "loss": 0.7407, "step": 4387 }, { "epoch": 0.39, "grad_norm": 6.112398368456486, "learning_rate": 9.296947413948031e-06, "loss": 0.854, "step": 4388 }, { "epoch": 0.39, "grad_norm": 5.212035789700138, "learning_rate": 9.296578005114642e-06, "loss": 0.8576, "step": 4389 }, { "epoch": 0.39, "grad_norm": 5.216902088388244, "learning_rate": 9.296208506599597e-06, "loss": 0.883, "step": 4390 }, { "epoch": 0.39, "grad_norm": 6.602784740199572, "learning_rate": 9.295838918410608e-06, "loss": 0.7801, "step": 4391 }, { "epoch": 0.39, "grad_norm": 4.653722148426496, "learning_rate": 9.29546924055539e-06, "loss": 0.8486, "step": 4392 }, { "epoch": 0.39, "grad_norm": 6.389514508531621, "learning_rate": 9.295099473041657e-06, "loss": 0.8219, "step": 4393 }, { "epoch": 0.39, "grad_norm": 5.823606759527557, "learning_rate": 9.294729615877131e-06, "loss": 0.9042, "step": 4394 }, { "epoch": 0.39, "grad_norm": 7.551974443410783, "learning_rate": 9.294359669069527e-06, "loss": 0.8711, "step": 4395 }, { "epoch": 0.39, "grad_norm": 5.8093333398081795, "learning_rate": 9.293989632626571e-06, "loss": 0.8165, "step": 4396 }, { "epoch": 0.39, "grad_norm": 7.182319160354132, "learning_rate": 9.293619506555986e-06, "loss": 0.7937, "step": 4397 }, { "epoch": 0.39, "grad_norm": 4.51734375109269, "learning_rate": 9.293249290865493e-06, "loss": 0.865, "step": 4398 }, { "epoch": 0.39, "grad_norm": 6.7745689885718345, "learning_rate": 9.292878985562826e-06, "loss": 0.904, "step": 4399 }, { "epoch": 0.39, "grad_norm": 6.087617306345822, "learning_rate": 9.292508590655711e-06, "loss": 0.842, "step": 4400 }, { "epoch": 0.39, "grad_norm": 6.698109916900253, "learning_rate": 9.29213810615188e-06, "loss": 0.8114, "step": 4401 }, { "epoch": 0.39, "grad_norm": 8.105229392625235, "learning_rate": 9.291767532059065e-06, "loss": 0.8513, "step": 4402 }, { "epoch": 0.39, "grad_norm": 6.38839980316784, "learning_rate": 9.291396868385e-06, "loss": 0.8356, "step": 4403 }, { "epoch": 0.39, "grad_norm": 6.210212850235889, "learning_rate": 9.291026115137426e-06, "loss": 0.8876, "step": 4404 }, { "epoch": 0.39, "grad_norm": 6.440264237820538, "learning_rate": 9.290655272324078e-06, "loss": 0.8485, "step": 4405 }, { "epoch": 0.39, "grad_norm": 5.912029853657919, "learning_rate": 9.290284339952698e-06, "loss": 0.8581, "step": 4406 }, { "epoch": 0.39, "grad_norm": 6.028063718428649, "learning_rate": 9.289913318031027e-06, "loss": 0.8206, "step": 4407 }, { "epoch": 0.39, "grad_norm": 8.35255596808982, "learning_rate": 9.28954220656681e-06, "loss": 0.9145, "step": 4408 }, { "epoch": 0.39, "grad_norm": 6.686852935801378, "learning_rate": 9.289171005567795e-06, "loss": 0.8561, "step": 4409 }, { "epoch": 0.39, "grad_norm": 5.957278845109005, "learning_rate": 9.288799715041728e-06, "loss": 0.8793, "step": 4410 }, { "epoch": 0.39, "grad_norm": 8.11654153425826, "learning_rate": 9.28842833499636e-06, "loss": 0.7913, "step": 4411 }, { "epoch": 0.39, "grad_norm": 5.7246318746618865, "learning_rate": 9.288056865439439e-06, "loss": 0.8224, "step": 4412 }, { "epoch": 0.39, "grad_norm": 5.77711808835208, "learning_rate": 9.287685306378724e-06, "loss": 0.7921, "step": 4413 }, { "epoch": 0.39, "grad_norm": 6.667114303770183, "learning_rate": 9.287313657821967e-06, "loss": 0.8893, "step": 4414 }, { "epoch": 0.39, "grad_norm": 5.983479502409955, "learning_rate": 9.286941919776928e-06, "loss": 0.806, "step": 4415 }, { "epoch": 0.39, "grad_norm": 4.7811817169256265, "learning_rate": 9.286570092251364e-06, "loss": 0.8279, "step": 4416 }, { "epoch": 0.39, "grad_norm": 5.416666260184944, "learning_rate": 9.286198175253035e-06, "loss": 0.8352, "step": 4417 }, { "epoch": 0.39, "grad_norm": 6.256330650942187, "learning_rate": 9.285826168789707e-06, "loss": 0.8542, "step": 4418 }, { "epoch": 0.39, "grad_norm": 5.70038586914247, "learning_rate": 9.285454072869145e-06, "loss": 0.8063, "step": 4419 }, { "epoch": 0.39, "grad_norm": 7.852249976771596, "learning_rate": 9.285081887499113e-06, "loss": 0.8408, "step": 4420 }, { "epoch": 0.39, "grad_norm": 5.129210274692351, "learning_rate": 9.28470961268738e-06, "loss": 0.8103, "step": 4421 }, { "epoch": 0.39, "grad_norm": 5.400203200277461, "learning_rate": 9.284337248441717e-06, "loss": 0.8831, "step": 4422 }, { "epoch": 0.39, "grad_norm": 5.326375778532082, "learning_rate": 9.283964794769897e-06, "loss": 0.8404, "step": 4423 }, { "epoch": 0.39, "grad_norm": 6.446147266339482, "learning_rate": 9.283592251679694e-06, "loss": 0.8391, "step": 4424 }, { "epoch": 0.39, "grad_norm": 7.157748236480059, "learning_rate": 9.283219619178884e-06, "loss": 0.8758, "step": 4425 }, { "epoch": 0.39, "grad_norm": 6.583639309983387, "learning_rate": 9.282846897275245e-06, "loss": 0.7768, "step": 4426 }, { "epoch": 0.39, "grad_norm": 6.162724002954905, "learning_rate": 9.282474085976553e-06, "loss": 0.8339, "step": 4427 }, { "epoch": 0.4, "grad_norm": 5.547515039850337, "learning_rate": 9.282101185290597e-06, "loss": 0.8585, "step": 4428 }, { "epoch": 0.4, "grad_norm": 7.731549206484719, "learning_rate": 9.281728195225154e-06, "loss": 0.8841, "step": 4429 }, { "epoch": 0.4, "grad_norm": 6.910253333087144, "learning_rate": 9.281355115788014e-06, "loss": 0.8583, "step": 4430 }, { "epoch": 0.4, "grad_norm": 6.3358170967018275, "learning_rate": 9.28098194698696e-06, "loss": 0.8144, "step": 4431 }, { "epoch": 0.4, "grad_norm": 4.959933946438981, "learning_rate": 9.280608688829782e-06, "loss": 0.8568, "step": 4432 }, { "epoch": 0.4, "grad_norm": 5.926828936236354, "learning_rate": 9.280235341324275e-06, "loss": 0.9302, "step": 4433 }, { "epoch": 0.4, "grad_norm": 6.806216895680405, "learning_rate": 9.279861904478226e-06, "loss": 0.8305, "step": 4434 }, { "epoch": 0.4, "grad_norm": 6.425698991032682, "learning_rate": 9.279488378299434e-06, "loss": 0.7534, "step": 4435 }, { "epoch": 0.4, "grad_norm": 6.219476793574893, "learning_rate": 9.279114762795694e-06, "loss": 0.7853, "step": 4436 }, { "epoch": 0.4, "grad_norm": 7.17955882788759, "learning_rate": 9.278741057974804e-06, "loss": 0.7896, "step": 4437 }, { "epoch": 0.4, "grad_norm": 5.490353714839437, "learning_rate": 9.278367263844565e-06, "loss": 0.8017, "step": 4438 }, { "epoch": 0.4, "grad_norm": 6.894614576984033, "learning_rate": 9.277993380412779e-06, "loss": 0.8766, "step": 4439 }, { "epoch": 0.4, "grad_norm": 5.746740027413282, "learning_rate": 9.277619407687248e-06, "loss": 0.8512, "step": 4440 }, { "epoch": 0.4, "grad_norm": 6.344007958987455, "learning_rate": 9.277245345675782e-06, "loss": 0.8566, "step": 4441 }, { "epoch": 0.4, "grad_norm": 7.367381995463293, "learning_rate": 9.276871194386186e-06, "loss": 0.8045, "step": 4442 }, { "epoch": 0.4, "grad_norm": 8.936519213020619, "learning_rate": 9.276496953826269e-06, "loss": 0.7777, "step": 4443 }, { "epoch": 0.4, "grad_norm": 5.065741517797181, "learning_rate": 9.276122624003845e-06, "loss": 0.8161, "step": 4444 }, { "epoch": 0.4, "grad_norm": 5.400053138991957, "learning_rate": 9.275748204926724e-06, "loss": 0.83, "step": 4445 }, { "epoch": 0.4, "grad_norm": 6.4115173219454125, "learning_rate": 9.275373696602724e-06, "loss": 0.7389, "step": 4446 }, { "epoch": 0.4, "grad_norm": 5.932314848476022, "learning_rate": 9.274999099039662e-06, "loss": 0.7819, "step": 4447 }, { "epoch": 0.4, "grad_norm": 6.924769543374132, "learning_rate": 9.274624412245355e-06, "loss": 0.8026, "step": 4448 }, { "epoch": 0.4, "grad_norm": 6.730134288395782, "learning_rate": 9.274249636227623e-06, "loss": 0.8706, "step": 4449 }, { "epoch": 0.4, "grad_norm": 5.144299548063782, "learning_rate": 9.273874770994293e-06, "loss": 0.7695, "step": 4450 }, { "epoch": 0.4, "grad_norm": 4.9730323602936535, "learning_rate": 9.273499816553184e-06, "loss": 0.7982, "step": 4451 }, { "epoch": 0.4, "grad_norm": 7.294618931645382, "learning_rate": 9.273124772912128e-06, "loss": 0.8906, "step": 4452 }, { "epoch": 0.4, "grad_norm": 7.109562412389788, "learning_rate": 9.272749640078948e-06, "loss": 0.8686, "step": 4453 }, { "epoch": 0.4, "grad_norm": 8.081966968274, "learning_rate": 9.272374418061478e-06, "loss": 0.8362, "step": 4454 }, { "epoch": 0.4, "grad_norm": 9.34008839554988, "learning_rate": 9.271999106867546e-06, "loss": 0.8251, "step": 4455 }, { "epoch": 0.4, "grad_norm": 6.30622389541799, "learning_rate": 9.271623706504991e-06, "loss": 0.8061, "step": 4456 }, { "epoch": 0.4, "grad_norm": 5.617883467896862, "learning_rate": 9.271248216981645e-06, "loss": 0.8333, "step": 4457 }, { "epoch": 0.4, "grad_norm": 6.234373016333952, "learning_rate": 9.270872638305347e-06, "loss": 0.829, "step": 4458 }, { "epoch": 0.4, "grad_norm": 5.21448071901405, "learning_rate": 9.270496970483933e-06, "loss": 0.8157, "step": 4459 }, { "epoch": 0.4, "grad_norm": 4.804136612153014, "learning_rate": 9.27012121352525e-06, "loss": 0.8365, "step": 4460 }, { "epoch": 0.4, "grad_norm": 5.591344943022942, "learning_rate": 9.269745367437136e-06, "loss": 0.8268, "step": 4461 }, { "epoch": 0.4, "grad_norm": 8.119767227870872, "learning_rate": 9.26936943222744e-06, "loss": 0.8566, "step": 4462 }, { "epoch": 0.4, "grad_norm": 6.435060735129838, "learning_rate": 9.268993407904004e-06, "loss": 0.9066, "step": 4463 }, { "epoch": 0.4, "grad_norm": 5.932751072336364, "learning_rate": 9.268617294474681e-06, "loss": 0.8802, "step": 4464 }, { "epoch": 0.4, "grad_norm": 5.405576485531296, "learning_rate": 9.268241091947321e-06, "loss": 0.8625, "step": 4465 }, { "epoch": 0.4, "grad_norm": 6.736086092017031, "learning_rate": 9.267864800329776e-06, "loss": 0.7884, "step": 4466 }, { "epoch": 0.4, "grad_norm": 5.193313356457381, "learning_rate": 9.267488419629898e-06, "loss": 0.8608, "step": 4467 }, { "epoch": 0.4, "grad_norm": 5.2944916780835065, "learning_rate": 9.267111949855545e-06, "loss": 0.9154, "step": 4468 }, { "epoch": 0.4, "grad_norm": 7.238054846844717, "learning_rate": 9.266735391014576e-06, "loss": 0.8578, "step": 4469 }, { "epoch": 0.4, "grad_norm": 6.786347868243233, "learning_rate": 9.266358743114849e-06, "loss": 0.8121, "step": 4470 }, { "epoch": 0.4, "grad_norm": 8.934844210993619, "learning_rate": 9.265982006164226e-06, "loss": 0.818, "step": 4471 }, { "epoch": 0.4, "grad_norm": 7.206032622228986, "learning_rate": 9.26560518017057e-06, "loss": 0.8797, "step": 4472 }, { "epoch": 0.4, "grad_norm": 5.85396257649498, "learning_rate": 9.26522826514175e-06, "loss": 0.83, "step": 4473 }, { "epoch": 0.4, "grad_norm": 5.307579321340161, "learning_rate": 9.26485126108563e-06, "loss": 0.8419, "step": 4474 }, { "epoch": 0.4, "grad_norm": 6.457092279639356, "learning_rate": 9.26447416801008e-06, "loss": 0.9025, "step": 4475 }, { "epoch": 0.4, "grad_norm": 5.790405559107533, "learning_rate": 9.26409698592297e-06, "loss": 0.8727, "step": 4476 }, { "epoch": 0.4, "grad_norm": 7.159003884990671, "learning_rate": 9.263719714832173e-06, "loss": 0.9158, "step": 4477 }, { "epoch": 0.4, "grad_norm": 7.0037296283234705, "learning_rate": 9.263342354745565e-06, "loss": 0.8316, "step": 4478 }, { "epoch": 0.4, "grad_norm": 5.6615278163750125, "learning_rate": 9.262964905671021e-06, "loss": 0.8481, "step": 4479 }, { "epoch": 0.4, "grad_norm": 5.335806548352132, "learning_rate": 9.262587367616422e-06, "loss": 0.8071, "step": 4480 }, { "epoch": 0.4, "grad_norm": 6.756787679015833, "learning_rate": 9.262209740589646e-06, "loss": 0.8115, "step": 4481 }, { "epoch": 0.4, "grad_norm": 5.621887760066865, "learning_rate": 9.261832024598576e-06, "loss": 0.8272, "step": 4482 }, { "epoch": 0.4, "grad_norm": 5.553849919140367, "learning_rate": 9.261454219651097e-06, "loss": 0.9167, "step": 4483 }, { "epoch": 0.4, "grad_norm": 5.928402081631073, "learning_rate": 9.26107632575509e-06, "loss": 0.8725, "step": 4484 }, { "epoch": 0.4, "grad_norm": 5.271218922001157, "learning_rate": 9.260698342918448e-06, "loss": 0.8599, "step": 4485 }, { "epoch": 0.4, "grad_norm": 8.345454270488993, "learning_rate": 9.26032027114906e-06, "loss": 0.8432, "step": 4486 }, { "epoch": 0.4, "grad_norm": 5.357701623476278, "learning_rate": 9.259942110454815e-06, "loss": 0.9108, "step": 4487 }, { "epoch": 0.4, "grad_norm": 6.174531177371189, "learning_rate": 9.259563860843608e-06, "loss": 0.8065, "step": 4488 }, { "epoch": 0.4, "grad_norm": 6.295649704974207, "learning_rate": 9.259185522323333e-06, "loss": 0.7996, "step": 4489 }, { "epoch": 0.4, "grad_norm": 6.421546926449522, "learning_rate": 9.258807094901888e-06, "loss": 0.8297, "step": 4490 }, { "epoch": 0.4, "grad_norm": 6.460814608297221, "learning_rate": 9.258428578587174e-06, "loss": 0.8173, "step": 4491 }, { "epoch": 0.4, "grad_norm": 4.790350480405788, "learning_rate": 9.258049973387086e-06, "loss": 0.8587, "step": 4492 }, { "epoch": 0.4, "grad_norm": 5.921812056973853, "learning_rate": 9.25767127930953e-06, "loss": 0.8597, "step": 4493 }, { "epoch": 0.4, "grad_norm": 6.715286101269676, "learning_rate": 9.257292496362412e-06, "loss": 0.8034, "step": 4494 }, { "epoch": 0.4, "grad_norm": 4.857201392732599, "learning_rate": 9.256913624553633e-06, "loss": 0.8683, "step": 4495 }, { "epoch": 0.4, "grad_norm": 8.495371389209048, "learning_rate": 9.256534663891109e-06, "loss": 0.8533, "step": 4496 }, { "epoch": 0.4, "grad_norm": 5.896327631081806, "learning_rate": 9.256155614382744e-06, "loss": 0.7857, "step": 4497 }, { "epoch": 0.4, "grad_norm": 6.15319544115759, "learning_rate": 9.25577647603645e-06, "loss": 0.8178, "step": 4498 }, { "epoch": 0.4, "grad_norm": 6.30476293926229, "learning_rate": 9.255397248860143e-06, "loss": 0.8957, "step": 4499 }, { "epoch": 0.4, "grad_norm": 6.510767745782046, "learning_rate": 9.255017932861739e-06, "loss": 0.771, "step": 4500 }, { "epoch": 0.4, "grad_norm": 4.22962304386071, "learning_rate": 9.254638528049152e-06, "loss": 0.8419, "step": 4501 }, { "epoch": 0.4, "grad_norm": 5.842237816131411, "learning_rate": 9.254259034430304e-06, "loss": 0.7841, "step": 4502 }, { "epoch": 0.4, "grad_norm": 5.280048032703478, "learning_rate": 9.253879452013115e-06, "loss": 0.8442, "step": 4503 }, { "epoch": 0.4, "grad_norm": 7.2182974561679325, "learning_rate": 9.253499780805509e-06, "loss": 0.7675, "step": 4504 }, { "epoch": 0.4, "grad_norm": 6.187389725325743, "learning_rate": 9.253120020815409e-06, "loss": 0.8536, "step": 4505 }, { "epoch": 0.4, "grad_norm": 6.062254852603474, "learning_rate": 9.252740172050743e-06, "loss": 0.7873, "step": 4506 }, { "epoch": 0.4, "grad_norm": 7.660422862453042, "learning_rate": 9.252360234519439e-06, "loss": 0.8491, "step": 4507 }, { "epoch": 0.4, "grad_norm": 4.5924847606869905, "learning_rate": 9.251980208229427e-06, "loss": 0.8409, "step": 4508 }, { "epoch": 0.4, "grad_norm": 8.48407995674932, "learning_rate": 9.251600093188642e-06, "loss": 0.8013, "step": 4509 }, { "epoch": 0.4, "grad_norm": 5.786509252658318, "learning_rate": 9.251219889405014e-06, "loss": 0.8487, "step": 4510 }, { "epoch": 0.4, "grad_norm": 7.2668582677131806, "learning_rate": 9.250839596886482e-06, "loss": 0.8405, "step": 4511 }, { "epoch": 0.4, "grad_norm": 5.856085928063963, "learning_rate": 9.250459215640983e-06, "loss": 0.7965, "step": 4512 }, { "epoch": 0.4, "grad_norm": 8.181744766791967, "learning_rate": 9.250078745676456e-06, "loss": 0.8633, "step": 4513 }, { "epoch": 0.4, "grad_norm": 5.712586651951771, "learning_rate": 9.249698187000841e-06, "loss": 0.8348, "step": 4514 }, { "epoch": 0.4, "grad_norm": 5.082382429968132, "learning_rate": 9.249317539622085e-06, "loss": 0.8471, "step": 4515 }, { "epoch": 0.4, "grad_norm": 6.939470985014774, "learning_rate": 9.24893680354813e-06, "loss": 0.8417, "step": 4516 }, { "epoch": 0.4, "grad_norm": 5.84107526748278, "learning_rate": 9.248555978786923e-06, "loss": 0.8463, "step": 4517 }, { "epoch": 0.4, "grad_norm": 5.423829657882289, "learning_rate": 9.248175065346416e-06, "loss": 0.8494, "step": 4518 }, { "epoch": 0.4, "grad_norm": 4.4503172952325665, "learning_rate": 9.247794063234559e-06, "loss": 0.7853, "step": 4519 }, { "epoch": 0.4, "grad_norm": 6.407323444107418, "learning_rate": 9.247412972459303e-06, "loss": 0.92, "step": 4520 }, { "epoch": 0.4, "grad_norm": 5.193147010651306, "learning_rate": 9.2470317930286e-06, "loss": 0.778, "step": 4521 }, { "epoch": 0.4, "grad_norm": 5.649815156631681, "learning_rate": 9.246650524950411e-06, "loss": 0.8484, "step": 4522 }, { "epoch": 0.4, "grad_norm": 4.82847858445869, "learning_rate": 9.246269168232693e-06, "loss": 0.7893, "step": 4523 }, { "epoch": 0.4, "grad_norm": 6.68654607910471, "learning_rate": 9.245887722883405e-06, "loss": 0.8074, "step": 4524 }, { "epoch": 0.4, "grad_norm": 7.273545539970788, "learning_rate": 9.245506188910509e-06, "loss": 0.8886, "step": 4525 }, { "epoch": 0.4, "grad_norm": 6.695577564232889, "learning_rate": 9.245124566321968e-06, "loss": 0.8683, "step": 4526 }, { "epoch": 0.4, "grad_norm": 5.221788081239436, "learning_rate": 9.244742855125749e-06, "loss": 0.8049, "step": 4527 }, { "epoch": 0.4, "grad_norm": 5.626461263880344, "learning_rate": 9.244361055329818e-06, "loss": 0.8965, "step": 4528 }, { "epoch": 0.4, "grad_norm": 5.622863026788422, "learning_rate": 9.243979166942146e-06, "loss": 0.8112, "step": 4529 }, { "epoch": 0.4, "grad_norm": 6.849876253183992, "learning_rate": 9.243597189970704e-06, "loss": 0.7988, "step": 4530 }, { "epoch": 0.4, "grad_norm": 5.4470922734163985, "learning_rate": 9.24321512442346e-06, "loss": 0.8262, "step": 4531 }, { "epoch": 0.4, "grad_norm": 5.809774094160614, "learning_rate": 9.242832970308398e-06, "loss": 0.8593, "step": 4532 }, { "epoch": 0.4, "grad_norm": 6.744728442010087, "learning_rate": 9.242450727633485e-06, "loss": 0.845, "step": 4533 }, { "epoch": 0.4, "grad_norm": 10.946559754660253, "learning_rate": 9.242068396406706e-06, "loss": 0.81, "step": 4534 }, { "epoch": 0.4, "grad_norm": 5.375874996676751, "learning_rate": 9.24168597663604e-06, "loss": 0.7788, "step": 4535 }, { "epoch": 0.4, "grad_norm": 5.144671000797504, "learning_rate": 9.241303468329467e-06, "loss": 0.8297, "step": 4536 }, { "epoch": 0.4, "grad_norm": 9.15762997874979, "learning_rate": 9.240920871494972e-06, "loss": 0.7781, "step": 4537 }, { "epoch": 0.4, "grad_norm": 6.24367246562476, "learning_rate": 9.240538186140542e-06, "loss": 0.7973, "step": 4538 }, { "epoch": 0.4, "grad_norm": 6.839971557384537, "learning_rate": 9.240155412274163e-06, "loss": 0.8287, "step": 4539 }, { "epoch": 0.41, "grad_norm": 6.666460818164944, "learning_rate": 9.239772549903828e-06, "loss": 0.9396, "step": 4540 }, { "epoch": 0.41, "grad_norm": 4.286544164530982, "learning_rate": 9.239389599037524e-06, "loss": 0.8346, "step": 4541 }, { "epoch": 0.41, "grad_norm": 5.234382728748434, "learning_rate": 9.239006559683246e-06, "loss": 0.8207, "step": 4542 }, { "epoch": 0.41, "grad_norm": 8.556425511275856, "learning_rate": 9.23862343184899e-06, "loss": 0.8743, "step": 4543 }, { "epoch": 0.41, "grad_norm": 6.960531096786359, "learning_rate": 9.238240215542753e-06, "loss": 0.8379, "step": 4544 }, { "epoch": 0.41, "grad_norm": 5.9028261976243215, "learning_rate": 9.237856910772532e-06, "loss": 0.8614, "step": 4545 }, { "epoch": 0.41, "grad_norm": 4.707959503311916, "learning_rate": 9.23747351754633e-06, "loss": 0.841, "step": 4546 }, { "epoch": 0.41, "grad_norm": 5.836712954200549, "learning_rate": 9.237090035872148e-06, "loss": 0.8048, "step": 4547 }, { "epoch": 0.41, "grad_norm": 7.030422949048965, "learning_rate": 9.236706465757987e-06, "loss": 0.8285, "step": 4548 }, { "epoch": 0.41, "grad_norm": 5.1294575354180525, "learning_rate": 9.23632280721186e-06, "loss": 0.8369, "step": 4549 }, { "epoch": 0.41, "grad_norm": 8.291893863695924, "learning_rate": 9.235939060241772e-06, "loss": 0.8458, "step": 4550 }, { "epoch": 0.41, "grad_norm": 7.405755177790691, "learning_rate": 9.235555224855731e-06, "loss": 0.8589, "step": 4551 }, { "epoch": 0.41, "grad_norm": 6.63150307477105, "learning_rate": 9.235171301061752e-06, "loss": 0.8356, "step": 4552 }, { "epoch": 0.41, "grad_norm": 6.479639959671529, "learning_rate": 9.234787288867847e-06, "loss": 0.8011, "step": 4553 }, { "epoch": 0.41, "grad_norm": 5.189328568776002, "learning_rate": 9.23440318828203e-06, "loss": 0.8798, "step": 4554 }, { "epoch": 0.41, "grad_norm": 5.956024874147626, "learning_rate": 9.234018999312321e-06, "loss": 0.8664, "step": 4555 }, { "epoch": 0.41, "grad_norm": 6.404609216417384, "learning_rate": 9.233634721966737e-06, "loss": 0.8559, "step": 4556 }, { "epoch": 0.41, "grad_norm": 4.826431471290572, "learning_rate": 9.233250356253301e-06, "loss": 0.7984, "step": 4557 }, { "epoch": 0.41, "grad_norm": 6.82008905173775, "learning_rate": 9.232865902180032e-06, "loss": 0.8184, "step": 4558 }, { "epoch": 0.41, "grad_norm": 6.896683001128371, "learning_rate": 9.232481359754959e-06, "loss": 0.9005, "step": 4559 }, { "epoch": 0.41, "grad_norm": 5.010135887965424, "learning_rate": 9.232096728986106e-06, "loss": 0.7915, "step": 4560 }, { "epoch": 0.41, "grad_norm": 4.662167349470587, "learning_rate": 9.231712009881501e-06, "loss": 0.8354, "step": 4561 }, { "epoch": 0.41, "grad_norm": 7.607293285105917, "learning_rate": 9.231327202449177e-06, "loss": 0.8039, "step": 4562 }, { "epoch": 0.41, "grad_norm": 7.329431041390225, "learning_rate": 9.230942306697164e-06, "loss": 0.8862, "step": 4563 }, { "epoch": 0.41, "grad_norm": 5.3813538023988245, "learning_rate": 9.230557322633494e-06, "loss": 0.9056, "step": 4564 }, { "epoch": 0.41, "grad_norm": 5.691697842566199, "learning_rate": 9.230172250266206e-06, "loss": 0.8901, "step": 4565 }, { "epoch": 0.41, "grad_norm": 8.036077988664768, "learning_rate": 9.229787089603337e-06, "loss": 0.8477, "step": 4566 }, { "epoch": 0.41, "grad_norm": 6.58321789079827, "learning_rate": 9.229401840652923e-06, "loss": 0.817, "step": 4567 }, { "epoch": 0.41, "grad_norm": 5.686150347577597, "learning_rate": 9.22901650342301e-06, "loss": 0.831, "step": 4568 }, { "epoch": 0.41, "grad_norm": 5.438881342272416, "learning_rate": 9.228631077921637e-06, "loss": 0.8572, "step": 4569 }, { "epoch": 0.41, "grad_norm": 5.475035050687916, "learning_rate": 9.228245564156852e-06, "loss": 0.8276, "step": 4570 }, { "epoch": 0.41, "grad_norm": 6.90008814898264, "learning_rate": 9.2278599621367e-06, "loss": 0.7754, "step": 4571 }, { "epoch": 0.41, "grad_norm": 4.88885779427092, "learning_rate": 9.227474271869233e-06, "loss": 0.8844, "step": 4572 }, { "epoch": 0.41, "grad_norm": 6.261933266481776, "learning_rate": 9.227088493362494e-06, "loss": 0.8202, "step": 4573 }, { "epoch": 0.41, "grad_norm": 6.402536838503419, "learning_rate": 9.226702626624544e-06, "loss": 0.8992, "step": 4574 }, { "epoch": 0.41, "grad_norm": 8.770798751245627, "learning_rate": 9.226316671663431e-06, "loss": 0.8266, "step": 4575 }, { "epoch": 0.41, "grad_norm": 5.519834236116662, "learning_rate": 9.225930628487211e-06, "loss": 0.8517, "step": 4576 }, { "epoch": 0.41, "grad_norm": 6.0478768363276245, "learning_rate": 9.225544497103947e-06, "loss": 0.8423, "step": 4577 }, { "epoch": 0.41, "grad_norm": 8.353613508116252, "learning_rate": 9.225158277521695e-06, "loss": 0.8092, "step": 4578 }, { "epoch": 0.41, "grad_norm": 7.949163525123579, "learning_rate": 9.224771969748517e-06, "loss": 0.8947, "step": 4579 }, { "epoch": 0.41, "grad_norm": 6.932634832778146, "learning_rate": 9.224385573792473e-06, "loss": 0.8463, "step": 4580 }, { "epoch": 0.41, "grad_norm": 5.7475945243768845, "learning_rate": 9.223999089661635e-06, "loss": 0.8433, "step": 4581 }, { "epoch": 0.41, "grad_norm": 7.884380892089099, "learning_rate": 9.223612517364064e-06, "loss": 0.8622, "step": 4582 }, { "epoch": 0.41, "grad_norm": 5.612144580129278, "learning_rate": 9.223225856907834e-06, "loss": 0.8212, "step": 4583 }, { "epoch": 0.41, "grad_norm": 5.908675815504412, "learning_rate": 9.22283910830101e-06, "loss": 0.8037, "step": 4584 }, { "epoch": 0.41, "grad_norm": 5.512613757981434, "learning_rate": 9.222452271551669e-06, "loss": 0.8767, "step": 4585 }, { "epoch": 0.41, "grad_norm": 8.0025005645414, "learning_rate": 9.222065346667883e-06, "loss": 0.8208, "step": 4586 }, { "epoch": 0.41, "grad_norm": 6.4600158957402, "learning_rate": 9.221678333657731e-06, "loss": 0.7815, "step": 4587 }, { "epoch": 0.41, "grad_norm": 6.318225675733331, "learning_rate": 9.221291232529289e-06, "loss": 0.8081, "step": 4588 }, { "epoch": 0.41, "grad_norm": 6.537378300024944, "learning_rate": 9.220904043290635e-06, "loss": 0.8256, "step": 4589 }, { "epoch": 0.41, "grad_norm": 6.794534035816491, "learning_rate": 9.220516765949852e-06, "loss": 0.8297, "step": 4590 }, { "epoch": 0.41, "grad_norm": 4.3698229396578725, "learning_rate": 9.220129400515027e-06, "loss": 0.8077, "step": 4591 }, { "epoch": 0.41, "grad_norm": 5.711427577827574, "learning_rate": 9.21974194699424e-06, "loss": 0.7931, "step": 4592 }, { "epoch": 0.41, "grad_norm": 8.305680097640726, "learning_rate": 9.219354405395584e-06, "loss": 0.8309, "step": 4593 }, { "epoch": 0.41, "grad_norm": 5.037205886059536, "learning_rate": 9.218966775727144e-06, "loss": 0.801, "step": 4594 }, { "epoch": 0.41, "grad_norm": 7.216392573814221, "learning_rate": 9.218579057997011e-06, "loss": 0.8539, "step": 4595 }, { "epoch": 0.41, "grad_norm": 6.784012827658881, "learning_rate": 9.218191252213279e-06, "loss": 0.8278, "step": 4596 }, { "epoch": 0.41, "grad_norm": 6.256887071164107, "learning_rate": 9.217803358384043e-06, "loss": 0.8466, "step": 4597 }, { "epoch": 0.41, "grad_norm": 6.740580377308551, "learning_rate": 9.217415376517397e-06, "loss": 0.817, "step": 4598 }, { "epoch": 0.41, "grad_norm": 6.713056279429526, "learning_rate": 9.217027306621442e-06, "loss": 0.8491, "step": 4599 }, { "epoch": 0.41, "grad_norm": 5.3650550937095405, "learning_rate": 9.216639148704277e-06, "loss": 0.9137, "step": 4600 }, { "epoch": 0.41, "grad_norm": 6.592982110412086, "learning_rate": 9.216250902774004e-06, "loss": 0.8564, "step": 4601 }, { "epoch": 0.41, "grad_norm": 8.119878921422362, "learning_rate": 9.215862568838727e-06, "loss": 0.8773, "step": 4602 }, { "epoch": 0.41, "grad_norm": 6.263948718499047, "learning_rate": 9.215474146906552e-06, "loss": 0.7967, "step": 4603 }, { "epoch": 0.41, "grad_norm": 6.64974423929634, "learning_rate": 9.215085636985586e-06, "loss": 0.8212, "step": 4604 }, { "epoch": 0.41, "grad_norm": 5.916795611874067, "learning_rate": 9.214697039083937e-06, "loss": 0.9272, "step": 4605 }, { "epoch": 0.41, "grad_norm": 6.068924874527984, "learning_rate": 9.214308353209718e-06, "loss": 0.8338, "step": 4606 }, { "epoch": 0.41, "grad_norm": 5.920273975271022, "learning_rate": 9.213919579371043e-06, "loss": 0.8347, "step": 4607 }, { "epoch": 0.41, "grad_norm": 5.54222852905615, "learning_rate": 9.213530717576023e-06, "loss": 0.8239, "step": 4608 }, { "epoch": 0.41, "grad_norm": 9.20698075647077, "learning_rate": 9.213141767832777e-06, "loss": 0.8713, "step": 4609 }, { "epoch": 0.41, "grad_norm": 7.2198623847524095, "learning_rate": 9.212752730149424e-06, "loss": 0.838, "step": 4610 }, { "epoch": 0.41, "grad_norm": 4.336151561267071, "learning_rate": 9.212363604534081e-06, "loss": 0.7938, "step": 4611 }, { "epoch": 0.41, "grad_norm": 6.541458810217811, "learning_rate": 9.211974390994875e-06, "loss": 0.8429, "step": 4612 }, { "epoch": 0.41, "grad_norm": 6.524534528245318, "learning_rate": 9.211585089539928e-06, "loss": 0.8542, "step": 4613 }, { "epoch": 0.41, "grad_norm": 6.667307202933177, "learning_rate": 9.211195700177364e-06, "loss": 0.8256, "step": 4614 }, { "epoch": 0.41, "grad_norm": 7.523153746195941, "learning_rate": 9.210806222915312e-06, "loss": 0.8609, "step": 4615 }, { "epoch": 0.41, "grad_norm": 6.189639886734872, "learning_rate": 9.210416657761903e-06, "loss": 0.7957, "step": 4616 }, { "epoch": 0.41, "grad_norm": 6.256961265546148, "learning_rate": 9.210027004725267e-06, "loss": 0.9084, "step": 4617 }, { "epoch": 0.41, "grad_norm": 6.29107621214498, "learning_rate": 9.209637263813536e-06, "loss": 0.8536, "step": 4618 }, { "epoch": 0.41, "grad_norm": 6.844041776292312, "learning_rate": 9.209247435034848e-06, "loss": 0.82, "step": 4619 }, { "epoch": 0.41, "grad_norm": 7.172019367627463, "learning_rate": 9.208857518397337e-06, "loss": 0.755, "step": 4620 }, { "epoch": 0.41, "grad_norm": 6.7100666384306535, "learning_rate": 9.208467513909141e-06, "loss": 0.8701, "step": 4621 }, { "epoch": 0.41, "grad_norm": 5.271468745916825, "learning_rate": 9.208077421578403e-06, "loss": 0.7857, "step": 4622 }, { "epoch": 0.41, "grad_norm": 5.28144293408788, "learning_rate": 9.207687241413263e-06, "loss": 0.8624, "step": 4623 }, { "epoch": 0.41, "grad_norm": 4.630682910704307, "learning_rate": 9.207296973421869e-06, "loss": 0.7886, "step": 4624 }, { "epoch": 0.41, "grad_norm": 5.579614243262499, "learning_rate": 9.206906617612364e-06, "loss": 0.8378, "step": 4625 }, { "epoch": 0.41, "grad_norm": 5.717739067057811, "learning_rate": 9.206516173992895e-06, "loss": 0.8611, "step": 4626 }, { "epoch": 0.41, "grad_norm": 5.869163635461895, "learning_rate": 9.206125642571613e-06, "loss": 0.8275, "step": 4627 }, { "epoch": 0.41, "grad_norm": 5.260387305419084, "learning_rate": 9.20573502335667e-06, "loss": 0.7527, "step": 4628 }, { "epoch": 0.41, "grad_norm": 7.11694951709995, "learning_rate": 9.20534431635622e-06, "loss": 0.8224, "step": 4629 }, { "epoch": 0.41, "grad_norm": 5.671453393637832, "learning_rate": 9.204953521578415e-06, "loss": 0.804, "step": 4630 }, { "epoch": 0.41, "grad_norm": 5.084484566426668, "learning_rate": 9.204562639031414e-06, "loss": 0.8027, "step": 4631 }, { "epoch": 0.41, "grad_norm": 6.191963861879142, "learning_rate": 9.204171668723377e-06, "loss": 0.8724, "step": 4632 }, { "epoch": 0.41, "grad_norm": 4.742019131577059, "learning_rate": 9.203780610662462e-06, "loss": 0.8161, "step": 4633 }, { "epoch": 0.41, "grad_norm": 6.517110353378128, "learning_rate": 9.203389464856835e-06, "loss": 0.8465, "step": 4634 }, { "epoch": 0.41, "grad_norm": 5.557451909605078, "learning_rate": 9.202998231314656e-06, "loss": 0.7583, "step": 4635 }, { "epoch": 0.41, "grad_norm": 6.255230740605198, "learning_rate": 9.202606910044094e-06, "loss": 0.8325, "step": 4636 }, { "epoch": 0.41, "grad_norm": 6.9381139425849, "learning_rate": 9.202215501053318e-06, "loss": 0.7769, "step": 4637 }, { "epoch": 0.41, "grad_norm": 6.126971115325527, "learning_rate": 9.201824004350495e-06, "loss": 0.8265, "step": 4638 }, { "epoch": 0.41, "grad_norm": 6.14302129626966, "learning_rate": 9.201432419943796e-06, "loss": 0.8527, "step": 4639 }, { "epoch": 0.41, "grad_norm": 9.047350643744016, "learning_rate": 9.2010407478414e-06, "loss": 0.7991, "step": 4640 }, { "epoch": 0.41, "grad_norm": 6.351892687375672, "learning_rate": 9.200648988051474e-06, "loss": 0.8875, "step": 4641 }, { "epoch": 0.41, "grad_norm": 4.7312209621873595, "learning_rate": 9.200257140582202e-06, "loss": 0.8918, "step": 4642 }, { "epoch": 0.41, "grad_norm": 5.824827510372964, "learning_rate": 9.199865205441762e-06, "loss": 0.7928, "step": 4643 }, { "epoch": 0.41, "grad_norm": 7.992042857263485, "learning_rate": 9.199473182638332e-06, "loss": 0.8176, "step": 4644 }, { "epoch": 0.41, "grad_norm": 6.466490418297384, "learning_rate": 9.199081072180097e-06, "loss": 0.9162, "step": 4645 }, { "epoch": 0.41, "grad_norm": 7.108331579051042, "learning_rate": 9.198688874075237e-06, "loss": 0.8075, "step": 4646 }, { "epoch": 0.41, "grad_norm": 6.440688074380756, "learning_rate": 9.198296588331945e-06, "loss": 0.89, "step": 4647 }, { "epoch": 0.41, "grad_norm": 7.278771069180701, "learning_rate": 9.197904214958405e-06, "loss": 0.834, "step": 4648 }, { "epoch": 0.41, "grad_norm": 7.020776656374454, "learning_rate": 9.197511753962806e-06, "loss": 0.7754, "step": 4649 }, { "epoch": 0.41, "grad_norm": 4.486778318272757, "learning_rate": 9.197119205353344e-06, "loss": 0.8648, "step": 4650 }, { "epoch": 0.41, "grad_norm": 6.163259032246637, "learning_rate": 9.196726569138211e-06, "loss": 0.7614, "step": 4651 }, { "epoch": 0.42, "grad_norm": 8.42909766315542, "learning_rate": 9.196333845325597e-06, "loss": 0.8476, "step": 4652 }, { "epoch": 0.42, "grad_norm": 5.298328093684551, "learning_rate": 9.195941033923707e-06, "loss": 0.8435, "step": 4653 }, { "epoch": 0.42, "grad_norm": 4.732883823542008, "learning_rate": 9.195548134940737e-06, "loss": 0.8053, "step": 4654 }, { "epoch": 0.42, "grad_norm": 4.135783926416529, "learning_rate": 9.195155148384888e-06, "loss": 0.7672, "step": 4655 }, { "epoch": 0.42, "grad_norm": 7.086962787080362, "learning_rate": 9.19476207426436e-06, "loss": 0.7725, "step": 4656 }, { "epoch": 0.42, "grad_norm": 6.24128555485995, "learning_rate": 9.194368912587363e-06, "loss": 0.7839, "step": 4657 }, { "epoch": 0.42, "grad_norm": 5.909360972369439, "learning_rate": 9.1939756633621e-06, "loss": 0.8338, "step": 4658 }, { "epoch": 0.42, "grad_norm": 8.342587624993977, "learning_rate": 9.193582326596779e-06, "loss": 0.7846, "step": 4659 }, { "epoch": 0.42, "grad_norm": 5.600642787529243, "learning_rate": 9.19318890229961e-06, "loss": 0.8406, "step": 4660 }, { "epoch": 0.42, "grad_norm": 8.68669382010525, "learning_rate": 9.192795390478806e-06, "loss": 0.8319, "step": 4661 }, { "epoch": 0.42, "grad_norm": 7.217584273366493, "learning_rate": 9.19240179114258e-06, "loss": 0.8393, "step": 4662 }, { "epoch": 0.42, "grad_norm": 8.019891615918773, "learning_rate": 9.19200810429915e-06, "loss": 0.8506, "step": 4663 }, { "epoch": 0.42, "grad_norm": 5.662247761179312, "learning_rate": 9.191614329956728e-06, "loss": 0.8422, "step": 4664 }, { "epoch": 0.42, "grad_norm": 4.745001954240607, "learning_rate": 9.191220468123538e-06, "loss": 0.7695, "step": 4665 }, { "epoch": 0.42, "grad_norm": 5.467657801754695, "learning_rate": 9.1908265188078e-06, "loss": 0.8456, "step": 4666 }, { "epoch": 0.42, "grad_norm": 7.768773580694558, "learning_rate": 9.190432482017734e-06, "loss": 0.7899, "step": 4667 }, { "epoch": 0.42, "grad_norm": 6.308218653628511, "learning_rate": 9.19003835776157e-06, "loss": 0.8584, "step": 4668 }, { "epoch": 0.42, "grad_norm": 4.924914123839482, "learning_rate": 9.18964414604753e-06, "loss": 0.8314, "step": 4669 }, { "epoch": 0.42, "grad_norm": 5.42287658367356, "learning_rate": 9.189249846883841e-06, "loss": 0.8423, "step": 4670 }, { "epoch": 0.42, "grad_norm": 7.036877929079618, "learning_rate": 9.188855460278738e-06, "loss": 0.9313, "step": 4671 }, { "epoch": 0.42, "grad_norm": 7.22465979459289, "learning_rate": 9.188460986240451e-06, "loss": 0.8601, "step": 4672 }, { "epoch": 0.42, "grad_norm": 6.123062176567409, "learning_rate": 9.188066424777214e-06, "loss": 0.8594, "step": 4673 }, { "epoch": 0.42, "grad_norm": 5.4438097888581725, "learning_rate": 9.187671775897261e-06, "loss": 0.8169, "step": 4674 }, { "epoch": 0.42, "grad_norm": 5.4852635164904555, "learning_rate": 9.18727703960883e-06, "loss": 0.7808, "step": 4675 }, { "epoch": 0.42, "grad_norm": 5.818026606187768, "learning_rate": 9.186882215920163e-06, "loss": 0.821, "step": 4676 }, { "epoch": 0.42, "grad_norm": 5.5844334815031225, "learning_rate": 9.186487304839496e-06, "loss": 0.8387, "step": 4677 }, { "epoch": 0.42, "grad_norm": 5.2162828075218295, "learning_rate": 9.186092306375076e-06, "loss": 0.8179, "step": 4678 }, { "epoch": 0.42, "grad_norm": 6.780998978306586, "learning_rate": 9.185697220535147e-06, "loss": 0.8427, "step": 4679 }, { "epoch": 0.42, "grad_norm": 5.240944431682982, "learning_rate": 9.185302047327955e-06, "loss": 0.8637, "step": 4680 }, { "epoch": 0.42, "grad_norm": 5.140846689883739, "learning_rate": 9.184906786761748e-06, "loss": 0.8378, "step": 4681 }, { "epoch": 0.42, "grad_norm": 5.653230751909554, "learning_rate": 9.184511438844777e-06, "loss": 0.815, "step": 4682 }, { "epoch": 0.42, "grad_norm": 6.132026678794383, "learning_rate": 9.184116003585293e-06, "loss": 0.8134, "step": 4683 }, { "epoch": 0.42, "grad_norm": 5.332322945799071, "learning_rate": 9.183720480991552e-06, "loss": 0.8287, "step": 4684 }, { "epoch": 0.42, "grad_norm": 5.603455083524731, "learning_rate": 9.183324871071806e-06, "loss": 0.8304, "step": 4685 }, { "epoch": 0.42, "grad_norm": 7.778333443927217, "learning_rate": 9.182929173834314e-06, "loss": 0.8071, "step": 4686 }, { "epoch": 0.42, "grad_norm": 7.196576039852963, "learning_rate": 9.182533389287338e-06, "loss": 0.8096, "step": 4687 }, { "epoch": 0.42, "grad_norm": 5.678430904212422, "learning_rate": 9.182137517439138e-06, "loss": 0.7657, "step": 4688 }, { "epoch": 0.42, "grad_norm": 7.30986348863734, "learning_rate": 9.181741558297973e-06, "loss": 0.8759, "step": 4689 }, { "epoch": 0.42, "grad_norm": 10.362392867464221, "learning_rate": 9.181345511872113e-06, "loss": 0.8088, "step": 4690 }, { "epoch": 0.42, "grad_norm": 6.113062511617782, "learning_rate": 9.180949378169822e-06, "loss": 0.8406, "step": 4691 }, { "epoch": 0.42, "grad_norm": 8.001669995935284, "learning_rate": 9.18055315719937e-06, "loss": 0.8064, "step": 4692 }, { "epoch": 0.42, "grad_norm": 5.837290097287818, "learning_rate": 9.180156848969024e-06, "loss": 0.8201, "step": 4693 }, { "epoch": 0.42, "grad_norm": 6.977175091855017, "learning_rate": 9.17976045348706e-06, "loss": 0.9142, "step": 4694 }, { "epoch": 0.42, "grad_norm": 6.265072693362719, "learning_rate": 9.179363970761748e-06, "loss": 0.8345, "step": 4695 }, { "epoch": 0.42, "grad_norm": 6.350564551550255, "learning_rate": 9.178967400801367e-06, "loss": 0.838, "step": 4696 }, { "epoch": 0.42, "grad_norm": 8.319501784920893, "learning_rate": 9.178570743614192e-06, "loss": 0.8714, "step": 4697 }, { "epoch": 0.42, "grad_norm": 4.499125368322284, "learning_rate": 9.178173999208506e-06, "loss": 0.8316, "step": 4698 }, { "epoch": 0.42, "grad_norm": 6.03822431203044, "learning_rate": 9.177777167592588e-06, "loss": 0.6912, "step": 4699 }, { "epoch": 0.42, "grad_norm": 6.743944262018504, "learning_rate": 9.177380248774718e-06, "loss": 0.817, "step": 4700 }, { "epoch": 0.42, "grad_norm": 5.4934930510815665, "learning_rate": 9.176983242763184e-06, "loss": 0.8252, "step": 4701 }, { "epoch": 0.42, "grad_norm": 6.811376148267023, "learning_rate": 9.176586149566274e-06, "loss": 0.8985, "step": 4702 }, { "epoch": 0.42, "grad_norm": 6.04859307179697, "learning_rate": 9.176188969192275e-06, "loss": 0.8933, "step": 4703 }, { "epoch": 0.42, "grad_norm": 6.467906847842432, "learning_rate": 9.175791701649476e-06, "loss": 0.82, "step": 4704 }, { "epoch": 0.42, "grad_norm": 7.999325723413462, "learning_rate": 9.17539434694617e-06, "loss": 0.792, "step": 4705 }, { "epoch": 0.42, "grad_norm": 5.991976808612682, "learning_rate": 9.17499690509065e-06, "loss": 0.8442, "step": 4706 }, { "epoch": 0.42, "grad_norm": 6.520590685550164, "learning_rate": 9.174599376091215e-06, "loss": 0.851, "step": 4707 }, { "epoch": 0.42, "grad_norm": 6.679056358880233, "learning_rate": 9.174201759956158e-06, "loss": 0.8756, "step": 4708 }, { "epoch": 0.42, "grad_norm": 5.911535175533622, "learning_rate": 9.173804056693781e-06, "loss": 0.8224, "step": 4709 }, { "epoch": 0.42, "grad_norm": 5.875786909931423, "learning_rate": 9.173406266312386e-06, "loss": 0.7981, "step": 4710 }, { "epoch": 0.42, "grad_norm": 5.252866175679899, "learning_rate": 9.173008388820273e-06, "loss": 0.7933, "step": 4711 }, { "epoch": 0.42, "grad_norm": 6.525629012749577, "learning_rate": 9.17261042422575e-06, "loss": 0.8385, "step": 4712 }, { "epoch": 0.42, "grad_norm": 5.670774964646604, "learning_rate": 9.172212372537123e-06, "loss": 0.8983, "step": 4713 }, { "epoch": 0.42, "grad_norm": 5.059195413178181, "learning_rate": 9.171814233762698e-06, "loss": 0.8536, "step": 4714 }, { "epoch": 0.42, "grad_norm": 5.714048120285158, "learning_rate": 9.171416007910787e-06, "loss": 0.8427, "step": 4715 }, { "epoch": 0.42, "grad_norm": 5.44401816765405, "learning_rate": 9.171017694989704e-06, "loss": 0.857, "step": 4716 }, { "epoch": 0.42, "grad_norm": 5.70308517100645, "learning_rate": 9.170619295007759e-06, "loss": 0.8446, "step": 4717 }, { "epoch": 0.42, "grad_norm": 5.982646097468416, "learning_rate": 9.170220807973272e-06, "loss": 0.8179, "step": 4718 }, { "epoch": 0.42, "grad_norm": 5.661065177151935, "learning_rate": 9.169822233894555e-06, "loss": 0.8797, "step": 4719 }, { "epoch": 0.42, "grad_norm": 5.5834532028960275, "learning_rate": 9.169423572779934e-06, "loss": 0.8209, "step": 4720 }, { "epoch": 0.42, "grad_norm": 7.396275344176002, "learning_rate": 9.169024824637725e-06, "loss": 0.8675, "step": 4721 }, { "epoch": 0.42, "grad_norm": 6.313810103392968, "learning_rate": 9.168625989476253e-06, "loss": 0.8381, "step": 4722 }, { "epoch": 0.42, "grad_norm": 7.645864530196455, "learning_rate": 9.16822706730384e-06, "loss": 0.8302, "step": 4723 }, { "epoch": 0.42, "grad_norm": 4.453695941460281, "learning_rate": 9.167828058128819e-06, "loss": 0.8403, "step": 4724 }, { "epoch": 0.42, "grad_norm": 5.663758337860535, "learning_rate": 9.167428961959514e-06, "loss": 0.8575, "step": 4725 }, { "epoch": 0.42, "grad_norm": 4.894484204774418, "learning_rate": 9.167029778804255e-06, "loss": 0.8878, "step": 4726 }, { "epoch": 0.42, "grad_norm": 7.2405249285199975, "learning_rate": 9.166630508671374e-06, "loss": 0.8807, "step": 4727 }, { "epoch": 0.42, "grad_norm": 5.9216020963619425, "learning_rate": 9.166231151569209e-06, "loss": 0.8942, "step": 4728 }, { "epoch": 0.42, "grad_norm": 7.539897808644549, "learning_rate": 9.16583170750609e-06, "loss": 0.9102, "step": 4729 }, { "epoch": 0.42, "grad_norm": 5.5400170078339555, "learning_rate": 9.16543217649036e-06, "loss": 0.8575, "step": 4730 }, { "epoch": 0.42, "grad_norm": 5.052055944275131, "learning_rate": 9.165032558530353e-06, "loss": 0.9064, "step": 4731 }, { "epoch": 0.42, "grad_norm": 6.59272018370884, "learning_rate": 9.164632853634412e-06, "loss": 0.8693, "step": 4732 }, { "epoch": 0.42, "grad_norm": 6.908606450948162, "learning_rate": 9.16423306181088e-06, "loss": 0.7869, "step": 4733 }, { "epoch": 0.42, "grad_norm": 6.562565151775326, "learning_rate": 9.163833183068105e-06, "loss": 0.89, "step": 4734 }, { "epoch": 0.42, "grad_norm": 5.912969043096607, "learning_rate": 9.163433217414431e-06, "loss": 0.8339, "step": 4735 }, { "epoch": 0.42, "grad_norm": 6.803424779387007, "learning_rate": 9.163033164858204e-06, "loss": 0.8829, "step": 4736 }, { "epoch": 0.42, "grad_norm": 6.774843240823779, "learning_rate": 9.16263302540778e-06, "loss": 0.7715, "step": 4737 }, { "epoch": 0.42, "grad_norm": 5.981890811756283, "learning_rate": 9.162232799071505e-06, "loss": 0.8624, "step": 4738 }, { "epoch": 0.42, "grad_norm": 6.470433102993778, "learning_rate": 9.161832485857736e-06, "loss": 0.8495, "step": 4739 }, { "epoch": 0.42, "grad_norm": 5.754494412402604, "learning_rate": 9.161432085774826e-06, "loss": 0.8733, "step": 4740 }, { "epoch": 0.42, "grad_norm": 5.149976065165343, "learning_rate": 9.161031598831139e-06, "loss": 0.8768, "step": 4741 }, { "epoch": 0.42, "grad_norm": 6.3280547097008855, "learning_rate": 9.160631025035026e-06, "loss": 0.7943, "step": 4742 }, { "epoch": 0.42, "grad_norm": 5.241206134376276, "learning_rate": 9.160230364394852e-06, "loss": 0.7718, "step": 4743 }, { "epoch": 0.42, "grad_norm": 7.121132152060691, "learning_rate": 9.159829616918982e-06, "loss": 0.8631, "step": 4744 }, { "epoch": 0.42, "grad_norm": 7.337349700946734, "learning_rate": 9.159428782615779e-06, "loss": 0.7977, "step": 4745 }, { "epoch": 0.42, "grad_norm": 5.621838942914706, "learning_rate": 9.159027861493608e-06, "loss": 0.7948, "step": 4746 }, { "epoch": 0.42, "grad_norm": 4.642965770128866, "learning_rate": 9.158626853560839e-06, "loss": 0.8606, "step": 4747 }, { "epoch": 0.42, "grad_norm": 5.6556209701010145, "learning_rate": 9.158225758825841e-06, "loss": 0.868, "step": 4748 }, { "epoch": 0.42, "grad_norm": 7.454390074640307, "learning_rate": 9.157824577296987e-06, "loss": 0.8533, "step": 4749 }, { "epoch": 0.42, "grad_norm": 5.632472262392171, "learning_rate": 9.15742330898265e-06, "loss": 0.8153, "step": 4750 }, { "epoch": 0.42, "grad_norm": 6.485322466204349, "learning_rate": 9.157021953891207e-06, "loss": 0.8347, "step": 4751 }, { "epoch": 0.42, "grad_norm": 6.082983454006973, "learning_rate": 9.156620512031033e-06, "loss": 0.8395, "step": 4752 }, { "epoch": 0.42, "grad_norm": 6.124842004751423, "learning_rate": 9.15621898341051e-06, "loss": 0.8083, "step": 4753 }, { "epoch": 0.42, "grad_norm": 10.345170486231046, "learning_rate": 9.155817368038017e-06, "loss": 0.9008, "step": 4754 }, { "epoch": 0.42, "grad_norm": 4.965529425096823, "learning_rate": 9.155415665921938e-06, "loss": 0.7945, "step": 4755 }, { "epoch": 0.42, "grad_norm": 5.523920055144245, "learning_rate": 9.155013877070657e-06, "loss": 0.9048, "step": 4756 }, { "epoch": 0.42, "grad_norm": 5.150256100001417, "learning_rate": 9.15461200149256e-06, "loss": 0.8731, "step": 4757 }, { "epoch": 0.42, "grad_norm": 6.220728975424585, "learning_rate": 9.154210039196037e-06, "loss": 0.7604, "step": 4758 }, { "epoch": 0.42, "grad_norm": 6.672365428656922, "learning_rate": 9.153807990189475e-06, "loss": 0.8065, "step": 4759 }, { "epoch": 0.42, "grad_norm": 6.800048114974959, "learning_rate": 9.15340585448127e-06, "loss": 0.8596, "step": 4760 }, { "epoch": 0.42, "grad_norm": 5.81196063105043, "learning_rate": 9.153003632079812e-06, "loss": 0.8403, "step": 4761 }, { "epoch": 0.42, "grad_norm": 5.83589784933157, "learning_rate": 9.1526013229935e-06, "loss": 0.8167, "step": 4762 }, { "epoch": 0.42, "grad_norm": 6.51862353525165, "learning_rate": 9.15219892723073e-06, "loss": 0.7817, "step": 4763 }, { "epoch": 0.42, "grad_norm": 4.480691028693473, "learning_rate": 9.1517964447999e-06, "loss": 0.8806, "step": 4764 }, { "epoch": 0.43, "grad_norm": 7.0450123865421, "learning_rate": 9.15139387570941e-06, "loss": 0.8278, "step": 4765 }, { "epoch": 0.43, "grad_norm": 6.296029513174919, "learning_rate": 9.150991219967663e-06, "loss": 0.8919, "step": 4766 }, { "epoch": 0.43, "grad_norm": 5.980978718104485, "learning_rate": 9.150588477583067e-06, "loss": 0.8677, "step": 4767 }, { "epoch": 0.43, "grad_norm": 5.9910759133192055, "learning_rate": 9.150185648564025e-06, "loss": 0.7479, "step": 4768 }, { "epoch": 0.43, "grad_norm": 5.724675384546899, "learning_rate": 9.149782732918948e-06, "loss": 0.8443, "step": 4769 }, { "epoch": 0.43, "grad_norm": 5.598570563913732, "learning_rate": 9.149379730656243e-06, "loss": 0.8436, "step": 4770 }, { "epoch": 0.43, "grad_norm": 7.595044656557839, "learning_rate": 9.148976641784324e-06, "loss": 0.8133, "step": 4771 }, { "epoch": 0.43, "grad_norm": 5.956477102004142, "learning_rate": 9.148573466311602e-06, "loss": 0.849, "step": 4772 }, { "epoch": 0.43, "grad_norm": 5.818363610099549, "learning_rate": 9.148170204246496e-06, "loss": 0.8941, "step": 4773 }, { "epoch": 0.43, "grad_norm": 8.190669552081799, "learning_rate": 9.14776685559742e-06, "loss": 0.8661, "step": 4774 }, { "epoch": 0.43, "grad_norm": 5.345349705564131, "learning_rate": 9.147363420372794e-06, "loss": 0.8202, "step": 4775 }, { "epoch": 0.43, "grad_norm": 5.568846597915994, "learning_rate": 9.146959898581042e-06, "loss": 0.8467, "step": 4776 }, { "epoch": 0.43, "grad_norm": 7.3493734136729225, "learning_rate": 9.146556290230582e-06, "loss": 0.8407, "step": 4777 }, { "epoch": 0.43, "grad_norm": 9.225867706035114, "learning_rate": 9.14615259532984e-06, "loss": 0.8335, "step": 4778 }, { "epoch": 0.43, "grad_norm": 7.083970305487933, "learning_rate": 9.145748813887243e-06, "loss": 0.8362, "step": 4779 }, { "epoch": 0.43, "grad_norm": 6.40676860100397, "learning_rate": 9.145344945911217e-06, "loss": 0.7882, "step": 4780 }, { "epoch": 0.43, "grad_norm": 6.209606567690158, "learning_rate": 9.144940991410196e-06, "loss": 0.8521, "step": 4781 }, { "epoch": 0.43, "grad_norm": 5.206305356943972, "learning_rate": 9.144536950392606e-06, "loss": 0.7845, "step": 4782 }, { "epoch": 0.43, "grad_norm": 7.3792686262589475, "learning_rate": 9.144132822866886e-06, "loss": 0.8385, "step": 4783 }, { "epoch": 0.43, "grad_norm": 6.734815921185136, "learning_rate": 9.143728608841469e-06, "loss": 0.8237, "step": 4784 }, { "epoch": 0.43, "grad_norm": 8.070390396417901, "learning_rate": 9.14332430832479e-06, "loss": 0.81, "step": 4785 }, { "epoch": 0.43, "grad_norm": 5.476656473164878, "learning_rate": 9.142919921325291e-06, "loss": 0.8128, "step": 4786 }, { "epoch": 0.43, "grad_norm": 5.143225017711418, "learning_rate": 9.14251544785141e-06, "loss": 0.9165, "step": 4787 }, { "epoch": 0.43, "grad_norm": 8.370541604439898, "learning_rate": 9.142110887911593e-06, "loss": 0.8762, "step": 4788 }, { "epoch": 0.43, "grad_norm": 6.840805087338073, "learning_rate": 9.141706241514281e-06, "loss": 0.811, "step": 4789 }, { "epoch": 0.43, "grad_norm": 6.552757841728568, "learning_rate": 9.141301508667923e-06, "loss": 0.8021, "step": 4790 }, { "epoch": 0.43, "grad_norm": 5.01865046269057, "learning_rate": 9.140896689380964e-06, "loss": 0.8641, "step": 4791 }, { "epoch": 0.43, "grad_norm": 5.826348439505471, "learning_rate": 9.140491783661856e-06, "loss": 0.7974, "step": 4792 }, { "epoch": 0.43, "grad_norm": 6.359400314798292, "learning_rate": 9.140086791519047e-06, "loss": 0.7876, "step": 4793 }, { "epoch": 0.43, "grad_norm": 5.936475302558658, "learning_rate": 9.139681712960994e-06, "loss": 0.8361, "step": 4794 }, { "epoch": 0.43, "grad_norm": 5.175232159373111, "learning_rate": 9.139276547996153e-06, "loss": 0.767, "step": 4795 }, { "epoch": 0.43, "grad_norm": 5.855227244206391, "learning_rate": 9.138871296632977e-06, "loss": 0.8444, "step": 4796 }, { "epoch": 0.43, "grad_norm": 6.522852548848016, "learning_rate": 9.138465958879928e-06, "loss": 0.8045, "step": 4797 }, { "epoch": 0.43, "grad_norm": 5.0357053652451755, "learning_rate": 9.138060534745463e-06, "loss": 0.7962, "step": 4798 }, { "epoch": 0.43, "grad_norm": 6.581275590192676, "learning_rate": 9.137655024238049e-06, "loss": 0.8259, "step": 4799 }, { "epoch": 0.43, "grad_norm": 6.0547731548834, "learning_rate": 9.137249427366145e-06, "loss": 0.8104, "step": 4800 }, { "epoch": 0.43, "grad_norm": 5.051660682139256, "learning_rate": 9.136843744138224e-06, "loss": 0.8516, "step": 4801 }, { "epoch": 0.43, "grad_norm": 6.4830719191996, "learning_rate": 9.136437974562745e-06, "loss": 0.8605, "step": 4802 }, { "epoch": 0.43, "grad_norm": 4.8539992544952755, "learning_rate": 9.136032118648184e-06, "loss": 0.815, "step": 4803 }, { "epoch": 0.43, "grad_norm": 5.766836564654038, "learning_rate": 9.13562617640301e-06, "loss": 0.8558, "step": 4804 }, { "epoch": 0.43, "grad_norm": 5.96942103059374, "learning_rate": 9.135220147835694e-06, "loss": 0.8614, "step": 4805 }, { "epoch": 0.43, "grad_norm": 6.14138807494017, "learning_rate": 9.134814032954717e-06, "loss": 0.8396, "step": 4806 }, { "epoch": 0.43, "grad_norm": 6.388331877751764, "learning_rate": 9.13440783176855e-06, "loss": 0.8933, "step": 4807 }, { "epoch": 0.43, "grad_norm": 4.918892878758849, "learning_rate": 9.134001544285676e-06, "loss": 0.8805, "step": 4808 }, { "epoch": 0.43, "grad_norm": 5.305146788955883, "learning_rate": 9.133595170514571e-06, "loss": 0.7844, "step": 4809 }, { "epoch": 0.43, "grad_norm": 6.214109285169786, "learning_rate": 9.133188710463721e-06, "loss": 0.8441, "step": 4810 }, { "epoch": 0.43, "grad_norm": 4.706609196675064, "learning_rate": 9.132782164141606e-06, "loss": 0.7939, "step": 4811 }, { "epoch": 0.43, "grad_norm": 5.269334231613863, "learning_rate": 9.132375531556714e-06, "loss": 0.7106, "step": 4812 }, { "epoch": 0.43, "grad_norm": 6.602309220520748, "learning_rate": 9.131968812717534e-06, "loss": 0.7897, "step": 4813 }, { "epoch": 0.43, "grad_norm": 4.717450952703593, "learning_rate": 9.131562007632555e-06, "loss": 0.8179, "step": 4814 }, { "epoch": 0.43, "grad_norm": 6.2638076287388635, "learning_rate": 9.131155116310263e-06, "loss": 0.9658, "step": 4815 }, { "epoch": 0.43, "grad_norm": 6.830889701606198, "learning_rate": 9.130748138759157e-06, "loss": 0.8031, "step": 4816 }, { "epoch": 0.43, "grad_norm": 7.399287363211557, "learning_rate": 9.130341074987732e-06, "loss": 0.8251, "step": 4817 }, { "epoch": 0.43, "grad_norm": 4.768180085559075, "learning_rate": 9.12993392500448e-06, "loss": 0.8164, "step": 4818 }, { "epoch": 0.43, "grad_norm": 5.419118110237655, "learning_rate": 9.129526688817902e-06, "loss": 0.8303, "step": 4819 }, { "epoch": 0.43, "grad_norm": 6.259236668039366, "learning_rate": 9.129119366436498e-06, "loss": 0.8583, "step": 4820 }, { "epoch": 0.43, "grad_norm": 5.2995669792411455, "learning_rate": 9.128711957868772e-06, "loss": 0.8614, "step": 4821 }, { "epoch": 0.43, "grad_norm": 5.854825526177955, "learning_rate": 9.128304463123224e-06, "loss": 0.8434, "step": 4822 }, { "epoch": 0.43, "grad_norm": 5.522533667546708, "learning_rate": 9.127896882208359e-06, "loss": 0.895, "step": 4823 }, { "epoch": 0.43, "grad_norm": 5.279331814772121, "learning_rate": 9.127489215132688e-06, "loss": 0.8133, "step": 4824 }, { "epoch": 0.43, "grad_norm": 7.070670671629508, "learning_rate": 9.127081461904719e-06, "loss": 0.8714, "step": 4825 }, { "epoch": 0.43, "grad_norm": 8.435640450454244, "learning_rate": 9.126673622532963e-06, "loss": 0.8625, "step": 4826 }, { "epoch": 0.43, "grad_norm": 5.9742167599247455, "learning_rate": 9.126265697025932e-06, "loss": 0.8498, "step": 4827 }, { "epoch": 0.43, "grad_norm": 6.896903810621668, "learning_rate": 9.125857685392141e-06, "loss": 0.8068, "step": 4828 }, { "epoch": 0.43, "grad_norm": 7.79387704569665, "learning_rate": 9.125449587640107e-06, "loss": 0.8017, "step": 4829 }, { "epoch": 0.43, "grad_norm": 6.672882399584768, "learning_rate": 9.125041403778345e-06, "loss": 0.8444, "step": 4830 }, { "epoch": 0.43, "grad_norm": 5.562963469245397, "learning_rate": 9.12463313381538e-06, "loss": 0.7673, "step": 4831 }, { "epoch": 0.43, "grad_norm": 6.097294703146544, "learning_rate": 9.12422477775973e-06, "loss": 0.8075, "step": 4832 }, { "epoch": 0.43, "grad_norm": 6.243248426771218, "learning_rate": 9.12381633561992e-06, "loss": 0.8572, "step": 4833 }, { "epoch": 0.43, "grad_norm": 5.325109696860835, "learning_rate": 9.123407807404474e-06, "loss": 0.8252, "step": 4834 }, { "epoch": 0.43, "grad_norm": 5.7054567700402385, "learning_rate": 9.122999193121922e-06, "loss": 0.8377, "step": 4835 }, { "epoch": 0.43, "grad_norm": 4.569833453098052, "learning_rate": 9.12259049278079e-06, "loss": 0.8157, "step": 4836 }, { "epoch": 0.43, "grad_norm": 6.762251706171692, "learning_rate": 9.12218170638961e-06, "loss": 0.8025, "step": 4837 }, { "epoch": 0.43, "grad_norm": 5.610263071413296, "learning_rate": 9.121772833956915e-06, "loss": 0.8447, "step": 4838 }, { "epoch": 0.43, "grad_norm": 6.819459325317752, "learning_rate": 9.121363875491236e-06, "loss": 0.8955, "step": 4839 }, { "epoch": 0.43, "grad_norm": 8.065031663098662, "learning_rate": 9.120954831001114e-06, "loss": 0.8199, "step": 4840 }, { "epoch": 0.43, "grad_norm": 6.128598775592627, "learning_rate": 9.120545700495085e-06, "loss": 0.8263, "step": 4841 }, { "epoch": 0.43, "grad_norm": 5.994503596343405, "learning_rate": 9.120136483981688e-06, "loss": 0.8711, "step": 4842 }, { "epoch": 0.43, "grad_norm": 5.591603739853564, "learning_rate": 9.119727181469462e-06, "loss": 0.7576, "step": 4843 }, { "epoch": 0.43, "grad_norm": 7.422901151681481, "learning_rate": 9.119317792966957e-06, "loss": 0.8358, "step": 4844 }, { "epoch": 0.43, "grad_norm": 7.191764691810066, "learning_rate": 9.118908318482711e-06, "loss": 0.8594, "step": 4845 }, { "epoch": 0.43, "grad_norm": 5.156870919383938, "learning_rate": 9.118498758025277e-06, "loss": 0.8578, "step": 4846 }, { "epoch": 0.43, "grad_norm": 5.660505668626911, "learning_rate": 9.1180891116032e-06, "loss": 0.7698, "step": 4847 }, { "epoch": 0.43, "grad_norm": 6.909224530873166, "learning_rate": 9.117679379225031e-06, "loss": 0.8256, "step": 4848 }, { "epoch": 0.43, "grad_norm": 6.496760772024197, "learning_rate": 9.117269560899322e-06, "loss": 0.7821, "step": 4849 }, { "epoch": 0.43, "grad_norm": 5.982366068952116, "learning_rate": 9.116859656634629e-06, "loss": 0.8691, "step": 4850 }, { "epoch": 0.43, "grad_norm": 5.436438914426638, "learning_rate": 9.116449666439504e-06, "loss": 0.793, "step": 4851 }, { "epoch": 0.43, "grad_norm": 6.933256549510221, "learning_rate": 9.116039590322508e-06, "loss": 0.8476, "step": 4852 }, { "epoch": 0.43, "grad_norm": 7.438844150700897, "learning_rate": 9.1156294282922e-06, "loss": 0.8428, "step": 4853 }, { "epoch": 0.43, "grad_norm": 4.89770910442735, "learning_rate": 9.11521918035714e-06, "loss": 0.7486, "step": 4854 }, { "epoch": 0.43, "grad_norm": 7.496095920576283, "learning_rate": 9.114808846525893e-06, "loss": 0.766, "step": 4855 }, { "epoch": 0.43, "grad_norm": 7.403760885673823, "learning_rate": 9.114398426807024e-06, "loss": 0.8707, "step": 4856 }, { "epoch": 0.43, "grad_norm": 6.0091258254670175, "learning_rate": 9.113987921209095e-06, "loss": 0.7522, "step": 4857 }, { "epoch": 0.43, "grad_norm": 5.853296386558378, "learning_rate": 9.11357732974068e-06, "loss": 0.8826, "step": 4858 }, { "epoch": 0.43, "grad_norm": 9.225204006400393, "learning_rate": 9.113166652410344e-06, "loss": 0.8522, "step": 4859 }, { "epoch": 0.43, "grad_norm": 8.070378011059255, "learning_rate": 9.112755889226664e-06, "loss": 0.774, "step": 4860 }, { "epoch": 0.43, "grad_norm": 5.952803432518575, "learning_rate": 9.112345040198212e-06, "loss": 0.8162, "step": 4861 }, { "epoch": 0.43, "grad_norm": 4.622240886574784, "learning_rate": 9.111934105333563e-06, "loss": 0.8444, "step": 4862 }, { "epoch": 0.43, "grad_norm": 6.392016538822963, "learning_rate": 9.111523084641292e-06, "loss": 0.7944, "step": 4863 }, { "epoch": 0.43, "grad_norm": 4.9825462642169605, "learning_rate": 9.111111978129983e-06, "loss": 0.8807, "step": 4864 }, { "epoch": 0.43, "grad_norm": 5.834063968707749, "learning_rate": 9.110700785808216e-06, "loss": 0.7289, "step": 4865 }, { "epoch": 0.43, "grad_norm": 6.764413796162558, "learning_rate": 9.110289507684569e-06, "loss": 0.7878, "step": 4866 }, { "epoch": 0.43, "grad_norm": 6.790280262182799, "learning_rate": 9.109878143767633e-06, "loss": 0.821, "step": 4867 }, { "epoch": 0.43, "grad_norm": 4.996171763389265, "learning_rate": 9.10946669406599e-06, "loss": 0.86, "step": 4868 }, { "epoch": 0.43, "grad_norm": 7.308244354496188, "learning_rate": 9.109055158588226e-06, "loss": 0.8747, "step": 4869 }, { "epoch": 0.43, "grad_norm": 5.683003077853453, "learning_rate": 9.108643537342938e-06, "loss": 0.7568, "step": 4870 }, { "epoch": 0.43, "grad_norm": 5.169716494751594, "learning_rate": 9.108231830338714e-06, "loss": 0.8198, "step": 4871 }, { "epoch": 0.43, "grad_norm": 5.057289966398745, "learning_rate": 9.107820037584147e-06, "loss": 0.8373, "step": 4872 }, { "epoch": 0.43, "grad_norm": 5.118980415714306, "learning_rate": 9.107408159087831e-06, "loss": 0.8351, "step": 4873 }, { "epoch": 0.43, "grad_norm": 6.668471480557678, "learning_rate": 9.106996194858368e-06, "loss": 0.8312, "step": 4874 }, { "epoch": 0.43, "grad_norm": 5.2501341141926225, "learning_rate": 9.106584144904352e-06, "loss": 0.821, "step": 4875 }, { "epoch": 0.43, "grad_norm": 8.128484954219688, "learning_rate": 9.106172009234385e-06, "loss": 0.8705, "step": 4876 }, { "epoch": 0.44, "grad_norm": 5.421820242883798, "learning_rate": 9.105759787857069e-06, "loss": 0.8326, "step": 4877 }, { "epoch": 0.44, "grad_norm": 6.336248091208248, "learning_rate": 9.105347480781008e-06, "loss": 0.866, "step": 4878 }, { "epoch": 0.44, "grad_norm": 4.6288972372407935, "learning_rate": 9.104935088014813e-06, "loss": 0.7967, "step": 4879 }, { "epoch": 0.44, "grad_norm": 4.649258859580292, "learning_rate": 9.104522609567083e-06, "loss": 0.8471, "step": 4880 }, { "epoch": 0.44, "grad_norm": 6.473163932481095, "learning_rate": 9.104110045446435e-06, "loss": 0.7515, "step": 4881 }, { "epoch": 0.44, "grad_norm": 5.808625417867456, "learning_rate": 9.103697395661477e-06, "loss": 0.7918, "step": 4882 }, { "epoch": 0.44, "grad_norm": 8.41269730698717, "learning_rate": 9.103284660220823e-06, "loss": 0.7189, "step": 4883 }, { "epoch": 0.44, "grad_norm": 7.566026978609095, "learning_rate": 9.102871839133087e-06, "loss": 0.8262, "step": 4884 }, { "epoch": 0.44, "grad_norm": 6.284426809092878, "learning_rate": 9.102458932406889e-06, "loss": 0.848, "step": 4885 }, { "epoch": 0.44, "grad_norm": 5.040023035708307, "learning_rate": 9.102045940050843e-06, "loss": 0.7947, "step": 4886 }, { "epoch": 0.44, "grad_norm": 10.642318195286915, "learning_rate": 9.101632862073571e-06, "loss": 0.8861, "step": 4887 }, { "epoch": 0.44, "grad_norm": 6.296746432152736, "learning_rate": 9.101219698483694e-06, "loss": 0.8257, "step": 4888 }, { "epoch": 0.44, "grad_norm": 5.9750406885018785, "learning_rate": 9.10080644928984e-06, "loss": 0.8992, "step": 4889 }, { "epoch": 0.44, "grad_norm": 6.336430795855631, "learning_rate": 9.100393114500632e-06, "loss": 0.8137, "step": 4890 }, { "epoch": 0.44, "grad_norm": 6.229087324085516, "learning_rate": 9.099979694124696e-06, "loss": 0.8594, "step": 4891 }, { "epoch": 0.44, "grad_norm": 5.121633823963735, "learning_rate": 9.099566188170663e-06, "loss": 0.8029, "step": 4892 }, { "epoch": 0.44, "grad_norm": 6.141951747110666, "learning_rate": 9.099152596647165e-06, "loss": 0.7847, "step": 4893 }, { "epoch": 0.44, "grad_norm": 7.802408005664781, "learning_rate": 9.098738919562832e-06, "loss": 0.8206, "step": 4894 }, { "epoch": 0.44, "grad_norm": 4.968672526431462, "learning_rate": 9.0983251569263e-06, "loss": 0.8697, "step": 4895 }, { "epoch": 0.44, "grad_norm": 5.424975118955648, "learning_rate": 9.097911308746206e-06, "loss": 0.8517, "step": 4896 }, { "epoch": 0.44, "grad_norm": 7.734972061208118, "learning_rate": 9.097497375031189e-06, "loss": 0.8441, "step": 4897 }, { "epoch": 0.44, "grad_norm": 6.288862862494857, "learning_rate": 9.097083355789886e-06, "loss": 0.9049, "step": 4898 }, { "epoch": 0.44, "grad_norm": 5.572106811164453, "learning_rate": 9.09666925103094e-06, "loss": 0.8221, "step": 4899 }, { "epoch": 0.44, "grad_norm": 6.577498237539196, "learning_rate": 9.096255060762998e-06, "loss": 0.8284, "step": 4900 }, { "epoch": 0.44, "grad_norm": 5.501844460247866, "learning_rate": 9.095840784994699e-06, "loss": 0.869, "step": 4901 }, { "epoch": 0.44, "grad_norm": 7.162010420300825, "learning_rate": 9.095426423734695e-06, "loss": 0.8966, "step": 4902 }, { "epoch": 0.44, "grad_norm": 5.919884772221421, "learning_rate": 9.095011976991632e-06, "loss": 0.867, "step": 4903 }, { "epoch": 0.44, "grad_norm": 7.182391125624397, "learning_rate": 9.094597444774162e-06, "loss": 0.8135, "step": 4904 }, { "epoch": 0.44, "grad_norm": 7.644653209275113, "learning_rate": 9.09418282709094e-06, "loss": 0.8346, "step": 4905 }, { "epoch": 0.44, "grad_norm": 7.818937709405059, "learning_rate": 9.093768123950616e-06, "loss": 0.8098, "step": 4906 }, { "epoch": 0.44, "grad_norm": 6.847664273594822, "learning_rate": 9.093353335361845e-06, "loss": 0.8337, "step": 4907 }, { "epoch": 0.44, "grad_norm": 7.396766347544747, "learning_rate": 9.09293846133329e-06, "loss": 0.895, "step": 4908 }, { "epoch": 0.44, "grad_norm": 7.853544707736268, "learning_rate": 9.092523501873609e-06, "loss": 0.8663, "step": 4909 }, { "epoch": 0.44, "grad_norm": 8.794949573888701, "learning_rate": 9.09210845699146e-06, "loss": 0.885, "step": 4910 }, { "epoch": 0.44, "grad_norm": 6.26467628850975, "learning_rate": 9.09169332669551e-06, "loss": 0.8358, "step": 4911 }, { "epoch": 0.44, "grad_norm": 8.63298954072151, "learning_rate": 9.091278110994422e-06, "loss": 0.9096, "step": 4912 }, { "epoch": 0.44, "grad_norm": 5.362068065799047, "learning_rate": 9.090862809896864e-06, "loss": 0.8052, "step": 4913 }, { "epoch": 0.44, "grad_norm": 7.764988674468138, "learning_rate": 9.090447423411501e-06, "loss": 0.7967, "step": 4914 }, { "epoch": 0.44, "grad_norm": 5.5812895241714005, "learning_rate": 9.090031951547009e-06, "loss": 0.7847, "step": 4915 }, { "epoch": 0.44, "grad_norm": 6.135172137192945, "learning_rate": 9.089616394312056e-06, "loss": 0.8703, "step": 4916 }, { "epoch": 0.44, "grad_norm": 6.277565341915346, "learning_rate": 9.089200751715318e-06, "loss": 0.8592, "step": 4917 }, { "epoch": 0.44, "grad_norm": 5.7284509623746045, "learning_rate": 9.088785023765467e-06, "loss": 0.8267, "step": 4918 }, { "epoch": 0.44, "grad_norm": 7.506459956213716, "learning_rate": 9.088369210471183e-06, "loss": 0.8223, "step": 4919 }, { "epoch": 0.44, "grad_norm": 7.3454062294847775, "learning_rate": 9.087953311841148e-06, "loss": 0.866, "step": 4920 }, { "epoch": 0.44, "grad_norm": 6.616842180628503, "learning_rate": 9.087537327884037e-06, "loss": 0.7944, "step": 4921 }, { "epoch": 0.44, "grad_norm": 6.288630382476383, "learning_rate": 9.087121258608538e-06, "loss": 0.8369, "step": 4922 }, { "epoch": 0.44, "grad_norm": 7.499485597169764, "learning_rate": 9.086705104023333e-06, "loss": 0.8361, "step": 4923 }, { "epoch": 0.44, "grad_norm": 6.915982187529836, "learning_rate": 9.086288864137108e-06, "loss": 0.7949, "step": 4924 }, { "epoch": 0.44, "grad_norm": 6.359225796409971, "learning_rate": 9.08587253895855e-06, "loss": 0.8178, "step": 4925 }, { "epoch": 0.44, "grad_norm": 6.366038398522875, "learning_rate": 9.085456128496354e-06, "loss": 0.8103, "step": 4926 }, { "epoch": 0.44, "grad_norm": 5.745203494509066, "learning_rate": 9.085039632759208e-06, "loss": 0.8787, "step": 4927 }, { "epoch": 0.44, "grad_norm": 4.8430864357744206, "learning_rate": 9.084623051755803e-06, "loss": 0.8436, "step": 4928 }, { "epoch": 0.44, "grad_norm": 5.792613389632451, "learning_rate": 9.08420638549484e-06, "loss": 0.8643, "step": 4929 }, { "epoch": 0.44, "grad_norm": 4.9549184682562375, "learning_rate": 9.083789633985012e-06, "loss": 0.8529, "step": 4930 }, { "epoch": 0.44, "grad_norm": 6.180274838788972, "learning_rate": 9.083372797235017e-06, "loss": 0.868, "step": 4931 }, { "epoch": 0.44, "grad_norm": 6.426413842741616, "learning_rate": 9.082955875253559e-06, "loss": 0.8658, "step": 4932 }, { "epoch": 0.44, "grad_norm": 6.452946835775552, "learning_rate": 9.08253886804934e-06, "loss": 0.8324, "step": 4933 }, { "epoch": 0.44, "grad_norm": 6.595017686580911, "learning_rate": 9.082121775631058e-06, "loss": 0.7809, "step": 4934 }, { "epoch": 0.44, "grad_norm": 6.026163602296175, "learning_rate": 9.081704598007428e-06, "loss": 0.8314, "step": 4935 }, { "epoch": 0.44, "grad_norm": 7.260227691118625, "learning_rate": 9.081287335187153e-06, "loss": 0.8788, "step": 4936 }, { "epoch": 0.44, "grad_norm": 5.855432899575218, "learning_rate": 9.080869987178941e-06, "loss": 0.8118, "step": 4937 }, { "epoch": 0.44, "grad_norm": 6.738891803206613, "learning_rate": 9.080452553991505e-06, "loss": 0.8788, "step": 4938 }, { "epoch": 0.44, "grad_norm": 10.565320249373139, "learning_rate": 9.080035035633558e-06, "loss": 0.8532, "step": 4939 }, { "epoch": 0.44, "grad_norm": 4.6631605718541715, "learning_rate": 9.079617432113816e-06, "loss": 0.7791, "step": 4940 }, { "epoch": 0.44, "grad_norm": 6.4116254576292055, "learning_rate": 9.079199743440994e-06, "loss": 0.8195, "step": 4941 }, { "epoch": 0.44, "grad_norm": 6.280108679916767, "learning_rate": 9.07878196962381e-06, "loss": 0.8276, "step": 4942 }, { "epoch": 0.44, "grad_norm": 5.304931483061797, "learning_rate": 9.078364110670985e-06, "loss": 0.8497, "step": 4943 }, { "epoch": 0.44, "grad_norm": 6.767258251264538, "learning_rate": 9.077946166591242e-06, "loss": 0.8002, "step": 4944 }, { "epoch": 0.44, "grad_norm": 6.162269808296781, "learning_rate": 9.077528137393302e-06, "loss": 0.8459, "step": 4945 }, { "epoch": 0.44, "grad_norm": 6.559000757370646, "learning_rate": 9.07711002308589e-06, "loss": 0.8932, "step": 4946 }, { "epoch": 0.44, "grad_norm": 5.484232673893755, "learning_rate": 9.07669182367774e-06, "loss": 0.7625, "step": 4947 }, { "epoch": 0.44, "grad_norm": 6.440623647438592, "learning_rate": 9.076273539177572e-06, "loss": 0.8138, "step": 4948 }, { "epoch": 0.44, "grad_norm": 7.169344608257306, "learning_rate": 9.075855169594122e-06, "loss": 0.8442, "step": 4949 }, { "epoch": 0.44, "grad_norm": 6.7365980069108256, "learning_rate": 9.07543671493612e-06, "loss": 0.9581, "step": 4950 }, { "epoch": 0.44, "grad_norm": 7.147881083545158, "learning_rate": 9.075018175212302e-06, "loss": 0.8132, "step": 4951 }, { "epoch": 0.44, "grad_norm": 5.927444707976578, "learning_rate": 9.074599550431405e-06, "loss": 0.8052, "step": 4952 }, { "epoch": 0.44, "grad_norm": 4.890517908407159, "learning_rate": 9.074180840602165e-06, "loss": 0.8667, "step": 4953 }, { "epoch": 0.44, "grad_norm": 4.885274111604918, "learning_rate": 9.073762045733322e-06, "loss": 0.8441, "step": 4954 }, { "epoch": 0.44, "grad_norm": 4.784214650661166, "learning_rate": 9.073343165833617e-06, "loss": 0.8358, "step": 4955 }, { "epoch": 0.44, "grad_norm": 6.751018323843988, "learning_rate": 9.072924200911795e-06, "loss": 0.8716, "step": 4956 }, { "epoch": 0.44, "grad_norm": 6.428475507912158, "learning_rate": 9.072505150976601e-06, "loss": 0.7945, "step": 4957 }, { "epoch": 0.44, "grad_norm": 7.191817565625338, "learning_rate": 9.072086016036779e-06, "loss": 0.8498, "step": 4958 }, { "epoch": 0.44, "grad_norm": 5.0675998649827925, "learning_rate": 9.071666796101078e-06, "loss": 0.8176, "step": 4959 }, { "epoch": 0.44, "grad_norm": 6.534337219171704, "learning_rate": 9.07124749117825e-06, "loss": 0.8677, "step": 4960 }, { "epoch": 0.44, "grad_norm": 5.485272299104047, "learning_rate": 9.070828101277047e-06, "loss": 0.7945, "step": 4961 }, { "epoch": 0.44, "grad_norm": 5.674192555699343, "learning_rate": 9.070408626406224e-06, "loss": 0.8332, "step": 4962 }, { "epoch": 0.44, "grad_norm": 4.940620218467653, "learning_rate": 9.069989066574534e-06, "loss": 0.7791, "step": 4963 }, { "epoch": 0.44, "grad_norm": 5.964658491085949, "learning_rate": 9.069569421790734e-06, "loss": 0.7529, "step": 4964 }, { "epoch": 0.44, "grad_norm": 5.173534038136184, "learning_rate": 9.069149692063585e-06, "loss": 0.7701, "step": 4965 }, { "epoch": 0.44, "grad_norm": 6.2571631033907185, "learning_rate": 9.068729877401848e-06, "loss": 0.824, "step": 4966 }, { "epoch": 0.44, "grad_norm": 7.215251106126207, "learning_rate": 9.068309977814286e-06, "loss": 0.815, "step": 4967 }, { "epoch": 0.44, "grad_norm": 7.605164420848333, "learning_rate": 9.067889993309662e-06, "loss": 0.907, "step": 4968 }, { "epoch": 0.44, "grad_norm": 6.61463928685527, "learning_rate": 9.067469923896743e-06, "loss": 0.8169, "step": 4969 }, { "epoch": 0.44, "grad_norm": 5.8469789630350375, "learning_rate": 9.067049769584295e-06, "loss": 0.7721, "step": 4970 }, { "epoch": 0.44, "grad_norm": 6.689687597679808, "learning_rate": 9.066629530381093e-06, "loss": 0.9313, "step": 4971 }, { "epoch": 0.44, "grad_norm": 5.497101589072142, "learning_rate": 9.066209206295904e-06, "loss": 0.8063, "step": 4972 }, { "epoch": 0.44, "grad_norm": 5.473484062892589, "learning_rate": 9.065788797337502e-06, "loss": 0.8157, "step": 4973 }, { "epoch": 0.44, "grad_norm": 6.345112737378636, "learning_rate": 9.065368303514662e-06, "loss": 0.829, "step": 4974 }, { "epoch": 0.44, "grad_norm": 5.400023421265176, "learning_rate": 9.064947724836163e-06, "loss": 0.8148, "step": 4975 }, { "epoch": 0.44, "grad_norm": 8.09803200982256, "learning_rate": 9.064527061310782e-06, "loss": 0.814, "step": 4976 }, { "epoch": 0.44, "grad_norm": 5.797807647795938, "learning_rate": 9.0641063129473e-06, "loss": 0.8752, "step": 4977 }, { "epoch": 0.44, "grad_norm": 4.533454338815272, "learning_rate": 9.063685479754498e-06, "loss": 0.822, "step": 4978 }, { "epoch": 0.44, "grad_norm": 6.462620560781152, "learning_rate": 9.063264561741163e-06, "loss": 0.8112, "step": 4979 }, { "epoch": 0.44, "grad_norm": 7.193747367776714, "learning_rate": 9.062843558916076e-06, "loss": 0.8508, "step": 4980 }, { "epoch": 0.44, "grad_norm": 6.4718042542974255, "learning_rate": 9.062422471288029e-06, "loss": 0.8689, "step": 4981 }, { "epoch": 0.44, "grad_norm": 4.554518107171751, "learning_rate": 9.062001298865811e-06, "loss": 0.8189, "step": 4982 }, { "epoch": 0.44, "grad_norm": 6.817209495247513, "learning_rate": 9.061580041658209e-06, "loss": 0.8424, "step": 4983 }, { "epoch": 0.44, "grad_norm": 5.370561119188758, "learning_rate": 9.061158699674018e-06, "loss": 0.8263, "step": 4984 }, { "epoch": 0.44, "grad_norm": 6.989734031404186, "learning_rate": 9.060737272922033e-06, "loss": 0.7945, "step": 4985 }, { "epoch": 0.44, "grad_norm": 6.6375027754978415, "learning_rate": 9.060315761411052e-06, "loss": 0.8921, "step": 4986 }, { "epoch": 0.44, "grad_norm": 6.263250033798762, "learning_rate": 9.05989416514987e-06, "loss": 0.8436, "step": 4987 }, { "epoch": 0.44, "grad_norm": 4.797324841663219, "learning_rate": 9.059472484147289e-06, "loss": 0.8048, "step": 4988 }, { "epoch": 0.45, "grad_norm": 5.561108909718329, "learning_rate": 9.05905071841211e-06, "loss": 0.8069, "step": 4989 }, { "epoch": 0.45, "grad_norm": 7.0231837025332435, "learning_rate": 9.058628867953136e-06, "loss": 0.8558, "step": 4990 }, { "epoch": 0.45, "grad_norm": 5.880013844026016, "learning_rate": 9.058206932779174e-06, "loss": 0.8393, "step": 4991 }, { "epoch": 0.45, "grad_norm": 5.907787108207934, "learning_rate": 9.057784912899028e-06, "loss": 0.875, "step": 4992 }, { "epoch": 0.45, "grad_norm": 6.303137408130783, "learning_rate": 9.057362808321509e-06, "loss": 0.8022, "step": 4993 }, { "epoch": 0.45, "grad_norm": 5.174172975800396, "learning_rate": 9.056940619055428e-06, "loss": 0.814, "step": 4994 }, { "epoch": 0.45, "grad_norm": 4.916546411654465, "learning_rate": 9.056518345109595e-06, "loss": 0.8254, "step": 4995 }, { "epoch": 0.45, "grad_norm": 6.830973464155386, "learning_rate": 9.056095986492823e-06, "loss": 0.7472, "step": 4996 }, { "epoch": 0.45, "grad_norm": 6.61074225800067, "learning_rate": 9.055673543213932e-06, "loss": 0.8115, "step": 4997 }, { "epoch": 0.45, "grad_norm": 6.869034900019781, "learning_rate": 9.055251015281739e-06, "loss": 0.8475, "step": 4998 }, { "epoch": 0.45, "grad_norm": 4.981455221560775, "learning_rate": 9.05482840270506e-06, "loss": 0.792, "step": 4999 }, { "epoch": 0.45, "grad_norm": 7.332255180667185, "learning_rate": 9.054405705492718e-06, "loss": 0.8712, "step": 5000 }, { "epoch": 0.45, "grad_norm": 4.999082067255124, "learning_rate": 9.053982923653538e-06, "loss": 0.8222, "step": 5001 }, { "epoch": 0.45, "grad_norm": 6.118803917373723, "learning_rate": 9.05356005719634e-06, "loss": 0.8067, "step": 5002 }, { "epoch": 0.45, "grad_norm": 6.1201272021944115, "learning_rate": 9.053137106129953e-06, "loss": 0.7572, "step": 5003 }, { "epoch": 0.45, "grad_norm": 5.524797992810213, "learning_rate": 9.052714070463206e-06, "loss": 0.8331, "step": 5004 }, { "epoch": 0.45, "grad_norm": 5.734753551743723, "learning_rate": 9.052290950204929e-06, "loss": 0.8393, "step": 5005 }, { "epoch": 0.45, "grad_norm": 4.696684014156215, "learning_rate": 9.051867745363952e-06, "loss": 0.8356, "step": 5006 }, { "epoch": 0.45, "grad_norm": 6.777284094712152, "learning_rate": 9.051444455949107e-06, "loss": 0.7918, "step": 5007 }, { "epoch": 0.45, "grad_norm": 6.812520270248402, "learning_rate": 9.051021081969236e-06, "loss": 0.8152, "step": 5008 }, { "epoch": 0.45, "grad_norm": 6.255314806640712, "learning_rate": 9.050597623433169e-06, "loss": 0.789, "step": 5009 }, { "epoch": 0.45, "grad_norm": 6.595214791113278, "learning_rate": 9.050174080349746e-06, "loss": 0.8931, "step": 5010 }, { "epoch": 0.45, "grad_norm": 6.6860779585771, "learning_rate": 9.04975045272781e-06, "loss": 0.807, "step": 5011 }, { "epoch": 0.45, "grad_norm": 6.212951510855668, "learning_rate": 9.049326740576203e-06, "loss": 0.8416, "step": 5012 }, { "epoch": 0.45, "grad_norm": 5.857005001403426, "learning_rate": 9.048902943903768e-06, "loss": 0.7327, "step": 5013 }, { "epoch": 0.45, "grad_norm": 5.3887958270933884, "learning_rate": 9.04847906271935e-06, "loss": 0.8603, "step": 5014 }, { "epoch": 0.45, "grad_norm": 7.195653239883354, "learning_rate": 9.048055097031797e-06, "loss": 0.7875, "step": 5015 }, { "epoch": 0.45, "grad_norm": 5.61805331702492, "learning_rate": 9.047631046849961e-06, "loss": 0.8864, "step": 5016 }, { "epoch": 0.45, "grad_norm": 5.4474689879348865, "learning_rate": 9.047206912182688e-06, "loss": 0.807, "step": 5017 }, { "epoch": 0.45, "grad_norm": 7.222143898491759, "learning_rate": 9.046782693038837e-06, "loss": 0.8142, "step": 5018 }, { "epoch": 0.45, "grad_norm": 6.00865213564452, "learning_rate": 9.046358389427259e-06, "loss": 0.9047, "step": 5019 }, { "epoch": 0.45, "grad_norm": 7.504208638795159, "learning_rate": 9.04593400135681e-06, "loss": 0.8567, "step": 5020 }, { "epoch": 0.45, "grad_norm": 5.269079692285917, "learning_rate": 9.04550952883635e-06, "loss": 0.7735, "step": 5021 }, { "epoch": 0.45, "grad_norm": 8.932809846394722, "learning_rate": 9.045084971874738e-06, "loss": 0.7795, "step": 5022 }, { "epoch": 0.45, "grad_norm": 5.8078885121791535, "learning_rate": 9.044660330480835e-06, "loss": 0.7993, "step": 5023 }, { "epoch": 0.45, "grad_norm": 5.573354529655484, "learning_rate": 9.044235604663507e-06, "loss": 0.7826, "step": 5024 }, { "epoch": 0.45, "grad_norm": 5.379444732333957, "learning_rate": 9.043810794431615e-06, "loss": 0.7929, "step": 5025 }, { "epoch": 0.45, "grad_norm": 7.757809206399995, "learning_rate": 9.043385899794031e-06, "loss": 0.7852, "step": 5026 }, { "epoch": 0.45, "grad_norm": 5.155616265141507, "learning_rate": 9.04296092075962e-06, "loss": 0.835, "step": 5027 }, { "epoch": 0.45, "grad_norm": 5.863302810679815, "learning_rate": 9.042535857337253e-06, "loss": 0.7334, "step": 5028 }, { "epoch": 0.45, "grad_norm": 5.860121100378805, "learning_rate": 9.042110709535803e-06, "loss": 0.8863, "step": 5029 }, { "epoch": 0.45, "grad_norm": 6.356797975854452, "learning_rate": 9.041685477364146e-06, "loss": 0.848, "step": 5030 }, { "epoch": 0.45, "grad_norm": 6.210635866219729, "learning_rate": 9.041260160831153e-06, "loss": 0.7887, "step": 5031 }, { "epoch": 0.45, "grad_norm": 4.759612203595883, "learning_rate": 9.040834759945707e-06, "loss": 0.8679, "step": 5032 }, { "epoch": 0.45, "grad_norm": 6.360845218899811, "learning_rate": 9.040409274716683e-06, "loss": 0.8376, "step": 5033 }, { "epoch": 0.45, "grad_norm": 5.778527741462842, "learning_rate": 9.039983705152965e-06, "loss": 0.8203, "step": 5034 }, { "epoch": 0.45, "grad_norm": 5.992529983776203, "learning_rate": 9.039558051263433e-06, "loss": 0.796, "step": 5035 }, { "epoch": 0.45, "grad_norm": 5.700029866465778, "learning_rate": 9.039132313056974e-06, "loss": 0.807, "step": 5036 }, { "epoch": 0.45, "grad_norm": 5.545398275672792, "learning_rate": 9.038706490542474e-06, "loss": 0.836, "step": 5037 }, { "epoch": 0.45, "grad_norm": 7.8768349230037025, "learning_rate": 9.03828058372882e-06, "loss": 0.8309, "step": 5038 }, { "epoch": 0.45, "grad_norm": 4.306846390307188, "learning_rate": 9.037854592624902e-06, "loss": 0.8378, "step": 5039 }, { "epoch": 0.45, "grad_norm": 6.581159556114853, "learning_rate": 9.037428517239613e-06, "loss": 0.9239, "step": 5040 }, { "epoch": 0.45, "grad_norm": 6.95931971618534, "learning_rate": 9.037002357581846e-06, "loss": 0.8757, "step": 5041 }, { "epoch": 0.45, "grad_norm": 5.1490692208678395, "learning_rate": 9.036576113660495e-06, "loss": 0.8369, "step": 5042 }, { "epoch": 0.45, "grad_norm": 6.429267564946473, "learning_rate": 9.036149785484457e-06, "loss": 0.8338, "step": 5043 }, { "epoch": 0.45, "grad_norm": 6.329331772429348, "learning_rate": 9.035723373062631e-06, "loss": 0.9375, "step": 5044 }, { "epoch": 0.45, "grad_norm": 5.625437755033326, "learning_rate": 9.035296876403918e-06, "loss": 0.7766, "step": 5045 }, { "epoch": 0.45, "grad_norm": 6.763544989893001, "learning_rate": 9.034870295517222e-06, "loss": 0.823, "step": 5046 }, { "epoch": 0.45, "grad_norm": 7.3032926454762315, "learning_rate": 9.034443630411442e-06, "loss": 0.8104, "step": 5047 }, { "epoch": 0.45, "grad_norm": 7.404778360169698, "learning_rate": 9.034016881095489e-06, "loss": 0.8287, "step": 5048 }, { "epoch": 0.45, "grad_norm": 7.553873155547471, "learning_rate": 9.033590047578266e-06, "loss": 0.8656, "step": 5049 }, { "epoch": 0.45, "grad_norm": 7.413849168099422, "learning_rate": 9.033163129868685e-06, "loss": 0.8878, "step": 5050 }, { "epoch": 0.45, "grad_norm": 5.188355117046866, "learning_rate": 9.032736127975654e-06, "loss": 0.8031, "step": 5051 }, { "epoch": 0.45, "grad_norm": 7.116859864046155, "learning_rate": 9.032309041908091e-06, "loss": 0.8395, "step": 5052 }, { "epoch": 0.45, "grad_norm": 6.06367771971315, "learning_rate": 9.031881871674906e-06, "loss": 0.7694, "step": 5053 }, { "epoch": 0.45, "grad_norm": 5.3364632928642255, "learning_rate": 9.031454617285015e-06, "loss": 0.7693, "step": 5054 }, { "epoch": 0.45, "grad_norm": 6.897152427963591, "learning_rate": 9.031027278747341e-06, "loss": 0.8419, "step": 5055 }, { "epoch": 0.45, "grad_norm": 7.109566328941059, "learning_rate": 9.030599856070799e-06, "loss": 0.8615, "step": 5056 }, { "epoch": 0.45, "grad_norm": 5.777331246592382, "learning_rate": 9.03017234926431e-06, "loss": 0.8316, "step": 5057 }, { "epoch": 0.45, "grad_norm": 6.226816638488124, "learning_rate": 9.029744758336802e-06, "loss": 0.8193, "step": 5058 }, { "epoch": 0.45, "grad_norm": 4.419800045022271, "learning_rate": 9.029317083297196e-06, "loss": 0.8621, "step": 5059 }, { "epoch": 0.45, "grad_norm": 7.150291377237508, "learning_rate": 9.028889324154419e-06, "loss": 0.8712, "step": 5060 }, { "epoch": 0.45, "grad_norm": 6.336730023253211, "learning_rate": 9.028461480917401e-06, "loss": 0.8897, "step": 5061 }, { "epoch": 0.45, "grad_norm": 4.114759163251798, "learning_rate": 9.028033553595072e-06, "loss": 0.8256, "step": 5062 }, { "epoch": 0.45, "grad_norm": 5.363603961033879, "learning_rate": 9.027605542196364e-06, "loss": 0.7794, "step": 5063 }, { "epoch": 0.45, "grad_norm": 7.350532044329956, "learning_rate": 9.02717744673021e-06, "loss": 0.8246, "step": 5064 }, { "epoch": 0.45, "grad_norm": 6.346170210083831, "learning_rate": 9.026749267205547e-06, "loss": 0.8799, "step": 5065 }, { "epoch": 0.45, "grad_norm": 5.516150684106209, "learning_rate": 9.026321003631311e-06, "loss": 0.7709, "step": 5066 }, { "epoch": 0.45, "grad_norm": 6.569509963222773, "learning_rate": 9.025892656016442e-06, "loss": 0.903, "step": 5067 }, { "epoch": 0.45, "grad_norm": 6.349875774489, "learning_rate": 9.02546422436988e-06, "loss": 0.8563, "step": 5068 }, { "epoch": 0.45, "grad_norm": 5.997665960489791, "learning_rate": 9.025035708700566e-06, "loss": 0.8935, "step": 5069 }, { "epoch": 0.45, "grad_norm": 6.793268851754902, "learning_rate": 9.024607109017449e-06, "loss": 0.8421, "step": 5070 }, { "epoch": 0.45, "grad_norm": 5.027803008821304, "learning_rate": 9.024178425329472e-06, "loss": 0.8669, "step": 5071 }, { "epoch": 0.45, "grad_norm": 5.787558677658023, "learning_rate": 9.023749657645581e-06, "loss": 0.8416, "step": 5072 }, { "epoch": 0.45, "grad_norm": 6.482870303295683, "learning_rate": 9.023320805974731e-06, "loss": 0.8029, "step": 5073 }, { "epoch": 0.45, "grad_norm": 5.702986097889686, "learning_rate": 9.022891870325869e-06, "loss": 0.8299, "step": 5074 }, { "epoch": 0.45, "grad_norm": 5.3645706328934475, "learning_rate": 9.022462850707949e-06, "loss": 0.8023, "step": 5075 }, { "epoch": 0.45, "grad_norm": 7.689310822044897, "learning_rate": 9.022033747129925e-06, "loss": 0.7945, "step": 5076 }, { "epoch": 0.45, "grad_norm": 7.675860918625916, "learning_rate": 9.021604559600756e-06, "loss": 0.7843, "step": 5077 }, { "epoch": 0.45, "grad_norm": 7.302481394923712, "learning_rate": 9.021175288129398e-06, "loss": 0.8404, "step": 5078 }, { "epoch": 0.45, "grad_norm": 5.976142110182973, "learning_rate": 9.020745932724812e-06, "loss": 0.7328, "step": 5079 }, { "epoch": 0.45, "grad_norm": 5.8601038683117, "learning_rate": 9.02031649339596e-06, "loss": 0.7378, "step": 5080 }, { "epoch": 0.45, "grad_norm": 4.881645632757235, "learning_rate": 9.019886970151805e-06, "loss": 0.7992, "step": 5081 }, { "epoch": 0.45, "grad_norm": 7.135894137543992, "learning_rate": 9.019457363001316e-06, "loss": 0.8454, "step": 5082 }, { "epoch": 0.45, "grad_norm": 6.053055355667253, "learning_rate": 9.019027671953453e-06, "loss": 0.824, "step": 5083 }, { "epoch": 0.45, "grad_norm": 4.4202877647627545, "learning_rate": 9.018597897017192e-06, "loss": 0.8307, "step": 5084 }, { "epoch": 0.45, "grad_norm": 6.508710790819579, "learning_rate": 9.018168038201498e-06, "loss": 0.8101, "step": 5085 }, { "epoch": 0.45, "grad_norm": 5.898263908006392, "learning_rate": 9.017738095515347e-06, "loss": 0.8283, "step": 5086 }, { "epoch": 0.45, "grad_norm": 7.0921734551170585, "learning_rate": 9.017308068967712e-06, "loss": 0.7883, "step": 5087 }, { "epoch": 0.45, "grad_norm": 5.076802046205431, "learning_rate": 9.01687795856757e-06, "loss": 0.7893, "step": 5088 }, { "epoch": 0.45, "grad_norm": 5.915784806173987, "learning_rate": 9.016447764323894e-06, "loss": 0.7996, "step": 5089 }, { "epoch": 0.45, "grad_norm": 4.480522973906189, "learning_rate": 9.01601748624567e-06, "loss": 0.8089, "step": 5090 }, { "epoch": 0.45, "grad_norm": 7.282058408406082, "learning_rate": 9.015587124341875e-06, "loss": 0.9167, "step": 5091 }, { "epoch": 0.45, "grad_norm": 6.669236243660897, "learning_rate": 9.015156678621492e-06, "loss": 0.8799, "step": 5092 }, { "epoch": 0.45, "grad_norm": 4.167515446005485, "learning_rate": 9.014726149093508e-06, "loss": 0.8505, "step": 5093 }, { "epoch": 0.45, "grad_norm": 7.287330451320698, "learning_rate": 9.014295535766905e-06, "loss": 0.8295, "step": 5094 }, { "epoch": 0.45, "grad_norm": 7.136122110704356, "learning_rate": 9.013864838650677e-06, "loss": 0.8993, "step": 5095 }, { "epoch": 0.45, "grad_norm": 6.1019736515795415, "learning_rate": 9.013434057753809e-06, "loss": 0.8808, "step": 5096 }, { "epoch": 0.45, "grad_norm": 6.686934123663191, "learning_rate": 9.013003193085295e-06, "loss": 0.8359, "step": 5097 }, { "epoch": 0.45, "grad_norm": 5.869360315873249, "learning_rate": 9.012572244654128e-06, "loss": 0.846, "step": 5098 }, { "epoch": 0.45, "grad_norm": 6.712785425012518, "learning_rate": 9.0121412124693e-06, "loss": 0.8592, "step": 5099 }, { "epoch": 0.45, "grad_norm": 5.925459604106917, "learning_rate": 9.011710096539814e-06, "loss": 0.7939, "step": 5100 }, { "epoch": 0.46, "grad_norm": 5.9944928076431925, "learning_rate": 9.011278896874664e-06, "loss": 0.8047, "step": 5101 }, { "epoch": 0.46, "grad_norm": 6.783928218196069, "learning_rate": 9.010847613482852e-06, "loss": 0.8844, "step": 5102 }, { "epoch": 0.46, "grad_norm": 5.810469391310199, "learning_rate": 9.01041624637338e-06, "loss": 0.7352, "step": 5103 }, { "epoch": 0.46, "grad_norm": 5.63486633565103, "learning_rate": 9.009984795555248e-06, "loss": 0.8558, "step": 5104 }, { "epoch": 0.46, "grad_norm": 5.665924497846773, "learning_rate": 9.00955326103747e-06, "loss": 0.9132, "step": 5105 }, { "epoch": 0.46, "grad_norm": 5.0348920471682534, "learning_rate": 9.009121642829046e-06, "loss": 0.8375, "step": 5106 }, { "epoch": 0.46, "grad_norm": 6.053244324521354, "learning_rate": 9.008689940938988e-06, "loss": 0.8438, "step": 5107 }, { "epoch": 0.46, "grad_norm": 6.318383359651649, "learning_rate": 9.008258155376307e-06, "loss": 0.808, "step": 5108 }, { "epoch": 0.46, "grad_norm": 5.658893032040583, "learning_rate": 9.007826286150011e-06, "loss": 0.8505, "step": 5109 }, { "epoch": 0.46, "grad_norm": 7.855846196599967, "learning_rate": 9.007394333269124e-06, "loss": 0.8196, "step": 5110 }, { "epoch": 0.46, "grad_norm": 7.130684104016678, "learning_rate": 9.006962296742653e-06, "loss": 0.9047, "step": 5111 }, { "epoch": 0.46, "grad_norm": 5.553594675646942, "learning_rate": 9.00653017657962e-06, "loss": 0.8776, "step": 5112 }, { "epoch": 0.46, "grad_norm": 6.724512367550496, "learning_rate": 9.006097972789042e-06, "loss": 0.7866, "step": 5113 }, { "epoch": 0.46, "grad_norm": 5.244919113574722, "learning_rate": 9.005665685379945e-06, "loss": 0.8236, "step": 5114 }, { "epoch": 0.46, "grad_norm": 5.66106884038679, "learning_rate": 9.005233314361349e-06, "loss": 0.858, "step": 5115 }, { "epoch": 0.46, "grad_norm": 6.37173292566724, "learning_rate": 9.004800859742276e-06, "loss": 0.8256, "step": 5116 }, { "epoch": 0.46, "grad_norm": 5.537981204527235, "learning_rate": 9.004368321531757e-06, "loss": 0.8764, "step": 5117 }, { "epoch": 0.46, "grad_norm": 5.308820411411482, "learning_rate": 9.00393569973882e-06, "loss": 0.8887, "step": 5118 }, { "epoch": 0.46, "grad_norm": 5.612723373572323, "learning_rate": 9.00350299437249e-06, "loss": 0.8673, "step": 5119 }, { "epoch": 0.46, "grad_norm": 5.599560516400066, "learning_rate": 9.003070205441807e-06, "loss": 0.7947, "step": 5120 }, { "epoch": 0.46, "grad_norm": 6.742870089713064, "learning_rate": 9.002637332955797e-06, "loss": 0.7917, "step": 5121 }, { "epoch": 0.46, "grad_norm": 5.248537408982362, "learning_rate": 9.0022043769235e-06, "loss": 0.8272, "step": 5122 }, { "epoch": 0.46, "grad_norm": 5.9122783248461594, "learning_rate": 9.00177133735395e-06, "loss": 0.8297, "step": 5123 }, { "epoch": 0.46, "grad_norm": 7.385495752678054, "learning_rate": 9.001338214256188e-06, "loss": 0.808, "step": 5124 }, { "epoch": 0.46, "grad_norm": 6.334458326584334, "learning_rate": 9.000905007639252e-06, "loss": 0.8167, "step": 5125 }, { "epoch": 0.46, "grad_norm": 6.6127378255930305, "learning_rate": 9.000471717512188e-06, "loss": 0.8733, "step": 5126 }, { "epoch": 0.46, "grad_norm": 6.429854361101258, "learning_rate": 9.000038343884037e-06, "loss": 0.7266, "step": 5127 }, { "epoch": 0.46, "grad_norm": 5.6180991483049985, "learning_rate": 8.999604886763844e-06, "loss": 0.8369, "step": 5128 }, { "epoch": 0.46, "grad_norm": 5.057339578458408, "learning_rate": 8.99917134616066e-06, "loss": 0.8002, "step": 5129 }, { "epoch": 0.46, "grad_norm": 5.068384668724896, "learning_rate": 8.998737722083531e-06, "loss": 0.7729, "step": 5130 }, { "epoch": 0.46, "grad_norm": 6.11828404002208, "learning_rate": 8.99830401454151e-06, "loss": 0.8108, "step": 5131 }, { "epoch": 0.46, "grad_norm": 7.239775693636682, "learning_rate": 8.997870223543647e-06, "loss": 0.8175, "step": 5132 }, { "epoch": 0.46, "grad_norm": 4.828374962799092, "learning_rate": 8.997436349099e-06, "loss": 0.8217, "step": 5133 }, { "epoch": 0.46, "grad_norm": 6.221169000982274, "learning_rate": 8.997002391216623e-06, "loss": 0.8438, "step": 5134 }, { "epoch": 0.46, "grad_norm": 5.812759791923963, "learning_rate": 8.996568349905577e-06, "loss": 0.8034, "step": 5135 }, { "epoch": 0.46, "grad_norm": 6.945024855185755, "learning_rate": 8.996134225174915e-06, "loss": 0.834, "step": 5136 }, { "epoch": 0.46, "grad_norm": 5.334795585499347, "learning_rate": 8.995700017033703e-06, "loss": 0.8283, "step": 5137 }, { "epoch": 0.46, "grad_norm": 5.746877129333405, "learning_rate": 8.995265725491007e-06, "loss": 0.8149, "step": 5138 }, { "epoch": 0.46, "grad_norm": 4.920857454789024, "learning_rate": 8.994831350555885e-06, "loss": 0.8442, "step": 5139 }, { "epoch": 0.46, "grad_norm": 5.530149700830683, "learning_rate": 8.994396892237409e-06, "loss": 0.8359, "step": 5140 }, { "epoch": 0.46, "grad_norm": 7.9796731115184185, "learning_rate": 8.993962350544643e-06, "loss": 0.8328, "step": 5141 }, { "epoch": 0.46, "grad_norm": 7.387867982033721, "learning_rate": 8.993527725486662e-06, "loss": 0.8447, "step": 5142 }, { "epoch": 0.46, "grad_norm": 5.098297819818132, "learning_rate": 8.993093017072535e-06, "loss": 0.8213, "step": 5143 }, { "epoch": 0.46, "grad_norm": 7.846050224792815, "learning_rate": 8.992658225311334e-06, "loss": 0.8553, "step": 5144 }, { "epoch": 0.46, "grad_norm": 8.265988385052772, "learning_rate": 8.99222335021214e-06, "loss": 0.8149, "step": 5145 }, { "epoch": 0.46, "grad_norm": 6.2900752130342275, "learning_rate": 8.991788391784022e-06, "loss": 0.7816, "step": 5146 }, { "epoch": 0.46, "grad_norm": 5.465218725048546, "learning_rate": 8.991353350036065e-06, "loss": 0.7875, "step": 5147 }, { "epoch": 0.46, "grad_norm": 5.391564226014759, "learning_rate": 8.99091822497735e-06, "loss": 0.8115, "step": 5148 }, { "epoch": 0.46, "grad_norm": 5.552076257155046, "learning_rate": 8.990483016616955e-06, "loss": 0.8448, "step": 5149 }, { "epoch": 0.46, "grad_norm": 5.13387548653464, "learning_rate": 8.990047724963967e-06, "loss": 0.8243, "step": 5150 }, { "epoch": 0.46, "grad_norm": 6.071567307182445, "learning_rate": 8.989612350027467e-06, "loss": 0.8358, "step": 5151 }, { "epoch": 0.46, "grad_norm": 6.114891435105107, "learning_rate": 8.989176891816551e-06, "loss": 0.8423, "step": 5152 }, { "epoch": 0.46, "grad_norm": 6.07647478414593, "learning_rate": 8.988741350340303e-06, "loss": 0.8246, "step": 5153 }, { "epoch": 0.46, "grad_norm": 5.344994326234233, "learning_rate": 8.988305725607812e-06, "loss": 0.8405, "step": 5154 }, { "epoch": 0.46, "grad_norm": 4.779992611325056, "learning_rate": 8.987870017628174e-06, "loss": 0.7886, "step": 5155 }, { "epoch": 0.46, "grad_norm": 7.015998749017503, "learning_rate": 8.987434226410483e-06, "loss": 0.8487, "step": 5156 }, { "epoch": 0.46, "grad_norm": 4.883043034320816, "learning_rate": 8.986998351963836e-06, "loss": 0.9356, "step": 5157 }, { "epoch": 0.46, "grad_norm": 6.202879932313559, "learning_rate": 8.986562394297329e-06, "loss": 0.8076, "step": 5158 }, { "epoch": 0.46, "grad_norm": 4.714095780884199, "learning_rate": 8.986126353420062e-06, "loss": 0.8207, "step": 5159 }, { "epoch": 0.46, "grad_norm": 6.809141119482261, "learning_rate": 8.985690229341137e-06, "loss": 0.8242, "step": 5160 }, { "epoch": 0.46, "grad_norm": 6.996542759902648, "learning_rate": 8.985254022069658e-06, "loss": 0.8153, "step": 5161 }, { "epoch": 0.46, "grad_norm": 6.369077671655855, "learning_rate": 8.984817731614727e-06, "loss": 0.8097, "step": 5162 }, { "epoch": 0.46, "grad_norm": 5.565612826226608, "learning_rate": 8.984381357985452e-06, "loss": 0.8353, "step": 5163 }, { "epoch": 0.46, "grad_norm": 6.06314637500562, "learning_rate": 8.983944901190944e-06, "loss": 0.8857, "step": 5164 }, { "epoch": 0.46, "grad_norm": 5.57835559306156, "learning_rate": 8.983508361240309e-06, "loss": 0.9373, "step": 5165 }, { "epoch": 0.46, "grad_norm": 7.843295978243513, "learning_rate": 8.983071738142663e-06, "loss": 0.8606, "step": 5166 }, { "epoch": 0.46, "grad_norm": 7.206879582172523, "learning_rate": 8.982635031907116e-06, "loss": 0.7973, "step": 5167 }, { "epoch": 0.46, "grad_norm": 5.235041055742097, "learning_rate": 8.982198242542785e-06, "loss": 0.8387, "step": 5168 }, { "epoch": 0.46, "grad_norm": 5.7543881384320805, "learning_rate": 8.981761370058785e-06, "loss": 0.814, "step": 5169 }, { "epoch": 0.46, "grad_norm": 7.401243623358166, "learning_rate": 8.981324414464238e-06, "loss": 0.8338, "step": 5170 }, { "epoch": 0.46, "grad_norm": 6.339068297047841, "learning_rate": 8.980887375768263e-06, "loss": 0.8361, "step": 5171 }, { "epoch": 0.46, "grad_norm": 6.5602655655487725, "learning_rate": 8.980450253979981e-06, "loss": 0.8295, "step": 5172 }, { "epoch": 0.46, "grad_norm": 8.101326185881579, "learning_rate": 8.980013049108517e-06, "loss": 0.7551, "step": 5173 }, { "epoch": 0.46, "grad_norm": 4.296463563427267, "learning_rate": 8.979575761162998e-06, "loss": 0.7611, "step": 5174 }, { "epoch": 0.46, "grad_norm": 6.09585591135136, "learning_rate": 8.979138390152548e-06, "loss": 0.7654, "step": 5175 }, { "epoch": 0.46, "grad_norm": 5.1088611974847105, "learning_rate": 8.9787009360863e-06, "loss": 0.7723, "step": 5176 }, { "epoch": 0.46, "grad_norm": 5.158167316183559, "learning_rate": 8.978263398973381e-06, "loss": 0.7739, "step": 5177 }, { "epoch": 0.46, "grad_norm": 5.2488166649922485, "learning_rate": 8.977825778822928e-06, "loss": 0.8288, "step": 5178 }, { "epoch": 0.46, "grad_norm": 9.61263395462851, "learning_rate": 8.977388075644073e-06, "loss": 0.8904, "step": 5179 }, { "epoch": 0.46, "grad_norm": 6.039188953313096, "learning_rate": 8.976950289445951e-06, "loss": 0.8214, "step": 5180 }, { "epoch": 0.46, "grad_norm": 4.4035257095455185, "learning_rate": 8.976512420237701e-06, "loss": 0.8545, "step": 5181 }, { "epoch": 0.46, "grad_norm": 5.951074351113605, "learning_rate": 8.976074468028464e-06, "loss": 0.712, "step": 5182 }, { "epoch": 0.46, "grad_norm": 5.456455065554696, "learning_rate": 8.97563643282738e-06, "loss": 0.7781, "step": 5183 }, { "epoch": 0.46, "grad_norm": 4.88355198962257, "learning_rate": 8.975198314643591e-06, "loss": 0.8499, "step": 5184 }, { "epoch": 0.46, "grad_norm": 5.28667464256252, "learning_rate": 8.974760113486243e-06, "loss": 0.8351, "step": 5185 }, { "epoch": 0.46, "grad_norm": 4.699102688562684, "learning_rate": 8.974321829364483e-06, "loss": 0.8014, "step": 5186 }, { "epoch": 0.46, "grad_norm": 4.488250766789452, "learning_rate": 8.973883462287456e-06, "loss": 0.8023, "step": 5187 }, { "epoch": 0.46, "grad_norm": 4.403220260624605, "learning_rate": 8.973445012264316e-06, "loss": 0.8028, "step": 5188 }, { "epoch": 0.46, "grad_norm": 6.322901228840251, "learning_rate": 8.973006479304213e-06, "loss": 0.7801, "step": 5189 }, { "epoch": 0.46, "grad_norm": 5.479593301071975, "learning_rate": 8.972567863416302e-06, "loss": 0.8203, "step": 5190 }, { "epoch": 0.46, "grad_norm": 5.833763835012731, "learning_rate": 8.972129164609735e-06, "loss": 0.8088, "step": 5191 }, { "epoch": 0.46, "grad_norm": 5.977470525889851, "learning_rate": 8.97169038289367e-06, "loss": 0.7762, "step": 5192 }, { "epoch": 0.46, "grad_norm": 5.152652409203137, "learning_rate": 8.971251518277266e-06, "loss": 0.8257, "step": 5193 }, { "epoch": 0.46, "grad_norm": 6.027536949951563, "learning_rate": 8.970812570769686e-06, "loss": 0.7388, "step": 5194 }, { "epoch": 0.46, "grad_norm": 5.142297227587591, "learning_rate": 8.970373540380086e-06, "loss": 0.8232, "step": 5195 }, { "epoch": 0.46, "grad_norm": 7.434531033459896, "learning_rate": 8.969934427117636e-06, "loss": 0.9112, "step": 5196 }, { "epoch": 0.46, "grad_norm": 5.097043335564176, "learning_rate": 8.969495230991498e-06, "loss": 0.8114, "step": 5197 }, { "epoch": 0.46, "grad_norm": 5.045995590797559, "learning_rate": 8.969055952010838e-06, "loss": 0.8533, "step": 5198 }, { "epoch": 0.46, "grad_norm": 5.443426474926982, "learning_rate": 8.96861659018483e-06, "loss": 0.8187, "step": 5199 }, { "epoch": 0.46, "grad_norm": 7.285091940770798, "learning_rate": 8.968177145522641e-06, "loss": 0.8051, "step": 5200 }, { "epoch": 0.46, "grad_norm": 6.412123384082406, "learning_rate": 8.967737618033445e-06, "loss": 0.8381, "step": 5201 }, { "epoch": 0.46, "grad_norm": 4.763880256149526, "learning_rate": 8.967298007726415e-06, "loss": 0.8504, "step": 5202 }, { "epoch": 0.46, "grad_norm": 6.340965938882367, "learning_rate": 8.966858314610728e-06, "loss": 0.8887, "step": 5203 }, { "epoch": 0.46, "grad_norm": 5.506980314028418, "learning_rate": 8.966418538695558e-06, "loss": 0.8559, "step": 5204 }, { "epoch": 0.46, "grad_norm": 6.553849548043362, "learning_rate": 8.965978679990088e-06, "loss": 0.847, "step": 5205 }, { "epoch": 0.46, "grad_norm": 6.540418156651894, "learning_rate": 8.965538738503501e-06, "loss": 0.891, "step": 5206 }, { "epoch": 0.46, "grad_norm": 7.285717176122889, "learning_rate": 8.965098714244976e-06, "loss": 0.8535, "step": 5207 }, { "epoch": 0.46, "grad_norm": 4.266638916781551, "learning_rate": 8.964658607223697e-06, "loss": 0.8351, "step": 5208 }, { "epoch": 0.46, "grad_norm": 5.75917253528263, "learning_rate": 8.964218417448854e-06, "loss": 0.8269, "step": 5209 }, { "epoch": 0.46, "grad_norm": 5.971151463381407, "learning_rate": 8.963778144929633e-06, "loss": 0.9389, "step": 5210 }, { "epoch": 0.46, "grad_norm": 6.4457510589096065, "learning_rate": 8.963337789675224e-06, "loss": 0.7858, "step": 5211 }, { "epoch": 0.46, "grad_norm": 5.716159038134252, "learning_rate": 8.962897351694817e-06, "loss": 0.8264, "step": 5212 }, { "epoch": 0.47, "grad_norm": 6.468285415744158, "learning_rate": 8.962456830997606e-06, "loss": 0.8503, "step": 5213 }, { "epoch": 0.47, "grad_norm": 5.4908268860858955, "learning_rate": 8.962016227592788e-06, "loss": 0.8599, "step": 5214 }, { "epoch": 0.47, "grad_norm": 3.878752086515638, "learning_rate": 8.961575541489558e-06, "loss": 0.8394, "step": 5215 }, { "epoch": 0.47, "grad_norm": 5.442373981101603, "learning_rate": 8.961134772697113e-06, "loss": 0.8529, "step": 5216 }, { "epoch": 0.47, "grad_norm": 5.196767770181244, "learning_rate": 8.960693921224654e-06, "loss": 0.8085, "step": 5217 }, { "epoch": 0.47, "grad_norm": 5.122279807007636, "learning_rate": 8.960252987081383e-06, "loss": 0.8182, "step": 5218 }, { "epoch": 0.47, "grad_norm": 6.998241515219926, "learning_rate": 8.959811970276506e-06, "loss": 0.8165, "step": 5219 }, { "epoch": 0.47, "grad_norm": 4.868976616064946, "learning_rate": 8.959370870819224e-06, "loss": 0.7728, "step": 5220 }, { "epoch": 0.47, "grad_norm": 5.799771681075606, "learning_rate": 8.958929688718746e-06, "loss": 0.7742, "step": 5221 }, { "epoch": 0.47, "grad_norm": 6.504477405125737, "learning_rate": 8.958488423984281e-06, "loss": 0.8114, "step": 5222 }, { "epoch": 0.47, "grad_norm": 5.589764665500126, "learning_rate": 8.95804707662504e-06, "loss": 0.7968, "step": 5223 }, { "epoch": 0.47, "grad_norm": 6.189852195532309, "learning_rate": 8.957605646650233e-06, "loss": 0.8856, "step": 5224 }, { "epoch": 0.47, "grad_norm": 6.38207722812808, "learning_rate": 8.957164134069076e-06, "loss": 0.7867, "step": 5225 }, { "epoch": 0.47, "grad_norm": 5.7203235102836105, "learning_rate": 8.956722538890784e-06, "loss": 0.8228, "step": 5226 }, { "epoch": 0.47, "grad_norm": 7.7841319955173125, "learning_rate": 8.956280861124573e-06, "loss": 0.9064, "step": 5227 }, { "epoch": 0.47, "grad_norm": 6.58204092359681, "learning_rate": 8.955839100779665e-06, "loss": 0.8261, "step": 5228 }, { "epoch": 0.47, "grad_norm": 7.521866671998887, "learning_rate": 8.955397257865277e-06, "loss": 0.8659, "step": 5229 }, { "epoch": 0.47, "grad_norm": 5.22538829217875, "learning_rate": 8.954955332390633e-06, "loss": 0.8521, "step": 5230 }, { "epoch": 0.47, "grad_norm": 5.422823145618653, "learning_rate": 8.95451332436496e-06, "loss": 0.8191, "step": 5231 }, { "epoch": 0.47, "grad_norm": 6.23407617471784, "learning_rate": 8.954071233797479e-06, "loss": 0.8164, "step": 5232 }, { "epoch": 0.47, "grad_norm": 6.938111105481462, "learning_rate": 8.953629060697422e-06, "loss": 0.838, "step": 5233 }, { "epoch": 0.47, "grad_norm": 5.846641478438352, "learning_rate": 8.953186805074016e-06, "loss": 0.8513, "step": 5234 }, { "epoch": 0.47, "grad_norm": 6.055895661410474, "learning_rate": 8.952744466936492e-06, "loss": 0.8326, "step": 5235 }, { "epoch": 0.47, "grad_norm": 6.512577905217255, "learning_rate": 8.952302046294085e-06, "loss": 0.9053, "step": 5236 }, { "epoch": 0.47, "grad_norm": 9.078875239990236, "learning_rate": 8.951859543156028e-06, "loss": 0.7941, "step": 5237 }, { "epoch": 0.47, "grad_norm": 7.0505724141337875, "learning_rate": 8.951416957531555e-06, "loss": 0.8385, "step": 5238 }, { "epoch": 0.47, "grad_norm": 5.31826551146731, "learning_rate": 8.95097428942991e-06, "loss": 0.8402, "step": 5239 }, { "epoch": 0.47, "grad_norm": 6.306700356111773, "learning_rate": 8.950531538860328e-06, "loss": 0.8285, "step": 5240 }, { "epoch": 0.47, "grad_norm": 7.105484277795808, "learning_rate": 8.950088705832052e-06, "loss": 0.8023, "step": 5241 }, { "epoch": 0.47, "grad_norm": 7.581645647444966, "learning_rate": 8.949645790354325e-06, "loss": 0.8123, "step": 5242 }, { "epoch": 0.47, "grad_norm": 6.418046318691268, "learning_rate": 8.94920279243639e-06, "loss": 0.8593, "step": 5243 }, { "epoch": 0.47, "grad_norm": 6.592455134514821, "learning_rate": 8.948759712087496e-06, "loss": 0.8518, "step": 5244 }, { "epoch": 0.47, "grad_norm": 6.640358176454387, "learning_rate": 8.948316549316891e-06, "loss": 0.7954, "step": 5245 }, { "epoch": 0.47, "grad_norm": 5.338221490553725, "learning_rate": 8.947873304133824e-06, "loss": 0.8406, "step": 5246 }, { "epoch": 0.47, "grad_norm": 6.082911742259069, "learning_rate": 8.947429976547549e-06, "loss": 0.7942, "step": 5247 }, { "epoch": 0.47, "grad_norm": 6.027675693824544, "learning_rate": 8.946986566567317e-06, "loss": 0.8385, "step": 5248 }, { "epoch": 0.47, "grad_norm": 5.633735265626218, "learning_rate": 8.946543074202382e-06, "loss": 0.9167, "step": 5249 }, { "epoch": 0.47, "grad_norm": 4.8870293724140135, "learning_rate": 8.946099499462006e-06, "loss": 0.762, "step": 5250 }, { "epoch": 0.47, "grad_norm": 6.342132237880394, "learning_rate": 8.945655842355445e-06, "loss": 0.8695, "step": 5251 }, { "epoch": 0.47, "grad_norm": 5.84119282972036, "learning_rate": 8.945212102891958e-06, "loss": 0.8116, "step": 5252 }, { "epoch": 0.47, "grad_norm": 6.835468493849038, "learning_rate": 8.94476828108081e-06, "loss": 0.7935, "step": 5253 }, { "epoch": 0.47, "grad_norm": 6.184435876494331, "learning_rate": 8.94432437693126e-06, "loss": 0.8297, "step": 5254 }, { "epoch": 0.47, "grad_norm": 5.153134071920158, "learning_rate": 8.943880390452578e-06, "loss": 0.7927, "step": 5255 }, { "epoch": 0.47, "grad_norm": 9.821283638407671, "learning_rate": 8.94343632165403e-06, "loss": 0.8571, "step": 5256 }, { "epoch": 0.47, "grad_norm": 6.214367138651282, "learning_rate": 8.942992170544884e-06, "loss": 0.8542, "step": 5257 }, { "epoch": 0.47, "grad_norm": 6.809967284514573, "learning_rate": 8.942547937134413e-06, "loss": 0.8081, "step": 5258 }, { "epoch": 0.47, "grad_norm": 7.1562504931129185, "learning_rate": 8.942103621431885e-06, "loss": 0.8334, "step": 5259 }, { "epoch": 0.47, "grad_norm": 5.216617411811801, "learning_rate": 8.941659223446579e-06, "loss": 0.8112, "step": 5260 }, { "epoch": 0.47, "grad_norm": 7.8603164273013615, "learning_rate": 8.94121474318777e-06, "loss": 0.7462, "step": 5261 }, { "epoch": 0.47, "grad_norm": 5.78220445738919, "learning_rate": 8.940770180664734e-06, "loss": 0.7742, "step": 5262 }, { "epoch": 0.47, "grad_norm": 5.769294610984565, "learning_rate": 8.940325535886748e-06, "loss": 0.8383, "step": 5263 }, { "epoch": 0.47, "grad_norm": 7.017907005765718, "learning_rate": 8.939880808863097e-06, "loss": 0.8324, "step": 5264 }, { "epoch": 0.47, "grad_norm": 5.986055207079286, "learning_rate": 8.939435999603063e-06, "loss": 0.7738, "step": 5265 }, { "epoch": 0.47, "grad_norm": 6.041452994689371, "learning_rate": 8.938991108115928e-06, "loss": 0.8689, "step": 5266 }, { "epoch": 0.47, "grad_norm": 7.846724443101808, "learning_rate": 8.938546134410983e-06, "loss": 0.7621, "step": 5267 }, { "epoch": 0.47, "grad_norm": 6.192495936843143, "learning_rate": 8.938101078497509e-06, "loss": 0.7984, "step": 5268 }, { "epoch": 0.47, "grad_norm": 8.077621494744491, "learning_rate": 8.937655940384802e-06, "loss": 0.8313, "step": 5269 }, { "epoch": 0.47, "grad_norm": 5.332898912414202, "learning_rate": 8.93721072008215e-06, "loss": 0.7399, "step": 5270 }, { "epoch": 0.47, "grad_norm": 5.020884754018453, "learning_rate": 8.936765417598845e-06, "loss": 0.7243, "step": 5271 }, { "epoch": 0.47, "grad_norm": 5.667624981195776, "learning_rate": 8.936320032944185e-06, "loss": 0.8517, "step": 5272 }, { "epoch": 0.47, "grad_norm": 6.343391834524811, "learning_rate": 8.935874566127465e-06, "loss": 0.7575, "step": 5273 }, { "epoch": 0.47, "grad_norm": 6.040696890070989, "learning_rate": 8.935429017157982e-06, "loss": 0.8013, "step": 5274 }, { "epoch": 0.47, "grad_norm": 6.135464145871769, "learning_rate": 8.934983386045037e-06, "loss": 0.7702, "step": 5275 }, { "epoch": 0.47, "grad_norm": 6.4290769900003015, "learning_rate": 8.934537672797931e-06, "loss": 0.7912, "step": 5276 }, { "epoch": 0.47, "grad_norm": 6.285617158832652, "learning_rate": 8.934091877425968e-06, "loss": 0.8765, "step": 5277 }, { "epoch": 0.47, "grad_norm": 6.429904374260959, "learning_rate": 8.933645999938452e-06, "loss": 0.8637, "step": 5278 }, { "epoch": 0.47, "grad_norm": 5.924515122887034, "learning_rate": 8.933200040344689e-06, "loss": 0.7432, "step": 5279 }, { "epoch": 0.47, "grad_norm": 7.141559201370439, "learning_rate": 8.932753998653991e-06, "loss": 0.8812, "step": 5280 }, { "epoch": 0.47, "grad_norm": 6.499340881505883, "learning_rate": 8.932307874875666e-06, "loss": 0.7377, "step": 5281 }, { "epoch": 0.47, "grad_norm": 5.557013221993514, "learning_rate": 8.931861669019024e-06, "loss": 0.8109, "step": 5282 }, { "epoch": 0.47, "grad_norm": 7.277451564421584, "learning_rate": 8.93141538109338e-06, "loss": 0.8543, "step": 5283 }, { "epoch": 0.47, "grad_norm": 5.99043430344877, "learning_rate": 8.930969011108051e-06, "loss": 0.8657, "step": 5284 }, { "epoch": 0.47, "grad_norm": 7.341202914576589, "learning_rate": 8.930522559072352e-06, "loss": 0.8933, "step": 5285 }, { "epoch": 0.47, "grad_norm": 5.183970338841475, "learning_rate": 8.930076024995604e-06, "loss": 0.7764, "step": 5286 }, { "epoch": 0.47, "grad_norm": 5.906125349140781, "learning_rate": 8.929629408887123e-06, "loss": 0.8576, "step": 5287 }, { "epoch": 0.47, "grad_norm": 7.659708402447854, "learning_rate": 8.929182710756234e-06, "loss": 0.792, "step": 5288 }, { "epoch": 0.47, "grad_norm": 6.738814870421521, "learning_rate": 8.928735930612262e-06, "loss": 0.7916, "step": 5289 }, { "epoch": 0.47, "grad_norm": 5.479468076682971, "learning_rate": 8.928289068464532e-06, "loss": 0.8198, "step": 5290 }, { "epoch": 0.47, "grad_norm": 7.7531356612383595, "learning_rate": 8.92784212432237e-06, "loss": 0.831, "step": 5291 }, { "epoch": 0.47, "grad_norm": 6.665324903959377, "learning_rate": 8.927395098195105e-06, "loss": 0.8419, "step": 5292 }, { "epoch": 0.47, "grad_norm": 6.226943784445617, "learning_rate": 8.926947990092068e-06, "loss": 0.8024, "step": 5293 }, { "epoch": 0.47, "grad_norm": 5.210213377288032, "learning_rate": 8.92650080002259e-06, "loss": 0.8168, "step": 5294 }, { "epoch": 0.47, "grad_norm": 6.225332433045818, "learning_rate": 8.92605352799601e-06, "loss": 0.8271, "step": 5295 }, { "epoch": 0.47, "grad_norm": 5.660926729972117, "learning_rate": 8.925606174021656e-06, "loss": 0.7718, "step": 5296 }, { "epoch": 0.47, "grad_norm": 5.843594189518743, "learning_rate": 8.925158738108875e-06, "loss": 0.7625, "step": 5297 }, { "epoch": 0.47, "grad_norm": 4.992034921635682, "learning_rate": 8.924711220266999e-06, "loss": 0.8382, "step": 5298 }, { "epoch": 0.47, "grad_norm": 6.450297925154189, "learning_rate": 8.924263620505372e-06, "loss": 0.7827, "step": 5299 }, { "epoch": 0.47, "grad_norm": 5.837132163652643, "learning_rate": 8.923815938833336e-06, "loss": 0.8834, "step": 5300 }, { "epoch": 0.47, "grad_norm": 4.444131392192165, "learning_rate": 8.923368175260233e-06, "loss": 0.7511, "step": 5301 }, { "epoch": 0.47, "grad_norm": 5.605876668038056, "learning_rate": 8.922920329795414e-06, "loss": 0.8958, "step": 5302 }, { "epoch": 0.47, "grad_norm": 5.16310070569928, "learning_rate": 8.922472402448223e-06, "loss": 0.8405, "step": 5303 }, { "epoch": 0.47, "grad_norm": 6.855634288380377, "learning_rate": 8.92202439322801e-06, "loss": 0.7465, "step": 5304 }, { "epoch": 0.47, "grad_norm": 5.037764649450968, "learning_rate": 8.921576302144129e-06, "loss": 0.8274, "step": 5305 }, { "epoch": 0.47, "grad_norm": 5.002688099143435, "learning_rate": 8.92112812920593e-06, "loss": 0.8229, "step": 5306 }, { "epoch": 0.47, "grad_norm": 6.552781726659676, "learning_rate": 8.920679874422766e-06, "loss": 0.8091, "step": 5307 }, { "epoch": 0.47, "grad_norm": 5.905196978528415, "learning_rate": 8.920231537803998e-06, "loss": 0.9041, "step": 5308 }, { "epoch": 0.47, "grad_norm": 5.858175567241367, "learning_rate": 8.919783119358983e-06, "loss": 0.7927, "step": 5309 }, { "epoch": 0.47, "grad_norm": 5.7571741968332155, "learning_rate": 8.919334619097078e-06, "loss": 0.8503, "step": 5310 }, { "epoch": 0.47, "grad_norm": 5.2673708420786705, "learning_rate": 8.918886037027646e-06, "loss": 0.838, "step": 5311 }, { "epoch": 0.47, "grad_norm": 5.249237502490381, "learning_rate": 8.918437373160049e-06, "loss": 0.8436, "step": 5312 }, { "epoch": 0.47, "grad_norm": 4.880788704380828, "learning_rate": 8.917988627503655e-06, "loss": 0.8229, "step": 5313 }, { "epoch": 0.47, "grad_norm": 5.858748326674754, "learning_rate": 8.917539800067827e-06, "loss": 0.8202, "step": 5314 }, { "epoch": 0.47, "grad_norm": 5.478832612369946, "learning_rate": 8.917090890861938e-06, "loss": 0.7812, "step": 5315 }, { "epoch": 0.47, "grad_norm": 8.218878147570763, "learning_rate": 8.916641899895351e-06, "loss": 0.8551, "step": 5316 }, { "epoch": 0.47, "grad_norm": 5.2641647656701585, "learning_rate": 8.916192827177444e-06, "loss": 0.8206, "step": 5317 }, { "epoch": 0.47, "grad_norm": 5.675589177879057, "learning_rate": 8.915743672717588e-06, "loss": 0.8406, "step": 5318 }, { "epoch": 0.47, "grad_norm": 5.719289326357555, "learning_rate": 8.915294436525158e-06, "loss": 0.9279, "step": 5319 }, { "epoch": 0.47, "grad_norm": 6.843101217239931, "learning_rate": 8.91484511860953e-06, "loss": 0.8037, "step": 5320 }, { "epoch": 0.47, "grad_norm": 5.010792712561126, "learning_rate": 8.914395718980084e-06, "loss": 0.8942, "step": 5321 }, { "epoch": 0.47, "grad_norm": 5.32345497024361, "learning_rate": 8.9139462376462e-06, "loss": 0.8201, "step": 5322 }, { "epoch": 0.47, "grad_norm": 4.585061450573544, "learning_rate": 8.91349667461726e-06, "loss": 0.7794, "step": 5323 }, { "epoch": 0.47, "grad_norm": 7.142161132611303, "learning_rate": 8.913047029902646e-06, "loss": 0.8305, "step": 5324 }, { "epoch": 0.48, "grad_norm": 6.779243141921022, "learning_rate": 8.912597303511746e-06, "loss": 0.8847, "step": 5325 }, { "epoch": 0.48, "grad_norm": 5.327577775346467, "learning_rate": 8.912147495453946e-06, "loss": 0.7883, "step": 5326 }, { "epoch": 0.48, "grad_norm": 6.967547649450736, "learning_rate": 8.911697605738634e-06, "loss": 0.843, "step": 5327 }, { "epoch": 0.48, "grad_norm": 5.859851833829306, "learning_rate": 8.911247634375201e-06, "loss": 0.8525, "step": 5328 }, { "epoch": 0.48, "grad_norm": 6.784127138607858, "learning_rate": 8.910797581373038e-06, "loss": 0.7835, "step": 5329 }, { "epoch": 0.48, "grad_norm": 5.374779572002459, "learning_rate": 8.910347446741541e-06, "loss": 0.8095, "step": 5330 }, { "epoch": 0.48, "grad_norm": 5.969599572463945, "learning_rate": 8.909897230490105e-06, "loss": 0.8144, "step": 5331 }, { "epoch": 0.48, "grad_norm": 6.88542768037812, "learning_rate": 8.909446932628128e-06, "loss": 0.7873, "step": 5332 }, { "epoch": 0.48, "grad_norm": 5.7920789211157375, "learning_rate": 8.908996553165005e-06, "loss": 0.8235, "step": 5333 }, { "epoch": 0.48, "grad_norm": 4.984841442525056, "learning_rate": 8.90854609211014e-06, "loss": 0.7938, "step": 5334 }, { "epoch": 0.48, "grad_norm": 5.221739521481055, "learning_rate": 8.908095549472936e-06, "loss": 0.8547, "step": 5335 }, { "epoch": 0.48, "grad_norm": 5.650276747396451, "learning_rate": 8.907644925262796e-06, "loss": 0.7847, "step": 5336 }, { "epoch": 0.48, "grad_norm": 5.81315313095072, "learning_rate": 8.907194219489125e-06, "loss": 0.7794, "step": 5337 }, { "epoch": 0.48, "grad_norm": 6.664181380089211, "learning_rate": 8.90674343216133e-06, "loss": 0.803, "step": 5338 }, { "epoch": 0.48, "grad_norm": 6.22548318381688, "learning_rate": 8.906292563288826e-06, "loss": 0.7532, "step": 5339 }, { "epoch": 0.48, "grad_norm": 5.95294562980093, "learning_rate": 8.905841612881015e-06, "loss": 0.9053, "step": 5340 }, { "epoch": 0.48, "grad_norm": 6.485863873383317, "learning_rate": 8.905390580947315e-06, "loss": 0.8524, "step": 5341 }, { "epoch": 0.48, "grad_norm": 5.053201839413083, "learning_rate": 8.90493946749714e-06, "loss": 0.884, "step": 5342 }, { "epoch": 0.48, "grad_norm": 5.386235207508995, "learning_rate": 8.904488272539905e-06, "loss": 0.8453, "step": 5343 }, { "epoch": 0.48, "grad_norm": 6.493222314721316, "learning_rate": 8.904036996085028e-06, "loss": 0.8647, "step": 5344 }, { "epoch": 0.48, "grad_norm": 7.334128233296175, "learning_rate": 8.90358563814193e-06, "loss": 0.8377, "step": 5345 }, { "epoch": 0.48, "grad_norm": 5.882368424034483, "learning_rate": 8.903134198720027e-06, "loss": 0.8149, "step": 5346 }, { "epoch": 0.48, "grad_norm": 6.755851540560259, "learning_rate": 8.902682677828749e-06, "loss": 0.8266, "step": 5347 }, { "epoch": 0.48, "grad_norm": 6.914346777970283, "learning_rate": 8.902231075477514e-06, "loss": 0.9492, "step": 5348 }, { "epoch": 0.48, "grad_norm": 4.585028324860895, "learning_rate": 8.901779391675752e-06, "loss": 0.7595, "step": 5349 }, { "epoch": 0.48, "grad_norm": 5.612288952961467, "learning_rate": 8.90132762643289e-06, "loss": 0.7445, "step": 5350 }, { "epoch": 0.48, "grad_norm": 7.172014577916074, "learning_rate": 8.900875779758358e-06, "loss": 0.804, "step": 5351 }, { "epoch": 0.48, "grad_norm": 4.898302126593629, "learning_rate": 8.900423851661586e-06, "loss": 0.7638, "step": 5352 }, { "epoch": 0.48, "grad_norm": 6.2239629391039015, "learning_rate": 8.899971842152008e-06, "loss": 0.8124, "step": 5353 }, { "epoch": 0.48, "grad_norm": 7.197159674990731, "learning_rate": 8.89951975123906e-06, "loss": 0.8412, "step": 5354 }, { "epoch": 0.48, "grad_norm": 5.265370229346946, "learning_rate": 8.899067578932177e-06, "loss": 0.813, "step": 5355 }, { "epoch": 0.48, "grad_norm": 5.535559382986295, "learning_rate": 8.898615325240796e-06, "loss": 0.7463, "step": 5356 }, { "epoch": 0.48, "grad_norm": 8.306020248533402, "learning_rate": 8.898162990174358e-06, "loss": 0.8317, "step": 5357 }, { "epoch": 0.48, "grad_norm": 6.938115533948314, "learning_rate": 8.897710573742304e-06, "loss": 0.8241, "step": 5358 }, { "epoch": 0.48, "grad_norm": 7.786529313239252, "learning_rate": 8.897258075954078e-06, "loss": 0.8824, "step": 5359 }, { "epoch": 0.48, "grad_norm": 7.068772430843752, "learning_rate": 8.896805496819125e-06, "loss": 0.8877, "step": 5360 }, { "epoch": 0.48, "grad_norm": 4.965898232876674, "learning_rate": 8.89635283634689e-06, "loss": 0.9091, "step": 5361 }, { "epoch": 0.48, "grad_norm": 5.580070472307052, "learning_rate": 8.895900094546823e-06, "loss": 0.8242, "step": 5362 }, { "epoch": 0.48, "grad_norm": 5.941670926798362, "learning_rate": 8.895447271428374e-06, "loss": 0.8729, "step": 5363 }, { "epoch": 0.48, "grad_norm": 5.4721469470497075, "learning_rate": 8.894994367000993e-06, "loss": 0.8346, "step": 5364 }, { "epoch": 0.48, "grad_norm": 5.909836003334355, "learning_rate": 8.894541381274136e-06, "loss": 0.7766, "step": 5365 }, { "epoch": 0.48, "grad_norm": 5.298420437768125, "learning_rate": 8.894088314257254e-06, "loss": 0.8287, "step": 5366 }, { "epoch": 0.48, "grad_norm": 5.609417397752601, "learning_rate": 8.893635165959809e-06, "loss": 0.8252, "step": 5367 }, { "epoch": 0.48, "grad_norm": 6.331677654913266, "learning_rate": 8.893181936391255e-06, "loss": 0.7883, "step": 5368 }, { "epoch": 0.48, "grad_norm": 6.559048133082869, "learning_rate": 8.892728625561056e-06, "loss": 0.8719, "step": 5369 }, { "epoch": 0.48, "grad_norm": 5.934668766060812, "learning_rate": 8.892275233478668e-06, "loss": 0.8342, "step": 5370 }, { "epoch": 0.48, "grad_norm": 5.9023831122771515, "learning_rate": 8.891821760153563e-06, "loss": 0.8237, "step": 5371 }, { "epoch": 0.48, "grad_norm": 6.863793100930706, "learning_rate": 8.891368205595198e-06, "loss": 0.8008, "step": 5372 }, { "epoch": 0.48, "grad_norm": 6.6049163018328345, "learning_rate": 8.890914569813048e-06, "loss": 0.9064, "step": 5373 }, { "epoch": 0.48, "grad_norm": 6.048074001846459, "learning_rate": 8.890460852816574e-06, "loss": 0.7912, "step": 5374 }, { "epoch": 0.48, "grad_norm": 4.8131741318618495, "learning_rate": 8.89000705461525e-06, "loss": 0.7957, "step": 5375 }, { "epoch": 0.48, "grad_norm": 4.82703367553774, "learning_rate": 8.889553175218549e-06, "loss": 0.8743, "step": 5376 }, { "epoch": 0.48, "grad_norm": 6.654266462041014, "learning_rate": 8.889099214635941e-06, "loss": 0.8532, "step": 5377 }, { "epoch": 0.48, "grad_norm": 5.446130627651068, "learning_rate": 8.888645172876907e-06, "loss": 0.8111, "step": 5378 }, { "epoch": 0.48, "grad_norm": 4.795044736553056, "learning_rate": 8.888191049950918e-06, "loss": 0.8268, "step": 5379 }, { "epoch": 0.48, "grad_norm": 6.0516035599076465, "learning_rate": 8.887736845867458e-06, "loss": 0.8623, "step": 5380 }, { "epoch": 0.48, "grad_norm": 6.268227876017758, "learning_rate": 8.887282560636003e-06, "loss": 0.8186, "step": 5381 }, { "epoch": 0.48, "grad_norm": 6.43316159516123, "learning_rate": 8.88682819426604e-06, "loss": 0.8161, "step": 5382 }, { "epoch": 0.48, "grad_norm": 5.779326256832511, "learning_rate": 8.88637374676705e-06, "loss": 0.8285, "step": 5383 }, { "epoch": 0.48, "grad_norm": 7.038199355160814, "learning_rate": 8.885919218148519e-06, "loss": 0.9075, "step": 5384 }, { "epoch": 0.48, "grad_norm": 6.363384528890565, "learning_rate": 8.885464608419934e-06, "loss": 0.8519, "step": 5385 }, { "epoch": 0.48, "grad_norm": 5.8592881427923444, "learning_rate": 8.885009917590783e-06, "loss": 0.8579, "step": 5386 }, { "epoch": 0.48, "grad_norm": 5.61344661247994, "learning_rate": 8.884555145670561e-06, "loss": 0.8018, "step": 5387 }, { "epoch": 0.48, "grad_norm": 4.517602282236666, "learning_rate": 8.884100292668755e-06, "loss": 0.8161, "step": 5388 }, { "epoch": 0.48, "grad_norm": 6.934780603085137, "learning_rate": 8.883645358594863e-06, "loss": 0.8435, "step": 5389 }, { "epoch": 0.48, "grad_norm": 5.372529534697981, "learning_rate": 8.883190343458378e-06, "loss": 0.7458, "step": 5390 }, { "epoch": 0.48, "grad_norm": 6.482037933168966, "learning_rate": 8.882735247268798e-06, "loss": 0.8057, "step": 5391 }, { "epoch": 0.48, "grad_norm": 6.360708653475338, "learning_rate": 8.882280070035625e-06, "loss": 0.8733, "step": 5392 }, { "epoch": 0.48, "grad_norm": 5.36592359457441, "learning_rate": 8.881824811768356e-06, "loss": 0.7961, "step": 5393 }, { "epoch": 0.48, "grad_norm": 5.535807727213531, "learning_rate": 8.881369472476495e-06, "loss": 0.8504, "step": 5394 }, { "epoch": 0.48, "grad_norm": 5.838749433334302, "learning_rate": 8.880914052169547e-06, "loss": 0.7549, "step": 5395 }, { "epoch": 0.48, "grad_norm": 6.024264638015091, "learning_rate": 8.880458550857016e-06, "loss": 0.8235, "step": 5396 }, { "epoch": 0.48, "grad_norm": 5.545301739993742, "learning_rate": 8.880002968548411e-06, "loss": 0.8193, "step": 5397 }, { "epoch": 0.48, "grad_norm": 5.734990158845536, "learning_rate": 8.879547305253243e-06, "loss": 0.8046, "step": 5398 }, { "epoch": 0.48, "grad_norm": 7.149357074085558, "learning_rate": 8.879091560981018e-06, "loss": 0.8295, "step": 5399 }, { "epoch": 0.48, "grad_norm": 6.398068584664788, "learning_rate": 8.878635735741255e-06, "loss": 0.8071, "step": 5400 }, { "epoch": 0.48, "grad_norm": 6.94047688254384, "learning_rate": 8.878179829543464e-06, "loss": 0.8302, "step": 5401 }, { "epoch": 0.48, "grad_norm": 6.279321145065282, "learning_rate": 8.87772384239716e-06, "loss": 0.8166, "step": 5402 }, { "epoch": 0.48, "grad_norm": 5.591775707857859, "learning_rate": 8.877267774311866e-06, "loss": 0.8167, "step": 5403 }, { "epoch": 0.48, "grad_norm": 6.865245089863423, "learning_rate": 8.876811625297097e-06, "loss": 0.8332, "step": 5404 }, { "epoch": 0.48, "grad_norm": 6.892108104350891, "learning_rate": 8.876355395362376e-06, "loss": 0.7848, "step": 5405 }, { "epoch": 0.48, "grad_norm": 5.364838412407481, "learning_rate": 8.875899084517225e-06, "loss": 0.8589, "step": 5406 }, { "epoch": 0.48, "grad_norm": 6.407991646383552, "learning_rate": 8.875442692771168e-06, "loss": 0.8949, "step": 5407 }, { "epoch": 0.48, "grad_norm": 6.205000380009722, "learning_rate": 8.874986220133731e-06, "loss": 0.8754, "step": 5408 }, { "epoch": 0.48, "grad_norm": 7.411793670492893, "learning_rate": 8.874529666614444e-06, "loss": 0.923, "step": 5409 }, { "epoch": 0.48, "grad_norm": 6.418524750629432, "learning_rate": 8.874073032222836e-06, "loss": 0.8153, "step": 5410 }, { "epoch": 0.48, "grad_norm": 6.902290378716985, "learning_rate": 8.873616316968436e-06, "loss": 0.8398, "step": 5411 }, { "epoch": 0.48, "grad_norm": 8.365000742655269, "learning_rate": 8.87315952086078e-06, "loss": 0.8416, "step": 5412 }, { "epoch": 0.48, "grad_norm": 5.4099371610854465, "learning_rate": 8.8727026439094e-06, "loss": 0.8034, "step": 5413 }, { "epoch": 0.48, "grad_norm": 6.412474507641492, "learning_rate": 8.872245686123832e-06, "loss": 0.8056, "step": 5414 }, { "epoch": 0.48, "grad_norm": 5.502540192278813, "learning_rate": 8.871788647513617e-06, "loss": 0.7985, "step": 5415 }, { "epoch": 0.48, "grad_norm": 6.05957808188602, "learning_rate": 8.871331528088294e-06, "loss": 0.7476, "step": 5416 }, { "epoch": 0.48, "grad_norm": 4.791314348128598, "learning_rate": 8.870874327857401e-06, "loss": 0.8009, "step": 5417 }, { "epoch": 0.48, "grad_norm": 5.486625955336836, "learning_rate": 8.870417046830487e-06, "loss": 0.8294, "step": 5418 }, { "epoch": 0.48, "grad_norm": 7.229304740321843, "learning_rate": 8.86995968501709e-06, "loss": 0.8524, "step": 5419 }, { "epoch": 0.48, "grad_norm": 5.610055167633258, "learning_rate": 8.869502242426761e-06, "loss": 0.8392, "step": 5420 }, { "epoch": 0.48, "grad_norm": 5.2085571817770235, "learning_rate": 8.869044719069047e-06, "loss": 0.7903, "step": 5421 }, { "epoch": 0.48, "grad_norm": 6.77853831865441, "learning_rate": 8.868587114953497e-06, "loss": 0.7917, "step": 5422 }, { "epoch": 0.48, "grad_norm": 7.237914822932454, "learning_rate": 8.868129430089663e-06, "loss": 0.8811, "step": 5423 }, { "epoch": 0.48, "grad_norm": 5.766112083211153, "learning_rate": 8.867671664487099e-06, "loss": 0.8341, "step": 5424 }, { "epoch": 0.48, "grad_norm": 5.790786625719171, "learning_rate": 8.867213818155357e-06, "loss": 0.8623, "step": 5425 }, { "epoch": 0.48, "grad_norm": 7.782346363002075, "learning_rate": 8.866755891103997e-06, "loss": 0.758, "step": 5426 }, { "epoch": 0.48, "grad_norm": 5.702478347269997, "learning_rate": 8.866297883342577e-06, "loss": 0.8115, "step": 5427 }, { "epoch": 0.48, "grad_norm": 7.184316421012745, "learning_rate": 8.865839794880653e-06, "loss": 0.7868, "step": 5428 }, { "epoch": 0.48, "grad_norm": 5.211516453327603, "learning_rate": 8.865381625727791e-06, "loss": 0.8049, "step": 5429 }, { "epoch": 0.48, "grad_norm": 6.3510942449992305, "learning_rate": 8.864923375893552e-06, "loss": 0.957, "step": 5430 }, { "epoch": 0.48, "grad_norm": 7.128167190911886, "learning_rate": 8.864465045387502e-06, "loss": 0.834, "step": 5431 }, { "epoch": 0.48, "grad_norm": 6.627019375161246, "learning_rate": 8.864006634219207e-06, "loss": 0.8569, "step": 5432 }, { "epoch": 0.48, "grad_norm": 5.620272146979533, "learning_rate": 8.863548142398238e-06, "loss": 0.8455, "step": 5433 }, { "epoch": 0.48, "grad_norm": 5.816628448398909, "learning_rate": 8.863089569934158e-06, "loss": 0.8014, "step": 5434 }, { "epoch": 0.48, "grad_norm": 5.650120162193779, "learning_rate": 8.862630916836547e-06, "loss": 0.8295, "step": 5435 }, { "epoch": 0.48, "grad_norm": 5.334862531189611, "learning_rate": 8.862172183114972e-06, "loss": 0.8389, "step": 5436 }, { "epoch": 0.49, "grad_norm": 5.167755882044029, "learning_rate": 8.861713368779013e-06, "loss": 0.8195, "step": 5437 }, { "epoch": 0.49, "grad_norm": 11.633534148378313, "learning_rate": 8.861254473838244e-06, "loss": 0.9308, "step": 5438 }, { "epoch": 0.49, "grad_norm": 4.814310067894542, "learning_rate": 8.860795498302242e-06, "loss": 0.8424, "step": 5439 }, { "epoch": 0.49, "grad_norm": 5.631489406704529, "learning_rate": 8.860336442180591e-06, "loss": 0.8393, "step": 5440 }, { "epoch": 0.49, "grad_norm": 6.7961697150847025, "learning_rate": 8.85987730548287e-06, "loss": 0.7876, "step": 5441 }, { "epoch": 0.49, "grad_norm": 6.301040409056121, "learning_rate": 8.859418088218664e-06, "loss": 0.8815, "step": 5442 }, { "epoch": 0.49, "grad_norm": 6.010141640063692, "learning_rate": 8.858958790397554e-06, "loss": 0.8294, "step": 5443 }, { "epoch": 0.49, "grad_norm": 6.7657144918935, "learning_rate": 8.858499412029134e-06, "loss": 0.7738, "step": 5444 }, { "epoch": 0.49, "grad_norm": 6.140036710698247, "learning_rate": 8.858039953122988e-06, "loss": 0.7698, "step": 5445 }, { "epoch": 0.49, "grad_norm": 4.755576234117862, "learning_rate": 8.857580413688706e-06, "loss": 0.822, "step": 5446 }, { "epoch": 0.49, "grad_norm": 4.547285611644854, "learning_rate": 8.857120793735881e-06, "loss": 0.823, "step": 5447 }, { "epoch": 0.49, "grad_norm": 6.871221980081119, "learning_rate": 8.856661093274105e-06, "loss": 0.8055, "step": 5448 }, { "epoch": 0.49, "grad_norm": 4.609805085422755, "learning_rate": 8.856201312312978e-06, "loss": 0.8872, "step": 5449 }, { "epoch": 0.49, "grad_norm": 6.887767840571917, "learning_rate": 8.855741450862091e-06, "loss": 0.829, "step": 5450 }, { "epoch": 0.49, "grad_norm": 7.080567377623554, "learning_rate": 8.855281508931047e-06, "loss": 0.8142, "step": 5451 }, { "epoch": 0.49, "grad_norm": 6.491634993569011, "learning_rate": 8.854821486529443e-06, "loss": 0.7731, "step": 5452 }, { "epoch": 0.49, "grad_norm": 4.7692879060481435, "learning_rate": 8.854361383666883e-06, "loss": 0.8305, "step": 5453 }, { "epoch": 0.49, "grad_norm": 6.409847738114942, "learning_rate": 8.85390120035297e-06, "loss": 0.8526, "step": 5454 }, { "epoch": 0.49, "grad_norm": 9.010385641662076, "learning_rate": 8.853440936597308e-06, "loss": 0.7644, "step": 5455 }, { "epoch": 0.49, "grad_norm": 6.359258545695406, "learning_rate": 8.852980592409507e-06, "loss": 0.7861, "step": 5456 }, { "epoch": 0.49, "grad_norm": 6.8658166230734015, "learning_rate": 8.852520167799173e-06, "loss": 0.8586, "step": 5457 }, { "epoch": 0.49, "grad_norm": 6.161745509601713, "learning_rate": 8.852059662775918e-06, "loss": 0.8605, "step": 5458 }, { "epoch": 0.49, "grad_norm": 6.120811378207375, "learning_rate": 8.851599077349354e-06, "loss": 0.7828, "step": 5459 }, { "epoch": 0.49, "grad_norm": 5.672714923819332, "learning_rate": 8.851138411529094e-06, "loss": 0.8461, "step": 5460 }, { "epoch": 0.49, "grad_norm": 6.680910291214889, "learning_rate": 8.850677665324751e-06, "loss": 0.8463, "step": 5461 }, { "epoch": 0.49, "grad_norm": 7.695871571161157, "learning_rate": 8.850216838745947e-06, "loss": 0.8411, "step": 5462 }, { "epoch": 0.49, "grad_norm": 4.522777129331663, "learning_rate": 8.849755931802298e-06, "loss": 0.8357, "step": 5463 }, { "epoch": 0.49, "grad_norm": 7.327416702450108, "learning_rate": 8.849294944503424e-06, "loss": 0.7882, "step": 5464 }, { "epoch": 0.49, "grad_norm": 4.546660644137393, "learning_rate": 8.848833876858949e-06, "loss": 0.8092, "step": 5465 }, { "epoch": 0.49, "grad_norm": 4.767951619055209, "learning_rate": 8.848372728878495e-06, "loss": 0.8273, "step": 5466 }, { "epoch": 0.49, "grad_norm": 8.358040895830687, "learning_rate": 8.847911500571688e-06, "loss": 0.8264, "step": 5467 }, { "epoch": 0.49, "grad_norm": 6.772754493476175, "learning_rate": 8.847450191948155e-06, "loss": 0.8215, "step": 5468 }, { "epoch": 0.49, "grad_norm": 6.007811361640313, "learning_rate": 8.846988803017526e-06, "loss": 0.8833, "step": 5469 }, { "epoch": 0.49, "grad_norm": 6.824473978082111, "learning_rate": 8.84652733378943e-06, "loss": 0.8517, "step": 5470 }, { "epoch": 0.49, "grad_norm": 6.460611978482936, "learning_rate": 8.846065784273499e-06, "loss": 0.8049, "step": 5471 }, { "epoch": 0.49, "grad_norm": 5.280705751284739, "learning_rate": 8.845604154479368e-06, "loss": 0.7656, "step": 5472 }, { "epoch": 0.49, "grad_norm": 6.745439486371974, "learning_rate": 8.845142444416672e-06, "loss": 0.801, "step": 5473 }, { "epoch": 0.49, "grad_norm": 5.969438824931848, "learning_rate": 8.844680654095048e-06, "loss": 0.8482, "step": 5474 }, { "epoch": 0.49, "grad_norm": 7.815412910498652, "learning_rate": 8.844218783524135e-06, "loss": 0.8338, "step": 5475 }, { "epoch": 0.49, "grad_norm": 5.611102785561891, "learning_rate": 8.843756832713575e-06, "loss": 0.8473, "step": 5476 }, { "epoch": 0.49, "grad_norm": 8.985500794668559, "learning_rate": 8.843294801673007e-06, "loss": 0.8337, "step": 5477 }, { "epoch": 0.49, "grad_norm": 6.319099780359786, "learning_rate": 8.842832690412078e-06, "loss": 0.8316, "step": 5478 }, { "epoch": 0.49, "grad_norm": 5.280261643715185, "learning_rate": 8.84237049894043e-06, "loss": 0.7898, "step": 5479 }, { "epoch": 0.49, "grad_norm": 8.636365096031428, "learning_rate": 8.841908227267714e-06, "loss": 0.7836, "step": 5480 }, { "epoch": 0.49, "grad_norm": 5.349142595841628, "learning_rate": 8.841445875403578e-06, "loss": 0.7616, "step": 5481 }, { "epoch": 0.49, "grad_norm": 7.2571466047928865, "learning_rate": 8.84098344335767e-06, "loss": 0.804, "step": 5482 }, { "epoch": 0.49, "grad_norm": 6.700031643782717, "learning_rate": 8.840520931139647e-06, "loss": 0.8332, "step": 5483 }, { "epoch": 0.49, "grad_norm": 6.398499009596309, "learning_rate": 8.840058338759159e-06, "loss": 0.7936, "step": 5484 }, { "epoch": 0.49, "grad_norm": 6.16295571937463, "learning_rate": 8.839595666225862e-06, "loss": 0.8691, "step": 5485 }, { "epoch": 0.49, "grad_norm": 7.466730670706551, "learning_rate": 8.839132913549415e-06, "loss": 0.861, "step": 5486 }, { "epoch": 0.49, "grad_norm": 5.701858505388962, "learning_rate": 8.838670080739475e-06, "loss": 0.8563, "step": 5487 }, { "epoch": 0.49, "grad_norm": 4.894613893901296, "learning_rate": 8.838207167805704e-06, "loss": 0.8764, "step": 5488 }, { "epoch": 0.49, "grad_norm": 7.007647514809908, "learning_rate": 8.837744174757765e-06, "loss": 0.8085, "step": 5489 }, { "epoch": 0.49, "grad_norm": 5.281215170492915, "learning_rate": 8.83728110160532e-06, "loss": 0.8811, "step": 5490 }, { "epoch": 0.49, "grad_norm": 4.63612072243749, "learning_rate": 8.836817948358035e-06, "loss": 0.8269, "step": 5491 }, { "epoch": 0.49, "grad_norm": 4.53196969016628, "learning_rate": 8.83635471502558e-06, "loss": 0.8707, "step": 5492 }, { "epoch": 0.49, "grad_norm": 6.415027663847866, "learning_rate": 8.835891401617617e-06, "loss": 0.7876, "step": 5493 }, { "epoch": 0.49, "grad_norm": 5.772663549058861, "learning_rate": 8.835428008143825e-06, "loss": 0.8728, "step": 5494 }, { "epoch": 0.49, "grad_norm": 5.481638207990558, "learning_rate": 8.834964534613873e-06, "loss": 0.818, "step": 5495 }, { "epoch": 0.49, "grad_norm": 7.093130864434619, "learning_rate": 8.834500981037435e-06, "loss": 0.8075, "step": 5496 }, { "epoch": 0.49, "grad_norm": 5.259009265893882, "learning_rate": 8.834037347424185e-06, "loss": 0.8193, "step": 5497 }, { "epoch": 0.49, "grad_norm": 4.079136867732288, "learning_rate": 8.833573633783801e-06, "loss": 0.8508, "step": 5498 }, { "epoch": 0.49, "grad_norm": 8.087602471488859, "learning_rate": 8.833109840125964e-06, "loss": 0.8533, "step": 5499 }, { "epoch": 0.49, "grad_norm": 6.815014187441658, "learning_rate": 8.832645966460353e-06, "loss": 0.7872, "step": 5500 }, { "epoch": 0.49, "grad_norm": 6.038548263659589, "learning_rate": 8.832182012796653e-06, "loss": 0.804, "step": 5501 }, { "epoch": 0.49, "grad_norm": 5.051079691388929, "learning_rate": 8.831717979144544e-06, "loss": 0.8355, "step": 5502 }, { "epoch": 0.49, "grad_norm": 6.33231088081368, "learning_rate": 8.831253865513713e-06, "loss": 0.7953, "step": 5503 }, { "epoch": 0.49, "grad_norm": 6.292162416451263, "learning_rate": 8.830789671913849e-06, "loss": 0.756, "step": 5504 }, { "epoch": 0.49, "grad_norm": 4.359062289872673, "learning_rate": 8.830325398354639e-06, "loss": 0.7167, "step": 5505 }, { "epoch": 0.49, "grad_norm": 7.989652288671198, "learning_rate": 8.829861044845775e-06, "loss": 0.8359, "step": 5506 }, { "epoch": 0.49, "grad_norm": 5.237497392495298, "learning_rate": 8.82939661139695e-06, "loss": 0.8418, "step": 5507 }, { "epoch": 0.49, "grad_norm": 4.89708717575345, "learning_rate": 8.828932098017855e-06, "loss": 0.8272, "step": 5508 }, { "epoch": 0.49, "grad_norm": 4.928310920873493, "learning_rate": 8.828467504718188e-06, "loss": 0.8079, "step": 5509 }, { "epoch": 0.49, "grad_norm": 5.879394927845364, "learning_rate": 8.828002831507648e-06, "loss": 0.7699, "step": 5510 }, { "epoch": 0.49, "grad_norm": 4.547101142463651, "learning_rate": 8.82753807839593e-06, "loss": 0.7715, "step": 5511 }, { "epoch": 0.49, "grad_norm": 7.610563795519481, "learning_rate": 8.827073245392736e-06, "loss": 0.8862, "step": 5512 }, { "epoch": 0.49, "grad_norm": 7.378056086251798, "learning_rate": 8.82660833250777e-06, "loss": 0.832, "step": 5513 }, { "epoch": 0.49, "grad_norm": 4.692397991489804, "learning_rate": 8.826143339750737e-06, "loss": 0.8358, "step": 5514 }, { "epoch": 0.49, "grad_norm": 6.575583704582548, "learning_rate": 8.82567826713134e-06, "loss": 0.8118, "step": 5515 }, { "epoch": 0.49, "grad_norm": 4.803273868418658, "learning_rate": 8.825213114659286e-06, "loss": 0.7884, "step": 5516 }, { "epoch": 0.49, "grad_norm": 5.275578057843489, "learning_rate": 8.824747882344286e-06, "loss": 0.8129, "step": 5517 }, { "epoch": 0.49, "grad_norm": 5.3620056644674525, "learning_rate": 8.824282570196051e-06, "loss": 0.8162, "step": 5518 }, { "epoch": 0.49, "grad_norm": 4.935843539546981, "learning_rate": 8.823817178224292e-06, "loss": 0.7283, "step": 5519 }, { "epoch": 0.49, "grad_norm": 6.992701268700711, "learning_rate": 8.823351706438722e-06, "loss": 0.8933, "step": 5520 }, { "epoch": 0.49, "grad_norm": 6.4977656823201375, "learning_rate": 8.82288615484906e-06, "loss": 0.7773, "step": 5521 }, { "epoch": 0.49, "grad_norm": 5.821219411381136, "learning_rate": 8.82242052346502e-06, "loss": 0.9303, "step": 5522 }, { "epoch": 0.49, "grad_norm": 6.038045182833607, "learning_rate": 8.821954812296325e-06, "loss": 0.8098, "step": 5523 }, { "epoch": 0.49, "grad_norm": 5.642245397080293, "learning_rate": 8.821489021352691e-06, "loss": 0.8493, "step": 5524 }, { "epoch": 0.49, "grad_norm": 6.678296364222998, "learning_rate": 8.821023150643843e-06, "loss": 0.8182, "step": 5525 }, { "epoch": 0.49, "grad_norm": 5.744838168195922, "learning_rate": 8.820557200179505e-06, "loss": 0.8099, "step": 5526 }, { "epoch": 0.49, "grad_norm": 7.101604287224704, "learning_rate": 8.8200911699694e-06, "loss": 0.8024, "step": 5527 }, { "epoch": 0.49, "grad_norm": 6.205420755092225, "learning_rate": 8.81962506002326e-06, "loss": 0.8363, "step": 5528 }, { "epoch": 0.49, "grad_norm": 6.395375879570854, "learning_rate": 8.819158870350813e-06, "loss": 0.8287, "step": 5529 }, { "epoch": 0.49, "grad_norm": 4.870131073485972, "learning_rate": 8.818692600961786e-06, "loss": 0.8635, "step": 5530 }, { "epoch": 0.49, "grad_norm": 5.509271488746129, "learning_rate": 8.818226251865915e-06, "loss": 0.77, "step": 5531 }, { "epoch": 0.49, "grad_norm": 5.787917101235761, "learning_rate": 8.817759823072931e-06, "loss": 0.8023, "step": 5532 }, { "epoch": 0.49, "grad_norm": 4.862523367151897, "learning_rate": 8.817293314592574e-06, "loss": 0.7562, "step": 5533 }, { "epoch": 0.49, "grad_norm": 5.0162477258097455, "learning_rate": 8.816826726434578e-06, "loss": 0.7948, "step": 5534 }, { "epoch": 0.49, "grad_norm": 5.772189120286938, "learning_rate": 8.816360058608681e-06, "loss": 0.8797, "step": 5535 }, { "epoch": 0.49, "grad_norm": 4.92605402421492, "learning_rate": 8.815893311124626e-06, "loss": 0.7921, "step": 5536 }, { "epoch": 0.49, "grad_norm": 6.2204181436127515, "learning_rate": 8.815426483992156e-06, "loss": 0.805, "step": 5537 }, { "epoch": 0.49, "grad_norm": 5.950570599642973, "learning_rate": 8.814959577221014e-06, "loss": 0.8243, "step": 5538 }, { "epoch": 0.49, "grad_norm": 4.637243572562901, "learning_rate": 8.814492590820944e-06, "loss": 0.7641, "step": 5539 }, { "epoch": 0.49, "grad_norm": 9.948119929150002, "learning_rate": 8.814025524801698e-06, "loss": 0.8242, "step": 5540 }, { "epoch": 0.49, "grad_norm": 6.270505237682784, "learning_rate": 8.813558379173018e-06, "loss": 0.8194, "step": 5541 }, { "epoch": 0.49, "grad_norm": 5.350967312599104, "learning_rate": 8.81309115394466e-06, "loss": 0.7967, "step": 5542 }, { "epoch": 0.49, "grad_norm": 7.0784511337081195, "learning_rate": 8.812623849126374e-06, "loss": 0.8551, "step": 5543 }, { "epoch": 0.49, "grad_norm": 5.423206428464275, "learning_rate": 8.812156464727914e-06, "loss": 0.8839, "step": 5544 }, { "epoch": 0.49, "grad_norm": 6.048261481207874, "learning_rate": 8.811689000759039e-06, "loss": 0.8444, "step": 5545 }, { "epoch": 0.49, "grad_norm": 5.82661507029909, "learning_rate": 8.8112214572295e-06, "loss": 0.8767, "step": 5546 }, { "epoch": 0.49, "grad_norm": 4.830945119570509, "learning_rate": 8.810753834149061e-06, "loss": 0.8249, "step": 5547 }, { "epoch": 0.49, "grad_norm": 4.494961929740061, "learning_rate": 8.810286131527482e-06, "loss": 0.7788, "step": 5548 }, { "epoch": 0.5, "grad_norm": 8.4813436457888, "learning_rate": 8.809818349374525e-06, "loss": 0.8084, "step": 5549 }, { "epoch": 0.5, "grad_norm": 6.7445059558716425, "learning_rate": 8.809350487699952e-06, "loss": 0.8264, "step": 5550 }, { "epoch": 0.5, "grad_norm": 4.760521975681675, "learning_rate": 8.80888254651353e-06, "loss": 0.7546, "step": 5551 }, { "epoch": 0.5, "grad_norm": 5.905108546444297, "learning_rate": 8.808414525825026e-06, "loss": 0.8335, "step": 5552 }, { "epoch": 0.5, "grad_norm": 4.3083934368827554, "learning_rate": 8.807946425644209e-06, "loss": 0.8347, "step": 5553 }, { "epoch": 0.5, "grad_norm": 5.452110977896546, "learning_rate": 8.807478245980851e-06, "loss": 0.8396, "step": 5554 }, { "epoch": 0.5, "grad_norm": 6.723570500052544, "learning_rate": 8.80700998684472e-06, "loss": 0.8131, "step": 5555 }, { "epoch": 0.5, "grad_norm": 4.765869890188234, "learning_rate": 8.806541648245595e-06, "loss": 0.8214, "step": 5556 }, { "epoch": 0.5, "grad_norm": 5.845920796871105, "learning_rate": 8.806073230193251e-06, "loss": 0.876, "step": 5557 }, { "epoch": 0.5, "grad_norm": 7.526438446816304, "learning_rate": 8.80560473269746e-06, "loss": 0.905, "step": 5558 }, { "epoch": 0.5, "grad_norm": 6.212430867488916, "learning_rate": 8.805136155768005e-06, "loss": 0.8247, "step": 5559 }, { "epoch": 0.5, "grad_norm": 6.162608944181552, "learning_rate": 8.804667499414668e-06, "loss": 0.8491, "step": 5560 }, { "epoch": 0.5, "grad_norm": 5.846372029558468, "learning_rate": 8.804198763647227e-06, "loss": 0.8622, "step": 5561 }, { "epoch": 0.5, "grad_norm": 7.124835685312793, "learning_rate": 8.803729948475467e-06, "loss": 0.8229, "step": 5562 }, { "epoch": 0.5, "grad_norm": 5.813329825246744, "learning_rate": 8.803261053909177e-06, "loss": 0.8313, "step": 5563 }, { "epoch": 0.5, "grad_norm": 5.1665416802696535, "learning_rate": 8.80279207995814e-06, "loss": 0.7806, "step": 5564 }, { "epoch": 0.5, "grad_norm": 6.4733843362881105, "learning_rate": 8.802323026632147e-06, "loss": 0.8733, "step": 5565 }, { "epoch": 0.5, "grad_norm": 6.289806814233937, "learning_rate": 8.801853893940987e-06, "loss": 0.8078, "step": 5566 }, { "epoch": 0.5, "grad_norm": 5.166284056659091, "learning_rate": 8.801384681894453e-06, "loss": 0.8514, "step": 5567 }, { "epoch": 0.5, "grad_norm": 6.182258535535486, "learning_rate": 8.800915390502338e-06, "loss": 0.8115, "step": 5568 }, { "epoch": 0.5, "grad_norm": 6.661992984346423, "learning_rate": 8.80044601977444e-06, "loss": 0.7609, "step": 5569 }, { "epoch": 0.5, "grad_norm": 6.922986478312525, "learning_rate": 8.799976569720552e-06, "loss": 0.7995, "step": 5570 }, { "epoch": 0.5, "grad_norm": 6.4527155106389, "learning_rate": 8.799507040350476e-06, "loss": 0.8414, "step": 5571 }, { "epoch": 0.5, "grad_norm": 6.425482336298079, "learning_rate": 8.799037431674012e-06, "loss": 0.8619, "step": 5572 }, { "epoch": 0.5, "grad_norm": 5.909608891076864, "learning_rate": 8.79856774370096e-06, "loss": 0.8441, "step": 5573 }, { "epoch": 0.5, "grad_norm": 7.140563043841346, "learning_rate": 8.798097976441126e-06, "loss": 0.7949, "step": 5574 }, { "epoch": 0.5, "grad_norm": 4.462764458378051, "learning_rate": 8.797628129904314e-06, "loss": 0.8478, "step": 5575 }, { "epoch": 0.5, "grad_norm": 4.864667849553765, "learning_rate": 8.797158204100332e-06, "loss": 0.839, "step": 5576 }, { "epoch": 0.5, "grad_norm": 10.475613685631325, "learning_rate": 8.796688199038987e-06, "loss": 0.9458, "step": 5577 }, { "epoch": 0.5, "grad_norm": 5.0872039393537385, "learning_rate": 8.796218114730092e-06, "loss": 0.7936, "step": 5578 }, { "epoch": 0.5, "grad_norm": 6.260676665586891, "learning_rate": 8.795747951183457e-06, "loss": 0.7494, "step": 5579 }, { "epoch": 0.5, "grad_norm": 6.097795977628091, "learning_rate": 8.795277708408896e-06, "loss": 0.8645, "step": 5580 }, { "epoch": 0.5, "grad_norm": 7.432444455672716, "learning_rate": 8.794807386416224e-06, "loss": 0.9038, "step": 5581 }, { "epoch": 0.5, "grad_norm": 5.704478679427868, "learning_rate": 8.794336985215258e-06, "loss": 0.8547, "step": 5582 }, { "epoch": 0.5, "grad_norm": 5.533703270259658, "learning_rate": 8.793866504815818e-06, "loss": 0.8349, "step": 5583 }, { "epoch": 0.5, "grad_norm": 7.169590163062241, "learning_rate": 8.793395945227721e-06, "loss": 0.914, "step": 5584 }, { "epoch": 0.5, "grad_norm": 6.116209001314709, "learning_rate": 8.792925306460793e-06, "loss": 0.8366, "step": 5585 }, { "epoch": 0.5, "grad_norm": 5.196099869791053, "learning_rate": 8.792454588524855e-06, "loss": 0.8674, "step": 5586 }, { "epoch": 0.5, "grad_norm": 5.864568133713459, "learning_rate": 8.791983791429732e-06, "loss": 0.8403, "step": 5587 }, { "epoch": 0.5, "grad_norm": 5.555023958616594, "learning_rate": 8.791512915185254e-06, "loss": 0.8504, "step": 5588 }, { "epoch": 0.5, "grad_norm": 4.967745313050185, "learning_rate": 8.791041959801245e-06, "loss": 0.7898, "step": 5589 }, { "epoch": 0.5, "grad_norm": 4.408646893938039, "learning_rate": 8.790570925287538e-06, "loss": 0.7598, "step": 5590 }, { "epoch": 0.5, "grad_norm": 4.948070460809465, "learning_rate": 8.790099811653963e-06, "loss": 0.8019, "step": 5591 }, { "epoch": 0.5, "grad_norm": 5.437654839604383, "learning_rate": 8.789628618910357e-06, "loss": 0.7745, "step": 5592 }, { "epoch": 0.5, "grad_norm": 5.868528546766006, "learning_rate": 8.789157347066552e-06, "loss": 0.8234, "step": 5593 }, { "epoch": 0.5, "grad_norm": 6.48313342678063, "learning_rate": 8.788685996132384e-06, "loss": 0.8387, "step": 5594 }, { "epoch": 0.5, "grad_norm": 4.803805977904739, "learning_rate": 8.788214566117694e-06, "loss": 0.7895, "step": 5595 }, { "epoch": 0.5, "grad_norm": 6.69988015977202, "learning_rate": 8.78774305703232e-06, "loss": 0.8981, "step": 5596 }, { "epoch": 0.5, "grad_norm": 4.746695794606797, "learning_rate": 8.787271468886107e-06, "loss": 0.8371, "step": 5597 }, { "epoch": 0.5, "grad_norm": 5.136166592481909, "learning_rate": 8.786799801688894e-06, "loss": 0.8289, "step": 5598 }, { "epoch": 0.5, "grad_norm": 6.1258418236165095, "learning_rate": 8.786328055450528e-06, "loss": 0.8845, "step": 5599 }, { "epoch": 0.5, "grad_norm": 6.855236411487828, "learning_rate": 8.785856230180857e-06, "loss": 0.8081, "step": 5600 }, { "epoch": 0.5, "grad_norm": 6.86665439317705, "learning_rate": 8.785384325889728e-06, "loss": 0.8034, "step": 5601 }, { "epoch": 0.5, "grad_norm": 7.404959450403622, "learning_rate": 8.784912342586991e-06, "loss": 0.8229, "step": 5602 }, { "epoch": 0.5, "grad_norm": 5.489964474685759, "learning_rate": 8.784440280282497e-06, "loss": 0.7587, "step": 5603 }, { "epoch": 0.5, "grad_norm": 5.9481701347505815, "learning_rate": 8.7839681389861e-06, "loss": 0.8232, "step": 5604 }, { "epoch": 0.5, "grad_norm": 7.593131991702785, "learning_rate": 8.783495918707653e-06, "loss": 0.8456, "step": 5605 }, { "epoch": 0.5, "grad_norm": 4.63372069218948, "learning_rate": 8.783023619457019e-06, "loss": 0.8471, "step": 5606 }, { "epoch": 0.5, "grad_norm": 6.150534282242147, "learning_rate": 8.782551241244049e-06, "loss": 0.831, "step": 5607 }, { "epoch": 0.5, "grad_norm": 5.778367068231051, "learning_rate": 8.782078784078604e-06, "loss": 0.7912, "step": 5608 }, { "epoch": 0.5, "grad_norm": 5.932322018378101, "learning_rate": 8.78160624797055e-06, "loss": 0.8051, "step": 5609 }, { "epoch": 0.5, "grad_norm": 5.233044214794863, "learning_rate": 8.781133632929745e-06, "loss": 0.8054, "step": 5610 }, { "epoch": 0.5, "grad_norm": 6.251248701755817, "learning_rate": 8.780660938966057e-06, "loss": 0.8654, "step": 5611 }, { "epoch": 0.5, "grad_norm": 5.553410706448754, "learning_rate": 8.78018816608935e-06, "loss": 0.8432, "step": 5612 }, { "epoch": 0.5, "grad_norm": 4.935603325677223, "learning_rate": 8.779715314309494e-06, "loss": 0.8284, "step": 5613 }, { "epoch": 0.5, "grad_norm": 7.26202972525311, "learning_rate": 8.77924238363636e-06, "loss": 0.7723, "step": 5614 }, { "epoch": 0.5, "grad_norm": 4.309065282758038, "learning_rate": 8.778769374079816e-06, "loss": 0.8525, "step": 5615 }, { "epoch": 0.5, "grad_norm": 6.6085670289149085, "learning_rate": 8.778296285649738e-06, "loss": 0.8518, "step": 5616 }, { "epoch": 0.5, "grad_norm": 5.3081597782879255, "learning_rate": 8.777823118355999e-06, "loss": 0.8348, "step": 5617 }, { "epoch": 0.5, "grad_norm": 4.5286207106142315, "learning_rate": 8.777349872208475e-06, "loss": 0.851, "step": 5618 }, { "epoch": 0.5, "grad_norm": 7.71870810454012, "learning_rate": 8.776876547217043e-06, "loss": 0.799, "step": 5619 }, { "epoch": 0.5, "grad_norm": 6.162888191748796, "learning_rate": 8.776403143391588e-06, "loss": 0.8324, "step": 5620 }, { "epoch": 0.5, "grad_norm": 6.060984465007856, "learning_rate": 8.775929660741985e-06, "loss": 0.842, "step": 5621 }, { "epoch": 0.5, "grad_norm": 5.9893998801331865, "learning_rate": 8.77545609927812e-06, "loss": 0.831, "step": 5622 }, { "epoch": 0.5, "grad_norm": 5.33976056431188, "learning_rate": 8.774982459009877e-06, "loss": 0.836, "step": 5623 }, { "epoch": 0.5, "grad_norm": 5.813902217177362, "learning_rate": 8.774508739947143e-06, "loss": 0.8505, "step": 5624 }, { "epoch": 0.5, "grad_norm": 9.865401065238201, "learning_rate": 8.774034942099804e-06, "loss": 0.8582, "step": 5625 }, { "epoch": 0.5, "grad_norm": 5.262516745457644, "learning_rate": 8.773561065477751e-06, "loss": 0.7963, "step": 5626 }, { "epoch": 0.5, "grad_norm": 6.642995469597107, "learning_rate": 8.773087110090875e-06, "loss": 0.821, "step": 5627 }, { "epoch": 0.5, "grad_norm": 7.328377505500768, "learning_rate": 8.772613075949067e-06, "loss": 0.7877, "step": 5628 }, { "epoch": 0.5, "grad_norm": 5.927730861143127, "learning_rate": 8.772138963062227e-06, "loss": 0.8596, "step": 5629 }, { "epoch": 0.5, "grad_norm": 6.439247217507778, "learning_rate": 8.771664771440243e-06, "loss": 0.8435, "step": 5630 }, { "epoch": 0.5, "grad_norm": 4.931586926104177, "learning_rate": 8.771190501093019e-06, "loss": 0.8365, "step": 5631 }, { "epoch": 0.5, "grad_norm": 5.787152949572916, "learning_rate": 8.77071615203045e-06, "loss": 0.8594, "step": 5632 }, { "epoch": 0.5, "grad_norm": 6.195628472568788, "learning_rate": 8.77024172426244e-06, "loss": 0.8154, "step": 5633 }, { "epoch": 0.5, "grad_norm": 6.254848165382624, "learning_rate": 8.76976721779889e-06, "loss": 0.7498, "step": 5634 }, { "epoch": 0.5, "grad_norm": 6.449395740567542, "learning_rate": 8.769292632649707e-06, "loss": 0.8592, "step": 5635 }, { "epoch": 0.5, "grad_norm": 5.689443388093119, "learning_rate": 8.768817968824794e-06, "loss": 0.7696, "step": 5636 }, { "epoch": 0.5, "grad_norm": 6.680673409801719, "learning_rate": 8.768343226334059e-06, "loss": 0.8095, "step": 5637 }, { "epoch": 0.5, "grad_norm": 7.262625844537167, "learning_rate": 8.767868405187411e-06, "loss": 0.8097, "step": 5638 }, { "epoch": 0.5, "grad_norm": 6.216956288986688, "learning_rate": 8.767393505394764e-06, "loss": 0.7933, "step": 5639 }, { "epoch": 0.5, "grad_norm": 6.628314001969875, "learning_rate": 8.766918526966025e-06, "loss": 0.8447, "step": 5640 }, { "epoch": 0.5, "grad_norm": 5.75616978790288, "learning_rate": 8.766443469911111e-06, "loss": 0.785, "step": 5641 }, { "epoch": 0.5, "grad_norm": 4.701206197121069, "learning_rate": 8.76596833423994e-06, "loss": 0.8606, "step": 5642 }, { "epoch": 0.5, "grad_norm": 5.56729942775858, "learning_rate": 8.765493119962429e-06, "loss": 0.8201, "step": 5643 }, { "epoch": 0.5, "grad_norm": 6.106175028870563, "learning_rate": 8.765017827088492e-06, "loss": 0.8194, "step": 5644 }, { "epoch": 0.5, "grad_norm": 6.21983949232419, "learning_rate": 8.764542455628054e-06, "loss": 0.882, "step": 5645 }, { "epoch": 0.5, "grad_norm": 4.400398669053492, "learning_rate": 8.764067005591037e-06, "loss": 0.7719, "step": 5646 }, { "epoch": 0.5, "grad_norm": 7.254060418030271, "learning_rate": 8.763591476987364e-06, "loss": 0.8742, "step": 5647 }, { "epoch": 0.5, "grad_norm": 5.867158231715006, "learning_rate": 8.763115869826962e-06, "loss": 0.8092, "step": 5648 }, { "epoch": 0.5, "grad_norm": 5.46070155117413, "learning_rate": 8.762640184119756e-06, "loss": 0.8162, "step": 5649 }, { "epoch": 0.5, "grad_norm": 5.032526274530505, "learning_rate": 8.762164419875676e-06, "loss": 0.8577, "step": 5650 }, { "epoch": 0.5, "grad_norm": 6.923725302659771, "learning_rate": 8.761688577104655e-06, "loss": 0.8229, "step": 5651 }, { "epoch": 0.5, "grad_norm": 6.941836289431967, "learning_rate": 8.76121265581662e-06, "loss": 0.8211, "step": 5652 }, { "epoch": 0.5, "grad_norm": 6.0512104105025575, "learning_rate": 8.76073665602151e-06, "loss": 0.8043, "step": 5653 }, { "epoch": 0.5, "grad_norm": 6.242077460332891, "learning_rate": 8.760260577729258e-06, "loss": 0.8463, "step": 5654 }, { "epoch": 0.5, "grad_norm": 5.284501072106909, "learning_rate": 8.7597844209498e-06, "loss": 0.7865, "step": 5655 }, { "epoch": 0.5, "grad_norm": 7.222491679829832, "learning_rate": 8.759308185693076e-06, "loss": 0.7859, "step": 5656 }, { "epoch": 0.5, "grad_norm": 9.042084352625805, "learning_rate": 8.758831871969026e-06, "loss": 0.8438, "step": 5657 }, { "epoch": 0.5, "grad_norm": 4.869249129898664, "learning_rate": 8.758355479787594e-06, "loss": 0.8273, "step": 5658 }, { "epoch": 0.5, "grad_norm": 8.533630442065972, "learning_rate": 8.757879009158722e-06, "loss": 0.8466, "step": 5659 }, { "epoch": 0.5, "grad_norm": 5.188883191239206, "learning_rate": 8.757402460092355e-06, "loss": 0.8547, "step": 5660 }, { "epoch": 0.51, "grad_norm": 6.897186848015153, "learning_rate": 8.75692583259844e-06, "loss": 0.8594, "step": 5661 }, { "epoch": 0.51, "grad_norm": 6.406586845676556, "learning_rate": 8.756449126686925e-06, "loss": 0.8323, "step": 5662 }, { "epoch": 0.51, "grad_norm": 6.329968324531108, "learning_rate": 8.755972342367762e-06, "loss": 0.8307, "step": 5663 }, { "epoch": 0.51, "grad_norm": 6.979975380275228, "learning_rate": 8.755495479650901e-06, "loss": 0.8628, "step": 5664 }, { "epoch": 0.51, "grad_norm": 5.29714989632913, "learning_rate": 8.755018538546297e-06, "loss": 0.7624, "step": 5665 }, { "epoch": 0.51, "grad_norm": 5.694647620445541, "learning_rate": 8.754541519063904e-06, "loss": 0.7829, "step": 5666 }, { "epoch": 0.51, "grad_norm": 6.869824930763273, "learning_rate": 8.754064421213679e-06, "loss": 0.8452, "step": 5667 }, { "epoch": 0.51, "grad_norm": 5.264250254116545, "learning_rate": 8.75358724500558e-06, "loss": 0.8158, "step": 5668 }, { "epoch": 0.51, "grad_norm": 5.05238796507581, "learning_rate": 8.753109990449568e-06, "loss": 0.8297, "step": 5669 }, { "epoch": 0.51, "grad_norm": 5.597453912168418, "learning_rate": 8.752632657555605e-06, "loss": 0.7604, "step": 5670 }, { "epoch": 0.51, "grad_norm": 6.743382954307651, "learning_rate": 8.752155246333652e-06, "loss": 0.7958, "step": 5671 }, { "epoch": 0.51, "grad_norm": 6.091310088065734, "learning_rate": 8.751677756793676e-06, "loss": 0.7707, "step": 5672 }, { "epoch": 0.51, "grad_norm": 7.810017589462644, "learning_rate": 8.751200188945642e-06, "loss": 0.7633, "step": 5673 }, { "epoch": 0.51, "grad_norm": 5.9523604277509214, "learning_rate": 8.750722542799518e-06, "loss": 0.8334, "step": 5674 }, { "epoch": 0.51, "grad_norm": 5.411044113048015, "learning_rate": 8.750244818365278e-06, "loss": 0.8564, "step": 5675 }, { "epoch": 0.51, "grad_norm": 6.731058648822523, "learning_rate": 8.74976701565289e-06, "loss": 0.7977, "step": 5676 }, { "epoch": 0.51, "grad_norm": 5.91905231375966, "learning_rate": 8.749289134672326e-06, "loss": 0.8449, "step": 5677 }, { "epoch": 0.51, "grad_norm": 6.0551932984726955, "learning_rate": 8.748811175433563e-06, "loss": 0.7952, "step": 5678 }, { "epoch": 0.51, "grad_norm": 6.356728392784034, "learning_rate": 8.748333137946574e-06, "loss": 0.794, "step": 5679 }, { "epoch": 0.51, "grad_norm": 5.3842715881446965, "learning_rate": 8.747855022221342e-06, "loss": 0.7705, "step": 5680 }, { "epoch": 0.51, "grad_norm": 7.389805113520474, "learning_rate": 8.747376828267843e-06, "loss": 0.8228, "step": 5681 }, { "epoch": 0.51, "grad_norm": 5.661018478830402, "learning_rate": 8.746898556096061e-06, "loss": 0.8355, "step": 5682 }, { "epoch": 0.51, "grad_norm": 6.040941038249002, "learning_rate": 8.746420205715976e-06, "loss": 0.7814, "step": 5683 }, { "epoch": 0.51, "grad_norm": 7.217049567726062, "learning_rate": 8.745941777137573e-06, "loss": 0.7889, "step": 5684 }, { "epoch": 0.51, "grad_norm": 8.026812855837257, "learning_rate": 8.74546327037084e-06, "loss": 0.7786, "step": 5685 }, { "epoch": 0.51, "grad_norm": 6.2085834249706355, "learning_rate": 8.744984685425764e-06, "loss": 0.7807, "step": 5686 }, { "epoch": 0.51, "grad_norm": 5.8884887152527625, "learning_rate": 8.744506022312332e-06, "loss": 0.8734, "step": 5687 }, { "epoch": 0.51, "grad_norm": 6.169234871145174, "learning_rate": 8.74402728104054e-06, "loss": 0.8219, "step": 5688 }, { "epoch": 0.51, "grad_norm": 6.858647940584735, "learning_rate": 8.743548461620374e-06, "loss": 0.8056, "step": 5689 }, { "epoch": 0.51, "grad_norm": 4.759621969499233, "learning_rate": 8.743069564061835e-06, "loss": 0.7411, "step": 5690 }, { "epoch": 0.51, "grad_norm": 6.208776677097714, "learning_rate": 8.742590588374916e-06, "loss": 0.7872, "step": 5691 }, { "epoch": 0.51, "grad_norm": 5.5817215076216, "learning_rate": 8.742111534569612e-06, "loss": 0.8095, "step": 5692 }, { "epoch": 0.51, "grad_norm": 5.875875208647669, "learning_rate": 8.741632402655928e-06, "loss": 0.8706, "step": 5693 }, { "epoch": 0.51, "grad_norm": 4.87200717368755, "learning_rate": 8.741153192643861e-06, "loss": 0.7908, "step": 5694 }, { "epoch": 0.51, "grad_norm": 6.931799506060896, "learning_rate": 8.740673904543413e-06, "loss": 0.8056, "step": 5695 }, { "epoch": 0.51, "grad_norm": 6.512019445236444, "learning_rate": 8.740194538364588e-06, "loss": 0.8469, "step": 5696 }, { "epoch": 0.51, "grad_norm": 6.645883466274443, "learning_rate": 8.739715094117396e-06, "loss": 0.9058, "step": 5697 }, { "epoch": 0.51, "grad_norm": 5.447206364154061, "learning_rate": 8.73923557181184e-06, "loss": 0.7739, "step": 5698 }, { "epoch": 0.51, "grad_norm": 4.754737226520066, "learning_rate": 8.73875597145793e-06, "loss": 0.7503, "step": 5699 }, { "epoch": 0.51, "grad_norm": 5.410728986892296, "learning_rate": 8.738276293065677e-06, "loss": 0.8288, "step": 5700 }, { "epoch": 0.51, "grad_norm": 8.348749802497387, "learning_rate": 8.737796536645093e-06, "loss": 0.9034, "step": 5701 }, { "epoch": 0.51, "grad_norm": 6.35455158614091, "learning_rate": 8.73731670220619e-06, "loss": 0.8243, "step": 5702 }, { "epoch": 0.51, "grad_norm": 6.12106015111757, "learning_rate": 8.73683678975899e-06, "loss": 0.8184, "step": 5703 }, { "epoch": 0.51, "grad_norm": 3.660868934674814, "learning_rate": 8.736356799313501e-06, "loss": 0.7982, "step": 5704 }, { "epoch": 0.51, "grad_norm": 6.0927921577529975, "learning_rate": 8.73587673087975e-06, "loss": 0.8281, "step": 5705 }, { "epoch": 0.51, "grad_norm": 6.359008475168115, "learning_rate": 8.73539658446775e-06, "loss": 0.8467, "step": 5706 }, { "epoch": 0.51, "grad_norm": 7.159921838378046, "learning_rate": 8.734916360087528e-06, "loss": 0.8366, "step": 5707 }, { "epoch": 0.51, "grad_norm": 8.008967987703356, "learning_rate": 8.734436057749108e-06, "loss": 0.8166, "step": 5708 }, { "epoch": 0.51, "grad_norm": 6.071136274209243, "learning_rate": 8.733955677462511e-06, "loss": 0.8124, "step": 5709 }, { "epoch": 0.51, "grad_norm": 5.044791193065917, "learning_rate": 8.733475219237768e-06, "loss": 0.8317, "step": 5710 }, { "epoch": 0.51, "grad_norm": 4.905303080640748, "learning_rate": 8.732994683084907e-06, "loss": 0.7873, "step": 5711 }, { "epoch": 0.51, "grad_norm": 5.852016146099419, "learning_rate": 8.732514069013956e-06, "loss": 0.8052, "step": 5712 }, { "epoch": 0.51, "grad_norm": 6.666027579510247, "learning_rate": 8.732033377034947e-06, "loss": 0.8402, "step": 5713 }, { "epoch": 0.51, "grad_norm": 5.786548116080704, "learning_rate": 8.731552607157914e-06, "loss": 0.7912, "step": 5714 }, { "epoch": 0.51, "grad_norm": 5.38621025409749, "learning_rate": 8.731071759392895e-06, "loss": 0.8353, "step": 5715 }, { "epoch": 0.51, "grad_norm": 5.967842235591543, "learning_rate": 8.73059083374992e-06, "loss": 0.7529, "step": 5716 }, { "epoch": 0.51, "grad_norm": 5.2481751101313465, "learning_rate": 8.730109830239036e-06, "loss": 0.7862, "step": 5717 }, { "epoch": 0.51, "grad_norm": 6.469022907867675, "learning_rate": 8.729628748870274e-06, "loss": 0.8234, "step": 5718 }, { "epoch": 0.51, "grad_norm": 6.235253129413111, "learning_rate": 8.729147589653681e-06, "loss": 0.8412, "step": 5719 }, { "epoch": 0.51, "grad_norm": 4.544024395441532, "learning_rate": 8.7286663525993e-06, "loss": 0.7904, "step": 5720 }, { "epoch": 0.51, "grad_norm": 7.133812139757403, "learning_rate": 8.728185037717172e-06, "loss": 0.7928, "step": 5721 }, { "epoch": 0.51, "grad_norm": 5.643997472334568, "learning_rate": 8.727703645017349e-06, "loss": 0.804, "step": 5722 }, { "epoch": 0.51, "grad_norm": 6.640185678733757, "learning_rate": 8.727222174509873e-06, "loss": 0.8106, "step": 5723 }, { "epoch": 0.51, "grad_norm": 6.117011654036177, "learning_rate": 8.726740626204799e-06, "loss": 0.8205, "step": 5724 }, { "epoch": 0.51, "grad_norm": 7.128560760410511, "learning_rate": 8.726259000112174e-06, "loss": 0.8723, "step": 5725 }, { "epoch": 0.51, "grad_norm": 5.71083419413605, "learning_rate": 8.725777296242054e-06, "loss": 0.8741, "step": 5726 }, { "epoch": 0.51, "grad_norm": 6.821441086629458, "learning_rate": 8.725295514604492e-06, "loss": 0.7476, "step": 5727 }, { "epoch": 0.51, "grad_norm": 5.374982813778905, "learning_rate": 8.724813655209543e-06, "loss": 0.7948, "step": 5728 }, { "epoch": 0.51, "grad_norm": 6.830079196366561, "learning_rate": 8.724331718067269e-06, "loss": 0.8634, "step": 5729 }, { "epoch": 0.51, "grad_norm": 7.131750921932709, "learning_rate": 8.723849703187723e-06, "loss": 0.798, "step": 5730 }, { "epoch": 0.51, "grad_norm": 5.752241371375161, "learning_rate": 8.723367610580973e-06, "loss": 0.8067, "step": 5731 }, { "epoch": 0.51, "grad_norm": 6.663078251562671, "learning_rate": 8.722885440257075e-06, "loss": 0.7794, "step": 5732 }, { "epoch": 0.51, "grad_norm": 7.053920294127615, "learning_rate": 8.722403192226099e-06, "loss": 0.8893, "step": 5733 }, { "epoch": 0.51, "grad_norm": 5.805347780119679, "learning_rate": 8.721920866498107e-06, "loss": 0.8055, "step": 5734 }, { "epoch": 0.51, "grad_norm": 6.675019286880624, "learning_rate": 8.721438463083168e-06, "loss": 0.8166, "step": 5735 }, { "epoch": 0.51, "grad_norm": 7.006866503667122, "learning_rate": 8.72095598199135e-06, "loss": 0.8193, "step": 5736 }, { "epoch": 0.51, "grad_norm": 4.632571902047563, "learning_rate": 8.720473423232726e-06, "loss": 0.7751, "step": 5737 }, { "epoch": 0.51, "grad_norm": 4.905215083505556, "learning_rate": 8.719990786817367e-06, "loss": 0.8008, "step": 5738 }, { "epoch": 0.51, "grad_norm": 6.5411357539883195, "learning_rate": 8.719508072755345e-06, "loss": 0.8538, "step": 5739 }, { "epoch": 0.51, "grad_norm": 9.278517256201297, "learning_rate": 8.71902528105674e-06, "loss": 0.9175, "step": 5740 }, { "epoch": 0.51, "grad_norm": 8.535211358742204, "learning_rate": 8.718542411731626e-06, "loss": 0.8957, "step": 5741 }, { "epoch": 0.51, "grad_norm": 5.623199942488983, "learning_rate": 8.718059464790082e-06, "loss": 0.8396, "step": 5742 }, { "epoch": 0.51, "grad_norm": 7.059933438873013, "learning_rate": 8.71757644024219e-06, "loss": 0.8001, "step": 5743 }, { "epoch": 0.51, "grad_norm": 9.957385591870104, "learning_rate": 8.71709333809803e-06, "loss": 0.8119, "step": 5744 }, { "epoch": 0.51, "grad_norm": 7.7677950714420145, "learning_rate": 8.716610158367688e-06, "loss": 0.8875, "step": 5745 }, { "epoch": 0.51, "grad_norm": 6.446171842213826, "learning_rate": 8.716126901061248e-06, "loss": 0.8732, "step": 5746 }, { "epoch": 0.51, "grad_norm": 6.381789993071132, "learning_rate": 8.715643566188797e-06, "loss": 0.8128, "step": 5747 }, { "epoch": 0.51, "grad_norm": 5.464851828255418, "learning_rate": 8.715160153760422e-06, "loss": 0.876, "step": 5748 }, { "epoch": 0.51, "grad_norm": 6.647085603101719, "learning_rate": 8.714676663786216e-06, "loss": 0.7801, "step": 5749 }, { "epoch": 0.51, "grad_norm": 7.185902678256492, "learning_rate": 8.714193096276272e-06, "loss": 0.812, "step": 5750 }, { "epoch": 0.51, "grad_norm": 6.398991162276467, "learning_rate": 8.713709451240679e-06, "loss": 0.957, "step": 5751 }, { "epoch": 0.51, "grad_norm": 6.572346398902644, "learning_rate": 8.713225728689534e-06, "loss": 0.7716, "step": 5752 }, { "epoch": 0.51, "grad_norm": 4.563499805892573, "learning_rate": 8.712741928632933e-06, "loss": 1.0096, "step": 5753 }, { "epoch": 0.51, "grad_norm": 6.422860872744204, "learning_rate": 8.712258051080975e-06, "loss": 0.8319, "step": 5754 }, { "epoch": 0.51, "grad_norm": 6.13073235937632, "learning_rate": 8.711774096043762e-06, "loss": 0.8576, "step": 5755 }, { "epoch": 0.51, "grad_norm": 6.315718873527776, "learning_rate": 8.711290063531393e-06, "loss": 0.7578, "step": 5756 }, { "epoch": 0.51, "grad_norm": 5.649425923569732, "learning_rate": 8.710805953553971e-06, "loss": 0.8205, "step": 5757 }, { "epoch": 0.51, "grad_norm": 6.828016439689453, "learning_rate": 8.7103217661216e-06, "loss": 0.7856, "step": 5758 }, { "epoch": 0.51, "grad_norm": 5.52548865112177, "learning_rate": 8.70983750124439e-06, "loss": 0.8168, "step": 5759 }, { "epoch": 0.51, "grad_norm": 6.154187500715567, "learning_rate": 8.709353158932442e-06, "loss": 0.8278, "step": 5760 }, { "epoch": 0.51, "grad_norm": 22.781189933084534, "learning_rate": 8.708868739195875e-06, "loss": 0.8343, "step": 5761 }, { "epoch": 0.51, "grad_norm": 5.773865457858816, "learning_rate": 8.708384242044793e-06, "loss": 0.7413, "step": 5762 }, { "epoch": 0.51, "grad_norm": 7.100039627454103, "learning_rate": 8.707899667489312e-06, "loss": 0.8376, "step": 5763 }, { "epoch": 0.51, "grad_norm": 6.844058003056372, "learning_rate": 8.707415015539546e-06, "loss": 0.8395, "step": 5764 }, { "epoch": 0.51, "grad_norm": 8.261438581169475, "learning_rate": 8.706930286205611e-06, "loss": 0.8271, "step": 5765 }, { "epoch": 0.51, "grad_norm": 6.4421742541436116, "learning_rate": 8.706445479497623e-06, "loss": 0.8262, "step": 5766 }, { "epoch": 0.51, "grad_norm": 5.987426583221405, "learning_rate": 8.705960595425701e-06, "loss": 0.8273, "step": 5767 }, { "epoch": 0.51, "grad_norm": 5.384476868382562, "learning_rate": 8.705475633999971e-06, "loss": 0.7961, "step": 5768 }, { "epoch": 0.51, "grad_norm": 8.109813634613358, "learning_rate": 8.70499059523055e-06, "loss": 0.8307, "step": 5769 }, { "epoch": 0.51, "grad_norm": 6.381037406673442, "learning_rate": 8.704505479127564e-06, "loss": 0.8198, "step": 5770 }, { "epoch": 0.51, "grad_norm": 7.956425373451172, "learning_rate": 8.70402028570114e-06, "loss": 0.8263, "step": 5771 }, { "epoch": 0.51, "grad_norm": 5.094193535875792, "learning_rate": 8.703535014961404e-06, "loss": 0.7822, "step": 5772 }, { "epoch": 0.52, "grad_norm": 6.0756566071039355, "learning_rate": 8.703049666918484e-06, "loss": 0.8449, "step": 5773 }, { "epoch": 0.52, "grad_norm": 5.862271692051127, "learning_rate": 8.702564241582513e-06, "loss": 0.7016, "step": 5774 }, { "epoch": 0.52, "grad_norm": 6.775134299311187, "learning_rate": 8.702078738963623e-06, "loss": 0.8105, "step": 5775 }, { "epoch": 0.52, "grad_norm": 6.187425287393864, "learning_rate": 8.701593159071945e-06, "loss": 0.8674, "step": 5776 }, { "epoch": 0.52, "grad_norm": 5.8075496066449315, "learning_rate": 8.701107501917616e-06, "loss": 0.828, "step": 5777 }, { "epoch": 0.52, "grad_norm": 6.959180396726149, "learning_rate": 8.700621767510774e-06, "loss": 0.7748, "step": 5778 }, { "epoch": 0.52, "grad_norm": 4.709156774966415, "learning_rate": 8.700135955861555e-06, "loss": 0.7852, "step": 5779 }, { "epoch": 0.52, "grad_norm": 5.993845937908161, "learning_rate": 8.699650066980102e-06, "loss": 0.834, "step": 5780 }, { "epoch": 0.52, "grad_norm": 5.0874468829899735, "learning_rate": 8.699164100876557e-06, "loss": 0.8023, "step": 5781 }, { "epoch": 0.52, "grad_norm": 5.42674979256416, "learning_rate": 8.69867805756106e-06, "loss": 0.898, "step": 5782 }, { "epoch": 0.52, "grad_norm": 5.401841924411248, "learning_rate": 8.69819193704376e-06, "loss": 0.824, "step": 5783 }, { "epoch": 0.52, "grad_norm": 7.087579637160131, "learning_rate": 8.697705739334805e-06, "loss": 0.8709, "step": 5784 }, { "epoch": 0.52, "grad_norm": 7.523768588211032, "learning_rate": 8.697219464444337e-06, "loss": 0.8042, "step": 5785 }, { "epoch": 0.52, "grad_norm": 6.8360967103691, "learning_rate": 8.69673311238251e-06, "loss": 0.8115, "step": 5786 }, { "epoch": 0.52, "grad_norm": 7.856844644695396, "learning_rate": 8.696246683159476e-06, "loss": 0.8215, "step": 5787 }, { "epoch": 0.52, "grad_norm": 6.788478296977279, "learning_rate": 8.695760176785386e-06, "loss": 0.8629, "step": 5788 }, { "epoch": 0.52, "grad_norm": 5.216487547235108, "learning_rate": 8.695273593270396e-06, "loss": 0.7657, "step": 5789 }, { "epoch": 0.52, "grad_norm": 6.181336419466348, "learning_rate": 8.694786932624663e-06, "loss": 0.7817, "step": 5790 }, { "epoch": 0.52, "grad_norm": 5.740612031477996, "learning_rate": 8.694300194858345e-06, "loss": 0.7367, "step": 5791 }, { "epoch": 0.52, "grad_norm": 5.9782173761304325, "learning_rate": 8.6938133799816e-06, "loss": 0.7672, "step": 5792 }, { "epoch": 0.52, "grad_norm": 6.025564952346374, "learning_rate": 8.69332648800459e-06, "loss": 0.8586, "step": 5793 }, { "epoch": 0.52, "grad_norm": 6.195319012424117, "learning_rate": 8.692839518937477e-06, "loss": 0.8284, "step": 5794 }, { "epoch": 0.52, "grad_norm": 6.18304070463878, "learning_rate": 8.692352472790427e-06, "loss": 0.8076, "step": 5795 }, { "epoch": 0.52, "grad_norm": 5.6113535148407685, "learning_rate": 8.691865349573606e-06, "loss": 0.7873, "step": 5796 }, { "epoch": 0.52, "grad_norm": 4.929410780940768, "learning_rate": 8.69137814929718e-06, "loss": 0.7224, "step": 5797 }, { "epoch": 0.52, "grad_norm": 5.58702630737882, "learning_rate": 8.690890871971318e-06, "loss": 0.8173, "step": 5798 }, { "epoch": 0.52, "grad_norm": 5.128813248183907, "learning_rate": 8.690403517606193e-06, "loss": 0.7042, "step": 5799 }, { "epoch": 0.52, "grad_norm": 7.399927092925807, "learning_rate": 8.689916086211977e-06, "loss": 0.8164, "step": 5800 }, { "epoch": 0.52, "grad_norm": 5.1710019912427825, "learning_rate": 8.689428577798843e-06, "loss": 0.7981, "step": 5801 }, { "epoch": 0.52, "grad_norm": 7.604813997565266, "learning_rate": 8.688940992376966e-06, "loss": 0.8086, "step": 5802 }, { "epoch": 0.52, "grad_norm": 9.760176906734495, "learning_rate": 8.688453329956526e-06, "loss": 0.9153, "step": 5803 }, { "epoch": 0.52, "grad_norm": 4.854059398888365, "learning_rate": 8.6879655905477e-06, "loss": 0.8553, "step": 5804 }, { "epoch": 0.52, "grad_norm": 5.416887246472852, "learning_rate": 8.687477774160668e-06, "loss": 0.8488, "step": 5805 }, { "epoch": 0.52, "grad_norm": 5.1784643533734265, "learning_rate": 8.686989880805614e-06, "loss": 0.753, "step": 5806 }, { "epoch": 0.52, "grad_norm": 6.344982546634153, "learning_rate": 8.68650191049272e-06, "loss": 0.7595, "step": 5807 }, { "epoch": 0.52, "grad_norm": 7.5652850099482025, "learning_rate": 8.686013863232173e-06, "loss": 0.8452, "step": 5808 }, { "epoch": 0.52, "grad_norm": 6.751502329410952, "learning_rate": 8.685525739034158e-06, "loss": 0.847, "step": 5809 }, { "epoch": 0.52, "grad_norm": 7.834847388684795, "learning_rate": 8.685037537908865e-06, "loss": 0.844, "step": 5810 }, { "epoch": 0.52, "grad_norm": 7.7146583685829375, "learning_rate": 8.684549259866481e-06, "loss": 0.8672, "step": 5811 }, { "epoch": 0.52, "grad_norm": 6.034218441622979, "learning_rate": 8.684060904917201e-06, "loss": 0.8741, "step": 5812 }, { "epoch": 0.52, "grad_norm": 4.032366003820098, "learning_rate": 8.68357247307122e-06, "loss": 0.8237, "step": 5813 }, { "epoch": 0.52, "grad_norm": 5.088184677539254, "learning_rate": 8.683083964338728e-06, "loss": 0.9411, "step": 5814 }, { "epoch": 0.52, "grad_norm": 5.937311078827777, "learning_rate": 8.682595378729925e-06, "loss": 0.7493, "step": 5815 }, { "epoch": 0.52, "grad_norm": 6.759193239561655, "learning_rate": 8.68210671625501e-06, "loss": 0.8668, "step": 5816 }, { "epoch": 0.52, "grad_norm": 4.970133383131887, "learning_rate": 8.681617976924179e-06, "loss": 0.8696, "step": 5817 }, { "epoch": 0.52, "grad_norm": 6.1452477626782915, "learning_rate": 8.681129160747636e-06, "loss": 0.8789, "step": 5818 }, { "epoch": 0.52, "grad_norm": 5.337485276630007, "learning_rate": 8.680640267735582e-06, "loss": 0.8619, "step": 5819 }, { "epoch": 0.52, "grad_norm": 6.620873318200385, "learning_rate": 8.680151297898224e-06, "loss": 0.7877, "step": 5820 }, { "epoch": 0.52, "grad_norm": 6.452621604022749, "learning_rate": 8.679662251245768e-06, "loss": 0.8181, "step": 5821 }, { "epoch": 0.52, "grad_norm": 5.446134143378865, "learning_rate": 8.679173127788418e-06, "loss": 0.8256, "step": 5822 }, { "epoch": 0.52, "grad_norm": 6.390551931565307, "learning_rate": 8.678683927536389e-06, "loss": 0.8467, "step": 5823 }, { "epoch": 0.52, "grad_norm": 7.001840565952541, "learning_rate": 8.678194650499888e-06, "loss": 0.7517, "step": 5824 }, { "epoch": 0.52, "grad_norm": 7.725662748993245, "learning_rate": 8.67770529668913e-06, "loss": 0.8347, "step": 5825 }, { "epoch": 0.52, "grad_norm": 4.937090576892703, "learning_rate": 8.677215866114326e-06, "loss": 0.7682, "step": 5826 }, { "epoch": 0.52, "grad_norm": 5.664380607957075, "learning_rate": 8.676726358785695e-06, "loss": 0.8393, "step": 5827 }, { "epoch": 0.52, "grad_norm": 6.21904737552294, "learning_rate": 8.67623677471345e-06, "loss": 0.8114, "step": 5828 }, { "epoch": 0.52, "grad_norm": 5.1324640590837785, "learning_rate": 8.675747113907818e-06, "loss": 0.8728, "step": 5829 }, { "epoch": 0.52, "grad_norm": 8.554614584810185, "learning_rate": 8.675257376379013e-06, "loss": 0.8018, "step": 5830 }, { "epoch": 0.52, "grad_norm": 6.421788661747229, "learning_rate": 8.674767562137258e-06, "loss": 0.798, "step": 5831 }, { "epoch": 0.52, "grad_norm": 5.452455108181789, "learning_rate": 8.67427767119278e-06, "loss": 0.7945, "step": 5832 }, { "epoch": 0.52, "grad_norm": 4.776367611496056, "learning_rate": 8.673787703555797e-06, "loss": 0.8481, "step": 5833 }, { "epoch": 0.52, "grad_norm": 7.154290080262535, "learning_rate": 8.673297659236546e-06, "loss": 0.8308, "step": 5834 }, { "epoch": 0.52, "grad_norm": 5.881841263429405, "learning_rate": 8.67280753824525e-06, "loss": 0.7705, "step": 5835 }, { "epoch": 0.52, "grad_norm": 5.284639909928041, "learning_rate": 8.67231734059214e-06, "loss": 0.8564, "step": 5836 }, { "epoch": 0.52, "grad_norm": 5.907975559718964, "learning_rate": 8.671827066287446e-06, "loss": 0.7754, "step": 5837 }, { "epoch": 0.52, "grad_norm": 6.116066419596677, "learning_rate": 8.671336715341406e-06, "loss": 0.8135, "step": 5838 }, { "epoch": 0.52, "grad_norm": 6.556805765809384, "learning_rate": 8.670846287764249e-06, "loss": 0.7778, "step": 5839 }, { "epoch": 0.52, "grad_norm": 4.4224575446521674, "learning_rate": 8.670355783566216e-06, "loss": 0.8272, "step": 5840 }, { "epoch": 0.52, "grad_norm": 7.525716811210102, "learning_rate": 8.669865202757545e-06, "loss": 0.7752, "step": 5841 }, { "epoch": 0.52, "grad_norm": 5.038632069834694, "learning_rate": 8.669374545348474e-06, "loss": 0.8002, "step": 5842 }, { "epoch": 0.52, "grad_norm": 5.785049194308323, "learning_rate": 8.668883811349244e-06, "loss": 0.7265, "step": 5843 }, { "epoch": 0.52, "grad_norm": 4.859634297291458, "learning_rate": 8.668393000770101e-06, "loss": 0.7786, "step": 5844 }, { "epoch": 0.52, "grad_norm": 5.1976172305602315, "learning_rate": 8.667902113621287e-06, "loss": 0.8001, "step": 5845 }, { "epoch": 0.52, "grad_norm": 5.6828375295949565, "learning_rate": 8.667411149913049e-06, "loss": 0.8118, "step": 5846 }, { "epoch": 0.52, "grad_norm": 7.278795048680833, "learning_rate": 8.666920109655635e-06, "loss": 0.8459, "step": 5847 }, { "epoch": 0.52, "grad_norm": 7.209184965448266, "learning_rate": 8.666428992859292e-06, "loss": 0.845, "step": 5848 }, { "epoch": 0.52, "grad_norm": 6.243022643755357, "learning_rate": 8.665937799534276e-06, "loss": 0.8816, "step": 5849 }, { "epoch": 0.52, "grad_norm": 5.189025727739834, "learning_rate": 8.665446529690835e-06, "loss": 0.8617, "step": 5850 }, { "epoch": 0.52, "grad_norm": 6.725799718597585, "learning_rate": 8.664955183339225e-06, "loss": 0.7671, "step": 5851 }, { "epoch": 0.52, "grad_norm": 6.26458728182319, "learning_rate": 8.664463760489702e-06, "loss": 0.8669, "step": 5852 }, { "epoch": 0.52, "grad_norm": 6.009630389915014, "learning_rate": 8.663972261152525e-06, "loss": 0.8506, "step": 5853 }, { "epoch": 0.52, "grad_norm": 5.590625896228143, "learning_rate": 8.663480685337949e-06, "loss": 0.807, "step": 5854 }, { "epoch": 0.52, "grad_norm": 7.442163173205468, "learning_rate": 8.662989033056236e-06, "loss": 0.8555, "step": 5855 }, { "epoch": 0.52, "grad_norm": 5.439306543111314, "learning_rate": 8.66249730431765e-06, "loss": 0.8224, "step": 5856 }, { "epoch": 0.52, "grad_norm": 5.142572838905145, "learning_rate": 8.662005499132453e-06, "loss": 0.8121, "step": 5857 }, { "epoch": 0.52, "grad_norm": 6.548074491232959, "learning_rate": 8.66151361751091e-06, "loss": 0.7792, "step": 5858 }, { "epoch": 0.52, "grad_norm": 6.01415768658376, "learning_rate": 8.661021659463291e-06, "loss": 0.8657, "step": 5859 }, { "epoch": 0.52, "grad_norm": 7.353882542965322, "learning_rate": 8.66052962499986e-06, "loss": 0.87, "step": 5860 }, { "epoch": 0.52, "grad_norm": 5.7456530099919805, "learning_rate": 8.66003751413089e-06, "loss": 0.7978, "step": 5861 }, { "epoch": 0.52, "grad_norm": 4.555662987495287, "learning_rate": 8.659545326866654e-06, "loss": 0.804, "step": 5862 }, { "epoch": 0.52, "grad_norm": 7.78475662611469, "learning_rate": 8.659053063217423e-06, "loss": 0.8775, "step": 5863 }, { "epoch": 0.52, "grad_norm": 6.701464067362781, "learning_rate": 8.65856072319347e-06, "loss": 0.836, "step": 5864 }, { "epoch": 0.52, "grad_norm": 6.496276721066393, "learning_rate": 8.658068306805076e-06, "loss": 0.8648, "step": 5865 }, { "epoch": 0.52, "grad_norm": 6.048691600795879, "learning_rate": 8.657575814062518e-06, "loss": 0.8248, "step": 5866 }, { "epoch": 0.52, "grad_norm": 7.0588862343307825, "learning_rate": 8.657083244976074e-06, "loss": 0.8256, "step": 5867 }, { "epoch": 0.52, "grad_norm": 6.902783416051479, "learning_rate": 8.656590599556024e-06, "loss": 0.9138, "step": 5868 }, { "epoch": 0.52, "grad_norm": 5.461219340601897, "learning_rate": 8.656097877812658e-06, "loss": 0.8361, "step": 5869 }, { "epoch": 0.52, "grad_norm": 5.461641741271912, "learning_rate": 8.65560507975625e-06, "loss": 0.8137, "step": 5870 }, { "epoch": 0.52, "grad_norm": 5.62478958979562, "learning_rate": 8.655112205397096e-06, "loss": 0.7629, "step": 5871 }, { "epoch": 0.52, "grad_norm": 6.830251388527715, "learning_rate": 8.654619254745476e-06, "loss": 0.8386, "step": 5872 }, { "epoch": 0.52, "grad_norm": 5.173822926207818, "learning_rate": 8.654126227811685e-06, "loss": 0.8908, "step": 5873 }, { "epoch": 0.52, "grad_norm": 5.192119037646925, "learning_rate": 8.65363312460601e-06, "loss": 0.8485, "step": 5874 }, { "epoch": 0.52, "grad_norm": 4.286649318842872, "learning_rate": 8.653139945138746e-06, "loss": 0.9107, "step": 5875 }, { "epoch": 0.52, "grad_norm": 5.802309909232628, "learning_rate": 8.652646689420184e-06, "loss": 0.8234, "step": 5876 }, { "epoch": 0.52, "grad_norm": 4.9387858974413295, "learning_rate": 8.652153357460622e-06, "loss": 0.7789, "step": 5877 }, { "epoch": 0.52, "grad_norm": 4.9985928919903655, "learning_rate": 8.65165994927036e-06, "loss": 0.7443, "step": 5878 }, { "epoch": 0.52, "grad_norm": 4.511388774308987, "learning_rate": 8.651166464859689e-06, "loss": 0.835, "step": 5879 }, { "epoch": 0.52, "grad_norm": 5.379462188182881, "learning_rate": 8.650672904238917e-06, "loss": 0.7797, "step": 5880 }, { "epoch": 0.52, "grad_norm": 7.965018981372429, "learning_rate": 8.65017926741834e-06, "loss": 0.8612, "step": 5881 }, { "epoch": 0.52, "grad_norm": 6.4198053829118615, "learning_rate": 8.649685554408267e-06, "loss": 0.7675, "step": 5882 }, { "epoch": 0.52, "grad_norm": 4.255484195011597, "learning_rate": 8.649191765219001e-06, "loss": 0.7866, "step": 5883 }, { "epoch": 0.52, "grad_norm": 7.457003922008676, "learning_rate": 8.648697899860848e-06, "loss": 0.7731, "step": 5884 }, { "epoch": 0.53, "grad_norm": 6.716223652204424, "learning_rate": 8.648203958344115e-06, "loss": 0.7296, "step": 5885 }, { "epoch": 0.53, "grad_norm": 10.834241749051515, "learning_rate": 8.647709940679116e-06, "loss": 0.8541, "step": 5886 }, { "epoch": 0.53, "grad_norm": 5.944107135815664, "learning_rate": 8.647215846876158e-06, "loss": 0.8406, "step": 5887 }, { "epoch": 0.53, "grad_norm": 7.176779518193242, "learning_rate": 8.646721676945559e-06, "loss": 0.8582, "step": 5888 }, { "epoch": 0.53, "grad_norm": 6.801428213908792, "learning_rate": 8.64622743089763e-06, "loss": 0.8694, "step": 5889 }, { "epoch": 0.53, "grad_norm": 6.229578357317486, "learning_rate": 8.645733108742689e-06, "loss": 0.7349, "step": 5890 }, { "epoch": 0.53, "grad_norm": 5.906458347544252, "learning_rate": 8.645238710491054e-06, "loss": 0.824, "step": 5891 }, { "epoch": 0.53, "grad_norm": 5.581578044411557, "learning_rate": 8.644744236153042e-06, "loss": 0.7604, "step": 5892 }, { "epoch": 0.53, "grad_norm": 5.676728896862765, "learning_rate": 8.644249685738977e-06, "loss": 0.8697, "step": 5893 }, { "epoch": 0.53, "grad_norm": 3.945961269684335, "learning_rate": 8.643755059259181e-06, "loss": 0.8648, "step": 5894 }, { "epoch": 0.53, "grad_norm": 5.7761591165840835, "learning_rate": 8.643260356723977e-06, "loss": 0.8491, "step": 5895 }, { "epoch": 0.53, "grad_norm": 5.963085768233817, "learning_rate": 8.64276557814369e-06, "loss": 0.8552, "step": 5896 }, { "epoch": 0.53, "grad_norm": 6.017270489091205, "learning_rate": 8.642270723528652e-06, "loss": 0.8453, "step": 5897 }, { "epoch": 0.53, "grad_norm": 5.176265424313485, "learning_rate": 8.641775792889188e-06, "loss": 0.8161, "step": 5898 }, { "epoch": 0.53, "grad_norm": 5.518456062944514, "learning_rate": 8.641280786235631e-06, "loss": 0.7776, "step": 5899 }, { "epoch": 0.53, "grad_norm": 5.827923844505221, "learning_rate": 8.64078570357831e-06, "loss": 0.849, "step": 5900 }, { "epoch": 0.53, "grad_norm": 4.405540099988824, "learning_rate": 8.640290544927561e-06, "loss": 0.8377, "step": 5901 }, { "epoch": 0.53, "grad_norm": 5.482105532833051, "learning_rate": 8.63979531029372e-06, "loss": 0.8671, "step": 5902 }, { "epoch": 0.53, "grad_norm": 5.180400037588014, "learning_rate": 8.63929999968712e-06, "loss": 0.8458, "step": 5903 }, { "epoch": 0.53, "grad_norm": 6.708669658746596, "learning_rate": 8.638804613118106e-06, "loss": 0.8311, "step": 5904 }, { "epoch": 0.53, "grad_norm": 5.643412347997996, "learning_rate": 8.638309150597013e-06, "loss": 0.9175, "step": 5905 }, { "epoch": 0.53, "grad_norm": 5.642704343252684, "learning_rate": 8.637813612134183e-06, "loss": 0.8153, "step": 5906 }, { "epoch": 0.53, "grad_norm": 5.690201966400359, "learning_rate": 8.637317997739962e-06, "loss": 0.8545, "step": 5907 }, { "epoch": 0.53, "grad_norm": 5.197513503545189, "learning_rate": 8.636822307424694e-06, "loss": 0.8307, "step": 5908 }, { "epoch": 0.53, "grad_norm": 6.624004293318079, "learning_rate": 8.636326541198724e-06, "loss": 0.7764, "step": 5909 }, { "epoch": 0.53, "grad_norm": 5.421440928363879, "learning_rate": 8.6358306990724e-06, "loss": 0.8429, "step": 5910 }, { "epoch": 0.53, "grad_norm": 6.732823558050309, "learning_rate": 8.635334781056074e-06, "loss": 0.861, "step": 5911 }, { "epoch": 0.53, "grad_norm": 5.921574147408365, "learning_rate": 8.634838787160096e-06, "loss": 0.8272, "step": 5912 }, { "epoch": 0.53, "grad_norm": 6.32225569071901, "learning_rate": 8.634342717394817e-06, "loss": 0.8917, "step": 5913 }, { "epoch": 0.53, "grad_norm": 4.802714798385363, "learning_rate": 8.633846571770593e-06, "loss": 0.8183, "step": 5914 }, { "epoch": 0.53, "grad_norm": 6.929354104310692, "learning_rate": 8.63335035029778e-06, "loss": 0.8429, "step": 5915 }, { "epoch": 0.53, "grad_norm": 6.2772776954939635, "learning_rate": 8.632854052986736e-06, "loss": 0.8284, "step": 5916 }, { "epoch": 0.53, "grad_norm": 6.662468199443719, "learning_rate": 8.63235767984782e-06, "loss": 0.8029, "step": 5917 }, { "epoch": 0.53, "grad_norm": 5.890829798335322, "learning_rate": 8.631861230891391e-06, "loss": 0.7785, "step": 5918 }, { "epoch": 0.53, "grad_norm": 6.520583680228424, "learning_rate": 8.631364706127812e-06, "loss": 0.808, "step": 5919 }, { "epoch": 0.53, "grad_norm": 6.857123804299668, "learning_rate": 8.630868105567449e-06, "loss": 0.8411, "step": 5920 }, { "epoch": 0.53, "grad_norm": 6.2795063651370135, "learning_rate": 8.630371429220665e-06, "loss": 0.7814, "step": 5921 }, { "epoch": 0.53, "grad_norm": 5.40924820880883, "learning_rate": 8.629874677097827e-06, "loss": 0.873, "step": 5922 }, { "epoch": 0.53, "grad_norm": 6.769645820266975, "learning_rate": 8.629377849209303e-06, "loss": 0.8272, "step": 5923 }, { "epoch": 0.53, "grad_norm": 5.762479328512109, "learning_rate": 8.628880945565468e-06, "loss": 0.8113, "step": 5924 }, { "epoch": 0.53, "grad_norm": 6.69112130342404, "learning_rate": 8.628383966176688e-06, "loss": 0.8505, "step": 5925 }, { "epoch": 0.53, "grad_norm": 5.310612958416333, "learning_rate": 8.62788691105334e-06, "loss": 0.8108, "step": 5926 }, { "epoch": 0.53, "grad_norm": 5.588636258431608, "learning_rate": 8.627389780205798e-06, "loss": 0.8097, "step": 5927 }, { "epoch": 0.53, "grad_norm": 7.459016232186524, "learning_rate": 8.626892573644437e-06, "loss": 0.8005, "step": 5928 }, { "epoch": 0.53, "grad_norm": 7.429122217944679, "learning_rate": 8.626395291379636e-06, "loss": 0.8408, "step": 5929 }, { "epoch": 0.53, "grad_norm": 7.2643040375769194, "learning_rate": 8.625897933421777e-06, "loss": 0.7563, "step": 5930 }, { "epoch": 0.53, "grad_norm": 6.513483902972747, "learning_rate": 8.625400499781239e-06, "loss": 0.7732, "step": 5931 }, { "epoch": 0.53, "grad_norm": 4.115078516425911, "learning_rate": 8.624902990468404e-06, "loss": 0.7783, "step": 5932 }, { "epoch": 0.53, "grad_norm": 5.311436106180336, "learning_rate": 8.624405405493656e-06, "loss": 0.8585, "step": 5933 }, { "epoch": 0.53, "grad_norm": 5.331074276461183, "learning_rate": 8.623907744867384e-06, "loss": 0.8107, "step": 5934 }, { "epoch": 0.53, "grad_norm": 5.799763763312336, "learning_rate": 8.623410008599975e-06, "loss": 0.8502, "step": 5935 }, { "epoch": 0.53, "grad_norm": 5.900972926138916, "learning_rate": 8.622912196701816e-06, "loss": 0.8297, "step": 5936 }, { "epoch": 0.53, "grad_norm": 5.029524088305606, "learning_rate": 8.6224143091833e-06, "loss": 0.871, "step": 5937 }, { "epoch": 0.53, "grad_norm": 5.438269374370796, "learning_rate": 8.621916346054816e-06, "loss": 0.8998, "step": 5938 }, { "epoch": 0.53, "grad_norm": 5.750549003815412, "learning_rate": 8.621418307326761e-06, "loss": 0.7803, "step": 5939 }, { "epoch": 0.53, "grad_norm": 5.106996777166243, "learning_rate": 8.62092019300953e-06, "loss": 0.9078, "step": 5940 }, { "epoch": 0.53, "grad_norm": 6.83939390845059, "learning_rate": 8.62042200311352e-06, "loss": 0.8817, "step": 5941 }, { "epoch": 0.53, "grad_norm": 4.9092518171983945, "learning_rate": 8.61992373764913e-06, "loss": 0.7392, "step": 5942 }, { "epoch": 0.53, "grad_norm": 5.315744593338969, "learning_rate": 8.619425396626758e-06, "loss": 0.8435, "step": 5943 }, { "epoch": 0.53, "grad_norm": 6.721528522038859, "learning_rate": 8.618926980056808e-06, "loss": 0.8222, "step": 5944 }, { "epoch": 0.53, "grad_norm": 6.718669231105705, "learning_rate": 8.618428487949682e-06, "loss": 0.8099, "step": 5945 }, { "epoch": 0.53, "grad_norm": 6.554078076711697, "learning_rate": 8.617929920315787e-06, "loss": 0.708, "step": 5946 }, { "epoch": 0.53, "grad_norm": 7.275317540329718, "learning_rate": 8.617431277165526e-06, "loss": 0.7724, "step": 5947 }, { "epoch": 0.53, "grad_norm": 6.034451063293915, "learning_rate": 8.61693255850931e-06, "loss": 0.8317, "step": 5948 }, { "epoch": 0.53, "grad_norm": 6.3283445802996185, "learning_rate": 8.616433764357547e-06, "loss": 0.8071, "step": 5949 }, { "epoch": 0.53, "grad_norm": 6.41451294868157, "learning_rate": 8.615934894720653e-06, "loss": 0.8824, "step": 5950 }, { "epoch": 0.53, "grad_norm": 7.7283094830664165, "learning_rate": 8.615435949609032e-06, "loss": 0.8068, "step": 5951 }, { "epoch": 0.53, "grad_norm": 5.864314246203908, "learning_rate": 8.614936929033105e-06, "loss": 0.8084, "step": 5952 }, { "epoch": 0.53, "grad_norm": 7.866311440323056, "learning_rate": 8.614437833003286e-06, "loss": 0.8776, "step": 5953 }, { "epoch": 0.53, "grad_norm": 6.689370667413159, "learning_rate": 8.613938661529992e-06, "loss": 0.7869, "step": 5954 }, { "epoch": 0.53, "grad_norm": 8.425490894399791, "learning_rate": 8.613439414623645e-06, "loss": 0.9229, "step": 5955 }, { "epoch": 0.53, "grad_norm": 7.093928133776955, "learning_rate": 8.612940092294661e-06, "loss": 0.7636, "step": 5956 }, { "epoch": 0.53, "grad_norm": 8.10916176993494, "learning_rate": 8.612440694553468e-06, "loss": 0.857, "step": 5957 }, { "epoch": 0.53, "grad_norm": 5.705470878358799, "learning_rate": 8.611941221410484e-06, "loss": 0.7981, "step": 5958 }, { "epoch": 0.53, "grad_norm": 5.96723794157607, "learning_rate": 8.611441672876137e-06, "loss": 0.7793, "step": 5959 }, { "epoch": 0.53, "grad_norm": 4.940457766662916, "learning_rate": 8.610942048960855e-06, "loss": 0.7517, "step": 5960 }, { "epoch": 0.53, "grad_norm": 6.694537577750331, "learning_rate": 8.610442349675065e-06, "loss": 0.8323, "step": 5961 }, { "epoch": 0.53, "grad_norm": 6.5777513851493286, "learning_rate": 8.609942575029198e-06, "loss": 0.762, "step": 5962 }, { "epoch": 0.53, "grad_norm": 6.003939279995617, "learning_rate": 8.609442725033685e-06, "loss": 0.8118, "step": 5963 }, { "epoch": 0.53, "grad_norm": 14.934523793615586, "learning_rate": 8.60894279969896e-06, "loss": 0.8522, "step": 5964 }, { "epoch": 0.53, "grad_norm": 6.643278177559672, "learning_rate": 8.608442799035458e-06, "loss": 0.8485, "step": 5965 }, { "epoch": 0.53, "grad_norm": 7.1281810442417655, "learning_rate": 8.607942723053615e-06, "loss": 0.8854, "step": 5966 }, { "epoch": 0.53, "grad_norm": 5.792865586905612, "learning_rate": 8.607442571763868e-06, "loss": 0.7569, "step": 5967 }, { "epoch": 0.53, "grad_norm": 7.025156682112292, "learning_rate": 8.606942345176658e-06, "loss": 0.8217, "step": 5968 }, { "epoch": 0.53, "grad_norm": 5.161841221024935, "learning_rate": 8.606442043302426e-06, "loss": 0.7964, "step": 5969 }, { "epoch": 0.53, "grad_norm": 6.331667915014399, "learning_rate": 8.605941666151613e-06, "loss": 0.8385, "step": 5970 }, { "epoch": 0.53, "grad_norm": 5.673320235373131, "learning_rate": 8.605441213734664e-06, "loss": 0.8292, "step": 5971 }, { "epoch": 0.53, "grad_norm": 7.9204880133349675, "learning_rate": 8.604940686062027e-06, "loss": 0.8063, "step": 5972 }, { "epoch": 0.53, "grad_norm": 6.079683749632774, "learning_rate": 8.604440083144147e-06, "loss": 0.8523, "step": 5973 }, { "epoch": 0.53, "grad_norm": 6.414706538958956, "learning_rate": 8.603939404991474e-06, "loss": 0.9264, "step": 5974 }, { "epoch": 0.53, "grad_norm": 7.315688749420987, "learning_rate": 8.60343865161446e-06, "loss": 0.8248, "step": 5975 }, { "epoch": 0.53, "grad_norm": 5.040535569565349, "learning_rate": 8.602937823023554e-06, "loss": 0.8514, "step": 5976 }, { "epoch": 0.53, "grad_norm": 7.275477441647361, "learning_rate": 8.602436919229214e-06, "loss": 0.8042, "step": 5977 }, { "epoch": 0.53, "grad_norm": 6.838148420512196, "learning_rate": 8.601935940241888e-06, "loss": 0.8622, "step": 5978 }, { "epoch": 0.53, "grad_norm": 8.127351709405445, "learning_rate": 8.60143488607204e-06, "loss": 0.8294, "step": 5979 }, { "epoch": 0.53, "grad_norm": 6.229731831395928, "learning_rate": 8.600933756730126e-06, "loss": 0.8372, "step": 5980 }, { "epoch": 0.53, "grad_norm": 6.687414413745216, "learning_rate": 8.600432552226604e-06, "loss": 0.7938, "step": 5981 }, { "epoch": 0.53, "grad_norm": 6.1706882164397365, "learning_rate": 8.59993127257194e-06, "loss": 0.8087, "step": 5982 }, { "epoch": 0.53, "grad_norm": 8.378666956741002, "learning_rate": 8.599429917776592e-06, "loss": 0.9085, "step": 5983 }, { "epoch": 0.53, "grad_norm": 6.427278709788023, "learning_rate": 8.598928487851026e-06, "loss": 0.8498, "step": 5984 }, { "epoch": 0.53, "grad_norm": 5.083798654508727, "learning_rate": 8.598426982805712e-06, "loss": 0.8324, "step": 5985 }, { "epoch": 0.53, "grad_norm": 5.520173582664215, "learning_rate": 8.597925402651114e-06, "loss": 0.8136, "step": 5986 }, { "epoch": 0.53, "grad_norm": 7.295222407333342, "learning_rate": 8.597423747397705e-06, "loss": 0.8119, "step": 5987 }, { "epoch": 0.53, "grad_norm": 7.023505716786854, "learning_rate": 8.59692201705595e-06, "loss": 0.8329, "step": 5988 }, { "epoch": 0.53, "grad_norm": 6.784247821358579, "learning_rate": 8.596420211636328e-06, "loss": 0.7894, "step": 5989 }, { "epoch": 0.53, "grad_norm": 5.596441308285133, "learning_rate": 8.595918331149307e-06, "loss": 0.8635, "step": 5990 }, { "epoch": 0.53, "grad_norm": 7.108078004362753, "learning_rate": 8.595416375605367e-06, "loss": 0.799, "step": 5991 }, { "epoch": 0.53, "grad_norm": 7.429759819793662, "learning_rate": 8.594914345014984e-06, "loss": 0.8011, "step": 5992 }, { "epoch": 0.53, "grad_norm": 5.4086308965482015, "learning_rate": 8.594412239388638e-06, "loss": 0.8214, "step": 5993 }, { "epoch": 0.53, "grad_norm": 5.466529788010683, "learning_rate": 8.593910058736807e-06, "loss": 0.8241, "step": 5994 }, { "epoch": 0.53, "grad_norm": 4.799862594530469, "learning_rate": 8.593407803069973e-06, "loss": 0.8075, "step": 5995 }, { "epoch": 0.53, "grad_norm": 4.414700947156203, "learning_rate": 8.592905472398622e-06, "loss": 0.8647, "step": 5996 }, { "epoch": 0.53, "grad_norm": 5.617586864207485, "learning_rate": 8.592403066733237e-06, "loss": 0.8335, "step": 5997 }, { "epoch": 0.54, "grad_norm": 6.370469646604213, "learning_rate": 8.591900586084305e-06, "loss": 0.806, "step": 5998 }, { "epoch": 0.54, "grad_norm": 5.142305241946222, "learning_rate": 8.591398030462315e-06, "loss": 0.8813, "step": 5999 }, { "epoch": 0.54, "grad_norm": 5.988350473328172, "learning_rate": 8.590895399877757e-06, "loss": 0.7867, "step": 6000 }, { "epoch": 0.54, "grad_norm": 4.294499847417179, "learning_rate": 8.590392694341121e-06, "loss": 0.7925, "step": 6001 }, { "epoch": 0.54, "grad_norm": 5.517592930473238, "learning_rate": 8.589889913862899e-06, "loss": 0.8611, "step": 6002 }, { "epoch": 0.54, "grad_norm": 5.750257312459497, "learning_rate": 8.589387058453588e-06, "loss": 0.8125, "step": 6003 }, { "epoch": 0.54, "grad_norm": 8.33561830592563, "learning_rate": 8.588884128123682e-06, "loss": 0.8147, "step": 6004 }, { "epoch": 0.54, "grad_norm": 4.21561781348251, "learning_rate": 8.58838112288368e-06, "loss": 0.8001, "step": 6005 }, { "epoch": 0.54, "grad_norm": 7.318249581673096, "learning_rate": 8.58787804274408e-06, "loss": 0.813, "step": 6006 }, { "epoch": 0.54, "grad_norm": 6.123830957447272, "learning_rate": 8.587374887715385e-06, "loss": 0.7966, "step": 6007 }, { "epoch": 0.54, "grad_norm": 4.957086316518475, "learning_rate": 8.586871657808092e-06, "loss": 0.7476, "step": 6008 }, { "epoch": 0.54, "grad_norm": 5.728084802369861, "learning_rate": 8.58636835303271e-06, "loss": 0.9083, "step": 6009 }, { "epoch": 0.54, "grad_norm": 4.137105485595029, "learning_rate": 8.585864973399743e-06, "loss": 0.8642, "step": 6010 }, { "epoch": 0.54, "grad_norm": 12.71945085109548, "learning_rate": 8.585361518919698e-06, "loss": 0.8418, "step": 6011 }, { "epoch": 0.54, "grad_norm": 6.993426007414448, "learning_rate": 8.584857989603082e-06, "loss": 0.8366, "step": 6012 }, { "epoch": 0.54, "grad_norm": 7.948656286437939, "learning_rate": 8.584354385460408e-06, "loss": 0.8575, "step": 6013 }, { "epoch": 0.54, "grad_norm": 6.6570305329388075, "learning_rate": 8.583850706502184e-06, "loss": 0.7891, "step": 6014 }, { "epoch": 0.54, "grad_norm": 5.882073310637497, "learning_rate": 8.583346952738924e-06, "loss": 0.7639, "step": 6015 }, { "epoch": 0.54, "grad_norm": 7.329641906290548, "learning_rate": 8.582843124181147e-06, "loss": 0.8096, "step": 6016 }, { "epoch": 0.54, "grad_norm": 5.684171315254989, "learning_rate": 8.582339220839363e-06, "loss": 0.8158, "step": 6017 }, { "epoch": 0.54, "grad_norm": 5.158475102298465, "learning_rate": 8.581835242724095e-06, "loss": 0.7874, "step": 6018 }, { "epoch": 0.54, "grad_norm": 5.610236124576656, "learning_rate": 8.581331189845859e-06, "loss": 0.8435, "step": 6019 }, { "epoch": 0.54, "grad_norm": 4.536418122890589, "learning_rate": 8.580827062215178e-06, "loss": 0.8213, "step": 6020 }, { "epoch": 0.54, "grad_norm": 4.9736651117460235, "learning_rate": 8.580322859842574e-06, "loss": 0.8352, "step": 6021 }, { "epoch": 0.54, "grad_norm": 6.594432355310355, "learning_rate": 8.57981858273857e-06, "loss": 0.8288, "step": 6022 }, { "epoch": 0.54, "grad_norm": 6.781530037782979, "learning_rate": 8.579314230913693e-06, "loss": 0.7503, "step": 6023 }, { "epoch": 0.54, "grad_norm": 5.577182540948343, "learning_rate": 8.578809804378472e-06, "loss": 0.7775, "step": 6024 }, { "epoch": 0.54, "grad_norm": 4.235089924482543, "learning_rate": 8.578305303143433e-06, "loss": 0.8512, "step": 6025 }, { "epoch": 0.54, "grad_norm": 6.913389997235769, "learning_rate": 8.577800727219106e-06, "loss": 0.8416, "step": 6026 }, { "epoch": 0.54, "grad_norm": 5.215529289311893, "learning_rate": 8.577296076616025e-06, "loss": 0.8449, "step": 6027 }, { "epoch": 0.54, "grad_norm": 7.343390857706067, "learning_rate": 8.576791351344722e-06, "loss": 0.7862, "step": 6028 }, { "epoch": 0.54, "grad_norm": 4.7929412914168115, "learning_rate": 8.576286551415733e-06, "loss": 0.7394, "step": 6029 }, { "epoch": 0.54, "grad_norm": 5.158746094612381, "learning_rate": 8.575781676839594e-06, "loss": 0.7281, "step": 6030 }, { "epoch": 0.54, "grad_norm": 8.797987756338955, "learning_rate": 8.575276727626843e-06, "loss": 0.8129, "step": 6031 }, { "epoch": 0.54, "grad_norm": 6.33901101128853, "learning_rate": 8.574771703788021e-06, "loss": 0.8704, "step": 6032 }, { "epoch": 0.54, "grad_norm": 7.311812695714598, "learning_rate": 8.574266605333668e-06, "loss": 0.8472, "step": 6033 }, { "epoch": 0.54, "grad_norm": 5.108686108660076, "learning_rate": 8.573761432274326e-06, "loss": 0.8005, "step": 6034 }, { "epoch": 0.54, "grad_norm": 5.869878440707376, "learning_rate": 8.573256184620542e-06, "loss": 0.8498, "step": 6035 }, { "epoch": 0.54, "grad_norm": 6.172123977414717, "learning_rate": 8.57275086238286e-06, "loss": 0.838, "step": 6036 }, { "epoch": 0.54, "grad_norm": 6.355445572149422, "learning_rate": 8.572245465571828e-06, "loss": 0.8196, "step": 6037 }, { "epoch": 0.54, "grad_norm": 6.789499685135674, "learning_rate": 8.571739994197994e-06, "loss": 0.8848, "step": 6038 }, { "epoch": 0.54, "grad_norm": 4.853447641193016, "learning_rate": 8.571234448271911e-06, "loss": 0.8399, "step": 6039 }, { "epoch": 0.54, "grad_norm": 5.165967187492139, "learning_rate": 8.57072882780413e-06, "loss": 0.7947, "step": 6040 }, { "epoch": 0.54, "grad_norm": 5.608968489966911, "learning_rate": 8.570223132805202e-06, "loss": 0.777, "step": 6041 }, { "epoch": 0.54, "grad_norm": 6.243848141933354, "learning_rate": 8.569717363285685e-06, "loss": 0.7717, "step": 6042 }, { "epoch": 0.54, "grad_norm": 10.477802172375956, "learning_rate": 8.569211519256138e-06, "loss": 0.8604, "step": 6043 }, { "epoch": 0.54, "grad_norm": 4.981491507194219, "learning_rate": 8.568705600727117e-06, "loss": 0.7751, "step": 6044 }, { "epoch": 0.54, "grad_norm": 6.0447522740171, "learning_rate": 8.56819960770918e-06, "loss": 0.8447, "step": 6045 }, { "epoch": 0.54, "grad_norm": 7.381274889033219, "learning_rate": 8.56769354021289e-06, "loss": 0.7847, "step": 6046 }, { "epoch": 0.54, "grad_norm": 4.584585391135997, "learning_rate": 8.567187398248813e-06, "loss": 0.8455, "step": 6047 }, { "epoch": 0.54, "grad_norm": 5.746923541961522, "learning_rate": 8.56668118182751e-06, "loss": 0.7936, "step": 6048 }, { "epoch": 0.54, "grad_norm": 5.524721831482064, "learning_rate": 8.566174890959547e-06, "loss": 0.795, "step": 6049 }, { "epoch": 0.54, "grad_norm": 7.1344982758962265, "learning_rate": 8.565668525655495e-06, "loss": 0.8247, "step": 6050 }, { "epoch": 0.54, "grad_norm": 4.383353942600456, "learning_rate": 8.56516208592592e-06, "loss": 0.7965, "step": 6051 }, { "epoch": 0.54, "grad_norm": 6.039456429482954, "learning_rate": 8.564655571781393e-06, "loss": 0.7687, "step": 6052 }, { "epoch": 0.54, "grad_norm": 5.312853261598657, "learning_rate": 8.56414898323249e-06, "loss": 0.7442, "step": 6053 }, { "epoch": 0.54, "grad_norm": 4.911361852922648, "learning_rate": 8.56364232028978e-06, "loss": 0.8076, "step": 6054 }, { "epoch": 0.54, "grad_norm": 4.959570682076482, "learning_rate": 8.563135582963841e-06, "loss": 0.7801, "step": 6055 }, { "epoch": 0.54, "grad_norm": 4.609395870367582, "learning_rate": 8.56262877126525e-06, "loss": 0.7987, "step": 6056 }, { "epoch": 0.54, "grad_norm": 5.06570991456562, "learning_rate": 8.562121885204587e-06, "loss": 0.8131, "step": 6057 }, { "epoch": 0.54, "grad_norm": 6.045374404102717, "learning_rate": 8.56161492479243e-06, "loss": 0.7604, "step": 6058 }, { "epoch": 0.54, "grad_norm": 5.725501047047067, "learning_rate": 8.561107890039359e-06, "loss": 0.7825, "step": 6059 }, { "epoch": 0.54, "grad_norm": 6.225849473414879, "learning_rate": 8.560600780955964e-06, "loss": 0.7995, "step": 6060 }, { "epoch": 0.54, "grad_norm": 5.614255153649072, "learning_rate": 8.56009359755282e-06, "loss": 0.8815, "step": 6061 }, { "epoch": 0.54, "grad_norm": 5.682355715574637, "learning_rate": 8.559586339840522e-06, "loss": 0.8345, "step": 6062 }, { "epoch": 0.54, "grad_norm": 5.972625146098602, "learning_rate": 8.559079007829654e-06, "loss": 0.8417, "step": 6063 }, { "epoch": 0.54, "grad_norm": 6.94103059165331, "learning_rate": 8.558571601530805e-06, "loss": 0.8761, "step": 6064 }, { "epoch": 0.54, "grad_norm": 4.406715050488156, "learning_rate": 8.558064120954569e-06, "loss": 0.8264, "step": 6065 }, { "epoch": 0.54, "grad_norm": 5.903030652934619, "learning_rate": 8.557556566111535e-06, "loss": 0.8228, "step": 6066 }, { "epoch": 0.54, "grad_norm": 7.3949183498334, "learning_rate": 8.557048937012298e-06, "loss": 0.8407, "step": 6067 }, { "epoch": 0.54, "grad_norm": 5.52738558684205, "learning_rate": 8.556541233667454e-06, "loss": 0.7759, "step": 6068 }, { "epoch": 0.54, "grad_norm": 7.000199939478609, "learning_rate": 8.556033456087602e-06, "loss": 0.8439, "step": 6069 }, { "epoch": 0.54, "grad_norm": 5.942933993521471, "learning_rate": 8.555525604283338e-06, "loss": 0.743, "step": 6070 }, { "epoch": 0.54, "grad_norm": 5.018904782405857, "learning_rate": 8.555017678265265e-06, "loss": 0.8322, "step": 6071 }, { "epoch": 0.54, "grad_norm": 5.151466641347401, "learning_rate": 8.554509678043983e-06, "loss": 0.84, "step": 6072 }, { "epoch": 0.54, "grad_norm": 6.668336978515194, "learning_rate": 8.554001603630094e-06, "loss": 0.7679, "step": 6073 }, { "epoch": 0.54, "grad_norm": 5.773401107237038, "learning_rate": 8.553493455034205e-06, "loss": 0.8802, "step": 6074 }, { "epoch": 0.54, "grad_norm": 5.921911830248139, "learning_rate": 8.552985232266924e-06, "loss": 0.7559, "step": 6075 }, { "epoch": 0.54, "grad_norm": 6.5261787581851305, "learning_rate": 8.552476935338855e-06, "loss": 0.8851, "step": 6076 }, { "epoch": 0.54, "grad_norm": 5.323435997468076, "learning_rate": 8.551968564260613e-06, "loss": 0.8498, "step": 6077 }, { "epoch": 0.54, "grad_norm": 4.031032940954554, "learning_rate": 8.551460119042803e-06, "loss": 0.7812, "step": 6078 }, { "epoch": 0.54, "grad_norm": 5.8417264523951715, "learning_rate": 8.550951599696041e-06, "loss": 0.8224, "step": 6079 }, { "epoch": 0.54, "grad_norm": 5.039826768181223, "learning_rate": 8.550443006230942e-06, "loss": 0.847, "step": 6080 }, { "epoch": 0.54, "grad_norm": 7.15145924041698, "learning_rate": 8.549934338658118e-06, "loss": 0.8551, "step": 6081 }, { "epoch": 0.54, "grad_norm": 6.287392523044316, "learning_rate": 8.549425596988191e-06, "loss": 0.824, "step": 6082 }, { "epoch": 0.54, "grad_norm": 5.654812395923941, "learning_rate": 8.548916781231778e-06, "loss": 0.7923, "step": 6083 }, { "epoch": 0.54, "grad_norm": 6.315248697287516, "learning_rate": 8.548407891399498e-06, "loss": 0.8943, "step": 6084 }, { "epoch": 0.54, "grad_norm": 4.733155089508796, "learning_rate": 8.547898927501975e-06, "loss": 0.7794, "step": 6085 }, { "epoch": 0.54, "grad_norm": 6.429365455833748, "learning_rate": 8.547389889549831e-06, "loss": 0.7851, "step": 6086 }, { "epoch": 0.54, "grad_norm": 6.892537681221298, "learning_rate": 8.546880777553692e-06, "loss": 0.8742, "step": 6087 }, { "epoch": 0.54, "grad_norm": 4.295660598221255, "learning_rate": 8.546371591524185e-06, "loss": 0.8295, "step": 6088 }, { "epoch": 0.54, "grad_norm": 4.673502318616251, "learning_rate": 8.545862331471938e-06, "loss": 0.8541, "step": 6089 }, { "epoch": 0.54, "grad_norm": 5.268875701829673, "learning_rate": 8.545352997407577e-06, "loss": 0.7273, "step": 6090 }, { "epoch": 0.54, "grad_norm": 6.480713517069899, "learning_rate": 8.54484358934174e-06, "loss": 0.7951, "step": 6091 }, { "epoch": 0.54, "grad_norm": 5.197659632124393, "learning_rate": 8.544334107285053e-06, "loss": 0.8315, "step": 6092 }, { "epoch": 0.54, "grad_norm": 12.783276290298039, "learning_rate": 8.543824551248154e-06, "loss": 0.8243, "step": 6093 }, { "epoch": 0.54, "grad_norm": 7.96486596010229, "learning_rate": 8.543314921241682e-06, "loss": 0.8482, "step": 6094 }, { "epoch": 0.54, "grad_norm": 7.459199548043038, "learning_rate": 8.542805217276266e-06, "loss": 0.8279, "step": 6095 }, { "epoch": 0.54, "grad_norm": 7.084335575283407, "learning_rate": 8.542295439362552e-06, "loss": 0.8414, "step": 6096 }, { "epoch": 0.54, "grad_norm": 7.475444790242986, "learning_rate": 8.541785587511177e-06, "loss": 0.7853, "step": 6097 }, { "epoch": 0.54, "grad_norm": 5.476033841835467, "learning_rate": 8.541275661732786e-06, "loss": 0.7805, "step": 6098 }, { "epoch": 0.54, "grad_norm": 6.288016445468627, "learning_rate": 8.54076566203802e-06, "loss": 0.8039, "step": 6099 }, { "epoch": 0.54, "grad_norm": 5.58086167340294, "learning_rate": 8.540255588437524e-06, "loss": 0.785, "step": 6100 }, { "epoch": 0.54, "grad_norm": 5.484317924137649, "learning_rate": 8.539745440941947e-06, "loss": 0.8405, "step": 6101 }, { "epoch": 0.54, "grad_norm": 6.558743665017648, "learning_rate": 8.539235219561936e-06, "loss": 0.7911, "step": 6102 }, { "epoch": 0.54, "grad_norm": 10.812253016634005, "learning_rate": 8.53872492430814e-06, "loss": 0.8763, "step": 6103 }, { "epoch": 0.54, "grad_norm": 6.892192534241795, "learning_rate": 8.53821455519121e-06, "loss": 0.7753, "step": 6104 }, { "epoch": 0.54, "grad_norm": 5.155576907203239, "learning_rate": 8.5377041122218e-06, "loss": 0.8438, "step": 6105 }, { "epoch": 0.54, "grad_norm": 5.830494636115569, "learning_rate": 8.537193595410566e-06, "loss": 0.8391, "step": 6106 }, { "epoch": 0.54, "grad_norm": 6.187512729162747, "learning_rate": 8.536683004768161e-06, "loss": 0.8666, "step": 6107 }, { "epoch": 0.54, "grad_norm": 6.538826599601231, "learning_rate": 8.536172340305242e-06, "loss": 0.8379, "step": 6108 }, { "epoch": 0.54, "grad_norm": 6.1413526361094615, "learning_rate": 8.535661602032472e-06, "loss": 0.851, "step": 6109 }, { "epoch": 0.55, "grad_norm": 5.246461776283396, "learning_rate": 8.535150789960507e-06, "loss": 0.8318, "step": 6110 }, { "epoch": 0.55, "grad_norm": 7.027629678423712, "learning_rate": 8.534639904100011e-06, "loss": 0.8214, "step": 6111 }, { "epoch": 0.55, "grad_norm": 7.3097926295388405, "learning_rate": 8.53412894446165e-06, "loss": 0.809, "step": 6112 }, { "epoch": 0.55, "grad_norm": 6.688158955421188, "learning_rate": 8.533617911056084e-06, "loss": 0.808, "step": 6113 }, { "epoch": 0.55, "grad_norm": 5.39351617237998, "learning_rate": 8.533106803893984e-06, "loss": 0.8779, "step": 6114 }, { "epoch": 0.55, "grad_norm": 4.3857744163071395, "learning_rate": 8.532595622986017e-06, "loss": 0.8039, "step": 6115 }, { "epoch": 0.55, "grad_norm": 7.132158925511807, "learning_rate": 8.532084368342854e-06, "loss": 0.7913, "step": 6116 }, { "epoch": 0.55, "grad_norm": 7.652570348626114, "learning_rate": 8.531573039975164e-06, "loss": 0.7603, "step": 6117 }, { "epoch": 0.55, "grad_norm": 5.506853503591832, "learning_rate": 8.531061637893618e-06, "loss": 0.7983, "step": 6118 }, { "epoch": 0.55, "grad_norm": 8.030941631291078, "learning_rate": 8.530550162108897e-06, "loss": 0.8486, "step": 6119 }, { "epoch": 0.55, "grad_norm": 5.350807794703884, "learning_rate": 8.53003861263167e-06, "loss": 0.8908, "step": 6120 }, { "epoch": 0.55, "grad_norm": 5.492747925562131, "learning_rate": 8.529526989472621e-06, "loss": 0.8515, "step": 6121 }, { "epoch": 0.55, "grad_norm": 5.870426288603464, "learning_rate": 8.529015292642423e-06, "loss": 0.7915, "step": 6122 }, { "epoch": 0.55, "grad_norm": 5.2031722424627755, "learning_rate": 8.528503522151759e-06, "loss": 0.9284, "step": 6123 }, { "epoch": 0.55, "grad_norm": 7.060135655263216, "learning_rate": 8.527991678011313e-06, "loss": 0.8799, "step": 6124 }, { "epoch": 0.55, "grad_norm": 7.75945972116728, "learning_rate": 8.527479760231766e-06, "loss": 0.8801, "step": 6125 }, { "epoch": 0.55, "grad_norm": 6.52827871296182, "learning_rate": 8.526967768823805e-06, "loss": 0.7716, "step": 6126 }, { "epoch": 0.55, "grad_norm": 7.544286626629943, "learning_rate": 8.526455703798113e-06, "loss": 0.8145, "step": 6127 }, { "epoch": 0.55, "grad_norm": 5.4289019178874, "learning_rate": 8.525943565165384e-06, "loss": 0.7611, "step": 6128 }, { "epoch": 0.55, "grad_norm": 4.9165742674876185, "learning_rate": 8.525431352936304e-06, "loss": 0.7961, "step": 6129 }, { "epoch": 0.55, "grad_norm": 5.806291070497382, "learning_rate": 8.524919067121565e-06, "loss": 0.7568, "step": 6130 }, { "epoch": 0.55, "grad_norm": 9.473074750498004, "learning_rate": 8.52440670773186e-06, "loss": 0.8254, "step": 6131 }, { "epoch": 0.55, "grad_norm": 4.80676068055187, "learning_rate": 8.523894274777881e-06, "loss": 0.7927, "step": 6132 }, { "epoch": 0.55, "grad_norm": 6.1035006938847065, "learning_rate": 8.523381768270328e-06, "loss": 0.803, "step": 6133 }, { "epoch": 0.55, "grad_norm": 6.8932974768295665, "learning_rate": 8.522869188219897e-06, "loss": 0.8137, "step": 6134 }, { "epoch": 0.55, "grad_norm": 5.57753006792163, "learning_rate": 8.522356534637289e-06, "loss": 0.7931, "step": 6135 }, { "epoch": 0.55, "grad_norm": 6.6830923577533525, "learning_rate": 8.521843807533198e-06, "loss": 0.8466, "step": 6136 }, { "epoch": 0.55, "grad_norm": 22.577365496736437, "learning_rate": 8.521331006918332e-06, "loss": 0.8939, "step": 6137 }, { "epoch": 0.55, "grad_norm": 7.240673115275456, "learning_rate": 8.520818132803393e-06, "loss": 0.9448, "step": 6138 }, { "epoch": 0.55, "grad_norm": 7.680152692879237, "learning_rate": 8.520305185199086e-06, "loss": 0.8448, "step": 6139 }, { "epoch": 0.55, "grad_norm": 8.265266659354326, "learning_rate": 8.519792164116116e-06, "loss": 0.8008, "step": 6140 }, { "epoch": 0.55, "grad_norm": 4.171404773664601, "learning_rate": 8.519279069565194e-06, "loss": 0.7996, "step": 6141 }, { "epoch": 0.55, "grad_norm": 5.936926518889804, "learning_rate": 8.518765901557029e-06, "loss": 0.876, "step": 6142 }, { "epoch": 0.55, "grad_norm": 4.5959364192323, "learning_rate": 8.518252660102332e-06, "loss": 0.7479, "step": 6143 }, { "epoch": 0.55, "grad_norm": 5.583298869792633, "learning_rate": 8.517739345211815e-06, "loss": 0.8352, "step": 6144 }, { "epoch": 0.55, "grad_norm": 6.072393972075555, "learning_rate": 8.517225956896192e-06, "loss": 0.7841, "step": 6145 }, { "epoch": 0.55, "grad_norm": 5.71228958728902, "learning_rate": 8.516712495166181e-06, "loss": 0.7372, "step": 6146 }, { "epoch": 0.55, "grad_norm": 6.194409858498816, "learning_rate": 8.516198960032498e-06, "loss": 0.7673, "step": 6147 }, { "epoch": 0.55, "grad_norm": 6.785605798911552, "learning_rate": 8.515685351505862e-06, "loss": 0.8144, "step": 6148 }, { "epoch": 0.55, "grad_norm": 6.988166068397192, "learning_rate": 8.515171669596994e-06, "loss": 0.8504, "step": 6149 }, { "epoch": 0.55, "grad_norm": 6.64038373616684, "learning_rate": 8.514657914316614e-06, "loss": 0.8506, "step": 6150 }, { "epoch": 0.55, "grad_norm": 6.616194665394371, "learning_rate": 8.514144085675447e-06, "loss": 0.8454, "step": 6151 }, { "epoch": 0.55, "grad_norm": 4.6384352366671235, "learning_rate": 8.513630183684219e-06, "loss": 0.8236, "step": 6152 }, { "epoch": 0.55, "grad_norm": 5.714038505224621, "learning_rate": 8.513116208353656e-06, "loss": 0.849, "step": 6153 }, { "epoch": 0.55, "grad_norm": 5.58433448007138, "learning_rate": 8.512602159694484e-06, "loss": 0.7623, "step": 6154 }, { "epoch": 0.55, "grad_norm": 5.92934196169855, "learning_rate": 8.512088037717435e-06, "loss": 0.8309, "step": 6155 }, { "epoch": 0.55, "grad_norm": 4.3977838168335115, "learning_rate": 8.511573842433241e-06, "loss": 0.7869, "step": 6156 }, { "epoch": 0.55, "grad_norm": 5.672238009435888, "learning_rate": 8.511059573852631e-06, "loss": 0.7694, "step": 6157 }, { "epoch": 0.55, "grad_norm": 8.10240728160039, "learning_rate": 8.510545231986345e-06, "loss": 0.858, "step": 6158 }, { "epoch": 0.55, "grad_norm": 5.403313937104579, "learning_rate": 8.510030816845112e-06, "loss": 0.7655, "step": 6159 }, { "epoch": 0.55, "grad_norm": 6.135652116802077, "learning_rate": 8.509516328439672e-06, "loss": 0.8266, "step": 6160 }, { "epoch": 0.55, "grad_norm": 5.356264189961006, "learning_rate": 8.509001766780767e-06, "loss": 0.8474, "step": 6161 }, { "epoch": 0.55, "grad_norm": 6.535713193115595, "learning_rate": 8.508487131879133e-06, "loss": 0.8218, "step": 6162 }, { "epoch": 0.55, "grad_norm": 7.265778929767966, "learning_rate": 8.507972423745514e-06, "loss": 0.9337, "step": 6163 }, { "epoch": 0.55, "grad_norm": 5.651389247458359, "learning_rate": 8.507457642390655e-06, "loss": 0.7855, "step": 6164 }, { "epoch": 0.55, "grad_norm": 7.0412275337831165, "learning_rate": 8.506942787825297e-06, "loss": 0.7924, "step": 6165 }, { "epoch": 0.55, "grad_norm": 5.869582867265301, "learning_rate": 8.506427860060188e-06, "loss": 0.8657, "step": 6166 }, { "epoch": 0.55, "grad_norm": 4.572228809830339, "learning_rate": 8.505912859106076e-06, "loss": 0.7875, "step": 6167 }, { "epoch": 0.55, "grad_norm": 8.238495264105655, "learning_rate": 8.505397784973713e-06, "loss": 0.876, "step": 6168 }, { "epoch": 0.55, "grad_norm": 5.705198294482988, "learning_rate": 8.504882637673846e-06, "loss": 0.8114, "step": 6169 }, { "epoch": 0.55, "grad_norm": 5.651033572783869, "learning_rate": 8.50436741721723e-06, "loss": 0.8113, "step": 6170 }, { "epoch": 0.55, "grad_norm": 5.3885708148308105, "learning_rate": 8.50385212361462e-06, "loss": 0.9038, "step": 6171 }, { "epoch": 0.55, "grad_norm": 7.781686433451358, "learning_rate": 8.503336756876768e-06, "loss": 0.7905, "step": 6172 }, { "epoch": 0.55, "grad_norm": 5.182124498683177, "learning_rate": 8.502821317014435e-06, "loss": 0.853, "step": 6173 }, { "epoch": 0.55, "grad_norm": 6.165236050079116, "learning_rate": 8.502305804038378e-06, "loss": 0.8581, "step": 6174 }, { "epoch": 0.55, "grad_norm": 5.383905639846727, "learning_rate": 8.501790217959356e-06, "loss": 0.7667, "step": 6175 }, { "epoch": 0.55, "grad_norm": 6.788932636278462, "learning_rate": 8.501274558788132e-06, "loss": 0.8085, "step": 6176 }, { "epoch": 0.55, "grad_norm": 6.232506092646649, "learning_rate": 8.50075882653547e-06, "loss": 0.8351, "step": 6177 }, { "epoch": 0.55, "grad_norm": 7.15309952238115, "learning_rate": 8.500243021212134e-06, "loss": 0.8334, "step": 6178 }, { "epoch": 0.55, "grad_norm": 6.25696044281939, "learning_rate": 8.499727142828891e-06, "loss": 0.827, "step": 6179 }, { "epoch": 0.55, "grad_norm": 6.024752377862959, "learning_rate": 8.499211191396507e-06, "loss": 0.8184, "step": 6180 }, { "epoch": 0.55, "grad_norm": 4.502766460356569, "learning_rate": 8.498695166925752e-06, "loss": 0.8036, "step": 6181 }, { "epoch": 0.55, "grad_norm": 6.686933824678985, "learning_rate": 8.4981790694274e-06, "loss": 0.8471, "step": 6182 }, { "epoch": 0.55, "grad_norm": 4.867360482712466, "learning_rate": 8.497662898912217e-06, "loss": 0.8267, "step": 6183 }, { "epoch": 0.55, "grad_norm": 4.896911452303937, "learning_rate": 8.497146655390983e-06, "loss": 0.8149, "step": 6184 }, { "epoch": 0.55, "grad_norm": 7.549454694567512, "learning_rate": 8.496630338874472e-06, "loss": 0.7623, "step": 6185 }, { "epoch": 0.55, "grad_norm": 5.588479665324505, "learning_rate": 8.496113949373459e-06, "loss": 0.827, "step": 6186 }, { "epoch": 0.55, "grad_norm": 6.168992502176361, "learning_rate": 8.495597486898725e-06, "loss": 0.7979, "step": 6187 }, { "epoch": 0.55, "grad_norm": 6.69096513482704, "learning_rate": 8.495080951461047e-06, "loss": 0.8009, "step": 6188 }, { "epoch": 0.55, "grad_norm": 8.279172339248566, "learning_rate": 8.494564343071208e-06, "loss": 0.7653, "step": 6189 }, { "epoch": 0.55, "grad_norm": 6.032240702611898, "learning_rate": 8.494047661739993e-06, "loss": 0.7463, "step": 6190 }, { "epoch": 0.55, "grad_norm": 5.45045263504536, "learning_rate": 8.493530907478185e-06, "loss": 0.838, "step": 6191 }, { "epoch": 0.55, "grad_norm": 5.68817240129019, "learning_rate": 8.493014080296568e-06, "loss": 0.7658, "step": 6192 }, { "epoch": 0.55, "grad_norm": 5.019394605947285, "learning_rate": 8.492497180205933e-06, "loss": 0.8451, "step": 6193 }, { "epoch": 0.55, "grad_norm": 5.488384110892746, "learning_rate": 8.491980207217067e-06, "loss": 0.8314, "step": 6194 }, { "epoch": 0.55, "grad_norm": 5.296688182012467, "learning_rate": 8.491463161340763e-06, "loss": 0.8682, "step": 6195 }, { "epoch": 0.55, "grad_norm": 6.229336561781027, "learning_rate": 8.490946042587811e-06, "loss": 0.7701, "step": 6196 }, { "epoch": 0.55, "grad_norm": 4.73288484022705, "learning_rate": 8.490428850969003e-06, "loss": 0.8132, "step": 6197 }, { "epoch": 0.55, "grad_norm": 4.626764850375793, "learning_rate": 8.48991158649514e-06, "loss": 0.8013, "step": 6198 }, { "epoch": 0.55, "grad_norm": 7.44650280944281, "learning_rate": 8.489394249177014e-06, "loss": 0.8196, "step": 6199 }, { "epoch": 0.55, "grad_norm": 6.544397670626195, "learning_rate": 8.488876839025425e-06, "loss": 0.8285, "step": 6200 }, { "epoch": 0.55, "grad_norm": 5.88041715441373, "learning_rate": 8.488359356051173e-06, "loss": 0.8467, "step": 6201 }, { "epoch": 0.55, "grad_norm": 5.802429633707484, "learning_rate": 8.487841800265057e-06, "loss": 0.8289, "step": 6202 }, { "epoch": 0.55, "grad_norm": 8.732475229772797, "learning_rate": 8.487324171677883e-06, "loss": 0.7883, "step": 6203 }, { "epoch": 0.55, "grad_norm": 5.85431711785811, "learning_rate": 8.486806470300454e-06, "loss": 0.8985, "step": 6204 }, { "epoch": 0.55, "grad_norm": 5.4707884856013695, "learning_rate": 8.486288696143575e-06, "loss": 0.7989, "step": 6205 }, { "epoch": 0.55, "grad_norm": 5.9559208345258545, "learning_rate": 8.485770849218056e-06, "loss": 0.8539, "step": 6206 }, { "epoch": 0.55, "grad_norm": 5.412133779917283, "learning_rate": 8.485252929534704e-06, "loss": 0.835, "step": 6207 }, { "epoch": 0.55, "grad_norm": 5.230043251090192, "learning_rate": 8.484734937104328e-06, "loss": 0.7344, "step": 6208 }, { "epoch": 0.55, "grad_norm": 5.238770667421405, "learning_rate": 8.484216871937743e-06, "loss": 0.8014, "step": 6209 }, { "epoch": 0.55, "grad_norm": 6.909913880190588, "learning_rate": 8.48369873404576e-06, "loss": 0.8837, "step": 6210 }, { "epoch": 0.55, "grad_norm": 5.031842016810103, "learning_rate": 8.483180523439196e-06, "loss": 0.8274, "step": 6211 }, { "epoch": 0.55, "grad_norm": 5.991914581562651, "learning_rate": 8.482662240128865e-06, "loss": 0.7621, "step": 6212 }, { "epoch": 0.55, "grad_norm": 6.547377085438254, "learning_rate": 8.48214388412559e-06, "loss": 0.7363, "step": 6213 }, { "epoch": 0.55, "grad_norm": 6.588381205660118, "learning_rate": 8.481625455440185e-06, "loss": 0.7416, "step": 6214 }, { "epoch": 0.55, "grad_norm": 5.548409260396815, "learning_rate": 8.481106954083474e-06, "loss": 0.7313, "step": 6215 }, { "epoch": 0.55, "grad_norm": 4.92638862992217, "learning_rate": 8.480588380066277e-06, "loss": 0.8193, "step": 6216 }, { "epoch": 0.55, "grad_norm": 5.944868465147303, "learning_rate": 8.48006973339942e-06, "loss": 0.7554, "step": 6217 }, { "epoch": 0.55, "grad_norm": 7.115003838308667, "learning_rate": 8.47955101409373e-06, "loss": 0.7747, "step": 6218 }, { "epoch": 0.55, "grad_norm": 6.907041181367516, "learning_rate": 8.479032222160032e-06, "loss": 0.826, "step": 6219 }, { "epoch": 0.55, "grad_norm": 4.784267422542636, "learning_rate": 8.478513357609158e-06, "loss": 0.7732, "step": 6220 }, { "epoch": 0.55, "grad_norm": 5.527020873792892, "learning_rate": 8.477994420451932e-06, "loss": 0.8031, "step": 6221 }, { "epoch": 0.56, "grad_norm": 5.1419833088213975, "learning_rate": 8.477475410699191e-06, "loss": 0.7674, "step": 6222 }, { "epoch": 0.56, "grad_norm": 5.957203611719857, "learning_rate": 8.476956328361765e-06, "loss": 0.8481, "step": 6223 }, { "epoch": 0.56, "grad_norm": 5.1916148789951775, "learning_rate": 8.476437173450491e-06, "loss": 0.8378, "step": 6224 }, { "epoch": 0.56, "grad_norm": 4.711236751936519, "learning_rate": 8.475917945976204e-06, "loss": 0.8564, "step": 6225 }, { "epoch": 0.56, "grad_norm": 6.818195869576693, "learning_rate": 8.475398645949741e-06, "loss": 0.8569, "step": 6226 }, { "epoch": 0.56, "grad_norm": 7.12118276425607, "learning_rate": 8.474879273381944e-06, "loss": 0.8582, "step": 6227 }, { "epoch": 0.56, "grad_norm": 10.722477354096204, "learning_rate": 8.474359828283652e-06, "loss": 0.8276, "step": 6228 }, { "epoch": 0.56, "grad_norm": 6.743799754666631, "learning_rate": 8.473840310665706e-06, "loss": 0.7777, "step": 6229 }, { "epoch": 0.56, "grad_norm": 5.298126877660536, "learning_rate": 8.473320720538951e-06, "loss": 0.8561, "step": 6230 }, { "epoch": 0.56, "grad_norm": 6.582817716537235, "learning_rate": 8.472801057914235e-06, "loss": 0.8208, "step": 6231 }, { "epoch": 0.56, "grad_norm": 5.381779660405868, "learning_rate": 8.472281322802401e-06, "loss": 0.8141, "step": 6232 }, { "epoch": 0.56, "grad_norm": 4.781511200481694, "learning_rate": 8.471761515214296e-06, "loss": 0.785, "step": 6233 }, { "epoch": 0.56, "grad_norm": 4.846120028090245, "learning_rate": 8.471241635160774e-06, "loss": 0.8331, "step": 6234 }, { "epoch": 0.56, "grad_norm": 7.031494598828061, "learning_rate": 8.470721682652684e-06, "loss": 0.84, "step": 6235 }, { "epoch": 0.56, "grad_norm": 5.626416423506994, "learning_rate": 8.470201657700884e-06, "loss": 0.7974, "step": 6236 }, { "epoch": 0.56, "grad_norm": 6.018376778980985, "learning_rate": 8.469681560316219e-06, "loss": 0.9541, "step": 6237 }, { "epoch": 0.56, "grad_norm": 6.6935782475211445, "learning_rate": 8.469161390509553e-06, "loss": 0.8348, "step": 6238 }, { "epoch": 0.56, "grad_norm": 6.533818457564585, "learning_rate": 8.468641148291738e-06, "loss": 0.8006, "step": 6239 }, { "epoch": 0.56, "grad_norm": 4.899207562638327, "learning_rate": 8.468120833673636e-06, "loss": 0.7626, "step": 6240 }, { "epoch": 0.56, "grad_norm": 4.804851569995742, "learning_rate": 8.467600446666106e-06, "loss": 0.8134, "step": 6241 }, { "epoch": 0.56, "grad_norm": 4.136177462255176, "learning_rate": 8.46707998728001e-06, "loss": 0.773, "step": 6242 }, { "epoch": 0.56, "grad_norm": 4.955011746701454, "learning_rate": 8.466559455526215e-06, "loss": 0.8498, "step": 6243 }, { "epoch": 0.56, "grad_norm": 4.976064590078513, "learning_rate": 8.466038851415581e-06, "loss": 0.7965, "step": 6244 }, { "epoch": 0.56, "grad_norm": 6.862344108008988, "learning_rate": 8.465518174958978e-06, "loss": 0.7798, "step": 6245 }, { "epoch": 0.56, "grad_norm": 7.254069486652891, "learning_rate": 8.464997426167271e-06, "loss": 0.8783, "step": 6246 }, { "epoch": 0.56, "grad_norm": 5.006443546340096, "learning_rate": 8.46447660505133e-06, "loss": 0.7832, "step": 6247 }, { "epoch": 0.56, "grad_norm": 6.554205593482087, "learning_rate": 8.46395571162203e-06, "loss": 0.8481, "step": 6248 }, { "epoch": 0.56, "grad_norm": 8.839629187630445, "learning_rate": 8.463434745890238e-06, "loss": 0.8388, "step": 6249 }, { "epoch": 0.56, "grad_norm": 6.333135076944842, "learning_rate": 8.46291370786683e-06, "loss": 0.8657, "step": 6250 }, { "epoch": 0.56, "grad_norm": 5.441589658680589, "learning_rate": 8.462392597562685e-06, "loss": 0.8073, "step": 6251 }, { "epoch": 0.56, "grad_norm": 4.937790280186652, "learning_rate": 8.461871414988673e-06, "loss": 0.8211, "step": 6252 }, { "epoch": 0.56, "grad_norm": 5.79251490007488, "learning_rate": 8.461350160155678e-06, "loss": 0.8871, "step": 6253 }, { "epoch": 0.56, "grad_norm": 4.394440620719659, "learning_rate": 8.460828833074582e-06, "loss": 0.7998, "step": 6254 }, { "epoch": 0.56, "grad_norm": 7.663932195422399, "learning_rate": 8.46030743375626e-06, "loss": 0.7737, "step": 6255 }, { "epoch": 0.56, "grad_norm": 5.343246134577354, "learning_rate": 8.459785962211599e-06, "loss": 0.8114, "step": 6256 }, { "epoch": 0.56, "grad_norm": 6.80863811071495, "learning_rate": 8.459264418451483e-06, "loss": 0.8513, "step": 6257 }, { "epoch": 0.56, "grad_norm": 5.908366344607159, "learning_rate": 8.458742802486798e-06, "loss": 0.7853, "step": 6258 }, { "epoch": 0.56, "grad_norm": 4.237891202919982, "learning_rate": 8.45822111432843e-06, "loss": 0.7867, "step": 6259 }, { "epoch": 0.56, "grad_norm": 6.058890497441204, "learning_rate": 8.45769935398727e-06, "loss": 0.7682, "step": 6260 }, { "epoch": 0.56, "grad_norm": 5.386244922059968, "learning_rate": 8.457177521474209e-06, "loss": 0.8003, "step": 6261 }, { "epoch": 0.56, "grad_norm": 4.425782499658812, "learning_rate": 8.45665561680014e-06, "loss": 0.7757, "step": 6262 }, { "epoch": 0.56, "grad_norm": 6.548978986857963, "learning_rate": 8.456133639975952e-06, "loss": 0.8332, "step": 6263 }, { "epoch": 0.56, "grad_norm": 6.9658661806508855, "learning_rate": 8.455611591012544e-06, "loss": 0.8027, "step": 6264 }, { "epoch": 0.56, "grad_norm": 6.580460635182092, "learning_rate": 8.455089469920812e-06, "loss": 0.8049, "step": 6265 }, { "epoch": 0.56, "grad_norm": 6.191637957649086, "learning_rate": 8.454567276711654e-06, "loss": 0.779, "step": 6266 }, { "epoch": 0.56, "grad_norm": 5.565770793508458, "learning_rate": 8.45404501139597e-06, "loss": 0.8187, "step": 6267 }, { "epoch": 0.56, "grad_norm": 5.094109439481992, "learning_rate": 8.45352267398466e-06, "loss": 0.8303, "step": 6268 }, { "epoch": 0.56, "grad_norm": 7.4061588126629525, "learning_rate": 8.453000264488627e-06, "loss": 0.8934, "step": 6269 }, { "epoch": 0.56, "grad_norm": 6.247308137596359, "learning_rate": 8.452477782918775e-06, "loss": 0.7943, "step": 6270 }, { "epoch": 0.56, "grad_norm": 6.704847379843028, "learning_rate": 8.451955229286012e-06, "loss": 0.824, "step": 6271 }, { "epoch": 0.56, "grad_norm": 6.774737485666886, "learning_rate": 8.451432603601243e-06, "loss": 0.8583, "step": 6272 }, { "epoch": 0.56, "grad_norm": 6.7858093536546225, "learning_rate": 8.450909905875374e-06, "loss": 0.8631, "step": 6273 }, { "epoch": 0.56, "grad_norm": 5.550783444853573, "learning_rate": 8.45038713611932e-06, "loss": 0.7459, "step": 6274 }, { "epoch": 0.56, "grad_norm": 5.142852263201285, "learning_rate": 8.44986429434399e-06, "loss": 0.7938, "step": 6275 }, { "epoch": 0.56, "grad_norm": 5.5094940287490415, "learning_rate": 8.449341380560298e-06, "loss": 0.8172, "step": 6276 }, { "epoch": 0.56, "grad_norm": 4.64538582441074, "learning_rate": 8.44881839477916e-06, "loss": 0.7928, "step": 6277 }, { "epoch": 0.56, "grad_norm": 6.077133999730289, "learning_rate": 8.448295337011489e-06, "loss": 0.7691, "step": 6278 }, { "epoch": 0.56, "grad_norm": 7.068427885933199, "learning_rate": 8.447772207268207e-06, "loss": 0.7955, "step": 6279 }, { "epoch": 0.56, "grad_norm": 7.315754049812272, "learning_rate": 8.44724900556023e-06, "loss": 0.8045, "step": 6280 }, { "epoch": 0.56, "grad_norm": 8.690280502518323, "learning_rate": 8.446725731898478e-06, "loss": 0.8633, "step": 6281 }, { "epoch": 0.56, "grad_norm": 4.576959038792565, "learning_rate": 8.446202386293874e-06, "loss": 0.7823, "step": 6282 }, { "epoch": 0.56, "grad_norm": 6.064200440107103, "learning_rate": 8.445678968757343e-06, "loss": 0.8775, "step": 6283 }, { "epoch": 0.56, "grad_norm": 5.89167701655723, "learning_rate": 8.44515547929981e-06, "loss": 0.8276, "step": 6284 }, { "epoch": 0.56, "grad_norm": 5.007439869280262, "learning_rate": 8.4446319179322e-06, "loss": 0.9052, "step": 6285 }, { "epoch": 0.56, "grad_norm": 9.12907322371847, "learning_rate": 8.444108284665444e-06, "loss": 0.789, "step": 6286 }, { "epoch": 0.56, "grad_norm": 6.151874107192116, "learning_rate": 8.44358457951047e-06, "loss": 0.849, "step": 6287 }, { "epoch": 0.56, "grad_norm": 7.62119654635346, "learning_rate": 8.443060802478207e-06, "loss": 0.8, "step": 6288 }, { "epoch": 0.56, "grad_norm": 6.009118726127017, "learning_rate": 8.442536953579591e-06, "loss": 0.7456, "step": 6289 }, { "epoch": 0.56, "grad_norm": 5.9040719935820745, "learning_rate": 8.442013032825556e-06, "loss": 0.767, "step": 6290 }, { "epoch": 0.56, "grad_norm": 4.779871130558414, "learning_rate": 8.441489040227037e-06, "loss": 0.8674, "step": 6291 }, { "epoch": 0.56, "grad_norm": 7.231902348059256, "learning_rate": 8.44096497579497e-06, "loss": 0.7996, "step": 6292 }, { "epoch": 0.56, "grad_norm": 6.425774042426578, "learning_rate": 8.440440839540296e-06, "loss": 0.8075, "step": 6293 }, { "epoch": 0.56, "grad_norm": 5.138511337768828, "learning_rate": 8.439916631473952e-06, "loss": 0.8146, "step": 6294 }, { "epoch": 0.56, "grad_norm": 5.224241795700008, "learning_rate": 8.439392351606884e-06, "loss": 0.8012, "step": 6295 }, { "epoch": 0.56, "grad_norm": 4.463608596283664, "learning_rate": 8.43886799995003e-06, "loss": 0.8532, "step": 6296 }, { "epoch": 0.56, "grad_norm": 6.328126953955852, "learning_rate": 8.438343576514339e-06, "loss": 0.7582, "step": 6297 }, { "epoch": 0.56, "grad_norm": 7.414492044123563, "learning_rate": 8.437819081310756e-06, "loss": 0.8276, "step": 6298 }, { "epoch": 0.56, "grad_norm": 7.292027280206943, "learning_rate": 8.437294514350227e-06, "loss": 0.8552, "step": 6299 }, { "epoch": 0.56, "grad_norm": 5.581227539847117, "learning_rate": 8.436769875643703e-06, "loss": 0.8481, "step": 6300 }, { "epoch": 0.56, "grad_norm": 6.683993921880735, "learning_rate": 8.436245165202135e-06, "loss": 0.7774, "step": 6301 }, { "epoch": 0.56, "grad_norm": 3.8277235157999487, "learning_rate": 8.435720383036474e-06, "loss": 0.7803, "step": 6302 }, { "epoch": 0.56, "grad_norm": 5.097736823698492, "learning_rate": 8.435195529157676e-06, "loss": 0.7996, "step": 6303 }, { "epoch": 0.56, "grad_norm": 5.865699572511002, "learning_rate": 8.434670603576691e-06, "loss": 0.8003, "step": 6304 }, { "epoch": 0.56, "grad_norm": 5.537567528908108, "learning_rate": 8.43414560630448e-06, "loss": 0.8047, "step": 6305 }, { "epoch": 0.56, "grad_norm": 4.658066927145337, "learning_rate": 8.433620537352001e-06, "loss": 0.8066, "step": 6306 }, { "epoch": 0.56, "grad_norm": 7.595720075705235, "learning_rate": 8.433095396730212e-06, "loss": 0.7674, "step": 6307 }, { "epoch": 0.56, "grad_norm": 5.492135945545079, "learning_rate": 8.432570184450077e-06, "loss": 0.8381, "step": 6308 }, { "epoch": 0.56, "grad_norm": 4.667860466282736, "learning_rate": 8.432044900522553e-06, "loss": 0.8035, "step": 6309 }, { "epoch": 0.56, "grad_norm": 6.215963728713693, "learning_rate": 8.43151954495861e-06, "loss": 0.8453, "step": 6310 }, { "epoch": 0.56, "grad_norm": 5.7456038803293055, "learning_rate": 8.430994117769211e-06, "loss": 0.7885, "step": 6311 }, { "epoch": 0.56, "grad_norm": 7.235561861721924, "learning_rate": 8.430468618965324e-06, "loss": 0.738, "step": 6312 }, { "epoch": 0.56, "grad_norm": 5.398555423094524, "learning_rate": 8.429943048557917e-06, "loss": 0.8688, "step": 6313 }, { "epoch": 0.56, "grad_norm": 4.7313324190948025, "learning_rate": 8.42941740655796e-06, "loss": 0.8526, "step": 6314 }, { "epoch": 0.56, "grad_norm": 6.636788492540761, "learning_rate": 8.428891692976425e-06, "loss": 0.8635, "step": 6315 }, { "epoch": 0.56, "grad_norm": 7.338508054248021, "learning_rate": 8.428365907824285e-06, "loss": 0.8344, "step": 6316 }, { "epoch": 0.56, "grad_norm": 5.800794171299255, "learning_rate": 8.427840051112514e-06, "loss": 0.8942, "step": 6317 }, { "epoch": 0.56, "grad_norm": 6.289539922395088, "learning_rate": 8.42731412285209e-06, "loss": 0.8931, "step": 6318 }, { "epoch": 0.56, "grad_norm": 5.097839716063886, "learning_rate": 8.42678812305399e-06, "loss": 0.7746, "step": 6319 }, { "epoch": 0.56, "grad_norm": 5.487372301726876, "learning_rate": 8.42626205172919e-06, "loss": 0.795, "step": 6320 }, { "epoch": 0.56, "grad_norm": 6.797470602121593, "learning_rate": 8.425735908888676e-06, "loss": 0.7905, "step": 6321 }, { "epoch": 0.56, "grad_norm": 5.83939571421671, "learning_rate": 8.425209694543425e-06, "loss": 0.878, "step": 6322 }, { "epoch": 0.56, "grad_norm": 5.950947694500436, "learning_rate": 8.424683408704423e-06, "loss": 0.8285, "step": 6323 }, { "epoch": 0.56, "grad_norm": 6.058197370776621, "learning_rate": 8.424157051382656e-06, "loss": 0.8258, "step": 6324 }, { "epoch": 0.56, "grad_norm": 7.95255844847449, "learning_rate": 8.423630622589107e-06, "loss": 0.8177, "step": 6325 }, { "epoch": 0.56, "grad_norm": 6.452879134427941, "learning_rate": 8.423104122334768e-06, "loss": 0.7669, "step": 6326 }, { "epoch": 0.56, "grad_norm": 6.273184348009166, "learning_rate": 8.422577550630627e-06, "loss": 0.8241, "step": 6327 }, { "epoch": 0.56, "grad_norm": 5.155244319566487, "learning_rate": 8.422050907487673e-06, "loss": 0.7708, "step": 6328 }, { "epoch": 0.56, "grad_norm": 8.218260491866683, "learning_rate": 8.421524192916903e-06, "loss": 0.8075, "step": 6329 }, { "epoch": 0.56, "grad_norm": 10.515615296779487, "learning_rate": 8.420997406929308e-06, "loss": 0.8705, "step": 6330 }, { "epoch": 0.56, "grad_norm": 5.926702165901228, "learning_rate": 8.420470549535884e-06, "loss": 0.8351, "step": 6331 }, { "epoch": 0.56, "grad_norm": 5.90293589194997, "learning_rate": 8.419943620747628e-06, "loss": 0.839, "step": 6332 }, { "epoch": 0.56, "grad_norm": 5.6543381595380895, "learning_rate": 8.419416620575536e-06, "loss": 0.7462, "step": 6333 }, { "epoch": 0.57, "grad_norm": 6.336092772487322, "learning_rate": 8.418889549030614e-06, "loss": 0.8458, "step": 6334 }, { "epoch": 0.57, "grad_norm": 5.690894946019437, "learning_rate": 8.418362406123856e-06, "loss": 0.7713, "step": 6335 }, { "epoch": 0.57, "grad_norm": 4.824133202567947, "learning_rate": 8.417835191866273e-06, "loss": 0.8936, "step": 6336 }, { "epoch": 0.57, "grad_norm": 6.407555817901577, "learning_rate": 8.417307906268864e-06, "loss": 0.8289, "step": 6337 }, { "epoch": 0.57, "grad_norm": 4.969687132248885, "learning_rate": 8.416780549342636e-06, "loss": 0.7947, "step": 6338 }, { "epoch": 0.57, "grad_norm": 5.833216048142414, "learning_rate": 8.416253121098595e-06, "loss": 0.7635, "step": 6339 }, { "epoch": 0.57, "grad_norm": 5.050130620307425, "learning_rate": 8.415725621547752e-06, "loss": 0.7957, "step": 6340 }, { "epoch": 0.57, "grad_norm": 5.052182220601051, "learning_rate": 8.415198050701119e-06, "loss": 0.8724, "step": 6341 }, { "epoch": 0.57, "grad_norm": 7.477900014215796, "learning_rate": 8.414670408569705e-06, "loss": 0.9096, "step": 6342 }, { "epoch": 0.57, "grad_norm": 7.42919112174351, "learning_rate": 8.414142695164523e-06, "loss": 0.8052, "step": 6343 }, { "epoch": 0.57, "grad_norm": 4.545578540382481, "learning_rate": 8.41361491049659e-06, "loss": 0.752, "step": 6344 }, { "epoch": 0.57, "grad_norm": 3.875468964415634, "learning_rate": 8.413087054576923e-06, "loss": 0.8169, "step": 6345 }, { "epoch": 0.57, "grad_norm": 5.87391438065856, "learning_rate": 8.412559127416536e-06, "loss": 0.7759, "step": 6346 }, { "epoch": 0.57, "grad_norm": 5.721858255793767, "learning_rate": 8.412031129026452e-06, "loss": 0.8199, "step": 6347 }, { "epoch": 0.57, "grad_norm": 5.697367756939675, "learning_rate": 8.41150305941769e-06, "loss": 0.8919, "step": 6348 }, { "epoch": 0.57, "grad_norm": 7.163665965888667, "learning_rate": 8.410974918601272e-06, "loss": 0.8399, "step": 6349 }, { "epoch": 0.57, "grad_norm": 6.447015633361741, "learning_rate": 8.410446706588223e-06, "loss": 0.8505, "step": 6350 }, { "epoch": 0.57, "grad_norm": 4.504920155126795, "learning_rate": 8.409918423389568e-06, "loss": 0.8037, "step": 6351 }, { "epoch": 0.57, "grad_norm": 5.429953750743542, "learning_rate": 8.409390069016331e-06, "loss": 0.7912, "step": 6352 }, { "epoch": 0.57, "grad_norm": 6.0501594240388314, "learning_rate": 8.408861643479546e-06, "loss": 0.7864, "step": 6353 }, { "epoch": 0.57, "grad_norm": 5.40529439940605, "learning_rate": 8.408333146790238e-06, "loss": 0.7739, "step": 6354 }, { "epoch": 0.57, "grad_norm": 5.5541722409253405, "learning_rate": 8.40780457895944e-06, "loss": 0.8146, "step": 6355 }, { "epoch": 0.57, "grad_norm": 4.9557074984490255, "learning_rate": 8.407275939998184e-06, "loss": 0.8563, "step": 6356 }, { "epoch": 0.57, "grad_norm": 6.214713537587707, "learning_rate": 8.406747229917507e-06, "loss": 0.7698, "step": 6357 }, { "epoch": 0.57, "grad_norm": 5.94358432998389, "learning_rate": 8.40621844872844e-06, "loss": 0.8071, "step": 6358 }, { "epoch": 0.57, "grad_norm": 6.02690489579196, "learning_rate": 8.405689596442022e-06, "loss": 0.9108, "step": 6359 }, { "epoch": 0.57, "grad_norm": 5.8096379239525575, "learning_rate": 8.405160673069293e-06, "loss": 0.7132, "step": 6360 }, { "epoch": 0.57, "grad_norm": 7.149839066726462, "learning_rate": 8.404631678621292e-06, "loss": 0.7552, "step": 6361 }, { "epoch": 0.57, "grad_norm": 5.418012708795107, "learning_rate": 8.40410261310906e-06, "loss": 0.8363, "step": 6362 }, { "epoch": 0.57, "grad_norm": 6.127212407014792, "learning_rate": 8.403573476543641e-06, "loss": 0.7863, "step": 6363 }, { "epoch": 0.57, "grad_norm": 5.283871410444886, "learning_rate": 8.40304426893608e-06, "loss": 0.8599, "step": 6364 }, { "epoch": 0.57, "grad_norm": 5.98981824958954, "learning_rate": 8.402514990297421e-06, "loss": 0.7673, "step": 6365 }, { "epoch": 0.57, "grad_norm": 5.09888553824146, "learning_rate": 8.401985640638715e-06, "loss": 0.8366, "step": 6366 }, { "epoch": 0.57, "grad_norm": 6.939549962831484, "learning_rate": 8.401456219971008e-06, "loss": 0.8476, "step": 6367 }, { "epoch": 0.57, "grad_norm": 5.228795402897187, "learning_rate": 8.400926728305353e-06, "loss": 0.859, "step": 6368 }, { "epoch": 0.57, "grad_norm": 6.684209483462334, "learning_rate": 8.400397165652797e-06, "loss": 0.7902, "step": 6369 }, { "epoch": 0.57, "grad_norm": 5.27385695311404, "learning_rate": 8.3998675320244e-06, "loss": 0.7436, "step": 6370 }, { "epoch": 0.57, "grad_norm": 7.224226627762636, "learning_rate": 8.399337827431211e-06, "loss": 0.8078, "step": 6371 }, { "epoch": 0.57, "grad_norm": 4.55815220215311, "learning_rate": 8.398808051884292e-06, "loss": 0.8522, "step": 6372 }, { "epoch": 0.57, "grad_norm": 7.2698600440447185, "learning_rate": 8.398278205394697e-06, "loss": 0.7517, "step": 6373 }, { "epoch": 0.57, "grad_norm": 4.987968636550121, "learning_rate": 8.397748287973486e-06, "loss": 0.7384, "step": 6374 }, { "epoch": 0.57, "grad_norm": 8.141548708758679, "learning_rate": 8.397218299631723e-06, "loss": 0.8345, "step": 6375 }, { "epoch": 0.57, "grad_norm": 7.38404744235643, "learning_rate": 8.396688240380465e-06, "loss": 0.7993, "step": 6376 }, { "epoch": 0.57, "grad_norm": 6.705991452506625, "learning_rate": 8.396158110230778e-06, "loss": 0.8506, "step": 6377 }, { "epoch": 0.57, "grad_norm": 6.9915806526215265, "learning_rate": 8.395627909193731e-06, "loss": 0.8211, "step": 6378 }, { "epoch": 0.57, "grad_norm": 5.958566420534438, "learning_rate": 8.395097637280387e-06, "loss": 0.8042, "step": 6379 }, { "epoch": 0.57, "grad_norm": 8.927662893097061, "learning_rate": 8.394567294501815e-06, "loss": 0.827, "step": 6380 }, { "epoch": 0.57, "grad_norm": 6.701348690666862, "learning_rate": 8.394036880869082e-06, "loss": 0.8516, "step": 6381 }, { "epoch": 0.57, "grad_norm": 5.996377973099109, "learning_rate": 8.393506396393264e-06, "loss": 0.8108, "step": 6382 }, { "epoch": 0.57, "grad_norm": 5.665861125055204, "learning_rate": 8.392975841085432e-06, "loss": 0.8452, "step": 6383 }, { "epoch": 0.57, "grad_norm": 7.254886442770166, "learning_rate": 8.392445214956658e-06, "loss": 0.8034, "step": 6384 }, { "epoch": 0.57, "grad_norm": 9.228158014432296, "learning_rate": 8.39191451801802e-06, "loss": 0.8538, "step": 6385 }, { "epoch": 0.57, "grad_norm": 6.446224330772276, "learning_rate": 8.391383750280594e-06, "loss": 0.8555, "step": 6386 }, { "epoch": 0.57, "grad_norm": 6.141599537751966, "learning_rate": 8.390852911755463e-06, "loss": 0.8294, "step": 6387 }, { "epoch": 0.57, "grad_norm": 4.611089176272104, "learning_rate": 8.3903220024537e-06, "loss": 0.782, "step": 6388 }, { "epoch": 0.57, "grad_norm": 5.156796784584249, "learning_rate": 8.38979102238639e-06, "loss": 0.7887, "step": 6389 }, { "epoch": 0.57, "grad_norm": 5.340125238868906, "learning_rate": 8.389259971564618e-06, "loss": 0.7689, "step": 6390 }, { "epoch": 0.57, "grad_norm": 6.1012900336155536, "learning_rate": 8.388728849999465e-06, "loss": 0.8338, "step": 6391 }, { "epoch": 0.57, "grad_norm": 4.7312958571210535, "learning_rate": 8.388197657702017e-06, "loss": 0.8189, "step": 6392 }, { "epoch": 0.57, "grad_norm": 5.568835269016552, "learning_rate": 8.387666394683365e-06, "loss": 0.7722, "step": 6393 }, { "epoch": 0.57, "grad_norm": 4.715292791689378, "learning_rate": 8.387135060954594e-06, "loss": 0.7958, "step": 6394 }, { "epoch": 0.57, "grad_norm": 5.903541099100973, "learning_rate": 8.3866036565268e-06, "loss": 0.7837, "step": 6395 }, { "epoch": 0.57, "grad_norm": 5.819768684308915, "learning_rate": 8.386072181411068e-06, "loss": 0.7627, "step": 6396 }, { "epoch": 0.57, "grad_norm": 4.840295527539505, "learning_rate": 8.385540635618495e-06, "loss": 0.7593, "step": 6397 }, { "epoch": 0.57, "grad_norm": 6.2308612624173545, "learning_rate": 8.385009019160176e-06, "loss": 0.8239, "step": 6398 }, { "epoch": 0.57, "grad_norm": 6.007674355162268, "learning_rate": 8.384477332047206e-06, "loss": 0.7881, "step": 6399 }, { "epoch": 0.57, "grad_norm": 6.37681632762346, "learning_rate": 8.383945574290684e-06, "loss": 0.8603, "step": 6400 }, { "epoch": 0.57, "grad_norm": 4.669914659728779, "learning_rate": 8.38341374590171e-06, "loss": 0.8286, "step": 6401 }, { "epoch": 0.57, "grad_norm": 6.217165536361894, "learning_rate": 8.382881846891381e-06, "loss": 0.8728, "step": 6402 }, { "epoch": 0.57, "grad_norm": 6.5367591140674435, "learning_rate": 8.382349877270804e-06, "loss": 0.7575, "step": 6403 }, { "epoch": 0.57, "grad_norm": 5.958038363001682, "learning_rate": 8.38181783705108e-06, "loss": 0.7964, "step": 6404 }, { "epoch": 0.57, "grad_norm": 4.042780481711249, "learning_rate": 8.381285726243316e-06, "loss": 0.7839, "step": 6405 }, { "epoch": 0.57, "grad_norm": 5.241322894572705, "learning_rate": 8.380753544858615e-06, "loss": 0.7531, "step": 6406 }, { "epoch": 0.57, "grad_norm": 5.441367426592209, "learning_rate": 8.380221292908088e-06, "loss": 0.7648, "step": 6407 }, { "epoch": 0.57, "grad_norm": 5.640196404216833, "learning_rate": 8.379688970402845e-06, "loss": 0.779, "step": 6408 }, { "epoch": 0.57, "grad_norm": 4.909939547150019, "learning_rate": 8.379156577353995e-06, "loss": 0.7921, "step": 6409 }, { "epoch": 0.57, "grad_norm": 5.875912809404505, "learning_rate": 8.378624113772653e-06, "loss": 0.8413, "step": 6410 }, { "epoch": 0.57, "grad_norm": 5.9044348475426585, "learning_rate": 8.37809157966993e-06, "loss": 0.8073, "step": 6411 }, { "epoch": 0.57, "grad_norm": 5.827483789523319, "learning_rate": 8.377558975056945e-06, "loss": 0.7521, "step": 6412 }, { "epoch": 0.57, "grad_norm": 3.9629672980840516, "learning_rate": 8.377026299944813e-06, "loss": 0.8023, "step": 6413 }, { "epoch": 0.57, "grad_norm": 7.7907634224069495, "learning_rate": 8.376493554344652e-06, "loss": 0.8427, "step": 6414 }, { "epoch": 0.57, "grad_norm": 6.375826074232661, "learning_rate": 8.37596073826758e-06, "loss": 0.796, "step": 6415 }, { "epoch": 0.57, "grad_norm": 6.70135947496201, "learning_rate": 8.375427851724725e-06, "loss": 0.7897, "step": 6416 }, { "epoch": 0.57, "grad_norm": 5.824913729040646, "learning_rate": 8.374894894727203e-06, "loss": 0.7885, "step": 6417 }, { "epoch": 0.57, "grad_norm": 7.640193494959229, "learning_rate": 8.374361867286143e-06, "loss": 0.7609, "step": 6418 }, { "epoch": 0.57, "grad_norm": 4.732170228556597, "learning_rate": 8.373828769412667e-06, "loss": 0.8066, "step": 6419 }, { "epoch": 0.57, "grad_norm": 5.341596206767716, "learning_rate": 8.373295601117903e-06, "loss": 0.7864, "step": 6420 }, { "epoch": 0.57, "grad_norm": 4.935007139865047, "learning_rate": 8.372762362412982e-06, "loss": 0.8213, "step": 6421 }, { "epoch": 0.57, "grad_norm": 5.9616765403765495, "learning_rate": 8.372229053309035e-06, "loss": 0.8351, "step": 6422 }, { "epoch": 0.57, "grad_norm": 5.972802019605322, "learning_rate": 8.37169567381719e-06, "loss": 0.8431, "step": 6423 }, { "epoch": 0.57, "grad_norm": 7.445837448479996, "learning_rate": 8.37116222394858e-06, "loss": 0.7908, "step": 6424 }, { "epoch": 0.57, "grad_norm": 6.314495008176629, "learning_rate": 8.370628703714343e-06, "loss": 0.7756, "step": 6425 }, { "epoch": 0.57, "grad_norm": 6.617900010467859, "learning_rate": 8.370095113125613e-06, "loss": 0.768, "step": 6426 }, { "epoch": 0.57, "grad_norm": 5.265545606876863, "learning_rate": 8.369561452193528e-06, "loss": 0.8879, "step": 6427 }, { "epoch": 0.57, "grad_norm": 7.291272722547609, "learning_rate": 8.369027720929228e-06, "loss": 0.7555, "step": 6428 }, { "epoch": 0.57, "grad_norm": 6.276792640234346, "learning_rate": 8.368493919343849e-06, "loss": 0.815, "step": 6429 }, { "epoch": 0.57, "grad_norm": 5.101473634051828, "learning_rate": 8.36796004744854e-06, "loss": 0.8249, "step": 6430 }, { "epoch": 0.57, "grad_norm": 4.135722169608094, "learning_rate": 8.367426105254438e-06, "loss": 0.7666, "step": 6431 }, { "epoch": 0.57, "grad_norm": 4.58591992493518, "learning_rate": 8.366892092772694e-06, "loss": 0.7901, "step": 6432 }, { "epoch": 0.57, "grad_norm": 9.911933853041136, "learning_rate": 8.366358010014449e-06, "loss": 0.7422, "step": 6433 }, { "epoch": 0.57, "grad_norm": 6.384447259282951, "learning_rate": 8.365823856990851e-06, "loss": 0.8115, "step": 6434 }, { "epoch": 0.57, "grad_norm": 6.667093295584337, "learning_rate": 8.365289633713054e-06, "loss": 0.8053, "step": 6435 }, { "epoch": 0.57, "grad_norm": 9.936444051628119, "learning_rate": 8.364755340192203e-06, "loss": 0.8635, "step": 6436 }, { "epoch": 0.57, "grad_norm": 4.997035653612605, "learning_rate": 8.364220976439454e-06, "loss": 0.8575, "step": 6437 }, { "epoch": 0.57, "grad_norm": 4.694355146901909, "learning_rate": 8.36368654246596e-06, "loss": 0.7686, "step": 6438 }, { "epoch": 0.57, "grad_norm": 6.170693255894848, "learning_rate": 8.363152038282876e-06, "loss": 0.8023, "step": 6439 }, { "epoch": 0.57, "grad_norm": 7.491048148406624, "learning_rate": 8.362617463901358e-06, "loss": 0.8591, "step": 6440 }, { "epoch": 0.57, "grad_norm": 6.549260190265516, "learning_rate": 8.362082819332564e-06, "loss": 0.8572, "step": 6441 }, { "epoch": 0.57, "grad_norm": 5.957296621966334, "learning_rate": 8.361548104587655e-06, "loss": 0.7625, "step": 6442 }, { "epoch": 0.57, "grad_norm": 4.736309725661769, "learning_rate": 8.36101331967779e-06, "loss": 0.813, "step": 6443 }, { "epoch": 0.57, "grad_norm": 6.4105505574306285, "learning_rate": 8.360478464614133e-06, "loss": 0.8315, "step": 6444 }, { "epoch": 0.57, "grad_norm": 4.704648734830471, "learning_rate": 8.359943539407847e-06, "loss": 0.8601, "step": 6445 }, { "epoch": 0.58, "grad_norm": 5.937704858071197, "learning_rate": 8.3594085440701e-06, "loss": 0.7721, "step": 6446 }, { "epoch": 0.58, "grad_norm": 5.6311327444161865, "learning_rate": 8.358873478612054e-06, "loss": 0.7438, "step": 6447 }, { "epoch": 0.58, "grad_norm": 6.592319380886831, "learning_rate": 8.35833834304488e-06, "loss": 0.7907, "step": 6448 }, { "epoch": 0.58, "grad_norm": 5.225017609578877, "learning_rate": 8.357803137379748e-06, "loss": 0.8145, "step": 6449 }, { "epoch": 0.58, "grad_norm": 4.991006583485945, "learning_rate": 8.35726786162783e-06, "loss": 0.882, "step": 6450 }, { "epoch": 0.58, "grad_norm": 4.7911869172432064, "learning_rate": 8.356732515800296e-06, "loss": 0.8249, "step": 6451 }, { "epoch": 0.58, "grad_norm": 5.2532772536449395, "learning_rate": 8.356197099908324e-06, "loss": 0.7157, "step": 6452 }, { "epoch": 0.58, "grad_norm": 6.129141585902698, "learning_rate": 8.355661613963086e-06, "loss": 0.7839, "step": 6453 }, { "epoch": 0.58, "grad_norm": 5.150798687943961, "learning_rate": 8.35512605797576e-06, "loss": 0.812, "step": 6454 }, { "epoch": 0.58, "grad_norm": 8.143849079140178, "learning_rate": 8.354590431957525e-06, "loss": 0.8857, "step": 6455 }, { "epoch": 0.58, "grad_norm": 5.959695180633051, "learning_rate": 8.354054735919563e-06, "loss": 0.8227, "step": 6456 }, { "epoch": 0.58, "grad_norm": 7.394192951306144, "learning_rate": 8.353518969873053e-06, "loss": 0.7313, "step": 6457 }, { "epoch": 0.58, "grad_norm": 6.683953597729611, "learning_rate": 8.352983133829178e-06, "loss": 0.8663, "step": 6458 }, { "epoch": 0.58, "grad_norm": 7.824641455153864, "learning_rate": 8.352447227799125e-06, "loss": 0.8154, "step": 6459 }, { "epoch": 0.58, "grad_norm": 5.5643920437432675, "learning_rate": 8.351911251794076e-06, "loss": 0.756, "step": 6460 }, { "epoch": 0.58, "grad_norm": 7.248037237801102, "learning_rate": 8.351375205825221e-06, "loss": 0.839, "step": 6461 }, { "epoch": 0.58, "grad_norm": 5.171551040286376, "learning_rate": 8.350839089903746e-06, "loss": 0.8323, "step": 6462 }, { "epoch": 0.58, "grad_norm": 6.5829477893941215, "learning_rate": 8.350302904040844e-06, "loss": 0.842, "step": 6463 }, { "epoch": 0.58, "grad_norm": 8.535719894210468, "learning_rate": 8.349766648247707e-06, "loss": 0.794, "step": 6464 }, { "epoch": 0.58, "grad_norm": 6.718341444884894, "learning_rate": 8.349230322535527e-06, "loss": 0.8178, "step": 6465 }, { "epoch": 0.58, "grad_norm": 4.599520501679836, "learning_rate": 8.348693926915497e-06, "loss": 0.8189, "step": 6466 }, { "epoch": 0.58, "grad_norm": 6.68648673746061, "learning_rate": 8.348157461398818e-06, "loss": 0.833, "step": 6467 }, { "epoch": 0.58, "grad_norm": 5.044219315964691, "learning_rate": 8.347620925996682e-06, "loss": 0.7835, "step": 6468 }, { "epoch": 0.58, "grad_norm": 5.140841195846728, "learning_rate": 8.34708432072029e-06, "loss": 0.8235, "step": 6469 }, { "epoch": 0.58, "grad_norm": 4.790359148330508, "learning_rate": 8.346547645580844e-06, "loss": 0.7918, "step": 6470 }, { "epoch": 0.58, "grad_norm": 4.638324976636239, "learning_rate": 8.346010900589545e-06, "loss": 0.8259, "step": 6471 }, { "epoch": 0.58, "grad_norm": 6.057890831912723, "learning_rate": 8.345474085757594e-06, "loss": 0.9297, "step": 6472 }, { "epoch": 0.58, "grad_norm": 7.707654564421968, "learning_rate": 8.3449372010962e-06, "loss": 0.832, "step": 6473 }, { "epoch": 0.58, "grad_norm": 5.9385205445662805, "learning_rate": 8.344400246616566e-06, "loss": 0.797, "step": 6474 }, { "epoch": 0.58, "grad_norm": 7.21771592661929, "learning_rate": 8.343863222329903e-06, "loss": 0.7793, "step": 6475 }, { "epoch": 0.58, "grad_norm": 5.41163697706573, "learning_rate": 8.343326128247415e-06, "loss": 0.7621, "step": 6476 }, { "epoch": 0.58, "grad_norm": 4.378320776830714, "learning_rate": 8.342788964380317e-06, "loss": 0.8205, "step": 6477 }, { "epoch": 0.58, "grad_norm": 5.346530145607095, "learning_rate": 8.34225173073982e-06, "loss": 0.8154, "step": 6478 }, { "epoch": 0.58, "grad_norm": 6.54969941555116, "learning_rate": 8.34171442733714e-06, "loss": 0.8578, "step": 6479 }, { "epoch": 0.58, "grad_norm": 3.7280633080433385, "learning_rate": 8.341177054183488e-06, "loss": 0.8438, "step": 6480 }, { "epoch": 0.58, "grad_norm": 4.734055764513203, "learning_rate": 8.340639611290081e-06, "loss": 0.8043, "step": 6481 }, { "epoch": 0.58, "grad_norm": 6.038671822979245, "learning_rate": 8.340102098668138e-06, "loss": 0.7918, "step": 6482 }, { "epoch": 0.58, "grad_norm": 6.531316967489235, "learning_rate": 8.33956451632888e-06, "loss": 0.7713, "step": 6483 }, { "epoch": 0.58, "grad_norm": 6.517084735852784, "learning_rate": 8.339026864283524e-06, "loss": 0.8289, "step": 6484 }, { "epoch": 0.58, "grad_norm": 7.732851261457929, "learning_rate": 8.338489142543295e-06, "loss": 0.7858, "step": 6485 }, { "epoch": 0.58, "grad_norm": 5.912376041210346, "learning_rate": 8.337951351119417e-06, "loss": 0.766, "step": 6486 }, { "epoch": 0.58, "grad_norm": 5.116482406632327, "learning_rate": 8.337413490023115e-06, "loss": 0.8272, "step": 6487 }, { "epoch": 0.58, "grad_norm": 6.40173341219443, "learning_rate": 8.336875559265616e-06, "loss": 0.7807, "step": 6488 }, { "epoch": 0.58, "grad_norm": 7.802428829543586, "learning_rate": 8.336337558858145e-06, "loss": 0.8545, "step": 6489 }, { "epoch": 0.58, "grad_norm": 5.608992645290709, "learning_rate": 8.335799488811936e-06, "loss": 0.7748, "step": 6490 }, { "epoch": 0.58, "grad_norm": 5.2054065211028036, "learning_rate": 8.335261349138217e-06, "loss": 0.767, "step": 6491 }, { "epoch": 0.58, "grad_norm": 5.9980232782685405, "learning_rate": 8.33472313984822e-06, "loss": 0.739, "step": 6492 }, { "epoch": 0.58, "grad_norm": 4.975814378235043, "learning_rate": 8.334184860953183e-06, "loss": 0.7998, "step": 6493 }, { "epoch": 0.58, "grad_norm": 5.983901792952384, "learning_rate": 8.333646512464337e-06, "loss": 0.7189, "step": 6494 }, { "epoch": 0.58, "grad_norm": 6.114748489263203, "learning_rate": 8.333108094392921e-06, "loss": 0.8162, "step": 6495 }, { "epoch": 0.58, "grad_norm": 5.5827630354317535, "learning_rate": 8.332569606750174e-06, "loss": 0.7994, "step": 6496 }, { "epoch": 0.58, "grad_norm": 5.353084675836693, "learning_rate": 8.332031049547334e-06, "loss": 0.8561, "step": 6497 }, { "epoch": 0.58, "grad_norm": 6.734688588517768, "learning_rate": 8.331492422795641e-06, "loss": 0.8808, "step": 6498 }, { "epoch": 0.58, "grad_norm": 6.31009392546542, "learning_rate": 8.33095372650634e-06, "loss": 0.8726, "step": 6499 }, { "epoch": 0.58, "grad_norm": 7.028264353462315, "learning_rate": 8.330414960690678e-06, "loss": 0.8265, "step": 6500 }, { "epoch": 0.58, "grad_norm": 8.61583212432705, "learning_rate": 8.329876125359894e-06, "loss": 0.8301, "step": 6501 }, { "epoch": 0.58, "grad_norm": 6.677444933462211, "learning_rate": 8.329337220525239e-06, "loss": 0.8233, "step": 6502 }, { "epoch": 0.58, "grad_norm": 7.134629603052685, "learning_rate": 8.328798246197963e-06, "loss": 0.8409, "step": 6503 }, { "epoch": 0.58, "grad_norm": 6.529457989698098, "learning_rate": 8.328259202389311e-06, "loss": 0.8596, "step": 6504 }, { "epoch": 0.58, "grad_norm": 5.633697046507348, "learning_rate": 8.327720089110536e-06, "loss": 0.7416, "step": 6505 }, { "epoch": 0.58, "grad_norm": 4.830233972929224, "learning_rate": 8.327180906372893e-06, "loss": 0.7761, "step": 6506 }, { "epoch": 0.58, "grad_norm": 4.583793005061743, "learning_rate": 8.326641654187634e-06, "loss": 0.8874, "step": 6507 }, { "epoch": 0.58, "grad_norm": 4.768832534031234, "learning_rate": 8.326102332566018e-06, "loss": 0.7787, "step": 6508 }, { "epoch": 0.58, "grad_norm": 6.291370712953162, "learning_rate": 8.325562941519298e-06, "loss": 0.8131, "step": 6509 }, { "epoch": 0.58, "grad_norm": 5.625343195972422, "learning_rate": 8.325023481058736e-06, "loss": 0.8022, "step": 6510 }, { "epoch": 0.58, "grad_norm": 4.572211766666693, "learning_rate": 8.324483951195587e-06, "loss": 0.8231, "step": 6511 }, { "epoch": 0.58, "grad_norm": 5.9988273279010595, "learning_rate": 8.32394435194112e-06, "loss": 0.7395, "step": 6512 }, { "epoch": 0.58, "grad_norm": 5.217974979390241, "learning_rate": 8.323404683306593e-06, "loss": 0.8671, "step": 6513 }, { "epoch": 0.58, "grad_norm": 6.915139144135979, "learning_rate": 8.32286494530327e-06, "loss": 0.8467, "step": 6514 }, { "epoch": 0.58, "grad_norm": 6.001017258201932, "learning_rate": 8.322325137942419e-06, "loss": 0.7665, "step": 6515 }, { "epoch": 0.58, "grad_norm": 5.342533541231736, "learning_rate": 8.321785261235306e-06, "loss": 0.8267, "step": 6516 }, { "epoch": 0.58, "grad_norm": 6.3482228951260335, "learning_rate": 8.321245315193202e-06, "loss": 0.7672, "step": 6517 }, { "epoch": 0.58, "grad_norm": 6.744080882937791, "learning_rate": 8.320705299827375e-06, "loss": 0.7885, "step": 6518 }, { "epoch": 0.58, "grad_norm": 6.07888679015369, "learning_rate": 8.320165215149094e-06, "loss": 0.7703, "step": 6519 }, { "epoch": 0.58, "grad_norm": 5.111108903218273, "learning_rate": 8.319625061169639e-06, "loss": 0.8772, "step": 6520 }, { "epoch": 0.58, "grad_norm": 5.708294355193285, "learning_rate": 8.319084837900278e-06, "loss": 0.7912, "step": 6521 }, { "epoch": 0.58, "grad_norm": 6.4843478805659975, "learning_rate": 8.318544545352292e-06, "loss": 0.889, "step": 6522 }, { "epoch": 0.58, "grad_norm": 6.872404165735018, "learning_rate": 8.318004183536954e-06, "loss": 0.8186, "step": 6523 }, { "epoch": 0.58, "grad_norm": 5.873862626441227, "learning_rate": 8.317463752465547e-06, "loss": 0.7619, "step": 6524 }, { "epoch": 0.58, "grad_norm": 5.882270191757441, "learning_rate": 8.316923252149346e-06, "loss": 0.8179, "step": 6525 }, { "epoch": 0.58, "grad_norm": 6.089408342294823, "learning_rate": 8.31638268259964e-06, "loss": 0.8365, "step": 6526 }, { "epoch": 0.58, "grad_norm": 6.304414575466239, "learning_rate": 8.315842043827706e-06, "loss": 0.7742, "step": 6527 }, { "epoch": 0.58, "grad_norm": 7.828734931507414, "learning_rate": 8.315301335844832e-06, "loss": 0.864, "step": 6528 }, { "epoch": 0.58, "grad_norm": 5.059427539851561, "learning_rate": 8.314760558662303e-06, "loss": 0.7831, "step": 6529 }, { "epoch": 0.58, "grad_norm": 5.470687775111974, "learning_rate": 8.314219712291404e-06, "loss": 0.8481, "step": 6530 }, { "epoch": 0.58, "grad_norm": 5.887516896248597, "learning_rate": 8.31367879674343e-06, "loss": 0.8643, "step": 6531 }, { "epoch": 0.58, "grad_norm": 4.948859012274711, "learning_rate": 8.313137812029666e-06, "loss": 0.7929, "step": 6532 }, { "epoch": 0.58, "grad_norm": 6.464864060439881, "learning_rate": 8.312596758161407e-06, "loss": 0.9184, "step": 6533 }, { "epoch": 0.58, "grad_norm": 4.815801619198798, "learning_rate": 8.312055635149944e-06, "loss": 0.7589, "step": 6534 }, { "epoch": 0.58, "grad_norm": 7.192147414731345, "learning_rate": 8.311514443006573e-06, "loss": 0.8125, "step": 6535 }, { "epoch": 0.58, "grad_norm": 6.373342729756691, "learning_rate": 8.31097318174259e-06, "loss": 0.7977, "step": 6536 }, { "epoch": 0.58, "grad_norm": 4.715739803551229, "learning_rate": 8.310431851369294e-06, "loss": 0.8349, "step": 6537 }, { "epoch": 0.58, "grad_norm": 9.481738427196262, "learning_rate": 8.309890451897982e-06, "loss": 0.8154, "step": 6538 }, { "epoch": 0.58, "grad_norm": 5.6560271841873835, "learning_rate": 8.309348983339956e-06, "loss": 0.7811, "step": 6539 }, { "epoch": 0.58, "grad_norm": 4.732504390195648, "learning_rate": 8.308807445706516e-06, "loss": 0.7952, "step": 6540 }, { "epoch": 0.58, "grad_norm": 6.231198196644329, "learning_rate": 8.308265839008966e-06, "loss": 0.7602, "step": 6541 }, { "epoch": 0.58, "grad_norm": 12.410109134962266, "learning_rate": 8.307724163258615e-06, "loss": 0.834, "step": 6542 }, { "epoch": 0.58, "grad_norm": 5.794573680802504, "learning_rate": 8.307182418466763e-06, "loss": 0.8595, "step": 6543 }, { "epoch": 0.58, "grad_norm": 5.485580975051904, "learning_rate": 8.30664060464472e-06, "loss": 0.7603, "step": 6544 }, { "epoch": 0.58, "grad_norm": 7.159299325673465, "learning_rate": 8.306098721803798e-06, "loss": 0.7333, "step": 6545 }, { "epoch": 0.58, "grad_norm": 6.134051897162804, "learning_rate": 8.305556769955306e-06, "loss": 0.7571, "step": 6546 }, { "epoch": 0.58, "grad_norm": 6.4387829989685486, "learning_rate": 8.305014749110554e-06, "loss": 0.8225, "step": 6547 }, { "epoch": 0.58, "grad_norm": 4.976061637504289, "learning_rate": 8.304472659280857e-06, "loss": 0.7759, "step": 6548 }, { "epoch": 0.58, "grad_norm": 4.723480524687842, "learning_rate": 8.303930500477529e-06, "loss": 0.7435, "step": 6549 }, { "epoch": 0.58, "grad_norm": 5.58041545972474, "learning_rate": 8.303388272711888e-06, "loss": 0.7996, "step": 6550 }, { "epoch": 0.58, "grad_norm": 5.20484338679722, "learning_rate": 8.302845975995253e-06, "loss": 0.8176, "step": 6551 }, { "epoch": 0.58, "grad_norm": 5.120971864626568, "learning_rate": 8.302303610338938e-06, "loss": 0.7057, "step": 6552 }, { "epoch": 0.58, "grad_norm": 5.999208906832063, "learning_rate": 8.30176117575427e-06, "loss": 0.7692, "step": 6553 }, { "epoch": 0.58, "grad_norm": 6.665590544651028, "learning_rate": 8.301218672252566e-06, "loss": 0.803, "step": 6554 }, { "epoch": 0.58, "grad_norm": 6.178398959873913, "learning_rate": 8.300676099845155e-06, "loss": 0.8508, "step": 6555 }, { "epoch": 0.58, "grad_norm": 5.723911213655337, "learning_rate": 8.300133458543355e-06, "loss": 0.7705, "step": 6556 }, { "epoch": 0.58, "grad_norm": 11.610498832410464, "learning_rate": 8.299590748358498e-06, "loss": 0.8026, "step": 6557 }, { "epoch": 0.59, "grad_norm": 5.553347931007273, "learning_rate": 8.299047969301909e-06, "loss": 0.8452, "step": 6558 }, { "epoch": 0.59, "grad_norm": 4.582438837790492, "learning_rate": 8.298505121384921e-06, "loss": 0.7952, "step": 6559 }, { "epoch": 0.59, "grad_norm": 5.80838132082343, "learning_rate": 8.29796220461886e-06, "loss": 0.8029, "step": 6560 }, { "epoch": 0.59, "grad_norm": 4.739244175538246, "learning_rate": 8.297419219015061e-06, "loss": 0.8365, "step": 6561 }, { "epoch": 0.59, "grad_norm": 7.942084906265972, "learning_rate": 8.296876164584857e-06, "loss": 0.8446, "step": 6562 }, { "epoch": 0.59, "grad_norm": 5.807822739495269, "learning_rate": 8.296333041339583e-06, "loss": 0.8448, "step": 6563 }, { "epoch": 0.59, "grad_norm": 5.947927735300665, "learning_rate": 8.295789849290576e-06, "loss": 0.805, "step": 6564 }, { "epoch": 0.59, "grad_norm": 5.074972465693616, "learning_rate": 8.295246588449173e-06, "loss": 0.7948, "step": 6565 }, { "epoch": 0.59, "grad_norm": 6.848212603002914, "learning_rate": 8.294703258826713e-06, "loss": 0.8491, "step": 6566 }, { "epoch": 0.59, "grad_norm": 5.899751485370875, "learning_rate": 8.29415986043454e-06, "loss": 0.8213, "step": 6567 }, { "epoch": 0.59, "grad_norm": 5.454746230223329, "learning_rate": 8.293616393283993e-06, "loss": 0.7672, "step": 6568 }, { "epoch": 0.59, "grad_norm": 6.802700134043347, "learning_rate": 8.293072857386418e-06, "loss": 0.8535, "step": 6569 }, { "epoch": 0.59, "grad_norm": 4.926781242722968, "learning_rate": 8.292529252753156e-06, "loss": 0.8611, "step": 6570 }, { "epoch": 0.59, "grad_norm": 6.4874655517034086, "learning_rate": 8.291985579395557e-06, "loss": 0.8021, "step": 6571 }, { "epoch": 0.59, "grad_norm": 7.207468659424794, "learning_rate": 8.29144183732497e-06, "loss": 0.8298, "step": 6572 }, { "epoch": 0.59, "grad_norm": 5.667501823944266, "learning_rate": 8.290898026552741e-06, "loss": 0.8057, "step": 6573 }, { "epoch": 0.59, "grad_norm": 6.953874694255466, "learning_rate": 8.290354147090224e-06, "loss": 0.7828, "step": 6574 }, { "epoch": 0.59, "grad_norm": 5.408994035527277, "learning_rate": 8.28981019894877e-06, "loss": 0.7925, "step": 6575 }, { "epoch": 0.59, "grad_norm": 5.85017060127885, "learning_rate": 8.289266182139733e-06, "loss": 0.7461, "step": 6576 }, { "epoch": 0.59, "grad_norm": 7.279692542980102, "learning_rate": 8.288722096674467e-06, "loss": 0.7939, "step": 6577 }, { "epoch": 0.59, "grad_norm": 7.595810416365484, "learning_rate": 8.288177942564328e-06, "loss": 0.8551, "step": 6578 }, { "epoch": 0.59, "grad_norm": 5.868336945382938, "learning_rate": 8.287633719820676e-06, "loss": 0.83, "step": 6579 }, { "epoch": 0.59, "grad_norm": 5.242467994452417, "learning_rate": 8.287089428454871e-06, "loss": 0.71, "step": 6580 }, { "epoch": 0.59, "grad_norm": 5.779013079652278, "learning_rate": 8.286545068478273e-06, "loss": 0.857, "step": 6581 }, { "epoch": 0.59, "grad_norm": 5.398748400131399, "learning_rate": 8.286000639902243e-06, "loss": 0.8035, "step": 6582 }, { "epoch": 0.59, "grad_norm": 7.7198623724677775, "learning_rate": 8.285456142738147e-06, "loss": 0.9221, "step": 6583 }, { "epoch": 0.59, "grad_norm": 6.759520170769188, "learning_rate": 8.284911576997347e-06, "loss": 0.884, "step": 6584 }, { "epoch": 0.59, "grad_norm": 5.740732078653209, "learning_rate": 8.284366942691214e-06, "loss": 0.7553, "step": 6585 }, { "epoch": 0.59, "grad_norm": 4.713463540220353, "learning_rate": 8.283822239831115e-06, "loss": 0.835, "step": 6586 }, { "epoch": 0.59, "grad_norm": 5.855677004617308, "learning_rate": 8.283277468428415e-06, "loss": 0.7376, "step": 6587 }, { "epoch": 0.59, "grad_norm": 4.858814708665648, "learning_rate": 8.282732628494488e-06, "loss": 0.7397, "step": 6588 }, { "epoch": 0.59, "grad_norm": 3.719057212376901, "learning_rate": 8.282187720040709e-06, "loss": 0.8219, "step": 6589 }, { "epoch": 0.59, "grad_norm": 4.6395074729455414, "learning_rate": 8.281642743078446e-06, "loss": 0.7813, "step": 6590 }, { "epoch": 0.59, "grad_norm": 6.541469521269135, "learning_rate": 8.281097697619081e-06, "loss": 0.8406, "step": 6591 }, { "epoch": 0.59, "grad_norm": 6.884737864752866, "learning_rate": 8.280552583673984e-06, "loss": 0.8345, "step": 6592 }, { "epoch": 0.59, "grad_norm": 6.499590329327538, "learning_rate": 8.280007401254538e-06, "loss": 0.7671, "step": 6593 }, { "epoch": 0.59, "grad_norm": 6.190829480422459, "learning_rate": 8.27946215037212e-06, "loss": 0.7951, "step": 6594 }, { "epoch": 0.59, "grad_norm": 8.274918934765552, "learning_rate": 8.27891683103811e-06, "loss": 0.7627, "step": 6595 }, { "epoch": 0.59, "grad_norm": 6.379027127731314, "learning_rate": 8.278371443263896e-06, "loss": 0.766, "step": 6596 }, { "epoch": 0.59, "grad_norm": 5.635292838572133, "learning_rate": 8.277825987060853e-06, "loss": 0.9053, "step": 6597 }, { "epoch": 0.59, "grad_norm": 4.844070144557614, "learning_rate": 8.277280462440373e-06, "loss": 0.8411, "step": 6598 }, { "epoch": 0.59, "grad_norm": 6.604134346489943, "learning_rate": 8.276734869413841e-06, "loss": 0.7926, "step": 6599 }, { "epoch": 0.59, "grad_norm": 6.785936076991452, "learning_rate": 8.276189207992644e-06, "loss": 0.8065, "step": 6600 }, { "epoch": 0.59, "grad_norm": 4.642862173334842, "learning_rate": 8.275643478188171e-06, "loss": 0.8644, "step": 6601 }, { "epoch": 0.59, "grad_norm": 6.757278464706172, "learning_rate": 8.275097680011814e-06, "loss": 0.7179, "step": 6602 }, { "epoch": 0.59, "grad_norm": 5.2841357704698275, "learning_rate": 8.274551813474967e-06, "loss": 0.7387, "step": 6603 }, { "epoch": 0.59, "grad_norm": 6.11286194695604, "learning_rate": 8.274005878589022e-06, "loss": 0.8544, "step": 6604 }, { "epoch": 0.59, "grad_norm": 4.944702120962299, "learning_rate": 8.273459875365373e-06, "loss": 0.7966, "step": 6605 }, { "epoch": 0.59, "grad_norm": 8.208674474299395, "learning_rate": 8.272913803815416e-06, "loss": 0.7806, "step": 6606 }, { "epoch": 0.59, "grad_norm": 5.782127872674019, "learning_rate": 8.272367663950554e-06, "loss": 0.7816, "step": 6607 }, { "epoch": 0.59, "grad_norm": 4.806825303416563, "learning_rate": 8.271821455782183e-06, "loss": 0.7986, "step": 6608 }, { "epoch": 0.59, "grad_norm": 4.511392308463543, "learning_rate": 8.271275179321703e-06, "loss": 0.736, "step": 6609 }, { "epoch": 0.59, "grad_norm": 4.747936205121432, "learning_rate": 8.27072883458052e-06, "loss": 0.8258, "step": 6610 }, { "epoch": 0.59, "grad_norm": 5.906652403069464, "learning_rate": 8.270182421570033e-06, "loss": 0.8188, "step": 6611 }, { "epoch": 0.59, "grad_norm": 5.375205317797918, "learning_rate": 8.26963594030165e-06, "loss": 0.8278, "step": 6612 }, { "epoch": 0.59, "grad_norm": 8.282678593955008, "learning_rate": 8.269089390786777e-06, "loss": 0.8085, "step": 6613 }, { "epoch": 0.59, "grad_norm": 6.390239736488926, "learning_rate": 8.268542773036824e-06, "loss": 0.8489, "step": 6614 }, { "epoch": 0.59, "grad_norm": 6.3383272549989, "learning_rate": 8.267996087063195e-06, "loss": 0.8234, "step": 6615 }, { "epoch": 0.59, "grad_norm": 6.721851340043212, "learning_rate": 8.267449332877306e-06, "loss": 0.8013, "step": 6616 }, { "epoch": 0.59, "grad_norm": 7.621121792658276, "learning_rate": 8.266902510490568e-06, "loss": 0.8127, "step": 6617 }, { "epoch": 0.59, "grad_norm": 6.682026688576515, "learning_rate": 8.266355619914396e-06, "loss": 0.8168, "step": 6618 }, { "epoch": 0.59, "grad_norm": 9.431706679601259, "learning_rate": 8.265808661160202e-06, "loss": 0.7328, "step": 6619 }, { "epoch": 0.59, "grad_norm": 6.144964951155161, "learning_rate": 8.265261634239406e-06, "loss": 0.7982, "step": 6620 }, { "epoch": 0.59, "grad_norm": 7.766080727778055, "learning_rate": 8.264714539163422e-06, "loss": 0.7852, "step": 6621 }, { "epoch": 0.59, "grad_norm": 5.109365197726513, "learning_rate": 8.264167375943672e-06, "loss": 0.7644, "step": 6622 }, { "epoch": 0.59, "grad_norm": 5.12097901732077, "learning_rate": 8.263620144591578e-06, "loss": 0.8737, "step": 6623 }, { "epoch": 0.59, "grad_norm": 6.777288872528583, "learning_rate": 8.263072845118559e-06, "loss": 0.8129, "step": 6624 }, { "epoch": 0.59, "grad_norm": 6.603961641106768, "learning_rate": 8.262525477536041e-06, "loss": 0.8181, "step": 6625 }, { "epoch": 0.59, "grad_norm": 6.005165253134522, "learning_rate": 8.261978041855448e-06, "loss": 0.8076, "step": 6626 }, { "epoch": 0.59, "grad_norm": 4.721641058798318, "learning_rate": 8.261430538088207e-06, "loss": 0.8277, "step": 6627 }, { "epoch": 0.59, "grad_norm": 5.237456798111674, "learning_rate": 8.260882966245747e-06, "loss": 0.7833, "step": 6628 }, { "epoch": 0.59, "grad_norm": 5.609641387274582, "learning_rate": 8.260335326339494e-06, "loss": 0.793, "step": 6629 }, { "epoch": 0.59, "grad_norm": 5.963122227898637, "learning_rate": 8.259787618380883e-06, "loss": 0.7623, "step": 6630 }, { "epoch": 0.59, "grad_norm": 6.263384809869215, "learning_rate": 8.259239842381344e-06, "loss": 0.8125, "step": 6631 }, { "epoch": 0.59, "grad_norm": 4.880514913643217, "learning_rate": 8.258691998352312e-06, "loss": 0.7854, "step": 6632 }, { "epoch": 0.59, "grad_norm": 5.651145854310553, "learning_rate": 8.25814408630522e-06, "loss": 0.7953, "step": 6633 }, { "epoch": 0.59, "grad_norm": 5.022278224337511, "learning_rate": 8.257596106251504e-06, "loss": 0.8067, "step": 6634 }, { "epoch": 0.59, "grad_norm": 5.653026920230497, "learning_rate": 8.257048058202604e-06, "loss": 0.8286, "step": 6635 }, { "epoch": 0.59, "grad_norm": 5.765057926171833, "learning_rate": 8.25649994216996e-06, "loss": 0.7636, "step": 6636 }, { "epoch": 0.59, "grad_norm": 5.067601513269242, "learning_rate": 8.255951758165011e-06, "loss": 0.804, "step": 6637 }, { "epoch": 0.59, "grad_norm": 5.841170074062014, "learning_rate": 8.255403506199197e-06, "loss": 0.896, "step": 6638 }, { "epoch": 0.59, "grad_norm": 7.597488678848482, "learning_rate": 8.254855186283966e-06, "loss": 0.8866, "step": 6639 }, { "epoch": 0.59, "grad_norm": 8.042683158537608, "learning_rate": 8.254306798430762e-06, "loss": 0.7454, "step": 6640 }, { "epoch": 0.59, "grad_norm": 5.960232751667454, "learning_rate": 8.25375834265103e-06, "loss": 0.7775, "step": 6641 }, { "epoch": 0.59, "grad_norm": 4.99675161414685, "learning_rate": 8.253209818956216e-06, "loss": 0.7526, "step": 6642 }, { "epoch": 0.59, "grad_norm": 5.165206654541226, "learning_rate": 8.252661227357775e-06, "loss": 0.7703, "step": 6643 }, { "epoch": 0.59, "grad_norm": 5.257575390620387, "learning_rate": 8.25211256786715e-06, "loss": 0.7814, "step": 6644 }, { "epoch": 0.59, "grad_norm": 4.5837036576443495, "learning_rate": 8.2515638404958e-06, "loss": 0.7544, "step": 6645 }, { "epoch": 0.59, "grad_norm": 9.352524573543771, "learning_rate": 8.251015045255173e-06, "loss": 0.8217, "step": 6646 }, { "epoch": 0.59, "grad_norm": 5.3024615945389035, "learning_rate": 8.25046618215673e-06, "loss": 0.8029, "step": 6647 }, { "epoch": 0.59, "grad_norm": 4.7904224927651216, "learning_rate": 8.249917251211924e-06, "loss": 0.7755, "step": 6648 }, { "epoch": 0.59, "grad_norm": 5.8479459083797725, "learning_rate": 8.249368252432208e-06, "loss": 0.8233, "step": 6649 }, { "epoch": 0.59, "grad_norm": 4.914573051603051, "learning_rate": 8.248819185829049e-06, "loss": 0.7875, "step": 6650 }, { "epoch": 0.59, "grad_norm": 5.206075963016387, "learning_rate": 8.248270051413905e-06, "loss": 0.7594, "step": 6651 }, { "epoch": 0.59, "grad_norm": 5.489629207134546, "learning_rate": 8.247720849198235e-06, "loss": 0.7963, "step": 6652 }, { "epoch": 0.59, "grad_norm": 4.3653055692873926, "learning_rate": 8.247171579193505e-06, "loss": 0.8408, "step": 6653 }, { "epoch": 0.59, "grad_norm": 6.459434549544171, "learning_rate": 8.24662224141118e-06, "loss": 0.7536, "step": 6654 }, { "epoch": 0.59, "grad_norm": 5.951661783722108, "learning_rate": 8.246072835862726e-06, "loss": 0.7502, "step": 6655 }, { "epoch": 0.59, "grad_norm": 5.73667383494703, "learning_rate": 8.245523362559611e-06, "loss": 0.7662, "step": 6656 }, { "epoch": 0.59, "grad_norm": 6.066635551806379, "learning_rate": 8.244973821513302e-06, "loss": 0.8196, "step": 6657 }, { "epoch": 0.59, "grad_norm": 7.3349799020133135, "learning_rate": 8.244424212735272e-06, "loss": 0.8205, "step": 6658 }, { "epoch": 0.59, "grad_norm": 7.136730473794988, "learning_rate": 8.24387453623699e-06, "loss": 0.8338, "step": 6659 }, { "epoch": 0.59, "grad_norm": 7.026718960353829, "learning_rate": 8.243324792029931e-06, "loss": 0.8125, "step": 6660 }, { "epoch": 0.59, "grad_norm": 4.98620905558427, "learning_rate": 8.242774980125573e-06, "loss": 0.8538, "step": 6661 }, { "epoch": 0.59, "grad_norm": 4.807019931117183, "learning_rate": 8.242225100535386e-06, "loss": 0.8561, "step": 6662 }, { "epoch": 0.59, "grad_norm": 5.631264662860794, "learning_rate": 8.241675153270852e-06, "loss": 0.774, "step": 6663 }, { "epoch": 0.59, "grad_norm": 6.805289238645786, "learning_rate": 8.241125138343447e-06, "loss": 0.7928, "step": 6664 }, { "epoch": 0.59, "grad_norm": 5.050954164609158, "learning_rate": 8.240575055764653e-06, "loss": 0.8406, "step": 6665 }, { "epoch": 0.59, "grad_norm": 6.2786998129264875, "learning_rate": 8.24002490554595e-06, "loss": 0.8402, "step": 6666 }, { "epoch": 0.59, "grad_norm": 5.003252502321802, "learning_rate": 8.239474687698825e-06, "loss": 0.7612, "step": 6667 }, { "epoch": 0.59, "grad_norm": 6.67285773600483, "learning_rate": 8.23892440223476e-06, "loss": 0.8109, "step": 6668 }, { "epoch": 0.59, "grad_norm": 5.834436215495525, "learning_rate": 8.23837404916524e-06, "loss": 0.7623, "step": 6669 }, { "epoch": 0.6, "grad_norm": 6.312077964674626, "learning_rate": 8.237823628501756e-06, "loss": 0.7683, "step": 6670 }, { "epoch": 0.6, "grad_norm": 4.041893802694508, "learning_rate": 8.237273140255792e-06, "loss": 0.8054, "step": 6671 }, { "epoch": 0.6, "grad_norm": 6.309996305541788, "learning_rate": 8.236722584438841e-06, "loss": 0.7876, "step": 6672 }, { "epoch": 0.6, "grad_norm": 5.907666052710539, "learning_rate": 8.236171961062394e-06, "loss": 0.7323, "step": 6673 }, { "epoch": 0.6, "grad_norm": 6.5653490628792825, "learning_rate": 8.235621270137945e-06, "loss": 0.8625, "step": 6674 }, { "epoch": 0.6, "grad_norm": 5.005983595096594, "learning_rate": 8.235070511676989e-06, "loss": 0.7886, "step": 6675 }, { "epoch": 0.6, "grad_norm": 7.04460968680593, "learning_rate": 8.23451968569102e-06, "loss": 0.7985, "step": 6676 }, { "epoch": 0.6, "grad_norm": 9.962846117098081, "learning_rate": 8.233968792191535e-06, "loss": 0.8491, "step": 6677 }, { "epoch": 0.6, "grad_norm": 6.7943553165130695, "learning_rate": 8.233417831190036e-06, "loss": 0.8181, "step": 6678 }, { "epoch": 0.6, "grad_norm": 5.687193703900281, "learning_rate": 8.232866802698019e-06, "loss": 0.8123, "step": 6679 }, { "epoch": 0.6, "grad_norm": 4.871630135025604, "learning_rate": 8.232315706726988e-06, "loss": 0.7926, "step": 6680 }, { "epoch": 0.6, "grad_norm": 6.588484284722919, "learning_rate": 8.231764543288445e-06, "loss": 0.8622, "step": 6681 }, { "epoch": 0.6, "grad_norm": 5.3656298795301165, "learning_rate": 8.231213312393895e-06, "loss": 0.7683, "step": 6682 }, { "epoch": 0.6, "grad_norm": 5.2690455673925625, "learning_rate": 8.230662014054841e-06, "loss": 0.7745, "step": 6683 }, { "epoch": 0.6, "grad_norm": 6.109352543813534, "learning_rate": 8.230110648282797e-06, "loss": 0.7916, "step": 6684 }, { "epoch": 0.6, "grad_norm": 6.03225231892724, "learning_rate": 8.229559215089264e-06, "loss": 0.8575, "step": 6685 }, { "epoch": 0.6, "grad_norm": 6.516175683311602, "learning_rate": 8.229007714485755e-06, "loss": 0.8144, "step": 6686 }, { "epoch": 0.6, "grad_norm": 6.817765330982432, "learning_rate": 8.228456146483781e-06, "loss": 0.829, "step": 6687 }, { "epoch": 0.6, "grad_norm": 5.103612679052471, "learning_rate": 8.227904511094858e-06, "loss": 0.7881, "step": 6688 }, { "epoch": 0.6, "grad_norm": 5.809276732097919, "learning_rate": 8.227352808330495e-06, "loss": 0.8235, "step": 6689 }, { "epoch": 0.6, "grad_norm": 6.3951427517731, "learning_rate": 8.22680103820221e-06, "loss": 0.774, "step": 6690 }, { "epoch": 0.6, "grad_norm": 6.068680907143205, "learning_rate": 8.226249200721521e-06, "loss": 0.7712, "step": 6691 }, { "epoch": 0.6, "grad_norm": 5.945854506324684, "learning_rate": 8.225697295899944e-06, "loss": 0.8623, "step": 6692 }, { "epoch": 0.6, "grad_norm": 6.068690926975485, "learning_rate": 8.225145323749e-06, "loss": 0.8218, "step": 6693 }, { "epoch": 0.6, "grad_norm": 5.559512495772458, "learning_rate": 8.224593284280212e-06, "loss": 0.7543, "step": 6694 }, { "epoch": 0.6, "grad_norm": 4.843239391864627, "learning_rate": 8.224041177505097e-06, "loss": 0.8194, "step": 6695 }, { "epoch": 0.6, "grad_norm": 6.371643149671071, "learning_rate": 8.223489003435188e-06, "loss": 0.8096, "step": 6696 }, { "epoch": 0.6, "grad_norm": 6.565507795930152, "learning_rate": 8.222936762082003e-06, "loss": 0.8431, "step": 6697 }, { "epoch": 0.6, "grad_norm": 6.050620507692319, "learning_rate": 8.222384453457071e-06, "loss": 0.7427, "step": 6698 }, { "epoch": 0.6, "grad_norm": 8.695908403104138, "learning_rate": 8.22183207757192e-06, "loss": 0.7872, "step": 6699 }, { "epoch": 0.6, "grad_norm": 3.7861713277509152, "learning_rate": 8.221279634438082e-06, "loss": 0.7969, "step": 6700 }, { "epoch": 0.6, "grad_norm": 5.910134269406934, "learning_rate": 8.220727124067085e-06, "loss": 0.8585, "step": 6701 }, { "epoch": 0.6, "grad_norm": 6.651640446394922, "learning_rate": 8.220174546470462e-06, "loss": 0.7407, "step": 6702 }, { "epoch": 0.6, "grad_norm": 5.748679608248464, "learning_rate": 8.219621901659746e-06, "loss": 0.7871, "step": 6703 }, { "epoch": 0.6, "grad_norm": 5.9950266831591845, "learning_rate": 8.219069189646476e-06, "loss": 0.8275, "step": 6704 }, { "epoch": 0.6, "grad_norm": 6.492172347895253, "learning_rate": 8.218516410442188e-06, "loss": 0.7952, "step": 6705 }, { "epoch": 0.6, "grad_norm": 5.634356187379448, "learning_rate": 8.217963564058415e-06, "loss": 0.8303, "step": 6706 }, { "epoch": 0.6, "grad_norm": 7.120977597631844, "learning_rate": 8.217410650506702e-06, "loss": 0.812, "step": 6707 }, { "epoch": 0.6, "grad_norm": 6.693817833452381, "learning_rate": 8.216857669798586e-06, "loss": 0.8169, "step": 6708 }, { "epoch": 0.6, "grad_norm": 5.748880775794269, "learning_rate": 8.216304621945612e-06, "loss": 0.8332, "step": 6709 }, { "epoch": 0.6, "grad_norm": 6.251488163643184, "learning_rate": 8.215751506959322e-06, "loss": 0.8978, "step": 6710 }, { "epoch": 0.6, "grad_norm": 5.850339033425616, "learning_rate": 8.21519832485126e-06, "loss": 0.8384, "step": 6711 }, { "epoch": 0.6, "grad_norm": 6.213198379777557, "learning_rate": 8.214645075632979e-06, "loss": 0.7318, "step": 6712 }, { "epoch": 0.6, "grad_norm": 4.686815530653178, "learning_rate": 8.214091759316018e-06, "loss": 0.8532, "step": 6713 }, { "epoch": 0.6, "grad_norm": 8.030411411361563, "learning_rate": 8.213538375911932e-06, "loss": 0.8281, "step": 6714 }, { "epoch": 0.6, "grad_norm": 5.429598453757664, "learning_rate": 8.212984925432269e-06, "loss": 0.7948, "step": 6715 }, { "epoch": 0.6, "grad_norm": 4.908624621308087, "learning_rate": 8.212431407888583e-06, "loss": 0.8071, "step": 6716 }, { "epoch": 0.6, "grad_norm": 4.787754853773987, "learning_rate": 8.211877823292425e-06, "loss": 0.7795, "step": 6717 }, { "epoch": 0.6, "grad_norm": 3.8359609715332157, "learning_rate": 8.211324171655353e-06, "loss": 0.7596, "step": 6718 }, { "epoch": 0.6, "grad_norm": 6.6647921998729585, "learning_rate": 8.210770452988921e-06, "loss": 0.8103, "step": 6719 }, { "epoch": 0.6, "grad_norm": 6.0627295307032085, "learning_rate": 8.210216667304687e-06, "loss": 0.8575, "step": 6720 }, { "epoch": 0.6, "grad_norm": 5.4554958528325725, "learning_rate": 8.20966281461421e-06, "loss": 0.833, "step": 6721 }, { "epoch": 0.6, "grad_norm": 4.80899943466147, "learning_rate": 8.20910889492905e-06, "loss": 0.8324, "step": 6722 }, { "epoch": 0.6, "grad_norm": 6.014812576735678, "learning_rate": 8.20855490826077e-06, "loss": 0.8156, "step": 6723 }, { "epoch": 0.6, "grad_norm": 6.317628828456329, "learning_rate": 8.208000854620934e-06, "loss": 0.8244, "step": 6724 }, { "epoch": 0.6, "grad_norm": 5.876650035677475, "learning_rate": 8.207446734021105e-06, "loss": 0.7035, "step": 6725 }, { "epoch": 0.6, "grad_norm": 5.364311990472134, "learning_rate": 8.206892546472851e-06, "loss": 0.7942, "step": 6726 }, { "epoch": 0.6, "grad_norm": 5.692485332649723, "learning_rate": 8.206338291987736e-06, "loss": 0.8462, "step": 6727 }, { "epoch": 0.6, "grad_norm": 3.9780671260189995, "learning_rate": 8.205783970577333e-06, "loss": 0.868, "step": 6728 }, { "epoch": 0.6, "grad_norm": 4.814032493004836, "learning_rate": 8.20522958225321e-06, "loss": 0.884, "step": 6729 }, { "epoch": 0.6, "grad_norm": 5.162143467255809, "learning_rate": 8.204675127026935e-06, "loss": 0.7841, "step": 6730 }, { "epoch": 0.6, "grad_norm": 5.948846005157953, "learning_rate": 8.204120604910088e-06, "loss": 0.7491, "step": 6731 }, { "epoch": 0.6, "grad_norm": 6.936337767331207, "learning_rate": 8.20356601591424e-06, "loss": 0.8757, "step": 6732 }, { "epoch": 0.6, "grad_norm": 5.661185428597538, "learning_rate": 8.203011360050967e-06, "loss": 0.8267, "step": 6733 }, { "epoch": 0.6, "grad_norm": 5.9811674016598415, "learning_rate": 8.202456637331846e-06, "loss": 0.8248, "step": 6734 }, { "epoch": 0.6, "grad_norm": 4.988377044071307, "learning_rate": 8.201901847768456e-06, "loss": 0.8355, "step": 6735 }, { "epoch": 0.6, "grad_norm": 6.1582446685505685, "learning_rate": 8.201346991372379e-06, "loss": 0.8723, "step": 6736 }, { "epoch": 0.6, "grad_norm": 6.540308234829635, "learning_rate": 8.200792068155192e-06, "loss": 0.8265, "step": 6737 }, { "epoch": 0.6, "grad_norm": 6.5769686675761765, "learning_rate": 8.20023707812848e-06, "loss": 0.7978, "step": 6738 }, { "epoch": 0.6, "grad_norm": 6.218094294102233, "learning_rate": 8.199682021303829e-06, "loss": 0.7873, "step": 6739 }, { "epoch": 0.6, "grad_norm": 6.420048223544867, "learning_rate": 8.199126897692823e-06, "loss": 0.7766, "step": 6740 }, { "epoch": 0.6, "grad_norm": 4.785156796671875, "learning_rate": 8.198571707307048e-06, "loss": 0.8748, "step": 6741 }, { "epoch": 0.6, "grad_norm": 4.455397181145248, "learning_rate": 8.198016450158093e-06, "loss": 0.782, "step": 6742 }, { "epoch": 0.6, "grad_norm": 5.134533703428839, "learning_rate": 8.19746112625755e-06, "loss": 0.8322, "step": 6743 }, { "epoch": 0.6, "grad_norm": 5.079899482854342, "learning_rate": 8.196905735617005e-06, "loss": 0.7369, "step": 6744 }, { "epoch": 0.6, "grad_norm": 7.162233867231676, "learning_rate": 8.196350278248056e-06, "loss": 0.8376, "step": 6745 }, { "epoch": 0.6, "grad_norm": 4.772519797278695, "learning_rate": 8.195794754162294e-06, "loss": 0.8033, "step": 6746 }, { "epoch": 0.6, "grad_norm": 5.726606906995816, "learning_rate": 8.195239163371316e-06, "loss": 0.7983, "step": 6747 }, { "epoch": 0.6, "grad_norm": 5.691049559504579, "learning_rate": 8.194683505886718e-06, "loss": 0.8081, "step": 6748 }, { "epoch": 0.6, "grad_norm": 7.723086282878302, "learning_rate": 8.194127781720097e-06, "loss": 0.8912, "step": 6749 }, { "epoch": 0.6, "grad_norm": 5.756419890225413, "learning_rate": 8.193571990883056e-06, "loss": 0.8155, "step": 6750 }, { "epoch": 0.6, "grad_norm": 5.007087562229614, "learning_rate": 8.19301613338719e-06, "loss": 0.757, "step": 6751 }, { "epoch": 0.6, "grad_norm": 7.714328572336348, "learning_rate": 8.192460209244108e-06, "loss": 0.8089, "step": 6752 }, { "epoch": 0.6, "grad_norm": 4.549118555905508, "learning_rate": 8.19190421846541e-06, "loss": 0.8166, "step": 6753 }, { "epoch": 0.6, "grad_norm": 6.496844533475255, "learning_rate": 8.1913481610627e-06, "loss": 0.7505, "step": 6754 }, { "epoch": 0.6, "grad_norm": 6.722586243374113, "learning_rate": 8.190792037047587e-06, "loss": 0.7691, "step": 6755 }, { "epoch": 0.6, "grad_norm": 5.805282775228774, "learning_rate": 8.190235846431678e-06, "loss": 0.8194, "step": 6756 }, { "epoch": 0.6, "grad_norm": 4.547130797908869, "learning_rate": 8.189679589226582e-06, "loss": 0.7464, "step": 6757 }, { "epoch": 0.6, "grad_norm": 4.972212872332395, "learning_rate": 8.18912326544391e-06, "loss": 0.8163, "step": 6758 }, { "epoch": 0.6, "grad_norm": 6.2739354416947055, "learning_rate": 8.188566875095274e-06, "loss": 0.8682, "step": 6759 }, { "epoch": 0.6, "grad_norm": 4.997055205978728, "learning_rate": 8.188010418192286e-06, "loss": 0.8246, "step": 6760 }, { "epoch": 0.6, "grad_norm": 4.8199179865655974, "learning_rate": 8.187453894746564e-06, "loss": 0.7799, "step": 6761 }, { "epoch": 0.6, "grad_norm": 4.922491042635821, "learning_rate": 8.186897304769722e-06, "loss": 0.8579, "step": 6762 }, { "epoch": 0.6, "grad_norm": 6.550495885303634, "learning_rate": 8.186340648273377e-06, "loss": 0.8568, "step": 6763 }, { "epoch": 0.6, "grad_norm": 7.150879273873694, "learning_rate": 8.18578392526915e-06, "loss": 0.8603, "step": 6764 }, { "epoch": 0.6, "grad_norm": 4.929845725570779, "learning_rate": 8.18522713576866e-06, "loss": 0.775, "step": 6765 }, { "epoch": 0.6, "grad_norm": 7.025039800120546, "learning_rate": 8.184670279783528e-06, "loss": 0.8393, "step": 6766 }, { "epoch": 0.6, "grad_norm": 6.0803708695159875, "learning_rate": 8.18411335732538e-06, "loss": 0.7927, "step": 6767 }, { "epoch": 0.6, "grad_norm": 5.728429169558053, "learning_rate": 8.183556368405836e-06, "loss": 0.7339, "step": 6768 }, { "epoch": 0.6, "grad_norm": 7.0377435319472195, "learning_rate": 8.182999313036528e-06, "loss": 0.8049, "step": 6769 }, { "epoch": 0.6, "grad_norm": 5.44357289244365, "learning_rate": 8.182442191229078e-06, "loss": 0.7969, "step": 6770 }, { "epoch": 0.6, "grad_norm": 8.514304423351478, "learning_rate": 8.181885002995118e-06, "loss": 0.8213, "step": 6771 }, { "epoch": 0.6, "grad_norm": 5.891871592294824, "learning_rate": 8.181327748346275e-06, "loss": 0.7534, "step": 6772 }, { "epoch": 0.6, "grad_norm": 4.741781741770471, "learning_rate": 8.180770427294182e-06, "loss": 0.7519, "step": 6773 }, { "epoch": 0.6, "grad_norm": 6.824199954517697, "learning_rate": 8.180213039850475e-06, "loss": 0.861, "step": 6774 }, { "epoch": 0.6, "grad_norm": 8.077784606921792, "learning_rate": 8.179655586026783e-06, "loss": 0.7766, "step": 6775 }, { "epoch": 0.6, "grad_norm": 6.857431538899155, "learning_rate": 8.179098065834745e-06, "loss": 0.8575, "step": 6776 }, { "epoch": 0.6, "grad_norm": 4.907148971750793, "learning_rate": 8.178540479285996e-06, "loss": 0.7528, "step": 6777 }, { "epoch": 0.6, "grad_norm": 10.173039719142778, "learning_rate": 8.177982826392177e-06, "loss": 0.7967, "step": 6778 }, { "epoch": 0.6, "grad_norm": 5.143584074938996, "learning_rate": 8.177425107164925e-06, "loss": 0.8233, "step": 6779 }, { "epoch": 0.6, "grad_norm": 6.122294221530041, "learning_rate": 8.176867321615882e-06, "loss": 0.8168, "step": 6780 }, { "epoch": 0.6, "grad_norm": 7.092034472665681, "learning_rate": 8.176309469756691e-06, "loss": 0.7618, "step": 6781 }, { "epoch": 0.61, "grad_norm": 6.702818327295792, "learning_rate": 8.175751551598995e-06, "loss": 0.7966, "step": 6782 }, { "epoch": 0.61, "grad_norm": 4.110430875160217, "learning_rate": 8.175193567154442e-06, "loss": 0.8258, "step": 6783 }, { "epoch": 0.61, "grad_norm": 5.509539918378062, "learning_rate": 8.174635516434677e-06, "loss": 0.789, "step": 6784 }, { "epoch": 0.61, "grad_norm": 5.562237676943552, "learning_rate": 8.174077399451348e-06, "loss": 0.8354, "step": 6785 }, { "epoch": 0.61, "grad_norm": 5.094386956191116, "learning_rate": 8.173519216216103e-06, "loss": 0.7404, "step": 6786 }, { "epoch": 0.61, "grad_norm": 5.5645275635483555, "learning_rate": 8.172960966740594e-06, "loss": 0.8072, "step": 6787 }, { "epoch": 0.61, "grad_norm": 5.650625379002032, "learning_rate": 8.172402651036474e-06, "loss": 0.7476, "step": 6788 }, { "epoch": 0.61, "grad_norm": 6.971573122752251, "learning_rate": 8.171844269115397e-06, "loss": 0.8244, "step": 6789 }, { "epoch": 0.61, "grad_norm": 5.977950625599968, "learning_rate": 8.171285820989015e-06, "loss": 0.7774, "step": 6790 }, { "epoch": 0.61, "grad_norm": 9.290647430441261, "learning_rate": 8.170727306668988e-06, "loss": 0.808, "step": 6791 }, { "epoch": 0.61, "grad_norm": 5.563718367702113, "learning_rate": 8.170168726166974e-06, "loss": 0.7826, "step": 6792 }, { "epoch": 0.61, "grad_norm": 6.600539263370696, "learning_rate": 8.169610079494628e-06, "loss": 0.8571, "step": 6793 }, { "epoch": 0.61, "grad_norm": 5.083525114299053, "learning_rate": 8.169051366663612e-06, "loss": 0.7563, "step": 6794 }, { "epoch": 0.61, "grad_norm": 5.806583175030543, "learning_rate": 8.16849258768559e-06, "loss": 0.8494, "step": 6795 }, { "epoch": 0.61, "grad_norm": 5.210277699486852, "learning_rate": 8.167933742572225e-06, "loss": 0.7641, "step": 6796 }, { "epoch": 0.61, "grad_norm": 5.462452776356719, "learning_rate": 8.16737483133518e-06, "loss": 0.789, "step": 6797 }, { "epoch": 0.61, "grad_norm": 5.2495499126198775, "learning_rate": 8.166815853986123e-06, "loss": 0.784, "step": 6798 }, { "epoch": 0.61, "grad_norm": 7.473298786178799, "learning_rate": 8.16625681053672e-06, "loss": 0.768, "step": 6799 }, { "epoch": 0.61, "grad_norm": 4.556264823653086, "learning_rate": 8.165697700998638e-06, "loss": 0.8124, "step": 6800 }, { "epoch": 0.61, "grad_norm": 6.7636546967405575, "learning_rate": 8.16513852538355e-06, "loss": 0.7941, "step": 6801 }, { "epoch": 0.61, "grad_norm": 5.364103953875404, "learning_rate": 8.164579283703127e-06, "loss": 0.7869, "step": 6802 }, { "epoch": 0.61, "grad_norm": 5.577240186213648, "learning_rate": 8.164019975969043e-06, "loss": 0.8963, "step": 6803 }, { "epoch": 0.61, "grad_norm": 5.090283014326774, "learning_rate": 8.16346060219297e-06, "loss": 0.7774, "step": 6804 }, { "epoch": 0.61, "grad_norm": 4.241281539375794, "learning_rate": 8.162901162386586e-06, "loss": 0.8818, "step": 6805 }, { "epoch": 0.61, "grad_norm": 6.144286080216064, "learning_rate": 8.162341656561565e-06, "loss": 0.8013, "step": 6806 }, { "epoch": 0.61, "grad_norm": 5.877036976707335, "learning_rate": 8.161782084729588e-06, "loss": 0.8055, "step": 6807 }, { "epoch": 0.61, "grad_norm": 5.668694898820878, "learning_rate": 8.161222446902334e-06, "loss": 0.8517, "step": 6808 }, { "epoch": 0.61, "grad_norm": 5.32381708234665, "learning_rate": 8.160662743091485e-06, "loss": 0.8482, "step": 6809 }, { "epoch": 0.61, "grad_norm": 4.426583113177476, "learning_rate": 8.160102973308723e-06, "loss": 0.8474, "step": 6810 }, { "epoch": 0.61, "grad_norm": 5.504120440284222, "learning_rate": 8.159543137565731e-06, "loss": 0.8336, "step": 6811 }, { "epoch": 0.61, "grad_norm": 7.303393997002978, "learning_rate": 8.158983235874196e-06, "loss": 0.7589, "step": 6812 }, { "epoch": 0.61, "grad_norm": 4.827552303619555, "learning_rate": 8.158423268245805e-06, "loss": 0.8221, "step": 6813 }, { "epoch": 0.61, "grad_norm": 6.509162292212347, "learning_rate": 8.157863234692244e-06, "loss": 0.8244, "step": 6814 }, { "epoch": 0.61, "grad_norm": 5.1233057919885185, "learning_rate": 8.157303135225204e-06, "loss": 0.798, "step": 6815 }, { "epoch": 0.61, "grad_norm": 5.495505914142259, "learning_rate": 8.156742969856375e-06, "loss": 0.7629, "step": 6816 }, { "epoch": 0.61, "grad_norm": 7.231185287709736, "learning_rate": 8.156182738597449e-06, "loss": 0.9044, "step": 6817 }, { "epoch": 0.61, "grad_norm": 6.220836908956301, "learning_rate": 8.155622441460122e-06, "loss": 0.7852, "step": 6818 }, { "epoch": 0.61, "grad_norm": 7.041208639860685, "learning_rate": 8.155062078456085e-06, "loss": 0.7411, "step": 6819 }, { "epoch": 0.61, "grad_norm": 4.912235470977246, "learning_rate": 8.15450164959704e-06, "loss": 0.8394, "step": 6820 }, { "epoch": 0.61, "grad_norm": 6.420277143623738, "learning_rate": 8.153941154894678e-06, "loss": 0.7558, "step": 6821 }, { "epoch": 0.61, "grad_norm": 5.861708346173799, "learning_rate": 8.153380594360702e-06, "loss": 0.741, "step": 6822 }, { "epoch": 0.61, "grad_norm": 5.84953062076325, "learning_rate": 8.152819968006812e-06, "loss": 0.7214, "step": 6823 }, { "epoch": 0.61, "grad_norm": 4.7404295635205695, "learning_rate": 8.15225927584471e-06, "loss": 0.8035, "step": 6824 }, { "epoch": 0.61, "grad_norm": 6.575158836595933, "learning_rate": 8.1516985178861e-06, "loss": 0.7956, "step": 6825 }, { "epoch": 0.61, "grad_norm": 5.075126304521108, "learning_rate": 8.151137694142684e-06, "loss": 0.7828, "step": 6826 }, { "epoch": 0.61, "grad_norm": 4.2820124381509865, "learning_rate": 8.15057680462617e-06, "loss": 0.7978, "step": 6827 }, { "epoch": 0.61, "grad_norm": 5.983107723039327, "learning_rate": 8.150015849348265e-06, "loss": 0.8165, "step": 6828 }, { "epoch": 0.61, "grad_norm": 5.273383760305386, "learning_rate": 8.149454828320678e-06, "loss": 0.7939, "step": 6829 }, { "epoch": 0.61, "grad_norm": 5.251481852111514, "learning_rate": 8.148893741555118e-06, "loss": 0.7814, "step": 6830 }, { "epoch": 0.61, "grad_norm": 6.533898758373996, "learning_rate": 8.148332589063297e-06, "loss": 0.7927, "step": 6831 }, { "epoch": 0.61, "grad_norm": 7.315357139495274, "learning_rate": 8.147771370856929e-06, "loss": 0.8732, "step": 6832 }, { "epoch": 0.61, "grad_norm": 4.888643359440391, "learning_rate": 8.147210086947725e-06, "loss": 0.763, "step": 6833 }, { "epoch": 0.61, "grad_norm": 5.143635920893837, "learning_rate": 8.146648737347404e-06, "loss": 0.7956, "step": 6834 }, { "epoch": 0.61, "grad_norm": 5.103803573234262, "learning_rate": 8.146087322067682e-06, "loss": 0.8029, "step": 6835 }, { "epoch": 0.61, "grad_norm": 5.985284850030705, "learning_rate": 8.145525841120278e-06, "loss": 0.7638, "step": 6836 }, { "epoch": 0.61, "grad_norm": 4.5194660585594955, "learning_rate": 8.144964294516909e-06, "loss": 0.7877, "step": 6837 }, { "epoch": 0.61, "grad_norm": 4.520263103842538, "learning_rate": 8.144402682269296e-06, "loss": 0.8234, "step": 6838 }, { "epoch": 0.61, "grad_norm": 4.9904163951965215, "learning_rate": 8.143841004389168e-06, "loss": 0.7854, "step": 6839 }, { "epoch": 0.61, "grad_norm": 5.369234158703639, "learning_rate": 8.14327926088824e-06, "loss": 0.7867, "step": 6840 }, { "epoch": 0.61, "grad_norm": 6.690422618381915, "learning_rate": 8.142717451778242e-06, "loss": 0.8314, "step": 6841 }, { "epoch": 0.61, "grad_norm": 6.6109704626641514, "learning_rate": 8.1421555770709e-06, "loss": 0.816, "step": 6842 }, { "epoch": 0.61, "grad_norm": 6.839103517246004, "learning_rate": 8.14159363677794e-06, "loss": 0.7963, "step": 6843 }, { "epoch": 0.61, "grad_norm": 6.719017452422228, "learning_rate": 8.141031630911095e-06, "loss": 0.7266, "step": 6844 }, { "epoch": 0.61, "grad_norm": 5.59964376667567, "learning_rate": 8.140469559482092e-06, "loss": 0.7151, "step": 6845 }, { "epoch": 0.61, "grad_norm": 7.648427732564185, "learning_rate": 8.139907422502666e-06, "loss": 0.8053, "step": 6846 }, { "epoch": 0.61, "grad_norm": 4.824229723552528, "learning_rate": 8.139345219984546e-06, "loss": 0.808, "step": 6847 }, { "epoch": 0.61, "grad_norm": 5.963415150439329, "learning_rate": 8.138782951939473e-06, "loss": 0.8125, "step": 6848 }, { "epoch": 0.61, "grad_norm": 5.611454503742912, "learning_rate": 8.138220618379179e-06, "loss": 0.8663, "step": 6849 }, { "epoch": 0.61, "grad_norm": 5.904752700901874, "learning_rate": 8.1376582193154e-06, "loss": 0.8021, "step": 6850 }, { "epoch": 0.61, "grad_norm": 6.725328702630351, "learning_rate": 8.137095754759878e-06, "loss": 0.8336, "step": 6851 }, { "epoch": 0.61, "grad_norm": 6.6699802046968335, "learning_rate": 8.136533224724354e-06, "loss": 0.8291, "step": 6852 }, { "epoch": 0.61, "grad_norm": 4.314859010449418, "learning_rate": 8.135970629220567e-06, "loss": 0.8737, "step": 6853 }, { "epoch": 0.61, "grad_norm": 4.622577967365904, "learning_rate": 8.135407968260258e-06, "loss": 0.853, "step": 6854 }, { "epoch": 0.61, "grad_norm": 7.950566508529718, "learning_rate": 8.134845241855178e-06, "loss": 0.9329, "step": 6855 }, { "epoch": 0.61, "grad_norm": 5.779800606728138, "learning_rate": 8.134282450017067e-06, "loss": 0.7486, "step": 6856 }, { "epoch": 0.61, "grad_norm": 7.164634793368161, "learning_rate": 8.133719592757673e-06, "loss": 0.8316, "step": 6857 }, { "epoch": 0.61, "grad_norm": 6.662442881375417, "learning_rate": 8.133156670088747e-06, "loss": 0.8398, "step": 6858 }, { "epoch": 0.61, "grad_norm": 5.357576422627782, "learning_rate": 8.132593682022037e-06, "loss": 0.7757, "step": 6859 }, { "epoch": 0.61, "grad_norm": 4.582341819156483, "learning_rate": 8.132030628569292e-06, "loss": 0.7918, "step": 6860 }, { "epoch": 0.61, "grad_norm": 5.50008046140628, "learning_rate": 8.131467509742267e-06, "loss": 0.7525, "step": 6861 }, { "epoch": 0.61, "grad_norm": 5.272915891838216, "learning_rate": 8.130904325552717e-06, "loss": 0.7832, "step": 6862 }, { "epoch": 0.61, "grad_norm": 5.646948385609108, "learning_rate": 8.130341076012396e-06, "loss": 0.8402, "step": 6863 }, { "epoch": 0.61, "grad_norm": 5.2661541941171635, "learning_rate": 8.12977776113306e-06, "loss": 0.8862, "step": 6864 }, { "epoch": 0.61, "grad_norm": 5.003583840613984, "learning_rate": 8.129214380926466e-06, "loss": 0.8464, "step": 6865 }, { "epoch": 0.61, "grad_norm": 5.381261050874977, "learning_rate": 8.128650935404376e-06, "loss": 0.8136, "step": 6866 }, { "epoch": 0.61, "grad_norm": 7.54366513678714, "learning_rate": 8.128087424578548e-06, "loss": 0.8031, "step": 6867 }, { "epoch": 0.61, "grad_norm": 6.590172837204918, "learning_rate": 8.127523848460746e-06, "loss": 0.7808, "step": 6868 }, { "epoch": 0.61, "grad_norm": 5.662201052328477, "learning_rate": 8.126960207062734e-06, "loss": 0.824, "step": 6869 }, { "epoch": 0.61, "grad_norm": 5.5167848768471295, "learning_rate": 8.126396500396275e-06, "loss": 0.7918, "step": 6870 }, { "epoch": 0.61, "grad_norm": 5.81065370020721, "learning_rate": 8.125832728473136e-06, "loss": 0.8308, "step": 6871 }, { "epoch": 0.61, "grad_norm": 5.127690825568373, "learning_rate": 8.125268891305083e-06, "loss": 0.7917, "step": 6872 }, { "epoch": 0.61, "grad_norm": 6.312659700105328, "learning_rate": 8.124704988903886e-06, "loss": 0.7825, "step": 6873 }, { "epoch": 0.61, "grad_norm": 4.27607470677544, "learning_rate": 8.124141021281318e-06, "loss": 0.7204, "step": 6874 }, { "epoch": 0.61, "grad_norm": 6.275915745151287, "learning_rate": 8.123576988449144e-06, "loss": 0.7385, "step": 6875 }, { "epoch": 0.61, "grad_norm": 5.913608185297566, "learning_rate": 8.123012890419145e-06, "loss": 0.8451, "step": 6876 }, { "epoch": 0.61, "grad_norm": 5.371533331276638, "learning_rate": 8.122448727203088e-06, "loss": 0.8647, "step": 6877 }, { "epoch": 0.61, "grad_norm": 4.801931362257748, "learning_rate": 8.121884498812755e-06, "loss": 0.833, "step": 6878 }, { "epoch": 0.61, "grad_norm": 5.780468494552725, "learning_rate": 8.121320205259918e-06, "loss": 0.802, "step": 6879 }, { "epoch": 0.61, "grad_norm": 6.546743786599118, "learning_rate": 8.120755846556356e-06, "loss": 0.8544, "step": 6880 }, { "epoch": 0.61, "grad_norm": 5.154814399333846, "learning_rate": 8.120191422713852e-06, "loss": 0.7827, "step": 6881 }, { "epoch": 0.61, "grad_norm": 6.4609596957640045, "learning_rate": 8.119626933744185e-06, "loss": 0.7515, "step": 6882 }, { "epoch": 0.61, "grad_norm": 6.115323064992698, "learning_rate": 8.119062379659135e-06, "loss": 0.8391, "step": 6883 }, { "epoch": 0.61, "grad_norm": 7.339099166158037, "learning_rate": 8.118497760470492e-06, "loss": 0.8104, "step": 6884 }, { "epoch": 0.61, "grad_norm": 4.640247223517294, "learning_rate": 8.117933076190037e-06, "loss": 0.7984, "step": 6885 }, { "epoch": 0.61, "grad_norm": 6.2658884939225175, "learning_rate": 8.117368326829555e-06, "loss": 0.7895, "step": 6886 }, { "epoch": 0.61, "grad_norm": 5.997831738657247, "learning_rate": 8.116803512400836e-06, "loss": 0.8183, "step": 6887 }, { "epoch": 0.61, "grad_norm": 5.690688217996239, "learning_rate": 8.116238632915672e-06, "loss": 0.7754, "step": 6888 }, { "epoch": 0.61, "grad_norm": 5.315484525702357, "learning_rate": 8.11567368838585e-06, "loss": 0.7402, "step": 6889 }, { "epoch": 0.61, "grad_norm": 5.346423521077991, "learning_rate": 8.115108678823163e-06, "loss": 0.7768, "step": 6890 }, { "epoch": 0.61, "grad_norm": 6.013819994425285, "learning_rate": 8.114543604239404e-06, "loss": 0.7731, "step": 6891 }, { "epoch": 0.61, "grad_norm": 5.631999179450564, "learning_rate": 8.113978464646368e-06, "loss": 0.7658, "step": 6892 }, { "epoch": 0.61, "grad_norm": 6.443977734159103, "learning_rate": 8.113413260055852e-06, "loss": 0.8557, "step": 6893 }, { "epoch": 0.62, "grad_norm": 6.393525082723005, "learning_rate": 8.11284799047965e-06, "loss": 0.7758, "step": 6894 }, { "epoch": 0.62, "grad_norm": 5.81174470702288, "learning_rate": 8.112282655929566e-06, "loss": 0.7732, "step": 6895 }, { "epoch": 0.62, "grad_norm": 5.599158599692469, "learning_rate": 8.111717256417396e-06, "loss": 0.7886, "step": 6896 }, { "epoch": 0.62, "grad_norm": 5.996027548143773, "learning_rate": 8.111151791954944e-06, "loss": 0.8364, "step": 6897 }, { "epoch": 0.62, "grad_norm": 4.633620807994186, "learning_rate": 8.11058626255401e-06, "loss": 0.8174, "step": 6898 }, { "epoch": 0.62, "grad_norm": 4.92331886638172, "learning_rate": 8.110020668226401e-06, "loss": 0.8033, "step": 6899 }, { "epoch": 0.62, "grad_norm": 10.210251747134011, "learning_rate": 8.109455008983919e-06, "loss": 0.8682, "step": 6900 }, { "epoch": 0.62, "grad_norm": 6.153554570934183, "learning_rate": 8.108889284838377e-06, "loss": 0.8647, "step": 6901 }, { "epoch": 0.62, "grad_norm": 7.095209242551938, "learning_rate": 8.108323495801577e-06, "loss": 0.7837, "step": 6902 }, { "epoch": 0.62, "grad_norm": 6.081346769070026, "learning_rate": 8.107757641885333e-06, "loss": 0.8002, "step": 6903 }, { "epoch": 0.62, "grad_norm": 5.8573624924735475, "learning_rate": 8.107191723101452e-06, "loss": 0.8411, "step": 6904 }, { "epoch": 0.62, "grad_norm": 5.0430957202438815, "learning_rate": 8.106625739461748e-06, "loss": 0.7544, "step": 6905 }, { "epoch": 0.62, "grad_norm": 7.17503847955542, "learning_rate": 8.106059690978038e-06, "loss": 0.7827, "step": 6906 }, { "epoch": 0.62, "grad_norm": 6.664366730544066, "learning_rate": 8.105493577662131e-06, "loss": 0.824, "step": 6907 }, { "epoch": 0.62, "grad_norm": 5.557421645602761, "learning_rate": 8.104927399525847e-06, "loss": 0.8958, "step": 6908 }, { "epoch": 0.62, "grad_norm": 5.954758798922541, "learning_rate": 8.104361156581006e-06, "loss": 0.7815, "step": 6909 }, { "epoch": 0.62, "grad_norm": 4.7029777278349165, "learning_rate": 8.103794848839422e-06, "loss": 0.8143, "step": 6910 }, { "epoch": 0.62, "grad_norm": 6.124038698323085, "learning_rate": 8.103228476312919e-06, "loss": 0.667, "step": 6911 }, { "epoch": 0.62, "grad_norm": 6.279534586050763, "learning_rate": 8.102662039013317e-06, "loss": 0.8412, "step": 6912 }, { "epoch": 0.62, "grad_norm": 7.205922362220809, "learning_rate": 8.102095536952438e-06, "loss": 0.8195, "step": 6913 }, { "epoch": 0.62, "grad_norm": 7.988199418043433, "learning_rate": 8.10152897014211e-06, "loss": 0.7739, "step": 6914 }, { "epoch": 0.62, "grad_norm": 6.073357368037343, "learning_rate": 8.100962338594157e-06, "loss": 0.8021, "step": 6915 }, { "epoch": 0.62, "grad_norm": 4.990205614124662, "learning_rate": 8.100395642320406e-06, "loss": 0.7871, "step": 6916 }, { "epoch": 0.62, "grad_norm": 5.394887112714275, "learning_rate": 8.099828881332686e-06, "loss": 0.7476, "step": 6917 }, { "epoch": 0.62, "grad_norm": 5.768547687627983, "learning_rate": 8.099262055642824e-06, "loss": 0.7648, "step": 6918 }, { "epoch": 0.62, "grad_norm": 6.9642161602317705, "learning_rate": 8.098695165262656e-06, "loss": 0.8223, "step": 6919 }, { "epoch": 0.62, "grad_norm": 4.796981725855802, "learning_rate": 8.098128210204012e-06, "loss": 0.7987, "step": 6920 }, { "epoch": 0.62, "grad_norm": 5.702603305336524, "learning_rate": 8.097561190478728e-06, "loss": 0.8386, "step": 6921 }, { "epoch": 0.62, "grad_norm": 5.533722035063568, "learning_rate": 8.096994106098636e-06, "loss": 0.8168, "step": 6922 }, { "epoch": 0.62, "grad_norm": 5.135957973632429, "learning_rate": 8.096426957075574e-06, "loss": 0.7984, "step": 6923 }, { "epoch": 0.62, "grad_norm": 5.866694709323272, "learning_rate": 8.095859743421381e-06, "loss": 0.81, "step": 6924 }, { "epoch": 0.62, "grad_norm": 5.32213808354182, "learning_rate": 8.095292465147897e-06, "loss": 0.789, "step": 6925 }, { "epoch": 0.62, "grad_norm": 6.7277875063639785, "learning_rate": 8.09472512226696e-06, "loss": 0.8294, "step": 6926 }, { "epoch": 0.62, "grad_norm": 4.545443037200798, "learning_rate": 8.094157714790413e-06, "loss": 0.8521, "step": 6927 }, { "epoch": 0.62, "grad_norm": 7.61571860077136, "learning_rate": 8.0935902427301e-06, "loss": 0.8807, "step": 6928 }, { "epoch": 0.62, "grad_norm": 5.322962944322857, "learning_rate": 8.093022706097866e-06, "loss": 0.7581, "step": 6929 }, { "epoch": 0.62, "grad_norm": 5.644755943220788, "learning_rate": 8.092455104905557e-06, "loss": 0.7437, "step": 6930 }, { "epoch": 0.62, "grad_norm": 6.5746084370535085, "learning_rate": 8.09188743916502e-06, "loss": 0.8115, "step": 6931 }, { "epoch": 0.62, "grad_norm": 6.296235367060424, "learning_rate": 8.091319708888103e-06, "loss": 0.7614, "step": 6932 }, { "epoch": 0.62, "grad_norm": 5.144345425633739, "learning_rate": 8.090751914086657e-06, "loss": 0.833, "step": 6933 }, { "epoch": 0.62, "grad_norm": 5.128175621627171, "learning_rate": 8.090184054772534e-06, "loss": 0.7565, "step": 6934 }, { "epoch": 0.62, "grad_norm": 7.813601358074685, "learning_rate": 8.089616130957585e-06, "loss": 0.7906, "step": 6935 }, { "epoch": 0.62, "grad_norm": 8.180421075103032, "learning_rate": 8.089048142653667e-06, "loss": 0.8108, "step": 6936 }, { "epoch": 0.62, "grad_norm": 6.891601226164926, "learning_rate": 8.088480089872633e-06, "loss": 0.8914, "step": 6937 }, { "epoch": 0.62, "grad_norm": 5.571865654652032, "learning_rate": 8.087911972626342e-06, "loss": 0.8394, "step": 6938 }, { "epoch": 0.62, "grad_norm": 6.303675364258027, "learning_rate": 8.08734379092665e-06, "loss": 0.8698, "step": 6939 }, { "epoch": 0.62, "grad_norm": 5.756576248684503, "learning_rate": 8.086775544785417e-06, "loss": 0.7857, "step": 6940 }, { "epoch": 0.62, "grad_norm": 5.298888694930089, "learning_rate": 8.086207234214505e-06, "loss": 0.7454, "step": 6941 }, { "epoch": 0.62, "grad_norm": 7.373510938220605, "learning_rate": 8.085638859225775e-06, "loss": 0.7796, "step": 6942 }, { "epoch": 0.62, "grad_norm": 5.316286544153729, "learning_rate": 8.085070419831092e-06, "loss": 0.7773, "step": 6943 }, { "epoch": 0.62, "grad_norm": 7.015787438423901, "learning_rate": 8.08450191604232e-06, "loss": 0.8078, "step": 6944 }, { "epoch": 0.62, "grad_norm": 4.881251841329082, "learning_rate": 8.083933347871325e-06, "loss": 0.7755, "step": 6945 }, { "epoch": 0.62, "grad_norm": 7.003916918990588, "learning_rate": 8.083364715329976e-06, "loss": 0.7666, "step": 6946 }, { "epoch": 0.62, "grad_norm": 5.362655848918575, "learning_rate": 8.082796018430142e-06, "loss": 0.8167, "step": 6947 }, { "epoch": 0.62, "grad_norm": 8.310372583636804, "learning_rate": 8.08222725718369e-06, "loss": 0.8206, "step": 6948 }, { "epoch": 0.62, "grad_norm": 6.778996741694583, "learning_rate": 8.081658431602495e-06, "loss": 0.8258, "step": 6949 }, { "epoch": 0.62, "grad_norm": 4.56039996098141, "learning_rate": 8.081089541698428e-06, "loss": 0.7602, "step": 6950 }, { "epoch": 0.62, "grad_norm": 6.865085658706781, "learning_rate": 8.080520587483366e-06, "loss": 0.8212, "step": 6951 }, { "epoch": 0.62, "grad_norm": 7.027833013730606, "learning_rate": 8.079951568969183e-06, "loss": 0.8003, "step": 6952 }, { "epoch": 0.62, "grad_norm": 5.428786319954096, "learning_rate": 8.079382486167753e-06, "loss": 0.8202, "step": 6953 }, { "epoch": 0.62, "grad_norm": 7.339472826038203, "learning_rate": 8.07881333909096e-06, "loss": 0.8908, "step": 6954 }, { "epoch": 0.62, "grad_norm": 5.590374295838503, "learning_rate": 8.07824412775068e-06, "loss": 0.7839, "step": 6955 }, { "epoch": 0.62, "grad_norm": 5.094366051187591, "learning_rate": 8.077674852158795e-06, "loss": 0.8216, "step": 6956 }, { "epoch": 0.62, "grad_norm": 4.2496740817483225, "learning_rate": 8.07710551232719e-06, "loss": 0.758, "step": 6957 }, { "epoch": 0.62, "grad_norm": 4.399303174182608, "learning_rate": 8.076536108267743e-06, "loss": 0.8259, "step": 6958 }, { "epoch": 0.62, "grad_norm": 6.37455476070981, "learning_rate": 8.075966639992342e-06, "loss": 0.8402, "step": 6959 }, { "epoch": 0.62, "grad_norm": 6.32986596248156, "learning_rate": 8.075397107512876e-06, "loss": 0.764, "step": 6960 }, { "epoch": 0.62, "grad_norm": 5.453865737659811, "learning_rate": 8.07482751084123e-06, "loss": 0.8024, "step": 6961 }, { "epoch": 0.62, "grad_norm": 8.52957073829763, "learning_rate": 8.074257849989293e-06, "loss": 0.809, "step": 6962 }, { "epoch": 0.62, "grad_norm": 4.527078502512016, "learning_rate": 8.073688124968955e-06, "loss": 0.8165, "step": 6963 }, { "epoch": 0.62, "grad_norm": 6.030371922159815, "learning_rate": 8.07311833579211e-06, "loss": 0.7702, "step": 6964 }, { "epoch": 0.62, "grad_norm": 4.979139956783152, "learning_rate": 8.07254848247065e-06, "loss": 0.9268, "step": 6965 }, { "epoch": 0.62, "grad_norm": 5.931818972339798, "learning_rate": 8.071978565016468e-06, "loss": 0.853, "step": 6966 }, { "epoch": 0.62, "grad_norm": 5.31965954621122, "learning_rate": 8.071408583441462e-06, "loss": 0.801, "step": 6967 }, { "epoch": 0.62, "grad_norm": 5.995403120231001, "learning_rate": 8.070838537757526e-06, "loss": 0.7903, "step": 6968 }, { "epoch": 0.62, "grad_norm": 7.59954487126442, "learning_rate": 8.070268427976562e-06, "loss": 0.7634, "step": 6969 }, { "epoch": 0.62, "grad_norm": 5.682777497293176, "learning_rate": 8.069698254110468e-06, "loss": 0.8763, "step": 6970 }, { "epoch": 0.62, "grad_norm": 5.993497868050582, "learning_rate": 8.069128016171147e-06, "loss": 0.7872, "step": 6971 }, { "epoch": 0.62, "grad_norm": 5.094022367626832, "learning_rate": 8.068557714170498e-06, "loss": 0.7709, "step": 6972 }, { "epoch": 0.62, "grad_norm": 6.010277345778064, "learning_rate": 8.067987348120427e-06, "loss": 0.785, "step": 6973 }, { "epoch": 0.62, "grad_norm": 4.259601463129585, "learning_rate": 8.06741691803284e-06, "loss": 0.7683, "step": 6974 }, { "epoch": 0.62, "grad_norm": 5.276894797465551, "learning_rate": 8.066846423919642e-06, "loss": 0.7652, "step": 6975 }, { "epoch": 0.62, "grad_norm": 5.168862560500962, "learning_rate": 8.066275865792741e-06, "loss": 0.8275, "step": 6976 }, { "epoch": 0.62, "grad_norm": 6.192228496971852, "learning_rate": 8.065705243664045e-06, "loss": 0.8005, "step": 6977 }, { "epoch": 0.62, "grad_norm": 5.7627853852827515, "learning_rate": 8.065134557545468e-06, "loss": 0.8165, "step": 6978 }, { "epoch": 0.62, "grad_norm": 9.448146352857938, "learning_rate": 8.06456380744892e-06, "loss": 0.8638, "step": 6979 }, { "epoch": 0.62, "grad_norm": 5.109266598211198, "learning_rate": 8.063992993386311e-06, "loss": 0.8211, "step": 6980 }, { "epoch": 0.62, "grad_norm": 4.556512022133831, "learning_rate": 8.06342211536956e-06, "loss": 0.7668, "step": 6981 }, { "epoch": 0.62, "grad_norm": 6.163084457007714, "learning_rate": 8.062851173410582e-06, "loss": 0.8003, "step": 6982 }, { "epoch": 0.62, "grad_norm": 6.362397295399718, "learning_rate": 8.06228016752129e-06, "loss": 0.8017, "step": 6983 }, { "epoch": 0.62, "grad_norm": 5.417428878720762, "learning_rate": 8.06170909771361e-06, "loss": 0.7566, "step": 6984 }, { "epoch": 0.62, "grad_norm": 6.115304861950151, "learning_rate": 8.061137963999455e-06, "loss": 0.7407, "step": 6985 }, { "epoch": 0.62, "grad_norm": 6.542282605303808, "learning_rate": 8.06056676639075e-06, "loss": 0.9014, "step": 6986 }, { "epoch": 0.62, "grad_norm": 5.806480335211902, "learning_rate": 8.059995504899415e-06, "loss": 0.7989, "step": 6987 }, { "epoch": 0.62, "grad_norm": 5.119646499339333, "learning_rate": 8.059424179537376e-06, "loss": 0.9001, "step": 6988 }, { "epoch": 0.62, "grad_norm": 6.022981879479564, "learning_rate": 8.058852790316557e-06, "loss": 0.7774, "step": 6989 }, { "epoch": 0.62, "grad_norm": 6.326752012408423, "learning_rate": 8.058281337248884e-06, "loss": 0.7773, "step": 6990 }, { "epoch": 0.62, "grad_norm": 4.27137035487564, "learning_rate": 8.057709820346287e-06, "loss": 0.7625, "step": 6991 }, { "epoch": 0.62, "grad_norm": 6.079449025463701, "learning_rate": 8.057138239620693e-06, "loss": 0.8139, "step": 6992 }, { "epoch": 0.62, "grad_norm": 5.062453255358927, "learning_rate": 8.056566595084034e-06, "loss": 0.7979, "step": 6993 }, { "epoch": 0.62, "grad_norm": 6.7117081009231025, "learning_rate": 8.05599488674824e-06, "loss": 0.7816, "step": 6994 }, { "epoch": 0.62, "grad_norm": 3.9722134823029234, "learning_rate": 8.055423114625246e-06, "loss": 0.8481, "step": 6995 }, { "epoch": 0.62, "grad_norm": 6.221278278133744, "learning_rate": 8.054851278726985e-06, "loss": 0.7564, "step": 6996 }, { "epoch": 0.62, "grad_norm": 6.254355286044848, "learning_rate": 8.054279379065395e-06, "loss": 0.8121, "step": 6997 }, { "epoch": 0.62, "grad_norm": 5.393736860176314, "learning_rate": 8.05370741565241e-06, "loss": 0.8305, "step": 6998 }, { "epoch": 0.62, "grad_norm": 5.7463245610590095, "learning_rate": 8.05313538849997e-06, "loss": 0.749, "step": 6999 }, { "epoch": 0.62, "grad_norm": 4.7156110096443715, "learning_rate": 8.052563297620017e-06, "loss": 0.8256, "step": 7000 }, { "epoch": 0.62, "grad_norm": 4.847610923145393, "learning_rate": 8.051991143024488e-06, "loss": 0.7734, "step": 7001 }, { "epoch": 0.62, "grad_norm": 4.874496945779397, "learning_rate": 8.05141892472533e-06, "loss": 0.8338, "step": 7002 }, { "epoch": 0.62, "grad_norm": 4.126585432122027, "learning_rate": 8.050846642734484e-06, "loss": 0.8316, "step": 7003 }, { "epoch": 0.62, "grad_norm": 4.788278386563417, "learning_rate": 8.050274297063894e-06, "loss": 0.755, "step": 7004 }, { "epoch": 0.62, "grad_norm": 4.354138131514921, "learning_rate": 8.049701887725509e-06, "loss": 0.7373, "step": 7005 }, { "epoch": 0.63, "grad_norm": 5.830904952011638, "learning_rate": 8.049129414731276e-06, "loss": 0.811, "step": 7006 }, { "epoch": 0.63, "grad_norm": 5.716719742526026, "learning_rate": 8.048556878093145e-06, "loss": 0.774, "step": 7007 }, { "epoch": 0.63, "grad_norm": 4.835406280812972, "learning_rate": 8.047984277823064e-06, "loss": 0.7893, "step": 7008 }, { "epoch": 0.63, "grad_norm": 6.423963106554698, "learning_rate": 8.047411613932986e-06, "loss": 0.9137, "step": 7009 }, { "epoch": 0.63, "grad_norm": 3.9766995976438695, "learning_rate": 8.046838886434865e-06, "loss": 0.7635, "step": 7010 }, { "epoch": 0.63, "grad_norm": 6.346345911486274, "learning_rate": 8.046266095340653e-06, "loss": 0.8763, "step": 7011 }, { "epoch": 0.63, "grad_norm": 5.718229827045991, "learning_rate": 8.045693240662309e-06, "loss": 0.7753, "step": 7012 }, { "epoch": 0.63, "grad_norm": 6.069104316441401, "learning_rate": 8.045120322411789e-06, "loss": 0.85, "step": 7013 }, { "epoch": 0.63, "grad_norm": 4.36903341643116, "learning_rate": 8.04454734060105e-06, "loss": 0.8455, "step": 7014 }, { "epoch": 0.63, "grad_norm": 6.04164733465387, "learning_rate": 8.043974295242052e-06, "loss": 0.808, "step": 7015 }, { "epoch": 0.63, "grad_norm": 9.52087450889189, "learning_rate": 8.043401186346758e-06, "loss": 0.7793, "step": 7016 }, { "epoch": 0.63, "grad_norm": 5.578440221600089, "learning_rate": 8.04282801392713e-06, "loss": 0.7606, "step": 7017 }, { "epoch": 0.63, "grad_norm": 5.431670872794137, "learning_rate": 8.042254777995129e-06, "loss": 0.7496, "step": 7018 }, { "epoch": 0.63, "grad_norm": 6.767166350642211, "learning_rate": 8.041681478562722e-06, "loss": 0.7245, "step": 7019 }, { "epoch": 0.63, "grad_norm": 5.755335391898238, "learning_rate": 8.041108115641876e-06, "loss": 0.6964, "step": 7020 }, { "epoch": 0.63, "grad_norm": 5.5996637492447165, "learning_rate": 8.040534689244558e-06, "loss": 0.8686, "step": 7021 }, { "epoch": 0.63, "grad_norm": 4.291805282630803, "learning_rate": 8.039961199382737e-06, "loss": 0.8295, "step": 7022 }, { "epoch": 0.63, "grad_norm": 4.988289567559861, "learning_rate": 8.039387646068384e-06, "loss": 0.83, "step": 7023 }, { "epoch": 0.63, "grad_norm": 5.7152843342582855, "learning_rate": 8.038814029313469e-06, "loss": 0.7755, "step": 7024 }, { "epoch": 0.63, "grad_norm": 4.8196808429672995, "learning_rate": 8.038240349129967e-06, "loss": 0.8208, "step": 7025 }, { "epoch": 0.63, "grad_norm": 4.791741879972288, "learning_rate": 8.03766660552985e-06, "loss": 0.836, "step": 7026 }, { "epoch": 0.63, "grad_norm": 5.244375499668143, "learning_rate": 8.037092798525096e-06, "loss": 0.8224, "step": 7027 }, { "epoch": 0.63, "grad_norm": 4.592336215535681, "learning_rate": 8.036518928127681e-06, "loss": 0.7325, "step": 7028 }, { "epoch": 0.63, "grad_norm": 6.114217574329579, "learning_rate": 8.035944994349582e-06, "loss": 0.7263, "step": 7029 }, { "epoch": 0.63, "grad_norm": 4.9744858274176575, "learning_rate": 8.035370997202782e-06, "loss": 0.8185, "step": 7030 }, { "epoch": 0.63, "grad_norm": 5.795482905861157, "learning_rate": 8.03479693669926e-06, "loss": 0.7557, "step": 7031 }, { "epoch": 0.63, "grad_norm": 5.602858180056339, "learning_rate": 8.034222812850996e-06, "loss": 0.8134, "step": 7032 }, { "epoch": 0.63, "grad_norm": 6.391495932907588, "learning_rate": 8.033648625669975e-06, "loss": 0.8293, "step": 7033 }, { "epoch": 0.63, "grad_norm": 6.411199003633842, "learning_rate": 8.033074375168184e-06, "loss": 0.7533, "step": 7034 }, { "epoch": 0.63, "grad_norm": 5.6619719788390155, "learning_rate": 8.032500061357606e-06, "loss": 0.8253, "step": 7035 }, { "epoch": 0.63, "grad_norm": 6.839745859656985, "learning_rate": 8.031925684250234e-06, "loss": 0.8253, "step": 7036 }, { "epoch": 0.63, "grad_norm": 6.718863463448694, "learning_rate": 8.03135124385805e-06, "loss": 0.8816, "step": 7037 }, { "epoch": 0.63, "grad_norm": 6.933706627198956, "learning_rate": 8.03077674019305e-06, "loss": 0.8363, "step": 7038 }, { "epoch": 0.63, "grad_norm": 5.792455413883634, "learning_rate": 8.03020217326722e-06, "loss": 0.8069, "step": 7039 }, { "epoch": 0.63, "grad_norm": 5.537959413519084, "learning_rate": 8.029627543092556e-06, "loss": 0.769, "step": 7040 }, { "epoch": 0.63, "grad_norm": 7.155058850312842, "learning_rate": 8.029052849681055e-06, "loss": 0.7921, "step": 7041 }, { "epoch": 0.63, "grad_norm": 6.550400233954453, "learning_rate": 8.028478093044705e-06, "loss": 0.7928, "step": 7042 }, { "epoch": 0.63, "grad_norm": 7.371479964405629, "learning_rate": 8.02790327319551e-06, "loss": 0.863, "step": 7043 }, { "epoch": 0.63, "grad_norm": 6.743055698624136, "learning_rate": 8.027328390145463e-06, "loss": 0.7525, "step": 7044 }, { "epoch": 0.63, "grad_norm": 5.9074166791069604, "learning_rate": 8.026753443906568e-06, "loss": 0.8038, "step": 7045 }, { "epoch": 0.63, "grad_norm": 5.515769909609539, "learning_rate": 8.026178434490822e-06, "loss": 0.8469, "step": 7046 }, { "epoch": 0.63, "grad_norm": 7.977402205530207, "learning_rate": 8.025603361910227e-06, "loss": 0.8832, "step": 7047 }, { "epoch": 0.63, "grad_norm": 6.86357245747819, "learning_rate": 8.02502822617679e-06, "loss": 0.7804, "step": 7048 }, { "epoch": 0.63, "grad_norm": 7.394674841555729, "learning_rate": 8.024453027302512e-06, "loss": 0.8342, "step": 7049 }, { "epoch": 0.63, "grad_norm": 6.509559317939442, "learning_rate": 8.0238777652994e-06, "loss": 0.866, "step": 7050 }, { "epoch": 0.63, "grad_norm": 7.099844168997305, "learning_rate": 8.023302440179465e-06, "loss": 0.8004, "step": 7051 }, { "epoch": 0.63, "grad_norm": 4.972225297610281, "learning_rate": 8.02272705195471e-06, "loss": 0.8459, "step": 7052 }, { "epoch": 0.63, "grad_norm": 5.539912404481116, "learning_rate": 8.022151600637146e-06, "loss": 0.7894, "step": 7053 }, { "epoch": 0.63, "grad_norm": 4.973858503146333, "learning_rate": 8.02157608623879e-06, "loss": 0.8396, "step": 7054 }, { "epoch": 0.63, "grad_norm": 5.46430670236725, "learning_rate": 8.021000508771648e-06, "loss": 0.8133, "step": 7055 }, { "epoch": 0.63, "grad_norm": 5.467361944901082, "learning_rate": 8.020424868247735e-06, "loss": 0.8718, "step": 7056 }, { "epoch": 0.63, "grad_norm": 5.255701349071418, "learning_rate": 8.01984916467907e-06, "loss": 0.724, "step": 7057 }, { "epoch": 0.63, "grad_norm": 7.765384480155345, "learning_rate": 8.019273398077664e-06, "loss": 0.852, "step": 7058 }, { "epoch": 0.63, "grad_norm": 4.966234130880603, "learning_rate": 8.01869756845554e-06, "loss": 0.8092, "step": 7059 }, { "epoch": 0.63, "grad_norm": 4.976389750085673, "learning_rate": 8.018121675824715e-06, "loss": 0.7081, "step": 7060 }, { "epoch": 0.63, "grad_norm": 5.83501919620551, "learning_rate": 8.01754572019721e-06, "loss": 0.7809, "step": 7061 }, { "epoch": 0.63, "grad_norm": 9.036397957382544, "learning_rate": 8.016969701585045e-06, "loss": 0.7693, "step": 7062 }, { "epoch": 0.63, "grad_norm": 4.288783439739562, "learning_rate": 8.016393620000246e-06, "loss": 0.8437, "step": 7063 }, { "epoch": 0.63, "grad_norm": 6.043949685566679, "learning_rate": 8.015817475454835e-06, "loss": 0.8359, "step": 7064 }, { "epoch": 0.63, "grad_norm": 6.843052176563591, "learning_rate": 8.015241267960841e-06, "loss": 0.7996, "step": 7065 }, { "epoch": 0.63, "grad_norm": 5.77622263400318, "learning_rate": 8.014664997530286e-06, "loss": 0.7947, "step": 7066 }, { "epoch": 0.63, "grad_norm": 5.939327262265271, "learning_rate": 8.0140886641752e-06, "loss": 0.8101, "step": 7067 }, { "epoch": 0.63, "grad_norm": 4.558395161053029, "learning_rate": 8.013512267907617e-06, "loss": 0.7287, "step": 7068 }, { "epoch": 0.63, "grad_norm": 7.170562339925411, "learning_rate": 8.012935808739563e-06, "loss": 0.8459, "step": 7069 }, { "epoch": 0.63, "grad_norm": 5.953126610185626, "learning_rate": 8.012359286683074e-06, "loss": 0.7659, "step": 7070 }, { "epoch": 0.63, "grad_norm": 5.539972862389098, "learning_rate": 8.01178270175018e-06, "loss": 0.7979, "step": 7071 }, { "epoch": 0.63, "grad_norm": 6.36841507519751, "learning_rate": 8.01120605395292e-06, "loss": 0.8496, "step": 7072 }, { "epoch": 0.63, "grad_norm": 6.084842810517694, "learning_rate": 8.010629343303326e-06, "loss": 0.8623, "step": 7073 }, { "epoch": 0.63, "grad_norm": 6.87893568290355, "learning_rate": 8.010052569813438e-06, "loss": 0.7758, "step": 7074 }, { "epoch": 0.63, "grad_norm": 6.383253508386346, "learning_rate": 8.009475733495296e-06, "loss": 0.7515, "step": 7075 }, { "epoch": 0.63, "grad_norm": 9.667473670864817, "learning_rate": 8.008898834360936e-06, "loss": 0.8792, "step": 7076 }, { "epoch": 0.63, "grad_norm": 7.293729966908839, "learning_rate": 8.008321872422404e-06, "loss": 0.8575, "step": 7077 }, { "epoch": 0.63, "grad_norm": 4.219686596223907, "learning_rate": 8.00774484769174e-06, "loss": 0.6861, "step": 7078 }, { "epoch": 0.63, "grad_norm": 4.769179728538904, "learning_rate": 8.007167760180992e-06, "loss": 0.8385, "step": 7079 }, { "epoch": 0.63, "grad_norm": 7.122907223576569, "learning_rate": 8.0065906099022e-06, "loss": 0.7308, "step": 7080 }, { "epoch": 0.63, "grad_norm": 6.593321514469411, "learning_rate": 8.006013396867414e-06, "loss": 0.8087, "step": 7081 }, { "epoch": 0.63, "grad_norm": 6.434738309630223, "learning_rate": 8.00543612108868e-06, "loss": 0.8116, "step": 7082 }, { "epoch": 0.63, "grad_norm": 4.444906310592417, "learning_rate": 8.00485878257805e-06, "loss": 0.7667, "step": 7083 }, { "epoch": 0.63, "grad_norm": 5.62291175052663, "learning_rate": 8.004281381347574e-06, "loss": 0.8221, "step": 7084 }, { "epoch": 0.63, "grad_norm": 5.1095544185532855, "learning_rate": 8.003703917409302e-06, "loss": 0.7835, "step": 7085 }, { "epoch": 0.63, "grad_norm": 7.319947699797402, "learning_rate": 8.003126390775288e-06, "loss": 0.7676, "step": 7086 }, { "epoch": 0.63, "grad_norm": 4.186718700001167, "learning_rate": 8.00254880145759e-06, "loss": 0.8396, "step": 7087 }, { "epoch": 0.63, "grad_norm": 6.4041124070561235, "learning_rate": 8.001971149468262e-06, "loss": 0.7785, "step": 7088 }, { "epoch": 0.63, "grad_norm": 7.041241862971773, "learning_rate": 8.001393434819358e-06, "loss": 0.8328, "step": 7089 }, { "epoch": 0.63, "grad_norm": 4.752196509495697, "learning_rate": 8.00081565752294e-06, "loss": 0.8131, "step": 7090 }, { "epoch": 0.63, "grad_norm": 6.472559279782797, "learning_rate": 8.000237817591066e-06, "loss": 0.8317, "step": 7091 }, { "epoch": 0.63, "grad_norm": 5.599518534580797, "learning_rate": 7.999659915035798e-06, "loss": 0.7877, "step": 7092 }, { "epoch": 0.63, "grad_norm": 5.7328134871082295, "learning_rate": 7.999081949869199e-06, "loss": 0.8108, "step": 7093 }, { "epoch": 0.63, "grad_norm": 6.216479736063277, "learning_rate": 7.998503922103332e-06, "loss": 0.8569, "step": 7094 }, { "epoch": 0.63, "grad_norm": 5.392998955474488, "learning_rate": 7.997925831750262e-06, "loss": 0.8236, "step": 7095 }, { "epoch": 0.63, "grad_norm": 5.364324899972853, "learning_rate": 7.997347678822056e-06, "loss": 0.8317, "step": 7096 }, { "epoch": 0.63, "grad_norm": 4.226467488240026, "learning_rate": 7.996769463330781e-06, "loss": 0.8252, "step": 7097 }, { "epoch": 0.63, "grad_norm": 6.827149300415938, "learning_rate": 7.996191185288507e-06, "loss": 0.791, "step": 7098 }, { "epoch": 0.63, "grad_norm": 5.388159486164026, "learning_rate": 7.995612844707301e-06, "loss": 0.7265, "step": 7099 }, { "epoch": 0.63, "grad_norm": 5.9674713816651535, "learning_rate": 7.99503444159924e-06, "loss": 0.801, "step": 7100 }, { "epoch": 0.63, "grad_norm": 5.137809615954206, "learning_rate": 7.994455975976395e-06, "loss": 0.7572, "step": 7101 }, { "epoch": 0.63, "grad_norm": 5.357335441831199, "learning_rate": 7.993877447850836e-06, "loss": 0.7985, "step": 7102 }, { "epoch": 0.63, "grad_norm": 6.9980945499030165, "learning_rate": 7.993298857234644e-06, "loss": 0.843, "step": 7103 }, { "epoch": 0.63, "grad_norm": 4.872885667832998, "learning_rate": 7.992720204139892e-06, "loss": 0.7373, "step": 7104 }, { "epoch": 0.63, "grad_norm": 5.867299428915381, "learning_rate": 7.99214148857866e-06, "loss": 0.8037, "step": 7105 }, { "epoch": 0.63, "grad_norm": 7.015789022639338, "learning_rate": 7.991562710563028e-06, "loss": 0.731, "step": 7106 }, { "epoch": 0.63, "grad_norm": 9.137653169603446, "learning_rate": 7.990983870105073e-06, "loss": 0.7742, "step": 7107 }, { "epoch": 0.63, "grad_norm": 5.992130198680315, "learning_rate": 7.990404967216885e-06, "loss": 0.7359, "step": 7108 }, { "epoch": 0.63, "grad_norm": 4.6488929766309, "learning_rate": 7.98982600191054e-06, "loss": 0.7936, "step": 7109 }, { "epoch": 0.63, "grad_norm": 6.2024639625078954, "learning_rate": 7.989246974198121e-06, "loss": 0.8464, "step": 7110 }, { "epoch": 0.63, "grad_norm": 4.729778313052926, "learning_rate": 7.988667884091723e-06, "loss": 0.9203, "step": 7111 }, { "epoch": 0.63, "grad_norm": 5.959284467179346, "learning_rate": 7.988088731603425e-06, "loss": 0.785, "step": 7112 }, { "epoch": 0.63, "grad_norm": 5.692437993750479, "learning_rate": 7.98750951674532e-06, "loss": 0.8785, "step": 7113 }, { "epoch": 0.63, "grad_norm": 6.396835201093804, "learning_rate": 7.986930239529496e-06, "loss": 0.7663, "step": 7114 }, { "epoch": 0.63, "grad_norm": 4.830505230831576, "learning_rate": 7.986350899968043e-06, "loss": 0.7886, "step": 7115 }, { "epoch": 0.63, "grad_norm": 6.575023790757853, "learning_rate": 7.985771498073057e-06, "loss": 0.7786, "step": 7116 }, { "epoch": 0.63, "grad_norm": 4.552919942851462, "learning_rate": 7.985192033856628e-06, "loss": 0.8478, "step": 7117 }, { "epoch": 0.63, "grad_norm": 4.538397680427876, "learning_rate": 7.984612507330854e-06, "loss": 0.8623, "step": 7118 }, { "epoch": 0.64, "grad_norm": 5.305844739752404, "learning_rate": 7.98403291850783e-06, "loss": 0.7577, "step": 7119 }, { "epoch": 0.64, "grad_norm": 4.534933049691424, "learning_rate": 7.983453267399654e-06, "loss": 0.8346, "step": 7120 }, { "epoch": 0.64, "grad_norm": 4.447465584127629, "learning_rate": 7.982873554018424e-06, "loss": 0.7828, "step": 7121 }, { "epoch": 0.64, "grad_norm": 8.55093278414744, "learning_rate": 7.982293778376242e-06, "loss": 0.8793, "step": 7122 }, { "epoch": 0.64, "grad_norm": 5.8021097373060195, "learning_rate": 7.981713940485207e-06, "loss": 0.7564, "step": 7123 }, { "epoch": 0.64, "grad_norm": 5.967270342504546, "learning_rate": 7.981134040357424e-06, "loss": 0.7775, "step": 7124 }, { "epoch": 0.64, "grad_norm": 5.93978524199754, "learning_rate": 7.980554078004996e-06, "loss": 0.8057, "step": 7125 }, { "epoch": 0.64, "grad_norm": 5.761814276188255, "learning_rate": 7.979974053440029e-06, "loss": 0.6866, "step": 7126 }, { "epoch": 0.64, "grad_norm": 5.342386558952088, "learning_rate": 7.979393966674631e-06, "loss": 0.7961, "step": 7127 }, { "epoch": 0.64, "grad_norm": 7.177919145147326, "learning_rate": 7.978813817720907e-06, "loss": 0.831, "step": 7128 }, { "epoch": 0.64, "grad_norm": 4.844519158442585, "learning_rate": 7.97823360659097e-06, "loss": 0.7735, "step": 7129 }, { "epoch": 0.64, "grad_norm": 5.620381075913567, "learning_rate": 7.977653333296927e-06, "loss": 0.794, "step": 7130 }, { "epoch": 0.64, "grad_norm": 6.494716805813692, "learning_rate": 7.977072997850892e-06, "loss": 0.806, "step": 7131 }, { "epoch": 0.64, "grad_norm": 5.77216118495672, "learning_rate": 7.976492600264979e-06, "loss": 0.7877, "step": 7132 }, { "epoch": 0.64, "grad_norm": 5.7127219834052845, "learning_rate": 7.9759121405513e-06, "loss": 0.7327, "step": 7133 }, { "epoch": 0.64, "grad_norm": 6.47182389795068, "learning_rate": 7.975331618721973e-06, "loss": 0.7809, "step": 7134 }, { "epoch": 0.64, "grad_norm": 6.382731441418061, "learning_rate": 7.974751034789113e-06, "loss": 0.7955, "step": 7135 }, { "epoch": 0.64, "grad_norm": 4.76941928410649, "learning_rate": 7.974170388764842e-06, "loss": 0.8206, "step": 7136 }, { "epoch": 0.64, "grad_norm": 4.785592700142208, "learning_rate": 7.973589680661276e-06, "loss": 0.7956, "step": 7137 }, { "epoch": 0.64, "grad_norm": 4.978132435679496, "learning_rate": 7.973008910490536e-06, "loss": 0.8359, "step": 7138 }, { "epoch": 0.64, "grad_norm": 5.099315767011291, "learning_rate": 7.972428078264748e-06, "loss": 0.8504, "step": 7139 }, { "epoch": 0.64, "grad_norm": 5.438212352377424, "learning_rate": 7.971847183996033e-06, "loss": 0.7854, "step": 7140 }, { "epoch": 0.64, "grad_norm": 5.9110520860490405, "learning_rate": 7.971266227696517e-06, "loss": 0.8315, "step": 7141 }, { "epoch": 0.64, "grad_norm": 6.950218407590673, "learning_rate": 7.970685209378324e-06, "loss": 0.7437, "step": 7142 }, { "epoch": 0.64, "grad_norm": 5.940572093958994, "learning_rate": 7.970104129053582e-06, "loss": 0.7831, "step": 7143 }, { "epoch": 0.64, "grad_norm": 5.0754034272261785, "learning_rate": 7.969522986734424e-06, "loss": 0.8969, "step": 7144 }, { "epoch": 0.64, "grad_norm": 5.768471675956912, "learning_rate": 7.968941782432975e-06, "loss": 0.8067, "step": 7145 }, { "epoch": 0.64, "grad_norm": 4.853950591500136, "learning_rate": 7.968360516161367e-06, "loss": 0.8059, "step": 7146 }, { "epoch": 0.64, "grad_norm": 6.586888013477863, "learning_rate": 7.967779187931735e-06, "loss": 0.8075, "step": 7147 }, { "epoch": 0.64, "grad_norm": 4.690843801807002, "learning_rate": 7.967197797756212e-06, "loss": 0.7416, "step": 7148 }, { "epoch": 0.64, "grad_norm": 6.251863217520145, "learning_rate": 7.966616345646932e-06, "loss": 0.7835, "step": 7149 }, { "epoch": 0.64, "grad_norm": 6.399052024106859, "learning_rate": 7.966034831616034e-06, "loss": 0.7996, "step": 7150 }, { "epoch": 0.64, "grad_norm": 6.975569850836193, "learning_rate": 7.965453255675653e-06, "loss": 0.7327, "step": 7151 }, { "epoch": 0.64, "grad_norm": 5.236391260689289, "learning_rate": 7.96487161783793e-06, "loss": 0.8013, "step": 7152 }, { "epoch": 0.64, "grad_norm": 5.450463150716186, "learning_rate": 7.964289918115006e-06, "loss": 0.7505, "step": 7153 }, { "epoch": 0.64, "grad_norm": 5.39168306416975, "learning_rate": 7.96370815651902e-06, "loss": 0.8323, "step": 7154 }, { "epoch": 0.64, "grad_norm": 6.906180339071581, "learning_rate": 7.963126333062116e-06, "loss": 0.7732, "step": 7155 }, { "epoch": 0.64, "grad_norm": 5.680217760636456, "learning_rate": 7.962544447756441e-06, "loss": 0.8168, "step": 7156 }, { "epoch": 0.64, "grad_norm": 5.222176341716212, "learning_rate": 7.961962500614138e-06, "loss": 0.8515, "step": 7157 }, { "epoch": 0.64, "grad_norm": 5.865733563190256, "learning_rate": 7.961380491647355e-06, "loss": 0.8076, "step": 7158 }, { "epoch": 0.64, "grad_norm": 4.6567732195155, "learning_rate": 7.960798420868238e-06, "loss": 0.7329, "step": 7159 }, { "epoch": 0.64, "grad_norm": 5.901595974019546, "learning_rate": 7.96021628828894e-06, "loss": 0.8335, "step": 7160 }, { "epoch": 0.64, "grad_norm": 6.381971946494121, "learning_rate": 7.959634093921609e-06, "loss": 0.8489, "step": 7161 }, { "epoch": 0.64, "grad_norm": 6.105244537796855, "learning_rate": 7.959051837778396e-06, "loss": 0.8827, "step": 7162 }, { "epoch": 0.64, "grad_norm": 6.780793919285622, "learning_rate": 7.958469519871457e-06, "loss": 0.7392, "step": 7163 }, { "epoch": 0.64, "grad_norm": 5.990536252121846, "learning_rate": 7.957887140212948e-06, "loss": 0.7902, "step": 7164 }, { "epoch": 0.64, "grad_norm": 5.787813708942032, "learning_rate": 7.95730469881502e-06, "loss": 0.7737, "step": 7165 }, { "epoch": 0.64, "grad_norm": 7.186932483286272, "learning_rate": 7.956722195689835e-06, "loss": 0.8428, "step": 7166 }, { "epoch": 0.64, "grad_norm": 5.910863737449839, "learning_rate": 7.956139630849546e-06, "loss": 0.7763, "step": 7167 }, { "epoch": 0.64, "grad_norm": 5.702711309318253, "learning_rate": 7.955557004306318e-06, "loss": 0.7925, "step": 7168 }, { "epoch": 0.64, "grad_norm": 6.324992145622875, "learning_rate": 7.954974316072311e-06, "loss": 0.8021, "step": 7169 }, { "epoch": 0.64, "grad_norm": 7.4009170535411855, "learning_rate": 7.954391566159685e-06, "loss": 0.8006, "step": 7170 }, { "epoch": 0.64, "grad_norm": 5.056026641183625, "learning_rate": 7.953808754580604e-06, "loss": 0.8234, "step": 7171 }, { "epoch": 0.64, "grad_norm": 5.412695805105385, "learning_rate": 7.953225881347235e-06, "loss": 0.7703, "step": 7172 }, { "epoch": 0.64, "grad_norm": 5.5636363758463565, "learning_rate": 7.952642946471744e-06, "loss": 0.8035, "step": 7173 }, { "epoch": 0.64, "grad_norm": 7.543112604106601, "learning_rate": 7.952059949966298e-06, "loss": 0.7971, "step": 7174 }, { "epoch": 0.64, "grad_norm": 5.853332910493852, "learning_rate": 7.951476891843065e-06, "loss": 0.7686, "step": 7175 }, { "epoch": 0.64, "grad_norm": 5.377433609287862, "learning_rate": 7.950893772114214e-06, "loss": 0.8062, "step": 7176 }, { "epoch": 0.64, "grad_norm": 6.631972114388808, "learning_rate": 7.95031059079192e-06, "loss": 0.8118, "step": 7177 }, { "epoch": 0.64, "grad_norm": 6.106129396870192, "learning_rate": 7.949727347888353e-06, "loss": 0.7807, "step": 7178 }, { "epoch": 0.64, "grad_norm": 7.311860196863349, "learning_rate": 7.949144043415687e-06, "loss": 0.826, "step": 7179 }, { "epoch": 0.64, "grad_norm": 5.692610663551311, "learning_rate": 7.948560677386098e-06, "loss": 0.7331, "step": 7180 }, { "epoch": 0.64, "grad_norm": 5.346531093810068, "learning_rate": 7.94797724981176e-06, "loss": 0.8613, "step": 7181 }, { "epoch": 0.64, "grad_norm": 5.386050339431628, "learning_rate": 7.947393760704856e-06, "loss": 0.8655, "step": 7182 }, { "epoch": 0.64, "grad_norm": 6.611064510648886, "learning_rate": 7.94681021007756e-06, "loss": 0.8095, "step": 7183 }, { "epoch": 0.64, "grad_norm": 4.417359526456947, "learning_rate": 7.946226597942055e-06, "loss": 0.8369, "step": 7184 }, { "epoch": 0.64, "grad_norm": 4.542607137790846, "learning_rate": 7.945642924310521e-06, "loss": 0.7616, "step": 7185 }, { "epoch": 0.64, "grad_norm": 5.50589412908159, "learning_rate": 7.945059189195143e-06, "loss": 0.7993, "step": 7186 }, { "epoch": 0.64, "grad_norm": 6.1084154940957065, "learning_rate": 7.944475392608102e-06, "loss": 0.7756, "step": 7187 }, { "epoch": 0.64, "grad_norm": 7.908818041742095, "learning_rate": 7.943891534561589e-06, "loss": 0.8473, "step": 7188 }, { "epoch": 0.64, "grad_norm": 6.335015459320931, "learning_rate": 7.943307615067784e-06, "loss": 0.8245, "step": 7189 }, { "epoch": 0.64, "grad_norm": 6.6170813584678285, "learning_rate": 7.94272363413888e-06, "loss": 0.7809, "step": 7190 }, { "epoch": 0.64, "grad_norm": 4.974828555791969, "learning_rate": 7.942139591787063e-06, "loss": 0.7836, "step": 7191 }, { "epoch": 0.64, "grad_norm": 7.132435752450243, "learning_rate": 7.941555488024526e-06, "loss": 0.7296, "step": 7192 }, { "epoch": 0.64, "grad_norm": 5.111289958115326, "learning_rate": 7.940971322863462e-06, "loss": 0.7903, "step": 7193 }, { "epoch": 0.64, "grad_norm": 5.7686044624207975, "learning_rate": 7.94038709631606e-06, "loss": 0.7968, "step": 7194 }, { "epoch": 0.64, "grad_norm": 5.99693170637764, "learning_rate": 7.939802808394516e-06, "loss": 0.7899, "step": 7195 }, { "epoch": 0.64, "grad_norm": 5.297872839370755, "learning_rate": 7.939218459111029e-06, "loss": 0.7568, "step": 7196 }, { "epoch": 0.64, "grad_norm": 4.95202702075069, "learning_rate": 7.938634048477791e-06, "loss": 0.7843, "step": 7197 }, { "epoch": 0.64, "grad_norm": 5.656631366917541, "learning_rate": 7.938049576507004e-06, "loss": 0.752, "step": 7198 }, { "epoch": 0.64, "grad_norm": 3.9900320414658026, "learning_rate": 7.937465043210866e-06, "loss": 0.8317, "step": 7199 }, { "epoch": 0.64, "grad_norm": 6.769767306004276, "learning_rate": 7.936880448601578e-06, "loss": 0.7528, "step": 7200 }, { "epoch": 0.64, "grad_norm": 4.868983632651134, "learning_rate": 7.936295792691344e-06, "loss": 0.7688, "step": 7201 }, { "epoch": 0.64, "grad_norm": 5.421767324804065, "learning_rate": 7.935711075492365e-06, "loss": 0.7483, "step": 7202 }, { "epoch": 0.64, "grad_norm": 6.2945783246476825, "learning_rate": 7.935126297016844e-06, "loss": 0.7656, "step": 7203 }, { "epoch": 0.64, "grad_norm": 5.1082692023643235, "learning_rate": 7.934541457276992e-06, "loss": 0.7032, "step": 7204 }, { "epoch": 0.64, "grad_norm": 5.34025015321995, "learning_rate": 7.933956556285012e-06, "loss": 0.8338, "step": 7205 }, { "epoch": 0.64, "grad_norm": 5.310280839226349, "learning_rate": 7.933371594053115e-06, "loss": 0.7911, "step": 7206 }, { "epoch": 0.64, "grad_norm": 6.501390053569232, "learning_rate": 7.932786570593509e-06, "loss": 0.7938, "step": 7207 }, { "epoch": 0.64, "grad_norm": 5.805010391872277, "learning_rate": 7.932201485918407e-06, "loss": 0.787, "step": 7208 }, { "epoch": 0.64, "grad_norm": 4.614903038580794, "learning_rate": 7.931616340040022e-06, "loss": 0.7444, "step": 7209 }, { "epoch": 0.64, "grad_norm": 6.487685373681917, "learning_rate": 7.931031132970563e-06, "loss": 0.8572, "step": 7210 }, { "epoch": 0.64, "grad_norm": 7.792600653136466, "learning_rate": 7.930445864722249e-06, "loss": 0.7888, "step": 7211 }, { "epoch": 0.64, "grad_norm": 5.002078894549036, "learning_rate": 7.929860535307297e-06, "loss": 0.8518, "step": 7212 }, { "epoch": 0.64, "grad_norm": 5.813766776569422, "learning_rate": 7.92927514473792e-06, "loss": 0.7909, "step": 7213 }, { "epoch": 0.64, "grad_norm": 5.1413793123651335, "learning_rate": 7.92868969302634e-06, "loss": 0.7461, "step": 7214 }, { "epoch": 0.64, "grad_norm": 6.9898223051449255, "learning_rate": 7.928104180184778e-06, "loss": 0.7993, "step": 7215 }, { "epoch": 0.64, "grad_norm": 5.1012715641085835, "learning_rate": 7.927518606225452e-06, "loss": 0.8368, "step": 7216 }, { "epoch": 0.64, "grad_norm": 5.230024986936379, "learning_rate": 7.926932971160585e-06, "loss": 0.8135, "step": 7217 }, { "epoch": 0.64, "grad_norm": 5.97867935896615, "learning_rate": 7.926347275002405e-06, "loss": 0.7696, "step": 7218 }, { "epoch": 0.64, "grad_norm": 5.449945213885118, "learning_rate": 7.925761517763132e-06, "loss": 0.8082, "step": 7219 }, { "epoch": 0.64, "grad_norm": 5.346071806605596, "learning_rate": 7.925175699454996e-06, "loss": 0.8435, "step": 7220 }, { "epoch": 0.64, "grad_norm": 5.242269329615449, "learning_rate": 7.924589820090222e-06, "loss": 0.7813, "step": 7221 }, { "epoch": 0.64, "grad_norm": 6.971169568964333, "learning_rate": 7.924003879681042e-06, "loss": 0.8019, "step": 7222 }, { "epoch": 0.64, "grad_norm": 5.129299271237798, "learning_rate": 7.923417878239683e-06, "loss": 0.8018, "step": 7223 }, { "epoch": 0.64, "grad_norm": 7.128031418502038, "learning_rate": 7.92283181577838e-06, "loss": 0.7805, "step": 7224 }, { "epoch": 0.64, "grad_norm": 6.116397491987267, "learning_rate": 7.922245692309362e-06, "loss": 0.7987, "step": 7225 }, { "epoch": 0.64, "grad_norm": 8.012571814523596, "learning_rate": 7.921659507844865e-06, "loss": 0.7627, "step": 7226 }, { "epoch": 0.64, "grad_norm": 7.819239789309228, "learning_rate": 7.921073262397125e-06, "loss": 0.7735, "step": 7227 }, { "epoch": 0.64, "grad_norm": 5.792327577246019, "learning_rate": 7.920486955978376e-06, "loss": 0.7777, "step": 7228 }, { "epoch": 0.64, "grad_norm": 6.647934133552474, "learning_rate": 7.919900588600857e-06, "loss": 0.825, "step": 7229 }, { "epoch": 0.64, "grad_norm": 4.445092138065056, "learning_rate": 7.91931416027681e-06, "loss": 0.7275, "step": 7230 }, { "epoch": 0.65, "grad_norm": 6.434968001415219, "learning_rate": 7.918727671018474e-06, "loss": 0.8563, "step": 7231 }, { "epoch": 0.65, "grad_norm": 7.549055789434388, "learning_rate": 7.918141120838088e-06, "loss": 0.8247, "step": 7232 }, { "epoch": 0.65, "grad_norm": 5.687548484692197, "learning_rate": 7.917554509747895e-06, "loss": 0.7086, "step": 7233 }, { "epoch": 0.65, "grad_norm": 8.13273184801703, "learning_rate": 7.916967837760142e-06, "loss": 0.8045, "step": 7234 }, { "epoch": 0.65, "grad_norm": 5.080516609874037, "learning_rate": 7.916381104887074e-06, "loss": 0.7507, "step": 7235 }, { "epoch": 0.65, "grad_norm": 5.124350716765973, "learning_rate": 7.91579431114094e-06, "loss": 0.8155, "step": 7236 }, { "epoch": 0.65, "grad_norm": 4.58882956296973, "learning_rate": 7.91520745653398e-06, "loss": 0.6804, "step": 7237 }, { "epoch": 0.65, "grad_norm": 4.434060716063205, "learning_rate": 7.914620541078453e-06, "loss": 0.8209, "step": 7238 }, { "epoch": 0.65, "grad_norm": 6.748709634359352, "learning_rate": 7.914033564786603e-06, "loss": 0.844, "step": 7239 }, { "epoch": 0.65, "grad_norm": 5.224734810466471, "learning_rate": 7.913446527670683e-06, "loss": 0.789, "step": 7240 }, { "epoch": 0.65, "grad_norm": 6.947488832221033, "learning_rate": 7.912859429742948e-06, "loss": 0.8002, "step": 7241 }, { "epoch": 0.65, "grad_norm": 8.392794057546384, "learning_rate": 7.912272271015653e-06, "loss": 0.8401, "step": 7242 }, { "epoch": 0.65, "grad_norm": 6.373573646816047, "learning_rate": 7.91168505150105e-06, "loss": 0.8033, "step": 7243 }, { "epoch": 0.65, "grad_norm": 6.544997526908821, "learning_rate": 7.911097771211397e-06, "loss": 0.8019, "step": 7244 }, { "epoch": 0.65, "grad_norm": 5.881580523577525, "learning_rate": 7.910510430158957e-06, "loss": 0.7811, "step": 7245 }, { "epoch": 0.65, "grad_norm": 6.257777905054153, "learning_rate": 7.909923028355983e-06, "loss": 0.8702, "step": 7246 }, { "epoch": 0.65, "grad_norm": 6.099734851019966, "learning_rate": 7.909335565814739e-06, "loss": 0.7359, "step": 7247 }, { "epoch": 0.65, "grad_norm": 6.481762950154281, "learning_rate": 7.908748042547487e-06, "loss": 0.8612, "step": 7248 }, { "epoch": 0.65, "grad_norm": 5.735974967263165, "learning_rate": 7.908160458566489e-06, "loss": 0.7944, "step": 7249 }, { "epoch": 0.65, "grad_norm": 4.916726684682544, "learning_rate": 7.90757281388401e-06, "loss": 0.8167, "step": 7250 }, { "epoch": 0.65, "grad_norm": 3.9880982841065142, "learning_rate": 7.906985108512316e-06, "loss": 0.7675, "step": 7251 }, { "epoch": 0.65, "grad_norm": 5.94238101407038, "learning_rate": 7.906397342463674e-06, "loss": 0.7874, "step": 7252 }, { "epoch": 0.65, "grad_norm": 5.848934951080192, "learning_rate": 7.905809515750353e-06, "loss": 0.8023, "step": 7253 }, { "epoch": 0.65, "grad_norm": 4.736213250254177, "learning_rate": 7.905221628384624e-06, "loss": 0.8805, "step": 7254 }, { "epoch": 0.65, "grad_norm": 5.910024518987341, "learning_rate": 7.904633680378753e-06, "loss": 0.8168, "step": 7255 }, { "epoch": 0.65, "grad_norm": 4.2350365781298684, "learning_rate": 7.904045671745016e-06, "loss": 0.6997, "step": 7256 }, { "epoch": 0.65, "grad_norm": 5.7653602170746305, "learning_rate": 7.903457602495683e-06, "loss": 0.7601, "step": 7257 }, { "epoch": 0.65, "grad_norm": 6.015273138663125, "learning_rate": 7.902869472643033e-06, "loss": 0.8212, "step": 7258 }, { "epoch": 0.65, "grad_norm": 4.990322082149091, "learning_rate": 7.90228128219934e-06, "loss": 0.797, "step": 7259 }, { "epoch": 0.65, "grad_norm": 6.116367455695627, "learning_rate": 7.90169303117688e-06, "loss": 0.8275, "step": 7260 }, { "epoch": 0.65, "grad_norm": 6.369276659295666, "learning_rate": 7.901104719587935e-06, "loss": 0.8567, "step": 7261 }, { "epoch": 0.65, "grad_norm": 5.2335048746974335, "learning_rate": 7.90051634744478e-06, "loss": 0.8202, "step": 7262 }, { "epoch": 0.65, "grad_norm": 4.156657758970991, "learning_rate": 7.8999279147597e-06, "loss": 0.8024, "step": 7263 }, { "epoch": 0.65, "grad_norm": 4.2729147372795975, "learning_rate": 7.899339421544973e-06, "loss": 0.8001, "step": 7264 }, { "epoch": 0.65, "grad_norm": 4.203240280290182, "learning_rate": 7.898750867812888e-06, "loss": 0.7611, "step": 7265 }, { "epoch": 0.65, "grad_norm": 6.36285217702354, "learning_rate": 7.898162253575726e-06, "loss": 0.7528, "step": 7266 }, { "epoch": 0.65, "grad_norm": 5.719140862340553, "learning_rate": 7.897573578845773e-06, "loss": 0.8106, "step": 7267 }, { "epoch": 0.65, "grad_norm": 5.265776888296912, "learning_rate": 7.896984843635317e-06, "loss": 0.8529, "step": 7268 }, { "epoch": 0.65, "grad_norm": 4.9567604515626345, "learning_rate": 7.896396047956648e-06, "loss": 0.8039, "step": 7269 }, { "epoch": 0.65, "grad_norm": 5.710371368529087, "learning_rate": 7.895807191822052e-06, "loss": 0.7792, "step": 7270 }, { "epoch": 0.65, "grad_norm": 5.038424414351003, "learning_rate": 7.895218275243825e-06, "loss": 0.7992, "step": 7271 }, { "epoch": 0.65, "grad_norm": 8.344977035259465, "learning_rate": 7.894629298234256e-06, "loss": 0.7372, "step": 7272 }, { "epoch": 0.65, "grad_norm": 7.23354358448285, "learning_rate": 7.89404026080564e-06, "loss": 0.825, "step": 7273 }, { "epoch": 0.65, "grad_norm": 8.05871983947589, "learning_rate": 7.89345116297027e-06, "loss": 0.8189, "step": 7274 }, { "epoch": 0.65, "grad_norm": 5.141599957985732, "learning_rate": 7.892862004740445e-06, "loss": 0.7563, "step": 7275 }, { "epoch": 0.65, "grad_norm": 5.305056291007838, "learning_rate": 7.89227278612846e-06, "loss": 0.7334, "step": 7276 }, { "epoch": 0.65, "grad_norm": 3.732819083054324, "learning_rate": 7.891683507146614e-06, "loss": 0.7513, "step": 7277 }, { "epoch": 0.65, "grad_norm": 5.659585730234145, "learning_rate": 7.89109416780721e-06, "loss": 0.8193, "step": 7278 }, { "epoch": 0.65, "grad_norm": 5.812057731981562, "learning_rate": 7.890504768122544e-06, "loss": 0.712, "step": 7279 }, { "epoch": 0.65, "grad_norm": 7.525918061121079, "learning_rate": 7.889915308104922e-06, "loss": 0.7251, "step": 7280 }, { "epoch": 0.65, "grad_norm": 5.44733136122289, "learning_rate": 7.889325787766649e-06, "loss": 0.8394, "step": 7281 }, { "epoch": 0.65, "grad_norm": 5.198110113395652, "learning_rate": 7.888736207120025e-06, "loss": 0.817, "step": 7282 }, { "epoch": 0.65, "grad_norm": 5.822806229816122, "learning_rate": 7.88814656617736e-06, "loss": 0.8216, "step": 7283 }, { "epoch": 0.65, "grad_norm": 6.076727873866262, "learning_rate": 7.887556864950959e-06, "loss": 0.8642, "step": 7284 }, { "epoch": 0.65, "grad_norm": 9.332622396357536, "learning_rate": 7.886967103453132e-06, "loss": 0.8173, "step": 7285 }, { "epoch": 0.65, "grad_norm": 4.335076820512628, "learning_rate": 7.88637728169619e-06, "loss": 0.7558, "step": 7286 }, { "epoch": 0.65, "grad_norm": 8.162635549762367, "learning_rate": 7.885787399692443e-06, "loss": 0.8483, "step": 7287 }, { "epoch": 0.65, "grad_norm": 6.013498006173284, "learning_rate": 7.885197457454204e-06, "loss": 0.7623, "step": 7288 }, { "epoch": 0.65, "grad_norm": 4.362446750102223, "learning_rate": 7.884607454993786e-06, "loss": 0.8284, "step": 7289 }, { "epoch": 0.65, "grad_norm": 5.461025240924296, "learning_rate": 7.884017392323507e-06, "loss": 0.8018, "step": 7290 }, { "epoch": 0.65, "grad_norm": 5.609227283145032, "learning_rate": 7.883427269455679e-06, "loss": 0.8331, "step": 7291 }, { "epoch": 0.65, "grad_norm": 5.91373396400926, "learning_rate": 7.88283708640262e-06, "loss": 0.8236, "step": 7292 }, { "epoch": 0.65, "grad_norm": 5.248486336807874, "learning_rate": 7.882246843176653e-06, "loss": 0.7922, "step": 7293 }, { "epoch": 0.65, "grad_norm": 6.495533169212878, "learning_rate": 7.881656539790093e-06, "loss": 0.7792, "step": 7294 }, { "epoch": 0.65, "grad_norm": 6.70612395545589, "learning_rate": 7.881066176255264e-06, "loss": 0.7407, "step": 7295 }, { "epoch": 0.65, "grad_norm": 5.744159844952703, "learning_rate": 7.88047575258449e-06, "loss": 0.7817, "step": 7296 }, { "epoch": 0.65, "grad_norm": 5.8452625134196206, "learning_rate": 7.879885268790088e-06, "loss": 0.8309, "step": 7297 }, { "epoch": 0.65, "grad_norm": 6.239506705738267, "learning_rate": 7.879294724884392e-06, "loss": 0.7538, "step": 7298 }, { "epoch": 0.65, "grad_norm": 6.23430136958058, "learning_rate": 7.878704120879724e-06, "loss": 0.8008, "step": 7299 }, { "epoch": 0.65, "grad_norm": 7.241654515200779, "learning_rate": 7.87811345678841e-06, "loss": 0.836, "step": 7300 }, { "epoch": 0.65, "grad_norm": 3.9556640012837665, "learning_rate": 7.87752273262278e-06, "loss": 0.8102, "step": 7301 }, { "epoch": 0.65, "grad_norm": 5.578975126501275, "learning_rate": 7.876931948395165e-06, "loss": 0.8802, "step": 7302 }, { "epoch": 0.65, "grad_norm": 4.8071491566931845, "learning_rate": 7.8763411041179e-06, "loss": 0.7898, "step": 7303 }, { "epoch": 0.65, "grad_norm": 5.060252415692179, "learning_rate": 7.875750199803309e-06, "loss": 0.8203, "step": 7304 }, { "epoch": 0.65, "grad_norm": 5.395421557449016, "learning_rate": 7.875159235463731e-06, "loss": 0.7633, "step": 7305 }, { "epoch": 0.65, "grad_norm": 6.3678283660052974, "learning_rate": 7.874568211111502e-06, "loss": 0.7553, "step": 7306 }, { "epoch": 0.65, "grad_norm": 5.470882638072655, "learning_rate": 7.873977126758957e-06, "loss": 0.8363, "step": 7307 }, { "epoch": 0.65, "grad_norm": 5.571162588805288, "learning_rate": 7.873385982418432e-06, "loss": 0.8218, "step": 7308 }, { "epoch": 0.65, "grad_norm": 4.605982042840392, "learning_rate": 7.872794778102265e-06, "loss": 0.8643, "step": 7309 }, { "epoch": 0.65, "grad_norm": 5.148067799623685, "learning_rate": 7.872203513822802e-06, "loss": 0.6854, "step": 7310 }, { "epoch": 0.65, "grad_norm": 6.339483046941087, "learning_rate": 7.87161218959238e-06, "loss": 0.7489, "step": 7311 }, { "epoch": 0.65, "grad_norm": 6.070236153899968, "learning_rate": 7.871020805423342e-06, "loss": 0.8052, "step": 7312 }, { "epoch": 0.65, "grad_norm": 6.2775659088580635, "learning_rate": 7.87042936132803e-06, "loss": 0.8152, "step": 7313 }, { "epoch": 0.65, "grad_norm": 5.076219946801336, "learning_rate": 7.869837857318794e-06, "loss": 0.8201, "step": 7314 }, { "epoch": 0.65, "grad_norm": 5.15062480925235, "learning_rate": 7.869246293407978e-06, "loss": 0.8046, "step": 7315 }, { "epoch": 0.65, "grad_norm": 6.317952062872707, "learning_rate": 7.868654669607926e-06, "loss": 0.7586, "step": 7316 }, { "epoch": 0.65, "grad_norm": 5.249601190619039, "learning_rate": 7.868062985930993e-06, "loss": 0.8936, "step": 7317 }, { "epoch": 0.65, "grad_norm": 4.936380091318307, "learning_rate": 7.867471242389524e-06, "loss": 0.7353, "step": 7318 }, { "epoch": 0.65, "grad_norm": 5.958910690182678, "learning_rate": 7.866879438995874e-06, "loss": 0.8762, "step": 7319 }, { "epoch": 0.65, "grad_norm": 6.296027589945574, "learning_rate": 7.866287575762395e-06, "loss": 0.7643, "step": 7320 }, { "epoch": 0.65, "grad_norm": 4.274585012651619, "learning_rate": 7.865695652701437e-06, "loss": 0.8474, "step": 7321 }, { "epoch": 0.65, "grad_norm": 5.248878039963356, "learning_rate": 7.865103669825359e-06, "loss": 0.8126, "step": 7322 }, { "epoch": 0.65, "grad_norm": 4.8428638624909714, "learning_rate": 7.864511627146517e-06, "loss": 0.8058, "step": 7323 }, { "epoch": 0.65, "grad_norm": 5.000210861059267, "learning_rate": 7.863919524677268e-06, "loss": 0.7731, "step": 7324 }, { "epoch": 0.65, "grad_norm": 5.933925084999985, "learning_rate": 7.86332736242997e-06, "loss": 0.7899, "step": 7325 }, { "epoch": 0.65, "grad_norm": 5.256431592684385, "learning_rate": 7.862735140416986e-06, "loss": 0.7574, "step": 7326 }, { "epoch": 0.65, "grad_norm": 5.627357158107828, "learning_rate": 7.862142858650674e-06, "loss": 0.7406, "step": 7327 }, { "epoch": 0.65, "grad_norm": 4.835428255300179, "learning_rate": 7.861550517143397e-06, "loss": 0.7707, "step": 7328 }, { "epoch": 0.65, "grad_norm": 4.724149272160683, "learning_rate": 7.860958115907519e-06, "loss": 0.704, "step": 7329 }, { "epoch": 0.65, "grad_norm": 6.124178195445194, "learning_rate": 7.860365654955407e-06, "loss": 0.7959, "step": 7330 }, { "epoch": 0.65, "grad_norm": 5.296760982164352, "learning_rate": 7.859773134299427e-06, "loss": 0.8126, "step": 7331 }, { "epoch": 0.65, "grad_norm": 5.607890626205313, "learning_rate": 7.859180553951944e-06, "loss": 0.775, "step": 7332 }, { "epoch": 0.65, "grad_norm": 5.3619845723020045, "learning_rate": 7.858587913925329e-06, "loss": 0.7477, "step": 7333 }, { "epoch": 0.65, "grad_norm": 4.842663520612056, "learning_rate": 7.857995214231952e-06, "loss": 0.7456, "step": 7334 }, { "epoch": 0.65, "grad_norm": 5.9579059556835485, "learning_rate": 7.857402454884181e-06, "loss": 0.7927, "step": 7335 }, { "epoch": 0.65, "grad_norm": 4.492023148069855, "learning_rate": 7.856809635894395e-06, "loss": 0.7861, "step": 7336 }, { "epoch": 0.65, "grad_norm": 5.309008058821417, "learning_rate": 7.856216757274963e-06, "loss": 0.7895, "step": 7337 }, { "epoch": 0.65, "grad_norm": 6.907688791225626, "learning_rate": 7.85562381903826e-06, "loss": 0.8757, "step": 7338 }, { "epoch": 0.65, "grad_norm": 5.457530281619961, "learning_rate": 7.855030821196666e-06, "loss": 0.7745, "step": 7339 }, { "epoch": 0.65, "grad_norm": 4.323453649106245, "learning_rate": 7.854437763762555e-06, "loss": 0.7268, "step": 7340 }, { "epoch": 0.65, "grad_norm": 4.278759093217568, "learning_rate": 7.853844646748305e-06, "loss": 0.8042, "step": 7341 }, { "epoch": 0.65, "grad_norm": 5.781913976451842, "learning_rate": 7.853251470166299e-06, "loss": 0.8369, "step": 7342 }, { "epoch": 0.66, "grad_norm": 5.497147122941115, "learning_rate": 7.852658234028919e-06, "loss": 0.8042, "step": 7343 }, { "epoch": 0.66, "grad_norm": 5.864820526591365, "learning_rate": 7.852064938348542e-06, "loss": 0.8177, "step": 7344 }, { "epoch": 0.66, "grad_norm": 5.978160050925628, "learning_rate": 7.851471583137557e-06, "loss": 0.7876, "step": 7345 }, { "epoch": 0.66, "grad_norm": 5.9300065661324535, "learning_rate": 7.850878168408349e-06, "loss": 0.7757, "step": 7346 }, { "epoch": 0.66, "grad_norm": 5.970402507752749, "learning_rate": 7.8502846941733e-06, "loss": 0.8426, "step": 7347 }, { "epoch": 0.66, "grad_norm": 5.325681388590815, "learning_rate": 7.8496911604448e-06, "loss": 0.8354, "step": 7348 }, { "epoch": 0.66, "grad_norm": 5.947281744804757, "learning_rate": 7.84909756723524e-06, "loss": 0.8198, "step": 7349 }, { "epoch": 0.66, "grad_norm": 6.0900812278997085, "learning_rate": 7.848503914557007e-06, "loss": 0.7669, "step": 7350 }, { "epoch": 0.66, "grad_norm": 4.654326716114816, "learning_rate": 7.847910202422491e-06, "loss": 0.8442, "step": 7351 }, { "epoch": 0.66, "grad_norm": 5.708785376643339, "learning_rate": 7.847316430844087e-06, "loss": 0.7857, "step": 7352 }, { "epoch": 0.66, "grad_norm": 7.1618957974259825, "learning_rate": 7.84672259983419e-06, "loss": 0.8453, "step": 7353 }, { "epoch": 0.66, "grad_norm": 4.501265237871065, "learning_rate": 7.846128709405192e-06, "loss": 0.7608, "step": 7354 }, { "epoch": 0.66, "grad_norm": 6.696238247978455, "learning_rate": 7.845534759569488e-06, "loss": 0.8208, "step": 7355 }, { "epoch": 0.66, "grad_norm": 5.623232923079764, "learning_rate": 7.84494075033948e-06, "loss": 0.7881, "step": 7356 }, { "epoch": 0.66, "grad_norm": 4.237142003767484, "learning_rate": 7.844346681727564e-06, "loss": 0.7713, "step": 7357 }, { "epoch": 0.66, "grad_norm": 7.056016373178894, "learning_rate": 7.843752553746139e-06, "loss": 0.7781, "step": 7358 }, { "epoch": 0.66, "grad_norm": 5.051125020974088, "learning_rate": 7.843158366407608e-06, "loss": 0.7735, "step": 7359 }, { "epoch": 0.66, "grad_norm": 4.220364196747527, "learning_rate": 7.842564119724372e-06, "loss": 0.7977, "step": 7360 }, { "epoch": 0.66, "grad_norm": 5.1960591554155515, "learning_rate": 7.841969813708835e-06, "loss": 0.6877, "step": 7361 }, { "epoch": 0.66, "grad_norm": 5.271859734805716, "learning_rate": 7.841375448373403e-06, "loss": 0.7946, "step": 7362 }, { "epoch": 0.66, "grad_norm": 5.730182374400626, "learning_rate": 7.84078102373048e-06, "loss": 0.8084, "step": 7363 }, { "epoch": 0.66, "grad_norm": 4.633062131008258, "learning_rate": 7.840186539792474e-06, "loss": 0.7502, "step": 7364 }, { "epoch": 0.66, "grad_norm": 5.80858529836988, "learning_rate": 7.839591996571793e-06, "loss": 0.7994, "step": 7365 }, { "epoch": 0.66, "grad_norm": 5.372162763752777, "learning_rate": 7.83899739408085e-06, "loss": 0.7344, "step": 7366 }, { "epoch": 0.66, "grad_norm": 4.259772707999068, "learning_rate": 7.838402732332053e-06, "loss": 0.6923, "step": 7367 }, { "epoch": 0.66, "grad_norm": 5.159662510739582, "learning_rate": 7.837808011337815e-06, "loss": 0.8355, "step": 7368 }, { "epoch": 0.66, "grad_norm": 4.443209326393017, "learning_rate": 7.83721323111055e-06, "loss": 0.7362, "step": 7369 }, { "epoch": 0.66, "grad_norm": 6.036826139213099, "learning_rate": 7.836618391662671e-06, "loss": 0.7695, "step": 7370 }, { "epoch": 0.66, "grad_norm": 7.557108750179034, "learning_rate": 7.836023493006595e-06, "loss": 0.8334, "step": 7371 }, { "epoch": 0.66, "grad_norm": 5.878773726073794, "learning_rate": 7.83542853515474e-06, "loss": 0.7855, "step": 7372 }, { "epoch": 0.66, "grad_norm": 7.462918774804227, "learning_rate": 7.834833518119524e-06, "loss": 0.8511, "step": 7373 }, { "epoch": 0.66, "grad_norm": 6.8977479928547325, "learning_rate": 7.834238441913366e-06, "loss": 0.8534, "step": 7374 }, { "epoch": 0.66, "grad_norm": 5.438329995486269, "learning_rate": 7.833643306548688e-06, "loss": 0.8383, "step": 7375 }, { "epoch": 0.66, "grad_norm": 6.479095024010013, "learning_rate": 7.833048112037911e-06, "loss": 0.7664, "step": 7376 }, { "epoch": 0.66, "grad_norm": 4.604747839881378, "learning_rate": 7.832452858393459e-06, "loss": 0.8273, "step": 7377 }, { "epoch": 0.66, "grad_norm": 6.057427305150213, "learning_rate": 7.831857545627756e-06, "loss": 0.8936, "step": 7378 }, { "epoch": 0.66, "grad_norm": 5.029733113100272, "learning_rate": 7.831262173753229e-06, "loss": 0.804, "step": 7379 }, { "epoch": 0.66, "grad_norm": 10.794482306609783, "learning_rate": 7.830666742782305e-06, "loss": 0.8274, "step": 7380 }, { "epoch": 0.66, "grad_norm": 4.4876841709064275, "learning_rate": 7.830071252727412e-06, "loss": 0.7576, "step": 7381 }, { "epoch": 0.66, "grad_norm": 6.745694631135, "learning_rate": 7.829475703600979e-06, "loss": 0.8292, "step": 7382 }, { "epoch": 0.66, "grad_norm": 7.262324673310644, "learning_rate": 7.828880095415435e-06, "loss": 0.773, "step": 7383 }, { "epoch": 0.66, "grad_norm": 4.418367446227206, "learning_rate": 7.828284428183217e-06, "loss": 0.8672, "step": 7384 }, { "epoch": 0.66, "grad_norm": 5.020411379843967, "learning_rate": 7.827688701916754e-06, "loss": 0.7699, "step": 7385 }, { "epoch": 0.66, "grad_norm": 6.6224461938085994, "learning_rate": 7.827092916628482e-06, "loss": 0.7855, "step": 7386 }, { "epoch": 0.66, "grad_norm": 5.566688437953154, "learning_rate": 7.826497072330836e-06, "loss": 0.861, "step": 7387 }, { "epoch": 0.66, "grad_norm": 7.080779276092116, "learning_rate": 7.825901169036255e-06, "loss": 0.8022, "step": 7388 }, { "epoch": 0.66, "grad_norm": 6.300548287793895, "learning_rate": 7.825305206757175e-06, "loss": 0.7872, "step": 7389 }, { "epoch": 0.66, "grad_norm": 6.450541149050415, "learning_rate": 7.824709185506036e-06, "loss": 0.7452, "step": 7390 }, { "epoch": 0.66, "grad_norm": 5.483459206662198, "learning_rate": 7.82411310529528e-06, "loss": 0.844, "step": 7391 }, { "epoch": 0.66, "grad_norm": 5.623636863336024, "learning_rate": 7.823516966137346e-06, "loss": 0.8186, "step": 7392 }, { "epoch": 0.66, "grad_norm": 5.047842117818669, "learning_rate": 7.82292076804468e-06, "loss": 0.7967, "step": 7393 }, { "epoch": 0.66, "grad_norm": 5.9096519901035505, "learning_rate": 7.822324511029724e-06, "loss": 0.8453, "step": 7394 }, { "epoch": 0.66, "grad_norm": 6.552917856349465, "learning_rate": 7.821728195104925e-06, "loss": 0.7676, "step": 7395 }, { "epoch": 0.66, "grad_norm": 4.581830366408834, "learning_rate": 7.821131820282729e-06, "loss": 0.764, "step": 7396 }, { "epoch": 0.66, "grad_norm": 4.9754073774863885, "learning_rate": 7.820535386575584e-06, "loss": 0.7997, "step": 7397 }, { "epoch": 0.66, "grad_norm": 7.17223771946283, "learning_rate": 7.819938893995942e-06, "loss": 0.802, "step": 7398 }, { "epoch": 0.66, "grad_norm": 7.3008576456931085, "learning_rate": 7.81934234255625e-06, "loss": 0.7824, "step": 7399 }, { "epoch": 0.66, "grad_norm": 4.861907931011364, "learning_rate": 7.81874573226896e-06, "loss": 0.7564, "step": 7400 }, { "epoch": 0.66, "grad_norm": 6.185863865137588, "learning_rate": 7.818149063146524e-06, "loss": 0.8043, "step": 7401 }, { "epoch": 0.66, "grad_norm": 6.367895952970188, "learning_rate": 7.817552335201401e-06, "loss": 0.8444, "step": 7402 }, { "epoch": 0.66, "grad_norm": 5.395208810649217, "learning_rate": 7.816955548446044e-06, "loss": 0.82, "step": 7403 }, { "epoch": 0.66, "grad_norm": 7.423413306877656, "learning_rate": 7.816358702892907e-06, "loss": 0.7631, "step": 7404 }, { "epoch": 0.66, "grad_norm": 5.400677541344206, "learning_rate": 7.81576179855445e-06, "loss": 0.7906, "step": 7405 }, { "epoch": 0.66, "grad_norm": 6.3878550346033345, "learning_rate": 7.815164835443132e-06, "loss": 0.8297, "step": 7406 }, { "epoch": 0.66, "grad_norm": 5.049159133847353, "learning_rate": 7.814567813571415e-06, "loss": 0.7839, "step": 7407 }, { "epoch": 0.66, "grad_norm": 4.525131506003655, "learning_rate": 7.813970732951756e-06, "loss": 0.8564, "step": 7408 }, { "epoch": 0.66, "grad_norm": 6.151103755559571, "learning_rate": 7.81337359359662e-06, "loss": 0.8385, "step": 7409 }, { "epoch": 0.66, "grad_norm": 6.845025771948788, "learning_rate": 7.812776395518474e-06, "loss": 0.7912, "step": 7410 }, { "epoch": 0.66, "grad_norm": 7.696669846916749, "learning_rate": 7.81217913872978e-06, "loss": 0.8103, "step": 7411 }, { "epoch": 0.66, "grad_norm": 9.52335312768078, "learning_rate": 7.811581823243003e-06, "loss": 0.8263, "step": 7412 }, { "epoch": 0.66, "grad_norm": 6.483031821265717, "learning_rate": 7.810984449070616e-06, "loss": 0.8597, "step": 7413 }, { "epoch": 0.66, "grad_norm": 6.00420843402275, "learning_rate": 7.810387016225082e-06, "loss": 0.8941, "step": 7414 }, { "epoch": 0.66, "grad_norm": 4.699197019486055, "learning_rate": 7.809789524718875e-06, "loss": 0.8253, "step": 7415 }, { "epoch": 0.66, "grad_norm": 5.5767196391763285, "learning_rate": 7.809191974564464e-06, "loss": 0.8272, "step": 7416 }, { "epoch": 0.66, "grad_norm": 8.343788810953756, "learning_rate": 7.808594365774323e-06, "loss": 0.7648, "step": 7417 }, { "epoch": 0.66, "grad_norm": 6.169236519126121, "learning_rate": 7.807996698360926e-06, "loss": 0.8837, "step": 7418 }, { "epoch": 0.66, "grad_norm": 6.411817226572251, "learning_rate": 7.807398972336747e-06, "loss": 0.7743, "step": 7419 }, { "epoch": 0.66, "grad_norm": 5.034283214494565, "learning_rate": 7.806801187714262e-06, "loss": 0.7705, "step": 7420 }, { "epoch": 0.66, "grad_norm": 7.098984401105007, "learning_rate": 7.80620334450595e-06, "loss": 0.7011, "step": 7421 }, { "epoch": 0.66, "grad_norm": 5.542192010732576, "learning_rate": 7.805605442724286e-06, "loss": 0.7623, "step": 7422 }, { "epoch": 0.66, "grad_norm": 6.482261402943133, "learning_rate": 7.805007482381755e-06, "loss": 0.7543, "step": 7423 }, { "epoch": 0.66, "grad_norm": 5.410804410738877, "learning_rate": 7.804409463490836e-06, "loss": 0.9009, "step": 7424 }, { "epoch": 0.66, "grad_norm": 4.267569412633374, "learning_rate": 7.80381138606401e-06, "loss": 0.7779, "step": 7425 }, { "epoch": 0.66, "grad_norm": 4.83310431261269, "learning_rate": 7.803213250113761e-06, "loss": 0.7864, "step": 7426 }, { "epoch": 0.66, "grad_norm": 6.060449039044572, "learning_rate": 7.802615055652577e-06, "loss": 0.8205, "step": 7427 }, { "epoch": 0.66, "grad_norm": 6.092746689300448, "learning_rate": 7.802016802692937e-06, "loss": 0.7702, "step": 7428 }, { "epoch": 0.66, "grad_norm": 5.3893760787985485, "learning_rate": 7.801418491247336e-06, "loss": 0.8443, "step": 7429 }, { "epoch": 0.66, "grad_norm": 7.115477280380238, "learning_rate": 7.800820121328259e-06, "loss": 0.7831, "step": 7430 }, { "epoch": 0.66, "grad_norm": 5.994712298719262, "learning_rate": 7.800221692948194e-06, "loss": 0.7969, "step": 7431 }, { "epoch": 0.66, "grad_norm": 4.802075469705073, "learning_rate": 7.799623206119634e-06, "loss": 0.8362, "step": 7432 }, { "epoch": 0.66, "grad_norm": 5.757693675074636, "learning_rate": 7.799024660855072e-06, "loss": 0.7597, "step": 7433 }, { "epoch": 0.66, "grad_norm": 4.942355293544224, "learning_rate": 7.798426057166998e-06, "loss": 0.7899, "step": 7434 }, { "epoch": 0.66, "grad_norm": 5.3246267469651105, "learning_rate": 7.797827395067909e-06, "loss": 0.7243, "step": 7435 }, { "epoch": 0.66, "grad_norm": 4.637842070657123, "learning_rate": 7.7972286745703e-06, "loss": 0.763, "step": 7436 }, { "epoch": 0.66, "grad_norm": 6.507162777412712, "learning_rate": 7.796629895686669e-06, "loss": 0.7411, "step": 7437 }, { "epoch": 0.66, "grad_norm": 5.92817626786634, "learning_rate": 7.796031058429513e-06, "loss": 0.8309, "step": 7438 }, { "epoch": 0.66, "grad_norm": 7.472058410044816, "learning_rate": 7.795432162811331e-06, "loss": 0.8117, "step": 7439 }, { "epoch": 0.66, "grad_norm": 7.170945857249831, "learning_rate": 7.794833208844627e-06, "loss": 0.8012, "step": 7440 }, { "epoch": 0.66, "grad_norm": 4.4650173980939165, "learning_rate": 7.794234196541898e-06, "loss": 0.856, "step": 7441 }, { "epoch": 0.66, "grad_norm": 4.742274310277386, "learning_rate": 7.79363512591565e-06, "loss": 0.7294, "step": 7442 }, { "epoch": 0.66, "grad_norm": 5.641570779242813, "learning_rate": 7.79303599697839e-06, "loss": 0.868, "step": 7443 }, { "epoch": 0.66, "grad_norm": 4.387013757077916, "learning_rate": 7.792436809742617e-06, "loss": 0.7884, "step": 7444 }, { "epoch": 0.66, "grad_norm": 6.591329216991061, "learning_rate": 7.791837564220838e-06, "loss": 0.7999, "step": 7445 }, { "epoch": 0.66, "grad_norm": 5.7735798418799895, "learning_rate": 7.791238260425568e-06, "loss": 0.7094, "step": 7446 }, { "epoch": 0.66, "grad_norm": 6.338181744880929, "learning_rate": 7.790638898369312e-06, "loss": 0.765, "step": 7447 }, { "epoch": 0.66, "grad_norm": 4.812979902052341, "learning_rate": 7.790039478064579e-06, "loss": 0.7765, "step": 7448 }, { "epoch": 0.66, "grad_norm": 6.609783915680946, "learning_rate": 7.789439999523883e-06, "loss": 0.7937, "step": 7449 }, { "epoch": 0.66, "grad_norm": 10.027366712764692, "learning_rate": 7.788840462759736e-06, "loss": 0.8173, "step": 7450 }, { "epoch": 0.66, "grad_norm": 10.224786400190451, "learning_rate": 7.788240867784648e-06, "loss": 0.8742, "step": 7451 }, { "epoch": 0.66, "grad_norm": 6.074150603589031, "learning_rate": 7.787641214611142e-06, "loss": 0.824, "step": 7452 }, { "epoch": 0.66, "grad_norm": 8.056629318735109, "learning_rate": 7.787041503251731e-06, "loss": 0.7922, "step": 7453 }, { "epoch": 0.66, "grad_norm": 5.8789505612724815, "learning_rate": 7.78644173371893e-06, "loss": 0.7773, "step": 7454 }, { "epoch": 0.67, "grad_norm": 5.038437330754451, "learning_rate": 7.785841906025261e-06, "loss": 0.8071, "step": 7455 }, { "epoch": 0.67, "grad_norm": 6.315954357882153, "learning_rate": 7.785242020183243e-06, "loss": 0.8025, "step": 7456 }, { "epoch": 0.67, "grad_norm": 6.409234553980946, "learning_rate": 7.784642076205396e-06, "loss": 0.753, "step": 7457 }, { "epoch": 0.67, "grad_norm": 4.810209668398402, "learning_rate": 7.784042074104246e-06, "loss": 0.7644, "step": 7458 }, { "epoch": 0.67, "grad_norm": 5.880103522416976, "learning_rate": 7.783442013892313e-06, "loss": 0.7407, "step": 7459 }, { "epoch": 0.67, "grad_norm": 7.452319557353951, "learning_rate": 7.782841895582125e-06, "loss": 0.7745, "step": 7460 }, { "epoch": 0.67, "grad_norm": 4.719329245029103, "learning_rate": 7.782241719186206e-06, "loss": 0.8035, "step": 7461 }, { "epoch": 0.67, "grad_norm": 8.243621961409922, "learning_rate": 7.781641484717085e-06, "loss": 0.7511, "step": 7462 }, { "epoch": 0.67, "grad_norm": 5.055682719721051, "learning_rate": 7.78104119218729e-06, "loss": 0.7768, "step": 7463 }, { "epoch": 0.67, "grad_norm": 4.941090867236563, "learning_rate": 7.780440841609347e-06, "loss": 0.7124, "step": 7464 }, { "epoch": 0.67, "grad_norm": 5.720256993964334, "learning_rate": 7.779840432995793e-06, "loss": 0.8388, "step": 7465 }, { "epoch": 0.67, "grad_norm": 4.335830485627826, "learning_rate": 7.779239966359158e-06, "loss": 0.7909, "step": 7466 }, { "epoch": 0.67, "grad_norm": 4.353500822085093, "learning_rate": 7.778639441711975e-06, "loss": 0.7893, "step": 7467 }, { "epoch": 0.67, "grad_norm": 5.026685698017558, "learning_rate": 7.778038859066778e-06, "loss": 0.7928, "step": 7468 }, { "epoch": 0.67, "grad_norm": 5.555662163050832, "learning_rate": 7.777438218436104e-06, "loss": 0.8814, "step": 7469 }, { "epoch": 0.67, "grad_norm": 5.523573302708189, "learning_rate": 7.77683751983249e-06, "loss": 0.7544, "step": 7470 }, { "epoch": 0.67, "grad_norm": 6.371150779549276, "learning_rate": 7.776236763268474e-06, "loss": 0.7943, "step": 7471 }, { "epoch": 0.67, "grad_norm": 6.322112233881904, "learning_rate": 7.775635948756594e-06, "loss": 0.7966, "step": 7472 }, { "epoch": 0.67, "grad_norm": 3.860584656954639, "learning_rate": 7.775035076309395e-06, "loss": 0.7757, "step": 7473 }, { "epoch": 0.67, "grad_norm": 5.46546717247418, "learning_rate": 7.774434145939414e-06, "loss": 0.8296, "step": 7474 }, { "epoch": 0.67, "grad_norm": 6.9721070630061295, "learning_rate": 7.773833157659197e-06, "loss": 0.776, "step": 7475 }, { "epoch": 0.67, "grad_norm": 4.8483853880028205, "learning_rate": 7.773232111481285e-06, "loss": 0.8011, "step": 7476 }, { "epoch": 0.67, "grad_norm": 6.615022459926625, "learning_rate": 7.772631007418229e-06, "loss": 0.8053, "step": 7477 }, { "epoch": 0.67, "grad_norm": 6.224253464396829, "learning_rate": 7.77202984548257e-06, "loss": 0.7753, "step": 7478 }, { "epoch": 0.67, "grad_norm": 4.284333998130052, "learning_rate": 7.771428625686861e-06, "loss": 0.7504, "step": 7479 }, { "epoch": 0.67, "grad_norm": 4.905304618450035, "learning_rate": 7.770827348043648e-06, "loss": 0.7692, "step": 7480 }, { "epoch": 0.67, "grad_norm": 5.87468582521652, "learning_rate": 7.77022601256548e-06, "loss": 0.7123, "step": 7481 }, { "epoch": 0.67, "grad_norm": 7.888654115754701, "learning_rate": 7.769624619264914e-06, "loss": 0.8174, "step": 7482 }, { "epoch": 0.67, "grad_norm": 5.664896813535755, "learning_rate": 7.769023168154498e-06, "loss": 0.7847, "step": 7483 }, { "epoch": 0.67, "grad_norm": 5.471761227961322, "learning_rate": 7.768421659246787e-06, "loss": 0.7603, "step": 7484 }, { "epoch": 0.67, "grad_norm": 5.098265012694944, "learning_rate": 7.767820092554338e-06, "loss": 0.7946, "step": 7485 }, { "epoch": 0.67, "grad_norm": 5.559392064449401, "learning_rate": 7.767218468089704e-06, "loss": 0.8049, "step": 7486 }, { "epoch": 0.67, "grad_norm": 7.059632232186752, "learning_rate": 7.766616785865445e-06, "loss": 0.7467, "step": 7487 }, { "epoch": 0.67, "grad_norm": 6.545976807050508, "learning_rate": 7.766015045894121e-06, "loss": 0.7692, "step": 7488 }, { "epoch": 0.67, "grad_norm": 3.9923207491867045, "learning_rate": 7.76541324818829e-06, "loss": 0.778, "step": 7489 }, { "epoch": 0.67, "grad_norm": 4.339795473067474, "learning_rate": 7.764811392760513e-06, "loss": 0.8989, "step": 7490 }, { "epoch": 0.67, "grad_norm": 5.574408205552939, "learning_rate": 7.764209479623352e-06, "loss": 0.7727, "step": 7491 }, { "epoch": 0.67, "grad_norm": 5.294976042389653, "learning_rate": 7.763607508789373e-06, "loss": 0.7312, "step": 7492 }, { "epoch": 0.67, "grad_norm": 5.021326947507948, "learning_rate": 7.763005480271139e-06, "loss": 0.768, "step": 7493 }, { "epoch": 0.67, "grad_norm": 5.924394715015294, "learning_rate": 7.762403394081217e-06, "loss": 0.8924, "step": 7494 }, { "epoch": 0.67, "grad_norm": 4.939244244236905, "learning_rate": 7.761801250232173e-06, "loss": 0.789, "step": 7495 }, { "epoch": 0.67, "grad_norm": 5.897915601585195, "learning_rate": 7.761199048736577e-06, "loss": 0.9073, "step": 7496 }, { "epoch": 0.67, "grad_norm": 5.388472526516305, "learning_rate": 7.760596789606997e-06, "loss": 0.9327, "step": 7497 }, { "epoch": 0.67, "grad_norm": 4.483017563935484, "learning_rate": 7.759994472856004e-06, "loss": 0.816, "step": 7498 }, { "epoch": 0.67, "grad_norm": 4.7708611944107275, "learning_rate": 7.759392098496173e-06, "loss": 0.7574, "step": 7499 }, { "epoch": 0.67, "grad_norm": 6.199861090391169, "learning_rate": 7.758789666540073e-06, "loss": 0.788, "step": 7500 }, { "epoch": 0.67, "grad_norm": 7.2514675308991485, "learning_rate": 7.75818717700028e-06, "loss": 0.7373, "step": 7501 }, { "epoch": 0.67, "grad_norm": 6.2848284098652405, "learning_rate": 7.757584629889371e-06, "loss": 0.7811, "step": 7502 }, { "epoch": 0.67, "grad_norm": 5.055339403271627, "learning_rate": 7.756982025219921e-06, "loss": 0.8211, "step": 7503 }, { "epoch": 0.67, "grad_norm": 6.30030436019759, "learning_rate": 7.756379363004511e-06, "loss": 0.7305, "step": 7504 }, { "epoch": 0.67, "grad_norm": 7.288245604262926, "learning_rate": 7.755776643255716e-06, "loss": 0.7964, "step": 7505 }, { "epoch": 0.67, "grad_norm": 7.399847616934398, "learning_rate": 7.75517386598612e-06, "loss": 0.8381, "step": 7506 }, { "epoch": 0.67, "grad_norm": 6.072708138317721, "learning_rate": 7.754571031208303e-06, "loss": 0.8298, "step": 7507 }, { "epoch": 0.67, "grad_norm": 5.969950788593255, "learning_rate": 7.753968138934847e-06, "loss": 0.7781, "step": 7508 }, { "epoch": 0.67, "grad_norm": 3.8941821459111083, "learning_rate": 7.753365189178338e-06, "loss": 0.7307, "step": 7509 }, { "epoch": 0.67, "grad_norm": 5.2469069051309, "learning_rate": 7.752762181951362e-06, "loss": 0.826, "step": 7510 }, { "epoch": 0.67, "grad_norm": 5.800080248190455, "learning_rate": 7.7521591172665e-06, "loss": 0.8099, "step": 7511 }, { "epoch": 0.67, "grad_norm": 5.605009777553003, "learning_rate": 7.751555995136345e-06, "loss": 0.8097, "step": 7512 }, { "epoch": 0.67, "grad_norm": 5.740456209253779, "learning_rate": 7.750952815573488e-06, "loss": 0.7803, "step": 7513 }, { "epoch": 0.67, "grad_norm": 5.563251671931665, "learning_rate": 7.750349578590512e-06, "loss": 0.8143, "step": 7514 }, { "epoch": 0.67, "grad_norm": 6.676702229494565, "learning_rate": 7.749746284200012e-06, "loss": 0.8346, "step": 7515 }, { "epoch": 0.67, "grad_norm": 5.681368341302862, "learning_rate": 7.74914293241458e-06, "loss": 0.7919, "step": 7516 }, { "epoch": 0.67, "grad_norm": 5.234965002351286, "learning_rate": 7.74853952324681e-06, "loss": 0.7802, "step": 7517 }, { "epoch": 0.67, "grad_norm": 6.145384294399352, "learning_rate": 7.747936056709297e-06, "loss": 0.7165, "step": 7518 }, { "epoch": 0.67, "grad_norm": 5.408297329191453, "learning_rate": 7.747332532814638e-06, "loss": 0.8071, "step": 7519 }, { "epoch": 0.67, "grad_norm": 5.3917498614157715, "learning_rate": 7.746728951575427e-06, "loss": 0.7524, "step": 7520 }, { "epoch": 0.67, "grad_norm": 6.936195231288735, "learning_rate": 7.746125313004265e-06, "loss": 0.8054, "step": 7521 }, { "epoch": 0.67, "grad_norm": 5.043188812864851, "learning_rate": 7.745521617113751e-06, "loss": 0.8053, "step": 7522 }, { "epoch": 0.67, "grad_norm": 4.346574341615688, "learning_rate": 7.744917863916487e-06, "loss": 0.7975, "step": 7523 }, { "epoch": 0.67, "grad_norm": 4.546262937543969, "learning_rate": 7.74431405342507e-06, "loss": 0.7468, "step": 7524 }, { "epoch": 0.67, "grad_norm": 5.695233951053181, "learning_rate": 7.743710185652111e-06, "loss": 0.7974, "step": 7525 }, { "epoch": 0.67, "grad_norm": 4.4771025845503445, "learning_rate": 7.74310626061021e-06, "loss": 0.7589, "step": 7526 }, { "epoch": 0.67, "grad_norm": 6.121654099150269, "learning_rate": 7.742502278311972e-06, "loss": 0.7545, "step": 7527 }, { "epoch": 0.67, "grad_norm": 5.8252625151652415, "learning_rate": 7.741898238770005e-06, "loss": 0.8069, "step": 7528 }, { "epoch": 0.67, "grad_norm": 5.724082015941043, "learning_rate": 7.741294141996919e-06, "loss": 0.7036, "step": 7529 }, { "epoch": 0.67, "grad_norm": 5.8175877473392905, "learning_rate": 7.740689988005318e-06, "loss": 0.7586, "step": 7530 }, { "epoch": 0.67, "grad_norm": 6.325114379816399, "learning_rate": 7.740085776807817e-06, "loss": 0.8575, "step": 7531 }, { "epoch": 0.67, "grad_norm": 6.934851664176571, "learning_rate": 7.739481508417024e-06, "loss": 0.8395, "step": 7532 }, { "epoch": 0.67, "grad_norm": 5.877481984389563, "learning_rate": 7.738877182845557e-06, "loss": 0.8353, "step": 7533 }, { "epoch": 0.67, "grad_norm": 4.92148218063268, "learning_rate": 7.738272800106026e-06, "loss": 0.779, "step": 7534 }, { "epoch": 0.67, "grad_norm": 5.2079132865382745, "learning_rate": 7.737668360211047e-06, "loss": 0.8278, "step": 7535 }, { "epoch": 0.67, "grad_norm": 5.319059423578033, "learning_rate": 7.737063863173236e-06, "loss": 0.7888, "step": 7536 }, { "epoch": 0.67, "grad_norm": 6.314627972334559, "learning_rate": 7.736459309005211e-06, "loss": 0.7394, "step": 7537 }, { "epoch": 0.67, "grad_norm": 6.9155800687412485, "learning_rate": 7.735854697719591e-06, "loss": 0.8245, "step": 7538 }, { "epoch": 0.67, "grad_norm": 7.212602533842416, "learning_rate": 7.735250029328994e-06, "loss": 0.7774, "step": 7539 }, { "epoch": 0.67, "grad_norm": 7.712850254882772, "learning_rate": 7.734645303846045e-06, "loss": 0.7865, "step": 7540 }, { "epoch": 0.67, "grad_norm": 5.122854507520816, "learning_rate": 7.734040521283363e-06, "loss": 0.6799, "step": 7541 }, { "epoch": 0.67, "grad_norm": 6.198791734124865, "learning_rate": 7.733435681653571e-06, "loss": 0.7939, "step": 7542 }, { "epoch": 0.67, "grad_norm": 6.410299578072398, "learning_rate": 7.732830784969299e-06, "loss": 0.8284, "step": 7543 }, { "epoch": 0.67, "grad_norm": 6.983794122480936, "learning_rate": 7.732225831243166e-06, "loss": 0.874, "step": 7544 }, { "epoch": 0.67, "grad_norm": 4.8959525825583015, "learning_rate": 7.731620820487804e-06, "loss": 0.8404, "step": 7545 }, { "epoch": 0.67, "grad_norm": 6.3005387383297515, "learning_rate": 7.731015752715837e-06, "loss": 0.8193, "step": 7546 }, { "epoch": 0.67, "grad_norm": 5.124731011279562, "learning_rate": 7.7304106279399e-06, "loss": 0.7781, "step": 7547 }, { "epoch": 0.67, "grad_norm": 4.4392462130905495, "learning_rate": 7.72980544617262e-06, "loss": 0.765, "step": 7548 }, { "epoch": 0.67, "grad_norm": 5.752957594871339, "learning_rate": 7.729200207426628e-06, "loss": 0.7225, "step": 7549 }, { "epoch": 0.67, "grad_norm": 4.470868470252671, "learning_rate": 7.72859491171456e-06, "loss": 0.7905, "step": 7550 }, { "epoch": 0.67, "grad_norm": 5.92229975792626, "learning_rate": 7.727989559049048e-06, "loss": 0.7857, "step": 7551 }, { "epoch": 0.67, "grad_norm": 6.300262247435067, "learning_rate": 7.727384149442729e-06, "loss": 0.8547, "step": 7552 }, { "epoch": 0.67, "grad_norm": 6.226955130946679, "learning_rate": 7.726778682908239e-06, "loss": 0.7752, "step": 7553 }, { "epoch": 0.67, "grad_norm": 6.623105866623984, "learning_rate": 7.726173159458213e-06, "loss": 0.8225, "step": 7554 }, { "epoch": 0.67, "grad_norm": 5.235355999002692, "learning_rate": 7.725567579105295e-06, "loss": 0.8277, "step": 7555 }, { "epoch": 0.67, "grad_norm": 5.986799402747926, "learning_rate": 7.72496194186212e-06, "loss": 0.7906, "step": 7556 }, { "epoch": 0.67, "grad_norm": 7.16504428184052, "learning_rate": 7.724356247741335e-06, "loss": 0.7392, "step": 7557 }, { "epoch": 0.67, "grad_norm": 7.3687765881955345, "learning_rate": 7.723750496755578e-06, "loss": 0.7648, "step": 7558 }, { "epoch": 0.67, "grad_norm": 5.9853025667174045, "learning_rate": 7.723144688917494e-06, "loss": 0.7954, "step": 7559 }, { "epoch": 0.67, "grad_norm": 8.10396475214905, "learning_rate": 7.72253882423973e-06, "loss": 0.8374, "step": 7560 }, { "epoch": 0.67, "grad_norm": 5.353069816884688, "learning_rate": 7.721932902734929e-06, "loss": 0.8016, "step": 7561 }, { "epoch": 0.67, "grad_norm": 4.4135523640419345, "learning_rate": 7.721326924415739e-06, "loss": 0.7364, "step": 7562 }, { "epoch": 0.67, "grad_norm": 3.8622763979619656, "learning_rate": 7.720720889294811e-06, "loss": 0.7258, "step": 7563 }, { "epoch": 0.67, "grad_norm": 7.744100049264777, "learning_rate": 7.720114797384791e-06, "loss": 0.7474, "step": 7564 }, { "epoch": 0.67, "grad_norm": 5.697409236372265, "learning_rate": 7.719508648698332e-06, "loss": 0.7554, "step": 7565 }, { "epoch": 0.67, "grad_norm": 4.553193568860331, "learning_rate": 7.718902443248086e-06, "loss": 0.7675, "step": 7566 }, { "epoch": 0.68, "grad_norm": 6.969497420222376, "learning_rate": 7.718296181046705e-06, "loss": 0.7864, "step": 7567 }, { "epoch": 0.68, "grad_norm": 5.961430525761238, "learning_rate": 7.717689862106844e-06, "loss": 0.8709, "step": 7568 }, { "epoch": 0.68, "grad_norm": 6.3796229010381165, "learning_rate": 7.71708348644116e-06, "loss": 0.7679, "step": 7569 }, { "epoch": 0.68, "grad_norm": 6.654873177011836, "learning_rate": 7.716477054062308e-06, "loss": 0.8204, "step": 7570 }, { "epoch": 0.68, "grad_norm": 5.328227211049488, "learning_rate": 7.715870564982947e-06, "loss": 0.7799, "step": 7571 }, { "epoch": 0.68, "grad_norm": 7.327583361305452, "learning_rate": 7.715264019215734e-06, "loss": 0.8037, "step": 7572 }, { "epoch": 0.68, "grad_norm": 6.132519949781146, "learning_rate": 7.714657416773332e-06, "loss": 0.913, "step": 7573 }, { "epoch": 0.68, "grad_norm": 5.397600225276254, "learning_rate": 7.714050757668403e-06, "loss": 0.8172, "step": 7574 }, { "epoch": 0.68, "grad_norm": 7.754757228857548, "learning_rate": 7.713444041913604e-06, "loss": 0.7836, "step": 7575 }, { "epoch": 0.68, "grad_norm": 4.95741302463087, "learning_rate": 7.712837269521608e-06, "loss": 0.8251, "step": 7576 }, { "epoch": 0.68, "grad_norm": 6.723652098968439, "learning_rate": 7.712230440505072e-06, "loss": 0.7561, "step": 7577 }, { "epoch": 0.68, "grad_norm": 6.007896900117036, "learning_rate": 7.711623554876666e-06, "loss": 0.832, "step": 7578 }, { "epoch": 0.68, "grad_norm": 5.478615476850892, "learning_rate": 7.711016612649056e-06, "loss": 0.7798, "step": 7579 }, { "epoch": 0.68, "grad_norm": 5.728711126555095, "learning_rate": 7.71040961383491e-06, "loss": 0.8031, "step": 7580 }, { "epoch": 0.68, "grad_norm": 5.894156152622334, "learning_rate": 7.709802558446902e-06, "loss": 0.8448, "step": 7581 }, { "epoch": 0.68, "grad_norm": 4.537927602880863, "learning_rate": 7.709195446497698e-06, "loss": 0.7758, "step": 7582 }, { "epoch": 0.68, "grad_norm": 6.113675294480688, "learning_rate": 7.708588277999974e-06, "loss": 0.7387, "step": 7583 }, { "epoch": 0.68, "grad_norm": 5.2939288985472555, "learning_rate": 7.7079810529664e-06, "loss": 0.7805, "step": 7584 }, { "epoch": 0.68, "grad_norm": 4.5966073155328235, "learning_rate": 7.70737377140965e-06, "loss": 0.8126, "step": 7585 }, { "epoch": 0.68, "grad_norm": 5.111686144306963, "learning_rate": 7.706766433342406e-06, "loss": 0.8508, "step": 7586 }, { "epoch": 0.68, "grad_norm": 7.223499834489987, "learning_rate": 7.706159038777336e-06, "loss": 0.7827, "step": 7587 }, { "epoch": 0.68, "grad_norm": 6.757637621094529, "learning_rate": 7.705551587727125e-06, "loss": 0.7415, "step": 7588 }, { "epoch": 0.68, "grad_norm": 5.901784190731154, "learning_rate": 7.704944080204449e-06, "loss": 0.7484, "step": 7589 }, { "epoch": 0.68, "grad_norm": 6.571368981992699, "learning_rate": 7.704336516221989e-06, "loss": 0.8625, "step": 7590 }, { "epoch": 0.68, "grad_norm": 6.124041521442684, "learning_rate": 7.703728895792428e-06, "loss": 0.8718, "step": 7591 }, { "epoch": 0.68, "grad_norm": 6.470537489038532, "learning_rate": 7.703121218928445e-06, "loss": 0.829, "step": 7592 }, { "epoch": 0.68, "grad_norm": 7.302213240823461, "learning_rate": 7.702513485642727e-06, "loss": 0.8686, "step": 7593 }, { "epoch": 0.68, "grad_norm": 5.7221579850980735, "learning_rate": 7.701905695947959e-06, "loss": 0.8127, "step": 7594 }, { "epoch": 0.68, "grad_norm": 7.666509447114909, "learning_rate": 7.701297849856825e-06, "loss": 0.7666, "step": 7595 }, { "epoch": 0.68, "grad_norm": 4.848931954086172, "learning_rate": 7.700689947382017e-06, "loss": 0.8003, "step": 7596 }, { "epoch": 0.68, "grad_norm": 5.795657633408427, "learning_rate": 7.700081988536219e-06, "loss": 0.8089, "step": 7597 }, { "epoch": 0.68, "grad_norm": 5.526746166157627, "learning_rate": 7.699473973332123e-06, "loss": 0.7647, "step": 7598 }, { "epoch": 0.68, "grad_norm": 7.495556037668056, "learning_rate": 7.69886590178242e-06, "loss": 0.8053, "step": 7599 }, { "epoch": 0.68, "grad_norm": 4.623456839049134, "learning_rate": 7.6982577738998e-06, "loss": 0.7484, "step": 7600 }, { "epoch": 0.68, "grad_norm": 7.387415474864637, "learning_rate": 7.69764958969696e-06, "loss": 0.7736, "step": 7601 }, { "epoch": 0.68, "grad_norm": 5.472397997316965, "learning_rate": 7.697041349186593e-06, "loss": 0.7808, "step": 7602 }, { "epoch": 0.68, "grad_norm": 6.439914276946039, "learning_rate": 7.696433052381392e-06, "loss": 0.7855, "step": 7603 }, { "epoch": 0.68, "grad_norm": 5.669193703937444, "learning_rate": 7.695824699294056e-06, "loss": 0.7698, "step": 7604 }, { "epoch": 0.68, "grad_norm": 4.934326363998305, "learning_rate": 7.695216289937287e-06, "loss": 0.8101, "step": 7605 }, { "epoch": 0.68, "grad_norm": 6.5523741864640925, "learning_rate": 7.694607824323777e-06, "loss": 0.7511, "step": 7606 }, { "epoch": 0.68, "grad_norm": 6.796093406215694, "learning_rate": 7.69399930246623e-06, "loss": 0.8755, "step": 7607 }, { "epoch": 0.68, "grad_norm": 6.828821947883862, "learning_rate": 7.693390724377348e-06, "loss": 0.8129, "step": 7608 }, { "epoch": 0.68, "grad_norm": 5.164511882673904, "learning_rate": 7.692782090069832e-06, "loss": 0.7354, "step": 7609 }, { "epoch": 0.68, "grad_norm": 6.277087166028941, "learning_rate": 7.692173399556388e-06, "loss": 0.8081, "step": 7610 }, { "epoch": 0.68, "grad_norm": 7.1231503211088745, "learning_rate": 7.69156465284972e-06, "loss": 0.8269, "step": 7611 }, { "epoch": 0.68, "grad_norm": 5.121707824418647, "learning_rate": 7.690955849962534e-06, "loss": 0.7238, "step": 7612 }, { "epoch": 0.68, "grad_norm": 6.42702493899873, "learning_rate": 7.690346990907538e-06, "loss": 0.7923, "step": 7613 }, { "epoch": 0.68, "grad_norm": 5.322468672201752, "learning_rate": 7.68973807569744e-06, "loss": 0.7404, "step": 7614 }, { "epoch": 0.68, "grad_norm": 7.664416811582935, "learning_rate": 7.68912910434495e-06, "loss": 0.781, "step": 7615 }, { "epoch": 0.68, "grad_norm": 5.232354803837314, "learning_rate": 7.68852007686278e-06, "loss": 0.8114, "step": 7616 }, { "epoch": 0.68, "grad_norm": 5.493728268618887, "learning_rate": 7.68791099326364e-06, "loss": 0.7949, "step": 7617 }, { "epoch": 0.68, "grad_norm": 5.537499436920661, "learning_rate": 7.687301853560245e-06, "loss": 0.8621, "step": 7618 }, { "epoch": 0.68, "grad_norm": 5.298623530139193, "learning_rate": 7.686692657765309e-06, "loss": 0.7395, "step": 7619 }, { "epoch": 0.68, "grad_norm": 6.331878587684508, "learning_rate": 7.686083405891547e-06, "loss": 0.7121, "step": 7620 }, { "epoch": 0.68, "grad_norm": 5.314164479993293, "learning_rate": 7.685474097951678e-06, "loss": 0.8244, "step": 7621 }, { "epoch": 0.68, "grad_norm": 7.927201992974882, "learning_rate": 7.684864733958416e-06, "loss": 0.7753, "step": 7622 }, { "epoch": 0.68, "grad_norm": 5.387592043297697, "learning_rate": 7.684255313924482e-06, "loss": 0.7903, "step": 7623 }, { "epoch": 0.68, "grad_norm": 5.0540792298762405, "learning_rate": 7.6836458378626e-06, "loss": 0.8249, "step": 7624 }, { "epoch": 0.68, "grad_norm": 6.276759778654334, "learning_rate": 7.683036305785485e-06, "loss": 0.8355, "step": 7625 }, { "epoch": 0.68, "grad_norm": 4.962711074640727, "learning_rate": 7.682426717705864e-06, "loss": 0.7472, "step": 7626 }, { "epoch": 0.68, "grad_norm": 5.952867844412923, "learning_rate": 7.68181707363646e-06, "loss": 0.7695, "step": 7627 }, { "epoch": 0.68, "grad_norm": 5.008032312956153, "learning_rate": 7.681207373589998e-06, "loss": 0.8715, "step": 7628 }, { "epoch": 0.68, "grad_norm": 5.717460136143103, "learning_rate": 7.680597617579204e-06, "loss": 0.8474, "step": 7629 }, { "epoch": 0.68, "grad_norm": 5.223582580563883, "learning_rate": 7.679987805616804e-06, "loss": 0.7835, "step": 7630 }, { "epoch": 0.68, "grad_norm": 4.970207876097407, "learning_rate": 7.679377937715529e-06, "loss": 0.7749, "step": 7631 }, { "epoch": 0.68, "grad_norm": 4.6929326020658175, "learning_rate": 7.678768013888106e-06, "loss": 0.7902, "step": 7632 }, { "epoch": 0.68, "grad_norm": 4.095250175947519, "learning_rate": 7.678158034147266e-06, "loss": 0.8595, "step": 7633 }, { "epoch": 0.68, "grad_norm": 5.5298173502012276, "learning_rate": 7.677547998505746e-06, "loss": 0.7931, "step": 7634 }, { "epoch": 0.68, "grad_norm": 4.559817785456491, "learning_rate": 7.676937906976272e-06, "loss": 0.7747, "step": 7635 }, { "epoch": 0.68, "grad_norm": 6.501244567035963, "learning_rate": 7.676327759571583e-06, "loss": 0.8002, "step": 7636 }, { "epoch": 0.68, "grad_norm": 4.7674458629614955, "learning_rate": 7.675717556304412e-06, "loss": 0.7882, "step": 7637 }, { "epoch": 0.68, "grad_norm": 5.689030379873994, "learning_rate": 7.675107297187498e-06, "loss": 0.8238, "step": 7638 }, { "epoch": 0.68, "grad_norm": 11.13753738789413, "learning_rate": 7.674496982233576e-06, "loss": 0.8597, "step": 7639 }, { "epoch": 0.68, "grad_norm": 5.102618248338764, "learning_rate": 7.673886611455388e-06, "loss": 0.7381, "step": 7640 }, { "epoch": 0.68, "grad_norm": 6.108141908473352, "learning_rate": 7.673276184865672e-06, "loss": 0.7809, "step": 7641 }, { "epoch": 0.68, "grad_norm": 6.755688625291136, "learning_rate": 7.67266570247717e-06, "loss": 0.8006, "step": 7642 }, { "epoch": 0.68, "grad_norm": 7.188839246472388, "learning_rate": 7.672055164302624e-06, "loss": 0.7609, "step": 7643 }, { "epoch": 0.68, "grad_norm": 8.677550120517731, "learning_rate": 7.67144457035478e-06, "loss": 0.7007, "step": 7644 }, { "epoch": 0.68, "grad_norm": 4.533281272204738, "learning_rate": 7.670833920646378e-06, "loss": 0.8053, "step": 7645 }, { "epoch": 0.68, "grad_norm": 6.68738023831253, "learning_rate": 7.67022321519017e-06, "loss": 0.7849, "step": 7646 }, { "epoch": 0.68, "grad_norm": 6.280047136552515, "learning_rate": 7.669612453998899e-06, "loss": 0.8099, "step": 7647 }, { "epoch": 0.68, "grad_norm": 6.006534606727385, "learning_rate": 7.669001637085313e-06, "loss": 0.8352, "step": 7648 }, { "epoch": 0.68, "grad_norm": 5.347157511480133, "learning_rate": 7.668390764462163e-06, "loss": 0.7394, "step": 7649 }, { "epoch": 0.68, "grad_norm": 5.683916945860286, "learning_rate": 7.667779836142201e-06, "loss": 0.7896, "step": 7650 }, { "epoch": 0.68, "grad_norm": 5.351700824810665, "learning_rate": 7.667168852138178e-06, "loss": 0.8193, "step": 7651 }, { "epoch": 0.68, "grad_norm": 4.98005612380919, "learning_rate": 7.666557812462844e-06, "loss": 0.8473, "step": 7652 }, { "epoch": 0.68, "grad_norm": 5.027058815739654, "learning_rate": 7.665946717128955e-06, "loss": 0.7079, "step": 7653 }, { "epoch": 0.68, "grad_norm": 4.810606430972879, "learning_rate": 7.66533556614927e-06, "loss": 0.8989, "step": 7654 }, { "epoch": 0.68, "grad_norm": 4.806155762886301, "learning_rate": 7.664724359536538e-06, "loss": 0.7961, "step": 7655 }, { "epoch": 0.68, "grad_norm": 7.405791466740857, "learning_rate": 7.664113097303525e-06, "loss": 0.7249, "step": 7656 }, { "epoch": 0.68, "grad_norm": 6.169939066397133, "learning_rate": 7.663501779462982e-06, "loss": 0.8094, "step": 7657 }, { "epoch": 0.68, "grad_norm": 5.399083939796335, "learning_rate": 7.662890406027673e-06, "loss": 0.8183, "step": 7658 }, { "epoch": 0.68, "grad_norm": 5.382317761372601, "learning_rate": 7.662278977010359e-06, "loss": 0.8083, "step": 7659 }, { "epoch": 0.68, "grad_norm": 4.885688943320695, "learning_rate": 7.661667492423801e-06, "loss": 0.8369, "step": 7660 }, { "epoch": 0.68, "grad_norm": 5.2572005973740294, "learning_rate": 7.661055952280762e-06, "loss": 0.7978, "step": 7661 }, { "epoch": 0.68, "grad_norm": 4.567625991294728, "learning_rate": 7.66044435659401e-06, "loss": 0.8259, "step": 7662 }, { "epoch": 0.68, "grad_norm": 6.931050309975084, "learning_rate": 7.659832705376307e-06, "loss": 0.7678, "step": 7663 }, { "epoch": 0.68, "grad_norm": 4.445643445279682, "learning_rate": 7.659220998640423e-06, "loss": 0.6865, "step": 7664 }, { "epoch": 0.68, "grad_norm": 4.911501034946819, "learning_rate": 7.658609236399121e-06, "loss": 0.8047, "step": 7665 }, { "epoch": 0.68, "grad_norm": 4.8075100553659365, "learning_rate": 7.657997418665176e-06, "loss": 0.8405, "step": 7666 }, { "epoch": 0.68, "grad_norm": 5.747992427245949, "learning_rate": 7.657385545451355e-06, "loss": 0.7816, "step": 7667 }, { "epoch": 0.68, "grad_norm": 7.909697654096059, "learning_rate": 7.656773616770429e-06, "loss": 0.7786, "step": 7668 }, { "epoch": 0.68, "grad_norm": 7.4223571957077725, "learning_rate": 7.656161632635174e-06, "loss": 0.7798, "step": 7669 }, { "epoch": 0.68, "grad_norm": 4.903839068644271, "learning_rate": 7.65554959305836e-06, "loss": 0.7862, "step": 7670 }, { "epoch": 0.68, "grad_norm": 5.134746282712212, "learning_rate": 7.654937498052766e-06, "loss": 0.7988, "step": 7671 }, { "epoch": 0.68, "grad_norm": 8.424630766692243, "learning_rate": 7.654325347631165e-06, "loss": 0.8057, "step": 7672 }, { "epoch": 0.68, "grad_norm": 6.262187716173571, "learning_rate": 7.653713141806334e-06, "loss": 0.8335, "step": 7673 }, { "epoch": 0.68, "grad_norm": 6.283608874631508, "learning_rate": 7.653100880591054e-06, "loss": 0.7552, "step": 7674 }, { "epoch": 0.68, "grad_norm": 4.384746040310324, "learning_rate": 7.652488563998103e-06, "loss": 0.8317, "step": 7675 }, { "epoch": 0.68, "grad_norm": 4.871801048686925, "learning_rate": 7.651876192040262e-06, "loss": 0.7437, "step": 7676 }, { "epoch": 0.68, "grad_norm": 5.164777094118929, "learning_rate": 7.651263764730313e-06, "loss": 0.7768, "step": 7677 }, { "epoch": 0.68, "grad_norm": 5.840105261437366, "learning_rate": 7.65065128208104e-06, "loss": 0.7928, "step": 7678 }, { "epoch": 0.69, "grad_norm": 5.29190650242915, "learning_rate": 7.650038744105226e-06, "loss": 0.8085, "step": 7679 }, { "epoch": 0.69, "grad_norm": 5.289565494745867, "learning_rate": 7.649426150815656e-06, "loss": 0.7966, "step": 7680 }, { "epoch": 0.69, "grad_norm": 5.826725641641609, "learning_rate": 7.648813502225117e-06, "loss": 0.7999, "step": 7681 }, { "epoch": 0.69, "grad_norm": 6.729576577581364, "learning_rate": 7.648200798346397e-06, "loss": 0.822, "step": 7682 }, { "epoch": 0.69, "grad_norm": 5.978937697706007, "learning_rate": 7.647588039192286e-06, "loss": 0.7612, "step": 7683 }, { "epoch": 0.69, "grad_norm": 7.410845851931574, "learning_rate": 7.646975224775573e-06, "loss": 0.8525, "step": 7684 }, { "epoch": 0.69, "grad_norm": 5.081322451979987, "learning_rate": 7.646362355109047e-06, "loss": 0.8499, "step": 7685 }, { "epoch": 0.69, "grad_norm": 8.224663423942024, "learning_rate": 7.645749430205504e-06, "loss": 0.7714, "step": 7686 }, { "epoch": 0.69, "grad_norm": 5.78383570324417, "learning_rate": 7.645136450077735e-06, "loss": 0.7617, "step": 7687 }, { "epoch": 0.69, "grad_norm": 5.694176786141585, "learning_rate": 7.644523414738535e-06, "loss": 0.7775, "step": 7688 }, { "epoch": 0.69, "grad_norm": 6.6513403183789634, "learning_rate": 7.6439103242007e-06, "loss": 0.7672, "step": 7689 }, { "epoch": 0.69, "grad_norm": 4.804358370771572, "learning_rate": 7.643297178477027e-06, "loss": 0.8118, "step": 7690 }, { "epoch": 0.69, "grad_norm": 4.53979341748493, "learning_rate": 7.642683977580315e-06, "loss": 0.8309, "step": 7691 }, { "epoch": 0.69, "grad_norm": 5.062637954352693, "learning_rate": 7.642070721523363e-06, "loss": 0.7343, "step": 7692 }, { "epoch": 0.69, "grad_norm": 6.13969727212979, "learning_rate": 7.641457410318969e-06, "loss": 0.7687, "step": 7693 }, { "epoch": 0.69, "grad_norm": 7.541801383950333, "learning_rate": 7.640844043979938e-06, "loss": 0.7322, "step": 7694 }, { "epoch": 0.69, "grad_norm": 5.488669727563981, "learning_rate": 7.640230622519069e-06, "loss": 0.7476, "step": 7695 }, { "epoch": 0.69, "grad_norm": 6.18995436725868, "learning_rate": 7.63961714594917e-06, "loss": 0.7792, "step": 7696 }, { "epoch": 0.69, "grad_norm": 4.93590377495648, "learning_rate": 7.639003614283041e-06, "loss": 0.7273, "step": 7697 }, { "epoch": 0.69, "grad_norm": 5.155477598379602, "learning_rate": 7.638390027533493e-06, "loss": 0.7782, "step": 7698 }, { "epoch": 0.69, "grad_norm": 5.577477857257526, "learning_rate": 7.63777638571333e-06, "loss": 0.811, "step": 7699 }, { "epoch": 0.69, "grad_norm": 5.589771586501075, "learning_rate": 7.637162688835363e-06, "loss": 0.7271, "step": 7700 }, { "epoch": 0.69, "grad_norm": 5.179842247270028, "learning_rate": 7.636548936912398e-06, "loss": 0.7791, "step": 7701 }, { "epoch": 0.69, "grad_norm": 5.172001204925341, "learning_rate": 7.635935129957248e-06, "loss": 0.7463, "step": 7702 }, { "epoch": 0.69, "grad_norm": 5.1689630207134645, "learning_rate": 7.635321267982727e-06, "loss": 0.7943, "step": 7703 }, { "epoch": 0.69, "grad_norm": 4.789810060525493, "learning_rate": 7.634707351001645e-06, "loss": 0.8055, "step": 7704 }, { "epoch": 0.69, "grad_norm": 4.711283655182005, "learning_rate": 7.634093379026816e-06, "loss": 0.7667, "step": 7705 }, { "epoch": 0.69, "grad_norm": 6.424836307989746, "learning_rate": 7.633479352071055e-06, "loss": 0.8493, "step": 7706 }, { "epoch": 0.69, "grad_norm": 5.819645503478086, "learning_rate": 7.632865270147184e-06, "loss": 0.754, "step": 7707 }, { "epoch": 0.69, "grad_norm": 5.229314484424215, "learning_rate": 7.632251133268014e-06, "loss": 0.7243, "step": 7708 }, { "epoch": 0.69, "grad_norm": 4.700344300412818, "learning_rate": 7.631636941446365e-06, "loss": 0.8331, "step": 7709 }, { "epoch": 0.69, "grad_norm": 5.221842818951836, "learning_rate": 7.63102269469506e-06, "loss": 0.7916, "step": 7710 }, { "epoch": 0.69, "grad_norm": 8.931643693120213, "learning_rate": 7.630408393026917e-06, "loss": 0.8178, "step": 7711 }, { "epoch": 0.69, "grad_norm": 6.21715536672453, "learning_rate": 7.629794036454758e-06, "loss": 0.7423, "step": 7712 }, { "epoch": 0.69, "grad_norm": 5.183891450762643, "learning_rate": 7.629179624991409e-06, "loss": 0.756, "step": 7713 }, { "epoch": 0.69, "grad_norm": 5.247314115658371, "learning_rate": 7.628565158649695e-06, "loss": 0.8435, "step": 7714 }, { "epoch": 0.69, "grad_norm": 5.637357420647921, "learning_rate": 7.627950637442438e-06, "loss": 0.7799, "step": 7715 }, { "epoch": 0.69, "grad_norm": 5.755633476780596, "learning_rate": 7.627336061382467e-06, "loss": 0.7856, "step": 7716 }, { "epoch": 0.69, "grad_norm": 6.680912597047716, "learning_rate": 7.626721430482609e-06, "loss": 0.7868, "step": 7717 }, { "epoch": 0.69, "grad_norm": 4.3313107631818974, "learning_rate": 7.626106744755693e-06, "loss": 0.7486, "step": 7718 }, { "epoch": 0.69, "grad_norm": 5.728963927086245, "learning_rate": 7.6254920042145495e-06, "loss": 0.8309, "step": 7719 }, { "epoch": 0.69, "grad_norm": 7.792120401205434, "learning_rate": 7.624877208872013e-06, "loss": 0.8733, "step": 7720 }, { "epoch": 0.69, "grad_norm": 5.096236477487446, "learning_rate": 7.624262358740911e-06, "loss": 0.7895, "step": 7721 }, { "epoch": 0.69, "grad_norm": 4.945059947329456, "learning_rate": 7.623647453834078e-06, "loss": 0.7874, "step": 7722 }, { "epoch": 0.69, "grad_norm": 5.737551051257854, "learning_rate": 7.623032494164353e-06, "loss": 0.8228, "step": 7723 }, { "epoch": 0.69, "grad_norm": 6.3206134388419875, "learning_rate": 7.622417479744566e-06, "loss": 0.8513, "step": 7724 }, { "epoch": 0.69, "grad_norm": 4.131212065934153, "learning_rate": 7.62180241058756e-06, "loss": 0.7844, "step": 7725 }, { "epoch": 0.69, "grad_norm": 4.961527462552656, "learning_rate": 7.621187286706169e-06, "loss": 0.8085, "step": 7726 }, { "epoch": 0.69, "grad_norm": 4.913261636565339, "learning_rate": 7.6205721081132335e-06, "loss": 0.8332, "step": 7727 }, { "epoch": 0.69, "grad_norm": 6.714761347633321, "learning_rate": 7.619956874821595e-06, "loss": 0.7663, "step": 7728 }, { "epoch": 0.69, "grad_norm": 4.9023365376783214, "learning_rate": 7.619341586844094e-06, "loss": 0.8579, "step": 7729 }, { "epoch": 0.69, "grad_norm": 5.657651671712657, "learning_rate": 7.618726244193573e-06, "loss": 0.7932, "step": 7730 }, { "epoch": 0.69, "grad_norm": 6.7280969010376, "learning_rate": 7.618110846882876e-06, "loss": 0.8253, "step": 7731 }, { "epoch": 0.69, "grad_norm": 4.828053340995662, "learning_rate": 7.617495394924849e-06, "loss": 0.8108, "step": 7732 }, { "epoch": 0.69, "grad_norm": 5.959620632630165, "learning_rate": 7.61687988833234e-06, "loss": 0.7201, "step": 7733 }, { "epoch": 0.69, "grad_norm": 6.7440344499426965, "learning_rate": 7.6162643271181926e-06, "loss": 0.8018, "step": 7734 }, { "epoch": 0.69, "grad_norm": 5.620902132049789, "learning_rate": 7.615648711295256e-06, "loss": 0.7865, "step": 7735 }, { "epoch": 0.69, "grad_norm": 4.833026730175112, "learning_rate": 7.615033040876383e-06, "loss": 0.699, "step": 7736 }, { "epoch": 0.69, "grad_norm": 4.625427238744532, "learning_rate": 7.614417315874421e-06, "loss": 0.7538, "step": 7737 }, { "epoch": 0.69, "grad_norm": 5.792546750856068, "learning_rate": 7.613801536302221e-06, "loss": 0.7692, "step": 7738 }, { "epoch": 0.69, "grad_norm": 5.883082508959225, "learning_rate": 7.613185702172641e-06, "loss": 0.795, "step": 7739 }, { "epoch": 0.69, "grad_norm": 5.434478610232027, "learning_rate": 7.612569813498531e-06, "loss": 0.7463, "step": 7740 }, { "epoch": 0.69, "grad_norm": 7.2185370021833615, "learning_rate": 7.611953870292747e-06, "loss": 0.8249, "step": 7741 }, { "epoch": 0.69, "grad_norm": 6.966976924185312, "learning_rate": 7.611337872568148e-06, "loss": 0.7383, "step": 7742 }, { "epoch": 0.69, "grad_norm": 5.842655661245851, "learning_rate": 7.610721820337587e-06, "loss": 0.8197, "step": 7743 }, { "epoch": 0.69, "grad_norm": 5.2676029234463515, "learning_rate": 7.610105713613927e-06, "loss": 0.7648, "step": 7744 }, { "epoch": 0.69, "grad_norm": 6.207206808581557, "learning_rate": 7.6094895524100274e-06, "loss": 0.8527, "step": 7745 }, { "epoch": 0.69, "grad_norm": 5.3358658383946755, "learning_rate": 7.608873336738746e-06, "loss": 0.7896, "step": 7746 }, { "epoch": 0.69, "grad_norm": 6.209204834858133, "learning_rate": 7.608257066612947e-06, "loss": 0.7746, "step": 7747 }, { "epoch": 0.69, "grad_norm": 7.592089915783581, "learning_rate": 7.6076407420454946e-06, "loss": 0.8349, "step": 7748 }, { "epoch": 0.69, "grad_norm": 6.100964353577968, "learning_rate": 7.607024363049251e-06, "loss": 0.8254, "step": 7749 }, { "epoch": 0.69, "grad_norm": 5.5881070712212955, "learning_rate": 7.606407929637085e-06, "loss": 0.7915, "step": 7750 }, { "epoch": 0.69, "grad_norm": 5.2208729311664035, "learning_rate": 7.6057914418218615e-06, "loss": 0.8326, "step": 7751 }, { "epoch": 0.69, "grad_norm": 6.627568394650325, "learning_rate": 7.605174899616448e-06, "loss": 0.9071, "step": 7752 }, { "epoch": 0.69, "grad_norm": 6.168531721188039, "learning_rate": 7.604558303033712e-06, "loss": 0.8359, "step": 7753 }, { "epoch": 0.69, "grad_norm": 6.275327588170875, "learning_rate": 7.603941652086526e-06, "loss": 0.8292, "step": 7754 }, { "epoch": 0.69, "grad_norm": 6.087752838058639, "learning_rate": 7.603324946787762e-06, "loss": 0.7906, "step": 7755 }, { "epoch": 0.69, "grad_norm": 4.854889335384123, "learning_rate": 7.602708187150289e-06, "loss": 0.8505, "step": 7756 }, { "epoch": 0.69, "grad_norm": 18.084879787185653, "learning_rate": 7.602091373186984e-06, "loss": 0.7773, "step": 7757 }, { "epoch": 0.69, "grad_norm": 5.328897127579517, "learning_rate": 7.6014745049107195e-06, "loss": 0.7242, "step": 7758 }, { "epoch": 0.69, "grad_norm": 5.799291711416466, "learning_rate": 7.600857582334371e-06, "loss": 0.8106, "step": 7759 }, { "epoch": 0.69, "grad_norm": 4.941821490468286, "learning_rate": 7.600240605470817e-06, "loss": 0.7951, "step": 7760 }, { "epoch": 0.69, "grad_norm": 7.385817494026444, "learning_rate": 7.599623574332935e-06, "loss": 0.7476, "step": 7761 }, { "epoch": 0.69, "grad_norm": 5.759030226030275, "learning_rate": 7.599006488933604e-06, "loss": 0.8143, "step": 7762 }, { "epoch": 0.69, "grad_norm": 4.113537488768801, "learning_rate": 7.598389349285704e-06, "loss": 0.7944, "step": 7763 }, { "epoch": 0.69, "grad_norm": 6.672178896154457, "learning_rate": 7.597772155402116e-06, "loss": 0.803, "step": 7764 }, { "epoch": 0.69, "grad_norm": 6.26967571706157, "learning_rate": 7.597154907295725e-06, "loss": 0.7334, "step": 7765 }, { "epoch": 0.69, "grad_norm": 5.11640031887388, "learning_rate": 7.596537604979413e-06, "loss": 0.8036, "step": 7766 }, { "epoch": 0.69, "grad_norm": 5.6740098195767565, "learning_rate": 7.595920248466062e-06, "loss": 0.8553, "step": 7767 }, { "epoch": 0.69, "grad_norm": 4.7840054777453025, "learning_rate": 7.595302837768564e-06, "loss": 0.8113, "step": 7768 }, { "epoch": 0.69, "grad_norm": 5.818079305901796, "learning_rate": 7.594685372899801e-06, "loss": 0.8293, "step": 7769 }, { "epoch": 0.69, "grad_norm": 4.686436701133617, "learning_rate": 7.594067853872664e-06, "loss": 0.7271, "step": 7770 }, { "epoch": 0.69, "grad_norm": 9.848466977834743, "learning_rate": 7.593450280700041e-06, "loss": 0.8555, "step": 7771 }, { "epoch": 0.69, "grad_norm": 7.8253302609235265, "learning_rate": 7.5928326533948225e-06, "loss": 0.8364, "step": 7772 }, { "epoch": 0.69, "grad_norm": 5.211660282281385, "learning_rate": 7.5922149719699e-06, "loss": 0.7742, "step": 7773 }, { "epoch": 0.69, "grad_norm": 5.355644647754552, "learning_rate": 7.5915972364381685e-06, "loss": 0.7853, "step": 7774 }, { "epoch": 0.69, "grad_norm": 6.678127314965765, "learning_rate": 7.59097944681252e-06, "loss": 0.7663, "step": 7775 }, { "epoch": 0.69, "grad_norm": 3.8325556515400865, "learning_rate": 7.590361603105849e-06, "loss": 0.8324, "step": 7776 }, { "epoch": 0.69, "grad_norm": 6.503552599136412, "learning_rate": 7.589743705331053e-06, "loss": 0.819, "step": 7777 }, { "epoch": 0.69, "grad_norm": 5.336523492574578, "learning_rate": 7.589125753501028e-06, "loss": 0.7745, "step": 7778 }, { "epoch": 0.69, "grad_norm": 6.68392287809145, "learning_rate": 7.588507747628673e-06, "loss": 0.7829, "step": 7779 }, { "epoch": 0.69, "grad_norm": 6.061711967122832, "learning_rate": 7.587889687726888e-06, "loss": 0.7629, "step": 7780 }, { "epoch": 0.69, "grad_norm": 5.026018695630051, "learning_rate": 7.587271573808572e-06, "loss": 0.748, "step": 7781 }, { "epoch": 0.69, "grad_norm": 7.029616924310662, "learning_rate": 7.586653405886629e-06, "loss": 0.7501, "step": 7782 }, { "epoch": 0.69, "grad_norm": 4.826568665358502, "learning_rate": 7.586035183973961e-06, "loss": 0.7551, "step": 7783 }, { "epoch": 0.69, "grad_norm": 4.540810077037549, "learning_rate": 7.585416908083472e-06, "loss": 0.809, "step": 7784 }, { "epoch": 0.69, "grad_norm": 5.176376722276423, "learning_rate": 7.584798578228066e-06, "loss": 0.7427, "step": 7785 }, { "epoch": 0.69, "grad_norm": 7.373850758402862, "learning_rate": 7.58418019442065e-06, "loss": 0.7973, "step": 7786 }, { "epoch": 0.69, "grad_norm": 7.032555205817951, "learning_rate": 7.5835617566741335e-06, "loss": 0.7707, "step": 7787 }, { "epoch": 0.69, "grad_norm": 5.030417923008486, "learning_rate": 7.582943265001421e-06, "loss": 0.7903, "step": 7788 }, { "epoch": 0.69, "grad_norm": 7.846774119583063, "learning_rate": 7.582324719415426e-06, "loss": 0.7739, "step": 7789 }, { "epoch": 0.69, "grad_norm": 5.880265095166864, "learning_rate": 7.5817061199290575e-06, "loss": 0.7871, "step": 7790 }, { "epoch": 0.7, "grad_norm": 6.7944368654209635, "learning_rate": 7.581087466555227e-06, "loss": 0.8025, "step": 7791 }, { "epoch": 0.7, "grad_norm": 5.232343469797591, "learning_rate": 7.580468759306848e-06, "loss": 0.7778, "step": 7792 }, { "epoch": 0.7, "grad_norm": 6.2334531641192585, "learning_rate": 7.579849998196836e-06, "loss": 0.8013, "step": 7793 }, { "epoch": 0.7, "grad_norm": 5.6092815089931305, "learning_rate": 7.579231183238105e-06, "loss": 0.8444, "step": 7794 }, { "epoch": 0.7, "grad_norm": 5.2275733104394915, "learning_rate": 7.578612314443569e-06, "loss": 0.8052, "step": 7795 }, { "epoch": 0.7, "grad_norm": 7.009702547382585, "learning_rate": 7.577993391826151e-06, "loss": 0.8452, "step": 7796 }, { "epoch": 0.7, "grad_norm": 4.579621334836527, "learning_rate": 7.577374415398765e-06, "loss": 0.77, "step": 7797 }, { "epoch": 0.7, "grad_norm": 6.748482527571452, "learning_rate": 7.576755385174332e-06, "loss": 0.8617, "step": 7798 }, { "epoch": 0.7, "grad_norm": 7.743221940491091, "learning_rate": 7.576136301165775e-06, "loss": 0.794, "step": 7799 }, { "epoch": 0.7, "grad_norm": 3.683305017055746, "learning_rate": 7.575517163386014e-06, "loss": 0.8156, "step": 7800 }, { "epoch": 0.7, "grad_norm": 6.118925260296863, "learning_rate": 7.574897971847972e-06, "loss": 0.7384, "step": 7801 }, { "epoch": 0.7, "grad_norm": 6.810778123809922, "learning_rate": 7.5742787265645735e-06, "loss": 0.8114, "step": 7802 }, { "epoch": 0.7, "grad_norm": 7.231998514406285, "learning_rate": 7.5736594275487455e-06, "loss": 0.8192, "step": 7803 }, { "epoch": 0.7, "grad_norm": 6.253144963502085, "learning_rate": 7.573040074813411e-06, "loss": 0.7795, "step": 7804 }, { "epoch": 0.7, "grad_norm": 6.2943829258620285, "learning_rate": 7.572420668371501e-06, "loss": 0.7774, "step": 7805 }, { "epoch": 0.7, "grad_norm": 4.991360882166256, "learning_rate": 7.571801208235943e-06, "loss": 0.8585, "step": 7806 }, { "epoch": 0.7, "grad_norm": 5.944324112962722, "learning_rate": 7.571181694419667e-06, "loss": 0.7451, "step": 7807 }, { "epoch": 0.7, "grad_norm": 5.223429317209248, "learning_rate": 7.570562126935603e-06, "loss": 0.7214, "step": 7808 }, { "epoch": 0.7, "grad_norm": 6.310989535856731, "learning_rate": 7.569942505796686e-06, "loss": 0.8144, "step": 7809 }, { "epoch": 0.7, "grad_norm": 5.655584430258291, "learning_rate": 7.569322831015846e-06, "loss": 0.7899, "step": 7810 }, { "epoch": 0.7, "grad_norm": 5.548310198115499, "learning_rate": 7.568703102606019e-06, "loss": 0.7645, "step": 7811 }, { "epoch": 0.7, "grad_norm": 6.025484086164654, "learning_rate": 7.56808332058014e-06, "loss": 0.8405, "step": 7812 }, { "epoch": 0.7, "grad_norm": 5.553719643712117, "learning_rate": 7.567463484951145e-06, "loss": 0.806, "step": 7813 }, { "epoch": 0.7, "grad_norm": 5.9407775653348756, "learning_rate": 7.566843595731972e-06, "loss": 0.7731, "step": 7814 }, { "epoch": 0.7, "grad_norm": 6.111956448056105, "learning_rate": 7.566223652935562e-06, "loss": 0.7457, "step": 7815 }, { "epoch": 0.7, "grad_norm": 6.065972336617041, "learning_rate": 7.565603656574852e-06, "loss": 0.8364, "step": 7816 }, { "epoch": 0.7, "grad_norm": 7.85008685665357, "learning_rate": 7.564983606662784e-06, "loss": 0.8258, "step": 7817 }, { "epoch": 0.7, "grad_norm": 4.96120824788405, "learning_rate": 7.564363503212302e-06, "loss": 0.7288, "step": 7818 }, { "epoch": 0.7, "grad_norm": 4.900400499346372, "learning_rate": 7.563743346236346e-06, "loss": 0.7361, "step": 7819 }, { "epoch": 0.7, "grad_norm": 6.480278003015133, "learning_rate": 7.5631231357478626e-06, "loss": 0.8, "step": 7820 }, { "epoch": 0.7, "grad_norm": 5.690685501591544, "learning_rate": 7.562502871759798e-06, "loss": 0.775, "step": 7821 }, { "epoch": 0.7, "grad_norm": 6.629694590265112, "learning_rate": 7.561882554285098e-06, "loss": 0.7434, "step": 7822 }, { "epoch": 0.7, "grad_norm": 7.49386658263311, "learning_rate": 7.561262183336708e-06, "loss": 0.7466, "step": 7823 }, { "epoch": 0.7, "grad_norm": 6.132165412094173, "learning_rate": 7.560641758927581e-06, "loss": 0.8012, "step": 7824 }, { "epoch": 0.7, "grad_norm": 7.848971329412663, "learning_rate": 7.560021281070664e-06, "loss": 0.8425, "step": 7825 }, { "epoch": 0.7, "grad_norm": 5.65638998851288, "learning_rate": 7.559400749778908e-06, "loss": 0.7534, "step": 7826 }, { "epoch": 0.7, "grad_norm": 4.673181954886724, "learning_rate": 7.558780165065267e-06, "loss": 0.8019, "step": 7827 }, { "epoch": 0.7, "grad_norm": 7.164732762403855, "learning_rate": 7.558159526942694e-06, "loss": 0.7756, "step": 7828 }, { "epoch": 0.7, "grad_norm": 4.305652724124263, "learning_rate": 7.557538835424145e-06, "loss": 0.7935, "step": 7829 }, { "epoch": 0.7, "grad_norm": 6.788069478258926, "learning_rate": 7.556918090522572e-06, "loss": 0.8343, "step": 7830 }, { "epoch": 0.7, "grad_norm": 7.232431297646364, "learning_rate": 7.556297292250933e-06, "loss": 0.7977, "step": 7831 }, { "epoch": 0.7, "grad_norm": 4.921999653919874, "learning_rate": 7.555676440622186e-06, "loss": 0.8036, "step": 7832 }, { "epoch": 0.7, "grad_norm": 7.6719370516591825, "learning_rate": 7.555055535649291e-06, "loss": 0.7283, "step": 7833 }, { "epoch": 0.7, "grad_norm": 6.411359595028725, "learning_rate": 7.554434577345206e-06, "loss": 0.8391, "step": 7834 }, { "epoch": 0.7, "grad_norm": 4.117088039428969, "learning_rate": 7.553813565722895e-06, "loss": 0.8813, "step": 7835 }, { "epoch": 0.7, "grad_norm": 5.904147082684149, "learning_rate": 7.553192500795317e-06, "loss": 0.8002, "step": 7836 }, { "epoch": 0.7, "grad_norm": 6.272842172200147, "learning_rate": 7.552571382575439e-06, "loss": 0.787, "step": 7837 }, { "epoch": 0.7, "grad_norm": 5.628148595888817, "learning_rate": 7.551950211076223e-06, "loss": 0.7911, "step": 7838 }, { "epoch": 0.7, "grad_norm": 6.283511854800911, "learning_rate": 7.551328986310633e-06, "loss": 0.8145, "step": 7839 }, { "epoch": 0.7, "grad_norm": 7.734395681903562, "learning_rate": 7.550707708291638e-06, "loss": 0.7842, "step": 7840 }, { "epoch": 0.7, "grad_norm": 7.066661527741526, "learning_rate": 7.550086377032208e-06, "loss": 0.87, "step": 7841 }, { "epoch": 0.7, "grad_norm": 5.003928525813774, "learning_rate": 7.5494649925453075e-06, "loss": 0.8112, "step": 7842 }, { "epoch": 0.7, "grad_norm": 5.396889539308282, "learning_rate": 7.548843554843909e-06, "loss": 0.7555, "step": 7843 }, { "epoch": 0.7, "grad_norm": 5.6887202503333, "learning_rate": 7.548222063940984e-06, "loss": 0.7666, "step": 7844 }, { "epoch": 0.7, "grad_norm": 5.454635316584767, "learning_rate": 7.547600519849503e-06, "loss": 0.8203, "step": 7845 }, { "epoch": 0.7, "grad_norm": 6.393008114544564, "learning_rate": 7.54697892258244e-06, "loss": 0.8499, "step": 7846 }, { "epoch": 0.7, "grad_norm": 7.177286907921559, "learning_rate": 7.546357272152771e-06, "loss": 0.8685, "step": 7847 }, { "epoch": 0.7, "grad_norm": 6.54538618675291, "learning_rate": 7.545735568573469e-06, "loss": 0.7642, "step": 7848 }, { "epoch": 0.7, "grad_norm": 5.9196611634493435, "learning_rate": 7.545113811857512e-06, "loss": 0.7952, "step": 7849 }, { "epoch": 0.7, "grad_norm": 6.488990762588969, "learning_rate": 7.544492002017878e-06, "loss": 0.7784, "step": 7850 }, { "epoch": 0.7, "grad_norm": 6.95866428236915, "learning_rate": 7.543870139067548e-06, "loss": 0.7896, "step": 7851 }, { "epoch": 0.7, "grad_norm": 6.87871819005342, "learning_rate": 7.543248223019497e-06, "loss": 0.8516, "step": 7852 }, { "epoch": 0.7, "grad_norm": 4.517430052820431, "learning_rate": 7.54262625388671e-06, "loss": 0.7722, "step": 7853 }, { "epoch": 0.7, "grad_norm": 5.586733022871997, "learning_rate": 7.542004231682169e-06, "loss": 0.8378, "step": 7854 }, { "epoch": 0.7, "grad_norm": 5.618817242462028, "learning_rate": 7.541382156418855e-06, "loss": 0.8183, "step": 7855 }, { "epoch": 0.7, "grad_norm": 5.528276905007494, "learning_rate": 7.5407600281097544e-06, "loss": 0.8203, "step": 7856 }, { "epoch": 0.7, "grad_norm": 5.528647657234929, "learning_rate": 7.540137846767854e-06, "loss": 0.8097, "step": 7857 }, { "epoch": 0.7, "grad_norm": 5.96543787915373, "learning_rate": 7.539515612406138e-06, "loss": 0.7758, "step": 7858 }, { "epoch": 0.7, "grad_norm": 6.105050539097277, "learning_rate": 7.538893325037594e-06, "loss": 0.7603, "step": 7859 }, { "epoch": 0.7, "grad_norm": 6.553920191776604, "learning_rate": 7.538270984675213e-06, "loss": 0.7852, "step": 7860 }, { "epoch": 0.7, "grad_norm": 5.36374936013212, "learning_rate": 7.537648591331983e-06, "loss": 0.7072, "step": 7861 }, { "epoch": 0.7, "grad_norm": 4.6439729130044025, "learning_rate": 7.537026145020896e-06, "loss": 0.8075, "step": 7862 }, { "epoch": 0.7, "grad_norm": 5.480528788658741, "learning_rate": 7.536403645754946e-06, "loss": 0.796, "step": 7863 }, { "epoch": 0.7, "grad_norm": 6.435124620731268, "learning_rate": 7.535781093547123e-06, "loss": 0.7872, "step": 7864 }, { "epoch": 0.7, "grad_norm": 5.475068686046202, "learning_rate": 7.5351584884104235e-06, "loss": 0.7615, "step": 7865 }, { "epoch": 0.7, "grad_norm": 4.438740466815408, "learning_rate": 7.534535830357843e-06, "loss": 0.7907, "step": 7866 }, { "epoch": 0.7, "grad_norm": 6.122997814134779, "learning_rate": 7.533913119402376e-06, "loss": 0.8247, "step": 7867 }, { "epoch": 0.7, "grad_norm": 7.119934743495468, "learning_rate": 7.533290355557023e-06, "loss": 0.8722, "step": 7868 }, { "epoch": 0.7, "grad_norm": 5.567769982017343, "learning_rate": 7.532667538834781e-06, "loss": 0.8343, "step": 7869 }, { "epoch": 0.7, "grad_norm": 5.563206758886511, "learning_rate": 7.53204466924865e-06, "loss": 0.822, "step": 7870 }, { "epoch": 0.7, "grad_norm": 5.70998211484728, "learning_rate": 7.531421746811632e-06, "loss": 0.8287, "step": 7871 }, { "epoch": 0.7, "grad_norm": 6.102254035007287, "learning_rate": 7.530798771536731e-06, "loss": 0.8824, "step": 7872 }, { "epoch": 0.7, "grad_norm": 6.1040125668783665, "learning_rate": 7.530175743436946e-06, "loss": 0.7198, "step": 7873 }, { "epoch": 0.7, "grad_norm": 5.9469505659596695, "learning_rate": 7.5295526625252836e-06, "loss": 0.7612, "step": 7874 }, { "epoch": 0.7, "grad_norm": 6.124614989267985, "learning_rate": 7.528929528814748e-06, "loss": 0.8149, "step": 7875 }, { "epoch": 0.7, "grad_norm": 7.092041180365186, "learning_rate": 7.528306342318348e-06, "loss": 0.8282, "step": 7876 }, { "epoch": 0.7, "grad_norm": 6.435438301347454, "learning_rate": 7.527683103049089e-06, "loss": 0.7912, "step": 7877 }, { "epoch": 0.7, "grad_norm": 5.451859472193517, "learning_rate": 7.527059811019981e-06, "loss": 0.7708, "step": 7878 }, { "epoch": 0.7, "grad_norm": 5.509187525046455, "learning_rate": 7.526436466244035e-06, "loss": 0.8397, "step": 7879 }, { "epoch": 0.7, "grad_norm": 4.95157469949699, "learning_rate": 7.52581306873426e-06, "loss": 0.7483, "step": 7880 }, { "epoch": 0.7, "grad_norm": 5.16849512622031, "learning_rate": 7.525189618503668e-06, "loss": 0.7549, "step": 7881 }, { "epoch": 0.7, "grad_norm": 5.993666015908522, "learning_rate": 7.5245661155652724e-06, "loss": 0.78, "step": 7882 }, { "epoch": 0.7, "grad_norm": 6.346253725950219, "learning_rate": 7.5239425599320894e-06, "loss": 0.829, "step": 7883 }, { "epoch": 0.7, "grad_norm": 5.797758638284803, "learning_rate": 7.5233189516171315e-06, "loss": 0.7772, "step": 7884 }, { "epoch": 0.7, "grad_norm": 4.765972617280916, "learning_rate": 7.522695290633417e-06, "loss": 0.8346, "step": 7885 }, { "epoch": 0.7, "grad_norm": 6.024752305518248, "learning_rate": 7.522071576993962e-06, "loss": 0.7341, "step": 7886 }, { "epoch": 0.7, "grad_norm": 5.554926243065915, "learning_rate": 7.521447810711788e-06, "loss": 0.7788, "step": 7887 }, { "epoch": 0.7, "grad_norm": 3.874739691479034, "learning_rate": 7.52082399179991e-06, "loss": 0.7933, "step": 7888 }, { "epoch": 0.7, "grad_norm": 7.02207266181155, "learning_rate": 7.520200120271355e-06, "loss": 0.8124, "step": 7889 }, { "epoch": 0.7, "grad_norm": 5.656656284374934, "learning_rate": 7.519576196139139e-06, "loss": 0.7464, "step": 7890 }, { "epoch": 0.7, "grad_norm": 6.477325351809031, "learning_rate": 7.518952219416289e-06, "loss": 0.7343, "step": 7891 }, { "epoch": 0.7, "grad_norm": 6.231243750365789, "learning_rate": 7.518328190115826e-06, "loss": 0.805, "step": 7892 }, { "epoch": 0.7, "grad_norm": 5.332267489773912, "learning_rate": 7.5177041082507795e-06, "loss": 0.7758, "step": 7893 }, { "epoch": 0.7, "grad_norm": 5.075942177353138, "learning_rate": 7.517079973834174e-06, "loss": 0.7812, "step": 7894 }, { "epoch": 0.7, "grad_norm": 5.941485705644206, "learning_rate": 7.516455786879034e-06, "loss": 0.8448, "step": 7895 }, { "epoch": 0.7, "grad_norm": 6.3544069235507665, "learning_rate": 7.515831547398391e-06, "loss": 0.7579, "step": 7896 }, { "epoch": 0.7, "grad_norm": 6.445919834064377, "learning_rate": 7.515207255405275e-06, "loss": 0.7285, "step": 7897 }, { "epoch": 0.7, "grad_norm": 4.8643161684477825, "learning_rate": 7.5145829109127145e-06, "loss": 0.7455, "step": 7898 }, { "epoch": 0.7, "grad_norm": 6.027416982330641, "learning_rate": 7.513958513933742e-06, "loss": 0.8154, "step": 7899 }, { "epoch": 0.7, "grad_norm": 5.335692405155143, "learning_rate": 7.513334064481392e-06, "loss": 0.7861, "step": 7900 }, { "epoch": 0.7, "grad_norm": 6.436777758612885, "learning_rate": 7.512709562568699e-06, "loss": 0.8011, "step": 7901 }, { "epoch": 0.7, "grad_norm": 6.217070995792567, "learning_rate": 7.512085008208695e-06, "loss": 0.7622, "step": 7902 }, { "epoch": 0.71, "grad_norm": 5.628476670564611, "learning_rate": 7.5114604014144175e-06, "loss": 0.8211, "step": 7903 }, { "epoch": 0.71, "grad_norm": 4.36981244611042, "learning_rate": 7.510835742198905e-06, "loss": 0.7753, "step": 7904 }, { "epoch": 0.71, "grad_norm": 4.7674256043102075, "learning_rate": 7.510211030575194e-06, "loss": 0.861, "step": 7905 }, { "epoch": 0.71, "grad_norm": 4.99404613953194, "learning_rate": 7.509586266556325e-06, "loss": 0.8203, "step": 7906 }, { "epoch": 0.71, "grad_norm": 5.035907106404633, "learning_rate": 7.508961450155341e-06, "loss": 0.7645, "step": 7907 }, { "epoch": 0.71, "grad_norm": 6.219645012423327, "learning_rate": 7.508336581385278e-06, "loss": 0.7595, "step": 7908 }, { "epoch": 0.71, "grad_norm": 6.614879847842888, "learning_rate": 7.507711660259185e-06, "loss": 0.78, "step": 7909 }, { "epoch": 0.71, "grad_norm": 5.923404295236724, "learning_rate": 7.507086686790101e-06, "loss": 0.7263, "step": 7910 }, { "epoch": 0.71, "grad_norm": 5.731790421706726, "learning_rate": 7.506461660991074e-06, "loss": 0.8505, "step": 7911 }, { "epoch": 0.71, "grad_norm": 6.478198151652288, "learning_rate": 7.505836582875148e-06, "loss": 0.8013, "step": 7912 }, { "epoch": 0.71, "grad_norm": 4.871592158150468, "learning_rate": 7.50521145245537e-06, "loss": 0.7092, "step": 7913 }, { "epoch": 0.71, "grad_norm": 6.0648864151901885, "learning_rate": 7.504586269744791e-06, "loss": 0.7975, "step": 7914 }, { "epoch": 0.71, "grad_norm": 5.953803857014598, "learning_rate": 7.5039610347564576e-06, "loss": 0.7763, "step": 7915 }, { "epoch": 0.71, "grad_norm": 6.186578028640509, "learning_rate": 7.503335747503422e-06, "loss": 0.8263, "step": 7916 }, { "epoch": 0.71, "grad_norm": 5.251882100897378, "learning_rate": 7.502710407998734e-06, "loss": 0.7691, "step": 7917 }, { "epoch": 0.71, "grad_norm": 5.388343339629617, "learning_rate": 7.502085016255448e-06, "loss": 0.7964, "step": 7918 }, { "epoch": 0.71, "grad_norm": 6.09830814613878, "learning_rate": 7.501459572286616e-06, "loss": 0.85, "step": 7919 }, { "epoch": 0.71, "grad_norm": 5.6296564597298735, "learning_rate": 7.5008340761052944e-06, "loss": 0.7587, "step": 7920 }, { "epoch": 0.71, "grad_norm": 5.040971949524659, "learning_rate": 7.5002085277245375e-06, "loss": 0.8399, "step": 7921 }, { "epoch": 0.71, "grad_norm": 5.929842743114392, "learning_rate": 7.499582927157403e-06, "loss": 0.7502, "step": 7922 }, { "epoch": 0.71, "grad_norm": 4.145787935809559, "learning_rate": 7.498957274416949e-06, "loss": 0.7673, "step": 7923 }, { "epoch": 0.71, "grad_norm": 6.200613696788193, "learning_rate": 7.498331569516235e-06, "loss": 0.8217, "step": 7924 }, { "epoch": 0.71, "grad_norm": 5.689162556241866, "learning_rate": 7.49770581246832e-06, "loss": 0.8217, "step": 7925 }, { "epoch": 0.71, "grad_norm": 5.2903860817715636, "learning_rate": 7.497080003286266e-06, "loss": 0.7791, "step": 7926 }, { "epoch": 0.71, "grad_norm": 7.195338927654314, "learning_rate": 7.4964541419831364e-06, "loss": 0.8093, "step": 7927 }, { "epoch": 0.71, "grad_norm": 4.605212134074036, "learning_rate": 7.495828228571992e-06, "loss": 0.7745, "step": 7928 }, { "epoch": 0.71, "grad_norm": 8.270806770763496, "learning_rate": 7.495202263065899e-06, "loss": 0.8336, "step": 7929 }, { "epoch": 0.71, "grad_norm": 6.918842157779109, "learning_rate": 7.494576245477927e-06, "loss": 0.8571, "step": 7930 }, { "epoch": 0.71, "grad_norm": 5.4266868681253575, "learning_rate": 7.493950175821135e-06, "loss": 0.7889, "step": 7931 }, { "epoch": 0.71, "grad_norm": 5.126392829036127, "learning_rate": 7.493324054108595e-06, "loss": 0.829, "step": 7932 }, { "epoch": 0.71, "grad_norm": 5.879861413354143, "learning_rate": 7.492697880353378e-06, "loss": 0.8686, "step": 7933 }, { "epoch": 0.71, "grad_norm": 7.412309973767169, "learning_rate": 7.492071654568549e-06, "loss": 0.8097, "step": 7934 }, { "epoch": 0.71, "grad_norm": 4.587588824285416, "learning_rate": 7.491445376767183e-06, "loss": 0.8115, "step": 7935 }, { "epoch": 0.71, "grad_norm": 6.476274925425593, "learning_rate": 7.4908190469623524e-06, "loss": 0.7675, "step": 7936 }, { "epoch": 0.71, "grad_norm": 4.316033836344861, "learning_rate": 7.490192665167128e-06, "loss": 0.774, "step": 7937 }, { "epoch": 0.71, "grad_norm": 4.116460361254473, "learning_rate": 7.489566231394584e-06, "loss": 0.7694, "step": 7938 }, { "epoch": 0.71, "grad_norm": 5.910833239388987, "learning_rate": 7.488939745657797e-06, "loss": 0.7995, "step": 7939 }, { "epoch": 0.71, "grad_norm": 4.509383481856893, "learning_rate": 7.488313207969844e-06, "loss": 0.7622, "step": 7940 }, { "epoch": 0.71, "grad_norm": 5.4769049527856914, "learning_rate": 7.487686618343802e-06, "loss": 0.765, "step": 7941 }, { "epoch": 0.71, "grad_norm": 6.886538184029175, "learning_rate": 7.487059976792751e-06, "loss": 0.7678, "step": 7942 }, { "epoch": 0.71, "grad_norm": 6.033860959358567, "learning_rate": 7.486433283329768e-06, "loss": 0.7665, "step": 7943 }, { "epoch": 0.71, "grad_norm": 5.310937782136536, "learning_rate": 7.485806537967936e-06, "loss": 0.751, "step": 7944 }, { "epoch": 0.71, "grad_norm": 4.268179878899627, "learning_rate": 7.485179740720336e-06, "loss": 0.7493, "step": 7945 }, { "epoch": 0.71, "grad_norm": 5.663235897141652, "learning_rate": 7.484552891600052e-06, "loss": 0.7498, "step": 7946 }, { "epoch": 0.71, "grad_norm": 5.1224345744818605, "learning_rate": 7.483925990620166e-06, "loss": 0.7968, "step": 7947 }, { "epoch": 0.71, "grad_norm": 7.088512957347719, "learning_rate": 7.483299037793766e-06, "loss": 0.7898, "step": 7948 }, { "epoch": 0.71, "grad_norm": 6.548352327984504, "learning_rate": 7.482672033133937e-06, "loss": 0.7375, "step": 7949 }, { "epoch": 0.71, "grad_norm": 5.896448641265528, "learning_rate": 7.4820449766537665e-06, "loss": 0.9026, "step": 7950 }, { "epoch": 0.71, "grad_norm": 5.519729857381605, "learning_rate": 7.481417868366342e-06, "loss": 0.7458, "step": 7951 }, { "epoch": 0.71, "grad_norm": 4.735707612904966, "learning_rate": 7.480790708284755e-06, "loss": 0.7829, "step": 7952 }, { "epoch": 0.71, "grad_norm": 4.072438189754723, "learning_rate": 7.480163496422094e-06, "loss": 0.7868, "step": 7953 }, { "epoch": 0.71, "grad_norm": 6.975894387896344, "learning_rate": 7.47953623279145e-06, "loss": 0.7786, "step": 7954 }, { "epoch": 0.71, "grad_norm": 4.259798562731409, "learning_rate": 7.478908917405919e-06, "loss": 0.8591, "step": 7955 }, { "epoch": 0.71, "grad_norm": 5.34747026775682, "learning_rate": 7.478281550278592e-06, "loss": 0.8661, "step": 7956 }, { "epoch": 0.71, "grad_norm": 4.683883994461897, "learning_rate": 7.4776541314225646e-06, "loss": 0.7179, "step": 7957 }, { "epoch": 0.71, "grad_norm": 5.370563087366387, "learning_rate": 7.4770266608509355e-06, "loss": 0.7912, "step": 7958 }, { "epoch": 0.71, "grad_norm": 4.958175318516089, "learning_rate": 7.476399138576798e-06, "loss": 0.7434, "step": 7959 }, { "epoch": 0.71, "grad_norm": 6.121702412802429, "learning_rate": 7.475771564613252e-06, "loss": 0.7562, "step": 7960 }, { "epoch": 0.71, "grad_norm": 5.232632216492475, "learning_rate": 7.475143938973395e-06, "loss": 0.7316, "step": 7961 }, { "epoch": 0.71, "grad_norm": 5.677583787445041, "learning_rate": 7.474516261670331e-06, "loss": 0.8221, "step": 7962 }, { "epoch": 0.71, "grad_norm": 5.2085394295605925, "learning_rate": 7.4738885327171574e-06, "loss": 0.7669, "step": 7963 }, { "epoch": 0.71, "grad_norm": 5.9359241498103135, "learning_rate": 7.473260752126979e-06, "loss": 0.8438, "step": 7964 }, { "epoch": 0.71, "grad_norm": 5.183368338108208, "learning_rate": 7.472632919912899e-06, "loss": 0.8234, "step": 7965 }, { "epoch": 0.71, "grad_norm": 6.45842404253519, "learning_rate": 7.472005036088022e-06, "loss": 0.8004, "step": 7966 }, { "epoch": 0.71, "grad_norm": 4.9117025194193635, "learning_rate": 7.4713771006654525e-06, "loss": 0.7703, "step": 7967 }, { "epoch": 0.71, "grad_norm": 4.344896461286907, "learning_rate": 7.4707491136583e-06, "loss": 0.737, "step": 7968 }, { "epoch": 0.71, "grad_norm": 4.940715703060105, "learning_rate": 7.470121075079668e-06, "loss": 0.7417, "step": 7969 }, { "epoch": 0.71, "grad_norm": 5.753066274510542, "learning_rate": 7.469492984942669e-06, "loss": 0.7349, "step": 7970 }, { "epoch": 0.71, "grad_norm": 7.4388063057399165, "learning_rate": 7.468864843260413e-06, "loss": 0.8499, "step": 7971 }, { "epoch": 0.71, "grad_norm": 4.469337012215663, "learning_rate": 7.468236650046009e-06, "loss": 0.802, "step": 7972 }, { "epoch": 0.71, "grad_norm": 5.605774912796959, "learning_rate": 7.467608405312571e-06, "loss": 0.7791, "step": 7973 }, { "epoch": 0.71, "grad_norm": 5.57428060986952, "learning_rate": 7.466980109073212e-06, "loss": 0.8383, "step": 7974 }, { "epoch": 0.71, "grad_norm": 5.494400537280058, "learning_rate": 7.466351761341045e-06, "loss": 0.7812, "step": 7975 }, { "epoch": 0.71, "grad_norm": 6.5001810282970025, "learning_rate": 7.4657233621291854e-06, "loss": 0.8066, "step": 7976 }, { "epoch": 0.71, "grad_norm": 5.5320189647704145, "learning_rate": 7.465094911450752e-06, "loss": 0.8033, "step": 7977 }, { "epoch": 0.71, "grad_norm": 4.844897158238273, "learning_rate": 7.46446640931886e-06, "loss": 0.7423, "step": 7978 }, { "epoch": 0.71, "grad_norm": 6.058934531548048, "learning_rate": 7.463837855746629e-06, "loss": 0.8144, "step": 7979 }, { "epoch": 0.71, "grad_norm": 5.718618247789218, "learning_rate": 7.463209250747179e-06, "loss": 0.7641, "step": 7980 }, { "epoch": 0.71, "grad_norm": 6.273851375582472, "learning_rate": 7.46258059433363e-06, "loss": 0.7871, "step": 7981 }, { "epoch": 0.71, "grad_norm": 5.334867709882106, "learning_rate": 7.461951886519103e-06, "loss": 0.8523, "step": 7982 }, { "epoch": 0.71, "grad_norm": 5.617835352315898, "learning_rate": 7.461323127316723e-06, "loss": 0.8118, "step": 7983 }, { "epoch": 0.71, "grad_norm": 5.701130267559413, "learning_rate": 7.460694316739613e-06, "loss": 0.8166, "step": 7984 }, { "epoch": 0.71, "grad_norm": 5.16149481946943, "learning_rate": 7.460065454800898e-06, "loss": 0.7683, "step": 7985 }, { "epoch": 0.71, "grad_norm": 6.514484970689203, "learning_rate": 7.459436541513703e-06, "loss": 0.808, "step": 7986 }, { "epoch": 0.71, "grad_norm": 5.438039719745585, "learning_rate": 7.4588075768911574e-06, "loss": 0.7754, "step": 7987 }, { "epoch": 0.71, "grad_norm": 4.428282140089798, "learning_rate": 7.4581785609463875e-06, "loss": 0.7777, "step": 7988 }, { "epoch": 0.71, "grad_norm": 7.118732774029786, "learning_rate": 7.457549493692524e-06, "loss": 0.8185, "step": 7989 }, { "epoch": 0.71, "grad_norm": 5.816571939645837, "learning_rate": 7.456920375142697e-06, "loss": 0.8015, "step": 7990 }, { "epoch": 0.71, "grad_norm": 5.238664078366661, "learning_rate": 7.456291205310036e-06, "loss": 0.8317, "step": 7991 }, { "epoch": 0.71, "grad_norm": 6.792155842965622, "learning_rate": 7.455661984207677e-06, "loss": 0.7715, "step": 7992 }, { "epoch": 0.71, "grad_norm": 7.103944548272625, "learning_rate": 7.455032711848752e-06, "loss": 0.7797, "step": 7993 }, { "epoch": 0.71, "grad_norm": 5.425875869950076, "learning_rate": 7.4544033882463936e-06, "loss": 0.8122, "step": 7994 }, { "epoch": 0.71, "grad_norm": 5.224675412885934, "learning_rate": 7.453774013413743e-06, "loss": 0.8473, "step": 7995 }, { "epoch": 0.71, "grad_norm": 5.163577630186194, "learning_rate": 7.453144587363931e-06, "loss": 0.7615, "step": 7996 }, { "epoch": 0.71, "grad_norm": 6.967016705994163, "learning_rate": 7.452515110110099e-06, "loss": 0.7238, "step": 7997 }, { "epoch": 0.71, "grad_norm": 6.545385444486682, "learning_rate": 7.451885581665383e-06, "loss": 0.8065, "step": 7998 }, { "epoch": 0.71, "grad_norm": 6.0476525158702605, "learning_rate": 7.451256002042927e-06, "loss": 0.7832, "step": 7999 }, { "epoch": 0.71, "grad_norm": 5.551306847669112, "learning_rate": 7.45062637125587e-06, "loss": 0.8126, "step": 8000 }, { "epoch": 0.71, "grad_norm": 7.024414540058065, "learning_rate": 7.449996689317354e-06, "loss": 0.7829, "step": 8001 }, { "epoch": 0.71, "grad_norm": 4.904464708256639, "learning_rate": 7.449366956240523e-06, "loss": 0.823, "step": 8002 }, { "epoch": 0.71, "grad_norm": 4.907929397456919, "learning_rate": 7.448737172038521e-06, "loss": 0.7938, "step": 8003 }, { "epoch": 0.71, "grad_norm": 6.5429783286907055, "learning_rate": 7.448107336724491e-06, "loss": 0.7785, "step": 8004 }, { "epoch": 0.71, "grad_norm": 6.005685999013478, "learning_rate": 7.447477450311583e-06, "loss": 0.7851, "step": 8005 }, { "epoch": 0.71, "grad_norm": 5.185831030034902, "learning_rate": 7.446847512812943e-06, "loss": 0.8478, "step": 8006 }, { "epoch": 0.71, "grad_norm": 5.372254665939628, "learning_rate": 7.44621752424172e-06, "loss": 0.8494, "step": 8007 }, { "epoch": 0.71, "grad_norm": 5.477188119150958, "learning_rate": 7.445587484611063e-06, "loss": 0.7814, "step": 8008 }, { "epoch": 0.71, "grad_norm": 5.64414601924335, "learning_rate": 7.444957393934124e-06, "loss": 0.8799, "step": 8009 }, { "epoch": 0.71, "grad_norm": 5.991734478414799, "learning_rate": 7.444327252224053e-06, "loss": 0.8146, "step": 8010 }, { "epoch": 0.71, "grad_norm": 4.094979933536866, "learning_rate": 7.443697059494004e-06, "loss": 0.7555, "step": 8011 }, { "epoch": 0.71, "grad_norm": 7.661851772396895, "learning_rate": 7.443066815757129e-06, "loss": 0.7087, "step": 8012 }, { "epoch": 0.71, "grad_norm": 5.191510730363255, "learning_rate": 7.442436521026586e-06, "loss": 0.7501, "step": 8013 }, { "epoch": 0.71, "grad_norm": 5.822495016134685, "learning_rate": 7.441806175315528e-06, "loss": 0.7888, "step": 8014 }, { "epoch": 0.72, "grad_norm": 5.9978825236928675, "learning_rate": 7.441175778637115e-06, "loss": 0.7264, "step": 8015 }, { "epoch": 0.72, "grad_norm": 5.757944816888849, "learning_rate": 7.440545331004503e-06, "loss": 0.7752, "step": 8016 }, { "epoch": 0.72, "grad_norm": 5.991364706193056, "learning_rate": 7.439914832430852e-06, "loss": 0.802, "step": 8017 }, { "epoch": 0.72, "grad_norm": 6.032419924250394, "learning_rate": 7.439284282929322e-06, "loss": 0.7597, "step": 8018 }, { "epoch": 0.72, "grad_norm": 5.924327138063203, "learning_rate": 7.438653682513077e-06, "loss": 0.8228, "step": 8019 }, { "epoch": 0.72, "grad_norm": 5.830989757755557, "learning_rate": 7.438023031195274e-06, "loss": 0.7566, "step": 8020 }, { "epoch": 0.72, "grad_norm": 6.097135696112259, "learning_rate": 7.437392328989079e-06, "loss": 0.8224, "step": 8021 }, { "epoch": 0.72, "grad_norm": 5.399269588750674, "learning_rate": 7.436761575907658e-06, "loss": 0.7175, "step": 8022 }, { "epoch": 0.72, "grad_norm": 5.368379706559058, "learning_rate": 7.4361307719641765e-06, "loss": 0.7709, "step": 8023 }, { "epoch": 0.72, "grad_norm": 6.2676705058070095, "learning_rate": 7.435499917171799e-06, "loss": 0.8164, "step": 8024 }, { "epoch": 0.72, "grad_norm": 4.695651484389013, "learning_rate": 7.434869011543695e-06, "loss": 0.7614, "step": 8025 }, { "epoch": 0.72, "grad_norm": 5.442745625473054, "learning_rate": 7.434238055093033e-06, "loss": 0.7897, "step": 8026 }, { "epoch": 0.72, "grad_norm": 4.800147121056146, "learning_rate": 7.43360704783298e-06, "loss": 0.7836, "step": 8027 }, { "epoch": 0.72, "grad_norm": 6.103996772789028, "learning_rate": 7.432975989776713e-06, "loss": 0.9125, "step": 8028 }, { "epoch": 0.72, "grad_norm": 5.709939589985719, "learning_rate": 7.432344880937398e-06, "loss": 0.8184, "step": 8029 }, { "epoch": 0.72, "grad_norm": 6.161755132978119, "learning_rate": 7.4317137213282105e-06, "loss": 0.7484, "step": 8030 }, { "epoch": 0.72, "grad_norm": 5.816584053101581, "learning_rate": 7.431082510962325e-06, "loss": 0.7499, "step": 8031 }, { "epoch": 0.72, "grad_norm": 6.521452395539106, "learning_rate": 7.430451249852917e-06, "loss": 0.8247, "step": 8032 }, { "epoch": 0.72, "grad_norm": 5.070655321601379, "learning_rate": 7.42981993801316e-06, "loss": 0.7511, "step": 8033 }, { "epoch": 0.72, "grad_norm": 7.221753564987714, "learning_rate": 7.429188575456233e-06, "loss": 0.8182, "step": 8034 }, { "epoch": 0.72, "grad_norm": 6.247898204509015, "learning_rate": 7.428557162195315e-06, "loss": 0.7805, "step": 8035 }, { "epoch": 0.72, "grad_norm": 5.742003127800784, "learning_rate": 7.427925698243586e-06, "loss": 0.8711, "step": 8036 }, { "epoch": 0.72, "grad_norm": 6.924523221611583, "learning_rate": 7.427294183614222e-06, "loss": 0.775, "step": 8037 }, { "epoch": 0.72, "grad_norm": 5.886684759122131, "learning_rate": 7.426662618320411e-06, "loss": 0.7602, "step": 8038 }, { "epoch": 0.72, "grad_norm": 4.811448495610583, "learning_rate": 7.42603100237533e-06, "loss": 0.7956, "step": 8039 }, { "epoch": 0.72, "grad_norm": 7.331838056102882, "learning_rate": 7.425399335792165e-06, "loss": 0.8584, "step": 8040 }, { "epoch": 0.72, "grad_norm": 5.55502569266274, "learning_rate": 7.424767618584101e-06, "loss": 0.7394, "step": 8041 }, { "epoch": 0.72, "grad_norm": 6.264047961932086, "learning_rate": 7.424135850764323e-06, "loss": 0.7571, "step": 8042 }, { "epoch": 0.72, "grad_norm": 5.271421482802664, "learning_rate": 7.423504032346018e-06, "loss": 0.8584, "step": 8043 }, { "epoch": 0.72, "grad_norm": 6.875133446570406, "learning_rate": 7.422872163342373e-06, "loss": 0.7992, "step": 8044 }, { "epoch": 0.72, "grad_norm": 6.214581793257712, "learning_rate": 7.422240243766578e-06, "loss": 0.7901, "step": 8045 }, { "epoch": 0.72, "grad_norm": 5.4851103066679485, "learning_rate": 7.421608273631821e-06, "loss": 0.805, "step": 8046 }, { "epoch": 0.72, "grad_norm": 6.729076233876768, "learning_rate": 7.420976252951297e-06, "loss": 0.774, "step": 8047 }, { "epoch": 0.72, "grad_norm": 7.366001637431151, "learning_rate": 7.420344181738195e-06, "loss": 0.7874, "step": 8048 }, { "epoch": 0.72, "grad_norm": 6.390072893948882, "learning_rate": 7.419712060005709e-06, "loss": 0.7848, "step": 8049 }, { "epoch": 0.72, "grad_norm": 4.284758521567236, "learning_rate": 7.419079887767031e-06, "loss": 0.7245, "step": 8050 }, { "epoch": 0.72, "grad_norm": 5.365093470384115, "learning_rate": 7.41844766503536e-06, "loss": 0.7591, "step": 8051 }, { "epoch": 0.72, "grad_norm": 5.430665002476206, "learning_rate": 7.417815391823889e-06, "loss": 0.6807, "step": 8052 }, { "epoch": 0.72, "grad_norm": 5.243914104753792, "learning_rate": 7.417183068145817e-06, "loss": 0.8249, "step": 8053 }, { "epoch": 0.72, "grad_norm": 4.7619871268356055, "learning_rate": 7.4165506940143415e-06, "loss": 0.7392, "step": 8054 }, { "epoch": 0.72, "grad_norm": 7.069915985809673, "learning_rate": 7.415918269442663e-06, "loss": 0.7983, "step": 8055 }, { "epoch": 0.72, "grad_norm": 5.207341731924211, "learning_rate": 7.4152857944439815e-06, "loss": 0.7607, "step": 8056 }, { "epoch": 0.72, "grad_norm": 5.757325688357405, "learning_rate": 7.414653269031499e-06, "loss": 0.7802, "step": 8057 }, { "epoch": 0.72, "grad_norm": 4.460560833637711, "learning_rate": 7.414020693218415e-06, "loss": 0.8178, "step": 8058 }, { "epoch": 0.72, "grad_norm": 6.033523413576593, "learning_rate": 7.4133880670179385e-06, "loss": 0.8409, "step": 8059 }, { "epoch": 0.72, "grad_norm": 7.023159347672714, "learning_rate": 7.412755390443271e-06, "loss": 0.871, "step": 8060 }, { "epoch": 0.72, "grad_norm": 4.562324662321809, "learning_rate": 7.412122663507617e-06, "loss": 0.787, "step": 8061 }, { "epoch": 0.72, "grad_norm": 5.991271620341486, "learning_rate": 7.411489886224186e-06, "loss": 0.8641, "step": 8062 }, { "epoch": 0.72, "grad_norm": 7.450840664430811, "learning_rate": 7.4108570586061846e-06, "loss": 0.8252, "step": 8063 }, { "epoch": 0.72, "grad_norm": 4.452824198281665, "learning_rate": 7.410224180666821e-06, "loss": 0.7534, "step": 8064 }, { "epoch": 0.72, "grad_norm": 5.082834713896673, "learning_rate": 7.409591252419306e-06, "loss": 0.7968, "step": 8065 }, { "epoch": 0.72, "grad_norm": 5.530098289703796, "learning_rate": 7.408958273876851e-06, "loss": 0.7647, "step": 8066 }, { "epoch": 0.72, "grad_norm": 6.033100009270034, "learning_rate": 7.408325245052669e-06, "loss": 0.7412, "step": 8067 }, { "epoch": 0.72, "grad_norm": 6.557487312823466, "learning_rate": 7.40769216595997e-06, "loss": 0.8212, "step": 8068 }, { "epoch": 0.72, "grad_norm": 4.900700131930921, "learning_rate": 7.40705903661197e-06, "loss": 0.759, "step": 8069 }, { "epoch": 0.72, "grad_norm": 5.525619028396252, "learning_rate": 7.406425857021884e-06, "loss": 0.7373, "step": 8070 }, { "epoch": 0.72, "grad_norm": 7.215475095677604, "learning_rate": 7.405792627202928e-06, "loss": 0.8002, "step": 8071 }, { "epoch": 0.72, "grad_norm": 6.872485857116215, "learning_rate": 7.405159347168319e-06, "loss": 0.8114, "step": 8072 }, { "epoch": 0.72, "grad_norm": 5.689126977154325, "learning_rate": 7.4045260169312775e-06, "loss": 0.8426, "step": 8073 }, { "epoch": 0.72, "grad_norm": 5.645605479448751, "learning_rate": 7.403892636505021e-06, "loss": 0.7489, "step": 8074 }, { "epoch": 0.72, "grad_norm": 6.429986074623284, "learning_rate": 7.403259205902769e-06, "loss": 0.8179, "step": 8075 }, { "epoch": 0.72, "grad_norm": 6.5515264168482625, "learning_rate": 7.402625725137745e-06, "loss": 0.7886, "step": 8076 }, { "epoch": 0.72, "grad_norm": 5.767561530613094, "learning_rate": 7.40199219422317e-06, "loss": 0.7815, "step": 8077 }, { "epoch": 0.72, "grad_norm": 4.896408479255329, "learning_rate": 7.401358613172267e-06, "loss": 0.7952, "step": 8078 }, { "epoch": 0.72, "grad_norm": 7.4332076164625, "learning_rate": 7.400724981998264e-06, "loss": 0.82, "step": 8079 }, { "epoch": 0.72, "grad_norm": 5.736632020424963, "learning_rate": 7.400091300714384e-06, "loss": 0.7324, "step": 8080 }, { "epoch": 0.72, "grad_norm": 4.650131794427687, "learning_rate": 7.3994575693338536e-06, "loss": 0.8035, "step": 8081 }, { "epoch": 0.72, "grad_norm": 5.24862959608484, "learning_rate": 7.398823787869902e-06, "loss": 0.8062, "step": 8082 }, { "epoch": 0.72, "grad_norm": 4.949614691939611, "learning_rate": 7.398189956335757e-06, "loss": 0.8102, "step": 8083 }, { "epoch": 0.72, "grad_norm": 6.010469125602329, "learning_rate": 7.397556074744648e-06, "loss": 0.7887, "step": 8084 }, { "epoch": 0.72, "grad_norm": 5.928153021689938, "learning_rate": 7.396922143109806e-06, "loss": 0.7623, "step": 8085 }, { "epoch": 0.72, "grad_norm": 6.225091879996679, "learning_rate": 7.396288161444465e-06, "loss": 0.7358, "step": 8086 }, { "epoch": 0.72, "grad_norm": 4.831578777871091, "learning_rate": 7.3956541297618554e-06, "loss": 0.7937, "step": 8087 }, { "epoch": 0.72, "grad_norm": 6.573984753831123, "learning_rate": 7.3950200480752125e-06, "loss": 0.85, "step": 8088 }, { "epoch": 0.72, "grad_norm": 6.3831684880060715, "learning_rate": 7.394385916397772e-06, "loss": 0.8154, "step": 8089 }, { "epoch": 0.72, "grad_norm": 4.097601028992834, "learning_rate": 7.393751734742768e-06, "loss": 0.863, "step": 8090 }, { "epoch": 0.72, "grad_norm": 5.797569781526311, "learning_rate": 7.393117503123438e-06, "loss": 0.8042, "step": 8091 }, { "epoch": 0.72, "grad_norm": 4.5974693941746105, "learning_rate": 7.392483221553023e-06, "loss": 0.7763, "step": 8092 }, { "epoch": 0.72, "grad_norm": 5.876360920060236, "learning_rate": 7.391848890044758e-06, "loss": 0.7489, "step": 8093 }, { "epoch": 0.72, "grad_norm": 5.474114196917781, "learning_rate": 7.391214508611886e-06, "loss": 0.7918, "step": 8094 }, { "epoch": 0.72, "grad_norm": 5.36036327977871, "learning_rate": 7.390580077267649e-06, "loss": 0.7892, "step": 8095 }, { "epoch": 0.72, "grad_norm": 5.373289081161579, "learning_rate": 7.389945596025289e-06, "loss": 0.7818, "step": 8096 }, { "epoch": 0.72, "grad_norm": 6.522423534688371, "learning_rate": 7.3893110648980455e-06, "loss": 0.8074, "step": 8097 }, { "epoch": 0.72, "grad_norm": 4.9853846354372955, "learning_rate": 7.388676483899167e-06, "loss": 0.8494, "step": 8098 }, { "epoch": 0.72, "grad_norm": 5.905254239154744, "learning_rate": 7.3880418530419005e-06, "loss": 0.7931, "step": 8099 }, { "epoch": 0.72, "grad_norm": 5.079028929520829, "learning_rate": 7.3874071723394865e-06, "loss": 0.7707, "step": 8100 }, { "epoch": 0.72, "grad_norm": 5.878756442282105, "learning_rate": 7.386772441805179e-06, "loss": 0.8079, "step": 8101 }, { "epoch": 0.72, "grad_norm": 5.054177897286903, "learning_rate": 7.386137661452222e-06, "loss": 0.8909, "step": 8102 }, { "epoch": 0.72, "grad_norm": 3.8070255214040443, "learning_rate": 7.385502831293868e-06, "loss": 0.8159, "step": 8103 }, { "epoch": 0.72, "grad_norm": 5.85498944391718, "learning_rate": 7.3848679513433665e-06, "loss": 0.7975, "step": 8104 }, { "epoch": 0.72, "grad_norm": 6.086668663920648, "learning_rate": 7.384233021613969e-06, "loss": 0.7573, "step": 8105 }, { "epoch": 0.72, "grad_norm": 4.170362369744039, "learning_rate": 7.383598042118928e-06, "loss": 0.794, "step": 8106 }, { "epoch": 0.72, "grad_norm": 5.96305497172723, "learning_rate": 7.382963012871498e-06, "loss": 0.7547, "step": 8107 }, { "epoch": 0.72, "grad_norm": 7.0219327973450145, "learning_rate": 7.382327933884934e-06, "loss": 0.7656, "step": 8108 }, { "epoch": 0.72, "grad_norm": 4.508180235186709, "learning_rate": 7.3816928051724915e-06, "loss": 0.8169, "step": 8109 }, { "epoch": 0.72, "grad_norm": 5.009409383704669, "learning_rate": 7.3810576267474276e-06, "loss": 0.7714, "step": 8110 }, { "epoch": 0.72, "grad_norm": 6.895419659055805, "learning_rate": 7.380422398623e-06, "loss": 0.7436, "step": 8111 }, { "epoch": 0.72, "grad_norm": 6.017394689451529, "learning_rate": 7.379787120812467e-06, "loss": 0.7944, "step": 8112 }, { "epoch": 0.72, "grad_norm": 7.139692936981693, "learning_rate": 7.379151793329091e-06, "loss": 0.8596, "step": 8113 }, { "epoch": 0.72, "grad_norm": 6.492424084958046, "learning_rate": 7.378516416186131e-06, "loss": 0.8167, "step": 8114 }, { "epoch": 0.72, "grad_norm": 6.163571427289567, "learning_rate": 7.37788098939685e-06, "loss": 0.7745, "step": 8115 }, { "epoch": 0.72, "grad_norm": 4.56024607509027, "learning_rate": 7.377245512974509e-06, "loss": 0.7268, "step": 8116 }, { "epoch": 0.72, "grad_norm": 5.460747330606476, "learning_rate": 7.376609986932377e-06, "loss": 0.7669, "step": 8117 }, { "epoch": 0.72, "grad_norm": 5.568018710852844, "learning_rate": 7.375974411283715e-06, "loss": 0.7892, "step": 8118 }, { "epoch": 0.72, "grad_norm": 6.05694562092402, "learning_rate": 7.37533878604179e-06, "loss": 0.7418, "step": 8119 }, { "epoch": 0.72, "grad_norm": 6.636941744156954, "learning_rate": 7.374703111219868e-06, "loss": 0.8294, "step": 8120 }, { "epoch": 0.72, "grad_norm": 5.605860803618198, "learning_rate": 7.3740673868312226e-06, "loss": 0.7617, "step": 8121 }, { "epoch": 0.72, "grad_norm": 6.257943659706699, "learning_rate": 7.373431612889117e-06, "loss": 0.7898, "step": 8122 }, { "epoch": 0.72, "grad_norm": 5.114357954971109, "learning_rate": 7.3727957894068255e-06, "loss": 0.8347, "step": 8123 }, { "epoch": 0.72, "grad_norm": 4.666580088384638, "learning_rate": 7.372159916397619e-06, "loss": 0.7464, "step": 8124 }, { "epoch": 0.72, "grad_norm": 8.450746272545596, "learning_rate": 7.371523993874767e-06, "loss": 0.8168, "step": 8125 }, { "epoch": 0.72, "grad_norm": 5.383294505620616, "learning_rate": 7.3708880218515464e-06, "loss": 0.7939, "step": 8126 }, { "epoch": 0.73, "grad_norm": 4.138773240577823, "learning_rate": 7.370252000341231e-06, "loss": 0.7847, "step": 8127 }, { "epoch": 0.73, "grad_norm": 4.676041973607977, "learning_rate": 7.369615929357095e-06, "loss": 0.7214, "step": 8128 }, { "epoch": 0.73, "grad_norm": 7.399500133804675, "learning_rate": 7.368979808912415e-06, "loss": 0.7378, "step": 8129 }, { "epoch": 0.73, "grad_norm": 6.188150923110593, "learning_rate": 7.368343639020472e-06, "loss": 0.7962, "step": 8130 }, { "epoch": 0.73, "grad_norm": 4.751843690438909, "learning_rate": 7.36770741969454e-06, "loss": 0.8084, "step": 8131 }, { "epoch": 0.73, "grad_norm": 7.116544426286438, "learning_rate": 7.367071150947902e-06, "loss": 0.8465, "step": 8132 }, { "epoch": 0.73, "grad_norm": 4.752078045604564, "learning_rate": 7.366434832793836e-06, "loss": 0.8067, "step": 8133 }, { "epoch": 0.73, "grad_norm": 6.560948144841868, "learning_rate": 7.365798465245627e-06, "loss": 0.7136, "step": 8134 }, { "epoch": 0.73, "grad_norm": 4.9771654726093075, "learning_rate": 7.3651620483165555e-06, "loss": 0.7784, "step": 8135 }, { "epoch": 0.73, "grad_norm": 5.411138094075645, "learning_rate": 7.364525582019905e-06, "loss": 0.7858, "step": 8136 }, { "epoch": 0.73, "grad_norm": 6.360833997413353, "learning_rate": 7.363889066368962e-06, "loss": 0.7581, "step": 8137 }, { "epoch": 0.73, "grad_norm": 4.6924788277530025, "learning_rate": 7.363252501377012e-06, "loss": 0.8068, "step": 8138 }, { "epoch": 0.73, "grad_norm": 4.429921911217902, "learning_rate": 7.362615887057341e-06, "loss": 0.7824, "step": 8139 }, { "epoch": 0.73, "grad_norm": 5.25015939684266, "learning_rate": 7.361979223423237e-06, "loss": 0.8334, "step": 8140 }, { "epoch": 0.73, "grad_norm": 9.39274350546816, "learning_rate": 7.36134251048799e-06, "loss": 0.8137, "step": 8141 }, { "epoch": 0.73, "grad_norm": 5.706601558361519, "learning_rate": 7.360705748264888e-06, "loss": 0.7938, "step": 8142 }, { "epoch": 0.73, "grad_norm": 3.8880271821776367, "learning_rate": 7.3600689367672255e-06, "loss": 0.7763, "step": 8143 }, { "epoch": 0.73, "grad_norm": 6.124075217787345, "learning_rate": 7.359432076008291e-06, "loss": 0.8068, "step": 8144 }, { "epoch": 0.73, "grad_norm": 5.925306416161488, "learning_rate": 7.3587951660013775e-06, "loss": 0.8039, "step": 8145 }, { "epoch": 0.73, "grad_norm": 5.026750889156244, "learning_rate": 7.358158206759784e-06, "loss": 0.7589, "step": 8146 }, { "epoch": 0.73, "grad_norm": 5.050649136075178, "learning_rate": 7.357521198296801e-06, "loss": 0.8721, "step": 8147 }, { "epoch": 0.73, "grad_norm": 4.921946267799854, "learning_rate": 7.356884140625724e-06, "loss": 0.6958, "step": 8148 }, { "epoch": 0.73, "grad_norm": 4.754425094493915, "learning_rate": 7.3562470337598544e-06, "loss": 0.7722, "step": 8149 }, { "epoch": 0.73, "grad_norm": 6.1375326585252035, "learning_rate": 7.355609877712487e-06, "loss": 0.7722, "step": 8150 }, { "epoch": 0.73, "grad_norm": 5.270026953886901, "learning_rate": 7.354972672496922e-06, "loss": 0.8242, "step": 8151 }, { "epoch": 0.73, "grad_norm": 5.956239758151039, "learning_rate": 7.354335418126461e-06, "loss": 0.8421, "step": 8152 }, { "epoch": 0.73, "grad_norm": 6.524715051282911, "learning_rate": 7.353698114614401e-06, "loss": 0.7988, "step": 8153 }, { "epoch": 0.73, "grad_norm": 5.246882918529215, "learning_rate": 7.353060761974051e-06, "loss": 0.7344, "step": 8154 }, { "epoch": 0.73, "grad_norm": 5.153823344463887, "learning_rate": 7.352423360218709e-06, "loss": 0.7302, "step": 8155 }, { "epoch": 0.73, "grad_norm": 6.82420779525798, "learning_rate": 7.3517859093616815e-06, "loss": 0.7752, "step": 8156 }, { "epoch": 0.73, "grad_norm": 6.429377116574914, "learning_rate": 7.351148409416272e-06, "loss": 0.7353, "step": 8157 }, { "epoch": 0.73, "grad_norm": 5.480967619913319, "learning_rate": 7.35051086039579e-06, "loss": 0.7279, "step": 8158 }, { "epoch": 0.73, "grad_norm": 6.23828478101436, "learning_rate": 7.349873262313541e-06, "loss": 0.8069, "step": 8159 }, { "epoch": 0.73, "grad_norm": 7.181121934820009, "learning_rate": 7.349235615182833e-06, "loss": 0.8229, "step": 8160 }, { "epoch": 0.73, "grad_norm": 5.479877518489671, "learning_rate": 7.348597919016978e-06, "loss": 0.7423, "step": 8161 }, { "epoch": 0.73, "grad_norm": 6.937150441700704, "learning_rate": 7.347960173829283e-06, "loss": 0.8332, "step": 8162 }, { "epoch": 0.73, "grad_norm": 5.61873402128602, "learning_rate": 7.347322379633063e-06, "loss": 0.8333, "step": 8163 }, { "epoch": 0.73, "grad_norm": 6.009773550000838, "learning_rate": 7.346684536441627e-06, "loss": 0.7234, "step": 8164 }, { "epoch": 0.73, "grad_norm": 5.751044588983, "learning_rate": 7.3460466442682925e-06, "loss": 0.7445, "step": 8165 }, { "epoch": 0.73, "grad_norm": 6.04958946728549, "learning_rate": 7.34540870312637e-06, "loss": 0.8324, "step": 8166 }, { "epoch": 0.73, "grad_norm": 5.220895890956877, "learning_rate": 7.344770713029178e-06, "loss": 0.7754, "step": 8167 }, { "epoch": 0.73, "grad_norm": 6.098962774753667, "learning_rate": 7.344132673990032e-06, "loss": 0.8167, "step": 8168 }, { "epoch": 0.73, "grad_norm": 5.188955759673362, "learning_rate": 7.343494586022252e-06, "loss": 0.7944, "step": 8169 }, { "epoch": 0.73, "grad_norm": 5.281147425714562, "learning_rate": 7.342856449139152e-06, "loss": 0.7211, "step": 8170 }, { "epoch": 0.73, "grad_norm": 5.262832867239455, "learning_rate": 7.342218263354055e-06, "loss": 0.7757, "step": 8171 }, { "epoch": 0.73, "grad_norm": 5.34352971900085, "learning_rate": 7.341580028680282e-06, "loss": 0.7379, "step": 8172 }, { "epoch": 0.73, "grad_norm": 4.878759422939054, "learning_rate": 7.340941745131154e-06, "loss": 0.8073, "step": 8173 }, { "epoch": 0.73, "grad_norm": 4.726836511678035, "learning_rate": 7.3403034127199915e-06, "loss": 0.8296, "step": 8174 }, { "epoch": 0.73, "grad_norm": 6.618128115119916, "learning_rate": 7.339665031460124e-06, "loss": 0.7737, "step": 8175 }, { "epoch": 0.73, "grad_norm": 6.386523619329982, "learning_rate": 7.33902660136487e-06, "loss": 0.7319, "step": 8176 }, { "epoch": 0.73, "grad_norm": 5.9633759418296055, "learning_rate": 7.338388122447559e-06, "loss": 0.793, "step": 8177 }, { "epoch": 0.73, "grad_norm": 5.086040039350024, "learning_rate": 7.337749594721517e-06, "loss": 0.7752, "step": 8178 }, { "epoch": 0.73, "grad_norm": 7.313619430119193, "learning_rate": 7.337111018200071e-06, "loss": 0.8243, "step": 8179 }, { "epoch": 0.73, "grad_norm": 5.514620412827863, "learning_rate": 7.336472392896551e-06, "loss": 0.8052, "step": 8180 }, { "epoch": 0.73, "grad_norm": 4.75484674856618, "learning_rate": 7.335833718824288e-06, "loss": 0.7219, "step": 8181 }, { "epoch": 0.73, "grad_norm": 6.022613627224324, "learning_rate": 7.33519499599661e-06, "loss": 0.7937, "step": 8182 }, { "epoch": 0.73, "grad_norm": 5.336702845776959, "learning_rate": 7.334556224426851e-06, "loss": 0.7473, "step": 8183 }, { "epoch": 0.73, "grad_norm": 4.9264764653449395, "learning_rate": 7.3339174041283435e-06, "loss": 0.7896, "step": 8184 }, { "epoch": 0.73, "grad_norm": 5.517621043911835, "learning_rate": 7.333278535114421e-06, "loss": 0.7836, "step": 8185 }, { "epoch": 0.73, "grad_norm": 8.255672475787565, "learning_rate": 7.332639617398418e-06, "loss": 0.7545, "step": 8186 }, { "epoch": 0.73, "grad_norm": 5.313940753626284, "learning_rate": 7.332000650993674e-06, "loss": 0.8306, "step": 8187 }, { "epoch": 0.73, "grad_norm": 6.043628525359937, "learning_rate": 7.331361635913521e-06, "loss": 0.8038, "step": 8188 }, { "epoch": 0.73, "grad_norm": 7.432803833918143, "learning_rate": 7.330722572171299e-06, "loss": 0.7357, "step": 8189 }, { "epoch": 0.73, "grad_norm": 7.452159314785163, "learning_rate": 7.330083459780349e-06, "loss": 0.7858, "step": 8190 }, { "epoch": 0.73, "grad_norm": 6.455305135730963, "learning_rate": 7.32944429875401e-06, "loss": 0.7786, "step": 8191 }, { "epoch": 0.73, "grad_norm": 6.827093264614046, "learning_rate": 7.328805089105622e-06, "loss": 0.8125, "step": 8192 }, { "epoch": 0.73, "grad_norm": 5.617177401438749, "learning_rate": 7.3281658308485275e-06, "loss": 0.7723, "step": 8193 }, { "epoch": 0.73, "grad_norm": 6.895723488884495, "learning_rate": 7.32752652399607e-06, "loss": 0.7858, "step": 8194 }, { "epoch": 0.73, "grad_norm": 6.253474884294706, "learning_rate": 7.326887168561594e-06, "loss": 0.7833, "step": 8195 }, { "epoch": 0.73, "grad_norm": 5.553497574157832, "learning_rate": 7.326247764558443e-06, "loss": 0.7596, "step": 8196 }, { "epoch": 0.73, "grad_norm": 5.409867943097077, "learning_rate": 7.325608311999966e-06, "loss": 0.7846, "step": 8197 }, { "epoch": 0.73, "grad_norm": 6.389838011765209, "learning_rate": 7.324968810899508e-06, "loss": 0.8353, "step": 8198 }, { "epoch": 0.73, "grad_norm": 5.398505278605998, "learning_rate": 7.324329261270417e-06, "loss": 0.771, "step": 8199 }, { "epoch": 0.73, "grad_norm": 5.011640528397752, "learning_rate": 7.323689663126044e-06, "loss": 0.8059, "step": 8200 }, { "epoch": 0.73, "grad_norm": 6.026970729297101, "learning_rate": 7.323050016479736e-06, "loss": 0.8159, "step": 8201 }, { "epoch": 0.73, "grad_norm": 5.670324547392072, "learning_rate": 7.322410321344846e-06, "loss": 0.7766, "step": 8202 }, { "epoch": 0.73, "grad_norm": 4.806274148249362, "learning_rate": 7.321770577734729e-06, "loss": 0.7961, "step": 8203 }, { "epoch": 0.73, "grad_norm": 7.629675460350589, "learning_rate": 7.321130785662736e-06, "loss": 0.7861, "step": 8204 }, { "epoch": 0.73, "grad_norm": 9.62343965694151, "learning_rate": 7.320490945142219e-06, "loss": 0.7361, "step": 8205 }, { "epoch": 0.73, "grad_norm": 5.226919766783721, "learning_rate": 7.319851056186536e-06, "loss": 0.8629, "step": 8206 }, { "epoch": 0.73, "grad_norm": 8.258630049767156, "learning_rate": 7.319211118809043e-06, "loss": 0.8151, "step": 8207 }, { "epoch": 0.73, "grad_norm": 7.4765162352314025, "learning_rate": 7.318571133023096e-06, "loss": 0.6623, "step": 8208 }, { "epoch": 0.73, "grad_norm": 5.144531713469088, "learning_rate": 7.317931098842054e-06, "loss": 0.7927, "step": 8209 }, { "epoch": 0.73, "grad_norm": 5.184474694383772, "learning_rate": 7.317291016279278e-06, "loss": 0.758, "step": 8210 }, { "epoch": 0.73, "grad_norm": 6.396792159301318, "learning_rate": 7.3166508853481265e-06, "loss": 0.7809, "step": 8211 }, { "epoch": 0.73, "grad_norm": 5.407328161941519, "learning_rate": 7.31601070606196e-06, "loss": 0.8261, "step": 8212 }, { "epoch": 0.73, "grad_norm": 4.699366166844901, "learning_rate": 7.315370478434144e-06, "loss": 0.7752, "step": 8213 }, { "epoch": 0.73, "grad_norm": 3.998094230871877, "learning_rate": 7.314730202478038e-06, "loss": 0.791, "step": 8214 }, { "epoch": 0.73, "grad_norm": 4.479004902472032, "learning_rate": 7.314089878207008e-06, "loss": 0.825, "step": 8215 }, { "epoch": 0.73, "grad_norm": 8.001385622532375, "learning_rate": 7.313449505634421e-06, "loss": 0.861, "step": 8216 }, { "epoch": 0.73, "grad_norm": 4.514787635929313, "learning_rate": 7.312809084773641e-06, "loss": 0.7687, "step": 8217 }, { "epoch": 0.73, "grad_norm": 7.286280660477105, "learning_rate": 7.3121686156380354e-06, "loss": 0.844, "step": 8218 }, { "epoch": 0.73, "grad_norm": 5.837593908439315, "learning_rate": 7.311528098240975e-06, "loss": 0.7651, "step": 8219 }, { "epoch": 0.73, "grad_norm": 4.86543775712285, "learning_rate": 7.310887532595828e-06, "loss": 0.8092, "step": 8220 }, { "epoch": 0.73, "grad_norm": 4.334627167643357, "learning_rate": 7.310246918715964e-06, "loss": 0.7645, "step": 8221 }, { "epoch": 0.73, "grad_norm": 7.094707964612209, "learning_rate": 7.3096062566147554e-06, "loss": 0.7294, "step": 8222 }, { "epoch": 0.73, "grad_norm": 4.9148127637503025, "learning_rate": 7.308965546305573e-06, "loss": 0.7821, "step": 8223 }, { "epoch": 0.73, "grad_norm": 7.430021899578567, "learning_rate": 7.3083247878017914e-06, "loss": 0.772, "step": 8224 }, { "epoch": 0.73, "grad_norm": 6.482422037390778, "learning_rate": 7.307683981116787e-06, "loss": 0.7808, "step": 8225 }, { "epoch": 0.73, "grad_norm": 6.087836271555243, "learning_rate": 7.307043126263932e-06, "loss": 0.7416, "step": 8226 }, { "epoch": 0.73, "grad_norm": 6.070878468122808, "learning_rate": 7.306402223256604e-06, "loss": 0.7476, "step": 8227 }, { "epoch": 0.73, "grad_norm": 8.749045045204904, "learning_rate": 7.3057612721081785e-06, "loss": 0.7187, "step": 8228 }, { "epoch": 0.73, "grad_norm": 6.3930924104658295, "learning_rate": 7.305120272832039e-06, "loss": 0.744, "step": 8229 }, { "epoch": 0.73, "grad_norm": 5.111530002366528, "learning_rate": 7.304479225441561e-06, "loss": 0.7952, "step": 8230 }, { "epoch": 0.73, "grad_norm": 5.684668024561761, "learning_rate": 7.3038381299501245e-06, "loss": 0.7456, "step": 8231 }, { "epoch": 0.73, "grad_norm": 5.689876949463993, "learning_rate": 7.303196986371114e-06, "loss": 0.7704, "step": 8232 }, { "epoch": 0.73, "grad_norm": 6.270557586515891, "learning_rate": 7.30255579471791e-06, "loss": 0.7943, "step": 8233 }, { "epoch": 0.73, "grad_norm": 5.670306618048739, "learning_rate": 7.301914555003896e-06, "loss": 0.7939, "step": 8234 }, { "epoch": 0.73, "grad_norm": 5.825631473379009, "learning_rate": 7.301273267242458e-06, "loss": 0.6778, "step": 8235 }, { "epoch": 0.73, "grad_norm": 5.848727848154191, "learning_rate": 7.300631931446978e-06, "loss": 0.8059, "step": 8236 }, { "epoch": 0.73, "grad_norm": 5.056150144197105, "learning_rate": 7.2999905476308465e-06, "loss": 0.7837, "step": 8237 }, { "epoch": 0.73, "grad_norm": 5.335078503327824, "learning_rate": 7.299349115807448e-06, "loss": 0.7806, "step": 8238 }, { "epoch": 0.74, "grad_norm": 5.214139373350839, "learning_rate": 7.298707635990173e-06, "loss": 0.7914, "step": 8239 }, { "epoch": 0.74, "grad_norm": 7.444822233434615, "learning_rate": 7.298066108192409e-06, "loss": 0.8222, "step": 8240 }, { "epoch": 0.74, "grad_norm": 6.80980767552241, "learning_rate": 7.2974245324275495e-06, "loss": 0.7735, "step": 8241 }, { "epoch": 0.74, "grad_norm": 8.92577650398868, "learning_rate": 7.296782908708984e-06, "loss": 0.8211, "step": 8242 }, { "epoch": 0.74, "grad_norm": 4.547194658509991, "learning_rate": 7.296141237050104e-06, "loss": 0.8115, "step": 8243 }, { "epoch": 0.74, "grad_norm": 5.05520107636626, "learning_rate": 7.295499517464304e-06, "loss": 0.7453, "step": 8244 }, { "epoch": 0.74, "grad_norm": 4.884272134058154, "learning_rate": 7.29485774996498e-06, "loss": 0.7609, "step": 8245 }, { "epoch": 0.74, "grad_norm": 4.568628495034229, "learning_rate": 7.294215934565525e-06, "loss": 0.8129, "step": 8246 }, { "epoch": 0.74, "grad_norm": 5.867750842044337, "learning_rate": 7.293574071279337e-06, "loss": 0.8114, "step": 8247 }, { "epoch": 0.74, "grad_norm": 4.492842197924874, "learning_rate": 7.292932160119814e-06, "loss": 0.7539, "step": 8248 }, { "epoch": 0.74, "grad_norm": 4.9680380616880475, "learning_rate": 7.292290201100351e-06, "loss": 0.8569, "step": 8249 }, { "epoch": 0.74, "grad_norm": 5.4373297380795735, "learning_rate": 7.291648194234353e-06, "loss": 0.734, "step": 8250 }, { "epoch": 0.74, "grad_norm": 5.943839619023628, "learning_rate": 7.291006139535216e-06, "loss": 0.7609, "step": 8251 }, { "epoch": 0.74, "grad_norm": 6.78657726247374, "learning_rate": 7.290364037016343e-06, "loss": 0.7456, "step": 8252 }, { "epoch": 0.74, "grad_norm": 4.730714651251813, "learning_rate": 7.289721886691136e-06, "loss": 0.7415, "step": 8253 }, { "epoch": 0.74, "grad_norm": 7.920655195668091, "learning_rate": 7.289079688573002e-06, "loss": 0.8746, "step": 8254 }, { "epoch": 0.74, "grad_norm": 5.100700638119718, "learning_rate": 7.28843744267534e-06, "loss": 0.8321, "step": 8255 }, { "epoch": 0.74, "grad_norm": 4.600113981041793, "learning_rate": 7.287795149011559e-06, "loss": 0.7663, "step": 8256 }, { "epoch": 0.74, "grad_norm": 9.765073908816124, "learning_rate": 7.2871528075950626e-06, "loss": 0.8414, "step": 8257 }, { "epoch": 0.74, "grad_norm": 4.542197844300765, "learning_rate": 7.286510418439262e-06, "loss": 0.8065, "step": 8258 }, { "epoch": 0.74, "grad_norm": 5.551351258624759, "learning_rate": 7.285867981557563e-06, "loss": 0.8763, "step": 8259 }, { "epoch": 0.74, "grad_norm": 5.5642115416911535, "learning_rate": 7.285225496963377e-06, "loss": 0.8296, "step": 8260 }, { "epoch": 0.74, "grad_norm": 5.536206175448339, "learning_rate": 7.284582964670112e-06, "loss": 0.8203, "step": 8261 }, { "epoch": 0.74, "grad_norm": 6.003951357070649, "learning_rate": 7.283940384691181e-06, "loss": 0.7495, "step": 8262 }, { "epoch": 0.74, "grad_norm": 6.678447676034929, "learning_rate": 7.283297757039996e-06, "loss": 0.7951, "step": 8263 }, { "epoch": 0.74, "grad_norm": 4.06897896765556, "learning_rate": 7.282655081729971e-06, "loss": 0.7912, "step": 8264 }, { "epoch": 0.74, "grad_norm": 6.104381861032628, "learning_rate": 7.282012358774519e-06, "loss": 0.8079, "step": 8265 }, { "epoch": 0.74, "grad_norm": 10.492019584871516, "learning_rate": 7.281369588187058e-06, "loss": 0.8701, "step": 8266 }, { "epoch": 0.74, "grad_norm": 6.781702773911845, "learning_rate": 7.2807267699810035e-06, "loss": 0.8559, "step": 8267 }, { "epoch": 0.74, "grad_norm": 6.35518996228275, "learning_rate": 7.28008390416977e-06, "loss": 0.7923, "step": 8268 }, { "epoch": 0.74, "grad_norm": 5.939295055465337, "learning_rate": 7.279440990766781e-06, "loss": 0.7855, "step": 8269 }, { "epoch": 0.74, "grad_norm": 5.555794425119006, "learning_rate": 7.278798029785452e-06, "loss": 0.8316, "step": 8270 }, { "epoch": 0.74, "grad_norm": 5.815948646560741, "learning_rate": 7.278155021239205e-06, "loss": 0.7295, "step": 8271 }, { "epoch": 0.74, "grad_norm": 6.144362607227897, "learning_rate": 7.277511965141459e-06, "loss": 0.7853, "step": 8272 }, { "epoch": 0.74, "grad_norm": 5.213283979141265, "learning_rate": 7.2768688615056416e-06, "loss": 0.8109, "step": 8273 }, { "epoch": 0.74, "grad_norm": 6.6177343103774815, "learning_rate": 7.27622571034517e-06, "loss": 0.7801, "step": 8274 }, { "epoch": 0.74, "grad_norm": 5.729287431402088, "learning_rate": 7.275582511673473e-06, "loss": 0.8309, "step": 8275 }, { "epoch": 0.74, "grad_norm": 5.313679522605185, "learning_rate": 7.274939265503975e-06, "loss": 0.7962, "step": 8276 }, { "epoch": 0.74, "grad_norm": 4.065568880518448, "learning_rate": 7.274295971850102e-06, "loss": 0.8099, "step": 8277 }, { "epoch": 0.74, "grad_norm": 6.36026109988822, "learning_rate": 7.273652630725279e-06, "loss": 0.8384, "step": 8278 }, { "epoch": 0.74, "grad_norm": 6.450842053470196, "learning_rate": 7.2730092421429384e-06, "loss": 0.7207, "step": 8279 }, { "epoch": 0.74, "grad_norm": 6.889531149092267, "learning_rate": 7.272365806116508e-06, "loss": 0.8569, "step": 8280 }, { "epoch": 0.74, "grad_norm": 6.386139379552158, "learning_rate": 7.271722322659417e-06, "loss": 0.7769, "step": 8281 }, { "epoch": 0.74, "grad_norm": 5.155952634226903, "learning_rate": 7.271078791785097e-06, "loss": 0.7587, "step": 8282 }, { "epoch": 0.74, "grad_norm": 5.964535463160814, "learning_rate": 7.270435213506981e-06, "loss": 0.7375, "step": 8283 }, { "epoch": 0.74, "grad_norm": 5.9868275580688515, "learning_rate": 7.269791587838504e-06, "loss": 0.8224, "step": 8284 }, { "epoch": 0.74, "grad_norm": 5.182215152717567, "learning_rate": 7.269147914793095e-06, "loss": 0.7067, "step": 8285 }, { "epoch": 0.74, "grad_norm": 4.207456478193949, "learning_rate": 7.268504194384196e-06, "loss": 0.7964, "step": 8286 }, { "epoch": 0.74, "grad_norm": 6.7156438986789615, "learning_rate": 7.267860426625237e-06, "loss": 0.7899, "step": 8287 }, { "epoch": 0.74, "grad_norm": 5.997498136531247, "learning_rate": 7.26721661152966e-06, "loss": 0.797, "step": 8288 }, { "epoch": 0.74, "grad_norm": 5.81284517993646, "learning_rate": 7.266572749110901e-06, "loss": 0.7381, "step": 8289 }, { "epoch": 0.74, "grad_norm": 5.4735471210760895, "learning_rate": 7.265928839382399e-06, "loss": 0.7415, "step": 8290 }, { "epoch": 0.74, "grad_norm": 5.297761158358614, "learning_rate": 7.265284882357596e-06, "loss": 0.7919, "step": 8291 }, { "epoch": 0.74, "grad_norm": 5.714953384695105, "learning_rate": 7.264640878049931e-06, "loss": 0.8531, "step": 8292 }, { "epoch": 0.74, "grad_norm": 6.333871927312308, "learning_rate": 7.263996826472848e-06, "loss": 0.7717, "step": 8293 }, { "epoch": 0.74, "grad_norm": 4.301446077504863, "learning_rate": 7.263352727639788e-06, "loss": 0.8516, "step": 8294 }, { "epoch": 0.74, "grad_norm": 6.0921639161196595, "learning_rate": 7.262708581564196e-06, "loss": 0.8347, "step": 8295 }, { "epoch": 0.74, "grad_norm": 6.955115848699971, "learning_rate": 7.26206438825952e-06, "loss": 0.7869, "step": 8296 }, { "epoch": 0.74, "grad_norm": 5.241591799274524, "learning_rate": 7.261420147739202e-06, "loss": 0.7625, "step": 8297 }, { "epoch": 0.74, "grad_norm": 5.854121834151947, "learning_rate": 7.260775860016691e-06, "loss": 0.8807, "step": 8298 }, { "epoch": 0.74, "grad_norm": 4.91167508145503, "learning_rate": 7.260131525105436e-06, "loss": 0.7106, "step": 8299 }, { "epoch": 0.74, "grad_norm": 5.867536414253444, "learning_rate": 7.259487143018884e-06, "loss": 0.743, "step": 8300 }, { "epoch": 0.74, "grad_norm": 6.477922929120844, "learning_rate": 7.258842713770487e-06, "loss": 0.8052, "step": 8301 }, { "epoch": 0.74, "grad_norm": 5.990262324693517, "learning_rate": 7.258198237373694e-06, "loss": 0.7262, "step": 8302 }, { "epoch": 0.74, "grad_norm": 7.859882223332972, "learning_rate": 7.257553713841958e-06, "loss": 0.7423, "step": 8303 }, { "epoch": 0.74, "grad_norm": 6.088863299287504, "learning_rate": 7.256909143188733e-06, "loss": 0.7933, "step": 8304 }, { "epoch": 0.74, "grad_norm": 6.118562571875256, "learning_rate": 7.256264525427472e-06, "loss": 0.7897, "step": 8305 }, { "epoch": 0.74, "grad_norm": 10.586078210268099, "learning_rate": 7.25561986057163e-06, "loss": 0.7604, "step": 8306 }, { "epoch": 0.74, "grad_norm": 5.47781222536973, "learning_rate": 7.254975148634663e-06, "loss": 0.7635, "step": 8307 }, { "epoch": 0.74, "grad_norm": 5.0372527152208315, "learning_rate": 7.254330389630028e-06, "loss": 0.7566, "step": 8308 }, { "epoch": 0.74, "grad_norm": 4.434800256519501, "learning_rate": 7.2536855835711835e-06, "loss": 0.7928, "step": 8309 }, { "epoch": 0.74, "grad_norm": 5.547799212745193, "learning_rate": 7.253040730471588e-06, "loss": 0.7672, "step": 8310 }, { "epoch": 0.74, "grad_norm": 5.1258534537314295, "learning_rate": 7.2523958303447015e-06, "loss": 0.7641, "step": 8311 }, { "epoch": 0.74, "grad_norm": 4.879963092008109, "learning_rate": 7.2517508832039854e-06, "loss": 0.7799, "step": 8312 }, { "epoch": 0.74, "grad_norm": 6.082152480610103, "learning_rate": 7.251105889062899e-06, "loss": 0.7915, "step": 8313 }, { "epoch": 0.74, "grad_norm": 4.740330182518155, "learning_rate": 7.250460847934907e-06, "loss": 0.7964, "step": 8314 }, { "epoch": 0.74, "grad_norm": 5.206982947354539, "learning_rate": 7.249815759833475e-06, "loss": 0.853, "step": 8315 }, { "epoch": 0.74, "grad_norm": 6.386502128638249, "learning_rate": 7.249170624772064e-06, "loss": 0.7444, "step": 8316 }, { "epoch": 0.74, "grad_norm": 6.47890943013288, "learning_rate": 7.248525442764143e-06, "loss": 0.8339, "step": 8317 }, { "epoch": 0.74, "grad_norm": 8.792987110329758, "learning_rate": 7.247880213823178e-06, "loss": 0.7854, "step": 8318 }, { "epoch": 0.74, "grad_norm": 6.07755200826069, "learning_rate": 7.247234937962635e-06, "loss": 0.7944, "step": 8319 }, { "epoch": 0.74, "grad_norm": 7.257484652885722, "learning_rate": 7.246589615195984e-06, "loss": 0.7665, "step": 8320 }, { "epoch": 0.74, "grad_norm": 4.818876149323978, "learning_rate": 7.245944245536698e-06, "loss": 0.8659, "step": 8321 }, { "epoch": 0.74, "grad_norm": 7.996314975722696, "learning_rate": 7.2452988289982405e-06, "loss": 0.8176, "step": 8322 }, { "epoch": 0.74, "grad_norm": 7.7302282132724445, "learning_rate": 7.244653365594089e-06, "loss": 0.7654, "step": 8323 }, { "epoch": 0.74, "grad_norm": 5.409073160446657, "learning_rate": 7.2440078553377135e-06, "loss": 0.8151, "step": 8324 }, { "epoch": 0.74, "grad_norm": 5.896642578481406, "learning_rate": 7.243362298242588e-06, "loss": 0.7133, "step": 8325 }, { "epoch": 0.74, "grad_norm": 5.080283589731239, "learning_rate": 7.2427166943221875e-06, "loss": 0.8341, "step": 8326 }, { "epoch": 0.74, "grad_norm": 6.038394674870549, "learning_rate": 7.242071043589989e-06, "loss": 0.817, "step": 8327 }, { "epoch": 0.74, "grad_norm": 4.664615757285098, "learning_rate": 7.241425346059466e-06, "loss": 0.7472, "step": 8328 }, { "epoch": 0.74, "grad_norm": 5.445515093722944, "learning_rate": 7.2407796017440975e-06, "loss": 0.7538, "step": 8329 }, { "epoch": 0.74, "grad_norm": 5.722247303594539, "learning_rate": 7.240133810657363e-06, "loss": 0.8149, "step": 8330 }, { "epoch": 0.74, "grad_norm": 5.100761950454002, "learning_rate": 7.23948797281274e-06, "loss": 0.7438, "step": 8331 }, { "epoch": 0.74, "grad_norm": 5.444312098701101, "learning_rate": 7.23884208822371e-06, "loss": 0.7998, "step": 8332 }, { "epoch": 0.74, "grad_norm": 7.55869478480386, "learning_rate": 7.238196156903754e-06, "loss": 0.8334, "step": 8333 }, { "epoch": 0.74, "grad_norm": 5.892199754993843, "learning_rate": 7.2375501788663574e-06, "loss": 0.9232, "step": 8334 }, { "epoch": 0.74, "grad_norm": 6.977625103021928, "learning_rate": 7.236904154125e-06, "loss": 0.7941, "step": 8335 }, { "epoch": 0.74, "grad_norm": 5.3258147420106114, "learning_rate": 7.236258082693165e-06, "loss": 0.7737, "step": 8336 }, { "epoch": 0.74, "grad_norm": 5.1943668532581055, "learning_rate": 7.235611964584341e-06, "loss": 0.743, "step": 8337 }, { "epoch": 0.74, "grad_norm": 8.751534400072075, "learning_rate": 7.2349657998120135e-06, "loss": 0.7453, "step": 8338 }, { "epoch": 0.74, "grad_norm": 6.7831565416906106, "learning_rate": 7.234319588389669e-06, "loss": 0.7246, "step": 8339 }, { "epoch": 0.74, "grad_norm": 5.662634411961692, "learning_rate": 7.233673330330797e-06, "loss": 0.7993, "step": 8340 }, { "epoch": 0.74, "grad_norm": 6.1490217949167105, "learning_rate": 7.233027025648886e-06, "loss": 0.753, "step": 8341 }, { "epoch": 0.74, "grad_norm": 5.062131627271469, "learning_rate": 7.232380674357425e-06, "loss": 0.8385, "step": 8342 }, { "epoch": 0.74, "grad_norm": 6.266242047399319, "learning_rate": 7.231734276469905e-06, "loss": 0.8341, "step": 8343 }, { "epoch": 0.74, "grad_norm": 4.623009683654718, "learning_rate": 7.231087831999822e-06, "loss": 0.7532, "step": 8344 }, { "epoch": 0.74, "grad_norm": 4.336365418647049, "learning_rate": 7.230441340960666e-06, "loss": 0.7439, "step": 8345 }, { "epoch": 0.74, "grad_norm": 9.050153365356373, "learning_rate": 7.229794803365931e-06, "loss": 0.8587, "step": 8346 }, { "epoch": 0.74, "grad_norm": 5.221961658720766, "learning_rate": 7.229148219229112e-06, "loss": 0.7488, "step": 8347 }, { "epoch": 0.74, "grad_norm": 5.185613535631729, "learning_rate": 7.228501588563707e-06, "loss": 0.7979, "step": 8348 }, { "epoch": 0.74, "grad_norm": 5.970222451934138, "learning_rate": 7.227854911383211e-06, "loss": 0.7659, "step": 8349 }, { "epoch": 0.74, "grad_norm": 6.387784850195836, "learning_rate": 7.227208187701122e-06, "loss": 0.7848, "step": 8350 }, { "epoch": 0.74, "grad_norm": 6.438330868539399, "learning_rate": 7.22656141753094e-06, "loss": 0.8169, "step": 8351 }, { "epoch": 0.75, "grad_norm": 5.223597107351562, "learning_rate": 7.225914600886164e-06, "loss": 0.79, "step": 8352 }, { "epoch": 0.75, "grad_norm": 5.336398098488384, "learning_rate": 7.225267737780297e-06, "loss": 0.8299, "step": 8353 }, { "epoch": 0.75, "grad_norm": 4.666711439008565, "learning_rate": 7.224620828226838e-06, "loss": 0.7139, "step": 8354 }, { "epoch": 0.75, "grad_norm": 8.378117430766114, "learning_rate": 7.223973872239292e-06, "loss": 0.7609, "step": 8355 }, { "epoch": 0.75, "grad_norm": 6.664315752685664, "learning_rate": 7.223326869831162e-06, "loss": 0.7853, "step": 8356 }, { "epoch": 0.75, "grad_norm": 5.189116361289905, "learning_rate": 7.222679821015952e-06, "loss": 0.7652, "step": 8357 }, { "epoch": 0.75, "grad_norm": 5.933663059972507, "learning_rate": 7.222032725807168e-06, "loss": 0.7612, "step": 8358 }, { "epoch": 0.75, "grad_norm": 5.224587589646313, "learning_rate": 7.221385584218318e-06, "loss": 0.7162, "step": 8359 }, { "epoch": 0.75, "grad_norm": 5.3009386411949455, "learning_rate": 7.220738396262907e-06, "loss": 0.8257, "step": 8360 }, { "epoch": 0.75, "grad_norm": 5.627393102759216, "learning_rate": 7.220091161954445e-06, "loss": 0.7583, "step": 8361 }, { "epoch": 0.75, "grad_norm": 5.447362463435873, "learning_rate": 7.219443881306445e-06, "loss": 0.7487, "step": 8362 }, { "epoch": 0.75, "grad_norm": 6.1954855729349685, "learning_rate": 7.218796554332412e-06, "loss": 0.7881, "step": 8363 }, { "epoch": 0.75, "grad_norm": 5.735313710259194, "learning_rate": 7.21814918104586e-06, "loss": 0.7549, "step": 8364 }, { "epoch": 0.75, "grad_norm": 5.36333590645361, "learning_rate": 7.217501761460303e-06, "loss": 0.755, "step": 8365 }, { "epoch": 0.75, "grad_norm": 5.030231107760213, "learning_rate": 7.216854295589251e-06, "loss": 0.7928, "step": 8366 }, { "epoch": 0.75, "grad_norm": 4.5258319501116, "learning_rate": 7.216206783446222e-06, "loss": 0.7428, "step": 8367 }, { "epoch": 0.75, "grad_norm": 5.629756917956121, "learning_rate": 7.215559225044729e-06, "loss": 0.7716, "step": 8368 }, { "epoch": 0.75, "grad_norm": 6.813427559292424, "learning_rate": 7.2149116203982905e-06, "loss": 0.7704, "step": 8369 }, { "epoch": 0.75, "grad_norm": 8.131204386503361, "learning_rate": 7.214263969520422e-06, "loss": 0.8797, "step": 8370 }, { "epoch": 0.75, "grad_norm": 6.7882775545446306, "learning_rate": 7.213616272424641e-06, "loss": 0.7792, "step": 8371 }, { "epoch": 0.75, "grad_norm": 9.33117526773447, "learning_rate": 7.212968529124469e-06, "loss": 0.849, "step": 8372 }, { "epoch": 0.75, "grad_norm": 7.802091795408552, "learning_rate": 7.212320739633425e-06, "loss": 0.7722, "step": 8373 }, { "epoch": 0.75, "grad_norm": 5.941900574028413, "learning_rate": 7.21167290396503e-06, "loss": 0.8287, "step": 8374 }, { "epoch": 0.75, "grad_norm": 4.386806106809522, "learning_rate": 7.211025022132807e-06, "loss": 0.8305, "step": 8375 }, { "epoch": 0.75, "grad_norm": 8.35052619983981, "learning_rate": 7.2103770941502795e-06, "loss": 0.7508, "step": 8376 }, { "epoch": 0.75, "grad_norm": 6.606942277935321, "learning_rate": 7.2097291200309704e-06, "loss": 0.8042, "step": 8377 }, { "epoch": 0.75, "grad_norm": 5.121855885853477, "learning_rate": 7.209081099788404e-06, "loss": 0.7285, "step": 8378 }, { "epoch": 0.75, "grad_norm": 5.702280579456126, "learning_rate": 7.2084330334361095e-06, "loss": 0.7317, "step": 8379 }, { "epoch": 0.75, "grad_norm": 5.559239209528451, "learning_rate": 7.20778492098761e-06, "loss": 0.8188, "step": 8380 }, { "epoch": 0.75, "grad_norm": 5.046253534632505, "learning_rate": 7.207136762456436e-06, "loss": 0.7898, "step": 8381 }, { "epoch": 0.75, "grad_norm": 6.36679177196306, "learning_rate": 7.206488557856116e-06, "loss": 0.7433, "step": 8382 }, { "epoch": 0.75, "grad_norm": 5.5797347217738285, "learning_rate": 7.205840307200178e-06, "loss": 0.7433, "step": 8383 }, { "epoch": 0.75, "grad_norm": 5.733312199559989, "learning_rate": 7.205192010502156e-06, "loss": 0.7846, "step": 8384 }, { "epoch": 0.75, "grad_norm": 6.403596344087073, "learning_rate": 7.204543667775581e-06, "loss": 0.8124, "step": 8385 }, { "epoch": 0.75, "grad_norm": 6.266665246256145, "learning_rate": 7.203895279033982e-06, "loss": 0.8335, "step": 8386 }, { "epoch": 0.75, "grad_norm": 8.660108354890642, "learning_rate": 7.2032468442908965e-06, "loss": 0.7213, "step": 8387 }, { "epoch": 0.75, "grad_norm": 5.906848130125493, "learning_rate": 7.202598363559859e-06, "loss": 0.8056, "step": 8388 }, { "epoch": 0.75, "grad_norm": 6.399929098048576, "learning_rate": 7.2019498368544025e-06, "loss": 0.7797, "step": 8389 }, { "epoch": 0.75, "grad_norm": 6.499543402175248, "learning_rate": 7.201301264188066e-06, "loss": 0.7857, "step": 8390 }, { "epoch": 0.75, "grad_norm": 6.60553211878301, "learning_rate": 7.200652645574389e-06, "loss": 0.759, "step": 8391 }, { "epoch": 0.75, "grad_norm": 4.198775320010959, "learning_rate": 7.200003981026904e-06, "loss": 0.8393, "step": 8392 }, { "epoch": 0.75, "grad_norm": 5.7636545866125175, "learning_rate": 7.199355270559155e-06, "loss": 0.8372, "step": 8393 }, { "epoch": 0.75, "grad_norm": 5.72186868799078, "learning_rate": 7.198706514184683e-06, "loss": 0.8962, "step": 8394 }, { "epoch": 0.75, "grad_norm": 6.340620939695928, "learning_rate": 7.198057711917025e-06, "loss": 0.7516, "step": 8395 }, { "epoch": 0.75, "grad_norm": 4.393906384017675, "learning_rate": 7.197408863769726e-06, "loss": 0.6771, "step": 8396 }, { "epoch": 0.75, "grad_norm": 5.149218752732445, "learning_rate": 7.19675996975633e-06, "loss": 0.7409, "step": 8397 }, { "epoch": 0.75, "grad_norm": 5.876031133364807, "learning_rate": 7.19611102989038e-06, "loss": 0.7105, "step": 8398 }, { "epoch": 0.75, "grad_norm": 7.399813958680154, "learning_rate": 7.195462044185424e-06, "loss": 0.8024, "step": 8399 }, { "epoch": 0.75, "grad_norm": 4.87287201639954, "learning_rate": 7.194813012655003e-06, "loss": 0.7435, "step": 8400 }, { "epoch": 0.75, "grad_norm": 5.643938396183751, "learning_rate": 7.194163935312668e-06, "loss": 0.8285, "step": 8401 }, { "epoch": 0.75, "grad_norm": 5.195344351184833, "learning_rate": 7.193514812171965e-06, "loss": 0.7266, "step": 8402 }, { "epoch": 0.75, "grad_norm": 5.942468657388759, "learning_rate": 7.192865643246445e-06, "loss": 0.7201, "step": 8403 }, { "epoch": 0.75, "grad_norm": 5.173437328858069, "learning_rate": 7.192216428549658e-06, "loss": 0.8078, "step": 8404 }, { "epoch": 0.75, "grad_norm": 6.206392573938052, "learning_rate": 7.191567168095152e-06, "loss": 0.8537, "step": 8405 }, { "epoch": 0.75, "grad_norm": 5.1855961535176816, "learning_rate": 7.190917861896482e-06, "loss": 0.7877, "step": 8406 }, { "epoch": 0.75, "grad_norm": 7.489091860694121, "learning_rate": 7.190268509967199e-06, "loss": 0.7551, "step": 8407 }, { "epoch": 0.75, "grad_norm": 4.753403007819794, "learning_rate": 7.189619112320858e-06, "loss": 0.8196, "step": 8408 }, { "epoch": 0.75, "grad_norm": 6.235309327023768, "learning_rate": 7.188969668971013e-06, "loss": 0.8115, "step": 8409 }, { "epoch": 0.75, "grad_norm": 4.647414374190883, "learning_rate": 7.1883201799312194e-06, "loss": 0.7742, "step": 8410 }, { "epoch": 0.75, "grad_norm": 4.741160106877535, "learning_rate": 7.1876706452150345e-06, "loss": 0.7848, "step": 8411 }, { "epoch": 0.75, "grad_norm": 6.090642518223347, "learning_rate": 7.187021064836016e-06, "loss": 0.7159, "step": 8412 }, { "epoch": 0.75, "grad_norm": 5.715633455717547, "learning_rate": 7.186371438807723e-06, "loss": 0.7866, "step": 8413 }, { "epoch": 0.75, "grad_norm": 5.082965754532854, "learning_rate": 7.185721767143714e-06, "loss": 0.8743, "step": 8414 }, { "epoch": 0.75, "grad_norm": 8.312972622029012, "learning_rate": 7.185072049857549e-06, "loss": 0.7529, "step": 8415 }, { "epoch": 0.75, "grad_norm": 6.315061212449712, "learning_rate": 7.184422286962791e-06, "loss": 0.7726, "step": 8416 }, { "epoch": 0.75, "grad_norm": 4.58604553366292, "learning_rate": 7.183772478473001e-06, "loss": 0.7226, "step": 8417 }, { "epoch": 0.75, "grad_norm": 6.5427728660284545, "learning_rate": 7.1831226244017424e-06, "loss": 0.7252, "step": 8418 }, { "epoch": 0.75, "grad_norm": 6.162894030921875, "learning_rate": 7.18247272476258e-06, "loss": 0.8715, "step": 8419 }, { "epoch": 0.75, "grad_norm": 6.370564914665877, "learning_rate": 7.181822779569081e-06, "loss": 0.7635, "step": 8420 }, { "epoch": 0.75, "grad_norm": 5.203296995969408, "learning_rate": 7.181172788834809e-06, "loss": 0.7392, "step": 8421 }, { "epoch": 0.75, "grad_norm": 4.97900302669416, "learning_rate": 7.18052275257333e-06, "loss": 0.785, "step": 8422 }, { "epoch": 0.75, "grad_norm": 6.675128764213535, "learning_rate": 7.179872670798216e-06, "loss": 0.8616, "step": 8423 }, { "epoch": 0.75, "grad_norm": 6.7744205415312395, "learning_rate": 7.179222543523033e-06, "loss": 0.7754, "step": 8424 }, { "epoch": 0.75, "grad_norm": 5.6572016044584235, "learning_rate": 7.178572370761349e-06, "loss": 0.8073, "step": 8425 }, { "epoch": 0.75, "grad_norm": 6.099172005829504, "learning_rate": 7.177922152526742e-06, "loss": 0.7836, "step": 8426 }, { "epoch": 0.75, "grad_norm": 5.9088207888392965, "learning_rate": 7.177271888832778e-06, "loss": 0.7586, "step": 8427 }, { "epoch": 0.75, "grad_norm": 5.870768131746314, "learning_rate": 7.17662157969303e-06, "loss": 0.723, "step": 8428 }, { "epoch": 0.75, "grad_norm": 4.99519362145291, "learning_rate": 7.1759712251210746e-06, "loss": 0.7421, "step": 8429 }, { "epoch": 0.75, "grad_norm": 5.67553997574567, "learning_rate": 7.175320825130485e-06, "loss": 0.7291, "step": 8430 }, { "epoch": 0.75, "grad_norm": 5.150899260068749, "learning_rate": 7.174670379734837e-06, "loss": 0.7692, "step": 8431 }, { "epoch": 0.75, "grad_norm": 7.070696800612766, "learning_rate": 7.174019888947708e-06, "loss": 0.8351, "step": 8432 }, { "epoch": 0.75, "grad_norm": 4.5400558586834565, "learning_rate": 7.173369352782674e-06, "loss": 0.8047, "step": 8433 }, { "epoch": 0.75, "grad_norm": 5.820875532404524, "learning_rate": 7.172718771253314e-06, "loss": 0.7878, "step": 8434 }, { "epoch": 0.75, "grad_norm": 8.009984336825047, "learning_rate": 7.1720681443732095e-06, "loss": 0.7392, "step": 8435 }, { "epoch": 0.75, "grad_norm": 5.3255603849172095, "learning_rate": 7.171417472155939e-06, "loss": 0.8376, "step": 8436 }, { "epoch": 0.75, "grad_norm": 6.51828593771627, "learning_rate": 7.170766754615083e-06, "loss": 0.8052, "step": 8437 }, { "epoch": 0.75, "grad_norm": 5.658179066851794, "learning_rate": 7.170115991764224e-06, "loss": 0.7575, "step": 8438 }, { "epoch": 0.75, "grad_norm": 6.397547032504536, "learning_rate": 7.1694651836169485e-06, "loss": 0.8623, "step": 8439 }, { "epoch": 0.75, "grad_norm": 4.4366466022221935, "learning_rate": 7.168814330186836e-06, "loss": 0.7142, "step": 8440 }, { "epoch": 0.75, "grad_norm": 4.857981067454428, "learning_rate": 7.1681634314874745e-06, "loss": 0.7473, "step": 8441 }, { "epoch": 0.75, "grad_norm": 6.166990378189652, "learning_rate": 7.167512487532452e-06, "loss": 0.7494, "step": 8442 }, { "epoch": 0.75, "grad_norm": 6.056175245528765, "learning_rate": 7.166861498335351e-06, "loss": 0.847, "step": 8443 }, { "epoch": 0.75, "grad_norm": 6.112171346922174, "learning_rate": 7.166210463909762e-06, "loss": 0.7872, "step": 8444 }, { "epoch": 0.75, "grad_norm": 5.459483718106011, "learning_rate": 7.165559384269274e-06, "loss": 0.7604, "step": 8445 }, { "epoch": 0.75, "grad_norm": 5.418166920909698, "learning_rate": 7.164908259427474e-06, "loss": 0.7273, "step": 8446 }, { "epoch": 0.75, "grad_norm": 5.605621286106605, "learning_rate": 7.164257089397957e-06, "loss": 0.7759, "step": 8447 }, { "epoch": 0.75, "grad_norm": 5.14773209596183, "learning_rate": 7.163605874194313e-06, "loss": 0.7403, "step": 8448 }, { "epoch": 0.75, "grad_norm": 5.726234560755668, "learning_rate": 7.162954613830136e-06, "loss": 0.7008, "step": 8449 }, { "epoch": 0.75, "grad_norm": 7.386883210181328, "learning_rate": 7.162303308319015e-06, "loss": 0.8311, "step": 8450 }, { "epoch": 0.75, "grad_norm": 5.752366303785898, "learning_rate": 7.161651957674549e-06, "loss": 0.7321, "step": 8451 }, { "epoch": 0.75, "grad_norm": 6.975674626194347, "learning_rate": 7.161000561910334e-06, "loss": 0.7969, "step": 8452 }, { "epoch": 0.75, "grad_norm": 5.274405938659868, "learning_rate": 7.160349121039963e-06, "loss": 0.7913, "step": 8453 }, { "epoch": 0.75, "grad_norm": 5.2526794059224615, "learning_rate": 7.1596976350770345e-06, "loss": 0.767, "step": 8454 }, { "epoch": 0.75, "grad_norm": 5.315204047428044, "learning_rate": 7.15904610403515e-06, "loss": 0.8677, "step": 8455 }, { "epoch": 0.75, "grad_norm": 4.771898498310723, "learning_rate": 7.1583945279279035e-06, "loss": 0.7169, "step": 8456 }, { "epoch": 0.75, "grad_norm": 6.118943353037846, "learning_rate": 7.157742906768901e-06, "loss": 0.7094, "step": 8457 }, { "epoch": 0.75, "grad_norm": 4.512675401770207, "learning_rate": 7.157091240571739e-06, "loss": 0.7862, "step": 8458 }, { "epoch": 0.75, "grad_norm": 4.773519712059057, "learning_rate": 7.1564395293500215e-06, "loss": 0.7443, "step": 8459 }, { "epoch": 0.75, "grad_norm": 4.874952173545283, "learning_rate": 7.155787773117351e-06, "loss": 0.7725, "step": 8460 }, { "epoch": 0.75, "grad_norm": 4.6770917743246585, "learning_rate": 7.155135971887333e-06, "loss": 0.7689, "step": 8461 }, { "epoch": 0.75, "grad_norm": 6.378333364800352, "learning_rate": 7.15448412567357e-06, "loss": 0.8108, "step": 8462 }, { "epoch": 0.75, "grad_norm": 5.863528728826642, "learning_rate": 7.153832234489668e-06, "loss": 0.7155, "step": 8463 }, { "epoch": 0.76, "grad_norm": 4.461846907910876, "learning_rate": 7.1531802983492375e-06, "loss": 0.7728, "step": 8464 }, { "epoch": 0.76, "grad_norm": 4.716510714810709, "learning_rate": 7.1525283172658835e-06, "loss": 0.7485, "step": 8465 }, { "epoch": 0.76, "grad_norm": 6.720187914618092, "learning_rate": 7.1518762912532135e-06, "loss": 0.7997, "step": 8466 }, { "epoch": 0.76, "grad_norm": 5.034561180222616, "learning_rate": 7.151224220324838e-06, "loss": 0.7599, "step": 8467 }, { "epoch": 0.76, "grad_norm": 9.558538981505068, "learning_rate": 7.15057210449437e-06, "loss": 0.7671, "step": 8468 }, { "epoch": 0.76, "grad_norm": 6.175672235907901, "learning_rate": 7.149919943775417e-06, "loss": 0.7859, "step": 8469 }, { "epoch": 0.76, "grad_norm": 5.522737869656957, "learning_rate": 7.149267738181595e-06, "loss": 0.7887, "step": 8470 }, { "epoch": 0.76, "grad_norm": 4.777875392533825, "learning_rate": 7.148615487726516e-06, "loss": 0.7595, "step": 8471 }, { "epoch": 0.76, "grad_norm": 8.958412887625077, "learning_rate": 7.147963192423792e-06, "loss": 0.6681, "step": 8472 }, { "epoch": 0.76, "grad_norm": 5.093942370918403, "learning_rate": 7.147310852287042e-06, "loss": 0.7954, "step": 8473 }, { "epoch": 0.76, "grad_norm": 5.676659061586891, "learning_rate": 7.14665846732988e-06, "loss": 0.8388, "step": 8474 }, { "epoch": 0.76, "grad_norm": 5.05849182330456, "learning_rate": 7.1460060375659235e-06, "loss": 0.7733, "step": 8475 }, { "epoch": 0.76, "grad_norm": 6.707363705403732, "learning_rate": 7.14535356300879e-06, "loss": 0.7536, "step": 8476 }, { "epoch": 0.76, "grad_norm": 5.725243142186797, "learning_rate": 7.1447010436720995e-06, "loss": 0.8603, "step": 8477 }, { "epoch": 0.76, "grad_norm": 5.674095185953578, "learning_rate": 7.144048479569473e-06, "loss": 0.8038, "step": 8478 }, { "epoch": 0.76, "grad_norm": 5.813837239402594, "learning_rate": 7.143395870714529e-06, "loss": 0.8272, "step": 8479 }, { "epoch": 0.76, "grad_norm": 6.627530831496922, "learning_rate": 7.14274321712089e-06, "loss": 0.7733, "step": 8480 }, { "epoch": 0.76, "grad_norm": 7.180235797761122, "learning_rate": 7.142090518802179e-06, "loss": 0.8048, "step": 8481 }, { "epoch": 0.76, "grad_norm": 4.810392961081586, "learning_rate": 7.1414377757720174e-06, "loss": 0.7787, "step": 8482 }, { "epoch": 0.76, "grad_norm": 4.0093732827784345, "learning_rate": 7.140784988044035e-06, "loss": 0.75, "step": 8483 }, { "epoch": 0.76, "grad_norm": 5.96579821492246, "learning_rate": 7.140132155631852e-06, "loss": 0.8204, "step": 8484 }, { "epoch": 0.76, "grad_norm": 4.790222016878875, "learning_rate": 7.1394792785491e-06, "loss": 0.7943, "step": 8485 }, { "epoch": 0.76, "grad_norm": 5.2891076406851525, "learning_rate": 7.138826356809401e-06, "loss": 0.7356, "step": 8486 }, { "epoch": 0.76, "grad_norm": 5.216554410122482, "learning_rate": 7.138173390426386e-06, "loss": 0.7485, "step": 8487 }, { "epoch": 0.76, "grad_norm": 5.5008756713437466, "learning_rate": 7.1375203794136835e-06, "loss": 0.7346, "step": 8488 }, { "epoch": 0.76, "grad_norm": 5.851773969638721, "learning_rate": 7.1368673237849255e-06, "loss": 0.7382, "step": 8489 }, { "epoch": 0.76, "grad_norm": 4.634542090323374, "learning_rate": 7.136214223553741e-06, "loss": 0.837, "step": 8490 }, { "epoch": 0.76, "grad_norm": 7.457605433860655, "learning_rate": 7.135561078733762e-06, "loss": 0.7631, "step": 8491 }, { "epoch": 0.76, "grad_norm": 7.249307173382591, "learning_rate": 7.134907889338624e-06, "loss": 0.7842, "step": 8492 }, { "epoch": 0.76, "grad_norm": 5.543722500432254, "learning_rate": 7.13425465538196e-06, "loss": 0.7841, "step": 8493 }, { "epoch": 0.76, "grad_norm": 5.617535438363643, "learning_rate": 7.133601376877401e-06, "loss": 0.8343, "step": 8494 }, { "epoch": 0.76, "grad_norm": 5.455328476326754, "learning_rate": 7.132948053838588e-06, "loss": 0.7958, "step": 8495 }, { "epoch": 0.76, "grad_norm": 5.104735781674468, "learning_rate": 7.132294686279156e-06, "loss": 0.8018, "step": 8496 }, { "epoch": 0.76, "grad_norm": 4.643810418479859, "learning_rate": 7.131641274212739e-06, "loss": 0.8163, "step": 8497 }, { "epoch": 0.76, "grad_norm": 5.479496718470921, "learning_rate": 7.13098781765298e-06, "loss": 0.8104, "step": 8498 }, { "epoch": 0.76, "grad_norm": 5.268275739039591, "learning_rate": 7.130334316613518e-06, "loss": 0.8628, "step": 8499 }, { "epoch": 0.76, "grad_norm": 5.670354202759074, "learning_rate": 7.129680771107993e-06, "loss": 0.7803, "step": 8500 }, { "epoch": 0.76, "grad_norm": 5.169012136892693, "learning_rate": 7.1290271811500435e-06, "loss": 0.79, "step": 8501 }, { "epoch": 0.76, "grad_norm": 5.1034808987322196, "learning_rate": 7.128373546753315e-06, "loss": 0.7146, "step": 8502 }, { "epoch": 0.76, "grad_norm": 6.351634929741886, "learning_rate": 7.1277198679314505e-06, "loss": 0.8274, "step": 8503 }, { "epoch": 0.76, "grad_norm": 6.936849846257414, "learning_rate": 7.127066144698094e-06, "loss": 0.7809, "step": 8504 }, { "epoch": 0.76, "grad_norm": 6.152009938821192, "learning_rate": 7.126412377066887e-06, "loss": 0.7596, "step": 8505 }, { "epoch": 0.76, "grad_norm": 4.634665984975528, "learning_rate": 7.125758565051482e-06, "loss": 0.7339, "step": 8506 }, { "epoch": 0.76, "grad_norm": 5.502896720373686, "learning_rate": 7.12510470866552e-06, "loss": 0.7912, "step": 8507 }, { "epoch": 0.76, "grad_norm": 5.887819195733592, "learning_rate": 7.124450807922652e-06, "loss": 0.7714, "step": 8508 }, { "epoch": 0.76, "grad_norm": 6.235649482623136, "learning_rate": 7.1237968628365265e-06, "loss": 0.7268, "step": 8509 }, { "epoch": 0.76, "grad_norm": 7.44630660574832, "learning_rate": 7.123142873420791e-06, "loss": 0.7434, "step": 8510 }, { "epoch": 0.76, "grad_norm": 5.817105570382496, "learning_rate": 7.1224888396890976e-06, "loss": 0.8501, "step": 8511 }, { "epoch": 0.76, "grad_norm": 4.9859593258733215, "learning_rate": 7.121834761655099e-06, "loss": 0.7083, "step": 8512 }, { "epoch": 0.76, "grad_norm": 5.111927861188702, "learning_rate": 7.1211806393324455e-06, "loss": 0.7878, "step": 8513 }, { "epoch": 0.76, "grad_norm": 6.183424458084783, "learning_rate": 7.120526472734792e-06, "loss": 0.7597, "step": 8514 }, { "epoch": 0.76, "grad_norm": 5.963072336820858, "learning_rate": 7.119872261875793e-06, "loss": 0.949, "step": 8515 }, { "epoch": 0.76, "grad_norm": 7.929950443843143, "learning_rate": 7.1192180067691015e-06, "loss": 0.8407, "step": 8516 }, { "epoch": 0.76, "grad_norm": 5.971827437964212, "learning_rate": 7.118563707428376e-06, "loss": 0.8882, "step": 8517 }, { "epoch": 0.76, "grad_norm": 6.187536348717181, "learning_rate": 7.117909363867272e-06, "loss": 0.7884, "step": 8518 }, { "epoch": 0.76, "grad_norm": 6.323254326933913, "learning_rate": 7.117254976099449e-06, "loss": 0.801, "step": 8519 }, { "epoch": 0.76, "grad_norm": 7.096611662124351, "learning_rate": 7.1166005441385645e-06, "loss": 0.8163, "step": 8520 }, { "epoch": 0.76, "grad_norm": 5.658168271688718, "learning_rate": 7.115946067998279e-06, "loss": 0.8485, "step": 8521 }, { "epoch": 0.76, "grad_norm": 5.890814005756847, "learning_rate": 7.115291547692255e-06, "loss": 0.7807, "step": 8522 }, { "epoch": 0.76, "grad_norm": 6.694899351141687, "learning_rate": 7.114636983234149e-06, "loss": 0.7722, "step": 8523 }, { "epoch": 0.76, "grad_norm": 6.935215439162954, "learning_rate": 7.113982374637629e-06, "loss": 0.8293, "step": 8524 }, { "epoch": 0.76, "grad_norm": 4.038824396197224, "learning_rate": 7.113327721916357e-06, "loss": 0.7608, "step": 8525 }, { "epoch": 0.76, "grad_norm": 4.8085117551591, "learning_rate": 7.112673025083995e-06, "loss": 0.7068, "step": 8526 }, { "epoch": 0.76, "grad_norm": 6.728400284551263, "learning_rate": 7.1120182841542105e-06, "loss": 0.8353, "step": 8527 }, { "epoch": 0.76, "grad_norm": 5.059079466575563, "learning_rate": 7.111363499140671e-06, "loss": 0.7988, "step": 8528 }, { "epoch": 0.76, "grad_norm": 5.312661212356789, "learning_rate": 7.1107086700570424e-06, "loss": 0.721, "step": 8529 }, { "epoch": 0.76, "grad_norm": 4.384001896849415, "learning_rate": 7.1100537969169915e-06, "loss": 0.7767, "step": 8530 }, { "epoch": 0.76, "grad_norm": 4.842111866777924, "learning_rate": 7.1093988797341894e-06, "loss": 0.7492, "step": 8531 }, { "epoch": 0.76, "grad_norm": 4.7817112128253045, "learning_rate": 7.108743918522304e-06, "loss": 0.7867, "step": 8532 }, { "epoch": 0.76, "grad_norm": 6.9655484699768255, "learning_rate": 7.1080889132950085e-06, "loss": 0.8597, "step": 8533 }, { "epoch": 0.76, "grad_norm": 6.828116595509748, "learning_rate": 7.107433864065974e-06, "loss": 0.7983, "step": 8534 }, { "epoch": 0.76, "grad_norm": 4.8309106248896, "learning_rate": 7.10677877084887e-06, "loss": 0.8142, "step": 8535 }, { "epoch": 0.76, "grad_norm": 7.5137516192390805, "learning_rate": 7.106123633657376e-06, "loss": 0.8563, "step": 8536 }, { "epoch": 0.76, "grad_norm": 4.45346241124193, "learning_rate": 7.105468452505162e-06, "loss": 0.748, "step": 8537 }, { "epoch": 0.76, "grad_norm": 5.100384463048517, "learning_rate": 7.104813227405906e-06, "loss": 0.8028, "step": 8538 }, { "epoch": 0.76, "grad_norm": 3.661358651635437, "learning_rate": 7.104157958373281e-06, "loss": 0.7707, "step": 8539 }, { "epoch": 0.76, "grad_norm": 7.246023624020909, "learning_rate": 7.103502645420969e-06, "loss": 0.8398, "step": 8540 }, { "epoch": 0.76, "grad_norm": 5.715079535467981, "learning_rate": 7.1028472885626455e-06, "loss": 0.7598, "step": 8541 }, { "epoch": 0.76, "grad_norm": 6.717089517300284, "learning_rate": 7.102191887811989e-06, "loss": 0.8387, "step": 8542 }, { "epoch": 0.76, "grad_norm": 6.69655335215486, "learning_rate": 7.101536443182682e-06, "loss": 0.7868, "step": 8543 }, { "epoch": 0.76, "grad_norm": 5.333463136737821, "learning_rate": 7.1008809546884025e-06, "loss": 0.726, "step": 8544 }, { "epoch": 0.76, "grad_norm": 5.198239033693332, "learning_rate": 7.100225422342834e-06, "loss": 0.7821, "step": 8545 }, { "epoch": 0.76, "grad_norm": 4.836914752628593, "learning_rate": 7.0995698461596605e-06, "loss": 0.7062, "step": 8546 }, { "epoch": 0.76, "grad_norm": 5.392028595616593, "learning_rate": 7.098914226152565e-06, "loss": 0.8411, "step": 8547 }, { "epoch": 0.76, "grad_norm": 5.350330200689718, "learning_rate": 7.09825856233523e-06, "loss": 0.8146, "step": 8548 }, { "epoch": 0.76, "grad_norm": 3.9815562477129203, "learning_rate": 7.097602854721342e-06, "loss": 0.7682, "step": 8549 }, { "epoch": 0.76, "grad_norm": 5.946033016792804, "learning_rate": 7.09694710332459e-06, "loss": 0.7847, "step": 8550 }, { "epoch": 0.76, "grad_norm": 5.128925130269883, "learning_rate": 7.096291308158658e-06, "loss": 0.7918, "step": 8551 }, { "epoch": 0.76, "grad_norm": 4.986311349672925, "learning_rate": 7.095635469237236e-06, "loss": 0.7069, "step": 8552 }, { "epoch": 0.76, "grad_norm": 5.776740352378233, "learning_rate": 7.094979586574015e-06, "loss": 0.8008, "step": 8553 }, { "epoch": 0.76, "grad_norm": 4.729044131115055, "learning_rate": 7.09432366018268e-06, "loss": 0.7736, "step": 8554 }, { "epoch": 0.76, "grad_norm": 4.550740652826757, "learning_rate": 7.0936676900769265e-06, "loss": 0.7456, "step": 8555 }, { "epoch": 0.76, "grad_norm": 7.152281059750707, "learning_rate": 7.093011676270445e-06, "loss": 0.7775, "step": 8556 }, { "epoch": 0.76, "grad_norm": 7.515939408253878, "learning_rate": 7.092355618776928e-06, "loss": 0.8608, "step": 8557 }, { "epoch": 0.76, "grad_norm": 6.413621189887851, "learning_rate": 7.091699517610071e-06, "loss": 0.905, "step": 8558 }, { "epoch": 0.76, "grad_norm": 4.566346870551573, "learning_rate": 7.091043372783566e-06, "loss": 0.8565, "step": 8559 }, { "epoch": 0.76, "grad_norm": 5.904929806734705, "learning_rate": 7.090387184311109e-06, "loss": 0.8537, "step": 8560 }, { "epoch": 0.76, "grad_norm": 4.3133411270966615, "learning_rate": 7.0897309522064e-06, "loss": 0.7002, "step": 8561 }, { "epoch": 0.76, "grad_norm": 4.692325836663377, "learning_rate": 7.089074676483131e-06, "loss": 0.85, "step": 8562 }, { "epoch": 0.76, "grad_norm": 5.762263907932586, "learning_rate": 7.088418357155005e-06, "loss": 0.8481, "step": 8563 }, { "epoch": 0.76, "grad_norm": 8.68480988313835, "learning_rate": 7.0877619942357175e-06, "loss": 0.7459, "step": 8564 }, { "epoch": 0.76, "grad_norm": 4.486321572169034, "learning_rate": 7.087105587738974e-06, "loss": 0.7993, "step": 8565 }, { "epoch": 0.76, "grad_norm": 4.879630862506185, "learning_rate": 7.0864491376784695e-06, "loss": 0.7834, "step": 8566 }, { "epoch": 0.76, "grad_norm": 5.275618272592122, "learning_rate": 7.085792644067908e-06, "loss": 0.8631, "step": 8567 }, { "epoch": 0.76, "grad_norm": 5.220604526418978, "learning_rate": 7.085136106920994e-06, "loss": 0.7502, "step": 8568 }, { "epoch": 0.76, "grad_norm": 4.636024979973599, "learning_rate": 7.08447952625143e-06, "loss": 0.7028, "step": 8569 }, { "epoch": 0.76, "grad_norm": 5.000667657306148, "learning_rate": 7.083822902072921e-06, "loss": 0.7956, "step": 8570 }, { "epoch": 0.76, "grad_norm": 5.584325135583474, "learning_rate": 7.083166234399171e-06, "loss": 0.8134, "step": 8571 }, { "epoch": 0.76, "grad_norm": 5.396436541197673, "learning_rate": 7.08250952324389e-06, "loss": 0.8333, "step": 8572 }, { "epoch": 0.76, "grad_norm": 7.8237575569700475, "learning_rate": 7.0818527686207825e-06, "loss": 0.7463, "step": 8573 }, { "epoch": 0.76, "grad_norm": 5.523882520458294, "learning_rate": 7.081195970543558e-06, "loss": 0.8178, "step": 8574 }, { "epoch": 0.76, "grad_norm": 5.329425755392627, "learning_rate": 7.080539129025923e-06, "loss": 0.7855, "step": 8575 }, { "epoch": 0.77, "grad_norm": 6.498254644790025, "learning_rate": 7.079882244081593e-06, "loss": 0.7779, "step": 8576 }, { "epoch": 0.77, "grad_norm": 7.113938844245298, "learning_rate": 7.079225315724274e-06, "loss": 0.8186, "step": 8577 }, { "epoch": 0.77, "grad_norm": 5.600071114258012, "learning_rate": 7.07856834396768e-06, "loss": 0.748, "step": 8578 }, { "epoch": 0.77, "grad_norm": 5.751627150762267, "learning_rate": 7.077911328825526e-06, "loss": 0.8818, "step": 8579 }, { "epoch": 0.77, "grad_norm": 6.7875619149795225, "learning_rate": 7.077254270311522e-06, "loss": 0.7476, "step": 8580 }, { "epoch": 0.77, "grad_norm": 7.06552994467639, "learning_rate": 7.076597168439384e-06, "loss": 0.8232, "step": 8581 }, { "epoch": 0.77, "grad_norm": 6.691219882983257, "learning_rate": 7.075940023222829e-06, "loss": 0.8193, "step": 8582 }, { "epoch": 0.77, "grad_norm": 6.1557247622652875, "learning_rate": 7.075282834675571e-06, "loss": 0.7811, "step": 8583 }, { "epoch": 0.77, "grad_norm": 6.65841640148906, "learning_rate": 7.074625602811328e-06, "loss": 0.7634, "step": 8584 }, { "epoch": 0.77, "grad_norm": 4.562064231868681, "learning_rate": 7.073968327643821e-06, "loss": 0.7267, "step": 8585 }, { "epoch": 0.77, "grad_norm": 4.949169552730887, "learning_rate": 7.073311009186767e-06, "loss": 0.7577, "step": 8586 }, { "epoch": 0.77, "grad_norm": 5.093271328328505, "learning_rate": 7.0726536474538845e-06, "loss": 0.7986, "step": 8587 }, { "epoch": 0.77, "grad_norm": 6.38847598055717, "learning_rate": 7.071996242458896e-06, "loss": 0.855, "step": 8588 }, { "epoch": 0.77, "grad_norm": 6.145626620864722, "learning_rate": 7.0713387942155244e-06, "loss": 0.8214, "step": 8589 }, { "epoch": 0.77, "grad_norm": 6.523382980242347, "learning_rate": 7.070681302737491e-06, "loss": 0.7947, "step": 8590 }, { "epoch": 0.77, "grad_norm": 7.464788026919231, "learning_rate": 7.070023768038521e-06, "loss": 0.8611, "step": 8591 }, { "epoch": 0.77, "grad_norm": 6.182775229591709, "learning_rate": 7.069366190132337e-06, "loss": 0.8602, "step": 8592 }, { "epoch": 0.77, "grad_norm": 6.5656475760619095, "learning_rate": 7.0687085690326665e-06, "loss": 0.8155, "step": 8593 }, { "epoch": 0.77, "grad_norm": 5.092929266916508, "learning_rate": 7.068050904753235e-06, "loss": 0.7111, "step": 8594 }, { "epoch": 0.77, "grad_norm": 5.611102673194427, "learning_rate": 7.06739319730777e-06, "loss": 0.7579, "step": 8595 }, { "epoch": 0.77, "grad_norm": 5.247220213622029, "learning_rate": 7.0667354467100005e-06, "loss": 0.7649, "step": 8596 }, { "epoch": 0.77, "grad_norm": 6.5368421150959835, "learning_rate": 7.0660776529736515e-06, "loss": 0.7424, "step": 8597 }, { "epoch": 0.77, "grad_norm": 5.1500636926629815, "learning_rate": 7.065419816112459e-06, "loss": 0.8097, "step": 8598 }, { "epoch": 0.77, "grad_norm": 5.496015454738108, "learning_rate": 7.06476193614015e-06, "loss": 0.7345, "step": 8599 }, { "epoch": 0.77, "grad_norm": 6.089033973743254, "learning_rate": 7.064104013070456e-06, "loss": 0.8238, "step": 8600 }, { "epoch": 0.77, "grad_norm": 5.916422414102349, "learning_rate": 7.063446046917115e-06, "loss": 0.7188, "step": 8601 }, { "epoch": 0.77, "grad_norm": 4.891597314170852, "learning_rate": 7.062788037693853e-06, "loss": 0.8338, "step": 8602 }, { "epoch": 0.77, "grad_norm": 6.462826394889445, "learning_rate": 7.062129985414409e-06, "loss": 0.7428, "step": 8603 }, { "epoch": 0.77, "grad_norm": 4.952845164652138, "learning_rate": 7.061471890092518e-06, "loss": 0.7741, "step": 8604 }, { "epoch": 0.77, "grad_norm": 6.121399445356057, "learning_rate": 7.060813751741915e-06, "loss": 0.819, "step": 8605 }, { "epoch": 0.77, "grad_norm": 6.048621406064104, "learning_rate": 7.060155570376338e-06, "loss": 0.7891, "step": 8606 }, { "epoch": 0.77, "grad_norm": 5.0979410676740935, "learning_rate": 7.059497346009526e-06, "loss": 0.7276, "step": 8607 }, { "epoch": 0.77, "grad_norm": 6.357736330048923, "learning_rate": 7.058839078655218e-06, "loss": 0.8298, "step": 8608 }, { "epoch": 0.77, "grad_norm": 5.482643473817038, "learning_rate": 7.058180768327152e-06, "loss": 0.7523, "step": 8609 }, { "epoch": 0.77, "grad_norm": 5.729775901916862, "learning_rate": 7.057522415039069e-06, "loss": 0.7947, "step": 8610 }, { "epoch": 0.77, "grad_norm": 4.802095487790016, "learning_rate": 7.0568640188047125e-06, "loss": 0.8191, "step": 8611 }, { "epoch": 0.77, "grad_norm": 5.182879769284677, "learning_rate": 7.056205579637823e-06, "loss": 0.7168, "step": 8612 }, { "epoch": 0.77, "grad_norm": 5.138959148716554, "learning_rate": 7.055547097552146e-06, "loss": 0.7918, "step": 8613 }, { "epoch": 0.77, "grad_norm": 5.072039282970667, "learning_rate": 7.054888572561425e-06, "loss": 0.8097, "step": 8614 }, { "epoch": 0.77, "grad_norm": 6.532254596574271, "learning_rate": 7.054230004679405e-06, "loss": 0.7503, "step": 8615 }, { "epoch": 0.77, "grad_norm": 4.136702503002688, "learning_rate": 7.053571393919832e-06, "loss": 0.7364, "step": 8616 }, { "epoch": 0.77, "grad_norm": 5.528163500152788, "learning_rate": 7.052912740296454e-06, "loss": 0.8539, "step": 8617 }, { "epoch": 0.77, "grad_norm": 4.263385065017905, "learning_rate": 7.052254043823017e-06, "loss": 0.7728, "step": 8618 }, { "epoch": 0.77, "grad_norm": 4.729961703149936, "learning_rate": 7.051595304513273e-06, "loss": 0.7315, "step": 8619 }, { "epoch": 0.77, "grad_norm": 4.691112824850399, "learning_rate": 7.050936522380969e-06, "loss": 0.721, "step": 8620 }, { "epoch": 0.77, "grad_norm": 5.620644213768923, "learning_rate": 7.050277697439856e-06, "loss": 0.7921, "step": 8621 }, { "epoch": 0.77, "grad_norm": 4.280583418432948, "learning_rate": 7.049618829703687e-06, "loss": 0.7353, "step": 8622 }, { "epoch": 0.77, "grad_norm": 5.996966779740856, "learning_rate": 7.048959919186212e-06, "loss": 0.7876, "step": 8623 }, { "epoch": 0.77, "grad_norm": 5.5323939902869395, "learning_rate": 7.048300965901188e-06, "loss": 0.7576, "step": 8624 }, { "epoch": 0.77, "grad_norm": 8.880897610274847, "learning_rate": 7.047641969862365e-06, "loss": 0.7819, "step": 8625 }, { "epoch": 0.77, "grad_norm": 5.270349612871482, "learning_rate": 7.046982931083501e-06, "loss": 0.8385, "step": 8626 }, { "epoch": 0.77, "grad_norm": 6.902789953742002, "learning_rate": 7.046323849578353e-06, "loss": 0.744, "step": 8627 }, { "epoch": 0.77, "grad_norm": 5.0564871859159375, "learning_rate": 7.045664725360673e-06, "loss": 0.787, "step": 8628 }, { "epoch": 0.77, "grad_norm": 5.53756858752295, "learning_rate": 7.045005558444224e-06, "loss": 0.82, "step": 8629 }, { "epoch": 0.77, "grad_norm": 5.130417939927797, "learning_rate": 7.044346348842762e-06, "loss": 0.7521, "step": 8630 }, { "epoch": 0.77, "grad_norm": 5.3123738127346485, "learning_rate": 7.043687096570046e-06, "loss": 0.8334, "step": 8631 }, { "epoch": 0.77, "grad_norm": 5.7379462398617695, "learning_rate": 7.043027801639838e-06, "loss": 0.791, "step": 8632 }, { "epoch": 0.77, "grad_norm": 6.45203931612486, "learning_rate": 7.0423684640659e-06, "loss": 0.761, "step": 8633 }, { "epoch": 0.77, "grad_norm": 6.18012118780697, "learning_rate": 7.041709083861991e-06, "loss": 0.7799, "step": 8634 }, { "epoch": 0.77, "grad_norm": 5.773637842530373, "learning_rate": 7.041049661041877e-06, "loss": 0.772, "step": 8635 }, { "epoch": 0.77, "grad_norm": 4.0902460332241235, "learning_rate": 7.040390195619322e-06, "loss": 0.8057, "step": 8636 }, { "epoch": 0.77, "grad_norm": 4.105430368696957, "learning_rate": 7.03973068760809e-06, "loss": 0.8016, "step": 8637 }, { "epoch": 0.77, "grad_norm": 5.520327770118005, "learning_rate": 7.039071137021945e-06, "loss": 0.8248, "step": 8638 }, { "epoch": 0.77, "grad_norm": 4.225895786650584, "learning_rate": 7.038411543874657e-06, "loss": 0.7956, "step": 8639 }, { "epoch": 0.77, "grad_norm": 7.3790384956305415, "learning_rate": 7.0377519081799905e-06, "loss": 0.7586, "step": 8640 }, { "epoch": 0.77, "grad_norm": 3.964020171373996, "learning_rate": 7.0370922299517165e-06, "loss": 0.7678, "step": 8641 }, { "epoch": 0.77, "grad_norm": 5.9934738503145875, "learning_rate": 7.036432509203605e-06, "loss": 0.755, "step": 8642 }, { "epoch": 0.77, "grad_norm": 5.822118563807853, "learning_rate": 7.035772745949422e-06, "loss": 0.7679, "step": 8643 }, { "epoch": 0.77, "grad_norm": 6.916371019277149, "learning_rate": 7.035112940202943e-06, "loss": 0.8005, "step": 8644 }, { "epoch": 0.77, "grad_norm": 8.992323098987782, "learning_rate": 7.034453091977937e-06, "loss": 0.7527, "step": 8645 }, { "epoch": 0.77, "grad_norm": 5.754907902936067, "learning_rate": 7.033793201288179e-06, "loss": 0.8082, "step": 8646 }, { "epoch": 0.77, "grad_norm": 8.430636231759152, "learning_rate": 7.033133268147441e-06, "loss": 0.695, "step": 8647 }, { "epoch": 0.77, "grad_norm": 5.933098131757555, "learning_rate": 7.0324732925694985e-06, "loss": 0.7427, "step": 8648 }, { "epoch": 0.77, "grad_norm": 5.106873071408378, "learning_rate": 7.031813274568128e-06, "loss": 0.8342, "step": 8649 }, { "epoch": 0.77, "grad_norm": 5.19025691478056, "learning_rate": 7.031153214157105e-06, "loss": 0.7102, "step": 8650 }, { "epoch": 0.77, "grad_norm": 5.673780188951984, "learning_rate": 7.030493111350207e-06, "loss": 0.786, "step": 8651 }, { "epoch": 0.77, "grad_norm": 4.853396973020694, "learning_rate": 7.029832966161211e-06, "loss": 0.797, "step": 8652 }, { "epoch": 0.77, "grad_norm": 7.125287075266279, "learning_rate": 7.029172778603897e-06, "loss": 0.7447, "step": 8653 }, { "epoch": 0.77, "grad_norm": 6.124244550306576, "learning_rate": 7.0285125486920445e-06, "loss": 0.7118, "step": 8654 }, { "epoch": 0.77, "grad_norm": 5.816983436509502, "learning_rate": 7.027852276439437e-06, "loss": 0.8066, "step": 8655 }, { "epoch": 0.77, "grad_norm": 5.226018845279421, "learning_rate": 7.027191961859851e-06, "loss": 0.714, "step": 8656 }, { "epoch": 0.77, "grad_norm": 4.908459435049178, "learning_rate": 7.026531604967075e-06, "loss": 0.8056, "step": 8657 }, { "epoch": 0.77, "grad_norm": 4.7192084888365455, "learning_rate": 7.02587120577489e-06, "loss": 0.7691, "step": 8658 }, { "epoch": 0.77, "grad_norm": 5.939117880984281, "learning_rate": 7.025210764297079e-06, "loss": 0.7032, "step": 8659 }, { "epoch": 0.77, "grad_norm": 4.939471382052788, "learning_rate": 7.024550280547429e-06, "loss": 0.7931, "step": 8660 }, { "epoch": 0.77, "grad_norm": 4.878422788902638, "learning_rate": 7.023889754539725e-06, "loss": 0.7165, "step": 8661 }, { "epoch": 0.77, "grad_norm": 5.0033554366942194, "learning_rate": 7.023229186287755e-06, "loss": 0.7479, "step": 8662 }, { "epoch": 0.77, "grad_norm": 5.5569034881191754, "learning_rate": 7.022568575805307e-06, "loss": 0.7389, "step": 8663 }, { "epoch": 0.77, "grad_norm": 4.9509160532028895, "learning_rate": 7.021907923106167e-06, "loss": 0.815, "step": 8664 }, { "epoch": 0.77, "grad_norm": 7.25896838350508, "learning_rate": 7.021247228204129e-06, "loss": 0.7763, "step": 8665 }, { "epoch": 0.77, "grad_norm": 4.276753145928765, "learning_rate": 7.020586491112982e-06, "loss": 0.8098, "step": 8666 }, { "epoch": 0.77, "grad_norm": 5.636041684877542, "learning_rate": 7.0199257118465155e-06, "loss": 0.7827, "step": 8667 }, { "epoch": 0.77, "grad_norm": 5.49553482801673, "learning_rate": 7.019264890418524e-06, "loss": 0.8846, "step": 8668 }, { "epoch": 0.77, "grad_norm": 5.822765626685625, "learning_rate": 7.018604026842801e-06, "loss": 0.7964, "step": 8669 }, { "epoch": 0.77, "grad_norm": 4.175701294105965, "learning_rate": 7.0179431211331386e-06, "loss": 0.7491, "step": 8670 }, { "epoch": 0.77, "grad_norm": 6.2235715110235015, "learning_rate": 7.017282173303333e-06, "loss": 0.7993, "step": 8671 }, { "epoch": 0.77, "grad_norm": 5.33419695549831, "learning_rate": 7.01662118336718e-06, "loss": 0.7743, "step": 8672 }, { "epoch": 0.77, "grad_norm": 5.052462389922944, "learning_rate": 7.0159601513384754e-06, "loss": 0.6812, "step": 8673 }, { "epoch": 0.77, "grad_norm": 5.058178882174749, "learning_rate": 7.015299077231018e-06, "loss": 0.712, "step": 8674 }, { "epoch": 0.77, "grad_norm": 8.555893050241734, "learning_rate": 7.014637961058606e-06, "loss": 0.753, "step": 8675 }, { "epoch": 0.77, "grad_norm": 4.9600673390640155, "learning_rate": 7.0139768028350386e-06, "loss": 0.8019, "step": 8676 }, { "epoch": 0.77, "grad_norm": 4.5125027073245025, "learning_rate": 7.0133156025741154e-06, "loss": 0.8116, "step": 8677 }, { "epoch": 0.77, "grad_norm": 6.348857441653793, "learning_rate": 7.012654360289638e-06, "loss": 0.7326, "step": 8678 }, { "epoch": 0.77, "grad_norm": 5.13290288490775, "learning_rate": 7.011993075995409e-06, "loss": 0.792, "step": 8679 }, { "epoch": 0.77, "grad_norm": 6.337607642857867, "learning_rate": 7.01133174970523e-06, "loss": 0.8457, "step": 8680 }, { "epoch": 0.77, "grad_norm": 8.385371325897804, "learning_rate": 7.010670381432907e-06, "loss": 0.8372, "step": 8681 }, { "epoch": 0.77, "grad_norm": 5.107359164932941, "learning_rate": 7.010008971192241e-06, "loss": 0.7403, "step": 8682 }, { "epoch": 0.77, "grad_norm": 5.847199859293259, "learning_rate": 7.0093475189970405e-06, "loss": 0.7612, "step": 8683 }, { "epoch": 0.77, "grad_norm": 6.893853895487424, "learning_rate": 7.008686024861112e-06, "loss": 0.7599, "step": 8684 }, { "epoch": 0.77, "grad_norm": 5.1372292083109805, "learning_rate": 7.00802448879826e-06, "loss": 0.8457, "step": 8685 }, { "epoch": 0.77, "grad_norm": 4.69470677546294, "learning_rate": 7.007362910822295e-06, "loss": 0.7352, "step": 8686 }, { "epoch": 0.77, "grad_norm": 6.722329329951982, "learning_rate": 7.0067012909470265e-06, "loss": 0.725, "step": 8687 }, { "epoch": 0.78, "grad_norm": 7.371226386585198, "learning_rate": 7.006039629186262e-06, "loss": 0.8201, "step": 8688 }, { "epoch": 0.78, "grad_norm": 5.101062986796609, "learning_rate": 7.005377925553813e-06, "loss": 0.85, "step": 8689 }, { "epoch": 0.78, "grad_norm": 5.578162078671996, "learning_rate": 7.004716180063493e-06, "loss": 0.7668, "step": 8690 }, { "epoch": 0.78, "grad_norm": 5.477909131391436, "learning_rate": 7.004054392729112e-06, "loss": 0.7142, "step": 8691 }, { "epoch": 0.78, "grad_norm": 4.6778229110072544, "learning_rate": 7.003392563564483e-06, "loss": 0.6994, "step": 8692 }, { "epoch": 0.78, "grad_norm": 5.096129419029837, "learning_rate": 7.0027306925834234e-06, "loss": 0.8052, "step": 8693 }, { "epoch": 0.78, "grad_norm": 6.507391940768327, "learning_rate": 7.002068779799746e-06, "loss": 0.6833, "step": 8694 }, { "epoch": 0.78, "grad_norm": 7.121084023074429, "learning_rate": 7.0014068252272675e-06, "loss": 0.7241, "step": 8695 }, { "epoch": 0.78, "grad_norm": 6.677515719216135, "learning_rate": 7.000744828879804e-06, "loss": 0.8228, "step": 8696 }, { "epoch": 0.78, "grad_norm": 5.334503914127166, "learning_rate": 7.000082790771174e-06, "loss": 0.8295, "step": 8697 }, { "epoch": 0.78, "grad_norm": 7.7588246250853015, "learning_rate": 6.999420710915196e-06, "loss": 0.8003, "step": 8698 }, { "epoch": 0.78, "grad_norm": 5.705119526264606, "learning_rate": 6.998758589325688e-06, "loss": 0.7771, "step": 8699 }, { "epoch": 0.78, "grad_norm": 5.765272004935141, "learning_rate": 6.998096426016474e-06, "loss": 0.6978, "step": 8700 }, { "epoch": 0.78, "grad_norm": 9.402101225700655, "learning_rate": 6.997434221001371e-06, "loss": 0.8194, "step": 8701 }, { "epoch": 0.78, "grad_norm": 4.764403352485659, "learning_rate": 6.996771974294204e-06, "loss": 0.8163, "step": 8702 }, { "epoch": 0.78, "grad_norm": 4.9566025479288776, "learning_rate": 6.996109685908795e-06, "loss": 0.7431, "step": 8703 }, { "epoch": 0.78, "grad_norm": 5.029321315819416, "learning_rate": 6.995447355858967e-06, "loss": 0.7899, "step": 8704 }, { "epoch": 0.78, "grad_norm": 5.296764723478985, "learning_rate": 6.994784984158544e-06, "loss": 0.6696, "step": 8705 }, { "epoch": 0.78, "grad_norm": 4.551627925667924, "learning_rate": 6.994122570821357e-06, "loss": 0.7338, "step": 8706 }, { "epoch": 0.78, "grad_norm": 5.776684488490045, "learning_rate": 6.9934601158612244e-06, "loss": 0.7699, "step": 8707 }, { "epoch": 0.78, "grad_norm": 5.978215355176727, "learning_rate": 6.992797619291979e-06, "loss": 0.8188, "step": 8708 }, { "epoch": 0.78, "grad_norm": 4.294305712239087, "learning_rate": 6.992135081127448e-06, "loss": 0.7643, "step": 8709 }, { "epoch": 0.78, "grad_norm": 4.639776438866828, "learning_rate": 6.99147250138146e-06, "loss": 0.8239, "step": 8710 }, { "epoch": 0.78, "grad_norm": 7.533655469431066, "learning_rate": 6.990809880067843e-06, "loss": 0.7438, "step": 8711 }, { "epoch": 0.78, "grad_norm": 6.64018741551741, "learning_rate": 6.9901472172004316e-06, "loss": 0.7896, "step": 8712 }, { "epoch": 0.78, "grad_norm": 4.763957521268249, "learning_rate": 6.989484512793054e-06, "loss": 0.802, "step": 8713 }, { "epoch": 0.78, "grad_norm": 5.3284355846393785, "learning_rate": 6.988821766859545e-06, "loss": 0.7593, "step": 8714 }, { "epoch": 0.78, "grad_norm": 5.049808314967526, "learning_rate": 6.988158979413737e-06, "loss": 0.8177, "step": 8715 }, { "epoch": 0.78, "grad_norm": 4.531596241688552, "learning_rate": 6.987496150469465e-06, "loss": 0.826, "step": 8716 }, { "epoch": 0.78, "grad_norm": 4.413820365584003, "learning_rate": 6.986833280040561e-06, "loss": 0.8329, "step": 8717 }, { "epoch": 0.78, "grad_norm": 9.754624102743538, "learning_rate": 6.986170368140865e-06, "loss": 0.8406, "step": 8718 }, { "epoch": 0.78, "grad_norm": 6.209833937037458, "learning_rate": 6.985507414784213e-06, "loss": 0.7923, "step": 8719 }, { "epoch": 0.78, "grad_norm": 8.062856941827409, "learning_rate": 6.98484441998444e-06, "loss": 0.7498, "step": 8720 }, { "epoch": 0.78, "grad_norm": 4.441369272968425, "learning_rate": 6.984181383755386e-06, "loss": 0.7754, "step": 8721 }, { "epoch": 0.78, "grad_norm": 4.382854422908348, "learning_rate": 6.983518306110894e-06, "loss": 0.7601, "step": 8722 }, { "epoch": 0.78, "grad_norm": 5.266080920277388, "learning_rate": 6.9828551870647996e-06, "loss": 0.7247, "step": 8723 }, { "epoch": 0.78, "grad_norm": 5.455564339574714, "learning_rate": 6.982192026630945e-06, "loss": 0.7349, "step": 8724 }, { "epoch": 0.78, "grad_norm": 4.532274823209501, "learning_rate": 6.981528824823174e-06, "loss": 0.8564, "step": 8725 }, { "epoch": 0.78, "grad_norm": 6.76292770846291, "learning_rate": 6.980865581655327e-06, "loss": 0.7521, "step": 8726 }, { "epoch": 0.78, "grad_norm": 7.468150988718446, "learning_rate": 6.9802022971412485e-06, "loss": 0.7983, "step": 8727 }, { "epoch": 0.78, "grad_norm": 5.948115928090318, "learning_rate": 6.979538971294785e-06, "loss": 0.7209, "step": 8728 }, { "epoch": 0.78, "grad_norm": 4.785185389673297, "learning_rate": 6.97887560412978e-06, "loss": 0.7822, "step": 8729 }, { "epoch": 0.78, "grad_norm": 4.467324154195559, "learning_rate": 6.97821219566008e-06, "loss": 0.7361, "step": 8730 }, { "epoch": 0.78, "grad_norm": 5.566096646457569, "learning_rate": 6.9775487458995335e-06, "loss": 0.772, "step": 8731 }, { "epoch": 0.78, "grad_norm": 5.196555339629652, "learning_rate": 6.976885254861988e-06, "loss": 0.7656, "step": 8732 }, { "epoch": 0.78, "grad_norm": 10.022248240386226, "learning_rate": 6.9762217225612915e-06, "loss": 0.8759, "step": 8733 }, { "epoch": 0.78, "grad_norm": 4.551940366881301, "learning_rate": 6.975558149011293e-06, "loss": 0.8734, "step": 8734 }, { "epoch": 0.78, "grad_norm": 5.730763051793409, "learning_rate": 6.974894534225847e-06, "loss": 0.7713, "step": 8735 }, { "epoch": 0.78, "grad_norm": 6.068940664713013, "learning_rate": 6.974230878218801e-06, "loss": 0.9028, "step": 8736 }, { "epoch": 0.78, "grad_norm": 7.660510451386671, "learning_rate": 6.973567181004008e-06, "loss": 0.7339, "step": 8737 }, { "epoch": 0.78, "grad_norm": 5.118537617527327, "learning_rate": 6.9729034425953254e-06, "loss": 0.7708, "step": 8738 }, { "epoch": 0.78, "grad_norm": 5.233936864827721, "learning_rate": 6.972239663006602e-06, "loss": 0.7718, "step": 8739 }, { "epoch": 0.78, "grad_norm": 4.8112506944001, "learning_rate": 6.971575842251695e-06, "loss": 0.7404, "step": 8740 }, { "epoch": 0.78, "grad_norm": 5.507437468598479, "learning_rate": 6.970911980344461e-06, "loss": 0.7399, "step": 8741 }, { "epoch": 0.78, "grad_norm": 5.193477075567995, "learning_rate": 6.970248077298753e-06, "loss": 0.7499, "step": 8742 }, { "epoch": 0.78, "grad_norm": 6.883516293777517, "learning_rate": 6.969584133128432e-06, "loss": 0.805, "step": 8743 }, { "epoch": 0.78, "grad_norm": 7.209645475991228, "learning_rate": 6.968920147847356e-06, "loss": 0.8618, "step": 8744 }, { "epoch": 0.78, "grad_norm": 5.421834644241645, "learning_rate": 6.968256121469385e-06, "loss": 0.7323, "step": 8745 }, { "epoch": 0.78, "grad_norm": 6.828737317836777, "learning_rate": 6.9675920540083765e-06, "loss": 0.7422, "step": 8746 }, { "epoch": 0.78, "grad_norm": 4.675284800111492, "learning_rate": 6.966927945478193e-06, "loss": 0.8761, "step": 8747 }, { "epoch": 0.78, "grad_norm": 6.435909920368553, "learning_rate": 6.966263795892697e-06, "loss": 0.7874, "step": 8748 }, { "epoch": 0.78, "grad_norm": 7.567047397223353, "learning_rate": 6.965599605265749e-06, "loss": 0.7885, "step": 8749 }, { "epoch": 0.78, "grad_norm": 8.367958307212975, "learning_rate": 6.964935373611213e-06, "loss": 0.8004, "step": 8750 }, { "epoch": 0.78, "grad_norm": 6.908323854964598, "learning_rate": 6.964271100942956e-06, "loss": 0.8192, "step": 8751 }, { "epoch": 0.78, "grad_norm": 6.03702210421218, "learning_rate": 6.9636067872748426e-06, "loss": 0.6516, "step": 8752 }, { "epoch": 0.78, "grad_norm": 5.162900459124763, "learning_rate": 6.962942432620736e-06, "loss": 0.7893, "step": 8753 }, { "epoch": 0.78, "grad_norm": 5.560150329844022, "learning_rate": 6.962278036994505e-06, "loss": 0.8093, "step": 8754 }, { "epoch": 0.78, "grad_norm": 4.650219526719628, "learning_rate": 6.961613600410018e-06, "loss": 0.7464, "step": 8755 }, { "epoch": 0.78, "grad_norm": 5.308691334088479, "learning_rate": 6.960949122881141e-06, "loss": 0.7305, "step": 8756 }, { "epoch": 0.78, "grad_norm": 6.943569185117365, "learning_rate": 6.960284604421748e-06, "loss": 0.7926, "step": 8757 }, { "epoch": 0.78, "grad_norm": 6.478436457526467, "learning_rate": 6.959620045045705e-06, "loss": 0.807, "step": 8758 }, { "epoch": 0.78, "grad_norm": 5.063341609290813, "learning_rate": 6.958955444766886e-06, "loss": 0.8299, "step": 8759 }, { "epoch": 0.78, "grad_norm": 6.325802888834849, "learning_rate": 6.958290803599163e-06, "loss": 0.847, "step": 8760 }, { "epoch": 0.78, "grad_norm": 6.64414140597867, "learning_rate": 6.957626121556407e-06, "loss": 0.7967, "step": 8761 }, { "epoch": 0.78, "grad_norm": 5.166698153645633, "learning_rate": 6.956961398652491e-06, "loss": 0.7698, "step": 8762 }, { "epoch": 0.78, "grad_norm": 5.855507639401478, "learning_rate": 6.956296634901294e-06, "loss": 0.8143, "step": 8763 }, { "epoch": 0.78, "grad_norm": 4.919010968465467, "learning_rate": 6.955631830316688e-06, "loss": 0.8048, "step": 8764 }, { "epoch": 0.78, "grad_norm": 4.842304388563717, "learning_rate": 6.95496698491255e-06, "loss": 0.7787, "step": 8765 }, { "epoch": 0.78, "grad_norm": 7.941451591399437, "learning_rate": 6.954302098702759e-06, "loss": 0.815, "step": 8766 }, { "epoch": 0.78, "grad_norm": 5.44591927973432, "learning_rate": 6.953637171701191e-06, "loss": 0.7754, "step": 8767 }, { "epoch": 0.78, "grad_norm": 5.633333560082455, "learning_rate": 6.952972203921724e-06, "loss": 0.7836, "step": 8768 }, { "epoch": 0.78, "grad_norm": 5.136685079305134, "learning_rate": 6.9523071953782395e-06, "loss": 0.7482, "step": 8769 }, { "epoch": 0.78, "grad_norm": 7.033587900965573, "learning_rate": 6.9516421460846184e-06, "loss": 0.8036, "step": 8770 }, { "epoch": 0.78, "grad_norm": 6.0574686474447885, "learning_rate": 6.950977056054742e-06, "loss": 0.8191, "step": 8771 }, { "epoch": 0.78, "grad_norm": 6.732988301155716, "learning_rate": 6.950311925302491e-06, "loss": 0.7989, "step": 8772 }, { "epoch": 0.78, "grad_norm": 5.256539639709001, "learning_rate": 6.949646753841751e-06, "loss": 0.7719, "step": 8773 }, { "epoch": 0.78, "grad_norm": 5.4758730067637655, "learning_rate": 6.948981541686404e-06, "loss": 0.9168, "step": 8774 }, { "epoch": 0.78, "grad_norm": 4.5139770382602205, "learning_rate": 6.948316288850335e-06, "loss": 0.7832, "step": 8775 }, { "epoch": 0.78, "grad_norm": 5.543695192470939, "learning_rate": 6.9476509953474315e-06, "loss": 0.7497, "step": 8776 }, { "epoch": 0.78, "grad_norm": 5.2995260797640595, "learning_rate": 6.946985661191578e-06, "loss": 0.7931, "step": 8777 }, { "epoch": 0.78, "grad_norm": 5.209498930988276, "learning_rate": 6.946320286396661e-06, "loss": 0.7922, "step": 8778 }, { "epoch": 0.78, "grad_norm": 5.542852672214769, "learning_rate": 6.945654870976573e-06, "loss": 0.839, "step": 8779 }, { "epoch": 0.78, "grad_norm": 5.40461669508864, "learning_rate": 6.944989414945199e-06, "loss": 0.795, "step": 8780 }, { "epoch": 0.78, "grad_norm": 7.130349986596892, "learning_rate": 6.9443239183164315e-06, "loss": 0.7071, "step": 8781 }, { "epoch": 0.78, "grad_norm": 5.456066348319619, "learning_rate": 6.9436583811041594e-06, "loss": 0.8628, "step": 8782 }, { "epoch": 0.78, "grad_norm": 6.467129934819941, "learning_rate": 6.942992803322276e-06, "loss": 0.7533, "step": 8783 }, { "epoch": 0.78, "grad_norm": 5.95311823118279, "learning_rate": 6.9423271849846715e-06, "loss": 0.8677, "step": 8784 }, { "epoch": 0.78, "grad_norm": 5.504575244194517, "learning_rate": 6.941661526105241e-06, "loss": 0.7732, "step": 8785 }, { "epoch": 0.78, "grad_norm": 5.732323672663427, "learning_rate": 6.94099582669788e-06, "loss": 0.7114, "step": 8786 }, { "epoch": 0.78, "grad_norm": 4.66520220537095, "learning_rate": 6.940330086776479e-06, "loss": 0.7567, "step": 8787 }, { "epoch": 0.78, "grad_norm": 4.834789963194458, "learning_rate": 6.9396643063549396e-06, "loss": 0.8162, "step": 8788 }, { "epoch": 0.78, "grad_norm": 7.330490237684976, "learning_rate": 6.938998485447155e-06, "loss": 0.807, "step": 8789 }, { "epoch": 0.78, "grad_norm": 5.928042639929164, "learning_rate": 6.938332624067021e-06, "loss": 0.7897, "step": 8790 }, { "epoch": 0.78, "grad_norm": 4.411202479440829, "learning_rate": 6.9376667222284396e-06, "loss": 0.856, "step": 8791 }, { "epoch": 0.78, "grad_norm": 6.612793208902624, "learning_rate": 6.93700077994531e-06, "loss": 0.8439, "step": 8792 }, { "epoch": 0.78, "grad_norm": 6.7009731309011595, "learning_rate": 6.9363347972315296e-06, "loss": 0.7928, "step": 8793 }, { "epoch": 0.78, "grad_norm": 5.281401842302976, "learning_rate": 6.935668774101001e-06, "loss": 0.7002, "step": 8794 }, { "epoch": 0.78, "grad_norm": 5.278266443587289, "learning_rate": 6.935002710567627e-06, "loss": 0.8154, "step": 8795 }, { "epoch": 0.78, "grad_norm": 7.71029046591003, "learning_rate": 6.9343366066453085e-06, "loss": 0.7565, "step": 8796 }, { "epoch": 0.78, "grad_norm": 6.472494049169806, "learning_rate": 6.933670462347949e-06, "loss": 0.784, "step": 8797 }, { "epoch": 0.78, "grad_norm": 6.857073054840742, "learning_rate": 6.933004277689453e-06, "loss": 0.7878, "step": 8798 }, { "epoch": 0.78, "grad_norm": 5.404655289151616, "learning_rate": 6.932338052683727e-06, "loss": 0.8191, "step": 8799 }, { "epoch": 0.79, "grad_norm": 4.986714499533353, "learning_rate": 6.931671787344674e-06, "loss": 0.7696, "step": 8800 }, { "epoch": 0.79, "grad_norm": 6.3868838052409, "learning_rate": 6.931005481686205e-06, "loss": 0.8247, "step": 8801 }, { "epoch": 0.79, "grad_norm": 5.741346870898593, "learning_rate": 6.930339135722224e-06, "loss": 0.7446, "step": 8802 }, { "epoch": 0.79, "grad_norm": 6.124305963310402, "learning_rate": 6.9296727494666415e-06, "loss": 0.8559, "step": 8803 }, { "epoch": 0.79, "grad_norm": 4.898810014295062, "learning_rate": 6.929006322933365e-06, "loss": 0.7697, "step": 8804 }, { "epoch": 0.79, "grad_norm": 4.077895530198855, "learning_rate": 6.928339856136308e-06, "loss": 0.7527, "step": 8805 }, { "epoch": 0.79, "grad_norm": 5.709622136248156, "learning_rate": 6.927673349089378e-06, "loss": 0.746, "step": 8806 }, { "epoch": 0.79, "grad_norm": 8.868017333718155, "learning_rate": 6.927006801806488e-06, "loss": 0.7979, "step": 8807 }, { "epoch": 0.79, "grad_norm": 5.906422964647263, "learning_rate": 6.926340214301553e-06, "loss": 0.8176, "step": 8808 }, { "epoch": 0.79, "grad_norm": 7.7829421598384885, "learning_rate": 6.925673586588483e-06, "loss": 0.8388, "step": 8809 }, { "epoch": 0.79, "grad_norm": 5.777912231394991, "learning_rate": 6.925006918681195e-06, "loss": 0.824, "step": 8810 }, { "epoch": 0.79, "grad_norm": 5.730755976975412, "learning_rate": 6.924340210593603e-06, "loss": 0.7625, "step": 8811 }, { "epoch": 0.79, "grad_norm": 5.091987943009204, "learning_rate": 6.923673462339623e-06, "loss": 0.7113, "step": 8812 }, { "epoch": 0.79, "grad_norm": 5.467338480497293, "learning_rate": 6.923006673933172e-06, "loss": 0.8136, "step": 8813 }, { "epoch": 0.79, "grad_norm": 5.608862602545761, "learning_rate": 6.922339845388168e-06, "loss": 0.726, "step": 8814 }, { "epoch": 0.79, "grad_norm": 6.22275630090508, "learning_rate": 6.921672976718531e-06, "loss": 0.8577, "step": 8815 }, { "epoch": 0.79, "grad_norm": 6.633835086760427, "learning_rate": 6.9210060679381775e-06, "loss": 0.7464, "step": 8816 }, { "epoch": 0.79, "grad_norm": 5.8231285378359425, "learning_rate": 6.92033911906103e-06, "loss": 0.7356, "step": 8817 }, { "epoch": 0.79, "grad_norm": 4.1750410190413545, "learning_rate": 6.91967213010101e-06, "loss": 0.7983, "step": 8818 }, { "epoch": 0.79, "grad_norm": 6.665783978184943, "learning_rate": 6.919005101072036e-06, "loss": 0.8561, "step": 8819 }, { "epoch": 0.79, "grad_norm": 4.898461084643215, "learning_rate": 6.918338031988033e-06, "loss": 0.7535, "step": 8820 }, { "epoch": 0.79, "grad_norm": 5.668587970354986, "learning_rate": 6.917670922862927e-06, "loss": 0.7117, "step": 8821 }, { "epoch": 0.79, "grad_norm": 5.1654646425059365, "learning_rate": 6.917003773710638e-06, "loss": 0.8317, "step": 8822 }, { "epoch": 0.79, "grad_norm": 5.649197230220908, "learning_rate": 6.9163365845450935e-06, "loss": 0.7817, "step": 8823 }, { "epoch": 0.79, "grad_norm": 11.817527077694836, "learning_rate": 6.915669355380221e-06, "loss": 0.7654, "step": 8824 }, { "epoch": 0.79, "grad_norm": 7.358470673489723, "learning_rate": 6.915002086229945e-06, "loss": 0.7206, "step": 8825 }, { "epoch": 0.79, "grad_norm": 5.664089814451144, "learning_rate": 6.914334777108195e-06, "loss": 0.7532, "step": 8826 }, { "epoch": 0.79, "grad_norm": 6.540520861662685, "learning_rate": 6.913667428028899e-06, "loss": 0.8378, "step": 8827 }, { "epoch": 0.79, "grad_norm": 4.2923948765217315, "learning_rate": 6.913000039005984e-06, "loss": 0.7165, "step": 8828 }, { "epoch": 0.79, "grad_norm": 7.026892445475728, "learning_rate": 6.912332610053384e-06, "loss": 0.8451, "step": 8829 }, { "epoch": 0.79, "grad_norm": 4.3736731870898184, "learning_rate": 6.911665141185029e-06, "loss": 0.7592, "step": 8830 }, { "epoch": 0.79, "grad_norm": 6.614078356065484, "learning_rate": 6.910997632414851e-06, "loss": 0.8143, "step": 8831 }, { "epoch": 0.79, "grad_norm": 9.541582947547187, "learning_rate": 6.910330083756782e-06, "loss": 0.809, "step": 8832 }, { "epoch": 0.79, "grad_norm": 5.316803259142336, "learning_rate": 6.909662495224755e-06, "loss": 0.762, "step": 8833 }, { "epoch": 0.79, "grad_norm": 5.141846870131795, "learning_rate": 6.908994866832708e-06, "loss": 0.8004, "step": 8834 }, { "epoch": 0.79, "grad_norm": 5.988982666523254, "learning_rate": 6.908327198594571e-06, "loss": 0.7429, "step": 8835 }, { "epoch": 0.79, "grad_norm": 5.873681096149504, "learning_rate": 6.907659490524285e-06, "loss": 0.7773, "step": 8836 }, { "epoch": 0.79, "grad_norm": 5.126116146002044, "learning_rate": 6.906991742635784e-06, "loss": 0.7854, "step": 8837 }, { "epoch": 0.79, "grad_norm": 4.701297209385712, "learning_rate": 6.906323954943005e-06, "loss": 0.7458, "step": 8838 }, { "epoch": 0.79, "grad_norm": 10.645633836258698, "learning_rate": 6.905656127459891e-06, "loss": 0.7721, "step": 8839 }, { "epoch": 0.79, "grad_norm": 6.670065991810353, "learning_rate": 6.9049882602003785e-06, "loss": 0.8292, "step": 8840 }, { "epoch": 0.79, "grad_norm": 10.413841629902224, "learning_rate": 6.904320353178405e-06, "loss": 0.7232, "step": 8841 }, { "epoch": 0.79, "grad_norm": 4.733560025497825, "learning_rate": 6.903652406407917e-06, "loss": 0.7515, "step": 8842 }, { "epoch": 0.79, "grad_norm": 6.7902116307298925, "learning_rate": 6.902984419902854e-06, "loss": 0.8874, "step": 8843 }, { "epoch": 0.79, "grad_norm": 6.134001651217793, "learning_rate": 6.9023163936771576e-06, "loss": 0.7634, "step": 8844 }, { "epoch": 0.79, "grad_norm": 4.787607320533095, "learning_rate": 6.901648327744772e-06, "loss": 0.8858, "step": 8845 }, { "epoch": 0.79, "grad_norm": 6.240502814256087, "learning_rate": 6.900980222119644e-06, "loss": 0.7909, "step": 8846 }, { "epoch": 0.79, "grad_norm": 5.727571605730573, "learning_rate": 6.9003120768157164e-06, "loss": 0.8182, "step": 8847 }, { "epoch": 0.79, "grad_norm": 7.015008356436154, "learning_rate": 6.899643891846935e-06, "loss": 0.8081, "step": 8848 }, { "epoch": 0.79, "grad_norm": 5.7859972740346075, "learning_rate": 6.898975667227248e-06, "loss": 0.823, "step": 8849 }, { "epoch": 0.79, "grad_norm": 4.112589762501796, "learning_rate": 6.898307402970602e-06, "loss": 0.8174, "step": 8850 }, { "epoch": 0.79, "grad_norm": 5.51806833146756, "learning_rate": 6.897639099090946e-06, "loss": 0.7351, "step": 8851 }, { "epoch": 0.79, "grad_norm": 6.159084598276891, "learning_rate": 6.89697075560223e-06, "loss": 0.7442, "step": 8852 }, { "epoch": 0.79, "grad_norm": 5.430648302734665, "learning_rate": 6.896302372518405e-06, "loss": 0.8117, "step": 8853 }, { "epoch": 0.79, "grad_norm": 5.948527504877078, "learning_rate": 6.8956339498534195e-06, "loss": 0.8313, "step": 8854 }, { "epoch": 0.79, "grad_norm": 8.397941337666921, "learning_rate": 6.8949654876212266e-06, "loss": 0.809, "step": 8855 }, { "epoch": 0.79, "grad_norm": 5.595203649862717, "learning_rate": 6.89429698583578e-06, "loss": 0.7442, "step": 8856 }, { "epoch": 0.79, "grad_norm": 6.242395434669381, "learning_rate": 6.893628444511032e-06, "loss": 0.7872, "step": 8857 }, { "epoch": 0.79, "grad_norm": 5.823324318802721, "learning_rate": 6.892959863660936e-06, "loss": 0.8197, "step": 8858 }, { "epoch": 0.79, "grad_norm": 6.9555766658064595, "learning_rate": 6.89229124329945e-06, "loss": 0.8091, "step": 8859 }, { "epoch": 0.79, "grad_norm": 6.209856740723686, "learning_rate": 6.891622583440528e-06, "loss": 0.7723, "step": 8860 }, { "epoch": 0.79, "grad_norm": 5.731593551671059, "learning_rate": 6.890953884098126e-06, "loss": 0.8181, "step": 8861 }, { "epoch": 0.79, "grad_norm": 6.090682803796809, "learning_rate": 6.890285145286204e-06, "loss": 0.7107, "step": 8862 }, { "epoch": 0.79, "grad_norm": 5.3658584898223625, "learning_rate": 6.889616367018718e-06, "loss": 0.8744, "step": 8863 }, { "epoch": 0.79, "grad_norm": 5.682068313771945, "learning_rate": 6.888947549309629e-06, "loss": 0.8495, "step": 8864 }, { "epoch": 0.79, "grad_norm": 5.688367314210548, "learning_rate": 6.888278692172898e-06, "loss": 0.7528, "step": 8865 }, { "epoch": 0.79, "grad_norm": 6.611292152284029, "learning_rate": 6.887609795622483e-06, "loss": 0.7842, "step": 8866 }, { "epoch": 0.79, "grad_norm": 6.366402696231333, "learning_rate": 6.886940859672348e-06, "loss": 0.7769, "step": 8867 }, { "epoch": 0.79, "grad_norm": 6.899610003297146, "learning_rate": 6.886271884336455e-06, "loss": 0.6996, "step": 8868 }, { "epoch": 0.79, "grad_norm": 5.473031758982985, "learning_rate": 6.885602869628767e-06, "loss": 0.7763, "step": 8869 }, { "epoch": 0.79, "grad_norm": 6.562399444836806, "learning_rate": 6.884933815563248e-06, "loss": 0.7129, "step": 8870 }, { "epoch": 0.79, "grad_norm": 5.584762898108932, "learning_rate": 6.8842647221538636e-06, "loss": 0.6779, "step": 8871 }, { "epoch": 0.79, "grad_norm": 6.381184035995556, "learning_rate": 6.88359558941458e-06, "loss": 0.7709, "step": 8872 }, { "epoch": 0.79, "grad_norm": 5.193934288718516, "learning_rate": 6.882926417359363e-06, "loss": 0.7255, "step": 8873 }, { "epoch": 0.79, "grad_norm": 5.302543589833199, "learning_rate": 6.882257206002182e-06, "loss": 0.7794, "step": 8874 }, { "epoch": 0.79, "grad_norm": 4.414893970703186, "learning_rate": 6.881587955357004e-06, "loss": 0.7877, "step": 8875 }, { "epoch": 0.79, "grad_norm": 6.961303311252845, "learning_rate": 6.8809186654377965e-06, "loss": 0.7433, "step": 8876 }, { "epoch": 0.79, "grad_norm": 4.395797558105373, "learning_rate": 6.880249336258531e-06, "loss": 0.8405, "step": 8877 }, { "epoch": 0.79, "grad_norm": 10.028552049256819, "learning_rate": 6.879579967833179e-06, "loss": 0.8459, "step": 8878 }, { "epoch": 0.79, "grad_norm": 6.35603644048776, "learning_rate": 6.878910560175711e-06, "loss": 0.7909, "step": 8879 }, { "epoch": 0.79, "grad_norm": 6.043525622385374, "learning_rate": 6.8782411133000996e-06, "loss": 0.8457, "step": 8880 }, { "epoch": 0.79, "grad_norm": 5.7872326607346745, "learning_rate": 6.877571627220319e-06, "loss": 0.7811, "step": 8881 }, { "epoch": 0.79, "grad_norm": 5.339116775467267, "learning_rate": 6.876902101950342e-06, "loss": 0.7154, "step": 8882 }, { "epoch": 0.79, "grad_norm": 5.974058998628582, "learning_rate": 6.876232537504144e-06, "loss": 0.7677, "step": 8883 }, { "epoch": 0.79, "grad_norm": 6.307690896891668, "learning_rate": 6.875562933895701e-06, "loss": 0.8688, "step": 8884 }, { "epoch": 0.79, "grad_norm": 6.386930169088093, "learning_rate": 6.874893291138989e-06, "loss": 0.6666, "step": 8885 }, { "epoch": 0.79, "grad_norm": 4.902590431555418, "learning_rate": 6.874223609247986e-06, "loss": 0.8165, "step": 8886 }, { "epoch": 0.79, "grad_norm": 7.688379942097481, "learning_rate": 6.873553888236668e-06, "loss": 0.8051, "step": 8887 }, { "epoch": 0.79, "grad_norm": 4.73798120555801, "learning_rate": 6.8728841281190165e-06, "loss": 0.7984, "step": 8888 }, { "epoch": 0.79, "grad_norm": 6.048348432750598, "learning_rate": 6.8722143289090115e-06, "loss": 0.7772, "step": 8889 }, { "epoch": 0.79, "grad_norm": 5.791599070703299, "learning_rate": 6.871544490620631e-06, "loss": 0.7653, "step": 8890 }, { "epoch": 0.79, "grad_norm": 6.405147110748098, "learning_rate": 6.870874613267858e-06, "loss": 0.773, "step": 8891 }, { "epoch": 0.79, "grad_norm": 5.53296302045722, "learning_rate": 6.870204696864675e-06, "loss": 0.7126, "step": 8892 }, { "epoch": 0.79, "grad_norm": 13.013739820235273, "learning_rate": 6.869534741425064e-06, "loss": 0.7558, "step": 8893 }, { "epoch": 0.79, "grad_norm": 4.670897671238176, "learning_rate": 6.868864746963011e-06, "loss": 0.7869, "step": 8894 }, { "epoch": 0.79, "grad_norm": 5.644936375254622, "learning_rate": 6.8681947134924975e-06, "loss": 0.8094, "step": 8895 }, { "epoch": 0.79, "grad_norm": 20.66760019811634, "learning_rate": 6.867524641027512e-06, "loss": 0.7754, "step": 8896 }, { "epoch": 0.79, "grad_norm": 5.138747392442604, "learning_rate": 6.866854529582039e-06, "loss": 0.7803, "step": 8897 }, { "epoch": 0.79, "grad_norm": 5.3292578971826945, "learning_rate": 6.866184379170065e-06, "loss": 0.7842, "step": 8898 }, { "epoch": 0.79, "grad_norm": 6.077089406738545, "learning_rate": 6.86551418980558e-06, "loss": 0.7857, "step": 8899 }, { "epoch": 0.79, "grad_norm": 6.6169371722239285, "learning_rate": 6.864843961502572e-06, "loss": 0.8175, "step": 8900 }, { "epoch": 0.79, "grad_norm": 5.419929075311483, "learning_rate": 6.864173694275029e-06, "loss": 0.7561, "step": 8901 }, { "epoch": 0.79, "grad_norm": 4.395812639113104, "learning_rate": 6.863503388136943e-06, "loss": 0.7612, "step": 8902 }, { "epoch": 0.79, "grad_norm": 5.790073210301413, "learning_rate": 6.862833043102305e-06, "loss": 0.8127, "step": 8903 }, { "epoch": 0.79, "grad_norm": 6.843780880290494, "learning_rate": 6.8621626591851075e-06, "loss": 0.8348, "step": 8904 }, { "epoch": 0.79, "grad_norm": 3.746742677137354, "learning_rate": 6.861492236399341e-06, "loss": 0.7421, "step": 8905 }, { "epoch": 0.79, "grad_norm": 7.109303414387534, "learning_rate": 6.860821774759001e-06, "loss": 0.7962, "step": 8906 }, { "epoch": 0.79, "grad_norm": 5.779669334769916, "learning_rate": 6.860151274278083e-06, "loss": 0.7701, "step": 8907 }, { "epoch": 0.79, "grad_norm": 5.995411299322764, "learning_rate": 6.859480734970579e-06, "loss": 0.7368, "step": 8908 }, { "epoch": 0.79, "grad_norm": 4.823442556594654, "learning_rate": 6.858810156850488e-06, "loss": 0.7846, "step": 8909 }, { "epoch": 0.79, "grad_norm": 4.920303439107068, "learning_rate": 6.8581395399318065e-06, "loss": 0.8259, "step": 8910 }, { "epoch": 0.79, "grad_norm": 5.134068283460694, "learning_rate": 6.8574688842285305e-06, "loss": 0.7834, "step": 8911 }, { "epoch": 0.8, "grad_norm": 5.3324489682602225, "learning_rate": 6.856798189754658e-06, "loss": 0.6993, "step": 8912 }, { "epoch": 0.8, "grad_norm": 7.0907137176613, "learning_rate": 6.856127456524192e-06, "loss": 0.7753, "step": 8913 }, { "epoch": 0.8, "grad_norm": 7.151887232807324, "learning_rate": 6.855456684551129e-06, "loss": 0.8716, "step": 8914 }, { "epoch": 0.8, "grad_norm": 7.524889361807719, "learning_rate": 6.85478587384947e-06, "loss": 0.8064, "step": 8915 }, { "epoch": 0.8, "grad_norm": 4.791308791110216, "learning_rate": 6.854115024433219e-06, "loss": 0.7246, "step": 8916 }, { "epoch": 0.8, "grad_norm": 6.251966476631001, "learning_rate": 6.853444136316377e-06, "loss": 0.8134, "step": 8917 }, { "epoch": 0.8, "grad_norm": 3.9375206300566616, "learning_rate": 6.852773209512948e-06, "loss": 0.7485, "step": 8918 }, { "epoch": 0.8, "grad_norm": 5.51650228553902, "learning_rate": 6.852102244036936e-06, "loss": 0.7385, "step": 8919 }, { "epoch": 0.8, "grad_norm": 7.187012126624098, "learning_rate": 6.8514312399023456e-06, "loss": 0.7923, "step": 8920 }, { "epoch": 0.8, "grad_norm": 5.081527449869237, "learning_rate": 6.8507601971231815e-06, "loss": 0.7753, "step": 8921 }, { "epoch": 0.8, "grad_norm": 5.906807068896219, "learning_rate": 6.850089115713453e-06, "loss": 0.7828, "step": 8922 }, { "epoch": 0.8, "grad_norm": 7.507539535443064, "learning_rate": 6.849417995687164e-06, "loss": 0.7662, "step": 8923 }, { "epoch": 0.8, "grad_norm": 4.939277765104757, "learning_rate": 6.8487468370583256e-06, "loss": 0.8193, "step": 8924 }, { "epoch": 0.8, "grad_norm": 4.349033175401351, "learning_rate": 6.848075639840945e-06, "loss": 0.7462, "step": 8925 }, { "epoch": 0.8, "grad_norm": 6.400337614633899, "learning_rate": 6.847404404049034e-06, "loss": 0.7594, "step": 8926 }, { "epoch": 0.8, "grad_norm": 7.934260008253583, "learning_rate": 6.846733129696601e-06, "loss": 0.807, "step": 8927 }, { "epoch": 0.8, "grad_norm": 10.872961336248737, "learning_rate": 6.846061816797658e-06, "loss": 0.7582, "step": 8928 }, { "epoch": 0.8, "grad_norm": 6.163037942597244, "learning_rate": 6.845390465366217e-06, "loss": 0.754, "step": 8929 }, { "epoch": 0.8, "grad_norm": 5.380709270853941, "learning_rate": 6.844719075416293e-06, "loss": 0.7578, "step": 8930 }, { "epoch": 0.8, "grad_norm": 5.779486751349994, "learning_rate": 6.844047646961895e-06, "loss": 0.7115, "step": 8931 }, { "epoch": 0.8, "grad_norm": 6.271453024753318, "learning_rate": 6.843376180017045e-06, "loss": 0.7239, "step": 8932 }, { "epoch": 0.8, "grad_norm": 6.333861804517126, "learning_rate": 6.8427046745957525e-06, "loss": 0.738, "step": 8933 }, { "epoch": 0.8, "grad_norm": 5.991637429665713, "learning_rate": 6.842033130712033e-06, "loss": 0.8104, "step": 8934 }, { "epoch": 0.8, "grad_norm": 6.033951851124145, "learning_rate": 6.841361548379909e-06, "loss": 0.8347, "step": 8935 }, { "epoch": 0.8, "grad_norm": 5.71164174866826, "learning_rate": 6.840689927613394e-06, "loss": 0.7626, "step": 8936 }, { "epoch": 0.8, "grad_norm": 4.772787002906953, "learning_rate": 6.840018268426507e-06, "loss": 0.7536, "step": 8937 }, { "epoch": 0.8, "grad_norm": 5.74288147652492, "learning_rate": 6.8393465708332696e-06, "loss": 0.7833, "step": 8938 }, { "epoch": 0.8, "grad_norm": 4.881635084840276, "learning_rate": 6.8386748348477e-06, "loss": 0.7373, "step": 8939 }, { "epoch": 0.8, "grad_norm": 7.357382489159556, "learning_rate": 6.83800306048382e-06, "loss": 0.7695, "step": 8940 }, { "epoch": 0.8, "grad_norm": 7.031769554534384, "learning_rate": 6.837331247755651e-06, "loss": 0.7945, "step": 8941 }, { "epoch": 0.8, "grad_norm": 6.788737115865319, "learning_rate": 6.836659396677216e-06, "loss": 0.7719, "step": 8942 }, { "epoch": 0.8, "grad_norm": 6.245812328279025, "learning_rate": 6.835987507262538e-06, "loss": 0.7974, "step": 8943 }, { "epoch": 0.8, "grad_norm": 5.498715404712324, "learning_rate": 6.835315579525642e-06, "loss": 0.8269, "step": 8944 }, { "epoch": 0.8, "grad_norm": 6.427676297581858, "learning_rate": 6.834643613480553e-06, "loss": 0.7642, "step": 8945 }, { "epoch": 0.8, "grad_norm": 5.352290356651545, "learning_rate": 6.833971609141294e-06, "loss": 0.8166, "step": 8946 }, { "epoch": 0.8, "grad_norm": 4.967278826634626, "learning_rate": 6.8332995665218974e-06, "loss": 0.7049, "step": 8947 }, { "epoch": 0.8, "grad_norm": 5.706706214341631, "learning_rate": 6.832627485636386e-06, "loss": 0.7647, "step": 8948 }, { "epoch": 0.8, "grad_norm": 4.151264270034733, "learning_rate": 6.831955366498788e-06, "loss": 0.7187, "step": 8949 }, { "epoch": 0.8, "grad_norm": 5.901451604733674, "learning_rate": 6.831283209123134e-06, "loss": 0.7887, "step": 8950 }, { "epoch": 0.8, "grad_norm": 4.8069065238291735, "learning_rate": 6.830611013523455e-06, "loss": 0.7949, "step": 8951 }, { "epoch": 0.8, "grad_norm": 5.777627024748749, "learning_rate": 6.829938779713778e-06, "loss": 0.7731, "step": 8952 }, { "epoch": 0.8, "grad_norm": 6.2930136064236635, "learning_rate": 6.829266507708138e-06, "loss": 0.8153, "step": 8953 }, { "epoch": 0.8, "grad_norm": 5.323762556715136, "learning_rate": 6.828594197520565e-06, "loss": 0.7372, "step": 8954 }, { "epoch": 0.8, "grad_norm": 5.042257852614123, "learning_rate": 6.827921849165094e-06, "loss": 0.743, "step": 8955 }, { "epoch": 0.8, "grad_norm": 5.532124582968514, "learning_rate": 6.827249462655755e-06, "loss": 0.8231, "step": 8956 }, { "epoch": 0.8, "grad_norm": 5.7640365898871035, "learning_rate": 6.8265770380065865e-06, "loss": 0.788, "step": 8957 }, { "epoch": 0.8, "grad_norm": 4.814610220235087, "learning_rate": 6.825904575231623e-06, "loss": 0.8216, "step": 8958 }, { "epoch": 0.8, "grad_norm": 4.818041294787947, "learning_rate": 6.825232074344899e-06, "loss": 0.8016, "step": 8959 }, { "epoch": 0.8, "grad_norm": 5.579175247459985, "learning_rate": 6.824559535360453e-06, "loss": 0.8041, "step": 8960 }, { "epoch": 0.8, "grad_norm": 5.544542893134898, "learning_rate": 6.823886958292322e-06, "loss": 0.8165, "step": 8961 }, { "epoch": 0.8, "grad_norm": 4.3734226503405225, "learning_rate": 6.8232143431545465e-06, "loss": 0.7454, "step": 8962 }, { "epoch": 0.8, "grad_norm": 4.122217069429538, "learning_rate": 6.822541689961163e-06, "loss": 0.767, "step": 8963 }, { "epoch": 0.8, "grad_norm": 6.27368178911124, "learning_rate": 6.821868998726213e-06, "loss": 0.7987, "step": 8964 }, { "epoch": 0.8, "grad_norm": 4.323843161914525, "learning_rate": 6.821196269463738e-06, "loss": 0.7273, "step": 8965 }, { "epoch": 0.8, "grad_norm": 5.454473026252531, "learning_rate": 6.820523502187779e-06, "loss": 0.7604, "step": 8966 }, { "epoch": 0.8, "grad_norm": 5.6886932728790685, "learning_rate": 6.819850696912379e-06, "loss": 0.7297, "step": 8967 }, { "epoch": 0.8, "grad_norm": 6.200467942075487, "learning_rate": 6.819177853651582e-06, "loss": 0.7838, "step": 8968 }, { "epoch": 0.8, "grad_norm": 5.6732345759023435, "learning_rate": 6.818504972419429e-06, "loss": 0.8396, "step": 8969 }, { "epoch": 0.8, "grad_norm": 6.948356621737164, "learning_rate": 6.817832053229969e-06, "loss": 0.8414, "step": 8970 }, { "epoch": 0.8, "grad_norm": 6.1215228264217, "learning_rate": 6.817159096097246e-06, "loss": 0.8159, "step": 8971 }, { "epoch": 0.8, "grad_norm": 5.600232289202041, "learning_rate": 6.8164861010353045e-06, "loss": 0.7897, "step": 8972 }, { "epoch": 0.8, "grad_norm": 4.445965194437669, "learning_rate": 6.815813068058196e-06, "loss": 0.7861, "step": 8973 }, { "epoch": 0.8, "grad_norm": 4.5400118123772355, "learning_rate": 6.8151399971799635e-06, "loss": 0.6926, "step": 8974 }, { "epoch": 0.8, "grad_norm": 4.338548754085299, "learning_rate": 6.814466888414661e-06, "loss": 0.8646, "step": 8975 }, { "epoch": 0.8, "grad_norm": 4.391516056292958, "learning_rate": 6.813793741776335e-06, "loss": 0.7706, "step": 8976 }, { "epoch": 0.8, "grad_norm": 5.050186717071382, "learning_rate": 6.813120557279037e-06, "loss": 0.8605, "step": 8977 }, { "epoch": 0.8, "grad_norm": 7.661275537208663, "learning_rate": 6.812447334936818e-06, "loss": 0.8271, "step": 8978 }, { "epoch": 0.8, "grad_norm": 5.812292946319696, "learning_rate": 6.811774074763729e-06, "loss": 0.7575, "step": 8979 }, { "epoch": 0.8, "grad_norm": 4.920845824356115, "learning_rate": 6.811100776773826e-06, "loss": 0.7731, "step": 8980 }, { "epoch": 0.8, "grad_norm": 6.698305877414938, "learning_rate": 6.8104274409811586e-06, "loss": 0.8471, "step": 8981 }, { "epoch": 0.8, "grad_norm": 4.91481553155522, "learning_rate": 6.809754067399783e-06, "loss": 0.789, "step": 8982 }, { "epoch": 0.8, "grad_norm": 4.492634929410201, "learning_rate": 6.809080656043758e-06, "loss": 0.7458, "step": 8983 }, { "epoch": 0.8, "grad_norm": 5.224994620398197, "learning_rate": 6.808407206927133e-06, "loss": 0.7859, "step": 8984 }, { "epoch": 0.8, "grad_norm": 9.829323056238552, "learning_rate": 6.807733720063967e-06, "loss": 0.8009, "step": 8985 }, { "epoch": 0.8, "grad_norm": 7.395512871014527, "learning_rate": 6.807060195468321e-06, "loss": 0.8261, "step": 8986 }, { "epoch": 0.8, "grad_norm": 4.226849611082985, "learning_rate": 6.80638663315425e-06, "loss": 0.783, "step": 8987 }, { "epoch": 0.8, "grad_norm": 7.216187731707186, "learning_rate": 6.8057130331358125e-06, "loss": 0.7911, "step": 8988 }, { "epoch": 0.8, "grad_norm": 5.814493244353561, "learning_rate": 6.805039395427071e-06, "loss": 0.8861, "step": 8989 }, { "epoch": 0.8, "grad_norm": 5.7408557759747065, "learning_rate": 6.804365720042086e-06, "loss": 0.7942, "step": 8990 }, { "epoch": 0.8, "grad_norm": 4.621859480269982, "learning_rate": 6.803692006994918e-06, "loss": 0.7776, "step": 8991 }, { "epoch": 0.8, "grad_norm": 5.604723875696861, "learning_rate": 6.803018256299627e-06, "loss": 0.7836, "step": 8992 }, { "epoch": 0.8, "grad_norm": 4.210415287070769, "learning_rate": 6.802344467970281e-06, "loss": 0.7352, "step": 8993 }, { "epoch": 0.8, "grad_norm": 4.820463901836649, "learning_rate": 6.80167064202094e-06, "loss": 0.8188, "step": 8994 }, { "epoch": 0.8, "grad_norm": 6.117751216031344, "learning_rate": 6.80099677846567e-06, "loss": 0.794, "step": 8995 }, { "epoch": 0.8, "grad_norm": 6.036376109089421, "learning_rate": 6.800322877318537e-06, "loss": 0.7841, "step": 8996 }, { "epoch": 0.8, "grad_norm": 5.6564262592407, "learning_rate": 6.799648938593606e-06, "loss": 0.7198, "step": 8997 }, { "epoch": 0.8, "grad_norm": 8.707786483463643, "learning_rate": 6.798974962304945e-06, "loss": 0.7589, "step": 8998 }, { "epoch": 0.8, "grad_norm": 4.738571033737522, "learning_rate": 6.798300948466622e-06, "loss": 0.824, "step": 8999 }, { "epoch": 0.8, "grad_norm": 6.094796114802285, "learning_rate": 6.797626897092704e-06, "loss": 0.7398, "step": 9000 }, { "epoch": 0.8, "grad_norm": 5.788521713733064, "learning_rate": 6.79695280819726e-06, "loss": 0.8178, "step": 9001 }, { "epoch": 0.8, "grad_norm": 4.280779490782635, "learning_rate": 6.796278681794364e-06, "loss": 0.7448, "step": 9002 }, { "epoch": 0.8, "grad_norm": 4.172165665807148, "learning_rate": 6.795604517898082e-06, "loss": 0.77, "step": 9003 }, { "epoch": 0.8, "grad_norm": 7.48492146257616, "learning_rate": 6.794930316522489e-06, "loss": 0.8206, "step": 9004 }, { "epoch": 0.8, "grad_norm": 6.791921575871331, "learning_rate": 6.794256077681657e-06, "loss": 0.729, "step": 9005 }, { "epoch": 0.8, "grad_norm": 4.690671746661968, "learning_rate": 6.793581801389658e-06, "loss": 0.7626, "step": 9006 }, { "epoch": 0.8, "grad_norm": 3.8631866546735316, "learning_rate": 6.7929074876605675e-06, "loss": 0.8093, "step": 9007 }, { "epoch": 0.8, "grad_norm": 4.868744491043089, "learning_rate": 6.792233136508459e-06, "loss": 0.7956, "step": 9008 }, { "epoch": 0.8, "grad_norm": 5.324871311485903, "learning_rate": 6.791558747947409e-06, "loss": 0.7975, "step": 9009 }, { "epoch": 0.8, "grad_norm": 6.301041022352684, "learning_rate": 6.7908843219914926e-06, "loss": 0.74, "step": 9010 }, { "epoch": 0.8, "grad_norm": 5.337519238154856, "learning_rate": 6.790209858654789e-06, "loss": 0.858, "step": 9011 }, { "epoch": 0.8, "grad_norm": 6.022151802886974, "learning_rate": 6.789535357951376e-06, "loss": 0.7667, "step": 9012 }, { "epoch": 0.8, "grad_norm": 5.304019727707982, "learning_rate": 6.788860819895329e-06, "loss": 0.7888, "step": 9013 }, { "epoch": 0.8, "grad_norm": 5.177946072194452, "learning_rate": 6.7881862445007305e-06, "loss": 0.7579, "step": 9014 }, { "epoch": 0.8, "grad_norm": 6.132663641885758, "learning_rate": 6.7875116317816625e-06, "loss": 0.8391, "step": 9015 }, { "epoch": 0.8, "grad_norm": 4.5107180970293586, "learning_rate": 6.786836981752201e-06, "loss": 0.7789, "step": 9016 }, { "epoch": 0.8, "grad_norm": 4.586771278943711, "learning_rate": 6.786162294426431e-06, "loss": 0.7598, "step": 9017 }, { "epoch": 0.8, "grad_norm": 6.152289000865504, "learning_rate": 6.785487569818436e-06, "loss": 0.74, "step": 9018 }, { "epoch": 0.8, "grad_norm": 6.945654536699646, "learning_rate": 6.784812807942298e-06, "loss": 0.7692, "step": 9019 }, { "epoch": 0.8, "grad_norm": 6.7083158575206525, "learning_rate": 6.784138008812101e-06, "loss": 0.7817, "step": 9020 }, { "epoch": 0.8, "grad_norm": 5.300619429857944, "learning_rate": 6.783463172441931e-06, "loss": 0.8131, "step": 9021 }, { "epoch": 0.8, "grad_norm": 4.727063648176615, "learning_rate": 6.782788298845872e-06, "loss": 0.7242, "step": 9022 }, { "epoch": 0.8, "grad_norm": 5.903991124160926, "learning_rate": 6.782113388038011e-06, "loss": 0.7997, "step": 9023 }, { "epoch": 0.81, "grad_norm": 4.532985294444383, "learning_rate": 6.781438440032438e-06, "loss": 0.7347, "step": 9024 }, { "epoch": 0.81, "grad_norm": 4.448103758655382, "learning_rate": 6.780763454843236e-06, "loss": 0.824, "step": 9025 }, { "epoch": 0.81, "grad_norm": 4.149875609784967, "learning_rate": 6.780088432484499e-06, "loss": 0.7501, "step": 9026 }, { "epoch": 0.81, "grad_norm": 5.712536811692886, "learning_rate": 6.7794133729703115e-06, "loss": 0.7924, "step": 9027 }, { "epoch": 0.81, "grad_norm": 5.968367709837906, "learning_rate": 6.778738276314769e-06, "loss": 0.7821, "step": 9028 }, { "epoch": 0.81, "grad_norm": 6.348375415703808, "learning_rate": 6.7780631425319594e-06, "loss": 0.7691, "step": 9029 }, { "epoch": 0.81, "grad_norm": 6.138032260380681, "learning_rate": 6.777387971635976e-06, "loss": 0.7618, "step": 9030 }, { "epoch": 0.81, "grad_norm": 6.357861661550286, "learning_rate": 6.776712763640911e-06, "loss": 0.8459, "step": 9031 }, { "epoch": 0.81, "grad_norm": 6.872447275163056, "learning_rate": 6.7760375185608586e-06, "loss": 0.7858, "step": 9032 }, { "epoch": 0.81, "grad_norm": 5.995962011007614, "learning_rate": 6.7753622364099125e-06, "loss": 0.7358, "step": 9033 }, { "epoch": 0.81, "grad_norm": 4.972094239405947, "learning_rate": 6.774686917202168e-06, "loss": 0.8198, "step": 9034 }, { "epoch": 0.81, "grad_norm": 4.963086518202751, "learning_rate": 6.774011560951719e-06, "loss": 0.7381, "step": 9035 }, { "epoch": 0.81, "grad_norm": 6.5637838252865, "learning_rate": 6.773336167672664e-06, "loss": 0.7284, "step": 9036 }, { "epoch": 0.81, "grad_norm": 5.783214691661278, "learning_rate": 6.772660737379101e-06, "loss": 0.7685, "step": 9037 }, { "epoch": 0.81, "grad_norm": 4.629247921909131, "learning_rate": 6.771985270085127e-06, "loss": 0.8087, "step": 9038 }, { "epoch": 0.81, "grad_norm": 3.751093627423333, "learning_rate": 6.7713097658048385e-06, "loss": 0.7214, "step": 9039 }, { "epoch": 0.81, "grad_norm": 5.244987664175362, "learning_rate": 6.770634224552341e-06, "loss": 0.7625, "step": 9040 }, { "epoch": 0.81, "grad_norm": 5.920598199591266, "learning_rate": 6.769958646341733e-06, "loss": 0.7785, "step": 9041 }, { "epoch": 0.81, "grad_norm": 5.1787303430185405, "learning_rate": 6.76928303118711e-06, "loss": 0.7179, "step": 9042 }, { "epoch": 0.81, "grad_norm": 5.595860950818168, "learning_rate": 6.76860737910258e-06, "loss": 0.7805, "step": 9043 }, { "epoch": 0.81, "grad_norm": 5.549026867023611, "learning_rate": 6.767931690102245e-06, "loss": 0.7547, "step": 9044 }, { "epoch": 0.81, "grad_norm": 5.291792084506269, "learning_rate": 6.767255964200208e-06, "loss": 0.8019, "step": 9045 }, { "epoch": 0.81, "grad_norm": 3.9059759855424465, "learning_rate": 6.766580201410572e-06, "loss": 0.7326, "step": 9046 }, { "epoch": 0.81, "grad_norm": 4.109622057211347, "learning_rate": 6.765904401747443e-06, "loss": 0.774, "step": 9047 }, { "epoch": 0.81, "grad_norm": 4.262291659582292, "learning_rate": 6.765228565224927e-06, "loss": 0.7447, "step": 9048 }, { "epoch": 0.81, "grad_norm": 5.6380018170324115, "learning_rate": 6.764552691857131e-06, "loss": 0.7631, "step": 9049 }, { "epoch": 0.81, "grad_norm": 8.801002822451169, "learning_rate": 6.763876781658162e-06, "loss": 0.7989, "step": 9050 }, { "epoch": 0.81, "grad_norm": 4.957229886900569, "learning_rate": 6.763200834642127e-06, "loss": 0.7605, "step": 9051 }, { "epoch": 0.81, "grad_norm": 5.442124930835852, "learning_rate": 6.762524850823136e-06, "loss": 0.8174, "step": 9052 }, { "epoch": 0.81, "grad_norm": 4.797481182071632, "learning_rate": 6.761848830215299e-06, "loss": 0.6879, "step": 9053 }, { "epoch": 0.81, "grad_norm": 4.960131133290143, "learning_rate": 6.761172772832726e-06, "loss": 0.6961, "step": 9054 }, { "epoch": 0.81, "grad_norm": 5.344293623655568, "learning_rate": 6.760496678689529e-06, "loss": 0.8731, "step": 9055 }, { "epoch": 0.81, "grad_norm": 5.438291056581272, "learning_rate": 6.7598205477998195e-06, "loss": 0.7751, "step": 9056 }, { "epoch": 0.81, "grad_norm": 4.533819513816837, "learning_rate": 6.759144380177708e-06, "loss": 0.7651, "step": 9057 }, { "epoch": 0.81, "grad_norm": 6.9954378801150625, "learning_rate": 6.758468175837312e-06, "loss": 0.8857, "step": 9058 }, { "epoch": 0.81, "grad_norm": 5.928905215288331, "learning_rate": 6.757791934792742e-06, "loss": 0.7626, "step": 9059 }, { "epoch": 0.81, "grad_norm": 4.935427648879806, "learning_rate": 6.757115657058115e-06, "loss": 0.7234, "step": 9060 }, { "epoch": 0.81, "grad_norm": 4.756285790191452, "learning_rate": 6.756439342647547e-06, "loss": 0.7324, "step": 9061 }, { "epoch": 0.81, "grad_norm": 7.058768209600252, "learning_rate": 6.755762991575156e-06, "loss": 0.8128, "step": 9062 }, { "epoch": 0.81, "grad_norm": 3.9806948488850775, "learning_rate": 6.755086603855057e-06, "loss": 0.7021, "step": 9063 }, { "epoch": 0.81, "grad_norm": 5.125107251314303, "learning_rate": 6.754410179501367e-06, "loss": 0.8024, "step": 9064 }, { "epoch": 0.81, "grad_norm": 5.980247183133425, "learning_rate": 6.753733718528208e-06, "loss": 0.8717, "step": 9065 }, { "epoch": 0.81, "grad_norm": 5.4993379040274215, "learning_rate": 6.7530572209496984e-06, "loss": 0.7787, "step": 9066 }, { "epoch": 0.81, "grad_norm": 6.082129497557983, "learning_rate": 6.752380686779958e-06, "loss": 0.7309, "step": 9067 }, { "epoch": 0.81, "grad_norm": 5.9101174428366035, "learning_rate": 6.751704116033107e-06, "loss": 0.8154, "step": 9068 }, { "epoch": 0.81, "grad_norm": 6.729077176347446, "learning_rate": 6.751027508723273e-06, "loss": 0.7544, "step": 9069 }, { "epoch": 0.81, "grad_norm": 6.359909761582978, "learning_rate": 6.75035086486457e-06, "loss": 0.7983, "step": 9070 }, { "epoch": 0.81, "grad_norm": 5.02936114910174, "learning_rate": 6.749674184471127e-06, "loss": 0.7381, "step": 9071 }, { "epoch": 0.81, "grad_norm": 9.374414053173028, "learning_rate": 6.748997467557068e-06, "loss": 0.7983, "step": 9072 }, { "epoch": 0.81, "grad_norm": 6.490177724461621, "learning_rate": 6.748320714136516e-06, "loss": 0.8459, "step": 9073 }, { "epoch": 0.81, "grad_norm": 7.178101908622659, "learning_rate": 6.747643924223599e-06, "loss": 0.8487, "step": 9074 }, { "epoch": 0.81, "grad_norm": 6.062747464687736, "learning_rate": 6.746967097832442e-06, "loss": 0.7756, "step": 9075 }, { "epoch": 0.81, "grad_norm": 4.699292418490548, "learning_rate": 6.746290234977172e-06, "loss": 0.7426, "step": 9076 }, { "epoch": 0.81, "grad_norm": 5.203508252600028, "learning_rate": 6.745613335671919e-06, "loss": 0.7614, "step": 9077 }, { "epoch": 0.81, "grad_norm": 5.218007974553774, "learning_rate": 6.744936399930808e-06, "loss": 0.7441, "step": 9078 }, { "epoch": 0.81, "grad_norm": 5.897196098271374, "learning_rate": 6.744259427767974e-06, "loss": 0.7367, "step": 9079 }, { "epoch": 0.81, "grad_norm": 6.631907894283829, "learning_rate": 6.743582419197543e-06, "loss": 0.7467, "step": 9080 }, { "epoch": 0.81, "grad_norm": 5.445908822946712, "learning_rate": 6.742905374233646e-06, "loss": 0.7044, "step": 9081 }, { "epoch": 0.81, "grad_norm": 5.057748582546555, "learning_rate": 6.742228292890418e-06, "loss": 0.7452, "step": 9082 }, { "epoch": 0.81, "grad_norm": 5.458978352523111, "learning_rate": 6.74155117518199e-06, "loss": 0.7642, "step": 9083 }, { "epoch": 0.81, "grad_norm": 5.962435922308668, "learning_rate": 6.740874021122494e-06, "loss": 0.7467, "step": 9084 }, { "epoch": 0.81, "grad_norm": 5.840280922931275, "learning_rate": 6.7401968307260665e-06, "loss": 0.7412, "step": 9085 }, { "epoch": 0.81, "grad_norm": 6.913396395277208, "learning_rate": 6.73951960400684e-06, "loss": 0.7788, "step": 9086 }, { "epoch": 0.81, "grad_norm": 6.355900084606462, "learning_rate": 6.73884234097895e-06, "loss": 0.8072, "step": 9087 }, { "epoch": 0.81, "grad_norm": 6.089418294402775, "learning_rate": 6.738165041656537e-06, "loss": 0.7935, "step": 9088 }, { "epoch": 0.81, "grad_norm": 5.732213344814303, "learning_rate": 6.737487706053734e-06, "loss": 0.7977, "step": 9089 }, { "epoch": 0.81, "grad_norm": 4.413970945933723, "learning_rate": 6.7368103341846795e-06, "loss": 0.768, "step": 9090 }, { "epoch": 0.81, "grad_norm": 5.797082456670281, "learning_rate": 6.736132926063515e-06, "loss": 0.8431, "step": 9091 }, { "epoch": 0.81, "grad_norm": 7.704713167315286, "learning_rate": 6.735455481704376e-06, "loss": 0.8042, "step": 9092 }, { "epoch": 0.81, "grad_norm": 6.422378628106721, "learning_rate": 6.734778001121404e-06, "loss": 0.7406, "step": 9093 }, { "epoch": 0.81, "grad_norm": 6.40349602728623, "learning_rate": 6.734100484328742e-06, "loss": 0.7788, "step": 9094 }, { "epoch": 0.81, "grad_norm": 4.1377100530034445, "learning_rate": 6.733422931340528e-06, "loss": 0.776, "step": 9095 }, { "epoch": 0.81, "grad_norm": 7.676639968708884, "learning_rate": 6.732745342170907e-06, "loss": 0.7294, "step": 9096 }, { "epoch": 0.81, "grad_norm": 6.654546719789466, "learning_rate": 6.732067716834022e-06, "loss": 0.819, "step": 9097 }, { "epoch": 0.81, "grad_norm": 6.82698152153159, "learning_rate": 6.731390055344018e-06, "loss": 0.7198, "step": 9098 }, { "epoch": 0.81, "grad_norm": 6.9320144581148675, "learning_rate": 6.730712357715036e-06, "loss": 0.7552, "step": 9099 }, { "epoch": 0.81, "grad_norm": 5.310956504574239, "learning_rate": 6.730034623961224e-06, "loss": 0.7767, "step": 9100 }, { "epoch": 0.81, "grad_norm": 5.2993293852292584, "learning_rate": 6.729356854096729e-06, "loss": 0.772, "step": 9101 }, { "epoch": 0.81, "grad_norm": 5.584913906610851, "learning_rate": 6.728679048135695e-06, "loss": 0.7429, "step": 9102 }, { "epoch": 0.81, "grad_norm": 7.956494763984893, "learning_rate": 6.728001206092274e-06, "loss": 0.7821, "step": 9103 }, { "epoch": 0.81, "grad_norm": 6.179743581810423, "learning_rate": 6.727323327980611e-06, "loss": 0.7167, "step": 9104 }, { "epoch": 0.81, "grad_norm": 4.381994388276381, "learning_rate": 6.726645413814857e-06, "loss": 0.7494, "step": 9105 }, { "epoch": 0.81, "grad_norm": 7.552847230149235, "learning_rate": 6.7259674636091606e-06, "loss": 0.7253, "step": 9106 }, { "epoch": 0.81, "grad_norm": 6.019923454622278, "learning_rate": 6.725289477377675e-06, "loss": 0.8318, "step": 9107 }, { "epoch": 0.81, "grad_norm": 7.554346855450403, "learning_rate": 6.724611455134547e-06, "loss": 0.7855, "step": 9108 }, { "epoch": 0.81, "grad_norm": 5.892025837408739, "learning_rate": 6.723933396893932e-06, "loss": 0.746, "step": 9109 }, { "epoch": 0.81, "grad_norm": 5.557491534199239, "learning_rate": 6.723255302669984e-06, "loss": 0.7375, "step": 9110 }, { "epoch": 0.81, "grad_norm": 4.66373075473867, "learning_rate": 6.722577172476854e-06, "loss": 0.7693, "step": 9111 }, { "epoch": 0.81, "grad_norm": 5.452442523353855, "learning_rate": 6.7218990063286996e-06, "loss": 0.8076, "step": 9112 }, { "epoch": 0.81, "grad_norm": 5.8692719062231005, "learning_rate": 6.7212208042396745e-06, "loss": 0.724, "step": 9113 }, { "epoch": 0.81, "grad_norm": 5.838215398036215, "learning_rate": 6.720542566223933e-06, "loss": 0.731, "step": 9114 }, { "epoch": 0.81, "grad_norm": 5.796920545872942, "learning_rate": 6.7198642922956334e-06, "loss": 0.7356, "step": 9115 }, { "epoch": 0.81, "grad_norm": 6.40226511278919, "learning_rate": 6.7191859824689345e-06, "loss": 0.7502, "step": 9116 }, { "epoch": 0.81, "grad_norm": 3.7377975352727826, "learning_rate": 6.718507636757992e-06, "loss": 0.7706, "step": 9117 }, { "epoch": 0.81, "grad_norm": 4.041443667044097, "learning_rate": 6.717829255176967e-06, "loss": 0.7095, "step": 9118 }, { "epoch": 0.81, "grad_norm": 6.00743581573742, "learning_rate": 6.717150837740016e-06, "loss": 0.8115, "step": 9119 }, { "epoch": 0.81, "grad_norm": 8.499398604149865, "learning_rate": 6.716472384461306e-06, "loss": 0.7675, "step": 9120 }, { "epoch": 0.81, "grad_norm": 7.057859442556996, "learning_rate": 6.71579389535499e-06, "loss": 0.8166, "step": 9121 }, { "epoch": 0.81, "grad_norm": 4.840720890919522, "learning_rate": 6.715115370435236e-06, "loss": 0.813, "step": 9122 }, { "epoch": 0.81, "grad_norm": 5.982257857465485, "learning_rate": 6.714436809716205e-06, "loss": 0.7604, "step": 9123 }, { "epoch": 0.81, "grad_norm": 5.012551785073297, "learning_rate": 6.713758213212058e-06, "loss": 0.738, "step": 9124 }, { "epoch": 0.81, "grad_norm": 4.924849023216129, "learning_rate": 6.7130795809369635e-06, "loss": 0.7821, "step": 9125 }, { "epoch": 0.81, "grad_norm": 5.723099173898512, "learning_rate": 6.712400912905083e-06, "loss": 0.7518, "step": 9126 }, { "epoch": 0.81, "grad_norm": 5.505998817158916, "learning_rate": 6.7117222091305846e-06, "loss": 0.8052, "step": 9127 }, { "epoch": 0.81, "grad_norm": 5.127821701793819, "learning_rate": 6.711043469627633e-06, "loss": 0.7726, "step": 9128 }, { "epoch": 0.81, "grad_norm": 6.518616696234241, "learning_rate": 6.710364694410396e-06, "loss": 0.7576, "step": 9129 }, { "epoch": 0.81, "grad_norm": 5.7084337505403635, "learning_rate": 6.709685883493043e-06, "loss": 0.7415, "step": 9130 }, { "epoch": 0.81, "grad_norm": 5.142218252330915, "learning_rate": 6.709007036889739e-06, "loss": 0.7803, "step": 9131 }, { "epoch": 0.81, "grad_norm": 6.271720348328558, "learning_rate": 6.708328154614657e-06, "loss": 0.8149, "step": 9132 }, { "epoch": 0.81, "grad_norm": 7.430171974276038, "learning_rate": 6.707649236681966e-06, "loss": 0.7979, "step": 9133 }, { "epoch": 0.81, "grad_norm": 5.286023982013857, "learning_rate": 6.706970283105837e-06, "loss": 0.9138, "step": 9134 }, { "epoch": 0.81, "grad_norm": 5.070866696522087, "learning_rate": 6.706291293900441e-06, "loss": 0.8188, "step": 9135 }, { "epoch": 0.82, "grad_norm": 6.50575275280628, "learning_rate": 6.705612269079951e-06, "loss": 0.7231, "step": 9136 }, { "epoch": 0.82, "grad_norm": 6.079711250521131, "learning_rate": 6.70493320865854e-06, "loss": 0.8098, "step": 9137 }, { "epoch": 0.82, "grad_norm": 7.30638111095326, "learning_rate": 6.704254112650382e-06, "loss": 0.8653, "step": 9138 }, { "epoch": 0.82, "grad_norm": 5.065920183485765, "learning_rate": 6.703574981069653e-06, "loss": 0.7989, "step": 9139 }, { "epoch": 0.82, "grad_norm": 5.474011792552387, "learning_rate": 6.702895813930526e-06, "loss": 0.8721, "step": 9140 }, { "epoch": 0.82, "grad_norm": 6.311899803455266, "learning_rate": 6.702216611247176e-06, "loss": 0.7301, "step": 9141 }, { "epoch": 0.82, "grad_norm": 5.840893244326878, "learning_rate": 6.701537373033787e-06, "loss": 0.7566, "step": 9142 }, { "epoch": 0.82, "grad_norm": 5.483266517957952, "learning_rate": 6.700858099304528e-06, "loss": 0.7957, "step": 9143 }, { "epoch": 0.82, "grad_norm": 6.735192443882309, "learning_rate": 6.700178790073582e-06, "loss": 0.711, "step": 9144 }, { "epoch": 0.82, "grad_norm": 8.18337706786702, "learning_rate": 6.6994994453551274e-06, "loss": 0.8281, "step": 9145 }, { "epoch": 0.82, "grad_norm": 5.923255960125858, "learning_rate": 6.698820065163343e-06, "loss": 0.7113, "step": 9146 }, { "epoch": 0.82, "grad_norm": 5.907377275548001, "learning_rate": 6.69814064951241e-06, "loss": 0.7929, "step": 9147 }, { "epoch": 0.82, "grad_norm": 4.90011308191444, "learning_rate": 6.6974611984165106e-06, "loss": 0.7288, "step": 9148 }, { "epoch": 0.82, "grad_norm": 5.450382091149885, "learning_rate": 6.696781711889826e-06, "loss": 0.8296, "step": 9149 }, { "epoch": 0.82, "grad_norm": 6.702912950847071, "learning_rate": 6.6961021899465385e-06, "loss": 0.7379, "step": 9150 }, { "epoch": 0.82, "grad_norm": 6.34026374896719, "learning_rate": 6.695422632600832e-06, "loss": 0.6886, "step": 9151 }, { "epoch": 0.82, "grad_norm": 5.411821829548468, "learning_rate": 6.694743039866891e-06, "loss": 0.8001, "step": 9152 }, { "epoch": 0.82, "grad_norm": 5.265831106793937, "learning_rate": 6.694063411758901e-06, "loss": 0.7724, "step": 9153 }, { "epoch": 0.82, "grad_norm": 7.630785396505117, "learning_rate": 6.6933837482910465e-06, "loss": 0.8042, "step": 9154 }, { "epoch": 0.82, "grad_norm": 5.682882160925468, "learning_rate": 6.692704049477517e-06, "loss": 0.8075, "step": 9155 }, { "epoch": 0.82, "grad_norm": 5.522978061173116, "learning_rate": 6.692024315332495e-06, "loss": 0.7901, "step": 9156 }, { "epoch": 0.82, "grad_norm": 6.589824414535788, "learning_rate": 6.691344545870171e-06, "loss": 0.6906, "step": 9157 }, { "epoch": 0.82, "grad_norm": 5.3963834554433046, "learning_rate": 6.690664741104736e-06, "loss": 0.7899, "step": 9158 }, { "epoch": 0.82, "grad_norm": 6.04078981648403, "learning_rate": 6.6899849010503736e-06, "loss": 0.8428, "step": 9159 }, { "epoch": 0.82, "grad_norm": 4.528349520005722, "learning_rate": 6.689305025721278e-06, "loss": 0.7501, "step": 9160 }, { "epoch": 0.82, "grad_norm": 5.233638511071927, "learning_rate": 6.688625115131642e-06, "loss": 0.8235, "step": 9161 }, { "epoch": 0.82, "grad_norm": 6.426153092618917, "learning_rate": 6.687945169295652e-06, "loss": 0.7069, "step": 9162 }, { "epoch": 0.82, "grad_norm": 4.989908301336699, "learning_rate": 6.687265188227505e-06, "loss": 0.7495, "step": 9163 }, { "epoch": 0.82, "grad_norm": 5.350896262188914, "learning_rate": 6.686585171941391e-06, "loss": 0.7377, "step": 9164 }, { "epoch": 0.82, "grad_norm": 5.041948832057526, "learning_rate": 6.6859051204515056e-06, "loss": 0.7589, "step": 9165 }, { "epoch": 0.82, "grad_norm": 6.670240678654193, "learning_rate": 6.685225033772042e-06, "loss": 0.7588, "step": 9166 }, { "epoch": 0.82, "grad_norm": 5.589324936404713, "learning_rate": 6.684544911917199e-06, "loss": 0.7927, "step": 9167 }, { "epoch": 0.82, "grad_norm": 5.124572799578937, "learning_rate": 6.683864754901168e-06, "loss": 0.7399, "step": 9168 }, { "epoch": 0.82, "grad_norm": 4.605939273795954, "learning_rate": 6.683184562738147e-06, "loss": 0.7542, "step": 9169 }, { "epoch": 0.82, "grad_norm": 7.236992036135941, "learning_rate": 6.682504335442337e-06, "loss": 0.7582, "step": 9170 }, { "epoch": 0.82, "grad_norm": 5.2175149990144885, "learning_rate": 6.681824073027932e-06, "loss": 0.7505, "step": 9171 }, { "epoch": 0.82, "grad_norm": 5.068630173613845, "learning_rate": 6.681143775509133e-06, "loss": 0.6977, "step": 9172 }, { "epoch": 0.82, "grad_norm": 4.723855312064117, "learning_rate": 6.680463442900139e-06, "loss": 0.7783, "step": 9173 }, { "epoch": 0.82, "grad_norm": 5.525590744171022, "learning_rate": 6.679783075215152e-06, "loss": 0.772, "step": 9174 }, { "epoch": 0.82, "grad_norm": 4.712894486850788, "learning_rate": 6.679102672468369e-06, "loss": 0.7274, "step": 9175 }, { "epoch": 0.82, "grad_norm": 9.429183091489785, "learning_rate": 6.678422234673997e-06, "loss": 0.8055, "step": 9176 }, { "epoch": 0.82, "grad_norm": 6.526893658980028, "learning_rate": 6.677741761846237e-06, "loss": 0.7863, "step": 9177 }, { "epoch": 0.82, "grad_norm": 7.390006884137587, "learning_rate": 6.677061253999292e-06, "loss": 0.726, "step": 9178 }, { "epoch": 0.82, "grad_norm": 6.799464412573038, "learning_rate": 6.676380711147364e-06, "loss": 0.8181, "step": 9179 }, { "epoch": 0.82, "grad_norm": 5.927791013478489, "learning_rate": 6.675700133304662e-06, "loss": 0.8459, "step": 9180 }, { "epoch": 0.82, "grad_norm": 4.595248942027759, "learning_rate": 6.675019520485388e-06, "loss": 0.7017, "step": 9181 }, { "epoch": 0.82, "grad_norm": 5.5331004595809175, "learning_rate": 6.67433887270375e-06, "loss": 0.7471, "step": 9182 }, { "epoch": 0.82, "grad_norm": 6.195866366210726, "learning_rate": 6.673658189973956e-06, "loss": 0.8198, "step": 9183 }, { "epoch": 0.82, "grad_norm": 7.57307111183648, "learning_rate": 6.672977472310211e-06, "loss": 0.796, "step": 9184 }, { "epoch": 0.82, "grad_norm": 4.958181446148806, "learning_rate": 6.6722967197267264e-06, "loss": 0.7821, "step": 9185 }, { "epoch": 0.82, "grad_norm": 7.5746210909198455, "learning_rate": 6.671615932237708e-06, "loss": 0.7209, "step": 9186 }, { "epoch": 0.82, "grad_norm": 6.015818896899873, "learning_rate": 6.67093510985737e-06, "loss": 0.8394, "step": 9187 }, { "epoch": 0.82, "grad_norm": 5.935359750186638, "learning_rate": 6.6702542525999195e-06, "loss": 0.8084, "step": 9188 }, { "epoch": 0.82, "grad_norm": 5.45763842725462, "learning_rate": 6.6695733604795684e-06, "loss": 0.7586, "step": 9189 }, { "epoch": 0.82, "grad_norm": 6.156111708579572, "learning_rate": 6.668892433510532e-06, "loss": 0.8521, "step": 9190 }, { "epoch": 0.82, "grad_norm": 5.796532449832661, "learning_rate": 6.66821147170702e-06, "loss": 0.7319, "step": 9191 }, { "epoch": 0.82, "grad_norm": 5.511409845679262, "learning_rate": 6.667530475083247e-06, "loss": 0.8368, "step": 9192 }, { "epoch": 0.82, "grad_norm": 6.141351822992895, "learning_rate": 6.666849443653427e-06, "loss": 0.7949, "step": 9193 }, { "epoch": 0.82, "grad_norm": 4.617332008594722, "learning_rate": 6.666168377431776e-06, "loss": 0.7719, "step": 9194 }, { "epoch": 0.82, "grad_norm": 5.6021662106766685, "learning_rate": 6.665487276432508e-06, "loss": 0.7921, "step": 9195 }, { "epoch": 0.82, "grad_norm": 5.801139592040418, "learning_rate": 6.6648061406698395e-06, "loss": 0.8607, "step": 9196 }, { "epoch": 0.82, "grad_norm": 7.195669710214925, "learning_rate": 6.66412497015799e-06, "loss": 0.7524, "step": 9197 }, { "epoch": 0.82, "grad_norm": 5.822795667118691, "learning_rate": 6.663443764911175e-06, "loss": 0.7351, "step": 9198 }, { "epoch": 0.82, "grad_norm": 10.278779803199567, "learning_rate": 6.662762524943616e-06, "loss": 0.8917, "step": 9199 }, { "epoch": 0.82, "grad_norm": 4.6289287513526025, "learning_rate": 6.66208125026953e-06, "loss": 0.7691, "step": 9200 }, { "epoch": 0.82, "grad_norm": 7.219945086872416, "learning_rate": 6.6613999409031375e-06, "loss": 0.7636, "step": 9201 }, { "epoch": 0.82, "grad_norm": 6.667169234508395, "learning_rate": 6.660718596858658e-06, "loss": 0.6813, "step": 9202 }, { "epoch": 0.82, "grad_norm": 6.43393113116461, "learning_rate": 6.660037218150318e-06, "loss": 0.7676, "step": 9203 }, { "epoch": 0.82, "grad_norm": 5.3479330224371875, "learning_rate": 6.6593558047923344e-06, "loss": 0.7496, "step": 9204 }, { "epoch": 0.82, "grad_norm": 6.19315678162661, "learning_rate": 6.658674356798933e-06, "loss": 0.7622, "step": 9205 }, { "epoch": 0.82, "grad_norm": 5.424329275290144, "learning_rate": 6.657992874184338e-06, "loss": 0.7872, "step": 9206 }, { "epoch": 0.82, "grad_norm": 6.228312366368319, "learning_rate": 6.65731135696277e-06, "loss": 0.6999, "step": 9207 }, { "epoch": 0.82, "grad_norm": 5.191926610008593, "learning_rate": 6.656629805148458e-06, "loss": 0.8397, "step": 9208 }, { "epoch": 0.82, "grad_norm": 5.986232531620126, "learning_rate": 6.655948218755628e-06, "loss": 0.7973, "step": 9209 }, { "epoch": 0.82, "grad_norm": 6.253408289901351, "learning_rate": 6.655266597798503e-06, "loss": 0.7334, "step": 9210 }, { "epoch": 0.82, "grad_norm": 4.754746528021622, "learning_rate": 6.6545849422913145e-06, "loss": 0.7558, "step": 9211 }, { "epoch": 0.82, "grad_norm": 5.1281475446936, "learning_rate": 6.6539032522482885e-06, "loss": 0.7732, "step": 9212 }, { "epoch": 0.82, "grad_norm": 5.872874654863938, "learning_rate": 6.653221527683653e-06, "loss": 0.8353, "step": 9213 }, { "epoch": 0.82, "grad_norm": 5.28655531581945, "learning_rate": 6.652539768611641e-06, "loss": 0.7297, "step": 9214 }, { "epoch": 0.82, "grad_norm": 5.487427450203813, "learning_rate": 6.65185797504648e-06, "loss": 0.8404, "step": 9215 }, { "epoch": 0.82, "grad_norm": 6.795706657007864, "learning_rate": 6.6511761470023996e-06, "loss": 0.769, "step": 9216 }, { "epoch": 0.82, "grad_norm": 5.013014968625061, "learning_rate": 6.650494284493632e-06, "loss": 0.8346, "step": 9217 }, { "epoch": 0.82, "grad_norm": 5.83971565902096, "learning_rate": 6.649812387534413e-06, "loss": 0.8, "step": 9218 }, { "epoch": 0.82, "grad_norm": 6.04021493146678, "learning_rate": 6.6491304561389735e-06, "loss": 0.7875, "step": 9219 }, { "epoch": 0.82, "grad_norm": 7.689449883124337, "learning_rate": 6.6484484903215465e-06, "loss": 0.8489, "step": 9220 }, { "epoch": 0.82, "grad_norm": 5.797897028246807, "learning_rate": 6.647766490096368e-06, "loss": 0.7434, "step": 9221 }, { "epoch": 0.82, "grad_norm": 5.023938330465407, "learning_rate": 6.647084455477673e-06, "loss": 0.7498, "step": 9222 }, { "epoch": 0.82, "grad_norm": 5.166201376431791, "learning_rate": 6.6464023864796955e-06, "loss": 0.8184, "step": 9223 }, { "epoch": 0.82, "grad_norm": 4.460568075953246, "learning_rate": 6.6457202831166734e-06, "loss": 0.7509, "step": 9224 }, { "epoch": 0.82, "grad_norm": 5.247281875559364, "learning_rate": 6.645038145402847e-06, "loss": 0.7195, "step": 9225 }, { "epoch": 0.82, "grad_norm": 5.6946803479857655, "learning_rate": 6.6443559733524484e-06, "loss": 0.7703, "step": 9226 }, { "epoch": 0.82, "grad_norm": 6.893133654477158, "learning_rate": 6.643673766979721e-06, "loss": 0.7261, "step": 9227 }, { "epoch": 0.82, "grad_norm": 5.819059854209314, "learning_rate": 6.642991526298907e-06, "loss": 0.7477, "step": 9228 }, { "epoch": 0.82, "grad_norm": 4.314341487705419, "learning_rate": 6.64230925132424e-06, "loss": 0.783, "step": 9229 }, { "epoch": 0.82, "grad_norm": 7.251618588622973, "learning_rate": 6.641626942069963e-06, "loss": 0.8815, "step": 9230 }, { "epoch": 0.82, "grad_norm": 8.019923728707433, "learning_rate": 6.64094459855032e-06, "loss": 0.8572, "step": 9231 }, { "epoch": 0.82, "grad_norm": 7.675704132000863, "learning_rate": 6.640262220779552e-06, "loss": 0.8783, "step": 9232 }, { "epoch": 0.82, "grad_norm": 6.627118667226743, "learning_rate": 6.639579808771901e-06, "loss": 0.8131, "step": 9233 }, { "epoch": 0.82, "grad_norm": 4.781423011233196, "learning_rate": 6.6388973625416145e-06, "loss": 0.7774, "step": 9234 }, { "epoch": 0.82, "grad_norm": 5.507598205656157, "learning_rate": 6.638214882102934e-06, "loss": 0.795, "step": 9235 }, { "epoch": 0.82, "grad_norm": 6.549660281876527, "learning_rate": 6.637532367470104e-06, "loss": 0.8327, "step": 9236 }, { "epoch": 0.82, "grad_norm": 4.181392326393004, "learning_rate": 6.636849818657373e-06, "loss": 0.8219, "step": 9237 }, { "epoch": 0.82, "grad_norm": 5.053139779380874, "learning_rate": 6.6361672356789875e-06, "loss": 0.8178, "step": 9238 }, { "epoch": 0.82, "grad_norm": 6.135555366803391, "learning_rate": 6.635484618549192e-06, "loss": 0.7056, "step": 9239 }, { "epoch": 0.82, "grad_norm": 6.221347302960785, "learning_rate": 6.634801967282237e-06, "loss": 0.7841, "step": 9240 }, { "epoch": 0.82, "grad_norm": 5.992837058641838, "learning_rate": 6.634119281892373e-06, "loss": 0.7767, "step": 9241 }, { "epoch": 0.82, "grad_norm": 6.107546644312385, "learning_rate": 6.633436562393847e-06, "loss": 0.6797, "step": 9242 }, { "epoch": 0.82, "grad_norm": 4.129832495415342, "learning_rate": 6.632753808800909e-06, "loss": 0.7883, "step": 9243 }, { "epoch": 0.82, "grad_norm": 5.753666243037954, "learning_rate": 6.632071021127812e-06, "loss": 0.8234, "step": 9244 }, { "epoch": 0.82, "grad_norm": 4.184328860541277, "learning_rate": 6.6313881993888066e-06, "loss": 0.7687, "step": 9245 }, { "epoch": 0.82, "grad_norm": 5.837297877645717, "learning_rate": 6.630705343598144e-06, "loss": 0.7344, "step": 9246 }, { "epoch": 0.82, "grad_norm": 4.528570368649965, "learning_rate": 6.63002245377008e-06, "loss": 0.7605, "step": 9247 }, { "epoch": 0.83, "grad_norm": 4.373297466537811, "learning_rate": 6.6293395299188655e-06, "loss": 0.8562, "step": 9248 }, { "epoch": 0.83, "grad_norm": 5.996260641964184, "learning_rate": 6.628656572058756e-06, "loss": 0.7499, "step": 9249 }, { "epoch": 0.83, "grad_norm": 6.915657819380187, "learning_rate": 6.627973580204009e-06, "loss": 0.785, "step": 9250 }, { "epoch": 0.83, "grad_norm": 6.968558225714612, "learning_rate": 6.627290554368879e-06, "loss": 0.7726, "step": 9251 }, { "epoch": 0.83, "grad_norm": 5.029586113267461, "learning_rate": 6.626607494567621e-06, "loss": 0.7751, "step": 9252 }, { "epoch": 0.83, "grad_norm": 6.470269862485086, "learning_rate": 6.625924400814495e-06, "loss": 0.7333, "step": 9253 }, { "epoch": 0.83, "grad_norm": 5.079174350154124, "learning_rate": 6.625241273123757e-06, "loss": 0.7962, "step": 9254 }, { "epoch": 0.83, "grad_norm": 5.2146186660828455, "learning_rate": 6.624558111509666e-06, "loss": 0.7464, "step": 9255 }, { "epoch": 0.83, "grad_norm": 5.452822448698582, "learning_rate": 6.623874915986483e-06, "loss": 0.6866, "step": 9256 }, { "epoch": 0.83, "grad_norm": 4.2479928081596, "learning_rate": 6.6231916865684685e-06, "loss": 0.7357, "step": 9257 }, { "epoch": 0.83, "grad_norm": 5.211056826898012, "learning_rate": 6.62250842326988e-06, "loss": 0.8206, "step": 9258 }, { "epoch": 0.83, "grad_norm": 5.457961012287179, "learning_rate": 6.621825126104983e-06, "loss": 0.8005, "step": 9259 }, { "epoch": 0.83, "grad_norm": 7.912316596448745, "learning_rate": 6.621141795088037e-06, "loss": 0.7906, "step": 9260 }, { "epoch": 0.83, "grad_norm": 4.342943523756849, "learning_rate": 6.620458430233307e-06, "loss": 0.8242, "step": 9261 }, { "epoch": 0.83, "grad_norm": 4.370707624856441, "learning_rate": 6.619775031555056e-06, "loss": 0.7302, "step": 9262 }, { "epoch": 0.83, "grad_norm": 5.144529284392769, "learning_rate": 6.619091599067549e-06, "loss": 0.8391, "step": 9263 }, { "epoch": 0.83, "grad_norm": 6.7878541840736375, "learning_rate": 6.61840813278505e-06, "loss": 0.8514, "step": 9264 }, { "epoch": 0.83, "grad_norm": 4.32004645889888, "learning_rate": 6.617724632721826e-06, "loss": 0.7752, "step": 9265 }, { "epoch": 0.83, "grad_norm": 5.793122439130676, "learning_rate": 6.617041098892143e-06, "loss": 0.809, "step": 9266 }, { "epoch": 0.83, "grad_norm": 6.5453441624994415, "learning_rate": 6.6163575313102666e-06, "loss": 0.7685, "step": 9267 }, { "epoch": 0.83, "grad_norm": 4.69452457319781, "learning_rate": 6.6156739299904685e-06, "loss": 0.7395, "step": 9268 }, { "epoch": 0.83, "grad_norm": 4.775445369297455, "learning_rate": 6.614990294947015e-06, "loss": 0.8155, "step": 9269 }, { "epoch": 0.83, "grad_norm": 6.150895192936199, "learning_rate": 6.614306626194174e-06, "loss": 0.7413, "step": 9270 }, { "epoch": 0.83, "grad_norm": 7.4181600956107605, "learning_rate": 6.61362292374622e-06, "loss": 0.8399, "step": 9271 }, { "epoch": 0.83, "grad_norm": 6.0559154556494725, "learning_rate": 6.61293918761742e-06, "loss": 0.7604, "step": 9272 }, { "epoch": 0.83, "grad_norm": 7.202471875614134, "learning_rate": 6.612255417822048e-06, "loss": 0.6871, "step": 9273 }, { "epoch": 0.83, "grad_norm": 4.81408871772347, "learning_rate": 6.6115716143743726e-06, "loss": 0.7425, "step": 9274 }, { "epoch": 0.83, "grad_norm": 4.789683027741783, "learning_rate": 6.61088777728867e-06, "loss": 0.7296, "step": 9275 }, { "epoch": 0.83, "grad_norm": 6.364767618110003, "learning_rate": 6.610203906579213e-06, "loss": 0.8089, "step": 9276 }, { "epoch": 0.83, "grad_norm": 4.880332672665388, "learning_rate": 6.609520002260277e-06, "loss": 0.8343, "step": 9277 }, { "epoch": 0.83, "grad_norm": 6.906409923630362, "learning_rate": 6.608836064346135e-06, "loss": 0.7148, "step": 9278 }, { "epoch": 0.83, "grad_norm": 5.812606070995352, "learning_rate": 6.608152092851063e-06, "loss": 0.8045, "step": 9279 }, { "epoch": 0.83, "grad_norm": 6.290504129671277, "learning_rate": 6.607468087789338e-06, "loss": 0.8716, "step": 9280 }, { "epoch": 0.83, "grad_norm": 4.211277178257234, "learning_rate": 6.606784049175237e-06, "loss": 0.6418, "step": 9281 }, { "epoch": 0.83, "grad_norm": 5.716675392183513, "learning_rate": 6.606099977023037e-06, "loss": 0.8144, "step": 9282 }, { "epoch": 0.83, "grad_norm": 5.714086465459584, "learning_rate": 6.605415871347018e-06, "loss": 0.7539, "step": 9283 }, { "epoch": 0.83, "grad_norm": 7.833905534933337, "learning_rate": 6.604731732161458e-06, "loss": 0.8442, "step": 9284 }, { "epoch": 0.83, "grad_norm": 5.376479273078114, "learning_rate": 6.604047559480639e-06, "loss": 0.8273, "step": 9285 }, { "epoch": 0.83, "grad_norm": 6.220177948884824, "learning_rate": 6.603363353318839e-06, "loss": 0.7677, "step": 9286 }, { "epoch": 0.83, "grad_norm": 4.528253187878936, "learning_rate": 6.60267911369034e-06, "loss": 0.7917, "step": 9287 }, { "epoch": 0.83, "grad_norm": 3.7657088491680293, "learning_rate": 6.601994840609424e-06, "loss": 0.7602, "step": 9288 }, { "epoch": 0.83, "grad_norm": 5.025348767678168, "learning_rate": 6.601310534090375e-06, "loss": 0.8643, "step": 9289 }, { "epoch": 0.83, "grad_norm": 5.46474454884221, "learning_rate": 6.600626194147473e-06, "loss": 0.8021, "step": 9290 }, { "epoch": 0.83, "grad_norm": 4.3419994414191585, "learning_rate": 6.599941820795008e-06, "loss": 0.7729, "step": 9291 }, { "epoch": 0.83, "grad_norm": 5.923456966932649, "learning_rate": 6.59925741404726e-06, "loss": 0.7715, "step": 9292 }, { "epoch": 0.83, "grad_norm": 6.734376007055663, "learning_rate": 6.598572973918516e-06, "loss": 0.7669, "step": 9293 }, { "epoch": 0.83, "grad_norm": 5.187776980489579, "learning_rate": 6.59788850042306e-06, "loss": 0.8017, "step": 9294 }, { "epoch": 0.83, "grad_norm": 5.314042967289795, "learning_rate": 6.597203993575183e-06, "loss": 0.7914, "step": 9295 }, { "epoch": 0.83, "grad_norm": 7.144752312899453, "learning_rate": 6.59651945338917e-06, "loss": 0.8347, "step": 9296 }, { "epoch": 0.83, "grad_norm": 4.7487264447185655, "learning_rate": 6.595834879879308e-06, "loss": 0.7473, "step": 9297 }, { "epoch": 0.83, "grad_norm": 6.120026428640556, "learning_rate": 6.595150273059891e-06, "loss": 0.7516, "step": 9298 }, { "epoch": 0.83, "grad_norm": 4.877638539082457, "learning_rate": 6.594465632945203e-06, "loss": 0.846, "step": 9299 }, { "epoch": 0.83, "grad_norm": 4.5149391174032125, "learning_rate": 6.5937809595495376e-06, "loss": 0.8248, "step": 9300 }, { "epoch": 0.83, "grad_norm": 6.254889248262842, "learning_rate": 6.593096252887186e-06, "loss": 0.6985, "step": 9301 }, { "epoch": 0.83, "grad_norm": 5.946199252511024, "learning_rate": 6.592411512972436e-06, "loss": 0.7505, "step": 9302 }, { "epoch": 0.83, "grad_norm": 9.412465498012743, "learning_rate": 6.591726739819584e-06, "loss": 0.7772, "step": 9303 }, { "epoch": 0.83, "grad_norm": 6.796554786525381, "learning_rate": 6.591041933442923e-06, "loss": 0.7768, "step": 9304 }, { "epoch": 0.83, "grad_norm": 4.602723256971588, "learning_rate": 6.5903570938567464e-06, "loss": 0.7416, "step": 9305 }, { "epoch": 0.83, "grad_norm": 5.098240338197, "learning_rate": 6.5896722210753475e-06, "loss": 0.8607, "step": 9306 }, { "epoch": 0.83, "grad_norm": 5.740825197467731, "learning_rate": 6.588987315113023e-06, "loss": 0.8308, "step": 9307 }, { "epoch": 0.83, "grad_norm": 4.66663838349238, "learning_rate": 6.588302375984067e-06, "loss": 0.7655, "step": 9308 }, { "epoch": 0.83, "grad_norm": 4.60389603558631, "learning_rate": 6.587617403702777e-06, "loss": 0.8069, "step": 9309 }, { "epoch": 0.83, "grad_norm": 5.08797233208417, "learning_rate": 6.586932398283452e-06, "loss": 0.7804, "step": 9310 }, { "epoch": 0.83, "grad_norm": 5.099687459340384, "learning_rate": 6.586247359740387e-06, "loss": 0.7242, "step": 9311 }, { "epoch": 0.83, "grad_norm": 6.479021003152057, "learning_rate": 6.585562288087884e-06, "loss": 0.7919, "step": 9312 }, { "epoch": 0.83, "grad_norm": 5.859486004557756, "learning_rate": 6.584877183340238e-06, "loss": 0.8154, "step": 9313 }, { "epoch": 0.83, "grad_norm": 6.476655648834488, "learning_rate": 6.584192045511755e-06, "loss": 0.7812, "step": 9314 }, { "epoch": 0.83, "grad_norm": 5.325719671832365, "learning_rate": 6.58350687461673e-06, "loss": 0.7672, "step": 9315 }, { "epoch": 0.83, "grad_norm": 5.499036442972537, "learning_rate": 6.582821670669467e-06, "loss": 0.8119, "step": 9316 }, { "epoch": 0.83, "grad_norm": 5.046120769808518, "learning_rate": 6.58213643368427e-06, "loss": 0.8324, "step": 9317 }, { "epoch": 0.83, "grad_norm": 5.4561092148943935, "learning_rate": 6.581451163675436e-06, "loss": 0.7093, "step": 9318 }, { "epoch": 0.83, "grad_norm": 7.240683508485538, "learning_rate": 6.580765860657275e-06, "loss": 0.7479, "step": 9319 }, { "epoch": 0.83, "grad_norm": 5.699579831547706, "learning_rate": 6.580080524644088e-06, "loss": 0.8072, "step": 9320 }, { "epoch": 0.83, "grad_norm": 4.254794584097842, "learning_rate": 6.579395155650179e-06, "loss": 0.699, "step": 9321 }, { "epoch": 0.83, "grad_norm": 5.20031573859095, "learning_rate": 6.578709753689857e-06, "loss": 0.7101, "step": 9322 }, { "epoch": 0.83, "grad_norm": 6.5996288862491035, "learning_rate": 6.578024318777425e-06, "loss": 0.8205, "step": 9323 }, { "epoch": 0.83, "grad_norm": 5.826077833052689, "learning_rate": 6.5773388509271926e-06, "loss": 0.7139, "step": 9324 }, { "epoch": 0.83, "grad_norm": 4.535618711399195, "learning_rate": 6.576653350153463e-06, "loss": 0.7817, "step": 9325 }, { "epoch": 0.83, "grad_norm": 5.347141038170473, "learning_rate": 6.57596781647055e-06, "loss": 0.7377, "step": 9326 }, { "epoch": 0.83, "grad_norm": 6.5458820337352215, "learning_rate": 6.575282249892761e-06, "loss": 0.8087, "step": 9327 }, { "epoch": 0.83, "grad_norm": 6.0411609367458174, "learning_rate": 6.574596650434402e-06, "loss": 0.8362, "step": 9328 }, { "epoch": 0.83, "grad_norm": 6.787303473566768, "learning_rate": 6.573911018109789e-06, "loss": 0.7725, "step": 9329 }, { "epoch": 0.83, "grad_norm": 5.5674753752555475, "learning_rate": 6.573225352933229e-06, "loss": 0.8364, "step": 9330 }, { "epoch": 0.83, "grad_norm": 5.302168181009234, "learning_rate": 6.572539654919034e-06, "loss": 0.7477, "step": 9331 }, { "epoch": 0.83, "grad_norm": 6.307333187435429, "learning_rate": 6.571853924081518e-06, "loss": 0.7542, "step": 9332 }, { "epoch": 0.83, "grad_norm": 4.2774890246825645, "learning_rate": 6.571168160434995e-06, "loss": 0.8702, "step": 9333 }, { "epoch": 0.83, "grad_norm": 5.102749680764938, "learning_rate": 6.570482363993777e-06, "loss": 0.7629, "step": 9334 }, { "epoch": 0.83, "grad_norm": 4.393026536173051, "learning_rate": 6.569796534772177e-06, "loss": 0.722, "step": 9335 }, { "epoch": 0.83, "grad_norm": 4.723600092369484, "learning_rate": 6.569110672784515e-06, "loss": 0.7441, "step": 9336 }, { "epoch": 0.83, "grad_norm": 4.192978020884205, "learning_rate": 6.568424778045102e-06, "loss": 0.7619, "step": 9337 }, { "epoch": 0.83, "grad_norm": 5.210185148054354, "learning_rate": 6.567738850568257e-06, "loss": 0.7036, "step": 9338 }, { "epoch": 0.83, "grad_norm": 4.796429547702919, "learning_rate": 6.567052890368299e-06, "loss": 0.817, "step": 9339 }, { "epoch": 0.83, "grad_norm": 4.398107835952964, "learning_rate": 6.566366897459541e-06, "loss": 0.7668, "step": 9340 }, { "epoch": 0.83, "grad_norm": 4.012061984595038, "learning_rate": 6.565680871856304e-06, "loss": 0.8193, "step": 9341 }, { "epoch": 0.83, "grad_norm": 5.143235548265437, "learning_rate": 6.564994813572909e-06, "loss": 0.7825, "step": 9342 }, { "epoch": 0.83, "grad_norm": 6.620892845628608, "learning_rate": 6.564308722623675e-06, "loss": 0.8344, "step": 9343 }, { "epoch": 0.83, "grad_norm": 5.241028274455972, "learning_rate": 6.56362259902292e-06, "loss": 0.7142, "step": 9344 }, { "epoch": 0.83, "grad_norm": 6.625125090218979, "learning_rate": 6.5629364427849684e-06, "loss": 0.7598, "step": 9345 }, { "epoch": 0.83, "grad_norm": 5.8539986274109035, "learning_rate": 6.562250253924143e-06, "loss": 0.7442, "step": 9346 }, { "epoch": 0.83, "grad_norm": 5.074766112144884, "learning_rate": 6.5615640324547635e-06, "loss": 0.7802, "step": 9347 }, { "epoch": 0.83, "grad_norm": 5.464471279938751, "learning_rate": 6.560877778391154e-06, "loss": 0.71, "step": 9348 }, { "epoch": 0.83, "grad_norm": 7.1676974237056665, "learning_rate": 6.5601914917476405e-06, "loss": 0.72, "step": 9349 }, { "epoch": 0.83, "grad_norm": 5.338109431074726, "learning_rate": 6.559505172538545e-06, "loss": 0.7845, "step": 9350 }, { "epoch": 0.83, "grad_norm": 6.69799995446669, "learning_rate": 6.558818820778195e-06, "loss": 0.7059, "step": 9351 }, { "epoch": 0.83, "grad_norm": 5.823735261747067, "learning_rate": 6.558132436480918e-06, "loss": 0.7604, "step": 9352 }, { "epoch": 0.83, "grad_norm": 6.165204624555681, "learning_rate": 6.557446019661036e-06, "loss": 0.8237, "step": 9353 }, { "epoch": 0.83, "grad_norm": 5.668712238592543, "learning_rate": 6.556759570332878e-06, "loss": 0.8277, "step": 9354 }, { "epoch": 0.83, "grad_norm": 4.5360945942898905, "learning_rate": 6.556073088510776e-06, "loss": 0.8131, "step": 9355 }, { "epoch": 0.83, "grad_norm": 5.235499108088945, "learning_rate": 6.555386574209054e-06, "loss": 0.8363, "step": 9356 }, { "epoch": 0.83, "grad_norm": 5.1336195701134075, "learning_rate": 6.554700027442044e-06, "loss": 0.764, "step": 9357 }, { "epoch": 0.83, "grad_norm": 9.387252824520331, "learning_rate": 6.5540134482240766e-06, "loss": 0.7796, "step": 9358 }, { "epoch": 0.83, "grad_norm": 6.202915048337988, "learning_rate": 6.553326836569481e-06, "loss": 0.7527, "step": 9359 }, { "epoch": 0.84, "grad_norm": 4.918427239961853, "learning_rate": 6.55264019249259e-06, "loss": 0.7519, "step": 9360 }, { "epoch": 0.84, "grad_norm": 4.9670865152616, "learning_rate": 6.551953516007734e-06, "loss": 0.7769, "step": 9361 }, { "epoch": 0.84, "grad_norm": 5.3582982647282495, "learning_rate": 6.551266807129249e-06, "loss": 0.7535, "step": 9362 }, { "epoch": 0.84, "grad_norm": 4.784733914049712, "learning_rate": 6.5505800658714635e-06, "loss": 0.785, "step": 9363 }, { "epoch": 0.84, "grad_norm": 4.866126456100069, "learning_rate": 6.549893292248717e-06, "loss": 0.7668, "step": 9364 }, { "epoch": 0.84, "grad_norm": 4.7093507920902455, "learning_rate": 6.549206486275343e-06, "loss": 0.8078, "step": 9365 }, { "epoch": 0.84, "grad_norm": 6.392679800846632, "learning_rate": 6.548519647965675e-06, "loss": 0.9365, "step": 9366 }, { "epoch": 0.84, "grad_norm": 5.424821099396832, "learning_rate": 6.547832777334051e-06, "loss": 0.7181, "step": 9367 }, { "epoch": 0.84, "grad_norm": 5.103780868493945, "learning_rate": 6.547145874394807e-06, "loss": 0.8768, "step": 9368 }, { "epoch": 0.84, "grad_norm": 5.860755170616341, "learning_rate": 6.5464589391622815e-06, "loss": 0.7193, "step": 9369 }, { "epoch": 0.84, "grad_norm": 5.099389586548273, "learning_rate": 6.545771971650812e-06, "loss": 0.7725, "step": 9370 }, { "epoch": 0.84, "grad_norm": 3.698505409116462, "learning_rate": 6.545084971874738e-06, "loss": 0.8299, "step": 9371 }, { "epoch": 0.84, "grad_norm": 6.149108424180016, "learning_rate": 6.544397939848399e-06, "loss": 0.7978, "step": 9372 }, { "epoch": 0.84, "grad_norm": 5.382091344588842, "learning_rate": 6.543710875586134e-06, "loss": 0.8239, "step": 9373 }, { "epoch": 0.84, "grad_norm": 6.057556909934731, "learning_rate": 6.5430237791022875e-06, "loss": 0.7429, "step": 9374 }, { "epoch": 0.84, "grad_norm": 4.506799028951691, "learning_rate": 6.542336650411197e-06, "loss": 0.7948, "step": 9375 }, { "epoch": 0.84, "grad_norm": 5.625574976807894, "learning_rate": 6.5416494895272065e-06, "loss": 0.8271, "step": 9376 }, { "epoch": 0.84, "grad_norm": 4.448150484599392, "learning_rate": 6.54096229646466e-06, "loss": 0.7868, "step": 9377 }, { "epoch": 0.84, "grad_norm": 5.3058796499741945, "learning_rate": 6.540275071237899e-06, "loss": 0.696, "step": 9378 }, { "epoch": 0.84, "grad_norm": 4.604166138188836, "learning_rate": 6.539587813861271e-06, "loss": 0.7196, "step": 9379 }, { "epoch": 0.84, "grad_norm": 6.768993685714996, "learning_rate": 6.538900524349117e-06, "loss": 0.8068, "step": 9380 }, { "epoch": 0.84, "grad_norm": 5.30224679324688, "learning_rate": 6.538213202715787e-06, "loss": 0.6955, "step": 9381 }, { "epoch": 0.84, "grad_norm": 6.562395721617138, "learning_rate": 6.537525848975623e-06, "loss": 0.767, "step": 9382 }, { "epoch": 0.84, "grad_norm": 5.980475884469279, "learning_rate": 6.536838463142973e-06, "loss": 0.6983, "step": 9383 }, { "epoch": 0.84, "grad_norm": 7.1649780130294145, "learning_rate": 6.536151045232188e-06, "loss": 0.6985, "step": 9384 }, { "epoch": 0.84, "grad_norm": 4.765909171535991, "learning_rate": 6.535463595257614e-06, "loss": 0.7363, "step": 9385 }, { "epoch": 0.84, "grad_norm": 6.726795864361377, "learning_rate": 6.534776113233599e-06, "loss": 0.7618, "step": 9386 }, { "epoch": 0.84, "grad_norm": 5.988626692830045, "learning_rate": 6.534088599174495e-06, "loss": 0.7534, "step": 9387 }, { "epoch": 0.84, "grad_norm": 5.183295830765261, "learning_rate": 6.53340105309465e-06, "loss": 0.8292, "step": 9388 }, { "epoch": 0.84, "grad_norm": 5.32899631133422, "learning_rate": 6.532713475008416e-06, "loss": 0.7988, "step": 9389 }, { "epoch": 0.84, "grad_norm": 6.62868168421306, "learning_rate": 6.532025864930145e-06, "loss": 0.8586, "step": 9390 }, { "epoch": 0.84, "grad_norm": 4.282997199850087, "learning_rate": 6.531338222874189e-06, "loss": 0.7982, "step": 9391 }, { "epoch": 0.84, "grad_norm": 6.061076536348874, "learning_rate": 6.530650548854901e-06, "loss": 0.8003, "step": 9392 }, { "epoch": 0.84, "grad_norm": 6.237239668460433, "learning_rate": 6.529962842886637e-06, "loss": 0.7582, "step": 9393 }, { "epoch": 0.84, "grad_norm": 5.741380066204617, "learning_rate": 6.529275104983748e-06, "loss": 0.7669, "step": 9394 }, { "epoch": 0.84, "grad_norm": 4.938825262478454, "learning_rate": 6.528587335160589e-06, "loss": 0.7561, "step": 9395 }, { "epoch": 0.84, "grad_norm": 6.751444779612163, "learning_rate": 6.527899533431516e-06, "loss": 0.7518, "step": 9396 }, { "epoch": 0.84, "grad_norm": 5.736820732310771, "learning_rate": 6.527211699810889e-06, "loss": 0.7923, "step": 9397 }, { "epoch": 0.84, "grad_norm": 7.474309280000098, "learning_rate": 6.526523834313061e-06, "loss": 0.7761, "step": 9398 }, { "epoch": 0.84, "grad_norm": 5.220839179871017, "learning_rate": 6.525835936952391e-06, "loss": 0.7618, "step": 9399 }, { "epoch": 0.84, "grad_norm": 6.931245666645234, "learning_rate": 6.525148007743238e-06, "loss": 0.825, "step": 9400 }, { "epoch": 0.84, "grad_norm": 5.897219521044759, "learning_rate": 6.524460046699961e-06, "loss": 0.7739, "step": 9401 }, { "epoch": 0.84, "grad_norm": 6.373075572151494, "learning_rate": 6.523772053836918e-06, "loss": 0.7382, "step": 9402 }, { "epoch": 0.84, "grad_norm": 7.041556308792199, "learning_rate": 6.5230840291684705e-06, "loss": 0.7266, "step": 9403 }, { "epoch": 0.84, "grad_norm": 5.7045779837449375, "learning_rate": 6.522395972708978e-06, "loss": 0.7802, "step": 9404 }, { "epoch": 0.84, "grad_norm": 6.6407704344506895, "learning_rate": 6.521707884472805e-06, "loss": 0.8273, "step": 9405 }, { "epoch": 0.84, "grad_norm": 5.889276153940064, "learning_rate": 6.521019764474312e-06, "loss": 0.8027, "step": 9406 }, { "epoch": 0.84, "grad_norm": 6.678790910575011, "learning_rate": 6.520331612727863e-06, "loss": 0.8067, "step": 9407 }, { "epoch": 0.84, "grad_norm": 5.273262913229003, "learning_rate": 6.519643429247821e-06, "loss": 0.7727, "step": 9408 }, { "epoch": 0.84, "grad_norm": 4.979193303493956, "learning_rate": 6.51895521404855e-06, "loss": 0.8126, "step": 9409 }, { "epoch": 0.84, "grad_norm": 6.044569055114587, "learning_rate": 6.518266967144416e-06, "loss": 0.7986, "step": 9410 }, { "epoch": 0.84, "grad_norm": 5.774187163894605, "learning_rate": 6.517578688549783e-06, "loss": 0.7885, "step": 9411 }, { "epoch": 0.84, "grad_norm": 4.9336235379802025, "learning_rate": 6.516890378279019e-06, "loss": 0.7662, "step": 9412 }, { "epoch": 0.84, "grad_norm": 5.303508484621717, "learning_rate": 6.5162020363464896e-06, "loss": 0.7068, "step": 9413 }, { "epoch": 0.84, "grad_norm": 5.603597362731738, "learning_rate": 6.515513662766563e-06, "loss": 0.8061, "step": 9414 }, { "epoch": 0.84, "grad_norm": 5.9762408523613075, "learning_rate": 6.514825257553608e-06, "loss": 0.7211, "step": 9415 }, { "epoch": 0.84, "grad_norm": 6.5628098732941025, "learning_rate": 6.514136820721995e-06, "loss": 0.7499, "step": 9416 }, { "epoch": 0.84, "grad_norm": 5.189217828087101, "learning_rate": 6.513448352286089e-06, "loss": 0.7373, "step": 9417 }, { "epoch": 0.84, "grad_norm": 4.627368002657835, "learning_rate": 6.512759852260264e-06, "loss": 0.725, "step": 9418 }, { "epoch": 0.84, "grad_norm": 7.012489715560954, "learning_rate": 6.51207132065889e-06, "loss": 0.7112, "step": 9419 }, { "epoch": 0.84, "grad_norm": 6.454838362730212, "learning_rate": 6.5113827574963385e-06, "loss": 0.8101, "step": 9420 }, { "epoch": 0.84, "grad_norm": 5.2168377924351335, "learning_rate": 6.510694162786982e-06, "loss": 0.7265, "step": 9421 }, { "epoch": 0.84, "grad_norm": 4.693202391128046, "learning_rate": 6.510005536545193e-06, "loss": 0.7997, "step": 9422 }, { "epoch": 0.84, "grad_norm": 5.595378103239235, "learning_rate": 6.509316878785345e-06, "loss": 0.8162, "step": 9423 }, { "epoch": 0.84, "grad_norm": 5.239281820173799, "learning_rate": 6.508628189521814e-06, "loss": 0.7297, "step": 9424 }, { "epoch": 0.84, "grad_norm": 4.452615824145354, "learning_rate": 6.5079394687689725e-06, "loss": 0.8155, "step": 9425 }, { "epoch": 0.84, "grad_norm": 5.897663840133408, "learning_rate": 6.5072507165411955e-06, "loss": 0.8329, "step": 9426 }, { "epoch": 0.84, "grad_norm": 4.910108177528046, "learning_rate": 6.5065619328528615e-06, "loss": 0.7672, "step": 9427 }, { "epoch": 0.84, "grad_norm": 6.203014868969037, "learning_rate": 6.505873117718349e-06, "loss": 0.7812, "step": 9428 }, { "epoch": 0.84, "grad_norm": 5.558775238792903, "learning_rate": 6.50518427115203e-06, "loss": 0.811, "step": 9429 }, { "epoch": 0.84, "grad_norm": 6.478090989184489, "learning_rate": 6.504495393168287e-06, "loss": 0.7497, "step": 9430 }, { "epoch": 0.84, "grad_norm": 6.296602875538748, "learning_rate": 6.5038064837814965e-06, "loss": 0.7583, "step": 9431 }, { "epoch": 0.84, "grad_norm": 7.309645838031168, "learning_rate": 6.503117543006039e-06, "loss": 0.7294, "step": 9432 }, { "epoch": 0.84, "grad_norm": 5.659402894943269, "learning_rate": 6.502428570856295e-06, "loss": 0.7706, "step": 9433 }, { "epoch": 0.84, "grad_norm": 4.936603196584094, "learning_rate": 6.501739567346644e-06, "loss": 0.9055, "step": 9434 }, { "epoch": 0.84, "grad_norm": 6.284664429628865, "learning_rate": 6.50105053249147e-06, "loss": 0.8351, "step": 9435 }, { "epoch": 0.84, "grad_norm": 4.260013905503574, "learning_rate": 6.5003614663051515e-06, "loss": 0.6766, "step": 9436 }, { "epoch": 0.84, "grad_norm": 4.0709590724946665, "learning_rate": 6.4996723688020745e-06, "loss": 0.7964, "step": 9437 }, { "epoch": 0.84, "grad_norm": 6.2697306878779475, "learning_rate": 6.498983239996621e-06, "loss": 0.7703, "step": 9438 }, { "epoch": 0.84, "grad_norm": 6.645803437456238, "learning_rate": 6.498294079903175e-06, "loss": 0.7786, "step": 9439 }, { "epoch": 0.84, "grad_norm": 5.842480487030007, "learning_rate": 6.4976048885361195e-06, "loss": 0.6554, "step": 9440 }, { "epoch": 0.84, "grad_norm": 6.959448800868754, "learning_rate": 6.496915665909845e-06, "loss": 0.7559, "step": 9441 }, { "epoch": 0.84, "grad_norm": 4.619694559534239, "learning_rate": 6.496226412038731e-06, "loss": 0.7706, "step": 9442 }, { "epoch": 0.84, "grad_norm": 5.499554689360852, "learning_rate": 6.495537126937168e-06, "loss": 0.839, "step": 9443 }, { "epoch": 0.84, "grad_norm": 4.780274543954469, "learning_rate": 6.494847810619544e-06, "loss": 0.794, "step": 9444 }, { "epoch": 0.84, "grad_norm": 5.242326466764863, "learning_rate": 6.494158463100245e-06, "loss": 0.7019, "step": 9445 }, { "epoch": 0.84, "grad_norm": 4.862876868982146, "learning_rate": 6.49346908439366e-06, "loss": 0.7714, "step": 9446 }, { "epoch": 0.84, "grad_norm": 5.494067671312142, "learning_rate": 6.492779674514178e-06, "loss": 0.7424, "step": 9447 }, { "epoch": 0.84, "grad_norm": 4.082272362418472, "learning_rate": 6.492090233476191e-06, "loss": 0.8138, "step": 9448 }, { "epoch": 0.84, "grad_norm": 5.98451963811429, "learning_rate": 6.491400761294086e-06, "loss": 0.7302, "step": 9449 }, { "epoch": 0.84, "grad_norm": 5.9084431710916085, "learning_rate": 6.490711257982258e-06, "loss": 0.7329, "step": 9450 }, { "epoch": 0.84, "grad_norm": 8.232235224728774, "learning_rate": 6.490021723555095e-06, "loss": 0.7817, "step": 9451 }, { "epoch": 0.84, "grad_norm": 5.102128028204994, "learning_rate": 6.489332158026994e-06, "loss": 0.8103, "step": 9452 }, { "epoch": 0.84, "grad_norm": 7.54179087787043, "learning_rate": 6.488642561412344e-06, "loss": 0.8335, "step": 9453 }, { "epoch": 0.84, "grad_norm": 4.889359196317132, "learning_rate": 6.487952933725542e-06, "loss": 0.8135, "step": 9454 }, { "epoch": 0.84, "grad_norm": 5.821643885785941, "learning_rate": 6.48726327498098e-06, "loss": 0.8007, "step": 9455 }, { "epoch": 0.84, "grad_norm": 5.114789199067065, "learning_rate": 6.486573585193054e-06, "loss": 0.7188, "step": 9456 }, { "epoch": 0.84, "grad_norm": 6.071268844526726, "learning_rate": 6.485883864376161e-06, "loss": 0.8337, "step": 9457 }, { "epoch": 0.84, "grad_norm": 5.684197524411109, "learning_rate": 6.485194112544696e-06, "loss": 0.778, "step": 9458 }, { "epoch": 0.84, "grad_norm": 4.43065998690409, "learning_rate": 6.484504329713057e-06, "loss": 0.7983, "step": 9459 }, { "epoch": 0.84, "grad_norm": 4.199418890114732, "learning_rate": 6.4838145158956415e-06, "loss": 0.7717, "step": 9460 }, { "epoch": 0.84, "grad_norm": 7.274225217919038, "learning_rate": 6.483124671106845e-06, "loss": 0.8048, "step": 9461 }, { "epoch": 0.84, "grad_norm": 5.754643860973987, "learning_rate": 6.48243479536107e-06, "loss": 0.7138, "step": 9462 }, { "epoch": 0.84, "grad_norm": 5.698038873216083, "learning_rate": 6.4817448886727164e-06, "loss": 0.8015, "step": 9463 }, { "epoch": 0.84, "grad_norm": 5.77493837204741, "learning_rate": 6.481054951056181e-06, "loss": 0.7363, "step": 9464 }, { "epoch": 0.84, "grad_norm": 5.789577075158253, "learning_rate": 6.480364982525869e-06, "loss": 0.7795, "step": 9465 }, { "epoch": 0.84, "grad_norm": 4.930475254000997, "learning_rate": 6.479674983096179e-06, "loss": 0.7337, "step": 9466 }, { "epoch": 0.84, "grad_norm": 6.510164872189907, "learning_rate": 6.4789849527815156e-06, "loss": 0.7667, "step": 9467 }, { "epoch": 0.84, "grad_norm": 7.084035067866466, "learning_rate": 6.478294891596279e-06, "loss": 0.8784, "step": 9468 }, { "epoch": 0.84, "grad_norm": 6.401443727030793, "learning_rate": 6.477604799554873e-06, "loss": 0.7713, "step": 9469 }, { "epoch": 0.84, "grad_norm": 5.429276089639123, "learning_rate": 6.476914676671704e-06, "loss": 0.8047, "step": 9470 }, { "epoch": 0.84, "grad_norm": 6.0953449004326865, "learning_rate": 6.476224522961175e-06, "loss": 0.7899, "step": 9471 }, { "epoch": 0.84, "grad_norm": 6.1193668121232045, "learning_rate": 6.4755343384376915e-06, "loss": 0.8291, "step": 9472 }, { "epoch": 0.85, "grad_norm": 7.8491094297630895, "learning_rate": 6.474844123115662e-06, "loss": 0.7993, "step": 9473 }, { "epoch": 0.85, "grad_norm": 6.382945887061991, "learning_rate": 6.474153877009489e-06, "loss": 0.8617, "step": 9474 }, { "epoch": 0.85, "grad_norm": 6.3604486240660005, "learning_rate": 6.473463600133583e-06, "loss": 0.7681, "step": 9475 }, { "epoch": 0.85, "grad_norm": 7.706659629893536, "learning_rate": 6.4727732925023526e-06, "loss": 0.768, "step": 9476 }, { "epoch": 0.85, "grad_norm": 4.4809384329189985, "learning_rate": 6.472082954130203e-06, "loss": 0.8078, "step": 9477 }, { "epoch": 0.85, "grad_norm": 6.725671252496022, "learning_rate": 6.471392585031545e-06, "loss": 0.8189, "step": 9478 }, { "epoch": 0.85, "grad_norm": 6.044534122313022, "learning_rate": 6.470702185220792e-06, "loss": 0.7752, "step": 9479 }, { "epoch": 0.85, "grad_norm": 6.062055885616479, "learning_rate": 6.47001175471235e-06, "loss": 0.786, "step": 9480 }, { "epoch": 0.85, "grad_norm": 6.0646247179523245, "learning_rate": 6.4693212935206314e-06, "loss": 0.8016, "step": 9481 }, { "epoch": 0.85, "grad_norm": 6.7348589853236955, "learning_rate": 6.468630801660048e-06, "loss": 0.7555, "step": 9482 }, { "epoch": 0.85, "grad_norm": 6.861196209037025, "learning_rate": 6.467940279145015e-06, "loss": 0.7878, "step": 9483 }, { "epoch": 0.85, "grad_norm": 6.817401010710393, "learning_rate": 6.467249725989941e-06, "loss": 0.76, "step": 9484 }, { "epoch": 0.85, "grad_norm": 6.608166257131713, "learning_rate": 6.4665591422092445e-06, "loss": 0.8293, "step": 9485 }, { "epoch": 0.85, "grad_norm": 5.096770724347656, "learning_rate": 6.465868527817337e-06, "loss": 0.7122, "step": 9486 }, { "epoch": 0.85, "grad_norm": 4.639885021912108, "learning_rate": 6.465177882828632e-06, "loss": 0.7508, "step": 9487 }, { "epoch": 0.85, "grad_norm": 7.628082305577703, "learning_rate": 6.46448720725755e-06, "loss": 0.7956, "step": 9488 }, { "epoch": 0.85, "grad_norm": 5.579388018772583, "learning_rate": 6.463796501118506e-06, "loss": 0.7705, "step": 9489 }, { "epoch": 0.85, "grad_norm": 6.020931787607997, "learning_rate": 6.463105764425912e-06, "loss": 0.7713, "step": 9490 }, { "epoch": 0.85, "grad_norm": 4.038920757437064, "learning_rate": 6.462414997194191e-06, "loss": 0.7071, "step": 9491 }, { "epoch": 0.85, "grad_norm": 7.744117498739114, "learning_rate": 6.46172419943776e-06, "loss": 0.803, "step": 9492 }, { "epoch": 0.85, "grad_norm": 6.2790810665014405, "learning_rate": 6.461033371171037e-06, "loss": 0.8514, "step": 9493 }, { "epoch": 0.85, "grad_norm": 6.529085062590118, "learning_rate": 6.460342512408442e-06, "loss": 0.8022, "step": 9494 }, { "epoch": 0.85, "grad_norm": 4.537193277723638, "learning_rate": 6.459651623164394e-06, "loss": 0.7808, "step": 9495 }, { "epoch": 0.85, "grad_norm": 6.1996721020317205, "learning_rate": 6.458960703453317e-06, "loss": 0.7426, "step": 9496 }, { "epoch": 0.85, "grad_norm": 5.751659537916116, "learning_rate": 6.45826975328963e-06, "loss": 0.7583, "step": 9497 }, { "epoch": 0.85, "grad_norm": 6.826849393663395, "learning_rate": 6.457578772687755e-06, "loss": 0.7606, "step": 9498 }, { "epoch": 0.85, "grad_norm": 6.067813193084385, "learning_rate": 6.456887761662114e-06, "loss": 0.7334, "step": 9499 }, { "epoch": 0.85, "grad_norm": 4.654639858166285, "learning_rate": 6.456196720227133e-06, "loss": 0.7529, "step": 9500 }, { "epoch": 0.85, "grad_norm": 4.466501323227361, "learning_rate": 6.455505648397234e-06, "loss": 0.7689, "step": 9501 }, { "epoch": 0.85, "grad_norm": 4.837091725243561, "learning_rate": 6.454814546186842e-06, "loss": 0.7422, "step": 9502 }, { "epoch": 0.85, "grad_norm": 7.2314110244705025, "learning_rate": 6.454123413610383e-06, "loss": 0.774, "step": 9503 }, { "epoch": 0.85, "grad_norm": 5.759604749124786, "learning_rate": 6.4534322506822814e-06, "loss": 0.7369, "step": 9504 }, { "epoch": 0.85, "grad_norm": 5.405381900443038, "learning_rate": 6.452741057416965e-06, "loss": 0.7284, "step": 9505 }, { "epoch": 0.85, "grad_norm": 5.4367036372457225, "learning_rate": 6.45204983382886e-06, "loss": 0.7382, "step": 9506 }, { "epoch": 0.85, "grad_norm": 6.542157677493962, "learning_rate": 6.451358579932394e-06, "loss": 0.8405, "step": 9507 }, { "epoch": 0.85, "grad_norm": 4.973240321821131, "learning_rate": 6.450667295741998e-06, "loss": 0.7874, "step": 9508 }, { "epoch": 0.85, "grad_norm": 5.016150237838678, "learning_rate": 6.449975981272098e-06, "loss": 0.7691, "step": 9509 }, { "epoch": 0.85, "grad_norm": 6.331024528250204, "learning_rate": 6.449284636537126e-06, "loss": 0.8072, "step": 9510 }, { "epoch": 0.85, "grad_norm": 4.240646167379919, "learning_rate": 6.44859326155151e-06, "loss": 0.7033, "step": 9511 }, { "epoch": 0.85, "grad_norm": 5.637427449026751, "learning_rate": 6.447901856329681e-06, "loss": 0.7685, "step": 9512 }, { "epoch": 0.85, "grad_norm": 5.721048347316337, "learning_rate": 6.447210420886072e-06, "loss": 0.7902, "step": 9513 }, { "epoch": 0.85, "grad_norm": 4.969690827665251, "learning_rate": 6.446518955235117e-06, "loss": 0.7581, "step": 9514 }, { "epoch": 0.85, "grad_norm": 6.256913585650462, "learning_rate": 6.445827459391244e-06, "loss": 0.7854, "step": 9515 }, { "epoch": 0.85, "grad_norm": 5.4876556749167715, "learning_rate": 6.4451359333688915e-06, "loss": 0.772, "step": 9516 }, { "epoch": 0.85, "grad_norm": 5.33707408536969, "learning_rate": 6.444444377182488e-06, "loss": 0.7986, "step": 9517 }, { "epoch": 0.85, "grad_norm": 5.731884410582803, "learning_rate": 6.443752790846475e-06, "loss": 0.7464, "step": 9518 }, { "epoch": 0.85, "grad_norm": 5.912347865598169, "learning_rate": 6.4430611743752815e-06, "loss": 0.7782, "step": 9519 }, { "epoch": 0.85, "grad_norm": 5.784455606447906, "learning_rate": 6.442369527783347e-06, "loss": 0.7287, "step": 9520 }, { "epoch": 0.85, "grad_norm": 5.721981437503124, "learning_rate": 6.441677851085109e-06, "loss": 0.7469, "step": 9521 }, { "epoch": 0.85, "grad_norm": 5.252140621174557, "learning_rate": 6.440986144295001e-06, "loss": 0.7972, "step": 9522 }, { "epoch": 0.85, "grad_norm": 7.306176448046879, "learning_rate": 6.440294407427462e-06, "loss": 0.7982, "step": 9523 }, { "epoch": 0.85, "grad_norm": 7.425681974727794, "learning_rate": 6.439602640496935e-06, "loss": 0.7702, "step": 9524 }, { "epoch": 0.85, "grad_norm": 5.110392129159989, "learning_rate": 6.438910843517854e-06, "loss": 0.7755, "step": 9525 }, { "epoch": 0.85, "grad_norm": 4.941812063145456, "learning_rate": 6.438219016504659e-06, "loss": 0.7008, "step": 9526 }, { "epoch": 0.85, "grad_norm": 5.498904023881269, "learning_rate": 6.437527159471793e-06, "loss": 0.7006, "step": 9527 }, { "epoch": 0.85, "grad_norm": 4.778417742680642, "learning_rate": 6.436835272433694e-06, "loss": 0.7732, "step": 9528 }, { "epoch": 0.85, "grad_norm": 5.995832543564532, "learning_rate": 6.436143355404807e-06, "loss": 0.6732, "step": 9529 }, { "epoch": 0.85, "grad_norm": 5.91037264830442, "learning_rate": 6.435451408399572e-06, "loss": 0.7684, "step": 9530 }, { "epoch": 0.85, "grad_norm": 6.1141333054641205, "learning_rate": 6.434759431432434e-06, "loss": 0.755, "step": 9531 }, { "epoch": 0.85, "grad_norm": 5.132325941619388, "learning_rate": 6.434067424517832e-06, "loss": 0.7182, "step": 9532 }, { "epoch": 0.85, "grad_norm": 4.903674209103317, "learning_rate": 6.433375387670215e-06, "loss": 0.7784, "step": 9533 }, { "epoch": 0.85, "grad_norm": 5.406649848598607, "learning_rate": 6.4326833209040255e-06, "loss": 0.6866, "step": 9534 }, { "epoch": 0.85, "grad_norm": 6.742680650769454, "learning_rate": 6.431991224233709e-06, "loss": 0.7314, "step": 9535 }, { "epoch": 0.85, "grad_norm": 6.499342225988305, "learning_rate": 6.431299097673712e-06, "loss": 0.815, "step": 9536 }, { "epoch": 0.85, "grad_norm": 4.996583204365729, "learning_rate": 6.43060694123848e-06, "loss": 0.7459, "step": 9537 }, { "epoch": 0.85, "grad_norm": 6.542806627885849, "learning_rate": 6.429914754942462e-06, "loss": 0.735, "step": 9538 }, { "epoch": 0.85, "grad_norm": 6.106008251487081, "learning_rate": 6.4292225388001035e-06, "loss": 0.656, "step": 9539 }, { "epoch": 0.85, "grad_norm": 5.698595446220986, "learning_rate": 6.428530292825856e-06, "loss": 0.7344, "step": 9540 }, { "epoch": 0.85, "grad_norm": 5.458587189233385, "learning_rate": 6.427838017034167e-06, "loss": 0.6875, "step": 9541 }, { "epoch": 0.85, "grad_norm": 5.374955447314323, "learning_rate": 6.427145711439485e-06, "loss": 0.8394, "step": 9542 }, { "epoch": 0.85, "grad_norm": 5.038252284986005, "learning_rate": 6.426453376056264e-06, "loss": 0.7893, "step": 9543 }, { "epoch": 0.85, "grad_norm": 7.525123537903482, "learning_rate": 6.425761010898951e-06, "loss": 0.7484, "step": 9544 }, { "epoch": 0.85, "grad_norm": 4.377710573467817, "learning_rate": 6.425068615982e-06, "loss": 0.8033, "step": 9545 }, { "epoch": 0.85, "grad_norm": 5.501009216616729, "learning_rate": 6.424376191319862e-06, "loss": 0.7719, "step": 9546 }, { "epoch": 0.85, "grad_norm": 6.52391824500763, "learning_rate": 6.42368373692699e-06, "loss": 0.8396, "step": 9547 }, { "epoch": 0.85, "grad_norm": 7.002609928414761, "learning_rate": 6.4229912528178385e-06, "loss": 0.7063, "step": 9548 }, { "epoch": 0.85, "grad_norm": 5.617952095215382, "learning_rate": 6.42229873900686e-06, "loss": 0.8053, "step": 9549 }, { "epoch": 0.85, "grad_norm": 4.236710405032114, "learning_rate": 6.4216061955085105e-06, "loss": 0.7575, "step": 9550 }, { "epoch": 0.85, "grad_norm": 5.429146937323263, "learning_rate": 6.420913622337245e-06, "loss": 0.7842, "step": 9551 }, { "epoch": 0.85, "grad_norm": 6.205821386155081, "learning_rate": 6.42022101950752e-06, "loss": 0.6841, "step": 9552 }, { "epoch": 0.85, "grad_norm": 5.180702946289573, "learning_rate": 6.419528387033791e-06, "loss": 0.7964, "step": 9553 }, { "epoch": 0.85, "grad_norm": 5.730448785339682, "learning_rate": 6.4188357249305155e-06, "loss": 0.8772, "step": 9554 }, { "epoch": 0.85, "grad_norm": 5.56921917396458, "learning_rate": 6.4181430332121506e-06, "loss": 0.7916, "step": 9555 }, { "epoch": 0.85, "grad_norm": 6.098898727370793, "learning_rate": 6.417450311893157e-06, "loss": 0.7349, "step": 9556 }, { "epoch": 0.85, "grad_norm": 6.033889672998069, "learning_rate": 6.416757560987991e-06, "loss": 0.7626, "step": 9557 }, { "epoch": 0.85, "grad_norm": 5.605419462432562, "learning_rate": 6.416064780511114e-06, "loss": 0.7203, "step": 9558 }, { "epoch": 0.85, "grad_norm": 4.895344987713936, "learning_rate": 6.4153719704769865e-06, "loss": 0.8078, "step": 9559 }, { "epoch": 0.85, "grad_norm": 9.735344369692337, "learning_rate": 6.41467913090007e-06, "loss": 0.7712, "step": 9560 }, { "epoch": 0.85, "grad_norm": 4.132816373187437, "learning_rate": 6.413986261794823e-06, "loss": 0.7255, "step": 9561 }, { "epoch": 0.85, "grad_norm": 8.176762313962836, "learning_rate": 6.41329336317571e-06, "loss": 0.7797, "step": 9562 }, { "epoch": 0.85, "grad_norm": 4.553582987551326, "learning_rate": 6.412600435057191e-06, "loss": 0.7595, "step": 9563 }, { "epoch": 0.85, "grad_norm": 5.157066524220565, "learning_rate": 6.411907477453734e-06, "loss": 0.7705, "step": 9564 }, { "epoch": 0.85, "grad_norm": 4.701113316142415, "learning_rate": 6.411214490379799e-06, "loss": 0.7673, "step": 9565 }, { "epoch": 0.85, "grad_norm": 5.599217784664563, "learning_rate": 6.410521473849852e-06, "loss": 0.7874, "step": 9566 }, { "epoch": 0.85, "grad_norm": 6.570601781727519, "learning_rate": 6.4098284278783595e-06, "loss": 0.8087, "step": 9567 }, { "epoch": 0.85, "grad_norm": 5.01071703376564, "learning_rate": 6.409135352479785e-06, "loss": 0.7366, "step": 9568 }, { "epoch": 0.85, "grad_norm": 5.5568491721604465, "learning_rate": 6.408442247668597e-06, "loss": 0.7421, "step": 9569 }, { "epoch": 0.85, "grad_norm": 3.987956524730392, "learning_rate": 6.407749113459261e-06, "loss": 0.7401, "step": 9570 }, { "epoch": 0.85, "grad_norm": 7.423050384354918, "learning_rate": 6.407055949866243e-06, "loss": 0.6945, "step": 9571 }, { "epoch": 0.85, "grad_norm": 4.070290825002088, "learning_rate": 6.4063627569040164e-06, "loss": 0.7409, "step": 9572 }, { "epoch": 0.85, "grad_norm": 7.519554953437143, "learning_rate": 6.405669534587046e-06, "loss": 0.7601, "step": 9573 }, { "epoch": 0.85, "grad_norm": 6.73981409898587, "learning_rate": 6.404976282929802e-06, "loss": 0.7829, "step": 9574 }, { "epoch": 0.85, "grad_norm": 6.348567953293338, "learning_rate": 6.404283001946757e-06, "loss": 0.736, "step": 9575 }, { "epoch": 0.85, "grad_norm": 4.794586639815102, "learning_rate": 6.403589691652378e-06, "loss": 0.7798, "step": 9576 }, { "epoch": 0.85, "grad_norm": 6.1302917612063075, "learning_rate": 6.402896352061138e-06, "loss": 0.7947, "step": 9577 }, { "epoch": 0.85, "grad_norm": 4.870441049642524, "learning_rate": 6.402202983187509e-06, "loss": 0.7753, "step": 9578 }, { "epoch": 0.85, "grad_norm": 5.994016662555639, "learning_rate": 6.401509585045963e-06, "loss": 0.8068, "step": 9579 }, { "epoch": 0.85, "grad_norm": 5.124560939881607, "learning_rate": 6.4008161576509734e-06, "loss": 0.809, "step": 9580 }, { "epoch": 0.85, "grad_norm": 6.021929822062798, "learning_rate": 6.400122701017016e-06, "loss": 0.7422, "step": 9581 }, { "epoch": 0.85, "grad_norm": 8.439546053771394, "learning_rate": 6.399429215158564e-06, "loss": 0.8027, "step": 9582 }, { "epoch": 0.85, "grad_norm": 6.34780093249162, "learning_rate": 6.3987357000900885e-06, "loss": 0.7673, "step": 9583 }, { "epoch": 0.85, "grad_norm": 4.472288639123464, "learning_rate": 6.3980421558260715e-06, "loss": 0.7112, "step": 9584 }, { "epoch": 0.86, "grad_norm": 6.859714145040179, "learning_rate": 6.397348582380984e-06, "loss": 0.8546, "step": 9585 }, { "epoch": 0.86, "grad_norm": 4.940841819397808, "learning_rate": 6.396654979769305e-06, "loss": 0.7729, "step": 9586 }, { "epoch": 0.86, "grad_norm": 5.63557414543912, "learning_rate": 6.3959613480055136e-06, "loss": 0.7755, "step": 9587 }, { "epoch": 0.86, "grad_norm": 6.083855344299968, "learning_rate": 6.395267687104085e-06, "loss": 0.7654, "step": 9588 }, { "epoch": 0.86, "grad_norm": 5.392333284566177, "learning_rate": 6.394573997079501e-06, "loss": 0.7206, "step": 9589 }, { "epoch": 0.86, "grad_norm": 5.370705587135131, "learning_rate": 6.393880277946236e-06, "loss": 0.7868, "step": 9590 }, { "epoch": 0.86, "grad_norm": 5.622597587951707, "learning_rate": 6.393186529718776e-06, "loss": 0.8311, "step": 9591 }, { "epoch": 0.86, "grad_norm": 7.614489443883015, "learning_rate": 6.392492752411595e-06, "loss": 0.6962, "step": 9592 }, { "epoch": 0.86, "grad_norm": 5.986926147842197, "learning_rate": 6.391798946039178e-06, "loss": 0.7714, "step": 9593 }, { "epoch": 0.86, "grad_norm": 7.429776178278837, "learning_rate": 6.391105110616007e-06, "loss": 0.8937, "step": 9594 }, { "epoch": 0.86, "grad_norm": 6.8234114069123235, "learning_rate": 6.390411246156562e-06, "loss": 0.7297, "step": 9595 }, { "epoch": 0.86, "grad_norm": 5.658063226930865, "learning_rate": 6.389717352675329e-06, "loss": 0.7181, "step": 9596 }, { "epoch": 0.86, "grad_norm": 5.601391365592395, "learning_rate": 6.389023430186789e-06, "loss": 0.7584, "step": 9597 }, { "epoch": 0.86, "grad_norm": 6.7202670661460004, "learning_rate": 6.388329478705426e-06, "loss": 0.8319, "step": 9598 }, { "epoch": 0.86, "grad_norm": 4.765891946618638, "learning_rate": 6.387635498245725e-06, "loss": 0.7802, "step": 9599 }, { "epoch": 0.86, "grad_norm": 5.295515569683688, "learning_rate": 6.386941488822174e-06, "loss": 0.7482, "step": 9600 }, { "epoch": 0.86, "grad_norm": 5.559529460626731, "learning_rate": 6.386247450449255e-06, "loss": 0.7164, "step": 9601 }, { "epoch": 0.86, "grad_norm": 4.306185664172008, "learning_rate": 6.385553383141457e-06, "loss": 0.7249, "step": 9602 }, { "epoch": 0.86, "grad_norm": 6.201778368943527, "learning_rate": 6.384859286913268e-06, "loss": 0.8223, "step": 9603 }, { "epoch": 0.86, "grad_norm": 5.750108044956159, "learning_rate": 6.384165161779172e-06, "loss": 0.7877, "step": 9604 }, { "epoch": 0.86, "grad_norm": 4.674629139721951, "learning_rate": 6.38347100775366e-06, "loss": 0.8479, "step": 9605 }, { "epoch": 0.86, "grad_norm": 5.783015232008509, "learning_rate": 6.38277682485122e-06, "loss": 0.7722, "step": 9606 }, { "epoch": 0.86, "grad_norm": 6.5484561865786235, "learning_rate": 6.382082613086344e-06, "loss": 0.7745, "step": 9607 }, { "epoch": 0.86, "grad_norm": 5.3825820354104605, "learning_rate": 6.381388372473517e-06, "loss": 0.802, "step": 9608 }, { "epoch": 0.86, "grad_norm": 5.81688022314568, "learning_rate": 6.380694103027234e-06, "loss": 0.7119, "step": 9609 }, { "epoch": 0.86, "grad_norm": 7.2847306040091, "learning_rate": 6.379999804761988e-06, "loss": 0.7912, "step": 9610 }, { "epoch": 0.86, "grad_norm": 5.384531631051542, "learning_rate": 6.379305477692266e-06, "loss": 0.8215, "step": 9611 }, { "epoch": 0.86, "grad_norm": 5.3725644864050714, "learning_rate": 6.378611121832562e-06, "loss": 0.7914, "step": 9612 }, { "epoch": 0.86, "grad_norm": 6.643052970131539, "learning_rate": 6.377916737197372e-06, "loss": 0.727, "step": 9613 }, { "epoch": 0.86, "grad_norm": 5.510025912356734, "learning_rate": 6.377222323801185e-06, "loss": 0.7564, "step": 9614 }, { "epoch": 0.86, "grad_norm": 6.827755218490466, "learning_rate": 6.3765278816585e-06, "loss": 0.7714, "step": 9615 }, { "epoch": 0.86, "grad_norm": 7.308076665659627, "learning_rate": 6.3758334107838094e-06, "loss": 0.7974, "step": 9616 }, { "epoch": 0.86, "grad_norm": 5.51238909891741, "learning_rate": 6.37513891119161e-06, "loss": 0.7911, "step": 9617 }, { "epoch": 0.86, "grad_norm": 6.738011050335456, "learning_rate": 6.374444382896396e-06, "loss": 0.7646, "step": 9618 }, { "epoch": 0.86, "grad_norm": 5.978078403967973, "learning_rate": 6.373749825912667e-06, "loss": 0.7717, "step": 9619 }, { "epoch": 0.86, "grad_norm": 6.387442652963802, "learning_rate": 6.373055240254919e-06, "loss": 0.8127, "step": 9620 }, { "epoch": 0.86, "grad_norm": 4.909185629345546, "learning_rate": 6.372360625937648e-06, "loss": 0.8193, "step": 9621 }, { "epoch": 0.86, "grad_norm": 5.63492516804353, "learning_rate": 6.371665982975357e-06, "loss": 0.7915, "step": 9622 }, { "epoch": 0.86, "grad_norm": 4.341202800122641, "learning_rate": 6.37097131138254e-06, "loss": 0.843, "step": 9623 }, { "epoch": 0.86, "grad_norm": 5.255990001585032, "learning_rate": 6.3702766111737e-06, "loss": 0.663, "step": 9624 }, { "epoch": 0.86, "grad_norm": 5.35900443954684, "learning_rate": 6.3695818823633374e-06, "loss": 0.7118, "step": 9625 }, { "epoch": 0.86, "grad_norm": 4.740648511949091, "learning_rate": 6.3688871249659526e-06, "loss": 0.7908, "step": 9626 }, { "epoch": 0.86, "grad_norm": 4.86790605496722, "learning_rate": 6.368192338996044e-06, "loss": 0.7617, "step": 9627 }, { "epoch": 0.86, "grad_norm": 7.245811868476802, "learning_rate": 6.367497524468118e-06, "loss": 0.7641, "step": 9628 }, { "epoch": 0.86, "grad_norm": 6.597582641964364, "learning_rate": 6.366802681396678e-06, "loss": 0.7041, "step": 9629 }, { "epoch": 0.86, "grad_norm": 6.056824027272064, "learning_rate": 6.366107809796223e-06, "loss": 0.7632, "step": 9630 }, { "epoch": 0.86, "grad_norm": 8.653906417471454, "learning_rate": 6.36541290968126e-06, "loss": 0.7919, "step": 9631 }, { "epoch": 0.86, "grad_norm": 5.0359267310783355, "learning_rate": 6.364717981066294e-06, "loss": 0.7773, "step": 9632 }, { "epoch": 0.86, "grad_norm": 5.272816019358891, "learning_rate": 6.364023023965827e-06, "loss": 0.7004, "step": 9633 }, { "epoch": 0.86, "grad_norm": 6.38899992508028, "learning_rate": 6.3633280383943675e-06, "loss": 0.7548, "step": 9634 }, { "epoch": 0.86, "grad_norm": 5.951842709900067, "learning_rate": 6.36263302436642e-06, "loss": 0.7783, "step": 9635 }, { "epoch": 0.86, "grad_norm": 5.309120425953189, "learning_rate": 6.3619379818964925e-06, "loss": 0.7596, "step": 9636 }, { "epoch": 0.86, "grad_norm": 3.9253448801597606, "learning_rate": 6.361242910999092e-06, "loss": 0.7738, "step": 9637 }, { "epoch": 0.86, "grad_norm": 5.149601982252259, "learning_rate": 6.360547811688728e-06, "loss": 0.764, "step": 9638 }, { "epoch": 0.86, "grad_norm": 5.5309234127529034, "learning_rate": 6.359852683979908e-06, "loss": 0.7703, "step": 9639 }, { "epoch": 0.86, "grad_norm": 4.981599484626527, "learning_rate": 6.359157527887139e-06, "loss": 0.7317, "step": 9640 }, { "epoch": 0.86, "grad_norm": 5.809079650149916, "learning_rate": 6.358462343424934e-06, "loss": 0.771, "step": 9641 }, { "epoch": 0.86, "grad_norm": 8.102591800247954, "learning_rate": 6.357767130607804e-06, "loss": 0.81, "step": 9642 }, { "epoch": 0.86, "grad_norm": 3.965538116913747, "learning_rate": 6.357071889450257e-06, "loss": 0.759, "step": 9643 }, { "epoch": 0.86, "grad_norm": 4.910111440759103, "learning_rate": 6.356376619966807e-06, "loss": 0.7212, "step": 9644 }, { "epoch": 0.86, "grad_norm": 5.319572592559808, "learning_rate": 6.355681322171965e-06, "loss": 0.8215, "step": 9645 }, { "epoch": 0.86, "grad_norm": 4.968792559045306, "learning_rate": 6.354985996080245e-06, "loss": 0.7166, "step": 9646 }, { "epoch": 0.86, "grad_norm": 5.421344843831731, "learning_rate": 6.35429064170616e-06, "loss": 0.727, "step": 9647 }, { "epoch": 0.86, "grad_norm": 9.497870017712973, "learning_rate": 6.353595259064223e-06, "loss": 0.7889, "step": 9648 }, { "epoch": 0.86, "grad_norm": 6.480426679771631, "learning_rate": 6.352899848168949e-06, "loss": 0.786, "step": 9649 }, { "epoch": 0.86, "grad_norm": 6.468689564823583, "learning_rate": 6.352204409034853e-06, "loss": 0.7909, "step": 9650 }, { "epoch": 0.86, "grad_norm": 4.9841523550496385, "learning_rate": 6.3515089416764534e-06, "loss": 0.7635, "step": 9651 }, { "epoch": 0.86, "grad_norm": 4.649429577759701, "learning_rate": 6.350813446108263e-06, "loss": 0.7562, "step": 9652 }, { "epoch": 0.86, "grad_norm": 5.314009911187804, "learning_rate": 6.350117922344801e-06, "loss": 0.7712, "step": 9653 }, { "epoch": 0.86, "grad_norm": 6.54972436487417, "learning_rate": 6.3494223704005816e-06, "loss": 0.7116, "step": 9654 }, { "epoch": 0.86, "grad_norm": 5.930818601158032, "learning_rate": 6.348726790290128e-06, "loss": 0.7679, "step": 9655 }, { "epoch": 0.86, "grad_norm": 5.648315657821372, "learning_rate": 6.348031182027955e-06, "loss": 0.8574, "step": 9656 }, { "epoch": 0.86, "grad_norm": 4.826822324480468, "learning_rate": 6.347335545628584e-06, "loss": 0.7228, "step": 9657 }, { "epoch": 0.86, "grad_norm": 5.45357739437551, "learning_rate": 6.3466398811065335e-06, "loss": 0.8511, "step": 9658 }, { "epoch": 0.86, "grad_norm": 4.908836830808258, "learning_rate": 6.345944188476325e-06, "loss": 0.7354, "step": 9659 }, { "epoch": 0.86, "grad_norm": 4.883851183934523, "learning_rate": 6.345248467752478e-06, "loss": 0.7541, "step": 9660 }, { "epoch": 0.86, "grad_norm": 4.153830388505684, "learning_rate": 6.344552718949518e-06, "loss": 0.8227, "step": 9661 }, { "epoch": 0.86, "grad_norm": 5.133076835914646, "learning_rate": 6.343856942081962e-06, "loss": 0.7598, "step": 9662 }, { "epoch": 0.86, "grad_norm": 4.864469238982529, "learning_rate": 6.343161137164335e-06, "loss": 0.7438, "step": 9663 }, { "epoch": 0.86, "grad_norm": 5.258163220243833, "learning_rate": 6.342465304211163e-06, "loss": 0.754, "step": 9664 }, { "epoch": 0.86, "grad_norm": 6.463545279868044, "learning_rate": 6.341769443236966e-06, "loss": 0.7471, "step": 9665 }, { "epoch": 0.86, "grad_norm": 5.292944923143582, "learning_rate": 6.34107355425627e-06, "loss": 0.7734, "step": 9666 }, { "epoch": 0.86, "grad_norm": 4.072850028149302, "learning_rate": 6.340377637283602e-06, "loss": 0.7243, "step": 9667 }, { "epoch": 0.86, "grad_norm": 4.750335166684634, "learning_rate": 6.339681692333486e-06, "loss": 0.7213, "step": 9668 }, { "epoch": 0.86, "grad_norm": 6.929208381344161, "learning_rate": 6.338985719420447e-06, "loss": 0.6626, "step": 9669 }, { "epoch": 0.86, "grad_norm": 5.614168437915558, "learning_rate": 6.3382897185590165e-06, "loss": 0.767, "step": 9670 }, { "epoch": 0.86, "grad_norm": 5.6852280605651835, "learning_rate": 6.337593689763716e-06, "loss": 0.7302, "step": 9671 }, { "epoch": 0.86, "grad_norm": 5.8516600725868475, "learning_rate": 6.3368976330490776e-06, "loss": 0.7153, "step": 9672 }, { "epoch": 0.86, "grad_norm": 6.69805493874649, "learning_rate": 6.336201548429628e-06, "loss": 0.7213, "step": 9673 }, { "epoch": 0.86, "grad_norm": 7.190546510302859, "learning_rate": 6.335505435919897e-06, "loss": 0.7779, "step": 9674 }, { "epoch": 0.86, "grad_norm": 7.043502593244326, "learning_rate": 6.334809295534416e-06, "loss": 0.8064, "step": 9675 }, { "epoch": 0.86, "grad_norm": 6.678446128257518, "learning_rate": 6.334113127287714e-06, "loss": 0.7962, "step": 9676 }, { "epoch": 0.86, "grad_norm": 5.389223083147459, "learning_rate": 6.333416931194323e-06, "loss": 0.7437, "step": 9677 }, { "epoch": 0.86, "grad_norm": 4.95557824679926, "learning_rate": 6.332720707268772e-06, "loss": 0.7406, "step": 9678 }, { "epoch": 0.86, "grad_norm": 5.267830063979681, "learning_rate": 6.332024455525595e-06, "loss": 0.7905, "step": 9679 }, { "epoch": 0.86, "grad_norm": 6.142082586469153, "learning_rate": 6.331328175979325e-06, "loss": 0.8081, "step": 9680 }, { "epoch": 0.86, "grad_norm": 4.187690882985472, "learning_rate": 6.330631868644495e-06, "loss": 0.7281, "step": 9681 }, { "epoch": 0.86, "grad_norm": 6.2007701721987765, "learning_rate": 6.329935533535639e-06, "loss": 0.8127, "step": 9682 }, { "epoch": 0.86, "grad_norm": 4.970951646290749, "learning_rate": 6.329239170667292e-06, "loss": 0.7451, "step": 9683 }, { "epoch": 0.86, "grad_norm": 5.789900846701699, "learning_rate": 6.328542780053987e-06, "loss": 0.7847, "step": 9684 }, { "epoch": 0.86, "grad_norm": 4.882511808840508, "learning_rate": 6.3278463617102606e-06, "loss": 0.8155, "step": 9685 }, { "epoch": 0.86, "grad_norm": 4.800216619352267, "learning_rate": 6.32714991565065e-06, "loss": 0.732, "step": 9686 }, { "epoch": 0.86, "grad_norm": 5.60481732182165, "learning_rate": 6.326453441889691e-06, "loss": 0.7414, "step": 9687 }, { "epoch": 0.86, "grad_norm": 5.777320389395315, "learning_rate": 6.3257569404419205e-06, "loss": 0.9373, "step": 9688 }, { "epoch": 0.86, "grad_norm": 5.600170908611265, "learning_rate": 6.32506041132188e-06, "loss": 0.7491, "step": 9689 }, { "epoch": 0.86, "grad_norm": 4.860002759638754, "learning_rate": 6.324363854544103e-06, "loss": 0.7906, "step": 9690 }, { "epoch": 0.86, "grad_norm": 4.732816759013145, "learning_rate": 6.323667270123132e-06, "loss": 0.7343, "step": 9691 }, { "epoch": 0.86, "grad_norm": 5.138212671728253, "learning_rate": 6.322970658073504e-06, "loss": 0.7533, "step": 9692 }, { "epoch": 0.86, "grad_norm": 5.9304384114322435, "learning_rate": 6.322274018409762e-06, "loss": 0.7259, "step": 9693 }, { "epoch": 0.86, "grad_norm": 4.538010567294612, "learning_rate": 6.3215773511464455e-06, "loss": 0.6934, "step": 9694 }, { "epoch": 0.86, "grad_norm": 4.905715190860067, "learning_rate": 6.320880656298094e-06, "loss": 0.7655, "step": 9695 }, { "epoch": 0.86, "grad_norm": 5.84397310816804, "learning_rate": 6.320183933879253e-06, "loss": 0.7907, "step": 9696 }, { "epoch": 0.87, "grad_norm": 4.201392640522567, "learning_rate": 6.319487183904466e-06, "loss": 0.7421, "step": 9697 }, { "epoch": 0.87, "grad_norm": 5.006391928661296, "learning_rate": 6.318790406388271e-06, "loss": 0.7895, "step": 9698 }, { "epoch": 0.87, "grad_norm": 4.778412208700614, "learning_rate": 6.318093601345215e-06, "loss": 0.727, "step": 9699 }, { "epoch": 0.87, "grad_norm": 6.19698117656942, "learning_rate": 6.3173967687898404e-06, "loss": 0.8328, "step": 9700 }, { "epoch": 0.87, "grad_norm": 6.354407010929948, "learning_rate": 6.316699908736694e-06, "loss": 0.6964, "step": 9701 }, { "epoch": 0.87, "grad_norm": 5.308359953055685, "learning_rate": 6.3160030212003225e-06, "loss": 0.6949, "step": 9702 }, { "epoch": 0.87, "grad_norm": 5.732590648022395, "learning_rate": 6.315306106195267e-06, "loss": 0.751, "step": 9703 }, { "epoch": 0.87, "grad_norm": 6.04920604548987, "learning_rate": 6.314609163736078e-06, "loss": 0.7146, "step": 9704 }, { "epoch": 0.87, "grad_norm": 5.111519793738839, "learning_rate": 6.313912193837303e-06, "loss": 0.823, "step": 9705 }, { "epoch": 0.87, "grad_norm": 5.340380772890688, "learning_rate": 6.313215196513487e-06, "loss": 0.7715, "step": 9706 }, { "epoch": 0.87, "grad_norm": 5.17946212875513, "learning_rate": 6.31251817177918e-06, "loss": 0.7901, "step": 9707 }, { "epoch": 0.87, "grad_norm": 6.8837403279075176, "learning_rate": 6.311821119648931e-06, "loss": 0.8199, "step": 9708 }, { "epoch": 0.87, "grad_norm": 6.5144519839903685, "learning_rate": 6.3111240401372875e-06, "loss": 0.7939, "step": 9709 }, { "epoch": 0.87, "grad_norm": 4.331943428639324, "learning_rate": 6.3104269332588015e-06, "loss": 0.7678, "step": 9710 }, { "epoch": 0.87, "grad_norm": 5.645339658817871, "learning_rate": 6.309729799028023e-06, "loss": 0.7583, "step": 9711 }, { "epoch": 0.87, "grad_norm": 7.897468541297062, "learning_rate": 6.309032637459506e-06, "loss": 0.8177, "step": 9712 }, { "epoch": 0.87, "grad_norm": 5.746494859902457, "learning_rate": 6.308335448567796e-06, "loss": 0.7781, "step": 9713 }, { "epoch": 0.87, "grad_norm": 6.401305494776823, "learning_rate": 6.30763823236745e-06, "loss": 0.7762, "step": 9714 }, { "epoch": 0.87, "grad_norm": 6.109057651879473, "learning_rate": 6.30694098887302e-06, "loss": 0.767, "step": 9715 }, { "epoch": 0.87, "grad_norm": 6.8431798601119915, "learning_rate": 6.306243718099057e-06, "loss": 0.7617, "step": 9716 }, { "epoch": 0.87, "grad_norm": 5.412527459772984, "learning_rate": 6.305546420060118e-06, "loss": 0.7494, "step": 9717 }, { "epoch": 0.87, "grad_norm": 6.180288025341259, "learning_rate": 6.30484909477076e-06, "loss": 0.811, "step": 9718 }, { "epoch": 0.87, "grad_norm": 4.656689151863945, "learning_rate": 6.30415174224553e-06, "loss": 0.7106, "step": 9719 }, { "epoch": 0.87, "grad_norm": 5.85171134944672, "learning_rate": 6.303454362498991e-06, "loss": 0.7983, "step": 9720 }, { "epoch": 0.87, "grad_norm": 5.548652587058567, "learning_rate": 6.302756955545696e-06, "loss": 0.7616, "step": 9721 }, { "epoch": 0.87, "grad_norm": 5.727495958499595, "learning_rate": 6.302059521400201e-06, "loss": 0.7887, "step": 9722 }, { "epoch": 0.87, "grad_norm": 5.196384398140186, "learning_rate": 6.301362060077067e-06, "loss": 0.7641, "step": 9723 }, { "epoch": 0.87, "grad_norm": 3.854533115196373, "learning_rate": 6.300664571590849e-06, "loss": 0.8086, "step": 9724 }, { "epoch": 0.87, "grad_norm": 5.077895696447907, "learning_rate": 6.2999670559561065e-06, "loss": 0.7314, "step": 9725 }, { "epoch": 0.87, "grad_norm": 5.2199288099545536, "learning_rate": 6.299269513187399e-06, "loss": 0.7802, "step": 9726 }, { "epoch": 0.87, "grad_norm": 6.206560837203606, "learning_rate": 6.298571943299285e-06, "loss": 0.7969, "step": 9727 }, { "epoch": 0.87, "grad_norm": 5.382693834873623, "learning_rate": 6.297874346306327e-06, "loss": 0.7595, "step": 9728 }, { "epoch": 0.87, "grad_norm": 5.224437957624343, "learning_rate": 6.297176722223083e-06, "loss": 0.7414, "step": 9729 }, { "epoch": 0.87, "grad_norm": 5.499876545896473, "learning_rate": 6.296479071064115e-06, "loss": 0.7434, "step": 9730 }, { "epoch": 0.87, "grad_norm": 6.847516885582258, "learning_rate": 6.295781392843988e-06, "loss": 0.7547, "step": 9731 }, { "epoch": 0.87, "grad_norm": 5.558753388591179, "learning_rate": 6.2950836875772605e-06, "loss": 0.7334, "step": 9732 }, { "epoch": 0.87, "grad_norm": 5.736459166516716, "learning_rate": 6.294385955278498e-06, "loss": 0.8063, "step": 9733 }, { "epoch": 0.87, "grad_norm": 10.109051645774318, "learning_rate": 6.293688195962264e-06, "loss": 0.7178, "step": 9734 }, { "epoch": 0.87, "grad_norm": 5.9361968608457065, "learning_rate": 6.292990409643121e-06, "loss": 0.7742, "step": 9735 }, { "epoch": 0.87, "grad_norm": 4.460872153741046, "learning_rate": 6.292292596335634e-06, "loss": 0.8311, "step": 9736 }, { "epoch": 0.87, "grad_norm": 5.433031431305875, "learning_rate": 6.29159475605437e-06, "loss": 0.7846, "step": 9737 }, { "epoch": 0.87, "grad_norm": 5.857989579236105, "learning_rate": 6.2908968888138935e-06, "loss": 0.8038, "step": 9738 }, { "epoch": 0.87, "grad_norm": 4.569643503778049, "learning_rate": 6.290198994628771e-06, "loss": 0.7248, "step": 9739 }, { "epoch": 0.87, "grad_norm": 9.571273350934446, "learning_rate": 6.289501073513572e-06, "loss": 0.7424, "step": 9740 }, { "epoch": 0.87, "grad_norm": 7.425659957641195, "learning_rate": 6.2888031254828605e-06, "loss": 0.7492, "step": 9741 }, { "epoch": 0.87, "grad_norm": 6.407756744256636, "learning_rate": 6.288105150551207e-06, "loss": 0.7759, "step": 9742 }, { "epoch": 0.87, "grad_norm": 5.495054598506165, "learning_rate": 6.28740714873318e-06, "loss": 0.8099, "step": 9743 }, { "epoch": 0.87, "grad_norm": 4.311643341693338, "learning_rate": 6.286709120043346e-06, "loss": 0.7989, "step": 9744 }, { "epoch": 0.87, "grad_norm": 4.872150947401154, "learning_rate": 6.286011064496278e-06, "loss": 0.7214, "step": 9745 }, { "epoch": 0.87, "grad_norm": 5.351364656475472, "learning_rate": 6.285312982106546e-06, "loss": 0.7623, "step": 9746 }, { "epoch": 0.87, "grad_norm": 3.93795385029351, "learning_rate": 6.28461487288872e-06, "loss": 0.782, "step": 9747 }, { "epoch": 0.87, "grad_norm": 3.7431909970469657, "learning_rate": 6.283916736857372e-06, "loss": 0.7588, "step": 9748 }, { "epoch": 0.87, "grad_norm": 4.336567771686006, "learning_rate": 6.283218574027072e-06, "loss": 0.7275, "step": 9749 }, { "epoch": 0.87, "grad_norm": 5.511240842298394, "learning_rate": 6.2825203844123964e-06, "loss": 0.8472, "step": 9750 }, { "epoch": 0.87, "grad_norm": 6.463778359552672, "learning_rate": 6.281822168027915e-06, "loss": 0.7117, "step": 9751 }, { "epoch": 0.87, "grad_norm": 6.789918912099097, "learning_rate": 6.281123924888204e-06, "loss": 0.8157, "step": 9752 }, { "epoch": 0.87, "grad_norm": 6.321640373913825, "learning_rate": 6.280425655007837e-06, "loss": 0.7299, "step": 9753 }, { "epoch": 0.87, "grad_norm": 5.877590118784282, "learning_rate": 6.279727358401388e-06, "loss": 0.7225, "step": 9754 }, { "epoch": 0.87, "grad_norm": 5.8442221470586295, "learning_rate": 6.279029035083433e-06, "loss": 0.7851, "step": 9755 }, { "epoch": 0.87, "grad_norm": 5.34055109166387, "learning_rate": 6.278330685068549e-06, "loss": 0.8127, "step": 9756 }, { "epoch": 0.87, "grad_norm": 4.592131280234987, "learning_rate": 6.27763230837131e-06, "loss": 0.8634, "step": 9757 }, { "epoch": 0.87, "grad_norm": 4.134920724723284, "learning_rate": 6.276933905006294e-06, "loss": 0.8235, "step": 9758 }, { "epoch": 0.87, "grad_norm": 7.190500089275022, "learning_rate": 6.276235474988081e-06, "loss": 0.7727, "step": 9759 }, { "epoch": 0.87, "grad_norm": 5.209558641718262, "learning_rate": 6.2755370183312455e-06, "loss": 0.7507, "step": 9760 }, { "epoch": 0.87, "grad_norm": 4.230863139527202, "learning_rate": 6.274838535050368e-06, "loss": 0.7228, "step": 9761 }, { "epoch": 0.87, "grad_norm": 5.101414379246327, "learning_rate": 6.27414002516003e-06, "loss": 0.782, "step": 9762 }, { "epoch": 0.87, "grad_norm": 4.7469131883652285, "learning_rate": 6.27344148867481e-06, "loss": 0.8655, "step": 9763 }, { "epoch": 0.87, "grad_norm": 6.995456967869784, "learning_rate": 6.272742925609284e-06, "loss": 0.7096, "step": 9764 }, { "epoch": 0.87, "grad_norm": 6.0891935586739665, "learning_rate": 6.272044335978039e-06, "loss": 0.7578, "step": 9765 }, { "epoch": 0.87, "grad_norm": 4.793230684641359, "learning_rate": 6.271345719795654e-06, "loss": 0.803, "step": 9766 }, { "epoch": 0.87, "grad_norm": 5.641366637544686, "learning_rate": 6.270647077076712e-06, "loss": 0.7562, "step": 9767 }, { "epoch": 0.87, "grad_norm": 5.480318732455397, "learning_rate": 6.269948407835793e-06, "loss": 0.7606, "step": 9768 }, { "epoch": 0.87, "grad_norm": 4.943779578022765, "learning_rate": 6.269249712087484e-06, "loss": 0.8211, "step": 9769 }, { "epoch": 0.87, "grad_norm": 5.659187168561962, "learning_rate": 6.268550989846366e-06, "loss": 0.7322, "step": 9770 }, { "epoch": 0.87, "grad_norm": 5.138023512846651, "learning_rate": 6.267852241127024e-06, "loss": 0.8062, "step": 9771 }, { "epoch": 0.87, "grad_norm": 4.26433237441788, "learning_rate": 6.267153465944044e-06, "loss": 0.7732, "step": 9772 }, { "epoch": 0.87, "grad_norm": 6.130434754997048, "learning_rate": 6.26645466431201e-06, "loss": 0.8221, "step": 9773 }, { "epoch": 0.87, "grad_norm": 5.464948469415333, "learning_rate": 6.265755836245506e-06, "loss": 0.7215, "step": 9774 }, { "epoch": 0.87, "grad_norm": 4.390970233903345, "learning_rate": 6.2650569817591246e-06, "loss": 0.8311, "step": 9775 }, { "epoch": 0.87, "grad_norm": 5.9748642101933696, "learning_rate": 6.264358100867448e-06, "loss": 0.8102, "step": 9776 }, { "epoch": 0.87, "grad_norm": 6.396353472123679, "learning_rate": 6.263659193585063e-06, "loss": 0.7037, "step": 9777 }, { "epoch": 0.87, "grad_norm": 6.641845806086245, "learning_rate": 6.262960259926562e-06, "loss": 0.7212, "step": 9778 }, { "epoch": 0.87, "grad_norm": 5.073172200723437, "learning_rate": 6.2622612999065316e-06, "loss": 0.7877, "step": 9779 }, { "epoch": 0.87, "grad_norm": 7.6781154246567365, "learning_rate": 6.261562313539559e-06, "loss": 0.796, "step": 9780 }, { "epoch": 0.87, "grad_norm": 4.889775074326401, "learning_rate": 6.260863300840236e-06, "loss": 0.7303, "step": 9781 }, { "epoch": 0.87, "grad_norm": 6.248690123335354, "learning_rate": 6.260164261823154e-06, "loss": 0.8359, "step": 9782 }, { "epoch": 0.87, "grad_norm": 6.628509215390753, "learning_rate": 6.259465196502903e-06, "loss": 0.7448, "step": 9783 }, { "epoch": 0.87, "grad_norm": 4.397228932471153, "learning_rate": 6.258766104894073e-06, "loss": 0.7903, "step": 9784 }, { "epoch": 0.87, "grad_norm": 4.750413185837244, "learning_rate": 6.258066987011258e-06, "loss": 0.8418, "step": 9785 }, { "epoch": 0.87, "grad_norm": 4.765393854953187, "learning_rate": 6.257367842869049e-06, "loss": 0.6927, "step": 9786 }, { "epoch": 0.87, "grad_norm": 5.664058517144299, "learning_rate": 6.25666867248204e-06, "loss": 0.7535, "step": 9787 }, { "epoch": 0.87, "grad_norm": 5.8413468096439685, "learning_rate": 6.255969475864825e-06, "loss": 0.7816, "step": 9788 }, { "epoch": 0.87, "grad_norm": 4.939419908556113, "learning_rate": 6.255270253031997e-06, "loss": 0.7555, "step": 9789 }, { "epoch": 0.87, "grad_norm": 4.675445841706478, "learning_rate": 6.254571003998153e-06, "loss": 0.7374, "step": 9790 }, { "epoch": 0.87, "grad_norm": 4.258271396872061, "learning_rate": 6.253871728777887e-06, "loss": 0.8041, "step": 9791 }, { "epoch": 0.87, "grad_norm": 5.237165624338756, "learning_rate": 6.253172427385792e-06, "loss": 0.7318, "step": 9792 }, { "epoch": 0.87, "grad_norm": 5.97391673899181, "learning_rate": 6.252473099836468e-06, "loss": 0.7823, "step": 9793 }, { "epoch": 0.87, "grad_norm": 4.62725143017116, "learning_rate": 6.251773746144512e-06, "loss": 0.7209, "step": 9794 }, { "epoch": 0.87, "grad_norm": 5.227032551796015, "learning_rate": 6.251074366324519e-06, "loss": 0.7249, "step": 9795 }, { "epoch": 0.87, "grad_norm": 4.47700944849814, "learning_rate": 6.250374960391089e-06, "loss": 0.775, "step": 9796 }, { "epoch": 0.87, "grad_norm": 5.745307835948027, "learning_rate": 6.249675528358821e-06, "loss": 0.7855, "step": 9797 }, { "epoch": 0.87, "grad_norm": 4.735720615309904, "learning_rate": 6.248976070242312e-06, "loss": 0.8509, "step": 9798 }, { "epoch": 0.87, "grad_norm": 4.016392953332443, "learning_rate": 6.248276586056164e-06, "loss": 0.77, "step": 9799 }, { "epoch": 0.87, "grad_norm": 4.910986800627424, "learning_rate": 6.247577075814974e-06, "loss": 0.7444, "step": 9800 }, { "epoch": 0.87, "grad_norm": 5.530083286415009, "learning_rate": 6.246877539533345e-06, "loss": 0.8654, "step": 9801 }, { "epoch": 0.87, "grad_norm": 8.218722774095697, "learning_rate": 6.246177977225878e-06, "loss": 0.7235, "step": 9802 }, { "epoch": 0.87, "grad_norm": 4.9857800587362044, "learning_rate": 6.245478388907175e-06, "loss": 0.7817, "step": 9803 }, { "epoch": 0.87, "grad_norm": 5.413422600785966, "learning_rate": 6.24477877459184e-06, "loss": 0.829, "step": 9804 }, { "epoch": 0.87, "grad_norm": 6.052256947450342, "learning_rate": 6.244079134294473e-06, "loss": 0.7134, "step": 9805 }, { "epoch": 0.87, "grad_norm": 6.274180735199831, "learning_rate": 6.243379468029677e-06, "loss": 0.8488, "step": 9806 }, { "epoch": 0.87, "grad_norm": 6.254375315197664, "learning_rate": 6.242679775812059e-06, "loss": 0.8056, "step": 9807 }, { "epoch": 0.87, "grad_norm": 5.382070136401744, "learning_rate": 6.2419800576562215e-06, "loss": 0.7802, "step": 9808 }, { "epoch": 0.88, "grad_norm": 6.0295743014485765, "learning_rate": 6.241280313576771e-06, "loss": 0.7505, "step": 9809 }, { "epoch": 0.88, "grad_norm": 6.407889549345209, "learning_rate": 6.240580543588311e-06, "loss": 0.7725, "step": 9810 }, { "epoch": 0.88, "grad_norm": 4.218968171172818, "learning_rate": 6.2398807477054504e-06, "loss": 0.7413, "step": 9811 }, { "epoch": 0.88, "grad_norm": 6.060846227752618, "learning_rate": 6.239180925942794e-06, "loss": 0.7572, "step": 9812 }, { "epoch": 0.88, "grad_norm": 4.8622348164976295, "learning_rate": 6.238481078314949e-06, "loss": 0.813, "step": 9813 }, { "epoch": 0.88, "grad_norm": 5.24001652070445, "learning_rate": 6.237781204836525e-06, "loss": 0.7636, "step": 9814 }, { "epoch": 0.88, "grad_norm": 5.55707352381635, "learning_rate": 6.237081305522128e-06, "loss": 0.837, "step": 9815 }, { "epoch": 0.88, "grad_norm": 7.625670235453415, "learning_rate": 6.236381380386367e-06, "loss": 0.7711, "step": 9816 }, { "epoch": 0.88, "grad_norm": 6.379156261099252, "learning_rate": 6.235681429443854e-06, "loss": 0.7697, "step": 9817 }, { "epoch": 0.88, "grad_norm": 4.451108602408712, "learning_rate": 6.234981452709197e-06, "loss": 0.7571, "step": 9818 }, { "epoch": 0.88, "grad_norm": 4.899880208049548, "learning_rate": 6.234281450197006e-06, "loss": 0.7011, "step": 9819 }, { "epoch": 0.88, "grad_norm": 5.443604525105231, "learning_rate": 6.233581421921894e-06, "loss": 0.7502, "step": 9820 }, { "epoch": 0.88, "grad_norm": 5.638481191283446, "learning_rate": 6.23288136789847e-06, "loss": 0.7472, "step": 9821 }, { "epoch": 0.88, "grad_norm": 4.384864944442155, "learning_rate": 6.232181288141347e-06, "loss": 0.7378, "step": 9822 }, { "epoch": 0.88, "grad_norm": 6.607874529528891, "learning_rate": 6.23148118266514e-06, "loss": 0.8002, "step": 9823 }, { "epoch": 0.88, "grad_norm": 5.95233334445, "learning_rate": 6.230781051484458e-06, "loss": 0.8404, "step": 9824 }, { "epoch": 0.88, "grad_norm": 5.5368550010793305, "learning_rate": 6.230080894613918e-06, "loss": 0.7554, "step": 9825 }, { "epoch": 0.88, "grad_norm": 7.124440877781158, "learning_rate": 6.229380712068134e-06, "loss": 0.7439, "step": 9826 }, { "epoch": 0.88, "grad_norm": 6.604613160183594, "learning_rate": 6.228680503861719e-06, "loss": 0.6907, "step": 9827 }, { "epoch": 0.88, "grad_norm": 6.54661559806082, "learning_rate": 6.2279802700092895e-06, "loss": 0.7889, "step": 9828 }, { "epoch": 0.88, "grad_norm": 5.5884258738460995, "learning_rate": 6.227280010525462e-06, "loss": 0.7331, "step": 9829 }, { "epoch": 0.88, "grad_norm": 6.92609050916543, "learning_rate": 6.2265797254248496e-06, "loss": 0.8959, "step": 9830 }, { "epoch": 0.88, "grad_norm": 5.721371076632458, "learning_rate": 6.225879414722073e-06, "loss": 0.7396, "step": 9831 }, { "epoch": 0.88, "grad_norm": 4.675046520298822, "learning_rate": 6.225179078431749e-06, "loss": 0.7759, "step": 9832 }, { "epoch": 0.88, "grad_norm": 5.734424718990778, "learning_rate": 6.224478716568493e-06, "loss": 0.8332, "step": 9833 }, { "epoch": 0.88, "grad_norm": 5.813233278884142, "learning_rate": 6.223778329146926e-06, "loss": 0.7346, "step": 9834 }, { "epoch": 0.88, "grad_norm": 5.533396986931334, "learning_rate": 6.223077916181668e-06, "loss": 0.7139, "step": 9835 }, { "epoch": 0.88, "grad_norm": 5.824997391290138, "learning_rate": 6.222377477687336e-06, "loss": 0.7967, "step": 9836 }, { "epoch": 0.88, "grad_norm": 5.435201711908418, "learning_rate": 6.22167701367855e-06, "loss": 0.8049, "step": 9837 }, { "epoch": 0.88, "grad_norm": 5.84024165376351, "learning_rate": 6.220976524169933e-06, "loss": 0.7473, "step": 9838 }, { "epoch": 0.88, "grad_norm": 6.292495970896735, "learning_rate": 6.220276009176105e-06, "loss": 0.7425, "step": 9839 }, { "epoch": 0.88, "grad_norm": 8.540935655678451, "learning_rate": 6.219575468711686e-06, "loss": 0.7082, "step": 9840 }, { "epoch": 0.88, "grad_norm": 5.3673207731489425, "learning_rate": 6.218874902791302e-06, "loss": 0.7466, "step": 9841 }, { "epoch": 0.88, "grad_norm": 4.471366730282035, "learning_rate": 6.218174311429573e-06, "loss": 0.709, "step": 9842 }, { "epoch": 0.88, "grad_norm": 7.073493636428725, "learning_rate": 6.217473694641123e-06, "loss": 0.7796, "step": 9843 }, { "epoch": 0.88, "grad_norm": 5.409516413250098, "learning_rate": 6.216773052440575e-06, "loss": 0.7717, "step": 9844 }, { "epoch": 0.88, "grad_norm": 6.034277959291131, "learning_rate": 6.216072384842555e-06, "loss": 0.8032, "step": 9845 }, { "epoch": 0.88, "grad_norm": 7.199711364132453, "learning_rate": 6.215371691861687e-06, "loss": 0.7656, "step": 9846 }, { "epoch": 0.88, "grad_norm": 5.068191820197493, "learning_rate": 6.214670973512597e-06, "loss": 0.7784, "step": 9847 }, { "epoch": 0.88, "grad_norm": 5.593190241652087, "learning_rate": 6.2139702298099105e-06, "loss": 0.7403, "step": 9848 }, { "epoch": 0.88, "grad_norm": 5.273192634488201, "learning_rate": 6.213269460768254e-06, "loss": 0.7523, "step": 9849 }, { "epoch": 0.88, "grad_norm": 5.3117759622183405, "learning_rate": 6.2125686664022535e-06, "loss": 0.8214, "step": 9850 }, { "epoch": 0.88, "grad_norm": 5.4138003334489335, "learning_rate": 6.2118678467265385e-06, "loss": 0.777, "step": 9851 }, { "epoch": 0.88, "grad_norm": 7.160305888734228, "learning_rate": 6.2111670017557355e-06, "loss": 0.7833, "step": 9852 }, { "epoch": 0.88, "grad_norm": 6.665382394899319, "learning_rate": 6.2104661315044735e-06, "loss": 0.6944, "step": 9853 }, { "epoch": 0.88, "grad_norm": 5.713495891370325, "learning_rate": 6.2097652359873825e-06, "loss": 0.7949, "step": 9854 }, { "epoch": 0.88, "grad_norm": 6.265703804137476, "learning_rate": 6.209064315219093e-06, "loss": 0.8059, "step": 9855 }, { "epoch": 0.88, "grad_norm": 5.858570053957441, "learning_rate": 6.208363369214232e-06, "loss": 0.7778, "step": 9856 }, { "epoch": 0.88, "grad_norm": 5.781248456350245, "learning_rate": 6.207662397987432e-06, "loss": 0.7537, "step": 9857 }, { "epoch": 0.88, "grad_norm": 5.601402282377116, "learning_rate": 6.206961401553324e-06, "loss": 0.7967, "step": 9858 }, { "epoch": 0.88, "grad_norm": 4.957806345611381, "learning_rate": 6.206260379926541e-06, "loss": 0.729, "step": 9859 }, { "epoch": 0.88, "grad_norm": 5.349063241000264, "learning_rate": 6.205559333121712e-06, "loss": 0.8146, "step": 9860 }, { "epoch": 0.88, "grad_norm": 4.831368950822578, "learning_rate": 6.2048582611534745e-06, "loss": 0.7914, "step": 9861 }, { "epoch": 0.88, "grad_norm": 5.857696199709002, "learning_rate": 6.2041571640364596e-06, "loss": 0.7346, "step": 9862 }, { "epoch": 0.88, "grad_norm": 5.839866061630495, "learning_rate": 6.2034560417852996e-06, "loss": 0.7562, "step": 9863 }, { "epoch": 0.88, "grad_norm": 6.072856223420023, "learning_rate": 6.20275489441463e-06, "loss": 0.8224, "step": 9864 }, { "epoch": 0.88, "grad_norm": 5.085497580555607, "learning_rate": 6.202053721939086e-06, "loss": 0.6989, "step": 9865 }, { "epoch": 0.88, "grad_norm": 7.724531931464815, "learning_rate": 6.201352524373304e-06, "loss": 0.7377, "step": 9866 }, { "epoch": 0.88, "grad_norm": 5.536589617298664, "learning_rate": 6.2006513017319195e-06, "loss": 0.7293, "step": 9867 }, { "epoch": 0.88, "grad_norm": 5.134751604167117, "learning_rate": 6.199950054029567e-06, "loss": 0.8487, "step": 9868 }, { "epoch": 0.88, "grad_norm": 5.742377482116037, "learning_rate": 6.199248781280884e-06, "loss": 0.694, "step": 9869 }, { "epoch": 0.88, "grad_norm": 5.797167232460141, "learning_rate": 6.198547483500511e-06, "loss": 0.762, "step": 9870 }, { "epoch": 0.88, "grad_norm": 5.635590357252279, "learning_rate": 6.197846160703084e-06, "loss": 0.8541, "step": 9871 }, { "epoch": 0.88, "grad_norm": 6.44350449516486, "learning_rate": 6.197144812903241e-06, "loss": 0.7159, "step": 9872 }, { "epoch": 0.88, "grad_norm": 6.774322462155159, "learning_rate": 6.196443440115621e-06, "loss": 0.7571, "step": 9873 }, { "epoch": 0.88, "grad_norm": 5.832464951253434, "learning_rate": 6.195742042354867e-06, "loss": 0.7492, "step": 9874 }, { "epoch": 0.88, "grad_norm": 7.1981025830356975, "learning_rate": 6.195040619635613e-06, "loss": 0.6939, "step": 9875 }, { "epoch": 0.88, "grad_norm": 7.216948315005162, "learning_rate": 6.194339171972504e-06, "loss": 0.7563, "step": 9876 }, { "epoch": 0.88, "grad_norm": 5.494237381620149, "learning_rate": 6.193637699380183e-06, "loss": 0.8334, "step": 9877 }, { "epoch": 0.88, "grad_norm": 4.748936491391593, "learning_rate": 6.1929362018732875e-06, "loss": 0.8176, "step": 9878 }, { "epoch": 0.88, "grad_norm": 5.815403970786437, "learning_rate": 6.19223467946646e-06, "loss": 0.7331, "step": 9879 }, { "epoch": 0.88, "grad_norm": 4.988813265145291, "learning_rate": 6.191533132174346e-06, "loss": 0.7949, "step": 9880 }, { "epoch": 0.88, "grad_norm": 6.119290827075412, "learning_rate": 6.190831560011587e-06, "loss": 0.7462, "step": 9881 }, { "epoch": 0.88, "grad_norm": 5.354332812928787, "learning_rate": 6.190129962992825e-06, "loss": 0.7294, "step": 9882 }, { "epoch": 0.88, "grad_norm": 5.739268766731975, "learning_rate": 6.1894283411327095e-06, "loss": 0.7549, "step": 9883 }, { "epoch": 0.88, "grad_norm": 8.269171136319507, "learning_rate": 6.188726694445881e-06, "loss": 0.7387, "step": 9884 }, { "epoch": 0.88, "grad_norm": 5.592298546498344, "learning_rate": 6.1880250229469864e-06, "loss": 0.7891, "step": 9885 }, { "epoch": 0.88, "grad_norm": 5.649529788930542, "learning_rate": 6.18732332665067e-06, "loss": 0.7135, "step": 9886 }, { "epoch": 0.88, "grad_norm": 5.487248039224758, "learning_rate": 6.18662160557158e-06, "loss": 0.8339, "step": 9887 }, { "epoch": 0.88, "grad_norm": 5.191795688632441, "learning_rate": 6.185919859724363e-06, "loss": 0.7617, "step": 9888 }, { "epoch": 0.88, "grad_norm": 6.536568846719598, "learning_rate": 6.185218089123665e-06, "loss": 0.7355, "step": 9889 }, { "epoch": 0.88, "grad_norm": 6.457957308384774, "learning_rate": 6.184516293784136e-06, "loss": 0.7719, "step": 9890 }, { "epoch": 0.88, "grad_norm": 5.1524146471923675, "learning_rate": 6.183814473720424e-06, "loss": 0.7661, "step": 9891 }, { "epoch": 0.88, "grad_norm": 5.647911330361317, "learning_rate": 6.183112628947176e-06, "loss": 0.7436, "step": 9892 }, { "epoch": 0.88, "grad_norm": 6.197298556638414, "learning_rate": 6.182410759479044e-06, "loss": 0.8343, "step": 9893 }, { "epoch": 0.88, "grad_norm": 7.014020875297456, "learning_rate": 6.181708865330676e-06, "loss": 0.8063, "step": 9894 }, { "epoch": 0.88, "grad_norm": 5.254773602607386, "learning_rate": 6.1810069465167225e-06, "loss": 0.7956, "step": 9895 }, { "epoch": 0.88, "grad_norm": 6.570829429039235, "learning_rate": 6.180305003051836e-06, "loss": 0.857, "step": 9896 }, { "epoch": 0.88, "grad_norm": 4.695056987900873, "learning_rate": 6.179603034950667e-06, "loss": 0.7609, "step": 9897 }, { "epoch": 0.88, "grad_norm": 4.281681373151997, "learning_rate": 6.178901042227869e-06, "loss": 0.781, "step": 9898 }, { "epoch": 0.88, "grad_norm": 5.913419575781941, "learning_rate": 6.178199024898094e-06, "loss": 0.7454, "step": 9899 }, { "epoch": 0.88, "grad_norm": 4.811890768573447, "learning_rate": 6.1774969829759946e-06, "loss": 0.7161, "step": 9900 }, { "epoch": 0.88, "grad_norm": 5.360663192042909, "learning_rate": 6.176794916476223e-06, "loss": 0.7234, "step": 9901 }, { "epoch": 0.88, "grad_norm": 6.060096520468763, "learning_rate": 6.176092825413435e-06, "loss": 0.7674, "step": 9902 }, { "epoch": 0.88, "grad_norm": 5.308445189994336, "learning_rate": 6.1753907098022845e-06, "loss": 0.7939, "step": 9903 }, { "epoch": 0.88, "grad_norm": 5.366564507937351, "learning_rate": 6.174688569657428e-06, "loss": 0.8274, "step": 9904 }, { "epoch": 0.88, "grad_norm": 5.318543323550969, "learning_rate": 6.1739864049935196e-06, "loss": 0.7273, "step": 9905 }, { "epoch": 0.88, "grad_norm": 7.1867724327927975, "learning_rate": 6.1732842158252175e-06, "loss": 0.8589, "step": 9906 }, { "epoch": 0.88, "grad_norm": 5.206113139862672, "learning_rate": 6.1725820021671735e-06, "loss": 0.7706, "step": 9907 }, { "epoch": 0.88, "grad_norm": 5.1861446086850815, "learning_rate": 6.17187976403405e-06, "loss": 0.7603, "step": 9908 }, { "epoch": 0.88, "grad_norm": 5.888317944665131, "learning_rate": 6.171177501440503e-06, "loss": 0.7572, "step": 9909 }, { "epoch": 0.88, "grad_norm": 5.853713656033985, "learning_rate": 6.17047521440119e-06, "loss": 0.8133, "step": 9910 }, { "epoch": 0.88, "grad_norm": 5.7852457081629325, "learning_rate": 6.16977290293077e-06, "loss": 0.7985, "step": 9911 }, { "epoch": 0.88, "grad_norm": 6.973910445947179, "learning_rate": 6.169070567043903e-06, "loss": 0.7511, "step": 9912 }, { "epoch": 0.88, "grad_norm": 8.686211967465981, "learning_rate": 6.168368206755249e-06, "loss": 0.7867, "step": 9913 }, { "epoch": 0.88, "grad_norm": 5.3604987410375875, "learning_rate": 6.167665822079466e-06, "loss": 0.7089, "step": 9914 }, { "epoch": 0.88, "grad_norm": 5.704179874781647, "learning_rate": 6.166963413031217e-06, "loss": 0.6883, "step": 9915 }, { "epoch": 0.88, "grad_norm": 5.199409241040635, "learning_rate": 6.166260979625162e-06, "loss": 0.7706, "step": 9916 }, { "epoch": 0.88, "grad_norm": 6.293311353888366, "learning_rate": 6.165558521875961e-06, "loss": 0.7513, "step": 9917 }, { "epoch": 0.88, "grad_norm": 5.615946942219975, "learning_rate": 6.1648560397982815e-06, "loss": 0.8617, "step": 9918 }, { "epoch": 0.88, "grad_norm": 5.299422588502069, "learning_rate": 6.164153533406782e-06, "loss": 0.7774, "step": 9919 }, { "epoch": 0.88, "grad_norm": 6.6395639156996635, "learning_rate": 6.163451002716128e-06, "loss": 0.7657, "step": 9920 }, { "epoch": 0.89, "grad_norm": 5.558679436021888, "learning_rate": 6.162748447740981e-06, "loss": 0.8147, "step": 9921 }, { "epoch": 0.89, "grad_norm": 4.70867369170834, "learning_rate": 6.162045868496006e-06, "loss": 0.7988, "step": 9922 }, { "epoch": 0.89, "grad_norm": 5.652359245621111, "learning_rate": 6.161343264995869e-06, "loss": 0.7659, "step": 9923 }, { "epoch": 0.89, "grad_norm": 6.432275932699342, "learning_rate": 6.160640637255234e-06, "loss": 0.7363, "step": 9924 }, { "epoch": 0.89, "grad_norm": 4.8900369421039995, "learning_rate": 6.159937985288769e-06, "loss": 0.7123, "step": 9925 }, { "epoch": 0.89, "grad_norm": 5.783861648226687, "learning_rate": 6.159235309111137e-06, "loss": 0.7926, "step": 9926 }, { "epoch": 0.89, "grad_norm": 4.088594430153176, "learning_rate": 6.158532608737008e-06, "loss": 0.7676, "step": 9927 }, { "epoch": 0.89, "grad_norm": 5.337841527623541, "learning_rate": 6.157829884181048e-06, "loss": 0.7408, "step": 9928 }, { "epoch": 0.89, "grad_norm": 6.410127942879893, "learning_rate": 6.157127135457924e-06, "loss": 0.7401, "step": 9929 }, { "epoch": 0.89, "grad_norm": 4.4489336181597094, "learning_rate": 6.156424362582305e-06, "loss": 0.7588, "step": 9930 }, { "epoch": 0.89, "grad_norm": 5.097794303015817, "learning_rate": 6.155721565568861e-06, "loss": 0.8204, "step": 9931 }, { "epoch": 0.89, "grad_norm": 5.800590315559956, "learning_rate": 6.155018744432259e-06, "loss": 0.7604, "step": 9932 }, { "epoch": 0.89, "grad_norm": 6.871408568930423, "learning_rate": 6.1543158991871696e-06, "loss": 0.7552, "step": 9933 }, { "epoch": 0.89, "grad_norm": 5.293399265363556, "learning_rate": 6.153613029848267e-06, "loss": 0.6806, "step": 9934 }, { "epoch": 0.89, "grad_norm": 4.355082011566736, "learning_rate": 6.152910136430217e-06, "loss": 0.7717, "step": 9935 }, { "epoch": 0.89, "grad_norm": 4.209321721744387, "learning_rate": 6.152207218947691e-06, "loss": 0.7195, "step": 9936 }, { "epoch": 0.89, "grad_norm": 5.658416390741104, "learning_rate": 6.151504277415363e-06, "loss": 0.7812, "step": 9937 }, { "epoch": 0.89, "grad_norm": 5.162827111585358, "learning_rate": 6.150801311847907e-06, "loss": 0.8035, "step": 9938 }, { "epoch": 0.89, "grad_norm": 7.517315034367639, "learning_rate": 6.150098322259992e-06, "loss": 0.7499, "step": 9939 }, { "epoch": 0.89, "grad_norm": 12.376673801749577, "learning_rate": 6.149395308666294e-06, "loss": 0.722, "step": 9940 }, { "epoch": 0.89, "grad_norm": 5.812406824099804, "learning_rate": 6.148692271081487e-06, "loss": 0.7427, "step": 9941 }, { "epoch": 0.89, "grad_norm": 4.789268857645545, "learning_rate": 6.147989209520243e-06, "loss": 0.8063, "step": 9942 }, { "epoch": 0.89, "grad_norm": 5.081661830991714, "learning_rate": 6.147286123997239e-06, "loss": 0.8247, "step": 9943 }, { "epoch": 0.89, "grad_norm": 6.177937220663609, "learning_rate": 6.14658301452715e-06, "loss": 0.7775, "step": 9944 }, { "epoch": 0.89, "grad_norm": 5.296086440226046, "learning_rate": 6.14587988112465e-06, "loss": 0.8018, "step": 9945 }, { "epoch": 0.89, "grad_norm": 5.31034367047131, "learning_rate": 6.145176723804418e-06, "loss": 0.7613, "step": 9946 }, { "epoch": 0.89, "grad_norm": 6.367915722935144, "learning_rate": 6.14447354258113e-06, "loss": 0.7323, "step": 9947 }, { "epoch": 0.89, "grad_norm": 5.715031615547743, "learning_rate": 6.143770337469463e-06, "loss": 0.7277, "step": 9948 }, { "epoch": 0.89, "grad_norm": 5.440645120633575, "learning_rate": 6.143067108484096e-06, "loss": 0.6938, "step": 9949 }, { "epoch": 0.89, "grad_norm": 5.652528111811027, "learning_rate": 6.142363855639704e-06, "loss": 0.7634, "step": 9950 }, { "epoch": 0.89, "grad_norm": 5.7452619743242535, "learning_rate": 6.14166057895097e-06, "loss": 0.7273, "step": 9951 }, { "epoch": 0.89, "grad_norm": 5.348928122225856, "learning_rate": 6.14095727843257e-06, "loss": 0.7472, "step": 9952 }, { "epoch": 0.89, "grad_norm": 5.003946677838617, "learning_rate": 6.1402539540991875e-06, "loss": 0.7568, "step": 9953 }, { "epoch": 0.89, "grad_norm": 4.933015932333282, "learning_rate": 6.1395506059655e-06, "loss": 0.7719, "step": 9954 }, { "epoch": 0.89, "grad_norm": 5.461634939290451, "learning_rate": 6.138847234046188e-06, "loss": 0.7956, "step": 9955 }, { "epoch": 0.89, "grad_norm": 5.272669479192141, "learning_rate": 6.138143838355936e-06, "loss": 0.8185, "step": 9956 }, { "epoch": 0.89, "grad_norm": 8.217522583808606, "learning_rate": 6.137440418909423e-06, "loss": 0.766, "step": 9957 }, { "epoch": 0.89, "grad_norm": 4.827210194161117, "learning_rate": 6.136736975721332e-06, "loss": 0.8066, "step": 9958 }, { "epoch": 0.89, "grad_norm": 7.226471277970875, "learning_rate": 6.136033508806345e-06, "loss": 0.859, "step": 9959 }, { "epoch": 0.89, "grad_norm": 6.188991729044971, "learning_rate": 6.135330018179149e-06, "loss": 0.7618, "step": 9960 }, { "epoch": 0.89, "grad_norm": 5.83083007985564, "learning_rate": 6.134626503854423e-06, "loss": 0.8219, "step": 9961 }, { "epoch": 0.89, "grad_norm": 5.960559076085251, "learning_rate": 6.133922965846853e-06, "loss": 0.734, "step": 9962 }, { "epoch": 0.89, "grad_norm": 4.9525788599371285, "learning_rate": 6.133219404171128e-06, "loss": 0.7148, "step": 9963 }, { "epoch": 0.89, "grad_norm": 5.87106501882532, "learning_rate": 6.132515818841927e-06, "loss": 0.795, "step": 9964 }, { "epoch": 0.89, "grad_norm": 6.9746742327432125, "learning_rate": 6.131812209873937e-06, "loss": 0.8194, "step": 9965 }, { "epoch": 0.89, "grad_norm": 4.467382899722966, "learning_rate": 6.131108577281849e-06, "loss": 0.7362, "step": 9966 }, { "epoch": 0.89, "grad_norm": 4.278482148525703, "learning_rate": 6.130404921080344e-06, "loss": 0.7624, "step": 9967 }, { "epoch": 0.89, "grad_norm": 6.151906703395872, "learning_rate": 6.129701241284113e-06, "loss": 0.7435, "step": 9968 }, { "epoch": 0.89, "grad_norm": 6.611433006178244, "learning_rate": 6.128997537907842e-06, "loss": 0.6936, "step": 9969 }, { "epoch": 0.89, "grad_norm": 6.569739234088302, "learning_rate": 6.12829381096622e-06, "loss": 0.8724, "step": 9970 }, { "epoch": 0.89, "grad_norm": 7.279732669095729, "learning_rate": 6.127590060473937e-06, "loss": 0.7944, "step": 9971 }, { "epoch": 0.89, "grad_norm": 7.060564500467176, "learning_rate": 6.126886286445679e-06, "loss": 0.7236, "step": 9972 }, { "epoch": 0.89, "grad_norm": 5.9973168054322175, "learning_rate": 6.12618248889614e-06, "loss": 0.7693, "step": 9973 }, { "epoch": 0.89, "grad_norm": 5.518326534640806, "learning_rate": 6.125478667840006e-06, "loss": 0.7054, "step": 9974 }, { "epoch": 0.89, "grad_norm": 5.269288927047092, "learning_rate": 6.124774823291968e-06, "loss": 0.7579, "step": 9975 }, { "epoch": 0.89, "grad_norm": 5.002691076097937, "learning_rate": 6.124070955266721e-06, "loss": 0.7676, "step": 9976 }, { "epoch": 0.89, "grad_norm": 6.512147588469111, "learning_rate": 6.1233670637789545e-06, "loss": 0.6848, "step": 9977 }, { "epoch": 0.89, "grad_norm": 4.519773017488289, "learning_rate": 6.122663148843361e-06, "loss": 0.7581, "step": 9978 }, { "epoch": 0.89, "grad_norm": 4.308623433024361, "learning_rate": 6.121959210474632e-06, "loss": 0.73, "step": 9979 }, { "epoch": 0.89, "grad_norm": 5.027926797192309, "learning_rate": 6.121255248687461e-06, "loss": 0.794, "step": 9980 }, { "epoch": 0.89, "grad_norm": 6.089798457938509, "learning_rate": 6.120551263496543e-06, "loss": 0.7278, "step": 9981 }, { "epoch": 0.89, "grad_norm": 4.242809207026988, "learning_rate": 6.119847254916571e-06, "loss": 0.7407, "step": 9982 }, { "epoch": 0.89, "grad_norm": 5.073283783776094, "learning_rate": 6.1191432229622395e-06, "loss": 0.7847, "step": 9983 }, { "epoch": 0.89, "grad_norm": 5.638679284923997, "learning_rate": 6.1184391676482445e-06, "loss": 0.7783, "step": 9984 }, { "epoch": 0.89, "grad_norm": 6.922893123326059, "learning_rate": 6.117735088989282e-06, "loss": 0.7346, "step": 9985 }, { "epoch": 0.89, "grad_norm": 8.15366719924164, "learning_rate": 6.117030987000047e-06, "loss": 0.8147, "step": 9986 }, { "epoch": 0.89, "grad_norm": 5.394937677931595, "learning_rate": 6.116326861695236e-06, "loss": 0.7214, "step": 9987 }, { "epoch": 0.89, "grad_norm": 6.539290256828647, "learning_rate": 6.1156227130895475e-06, "loss": 0.7775, "step": 9988 }, { "epoch": 0.89, "grad_norm": 5.891447296131274, "learning_rate": 6.114918541197676e-06, "loss": 0.794, "step": 9989 }, { "epoch": 0.89, "grad_norm": 4.860324355783745, "learning_rate": 6.114214346034323e-06, "loss": 0.7441, "step": 9990 }, { "epoch": 0.89, "grad_norm": 5.2798531898199474, "learning_rate": 6.113510127614186e-06, "loss": 0.7232, "step": 9991 }, { "epoch": 0.89, "grad_norm": 6.6169567353272765, "learning_rate": 6.112805885951964e-06, "loss": 0.8362, "step": 9992 }, { "epoch": 0.89, "grad_norm": 5.00586446045036, "learning_rate": 6.112101621062354e-06, "loss": 0.804, "step": 9993 }, { "epoch": 0.89, "grad_norm": 5.276672972729107, "learning_rate": 6.111397332960058e-06, "loss": 0.7192, "step": 9994 }, { "epoch": 0.89, "grad_norm": 5.8162262709805175, "learning_rate": 6.110693021659779e-06, "loss": 0.8544, "step": 9995 }, { "epoch": 0.89, "grad_norm": 5.5965964650582185, "learning_rate": 6.1099886871762134e-06, "loss": 0.7483, "step": 9996 }, { "epoch": 0.89, "grad_norm": 6.822203589771857, "learning_rate": 6.1092843295240655e-06, "loss": 0.7419, "step": 9997 }, { "epoch": 0.89, "grad_norm": 6.042248790297229, "learning_rate": 6.108579948718037e-06, "loss": 0.7648, "step": 9998 }, { "epoch": 0.89, "grad_norm": 4.830030508812699, "learning_rate": 6.107875544772831e-06, "loss": 0.838, "step": 9999 }, { "epoch": 0.89, "grad_norm": 7.727371859945585, "learning_rate": 6.1071711177031465e-06, "loss": 0.7615, "step": 10000 }, { "epoch": 0.89, "grad_norm": 4.735783227055713, "learning_rate": 6.106466667523692e-06, "loss": 0.7854, "step": 10001 }, { "epoch": 0.89, "grad_norm": 6.304545210862161, "learning_rate": 6.105762194249167e-06, "loss": 0.7767, "step": 10002 }, { "epoch": 0.89, "grad_norm": 5.99249786634484, "learning_rate": 6.105057697894279e-06, "loss": 0.8448, "step": 10003 }, { "epoch": 0.89, "grad_norm": 4.387622246294665, "learning_rate": 6.104353178473732e-06, "loss": 0.7574, "step": 10004 }, { "epoch": 0.89, "grad_norm": 5.083313528126039, "learning_rate": 6.103648636002229e-06, "loss": 0.7714, "step": 10005 }, { "epoch": 0.89, "grad_norm": 4.983867207279958, "learning_rate": 6.102944070494479e-06, "loss": 0.7167, "step": 10006 }, { "epoch": 0.89, "grad_norm": 5.394388653670665, "learning_rate": 6.102239481965187e-06, "loss": 0.7278, "step": 10007 }, { "epoch": 0.89, "grad_norm": 5.108046015205648, "learning_rate": 6.10153487042906e-06, "loss": 0.733, "step": 10008 }, { "epoch": 0.89, "grad_norm": 4.854866686076289, "learning_rate": 6.100830235900803e-06, "loss": 0.7207, "step": 10009 }, { "epoch": 0.89, "grad_norm": 7.674054705932066, "learning_rate": 6.100125578395126e-06, "loss": 0.7674, "step": 10010 }, { "epoch": 0.89, "grad_norm": 5.257686403511286, "learning_rate": 6.099420897926739e-06, "loss": 0.6923, "step": 10011 }, { "epoch": 0.89, "grad_norm": 5.066290451115364, "learning_rate": 6.0987161945103465e-06, "loss": 0.7688, "step": 10012 }, { "epoch": 0.89, "grad_norm": 6.187575334233862, "learning_rate": 6.09801146816066e-06, "loss": 0.7538, "step": 10013 }, { "epoch": 0.89, "grad_norm": 8.71687445044388, "learning_rate": 6.097306718892391e-06, "loss": 0.8285, "step": 10014 }, { "epoch": 0.89, "grad_norm": 4.871311096069346, "learning_rate": 6.096601946720244e-06, "loss": 0.7834, "step": 10015 }, { "epoch": 0.89, "grad_norm": 5.773263185873478, "learning_rate": 6.095897151658934e-06, "loss": 0.7738, "step": 10016 }, { "epoch": 0.89, "grad_norm": 6.610930909008848, "learning_rate": 6.095192333723171e-06, "loss": 0.7324, "step": 10017 }, { "epoch": 0.89, "grad_norm": 6.776279945821417, "learning_rate": 6.094487492927667e-06, "loss": 0.7449, "step": 10018 }, { "epoch": 0.89, "grad_norm": 6.0715849201214445, "learning_rate": 6.093782629287132e-06, "loss": 0.7183, "step": 10019 }, { "epoch": 0.89, "grad_norm": 5.482334863950214, "learning_rate": 6.0930777428162815e-06, "loss": 0.8058, "step": 10020 }, { "epoch": 0.89, "grad_norm": 5.710100899183156, "learning_rate": 6.092372833529827e-06, "loss": 0.7524, "step": 10021 }, { "epoch": 0.89, "grad_norm": 5.477060983194241, "learning_rate": 6.091667901442481e-06, "loss": 0.842, "step": 10022 }, { "epoch": 0.89, "grad_norm": 6.368188635859025, "learning_rate": 6.090962946568957e-06, "loss": 0.7527, "step": 10023 }, { "epoch": 0.89, "grad_norm": 5.009059587859002, "learning_rate": 6.090257968923972e-06, "loss": 0.795, "step": 10024 }, { "epoch": 0.89, "grad_norm": 5.990158182959132, "learning_rate": 6.08955296852224e-06, "loss": 0.7738, "step": 10025 }, { "epoch": 0.89, "grad_norm": 4.486058449075646, "learning_rate": 6.088847945378477e-06, "loss": 0.7258, "step": 10026 }, { "epoch": 0.89, "grad_norm": 5.174720632378773, "learning_rate": 6.088142899507394e-06, "loss": 0.7741, "step": 10027 }, { "epoch": 0.89, "grad_norm": 7.686638366020004, "learning_rate": 6.087437830923715e-06, "loss": 0.7409, "step": 10028 }, { "epoch": 0.89, "grad_norm": 4.332738098931653, "learning_rate": 6.086732739642149e-06, "loss": 0.7563, "step": 10029 }, { "epoch": 0.89, "grad_norm": 5.225133777397662, "learning_rate": 6.08602762567742e-06, "loss": 0.7404, "step": 10030 }, { "epoch": 0.89, "grad_norm": 5.419400766468994, "learning_rate": 6.085322489044241e-06, "loss": 0.8044, "step": 10031 }, { "epoch": 0.89, "grad_norm": 5.626995438997508, "learning_rate": 6.084617329757332e-06, "loss": 0.7529, "step": 10032 }, { "epoch": 0.9, "grad_norm": 4.203065256208566, "learning_rate": 6.083912147831412e-06, "loss": 0.7891, "step": 10033 }, { "epoch": 0.9, "grad_norm": 5.782370649653533, "learning_rate": 6.083206943281199e-06, "loss": 0.6985, "step": 10034 }, { "epoch": 0.9, "grad_norm": 6.362973724616038, "learning_rate": 6.082501716121414e-06, "loss": 0.7801, "step": 10035 }, { "epoch": 0.9, "grad_norm": 6.263373466010143, "learning_rate": 6.081796466366778e-06, "loss": 0.7308, "step": 10036 }, { "epoch": 0.9, "grad_norm": 6.4120379684649045, "learning_rate": 6.081091194032007e-06, "loss": 0.7899, "step": 10037 }, { "epoch": 0.9, "grad_norm": 4.829747760899516, "learning_rate": 6.0803858991318255e-06, "loss": 0.8223, "step": 10038 }, { "epoch": 0.9, "grad_norm": 5.73773182302649, "learning_rate": 6.079680581680955e-06, "loss": 0.8008, "step": 10039 }, { "epoch": 0.9, "grad_norm": 5.30934508061525, "learning_rate": 6.078975241694116e-06, "loss": 0.7545, "step": 10040 }, { "epoch": 0.9, "grad_norm": 7.198314782251604, "learning_rate": 6.078269879186032e-06, "loss": 0.7324, "step": 10041 }, { "epoch": 0.9, "grad_norm": 5.862874533805563, "learning_rate": 6.077564494171426e-06, "loss": 0.7197, "step": 10042 }, { "epoch": 0.9, "grad_norm": 5.860026868749205, "learning_rate": 6.076859086665023e-06, "loss": 0.8082, "step": 10043 }, { "epoch": 0.9, "grad_norm": 6.0187565234896345, "learning_rate": 6.076153656681541e-06, "loss": 0.7073, "step": 10044 }, { "epoch": 0.9, "grad_norm": 6.301271178481069, "learning_rate": 6.07544820423571e-06, "loss": 0.8341, "step": 10045 }, { "epoch": 0.9, "grad_norm": 6.354473595692216, "learning_rate": 6.074742729342254e-06, "loss": 0.7598, "step": 10046 }, { "epoch": 0.9, "grad_norm": 4.178272236488297, "learning_rate": 6.074037232015895e-06, "loss": 0.7395, "step": 10047 }, { "epoch": 0.9, "grad_norm": 5.905838041484551, "learning_rate": 6.073331712271361e-06, "loss": 0.7398, "step": 10048 }, { "epoch": 0.9, "grad_norm": 5.502493954700063, "learning_rate": 6.072626170123379e-06, "loss": 0.7049, "step": 10049 }, { "epoch": 0.9, "grad_norm": 4.917720054291424, "learning_rate": 6.071920605586676e-06, "loss": 0.7386, "step": 10050 }, { "epoch": 0.9, "grad_norm": 5.264001219494541, "learning_rate": 6.071215018675975e-06, "loss": 0.7051, "step": 10051 }, { "epoch": 0.9, "grad_norm": 5.441411026066102, "learning_rate": 6.0705094094060094e-06, "loss": 0.7068, "step": 10052 }, { "epoch": 0.9, "grad_norm": 4.803366175328362, "learning_rate": 6.069803777791502e-06, "loss": 0.7277, "step": 10053 }, { "epoch": 0.9, "grad_norm": 6.582451690344822, "learning_rate": 6.069098123847184e-06, "loss": 0.804, "step": 10054 }, { "epoch": 0.9, "grad_norm": 6.067186353647767, "learning_rate": 6.0683924475877846e-06, "loss": 0.7401, "step": 10055 }, { "epoch": 0.9, "grad_norm": 7.209597911305351, "learning_rate": 6.067686749028031e-06, "loss": 0.7778, "step": 10056 }, { "epoch": 0.9, "grad_norm": 5.320557693865471, "learning_rate": 6.066981028182657e-06, "loss": 0.6987, "step": 10057 }, { "epoch": 0.9, "grad_norm": 5.308111495323908, "learning_rate": 6.0662752850663895e-06, "loss": 0.721, "step": 10058 }, { "epoch": 0.9, "grad_norm": 4.8280425356427985, "learning_rate": 6.0655695196939615e-06, "loss": 0.7324, "step": 10059 }, { "epoch": 0.9, "grad_norm": 7.587302455941787, "learning_rate": 6.064863732080102e-06, "loss": 0.8344, "step": 10060 }, { "epoch": 0.9, "grad_norm": 5.505566552592791, "learning_rate": 6.064157922239543e-06, "loss": 0.7773, "step": 10061 }, { "epoch": 0.9, "grad_norm": 6.22513331125461, "learning_rate": 6.063452090187021e-06, "loss": 0.7809, "step": 10062 }, { "epoch": 0.9, "grad_norm": 5.113180543984132, "learning_rate": 6.062746235937263e-06, "loss": 0.7741, "step": 10063 }, { "epoch": 0.9, "grad_norm": 4.069250187426377, "learning_rate": 6.062040359505006e-06, "loss": 0.7135, "step": 10064 }, { "epoch": 0.9, "grad_norm": 5.433675683390304, "learning_rate": 6.061334460904983e-06, "loss": 0.7618, "step": 10065 }, { "epoch": 0.9, "grad_norm": 4.768941339917, "learning_rate": 6.060628540151926e-06, "loss": 0.7509, "step": 10066 }, { "epoch": 0.9, "grad_norm": 5.150916963886978, "learning_rate": 6.05992259726057e-06, "loss": 0.755, "step": 10067 }, { "epoch": 0.9, "grad_norm": 8.215783877601135, "learning_rate": 6.059216632245654e-06, "loss": 0.8039, "step": 10068 }, { "epoch": 0.9, "grad_norm": 5.499978799247384, "learning_rate": 6.058510645121908e-06, "loss": 0.8913, "step": 10069 }, { "epoch": 0.9, "grad_norm": 4.9848393176263475, "learning_rate": 6.057804635904071e-06, "loss": 0.8056, "step": 10070 }, { "epoch": 0.9, "grad_norm": 6.534488885163003, "learning_rate": 6.057098604606879e-06, "loss": 0.7829, "step": 10071 }, { "epoch": 0.9, "grad_norm": 5.148841790775409, "learning_rate": 6.056392551245069e-06, "loss": 0.8182, "step": 10072 }, { "epoch": 0.9, "grad_norm": 5.2844800462880475, "learning_rate": 6.0556864758333776e-06, "loss": 0.7231, "step": 10073 }, { "epoch": 0.9, "grad_norm": 5.5931348799494085, "learning_rate": 6.054980378386544e-06, "loss": 0.7877, "step": 10074 }, { "epoch": 0.9, "grad_norm": 5.627605700556318, "learning_rate": 6.054274258919303e-06, "loss": 0.7675, "step": 10075 }, { "epoch": 0.9, "grad_norm": 6.930126989311527, "learning_rate": 6.053568117446397e-06, "loss": 0.7409, "step": 10076 }, { "epoch": 0.9, "grad_norm": 4.781661996215399, "learning_rate": 6.052861953982564e-06, "loss": 0.7348, "step": 10077 }, { "epoch": 0.9, "grad_norm": 7.482067947570703, "learning_rate": 6.052155768542542e-06, "loss": 0.7827, "step": 10078 }, { "epoch": 0.9, "grad_norm": 5.747027824749389, "learning_rate": 6.051449561141074e-06, "loss": 0.7234, "step": 10079 }, { "epoch": 0.9, "grad_norm": 4.948424052656772, "learning_rate": 6.050743331792898e-06, "loss": 0.7187, "step": 10080 }, { "epoch": 0.9, "grad_norm": 4.755905530630202, "learning_rate": 6.0500370805127585e-06, "loss": 0.7673, "step": 10081 }, { "epoch": 0.9, "grad_norm": 5.08734165175219, "learning_rate": 6.049330807315392e-06, "loss": 0.7175, "step": 10082 }, { "epoch": 0.9, "grad_norm": 5.105004777273502, "learning_rate": 6.048624512215543e-06, "loss": 0.7786, "step": 10083 }, { "epoch": 0.9, "grad_norm": 4.953459918845217, "learning_rate": 6.047918195227955e-06, "loss": 0.7721, "step": 10084 }, { "epoch": 0.9, "grad_norm": 5.814288047102402, "learning_rate": 6.0472118563673685e-06, "loss": 0.7514, "step": 10085 }, { "epoch": 0.9, "grad_norm": 6.560200332037439, "learning_rate": 6.046505495648529e-06, "loss": 0.8118, "step": 10086 }, { "epoch": 0.9, "grad_norm": 4.69089871195553, "learning_rate": 6.045799113086179e-06, "loss": 0.8098, "step": 10087 }, { "epoch": 0.9, "grad_norm": 5.671740650771017, "learning_rate": 6.045092708695062e-06, "loss": 0.7746, "step": 10088 }, { "epoch": 0.9, "grad_norm": 6.356871446898439, "learning_rate": 6.044386282489922e-06, "loss": 0.7609, "step": 10089 }, { "epoch": 0.9, "grad_norm": 5.351122634515333, "learning_rate": 6.043679834485509e-06, "loss": 0.8129, "step": 10090 }, { "epoch": 0.9, "grad_norm": 5.199569568170179, "learning_rate": 6.0429733646965626e-06, "loss": 0.7708, "step": 10091 }, { "epoch": 0.9, "grad_norm": 5.1130480560467335, "learning_rate": 6.042266873137831e-06, "loss": 0.7062, "step": 10092 }, { "epoch": 0.9, "grad_norm": 5.068170327086019, "learning_rate": 6.041560359824062e-06, "loss": 0.8145, "step": 10093 }, { "epoch": 0.9, "grad_norm": 4.146578788201856, "learning_rate": 6.040853824770002e-06, "loss": 0.6769, "step": 10094 }, { "epoch": 0.9, "grad_norm": 4.769983583827237, "learning_rate": 6.040147267990397e-06, "loss": 0.7163, "step": 10095 }, { "epoch": 0.9, "grad_norm": 7.923191879051833, "learning_rate": 6.039440689499996e-06, "loss": 0.761, "step": 10096 }, { "epoch": 0.9, "grad_norm": 5.747811911644929, "learning_rate": 6.038734089313547e-06, "loss": 0.8004, "step": 10097 }, { "epoch": 0.9, "grad_norm": 5.952685061465306, "learning_rate": 6.038027467445797e-06, "loss": 0.7147, "step": 10098 }, { "epoch": 0.9, "grad_norm": 4.3895768598002585, "learning_rate": 6.037320823911498e-06, "loss": 0.7682, "step": 10099 }, { "epoch": 0.9, "grad_norm": 5.789455252931676, "learning_rate": 6.0366141587254005e-06, "loss": 0.7475, "step": 10100 }, { "epoch": 0.9, "grad_norm": 7.172091641545261, "learning_rate": 6.03590747190225e-06, "loss": 0.7418, "step": 10101 }, { "epoch": 0.9, "grad_norm": 7.726220343871182, "learning_rate": 6.035200763456801e-06, "loss": 0.8353, "step": 10102 }, { "epoch": 0.9, "grad_norm": 5.134183284480216, "learning_rate": 6.034494033403803e-06, "loss": 0.7338, "step": 10103 }, { "epoch": 0.9, "grad_norm": 5.123594598953369, "learning_rate": 6.033787281758007e-06, "loss": 0.7079, "step": 10104 }, { "epoch": 0.9, "grad_norm": 5.209517733680748, "learning_rate": 6.033080508534165e-06, "loss": 0.6924, "step": 10105 }, { "epoch": 0.9, "grad_norm": 5.332955005841327, "learning_rate": 6.03237371374703e-06, "loss": 0.7834, "step": 10106 }, { "epoch": 0.9, "grad_norm": 4.775242489923759, "learning_rate": 6.031666897411355e-06, "loss": 0.7341, "step": 10107 }, { "epoch": 0.9, "grad_norm": 5.4851396099760334, "learning_rate": 6.030960059541894e-06, "loss": 0.7067, "step": 10108 }, { "epoch": 0.9, "grad_norm": 5.097324463878127, "learning_rate": 6.030253200153398e-06, "loss": 0.7683, "step": 10109 }, { "epoch": 0.9, "grad_norm": 6.273093521053382, "learning_rate": 6.029546319260623e-06, "loss": 0.7913, "step": 10110 }, { "epoch": 0.9, "grad_norm": 6.821081694592558, "learning_rate": 6.028839416878323e-06, "loss": 0.769, "step": 10111 }, { "epoch": 0.9, "grad_norm": 4.641786314944227, "learning_rate": 6.028132493021254e-06, "loss": 0.7347, "step": 10112 }, { "epoch": 0.9, "grad_norm": 6.617135824502022, "learning_rate": 6.0274255477041695e-06, "loss": 0.687, "step": 10113 }, { "epoch": 0.9, "grad_norm": 6.324514969329878, "learning_rate": 6.026718580941827e-06, "loss": 0.7408, "step": 10114 }, { "epoch": 0.9, "grad_norm": 5.048135206116677, "learning_rate": 6.026011592748984e-06, "loss": 0.8175, "step": 10115 }, { "epoch": 0.9, "grad_norm": 5.120532959706313, "learning_rate": 6.025304583140396e-06, "loss": 0.7004, "step": 10116 }, { "epoch": 0.9, "grad_norm": 5.281929852680507, "learning_rate": 6.024597552130818e-06, "loss": 0.8099, "step": 10117 }, { "epoch": 0.9, "grad_norm": 5.3943211602964, "learning_rate": 6.0238904997350116e-06, "loss": 0.7829, "step": 10118 }, { "epoch": 0.9, "grad_norm": 6.920056221303454, "learning_rate": 6.023183425967732e-06, "loss": 0.8205, "step": 10119 }, { "epoch": 0.9, "grad_norm": 3.54667145987195, "learning_rate": 6.02247633084374e-06, "loss": 0.7261, "step": 10120 }, { "epoch": 0.9, "grad_norm": 4.845720934551642, "learning_rate": 6.021769214377794e-06, "loss": 0.771, "step": 10121 }, { "epoch": 0.9, "grad_norm": 4.4836455224539264, "learning_rate": 6.021062076584654e-06, "loss": 0.7356, "step": 10122 }, { "epoch": 0.9, "grad_norm": 9.641724845102303, "learning_rate": 6.0203549174790774e-06, "loss": 0.79, "step": 10123 }, { "epoch": 0.9, "grad_norm": 4.749782728503014, "learning_rate": 6.019647737075828e-06, "loss": 0.7644, "step": 10124 }, { "epoch": 0.9, "grad_norm": 8.008392028997855, "learning_rate": 6.018940535389664e-06, "loss": 0.8416, "step": 10125 }, { "epoch": 0.9, "grad_norm": 5.698032796611498, "learning_rate": 6.018233312435349e-06, "loss": 0.7558, "step": 10126 }, { "epoch": 0.9, "grad_norm": 5.312920975506399, "learning_rate": 6.0175260682276415e-06, "loss": 0.8431, "step": 10127 }, { "epoch": 0.9, "grad_norm": 5.772987394100591, "learning_rate": 6.016818802781308e-06, "loss": 0.737, "step": 10128 }, { "epoch": 0.9, "grad_norm": 4.973141706310906, "learning_rate": 6.016111516111108e-06, "loss": 0.71, "step": 10129 }, { "epoch": 0.9, "grad_norm": 5.420661799350445, "learning_rate": 6.015404208231804e-06, "loss": 0.7971, "step": 10130 }, { "epoch": 0.9, "grad_norm": 5.2295498432948975, "learning_rate": 6.014696879158162e-06, "loss": 0.7579, "step": 10131 }, { "epoch": 0.9, "grad_norm": 6.018855134838867, "learning_rate": 6.013989528904944e-06, "loss": 0.7167, "step": 10132 }, { "epoch": 0.9, "grad_norm": 4.110255160365863, "learning_rate": 6.013282157486916e-06, "loss": 0.7215, "step": 10133 }, { "epoch": 0.9, "grad_norm": 5.166774050905314, "learning_rate": 6.012574764918841e-06, "loss": 0.7224, "step": 10134 }, { "epoch": 0.9, "grad_norm": 4.281741455106094, "learning_rate": 6.011867351215487e-06, "loss": 0.7208, "step": 10135 }, { "epoch": 0.9, "grad_norm": 5.903080868371061, "learning_rate": 6.011159916391618e-06, "loss": 0.7718, "step": 10136 }, { "epoch": 0.9, "grad_norm": 6.43860259129649, "learning_rate": 6.010452460461998e-06, "loss": 0.8073, "step": 10137 }, { "epoch": 0.9, "grad_norm": 8.54332453093333, "learning_rate": 6.009744983441398e-06, "loss": 0.7278, "step": 10138 }, { "epoch": 0.9, "grad_norm": 5.9317512724742105, "learning_rate": 6.009037485344582e-06, "loss": 0.7973, "step": 10139 }, { "epoch": 0.9, "grad_norm": 6.898519067128321, "learning_rate": 6.008329966186317e-06, "loss": 0.7935, "step": 10140 }, { "epoch": 0.9, "grad_norm": 6.841711738682992, "learning_rate": 6.007622425981374e-06, "loss": 0.785, "step": 10141 }, { "epoch": 0.9, "grad_norm": 3.9822828056981296, "learning_rate": 6.006914864744517e-06, "loss": 0.7687, "step": 10142 }, { "epoch": 0.9, "grad_norm": 5.019336005640037, "learning_rate": 6.0062072824905195e-06, "loss": 0.8059, "step": 10143 }, { "epoch": 0.9, "grad_norm": 6.082063576322833, "learning_rate": 6.005499679234149e-06, "loss": 0.7934, "step": 10144 }, { "epoch": 0.91, "grad_norm": 5.408641323200552, "learning_rate": 6.004792054990174e-06, "loss": 0.7944, "step": 10145 }, { "epoch": 0.91, "grad_norm": 5.222675246254403, "learning_rate": 6.004084409773364e-06, "loss": 0.7249, "step": 10146 }, { "epoch": 0.91, "grad_norm": 8.601163465490684, "learning_rate": 6.003376743598492e-06, "loss": 0.7604, "step": 10147 }, { "epoch": 0.91, "grad_norm": 5.6605740215032645, "learning_rate": 6.002669056480328e-06, "loss": 0.7687, "step": 10148 }, { "epoch": 0.91, "grad_norm": 5.126747031031846, "learning_rate": 6.001961348433642e-06, "loss": 0.8262, "step": 10149 }, { "epoch": 0.91, "grad_norm": 4.048416241417372, "learning_rate": 6.001253619473209e-06, "loss": 0.7466, "step": 10150 }, { "epoch": 0.91, "grad_norm": 5.8441006645314575, "learning_rate": 6.0005458696138e-06, "loss": 0.7948, "step": 10151 }, { "epoch": 0.91, "grad_norm": 6.184457871113128, "learning_rate": 5.9998380988701845e-06, "loss": 0.8017, "step": 10152 }, { "epoch": 0.91, "grad_norm": 6.16397216425896, "learning_rate": 5.9991303072571395e-06, "loss": 0.7721, "step": 10153 }, { "epoch": 0.91, "grad_norm": 8.341066575556022, "learning_rate": 5.998422494789438e-06, "loss": 0.8287, "step": 10154 }, { "epoch": 0.91, "grad_norm": 5.482470107070238, "learning_rate": 5.9977146614818515e-06, "loss": 0.7623, "step": 10155 }, { "epoch": 0.91, "grad_norm": 6.344598088749769, "learning_rate": 5.997006807349157e-06, "loss": 0.7679, "step": 10156 }, { "epoch": 0.91, "grad_norm": 4.635837145415517, "learning_rate": 5.996298932406131e-06, "loss": 0.7421, "step": 10157 }, { "epoch": 0.91, "grad_norm": 6.3405981021877285, "learning_rate": 5.995591036667545e-06, "loss": 0.8404, "step": 10158 }, { "epoch": 0.91, "grad_norm": 5.93853278804704, "learning_rate": 5.994883120148176e-06, "loss": 0.7526, "step": 10159 }, { "epoch": 0.91, "grad_norm": 6.0336639916880825, "learning_rate": 5.994175182862802e-06, "loss": 0.7239, "step": 10160 }, { "epoch": 0.91, "grad_norm": 4.984680383713284, "learning_rate": 5.993467224826197e-06, "loss": 0.8013, "step": 10161 }, { "epoch": 0.91, "grad_norm": 5.8556186652738695, "learning_rate": 5.992759246053139e-06, "loss": 0.7894, "step": 10162 }, { "epoch": 0.91, "grad_norm": 4.534400100311656, "learning_rate": 5.992051246558407e-06, "loss": 0.7504, "step": 10163 }, { "epoch": 0.91, "grad_norm": 7.124377546536591, "learning_rate": 5.9913432263567775e-06, "loss": 0.7599, "step": 10164 }, { "epoch": 0.91, "grad_norm": 6.065690904053198, "learning_rate": 5.99063518546303e-06, "loss": 0.7196, "step": 10165 }, { "epoch": 0.91, "grad_norm": 6.67918373889295, "learning_rate": 5.9899271238919395e-06, "loss": 0.7879, "step": 10166 }, { "epoch": 0.91, "grad_norm": 4.3733512130602765, "learning_rate": 5.989219041658292e-06, "loss": 0.7752, "step": 10167 }, { "epoch": 0.91, "grad_norm": 4.793435821293688, "learning_rate": 5.9885109387768605e-06, "loss": 0.7392, "step": 10168 }, { "epoch": 0.91, "grad_norm": 3.4255563103600104, "learning_rate": 5.987802815262429e-06, "loss": 0.7809, "step": 10169 }, { "epoch": 0.91, "grad_norm": 5.544626023788492, "learning_rate": 5.987094671129777e-06, "loss": 0.7796, "step": 10170 }, { "epoch": 0.91, "grad_norm": 6.250783963214054, "learning_rate": 5.986386506393686e-06, "loss": 0.7924, "step": 10171 }, { "epoch": 0.91, "grad_norm": 5.401299321511758, "learning_rate": 5.9856783210689355e-06, "loss": 0.7409, "step": 10172 }, { "epoch": 0.91, "grad_norm": 4.62978406185081, "learning_rate": 5.98497011517031e-06, "loss": 0.7235, "step": 10173 }, { "epoch": 0.91, "grad_norm": 4.938909699381534, "learning_rate": 5.984261888712589e-06, "loss": 0.7152, "step": 10174 }, { "epoch": 0.91, "grad_norm": 4.9423853534991755, "learning_rate": 5.983553641710558e-06, "loss": 0.8283, "step": 10175 }, { "epoch": 0.91, "grad_norm": 4.625666816171898, "learning_rate": 5.982845374178999e-06, "loss": 0.7528, "step": 10176 }, { "epoch": 0.91, "grad_norm": 6.849159932641711, "learning_rate": 5.9821370861326936e-06, "loss": 0.7741, "step": 10177 }, { "epoch": 0.91, "grad_norm": 5.811759627358219, "learning_rate": 5.981428777586427e-06, "loss": 0.7736, "step": 10178 }, { "epoch": 0.91, "grad_norm": 5.0867673195608605, "learning_rate": 5.980720448554985e-06, "loss": 0.8026, "step": 10179 }, { "epoch": 0.91, "grad_norm": 5.871666937271671, "learning_rate": 5.980012099053152e-06, "loss": 0.7687, "step": 10180 }, { "epoch": 0.91, "grad_norm": 6.550254771156421, "learning_rate": 5.979303729095711e-06, "loss": 0.67, "step": 10181 }, { "epoch": 0.91, "grad_norm": 5.403168501484566, "learning_rate": 5.978595338697451e-06, "loss": 0.7745, "step": 10182 }, { "epoch": 0.91, "grad_norm": 4.833699124739106, "learning_rate": 5.977886927873155e-06, "loss": 0.7829, "step": 10183 }, { "epoch": 0.91, "grad_norm": 6.878447063134297, "learning_rate": 5.9771784966376126e-06, "loss": 0.8032, "step": 10184 }, { "epoch": 0.91, "grad_norm": 5.964439574424301, "learning_rate": 5.9764700450056065e-06, "loss": 0.7753, "step": 10185 }, { "epoch": 0.91, "grad_norm": 4.747248068486609, "learning_rate": 5.975761572991929e-06, "loss": 0.8838, "step": 10186 }, { "epoch": 0.91, "grad_norm": 4.494419879989593, "learning_rate": 5.975053080611365e-06, "loss": 0.7595, "step": 10187 }, { "epoch": 0.91, "grad_norm": 4.037945476084634, "learning_rate": 5.974344567878703e-06, "loss": 0.7639, "step": 10188 }, { "epoch": 0.91, "grad_norm": 6.4617793844962454, "learning_rate": 5.973636034808732e-06, "loss": 0.7098, "step": 10189 }, { "epoch": 0.91, "grad_norm": 5.9561752575729425, "learning_rate": 5.9729274814162405e-06, "loss": 0.7834, "step": 10190 }, { "epoch": 0.91, "grad_norm": 5.5104831278262125, "learning_rate": 5.972218907716018e-06, "loss": 0.7463, "step": 10191 }, { "epoch": 0.91, "grad_norm": 5.244261278841452, "learning_rate": 5.971510313722857e-06, "loss": 0.7724, "step": 10192 }, { "epoch": 0.91, "grad_norm": 6.7579344364370275, "learning_rate": 5.970801699451544e-06, "loss": 0.7346, "step": 10193 }, { "epoch": 0.91, "grad_norm": 5.147625337172605, "learning_rate": 5.970093064916873e-06, "loss": 0.7623, "step": 10194 }, { "epoch": 0.91, "grad_norm": 4.815283855281739, "learning_rate": 5.969384410133632e-06, "loss": 0.7623, "step": 10195 }, { "epoch": 0.91, "grad_norm": 5.979515260478697, "learning_rate": 5.968675735116616e-06, "loss": 0.734, "step": 10196 }, { "epoch": 0.91, "grad_norm": 5.054534094317187, "learning_rate": 5.967967039880614e-06, "loss": 0.6948, "step": 10197 }, { "epoch": 0.91, "grad_norm": 5.92188626744924, "learning_rate": 5.967258324440421e-06, "loss": 0.8388, "step": 10198 }, { "epoch": 0.91, "grad_norm": 6.349287666226595, "learning_rate": 5.9665495888108285e-06, "loss": 0.7093, "step": 10199 }, { "epoch": 0.91, "grad_norm": 4.999478698249883, "learning_rate": 5.965840833006629e-06, "loss": 0.72, "step": 10200 }, { "epoch": 0.91, "grad_norm": 7.0080130748785034, "learning_rate": 5.965132057042619e-06, "loss": 0.7168, "step": 10201 }, { "epoch": 0.91, "grad_norm": 5.213317350673265, "learning_rate": 5.9644232609335905e-06, "loss": 0.7541, "step": 10202 }, { "epoch": 0.91, "grad_norm": 6.3889212633825885, "learning_rate": 5.963714444694337e-06, "loss": 0.7895, "step": 10203 }, { "epoch": 0.91, "grad_norm": 5.769421033905793, "learning_rate": 5.963005608339656e-06, "loss": 0.831, "step": 10204 }, { "epoch": 0.91, "grad_norm": 4.9197544387462315, "learning_rate": 5.9622967518843425e-06, "loss": 0.6931, "step": 10205 }, { "epoch": 0.91, "grad_norm": 6.594457269245087, "learning_rate": 5.96158787534319e-06, "loss": 0.864, "step": 10206 }, { "epoch": 0.91, "grad_norm": 5.875402121695587, "learning_rate": 5.960878978730998e-06, "loss": 0.8205, "step": 10207 }, { "epoch": 0.91, "grad_norm": 6.233978370962491, "learning_rate": 5.96017006206256e-06, "loss": 0.7241, "step": 10208 }, { "epoch": 0.91, "grad_norm": 7.839445752282132, "learning_rate": 5.959461125352677e-06, "loss": 0.7592, "step": 10209 }, { "epoch": 0.91, "grad_norm": 4.600687005484644, "learning_rate": 5.958752168616142e-06, "loss": 0.7423, "step": 10210 }, { "epoch": 0.91, "grad_norm": 5.248519781800196, "learning_rate": 5.958043191867757e-06, "loss": 0.8532, "step": 10211 }, { "epoch": 0.91, "grad_norm": 5.440763487065326, "learning_rate": 5.957334195122316e-06, "loss": 0.8065, "step": 10212 }, { "epoch": 0.91, "grad_norm": 5.273071068090524, "learning_rate": 5.956625178394622e-06, "loss": 0.8017, "step": 10213 }, { "epoch": 0.91, "grad_norm": 5.3393760530749175, "learning_rate": 5.955916141699473e-06, "loss": 0.7366, "step": 10214 }, { "epoch": 0.91, "grad_norm": 6.009460421139024, "learning_rate": 5.955207085051666e-06, "loss": 0.7754, "step": 10215 }, { "epoch": 0.91, "grad_norm": 4.24644008568989, "learning_rate": 5.954498008466004e-06, "loss": 0.7384, "step": 10216 }, { "epoch": 0.91, "grad_norm": 5.5411862043126785, "learning_rate": 5.953788911957287e-06, "loss": 0.7487, "step": 10217 }, { "epoch": 0.91, "grad_norm": 6.044253759104281, "learning_rate": 5.953079795540314e-06, "loss": 0.8138, "step": 10218 }, { "epoch": 0.91, "grad_norm": 4.822614510412595, "learning_rate": 5.952370659229888e-06, "loss": 0.7711, "step": 10219 }, { "epoch": 0.91, "grad_norm": 6.139392977134978, "learning_rate": 5.95166150304081e-06, "loss": 0.7674, "step": 10220 }, { "epoch": 0.91, "grad_norm": 5.716702171710714, "learning_rate": 5.950952326987883e-06, "loss": 0.7662, "step": 10221 }, { "epoch": 0.91, "grad_norm": 6.607393314711142, "learning_rate": 5.950243131085908e-06, "loss": 0.713, "step": 10222 }, { "epoch": 0.91, "grad_norm": 7.698212825370309, "learning_rate": 5.949533915349689e-06, "loss": 0.7416, "step": 10223 }, { "epoch": 0.91, "grad_norm": 4.901374933639409, "learning_rate": 5.94882467979403e-06, "loss": 0.7657, "step": 10224 }, { "epoch": 0.91, "grad_norm": 5.277682104149696, "learning_rate": 5.948115424433732e-06, "loss": 0.7382, "step": 10225 }, { "epoch": 0.91, "grad_norm": 5.199364599579645, "learning_rate": 5.9474061492835995e-06, "loss": 0.7585, "step": 10226 }, { "epoch": 0.91, "grad_norm": 4.743683224819009, "learning_rate": 5.9466968543584415e-06, "loss": 0.7377, "step": 10227 }, { "epoch": 0.91, "grad_norm": 5.706162499848515, "learning_rate": 5.9459875396730584e-06, "loss": 0.8489, "step": 10228 }, { "epoch": 0.91, "grad_norm": 4.93606856886517, "learning_rate": 5.945278205242257e-06, "loss": 0.7588, "step": 10229 }, { "epoch": 0.91, "grad_norm": 5.667029038509037, "learning_rate": 5.944568851080843e-06, "loss": 0.7113, "step": 10230 }, { "epoch": 0.91, "grad_norm": 6.681734489503555, "learning_rate": 5.9438594772036236e-06, "loss": 0.7126, "step": 10231 }, { "epoch": 0.91, "grad_norm": 6.1548601584572475, "learning_rate": 5.943150083625404e-06, "loss": 0.733, "step": 10232 }, { "epoch": 0.91, "grad_norm": 5.060351325497627, "learning_rate": 5.942440670360992e-06, "loss": 0.7322, "step": 10233 }, { "epoch": 0.91, "grad_norm": 5.244535210468549, "learning_rate": 5.941731237425196e-06, "loss": 0.7439, "step": 10234 }, { "epoch": 0.91, "grad_norm": 4.474595827267809, "learning_rate": 5.941021784832821e-06, "loss": 0.7821, "step": 10235 }, { "epoch": 0.91, "grad_norm": 7.149365825171848, "learning_rate": 5.94031231259868e-06, "loss": 0.8077, "step": 10236 }, { "epoch": 0.91, "grad_norm": 5.604554043797558, "learning_rate": 5.939602820737576e-06, "loss": 0.7793, "step": 10237 }, { "epoch": 0.91, "grad_norm": 5.3463142247907935, "learning_rate": 5.938893309264321e-06, "loss": 0.7112, "step": 10238 }, { "epoch": 0.91, "grad_norm": 5.279540793060562, "learning_rate": 5.9381837781937245e-06, "loss": 0.8195, "step": 10239 }, { "epoch": 0.91, "grad_norm": 6.050913011049116, "learning_rate": 5.937474227540597e-06, "loss": 0.8252, "step": 10240 }, { "epoch": 0.91, "grad_norm": 7.165954218964528, "learning_rate": 5.936764657319747e-06, "loss": 0.8221, "step": 10241 }, { "epoch": 0.91, "grad_norm": 4.841602496829226, "learning_rate": 5.936055067545986e-06, "loss": 0.7006, "step": 10242 }, { "epoch": 0.91, "grad_norm": 5.2423112063594655, "learning_rate": 5.935345458234128e-06, "loss": 0.7373, "step": 10243 }, { "epoch": 0.91, "grad_norm": 5.393968280154461, "learning_rate": 5.934635829398979e-06, "loss": 0.7647, "step": 10244 }, { "epoch": 0.91, "grad_norm": 5.733558035126318, "learning_rate": 5.933926181055355e-06, "loss": 0.7825, "step": 10245 }, { "epoch": 0.91, "grad_norm": 5.698497496963701, "learning_rate": 5.933216513218069e-06, "loss": 0.7136, "step": 10246 }, { "epoch": 0.91, "grad_norm": 5.165242828459354, "learning_rate": 5.932506825901929e-06, "loss": 0.7985, "step": 10247 }, { "epoch": 0.91, "grad_norm": 7.5555363539972, "learning_rate": 5.931797119121752e-06, "loss": 0.8016, "step": 10248 }, { "epoch": 0.91, "grad_norm": 6.121251574037313, "learning_rate": 5.931087392892352e-06, "loss": 0.7135, "step": 10249 }, { "epoch": 0.91, "grad_norm": 4.5199037689258, "learning_rate": 5.93037764722854e-06, "loss": 0.7838, "step": 10250 }, { "epoch": 0.91, "grad_norm": 8.084462957977724, "learning_rate": 5.929667882145133e-06, "loss": 0.7718, "step": 10251 }, { "epoch": 0.91, "grad_norm": 6.605646446882679, "learning_rate": 5.928958097656945e-06, "loss": 0.7214, "step": 10252 }, { "epoch": 0.91, "grad_norm": 5.103123179590603, "learning_rate": 5.928248293778791e-06, "loss": 0.6932, "step": 10253 }, { "epoch": 0.91, "grad_norm": 5.420309280541527, "learning_rate": 5.927538470525487e-06, "loss": 0.791, "step": 10254 }, { "epoch": 0.91, "grad_norm": 5.433796437079402, "learning_rate": 5.926828627911847e-06, "loss": 0.7878, "step": 10255 }, { "epoch": 0.91, "grad_norm": 4.614681843317519, "learning_rate": 5.926118765952691e-06, "loss": 0.7868, "step": 10256 }, { "epoch": 0.92, "grad_norm": 4.721420884966442, "learning_rate": 5.9254088846628334e-06, "loss": 0.7851, "step": 10257 }, { "epoch": 0.92, "grad_norm": 5.395996853961319, "learning_rate": 5.924698984057092e-06, "loss": 0.7292, "step": 10258 }, { "epoch": 0.92, "grad_norm": 6.048717911283639, "learning_rate": 5.923989064150285e-06, "loss": 0.7391, "step": 10259 }, { "epoch": 0.92, "grad_norm": 4.419366523033852, "learning_rate": 5.923279124957228e-06, "loss": 0.7213, "step": 10260 }, { "epoch": 0.92, "grad_norm": 6.6388047724926, "learning_rate": 5.922569166492742e-06, "loss": 0.7582, "step": 10261 }, { "epoch": 0.92, "grad_norm": 11.252333126976932, "learning_rate": 5.921859188771647e-06, "loss": 0.8129, "step": 10262 }, { "epoch": 0.92, "grad_norm": 6.1425472048752985, "learning_rate": 5.921149191808757e-06, "loss": 0.7949, "step": 10263 }, { "epoch": 0.92, "grad_norm": 4.68021286840027, "learning_rate": 5.920439175618897e-06, "loss": 0.6935, "step": 10264 }, { "epoch": 0.92, "grad_norm": 6.120487722410045, "learning_rate": 5.919729140216884e-06, "loss": 0.7978, "step": 10265 }, { "epoch": 0.92, "grad_norm": 5.817397009794762, "learning_rate": 5.919019085617541e-06, "loss": 0.7118, "step": 10266 }, { "epoch": 0.92, "grad_norm": 5.598504002989604, "learning_rate": 5.918309011835686e-06, "loss": 0.8079, "step": 10267 }, { "epoch": 0.92, "grad_norm": 6.086787909908084, "learning_rate": 5.917598918886141e-06, "loss": 0.7574, "step": 10268 }, { "epoch": 0.92, "grad_norm": 4.705525652651661, "learning_rate": 5.9168888067837286e-06, "loss": 0.6873, "step": 10269 }, { "epoch": 0.92, "grad_norm": 5.1889257299656455, "learning_rate": 5.916178675543269e-06, "loss": 0.7859, "step": 10270 }, { "epoch": 0.92, "grad_norm": 5.3052941778272125, "learning_rate": 5.915468525179588e-06, "loss": 0.733, "step": 10271 }, { "epoch": 0.92, "grad_norm": 4.915900079386993, "learning_rate": 5.914758355707506e-06, "loss": 0.7145, "step": 10272 }, { "epoch": 0.92, "grad_norm": 7.680818432999958, "learning_rate": 5.914048167141846e-06, "loss": 0.7894, "step": 10273 }, { "epoch": 0.92, "grad_norm": 5.0757167228610225, "learning_rate": 5.913337959497433e-06, "loss": 0.7202, "step": 10274 }, { "epoch": 0.92, "grad_norm": 5.849375295542815, "learning_rate": 5.912627732789089e-06, "loss": 0.7753, "step": 10275 }, { "epoch": 0.92, "grad_norm": 5.070136730501652, "learning_rate": 5.911917487031641e-06, "loss": 0.8111, "step": 10276 }, { "epoch": 0.92, "grad_norm": 6.944433457796826, "learning_rate": 5.911207222239911e-06, "loss": 0.7748, "step": 10277 }, { "epoch": 0.92, "grad_norm": 4.67943034409326, "learning_rate": 5.910496938428727e-06, "loss": 0.7819, "step": 10278 }, { "epoch": 0.92, "grad_norm": 6.439583326947833, "learning_rate": 5.909786635612913e-06, "loss": 0.7274, "step": 10279 }, { "epoch": 0.92, "grad_norm": 4.655363060879445, "learning_rate": 5.909076313807295e-06, "loss": 0.8355, "step": 10280 }, { "epoch": 0.92, "grad_norm": 5.749378105623691, "learning_rate": 5.908365973026702e-06, "loss": 0.7852, "step": 10281 }, { "epoch": 0.92, "grad_norm": 4.578349842518201, "learning_rate": 5.907655613285957e-06, "loss": 0.735, "step": 10282 }, { "epoch": 0.92, "grad_norm": 6.94192012514465, "learning_rate": 5.906945234599887e-06, "loss": 0.7844, "step": 10283 }, { "epoch": 0.92, "grad_norm": 6.744945539299703, "learning_rate": 5.906234836983323e-06, "loss": 0.8119, "step": 10284 }, { "epoch": 0.92, "grad_norm": 6.687302574560532, "learning_rate": 5.905524420451092e-06, "loss": 0.7687, "step": 10285 }, { "epoch": 0.92, "grad_norm": 5.429418575884567, "learning_rate": 5.904813985018021e-06, "loss": 0.7618, "step": 10286 }, { "epoch": 0.92, "grad_norm": 5.366395482336362, "learning_rate": 5.904103530698939e-06, "loss": 0.786, "step": 10287 }, { "epoch": 0.92, "grad_norm": 4.925739570850399, "learning_rate": 5.903393057508677e-06, "loss": 0.7678, "step": 10288 }, { "epoch": 0.92, "grad_norm": 5.459284382438698, "learning_rate": 5.902682565462064e-06, "loss": 0.7215, "step": 10289 }, { "epoch": 0.92, "grad_norm": 5.198719574093342, "learning_rate": 5.901972054573927e-06, "loss": 0.768, "step": 10290 }, { "epoch": 0.92, "grad_norm": 5.199791313280977, "learning_rate": 5.9012615248591e-06, "loss": 0.8258, "step": 10291 }, { "epoch": 0.92, "grad_norm": 6.7869140878290155, "learning_rate": 5.900550976332412e-06, "loss": 0.7307, "step": 10292 }, { "epoch": 0.92, "grad_norm": 5.640913265627771, "learning_rate": 5.899840409008693e-06, "loss": 0.813, "step": 10293 }, { "epoch": 0.92, "grad_norm": 5.434595488179786, "learning_rate": 5.8991298229027795e-06, "loss": 0.7795, "step": 10294 }, { "epoch": 0.92, "grad_norm": 6.877006451601237, "learning_rate": 5.898419218029499e-06, "loss": 0.8245, "step": 10295 }, { "epoch": 0.92, "grad_norm": 6.106576136554656, "learning_rate": 5.897708594403684e-06, "loss": 0.7578, "step": 10296 }, { "epoch": 0.92, "grad_norm": 5.773713429721693, "learning_rate": 5.896997952040168e-06, "loss": 0.7496, "step": 10297 }, { "epoch": 0.92, "grad_norm": 6.167837520183137, "learning_rate": 5.896287290953784e-06, "loss": 0.724, "step": 10298 }, { "epoch": 0.92, "grad_norm": 6.750423640308995, "learning_rate": 5.895576611159365e-06, "loss": 0.7548, "step": 10299 }, { "epoch": 0.92, "grad_norm": 5.6414621803823275, "learning_rate": 5.894865912671748e-06, "loss": 0.7867, "step": 10300 }, { "epoch": 0.92, "grad_norm": 4.934873624403338, "learning_rate": 5.894155195505762e-06, "loss": 0.7436, "step": 10301 }, { "epoch": 0.92, "grad_norm": 4.322468548155096, "learning_rate": 5.893444459676246e-06, "loss": 0.7069, "step": 10302 }, { "epoch": 0.92, "grad_norm": 6.153724410587487, "learning_rate": 5.892733705198033e-06, "loss": 0.7747, "step": 10303 }, { "epoch": 0.92, "grad_norm": 6.075851175729716, "learning_rate": 5.89202293208596e-06, "loss": 0.7383, "step": 10304 }, { "epoch": 0.92, "grad_norm": 5.7816544760087005, "learning_rate": 5.8913121403548605e-06, "loss": 0.7307, "step": 10305 }, { "epoch": 0.92, "grad_norm": 6.007082653587733, "learning_rate": 5.890601330019572e-06, "loss": 0.7555, "step": 10306 }, { "epoch": 0.92, "grad_norm": 5.666168129069355, "learning_rate": 5.889890501094932e-06, "loss": 0.7232, "step": 10307 }, { "epoch": 0.92, "grad_norm": 6.453098121153691, "learning_rate": 5.889179653595776e-06, "loss": 0.6662, "step": 10308 }, { "epoch": 0.92, "grad_norm": 8.686504383003271, "learning_rate": 5.888468787536943e-06, "loss": 0.7967, "step": 10309 }, { "epoch": 0.92, "grad_norm": 5.367405327499712, "learning_rate": 5.887757902933269e-06, "loss": 0.7079, "step": 10310 }, { "epoch": 0.92, "grad_norm": 5.755159719408923, "learning_rate": 5.887046999799593e-06, "loss": 0.8253, "step": 10311 }, { "epoch": 0.92, "grad_norm": 4.897067130097319, "learning_rate": 5.886336078150753e-06, "loss": 0.7955, "step": 10312 }, { "epoch": 0.92, "grad_norm": 4.654706902678017, "learning_rate": 5.885625138001589e-06, "loss": 0.7877, "step": 10313 }, { "epoch": 0.92, "grad_norm": 4.383833474739498, "learning_rate": 5.88491417936694e-06, "loss": 0.7654, "step": 10314 }, { "epoch": 0.92, "grad_norm": 5.075102691656701, "learning_rate": 5.884203202261643e-06, "loss": 0.797, "step": 10315 }, { "epoch": 0.92, "grad_norm": 5.090295343243966, "learning_rate": 5.883492206700544e-06, "loss": 0.7241, "step": 10316 }, { "epoch": 0.92, "grad_norm": 4.346853261343785, "learning_rate": 5.882781192698479e-06, "loss": 0.8664, "step": 10317 }, { "epoch": 0.92, "grad_norm": 7.910441799543443, "learning_rate": 5.882070160270289e-06, "loss": 0.8855, "step": 10318 }, { "epoch": 0.92, "grad_norm": 8.458027462217437, "learning_rate": 5.881359109430816e-06, "loss": 0.7845, "step": 10319 }, { "epoch": 0.92, "grad_norm": 5.941103665928735, "learning_rate": 5.880648040194903e-06, "loss": 0.7134, "step": 10320 }, { "epoch": 0.92, "grad_norm": 4.277356727217884, "learning_rate": 5.879936952577389e-06, "loss": 0.8185, "step": 10321 }, { "epoch": 0.92, "grad_norm": 4.88626592785158, "learning_rate": 5.87922584659312e-06, "loss": 0.7747, "step": 10322 }, { "epoch": 0.92, "grad_norm": 6.606761594443453, "learning_rate": 5.878514722256935e-06, "loss": 0.7821, "step": 10323 }, { "epoch": 0.92, "grad_norm": 5.577610080341137, "learning_rate": 5.8778035795836805e-06, "loss": 0.8278, "step": 10324 }, { "epoch": 0.92, "grad_norm": 6.787826382015443, "learning_rate": 5.8770924185881986e-06, "loss": 0.7681, "step": 10325 }, { "epoch": 0.92, "grad_norm": 5.824005570876082, "learning_rate": 5.876381239285334e-06, "loss": 0.7989, "step": 10326 }, { "epoch": 0.92, "grad_norm": 4.878096438500129, "learning_rate": 5.875670041689928e-06, "loss": 0.763, "step": 10327 }, { "epoch": 0.92, "grad_norm": 5.568807387049608, "learning_rate": 5.8749588258168274e-06, "loss": 0.7465, "step": 10328 }, { "epoch": 0.92, "grad_norm": 4.532008265704843, "learning_rate": 5.874247591680881e-06, "loss": 0.7503, "step": 10329 }, { "epoch": 0.92, "grad_norm": 6.193955682249286, "learning_rate": 5.873536339296927e-06, "loss": 0.7827, "step": 10330 }, { "epoch": 0.92, "grad_norm": 5.1295558430872426, "learning_rate": 5.872825068679817e-06, "loss": 0.7895, "step": 10331 }, { "epoch": 0.92, "grad_norm": 3.9245282441763143, "learning_rate": 5.872113779844395e-06, "loss": 0.787, "step": 10332 }, { "epoch": 0.92, "grad_norm": 6.129126951365239, "learning_rate": 5.871402472805507e-06, "loss": 0.7499, "step": 10333 }, { "epoch": 0.92, "grad_norm": 5.728993177312453, "learning_rate": 5.870691147578001e-06, "loss": 0.7819, "step": 10334 }, { "epoch": 0.92, "grad_norm": 5.197886358451952, "learning_rate": 5.869979804176724e-06, "loss": 0.7739, "step": 10335 }, { "epoch": 0.92, "grad_norm": 5.3982869379694165, "learning_rate": 5.8692684426165235e-06, "loss": 0.8111, "step": 10336 }, { "epoch": 0.92, "grad_norm": 5.148365410356932, "learning_rate": 5.868557062912248e-06, "loss": 0.7162, "step": 10337 }, { "epoch": 0.92, "grad_norm": 6.114282040573901, "learning_rate": 5.867845665078746e-06, "loss": 0.7651, "step": 10338 }, { "epoch": 0.92, "grad_norm": 5.402617446184281, "learning_rate": 5.8671342491308684e-06, "loss": 0.8049, "step": 10339 }, { "epoch": 0.92, "grad_norm": 4.607720925592822, "learning_rate": 5.866422815083459e-06, "loss": 0.763, "step": 10340 }, { "epoch": 0.92, "grad_norm": 5.5224531370962895, "learning_rate": 5.865711362951372e-06, "loss": 0.7817, "step": 10341 }, { "epoch": 0.92, "grad_norm": 5.595438115496167, "learning_rate": 5.864999892749458e-06, "loss": 0.7785, "step": 10342 }, { "epoch": 0.92, "grad_norm": 4.882187300794333, "learning_rate": 5.864288404492563e-06, "loss": 0.8399, "step": 10343 }, { "epoch": 0.92, "grad_norm": 5.521319483182579, "learning_rate": 5.863576898195541e-06, "loss": 0.7402, "step": 10344 }, { "epoch": 0.92, "grad_norm": 5.365883489702586, "learning_rate": 5.862865373873243e-06, "loss": 0.7596, "step": 10345 }, { "epoch": 0.92, "grad_norm": 9.4401196917089, "learning_rate": 5.86215383154052e-06, "loss": 0.7991, "step": 10346 }, { "epoch": 0.92, "grad_norm": 5.932178646703814, "learning_rate": 5.861442271212223e-06, "loss": 0.8174, "step": 10347 }, { "epoch": 0.92, "grad_norm": 6.907031448300558, "learning_rate": 5.860730692903207e-06, "loss": 0.8017, "step": 10348 }, { "epoch": 0.92, "grad_norm": 10.675850629778722, "learning_rate": 5.860019096628321e-06, "loss": 0.8396, "step": 10349 }, { "epoch": 0.92, "grad_norm": 6.311565954651198, "learning_rate": 5.859307482402418e-06, "loss": 0.7717, "step": 10350 }, { "epoch": 0.92, "grad_norm": 4.96821983252965, "learning_rate": 5.8585958502403576e-06, "loss": 0.7639, "step": 10351 }, { "epoch": 0.92, "grad_norm": 5.241393107918069, "learning_rate": 5.857884200156985e-06, "loss": 0.8147, "step": 10352 }, { "epoch": 0.92, "grad_norm": 5.586874388800151, "learning_rate": 5.857172532167162e-06, "loss": 0.7754, "step": 10353 }, { "epoch": 0.92, "grad_norm": 4.719524147420923, "learning_rate": 5.8564608462857375e-06, "loss": 0.7503, "step": 10354 }, { "epoch": 0.92, "grad_norm": 6.484857305083872, "learning_rate": 5.855749142527569e-06, "loss": 0.783, "step": 10355 }, { "epoch": 0.92, "grad_norm": 6.2154446857985, "learning_rate": 5.855037420907511e-06, "loss": 0.7986, "step": 10356 }, { "epoch": 0.92, "grad_norm": 6.8154011031568205, "learning_rate": 5.85432568144042e-06, "loss": 0.7204, "step": 10357 }, { "epoch": 0.92, "grad_norm": 4.87853289994059, "learning_rate": 5.853613924141151e-06, "loss": 0.8086, "step": 10358 }, { "epoch": 0.92, "grad_norm": 5.992968615896541, "learning_rate": 5.852902149024559e-06, "loss": 0.7373, "step": 10359 }, { "epoch": 0.92, "grad_norm": 7.200317375051821, "learning_rate": 5.8521903561055045e-06, "loss": 0.7661, "step": 10360 }, { "epoch": 0.92, "grad_norm": 4.679027155574282, "learning_rate": 5.8514785453988435e-06, "loss": 0.7649, "step": 10361 }, { "epoch": 0.92, "grad_norm": 7.500005858373713, "learning_rate": 5.85076671691943e-06, "loss": 0.6932, "step": 10362 }, { "epoch": 0.92, "grad_norm": 5.360151891061298, "learning_rate": 5.850054870682125e-06, "loss": 0.7837, "step": 10363 }, { "epoch": 0.92, "grad_norm": 8.225898483090898, "learning_rate": 5.849343006701787e-06, "loss": 0.8123, "step": 10364 }, { "epoch": 0.92, "grad_norm": 7.160769501992757, "learning_rate": 5.848631124993272e-06, "loss": 0.7747, "step": 10365 }, { "epoch": 0.92, "grad_norm": 6.144609586022871, "learning_rate": 5.8479192255714424e-06, "loss": 0.7709, "step": 10366 }, { "epoch": 0.92, "grad_norm": 5.605235779283128, "learning_rate": 5.847207308451157e-06, "loss": 0.7071, "step": 10367 }, { "epoch": 0.92, "grad_norm": 5.887815086444653, "learning_rate": 5.846495373647273e-06, "loss": 0.7386, "step": 10368 }, { "epoch": 0.93, "grad_norm": 6.785840600454714, "learning_rate": 5.845783421174651e-06, "loss": 0.785, "step": 10369 }, { "epoch": 0.93, "grad_norm": 5.043759062248905, "learning_rate": 5.845071451048155e-06, "loss": 0.7723, "step": 10370 }, { "epoch": 0.93, "grad_norm": 5.967429647991739, "learning_rate": 5.844359463282641e-06, "loss": 0.7819, "step": 10371 }, { "epoch": 0.93, "grad_norm": 4.583230789410842, "learning_rate": 5.843647457892972e-06, "loss": 0.7868, "step": 10372 }, { "epoch": 0.93, "grad_norm": 5.511121774507842, "learning_rate": 5.84293543489401e-06, "loss": 0.7167, "step": 10373 }, { "epoch": 0.93, "grad_norm": 3.700681910852306, "learning_rate": 5.842223394300619e-06, "loss": 0.7354, "step": 10374 }, { "epoch": 0.93, "grad_norm": 6.0420660209514425, "learning_rate": 5.841511336127657e-06, "loss": 0.7945, "step": 10375 }, { "epoch": 0.93, "grad_norm": 5.746516795867465, "learning_rate": 5.840799260389988e-06, "loss": 0.758, "step": 10376 }, { "epoch": 0.93, "grad_norm": 5.337950254921207, "learning_rate": 5.840087167102477e-06, "loss": 0.7016, "step": 10377 }, { "epoch": 0.93, "grad_norm": 5.546522385332707, "learning_rate": 5.8393750562799856e-06, "loss": 0.7711, "step": 10378 }, { "epoch": 0.93, "grad_norm": 4.988052959663414, "learning_rate": 5.838662927937379e-06, "loss": 0.7234, "step": 10379 }, { "epoch": 0.93, "grad_norm": 5.630674204213422, "learning_rate": 5.83795078208952e-06, "loss": 0.7757, "step": 10380 }, { "epoch": 0.93, "grad_norm": 6.362696298640356, "learning_rate": 5.837238618751273e-06, "loss": 0.7335, "step": 10381 }, { "epoch": 0.93, "grad_norm": 4.003077476298915, "learning_rate": 5.8365264379375046e-06, "loss": 0.7131, "step": 10382 }, { "epoch": 0.93, "grad_norm": 7.004186379932984, "learning_rate": 5.835814239663078e-06, "loss": 0.778, "step": 10383 }, { "epoch": 0.93, "grad_norm": 6.532055525441049, "learning_rate": 5.83510202394286e-06, "loss": 0.7782, "step": 10384 }, { "epoch": 0.93, "grad_norm": 5.905027598599512, "learning_rate": 5.834389790791715e-06, "loss": 0.7878, "step": 10385 }, { "epoch": 0.93, "grad_norm": 5.072600511236851, "learning_rate": 5.833677540224512e-06, "loss": 0.6893, "step": 10386 }, { "epoch": 0.93, "grad_norm": 6.679437753037188, "learning_rate": 5.832965272256113e-06, "loss": 0.7008, "step": 10387 }, { "epoch": 0.93, "grad_norm": 5.198797708859641, "learning_rate": 5.832252986901391e-06, "loss": 0.7977, "step": 10388 }, { "epoch": 0.93, "grad_norm": 5.296903660646669, "learning_rate": 5.83154068417521e-06, "loss": 0.7808, "step": 10389 }, { "epoch": 0.93, "grad_norm": 6.249615796148007, "learning_rate": 5.830828364092439e-06, "loss": 0.7826, "step": 10390 }, { "epoch": 0.93, "grad_norm": 5.491532383954514, "learning_rate": 5.830116026667944e-06, "loss": 0.7339, "step": 10391 }, { "epoch": 0.93, "grad_norm": 6.614076751677338, "learning_rate": 5.829403671916595e-06, "loss": 0.8052, "step": 10392 }, { "epoch": 0.93, "grad_norm": 5.352334955242177, "learning_rate": 5.828691299853261e-06, "loss": 0.7729, "step": 10393 }, { "epoch": 0.93, "grad_norm": 6.400938430184248, "learning_rate": 5.8279789104928096e-06, "loss": 0.7433, "step": 10394 }, { "epoch": 0.93, "grad_norm": 5.469780981972323, "learning_rate": 5.827266503850114e-06, "loss": 0.8114, "step": 10395 }, { "epoch": 0.93, "grad_norm": 4.865486527662251, "learning_rate": 5.826554079940041e-06, "loss": 0.7663, "step": 10396 }, { "epoch": 0.93, "grad_norm": 5.341372956952108, "learning_rate": 5.825841638777462e-06, "loss": 0.7748, "step": 10397 }, { "epoch": 0.93, "grad_norm": 4.902952488388827, "learning_rate": 5.8251291803772455e-06, "loss": 0.7105, "step": 10398 }, { "epoch": 0.93, "grad_norm": 6.9490741282528745, "learning_rate": 5.824416704754267e-06, "loss": 0.7878, "step": 10399 }, { "epoch": 0.93, "grad_norm": 8.250681407778234, "learning_rate": 5.823704211923393e-06, "loss": 0.7874, "step": 10400 }, { "epoch": 0.93, "grad_norm": 6.356418186587142, "learning_rate": 5.822991701899498e-06, "loss": 0.732, "step": 10401 }, { "epoch": 0.93, "grad_norm": 4.3271152827567745, "learning_rate": 5.822279174697454e-06, "loss": 0.7315, "step": 10402 }, { "epoch": 0.93, "grad_norm": 5.646328934502949, "learning_rate": 5.821566630332134e-06, "loss": 0.7895, "step": 10403 }, { "epoch": 0.93, "grad_norm": 5.210015052851324, "learning_rate": 5.820854068818408e-06, "loss": 0.7801, "step": 10404 }, { "epoch": 0.93, "grad_norm": 5.840249579728816, "learning_rate": 5.820141490171153e-06, "loss": 0.7388, "step": 10405 }, { "epoch": 0.93, "grad_norm": 5.815994395193933, "learning_rate": 5.819428894405238e-06, "loss": 0.7987, "step": 10406 }, { "epoch": 0.93, "grad_norm": 5.696700030652165, "learning_rate": 5.81871628153554e-06, "loss": 0.7622, "step": 10407 }, { "epoch": 0.93, "grad_norm": 6.078937007130367, "learning_rate": 5.8180036515769335e-06, "loss": 0.7205, "step": 10408 }, { "epoch": 0.93, "grad_norm": 7.6266404057736725, "learning_rate": 5.817291004544291e-06, "loss": 0.7178, "step": 10409 }, { "epoch": 0.93, "grad_norm": 4.675522912573433, "learning_rate": 5.81657834045249e-06, "loss": 0.6995, "step": 10410 }, { "epoch": 0.93, "grad_norm": 4.737443268600538, "learning_rate": 5.815865659316403e-06, "loss": 0.7794, "step": 10411 }, { "epoch": 0.93, "grad_norm": 4.963491265462475, "learning_rate": 5.81515296115091e-06, "loss": 0.712, "step": 10412 }, { "epoch": 0.93, "grad_norm": 5.424731870726073, "learning_rate": 5.81444024597088e-06, "loss": 0.7485, "step": 10413 }, { "epoch": 0.93, "grad_norm": 4.284566255419097, "learning_rate": 5.813727513791195e-06, "loss": 0.7887, "step": 10414 }, { "epoch": 0.93, "grad_norm": 4.813756629636914, "learning_rate": 5.813014764626731e-06, "loss": 0.7415, "step": 10415 }, { "epoch": 0.93, "grad_norm": 5.169518779441186, "learning_rate": 5.812301998492363e-06, "loss": 0.7024, "step": 10416 }, { "epoch": 0.93, "grad_norm": 6.878204096309575, "learning_rate": 5.811589215402971e-06, "loss": 0.6919, "step": 10417 }, { "epoch": 0.93, "grad_norm": 5.3674481267699745, "learning_rate": 5.810876415373432e-06, "loss": 0.7647, "step": 10418 }, { "epoch": 0.93, "grad_norm": 6.26157827807808, "learning_rate": 5.8101635984186225e-06, "loss": 0.7848, "step": 10419 }, { "epoch": 0.93, "grad_norm": 4.096138818245598, "learning_rate": 5.809450764553421e-06, "loss": 0.7042, "step": 10420 }, { "epoch": 0.93, "grad_norm": 6.963258268220989, "learning_rate": 5.80873791379271e-06, "loss": 0.7555, "step": 10421 }, { "epoch": 0.93, "grad_norm": 4.7714489449697535, "learning_rate": 5.8080250461513645e-06, "loss": 0.7995, "step": 10422 }, { "epoch": 0.93, "grad_norm": 5.2466404249603755, "learning_rate": 5.807312161644265e-06, "loss": 0.7461, "step": 10423 }, { "epoch": 0.93, "grad_norm": 4.824790290782588, "learning_rate": 5.8065992602862945e-06, "loss": 0.7765, "step": 10424 }, { "epoch": 0.93, "grad_norm": 6.654907867118062, "learning_rate": 5.8058863420923295e-06, "loss": 0.7387, "step": 10425 }, { "epoch": 0.93, "grad_norm": 7.185685958378814, "learning_rate": 5.805173407077252e-06, "loss": 0.7211, "step": 10426 }, { "epoch": 0.93, "grad_norm": 5.466302318333001, "learning_rate": 5.804460455255942e-06, "loss": 0.7616, "step": 10427 }, { "epoch": 0.93, "grad_norm": 4.432810267952034, "learning_rate": 5.803747486643283e-06, "loss": 0.758, "step": 10428 }, { "epoch": 0.93, "grad_norm": 6.389295915621514, "learning_rate": 5.8030345012541555e-06, "loss": 0.7091, "step": 10429 }, { "epoch": 0.93, "grad_norm": 5.205600665551974, "learning_rate": 5.80232149910344e-06, "loss": 0.8211, "step": 10430 }, { "epoch": 0.93, "grad_norm": 4.737980131623514, "learning_rate": 5.801608480206022e-06, "loss": 0.7954, "step": 10431 }, { "epoch": 0.93, "grad_norm": 5.6566184818689935, "learning_rate": 5.800895444576782e-06, "loss": 0.7735, "step": 10432 }, { "epoch": 0.93, "grad_norm": 5.667167109611212, "learning_rate": 5.8001823922306035e-06, "loss": 0.7099, "step": 10433 }, { "epoch": 0.93, "grad_norm": 6.342543516234341, "learning_rate": 5.7994693231823695e-06, "loss": 0.7219, "step": 10434 }, { "epoch": 0.93, "grad_norm": 5.531444399605784, "learning_rate": 5.798756237446964e-06, "loss": 0.7519, "step": 10435 }, { "epoch": 0.93, "grad_norm": 5.915885758139498, "learning_rate": 5.798043135039271e-06, "loss": 0.7483, "step": 10436 }, { "epoch": 0.93, "grad_norm": 6.712418906822784, "learning_rate": 5.7973300159741765e-06, "loss": 0.7784, "step": 10437 }, { "epoch": 0.93, "grad_norm": 5.614322535472201, "learning_rate": 5.796616880266564e-06, "loss": 0.7117, "step": 10438 }, { "epoch": 0.93, "grad_norm": 5.469834109473515, "learning_rate": 5.795903727931318e-06, "loss": 0.6904, "step": 10439 }, { "epoch": 0.93, "grad_norm": 4.729979880727099, "learning_rate": 5.795190558983324e-06, "loss": 0.7873, "step": 10440 }, { "epoch": 0.93, "grad_norm": 6.331494419043117, "learning_rate": 5.794477373437469e-06, "loss": 0.7651, "step": 10441 }, { "epoch": 0.93, "grad_norm": 4.22137797540707, "learning_rate": 5.793764171308639e-06, "loss": 0.8413, "step": 10442 }, { "epoch": 0.93, "grad_norm": 5.120418604977993, "learning_rate": 5.793050952611721e-06, "loss": 0.7196, "step": 10443 }, { "epoch": 0.93, "grad_norm": 5.475075280260225, "learning_rate": 5.7923377173616e-06, "loss": 0.7385, "step": 10444 }, { "epoch": 0.93, "grad_norm": 5.426603852709179, "learning_rate": 5.791624465573164e-06, "loss": 0.7692, "step": 10445 }, { "epoch": 0.93, "grad_norm": 6.417080677731547, "learning_rate": 5.790911197261302e-06, "loss": 0.7284, "step": 10446 }, { "epoch": 0.93, "grad_norm": 5.698665207725203, "learning_rate": 5.790197912440901e-06, "loss": 0.7626, "step": 10447 }, { "epoch": 0.93, "grad_norm": 4.391641658389334, "learning_rate": 5.789484611126848e-06, "loss": 0.7358, "step": 10448 }, { "epoch": 0.93, "grad_norm": 7.269168455114803, "learning_rate": 5.788771293334032e-06, "loss": 0.7683, "step": 10449 }, { "epoch": 0.93, "grad_norm": 5.558423382879241, "learning_rate": 5.788057959077345e-06, "loss": 0.7692, "step": 10450 }, { "epoch": 0.93, "grad_norm": 6.051031672193766, "learning_rate": 5.787344608371673e-06, "loss": 0.7321, "step": 10451 }, { "epoch": 0.93, "grad_norm": 5.6524552123877765, "learning_rate": 5.786631241231905e-06, "loss": 0.7861, "step": 10452 }, { "epoch": 0.93, "grad_norm": 6.698684852641717, "learning_rate": 5.7859178576729345e-06, "loss": 0.8164, "step": 10453 }, { "epoch": 0.93, "grad_norm": 7.115658972944113, "learning_rate": 5.78520445770965e-06, "loss": 0.7988, "step": 10454 }, { "epoch": 0.93, "grad_norm": 5.386004473914074, "learning_rate": 5.784491041356941e-06, "loss": 0.7718, "step": 10455 }, { "epoch": 0.93, "grad_norm": 5.764386847165355, "learning_rate": 5.7837776086297e-06, "loss": 0.7386, "step": 10456 }, { "epoch": 0.93, "grad_norm": 7.432645313548101, "learning_rate": 5.7830641595428175e-06, "loss": 0.8342, "step": 10457 }, { "epoch": 0.93, "grad_norm": 5.3431230473735996, "learning_rate": 5.782350694111185e-06, "loss": 0.7182, "step": 10458 }, { "epoch": 0.93, "grad_norm": 5.1839031329482665, "learning_rate": 5.781637212349696e-06, "loss": 0.8192, "step": 10459 }, { "epoch": 0.93, "grad_norm": 6.108840070426629, "learning_rate": 5.780923714273241e-06, "loss": 0.7384, "step": 10460 }, { "epoch": 0.93, "grad_norm": 5.567668280231709, "learning_rate": 5.780210199896715e-06, "loss": 0.7837, "step": 10461 }, { "epoch": 0.93, "grad_norm": 6.721910508866932, "learning_rate": 5.7794966692350084e-06, "loss": 0.767, "step": 10462 }, { "epoch": 0.93, "grad_norm": 5.507984346704366, "learning_rate": 5.778783122303018e-06, "loss": 0.7035, "step": 10463 }, { "epoch": 0.93, "grad_norm": 5.205967295892064, "learning_rate": 5.778069559115632e-06, "loss": 0.7672, "step": 10464 }, { "epoch": 0.93, "grad_norm": 6.82910086983143, "learning_rate": 5.77735597968775e-06, "loss": 0.7335, "step": 10465 }, { "epoch": 0.93, "grad_norm": 5.309133983890882, "learning_rate": 5.7766423840342645e-06, "loss": 0.6998, "step": 10466 }, { "epoch": 0.93, "grad_norm": 6.192800778588366, "learning_rate": 5.77592877217007e-06, "loss": 0.8045, "step": 10467 }, { "epoch": 0.93, "grad_norm": 4.317274183117621, "learning_rate": 5.775215144110061e-06, "loss": 0.7715, "step": 10468 }, { "epoch": 0.93, "grad_norm": 5.045096418471563, "learning_rate": 5.774501499869135e-06, "loss": 0.7734, "step": 10469 }, { "epoch": 0.93, "grad_norm": 5.388706476378973, "learning_rate": 5.773787839462184e-06, "loss": 0.7175, "step": 10470 }, { "epoch": 0.93, "grad_norm": 5.228926406268309, "learning_rate": 5.773074162904108e-06, "loss": 0.68, "step": 10471 }, { "epoch": 0.93, "grad_norm": 5.050877007613123, "learning_rate": 5.7723604702098015e-06, "loss": 0.737, "step": 10472 }, { "epoch": 0.93, "grad_norm": 5.228962791106061, "learning_rate": 5.7716467613941605e-06, "loss": 0.7774, "step": 10473 }, { "epoch": 0.93, "grad_norm": 5.279199897197876, "learning_rate": 5.770933036472084e-06, "loss": 0.7974, "step": 10474 }, { "epoch": 0.93, "grad_norm": 6.022123019130507, "learning_rate": 5.77021929545847e-06, "loss": 0.775, "step": 10475 }, { "epoch": 0.93, "grad_norm": 6.630805213328504, "learning_rate": 5.7695055383682145e-06, "loss": 0.7517, "step": 10476 }, { "epoch": 0.93, "grad_norm": 4.371124191348364, "learning_rate": 5.768791765216215e-06, "loss": 0.7599, "step": 10477 }, { "epoch": 0.93, "grad_norm": 8.15312427864812, "learning_rate": 5.768077976017371e-06, "loss": 0.8376, "step": 10478 }, { "epoch": 0.93, "grad_norm": 7.484360755592375, "learning_rate": 5.767364170786582e-06, "loss": 0.7217, "step": 10479 }, { "epoch": 0.93, "grad_norm": 5.199025843030411, "learning_rate": 5.766650349538747e-06, "loss": 0.7595, "step": 10480 }, { "epoch": 0.94, "grad_norm": 5.629998480747124, "learning_rate": 5.765936512288764e-06, "loss": 0.7992, "step": 10481 }, { "epoch": 0.94, "grad_norm": 5.2042431878974815, "learning_rate": 5.765222659051534e-06, "loss": 0.7108, "step": 10482 }, { "epoch": 0.94, "grad_norm": 4.784961416878586, "learning_rate": 5.7645087898419576e-06, "loss": 0.7429, "step": 10483 }, { "epoch": 0.94, "grad_norm": 6.266198666586149, "learning_rate": 5.763794904674933e-06, "loss": 0.7676, "step": 10484 }, { "epoch": 0.94, "grad_norm": 5.9603124583250775, "learning_rate": 5.763081003565363e-06, "loss": 0.793, "step": 10485 }, { "epoch": 0.94, "grad_norm": 5.11653436756417, "learning_rate": 5.762367086528147e-06, "loss": 0.7242, "step": 10486 }, { "epoch": 0.94, "grad_norm": 22.369367807618673, "learning_rate": 5.76165315357819e-06, "loss": 0.7952, "step": 10487 }, { "epoch": 0.94, "grad_norm": 5.818259833216247, "learning_rate": 5.76093920473039e-06, "loss": 0.8467, "step": 10488 }, { "epoch": 0.94, "grad_norm": 7.391197941907862, "learning_rate": 5.760225239999651e-06, "loss": 0.7586, "step": 10489 }, { "epoch": 0.94, "grad_norm": 5.580002687958734, "learning_rate": 5.759511259400874e-06, "loss": 0.7375, "step": 10490 }, { "epoch": 0.94, "grad_norm": 6.239952941872122, "learning_rate": 5.758797262948964e-06, "loss": 0.7908, "step": 10491 }, { "epoch": 0.94, "grad_norm": 4.525852003762492, "learning_rate": 5.758083250658822e-06, "loss": 0.6977, "step": 10492 }, { "epoch": 0.94, "grad_norm": 8.217196421494338, "learning_rate": 5.7573692225453525e-06, "loss": 0.7887, "step": 10493 }, { "epoch": 0.94, "grad_norm": 5.319922048149324, "learning_rate": 5.75665517862346e-06, "loss": 0.8497, "step": 10494 }, { "epoch": 0.94, "grad_norm": 3.742060979365445, "learning_rate": 5.755941118908046e-06, "loss": 0.6583, "step": 10495 }, { "epoch": 0.94, "grad_norm": 5.92379202557174, "learning_rate": 5.755227043414018e-06, "loss": 0.7534, "step": 10496 }, { "epoch": 0.94, "grad_norm": 6.772748880033975, "learning_rate": 5.754512952156279e-06, "loss": 0.7185, "step": 10497 }, { "epoch": 0.94, "grad_norm": 4.381573660971025, "learning_rate": 5.753798845149735e-06, "loss": 0.6913, "step": 10498 }, { "epoch": 0.94, "grad_norm": 7.2102226712117465, "learning_rate": 5.75308472240929e-06, "loss": 0.8069, "step": 10499 }, { "epoch": 0.94, "grad_norm": 6.206287080927273, "learning_rate": 5.75237058394985e-06, "loss": 0.7217, "step": 10500 }, { "epoch": 0.94, "grad_norm": 6.080906558801783, "learning_rate": 5.751656429786323e-06, "loss": 0.7787, "step": 10501 }, { "epoch": 0.94, "grad_norm": 4.3863757203362495, "learning_rate": 5.750942259933614e-06, "loss": 0.7339, "step": 10502 }, { "epoch": 0.94, "grad_norm": 5.872315370986401, "learning_rate": 5.750228074406629e-06, "loss": 0.7529, "step": 10503 }, { "epoch": 0.94, "grad_norm": 4.709167223722633, "learning_rate": 5.749513873220278e-06, "loss": 0.8074, "step": 10504 }, { "epoch": 0.94, "grad_norm": 5.800341003602659, "learning_rate": 5.748799656389464e-06, "loss": 0.7343, "step": 10505 }, { "epoch": 0.94, "grad_norm": 5.3444239839321614, "learning_rate": 5.748085423929099e-06, "loss": 0.7344, "step": 10506 }, { "epoch": 0.94, "grad_norm": 7.000115820171544, "learning_rate": 5.747371175854087e-06, "loss": 0.7568, "step": 10507 }, { "epoch": 0.94, "grad_norm": 4.226268722913593, "learning_rate": 5.746656912179339e-06, "loss": 0.7195, "step": 10508 }, { "epoch": 0.94, "grad_norm": 4.932709708163412, "learning_rate": 5.745942632919763e-06, "loss": 0.7649, "step": 10509 }, { "epoch": 0.94, "grad_norm": 7.178137859378232, "learning_rate": 5.74522833809027e-06, "loss": 0.7484, "step": 10510 }, { "epoch": 0.94, "grad_norm": 3.8050701394726456, "learning_rate": 5.7445140277057665e-06, "loss": 0.7192, "step": 10511 }, { "epoch": 0.94, "grad_norm": 6.136706071830449, "learning_rate": 5.743799701781162e-06, "loss": 0.7847, "step": 10512 }, { "epoch": 0.94, "grad_norm": 5.160541403908818, "learning_rate": 5.743085360331368e-06, "loss": 0.6795, "step": 10513 }, { "epoch": 0.94, "grad_norm": 6.563082121575759, "learning_rate": 5.7423710033712954e-06, "loss": 0.7529, "step": 10514 }, { "epoch": 0.94, "grad_norm": 7.5844129041905495, "learning_rate": 5.741656630915853e-06, "loss": 0.7439, "step": 10515 }, { "epoch": 0.94, "grad_norm": 7.102688210838052, "learning_rate": 5.740942242979952e-06, "loss": 0.8093, "step": 10516 }, { "epoch": 0.94, "grad_norm": 5.477879444858365, "learning_rate": 5.740227839578506e-06, "loss": 0.7103, "step": 10517 }, { "epoch": 0.94, "grad_norm": 5.363358285552373, "learning_rate": 5.739513420726424e-06, "loss": 0.7279, "step": 10518 }, { "epoch": 0.94, "grad_norm": 4.665614128777816, "learning_rate": 5.738798986438619e-06, "loss": 0.7784, "step": 10519 }, { "epoch": 0.94, "grad_norm": 5.0594426197810725, "learning_rate": 5.738084536730004e-06, "loss": 0.6948, "step": 10520 }, { "epoch": 0.94, "grad_norm": 5.280316813783569, "learning_rate": 5.737370071615489e-06, "loss": 0.7661, "step": 10521 }, { "epoch": 0.94, "grad_norm": 5.523339458968434, "learning_rate": 5.736655591109989e-06, "loss": 0.6888, "step": 10522 }, { "epoch": 0.94, "grad_norm": 5.3015696602877025, "learning_rate": 5.735941095228418e-06, "loss": 0.7647, "step": 10523 }, { "epoch": 0.94, "grad_norm": 10.444392859702623, "learning_rate": 5.735226583985687e-06, "loss": 0.776, "step": 10524 }, { "epoch": 0.94, "grad_norm": 7.751741781566641, "learning_rate": 5.73451205739671e-06, "loss": 0.729, "step": 10525 }, { "epoch": 0.94, "grad_norm": 5.234003258918477, "learning_rate": 5.733797515476406e-06, "loss": 0.8208, "step": 10526 }, { "epoch": 0.94, "grad_norm": 4.335600175065065, "learning_rate": 5.733082958239683e-06, "loss": 0.6749, "step": 10527 }, { "epoch": 0.94, "grad_norm": 4.3797299214855965, "learning_rate": 5.732368385701459e-06, "loss": 0.7032, "step": 10528 }, { "epoch": 0.94, "grad_norm": 5.048224864775324, "learning_rate": 5.731653797876651e-06, "loss": 0.7911, "step": 10529 }, { "epoch": 0.94, "grad_norm": 7.062843288885905, "learning_rate": 5.73093919478017e-06, "loss": 0.746, "step": 10530 }, { "epoch": 0.94, "grad_norm": 4.857003108063851, "learning_rate": 5.730224576426933e-06, "loss": 0.784, "step": 10531 }, { "epoch": 0.94, "grad_norm": 8.482367323577703, "learning_rate": 5.729509942831859e-06, "loss": 0.8381, "step": 10532 }, { "epoch": 0.94, "grad_norm": 9.487671896020563, "learning_rate": 5.728795294009863e-06, "loss": 0.7908, "step": 10533 }, { "epoch": 0.94, "grad_norm": 6.513544083711457, "learning_rate": 5.728080629975861e-06, "loss": 0.7387, "step": 10534 }, { "epoch": 0.94, "grad_norm": 8.663195932943987, "learning_rate": 5.727365950744769e-06, "loss": 0.7865, "step": 10535 }, { "epoch": 0.94, "grad_norm": 6.039117849878281, "learning_rate": 5.726651256331507e-06, "loss": 0.7133, "step": 10536 }, { "epoch": 0.94, "grad_norm": 7.17767092428578, "learning_rate": 5.7259365467509895e-06, "loss": 0.6903, "step": 10537 }, { "epoch": 0.94, "grad_norm": 5.808951258299627, "learning_rate": 5.725221822018138e-06, "loss": 0.8201, "step": 10538 }, { "epoch": 0.94, "grad_norm": 4.210869802194983, "learning_rate": 5.72450708214787e-06, "loss": 0.7746, "step": 10539 }, { "epoch": 0.94, "grad_norm": 5.317890573755662, "learning_rate": 5.723792327155102e-06, "loss": 0.8016, "step": 10540 }, { "epoch": 0.94, "grad_norm": 5.67476286701007, "learning_rate": 5.723077557054755e-06, "loss": 0.8007, "step": 10541 }, { "epoch": 0.94, "grad_norm": 4.792551488976828, "learning_rate": 5.722362771861747e-06, "loss": 0.7748, "step": 10542 }, { "epoch": 0.94, "grad_norm": 5.314475116960023, "learning_rate": 5.721647971590998e-06, "loss": 0.6972, "step": 10543 }, { "epoch": 0.94, "grad_norm": 3.70319239117721, "learning_rate": 5.720933156257427e-06, "loss": 0.7381, "step": 10544 }, { "epoch": 0.94, "grad_norm": 5.408937876887455, "learning_rate": 5.720218325875957e-06, "loss": 0.709, "step": 10545 }, { "epoch": 0.94, "grad_norm": 5.293910358916889, "learning_rate": 5.719503480461506e-06, "loss": 0.8367, "step": 10546 }, { "epoch": 0.94, "grad_norm": 6.853258674229003, "learning_rate": 5.718788620028996e-06, "loss": 0.725, "step": 10547 }, { "epoch": 0.94, "grad_norm": 5.847736954241616, "learning_rate": 5.7180737445933466e-06, "loss": 0.8204, "step": 10548 }, { "epoch": 0.94, "grad_norm": 7.394800935294825, "learning_rate": 5.717358854169482e-06, "loss": 0.7974, "step": 10549 }, { "epoch": 0.94, "grad_norm": 6.696083837242662, "learning_rate": 5.716643948772319e-06, "loss": 0.6854, "step": 10550 }, { "epoch": 0.94, "grad_norm": 4.157388859027308, "learning_rate": 5.715929028416785e-06, "loss": 0.7778, "step": 10551 }, { "epoch": 0.94, "grad_norm": 4.772259387156887, "learning_rate": 5.715214093117801e-06, "loss": 0.7737, "step": 10552 }, { "epoch": 0.94, "grad_norm": 5.806732296966844, "learning_rate": 5.714499142890288e-06, "loss": 0.7107, "step": 10553 }, { "epoch": 0.94, "grad_norm": 4.674795966904009, "learning_rate": 5.713784177749171e-06, "loss": 0.7881, "step": 10554 }, { "epoch": 0.94, "grad_norm": 4.957508401894398, "learning_rate": 5.7130691977093735e-06, "loss": 0.7355, "step": 10555 }, { "epoch": 0.94, "grad_norm": 5.514264089060806, "learning_rate": 5.712354202785817e-06, "loss": 0.834, "step": 10556 }, { "epoch": 0.94, "grad_norm": 4.9821461537906115, "learning_rate": 5.711639192993426e-06, "loss": 0.6489, "step": 10557 }, { "epoch": 0.94, "grad_norm": 6.761887423669251, "learning_rate": 5.710924168347125e-06, "loss": 0.7079, "step": 10558 }, { "epoch": 0.94, "grad_norm": 6.4834057370897265, "learning_rate": 5.71020912886184e-06, "loss": 0.744, "step": 10559 }, { "epoch": 0.94, "grad_norm": 7.150207667374416, "learning_rate": 5.709494074552493e-06, "loss": 0.6911, "step": 10560 }, { "epoch": 0.94, "grad_norm": 4.637005324682461, "learning_rate": 5.708779005434013e-06, "loss": 0.7538, "step": 10561 }, { "epoch": 0.94, "grad_norm": 5.537409106199605, "learning_rate": 5.708063921521324e-06, "loss": 0.7976, "step": 10562 }, { "epoch": 0.94, "grad_norm": 6.551660941669284, "learning_rate": 5.707348822829349e-06, "loss": 0.8054, "step": 10563 }, { "epoch": 0.94, "grad_norm": 5.293912815037526, "learning_rate": 5.706633709373019e-06, "loss": 0.7274, "step": 10564 }, { "epoch": 0.94, "grad_norm": 7.825203256092446, "learning_rate": 5.7059185811672545e-06, "loss": 0.7935, "step": 10565 }, { "epoch": 0.94, "grad_norm": 5.266956992963532, "learning_rate": 5.705203438226987e-06, "loss": 0.7379, "step": 10566 }, { "epoch": 0.94, "grad_norm": 6.319227347685753, "learning_rate": 5.704488280567143e-06, "loss": 0.7465, "step": 10567 }, { "epoch": 0.94, "grad_norm": 5.6613945429889485, "learning_rate": 5.703773108202648e-06, "loss": 0.778, "step": 10568 }, { "epoch": 0.94, "grad_norm": 6.601466357272401, "learning_rate": 5.703057921148431e-06, "loss": 0.8086, "step": 10569 }, { "epoch": 0.94, "grad_norm": 5.922661586074919, "learning_rate": 5.7023427194194195e-06, "loss": 0.7085, "step": 10570 }, { "epoch": 0.94, "grad_norm": 3.7602596428269757, "learning_rate": 5.701627503030542e-06, "loss": 0.7649, "step": 10571 }, { "epoch": 0.94, "grad_norm": 6.153425383996962, "learning_rate": 5.700912271996727e-06, "loss": 0.7709, "step": 10572 }, { "epoch": 0.94, "grad_norm": 6.24851704273224, "learning_rate": 5.700197026332902e-06, "loss": 0.761, "step": 10573 }, { "epoch": 0.94, "grad_norm": 9.183066525520982, "learning_rate": 5.6994817660539996e-06, "loss": 0.6776, "step": 10574 }, { "epoch": 0.94, "grad_norm": 6.0555538943175256, "learning_rate": 5.698766491174945e-06, "loss": 0.7505, "step": 10575 }, { "epoch": 0.94, "grad_norm": 5.5741315011178925, "learning_rate": 5.6980512017106725e-06, "loss": 0.7749, "step": 10576 }, { "epoch": 0.94, "grad_norm": 5.285887509299367, "learning_rate": 5.697335897676108e-06, "loss": 0.7664, "step": 10577 }, { "epoch": 0.94, "grad_norm": 5.67133417229361, "learning_rate": 5.696620579086185e-06, "loss": 0.7809, "step": 10578 }, { "epoch": 0.94, "grad_norm": 6.606709836009858, "learning_rate": 5.695905245955832e-06, "loss": 0.7171, "step": 10579 }, { "epoch": 0.94, "grad_norm": 20.44308600593045, "learning_rate": 5.695189898299982e-06, "loss": 0.7642, "step": 10580 }, { "epoch": 0.94, "grad_norm": 8.125027478798033, "learning_rate": 5.694474536133564e-06, "loss": 0.7711, "step": 10581 }, { "epoch": 0.94, "grad_norm": 5.928184912893852, "learning_rate": 5.69375915947151e-06, "loss": 0.7122, "step": 10582 }, { "epoch": 0.94, "grad_norm": 4.79868389918157, "learning_rate": 5.693043768328756e-06, "loss": 0.6725, "step": 10583 }, { "epoch": 0.94, "grad_norm": 5.056637229007533, "learning_rate": 5.6923283627202294e-06, "loss": 0.7245, "step": 10584 }, { "epoch": 0.94, "grad_norm": 5.1340321551556825, "learning_rate": 5.691612942660865e-06, "loss": 0.8587, "step": 10585 }, { "epoch": 0.94, "grad_norm": 4.6433615407927045, "learning_rate": 5.690897508165594e-06, "loss": 0.8216, "step": 10586 }, { "epoch": 0.94, "grad_norm": 5.649372504963632, "learning_rate": 5.690182059249352e-06, "loss": 0.6997, "step": 10587 }, { "epoch": 0.94, "grad_norm": 5.459925966193677, "learning_rate": 5.689466595927069e-06, "loss": 0.7302, "step": 10588 }, { "epoch": 0.94, "grad_norm": 4.938044694976502, "learning_rate": 5.688751118213681e-06, "loss": 0.7801, "step": 10589 }, { "epoch": 0.94, "grad_norm": 7.460728672945716, "learning_rate": 5.688035626124124e-06, "loss": 0.7501, "step": 10590 }, { "epoch": 0.94, "grad_norm": 5.049578428352045, "learning_rate": 5.687320119673329e-06, "loss": 0.7499, "step": 10591 }, { "epoch": 0.94, "grad_norm": 5.591645689084926, "learning_rate": 5.686604598876231e-06, "loss": 0.7803, "step": 10592 }, { "epoch": 0.95, "grad_norm": 8.138861938854694, "learning_rate": 5.685889063747767e-06, "loss": 0.7889, "step": 10593 }, { "epoch": 0.95, "grad_norm": 4.8647485309529825, "learning_rate": 5.685173514302869e-06, "loss": 0.8415, "step": 10594 }, { "epoch": 0.95, "grad_norm": 5.001790642676916, "learning_rate": 5.684457950556475e-06, "loss": 0.8442, "step": 10595 }, { "epoch": 0.95, "grad_norm": 5.864996972665924, "learning_rate": 5.683742372523521e-06, "loss": 0.7822, "step": 10596 }, { "epoch": 0.95, "grad_norm": 5.001483549320294, "learning_rate": 5.683026780218941e-06, "loss": 0.8154, "step": 10597 }, { "epoch": 0.95, "grad_norm": 4.543924225639487, "learning_rate": 5.682311173657674e-06, "loss": 0.7392, "step": 10598 }, { "epoch": 0.95, "grad_norm": 5.473060235350142, "learning_rate": 5.681595552854655e-06, "loss": 0.8487, "step": 10599 }, { "epoch": 0.95, "grad_norm": 6.227635742922879, "learning_rate": 5.680879917824822e-06, "loss": 0.7601, "step": 10600 }, { "epoch": 0.95, "grad_norm": 5.981401883064955, "learning_rate": 5.68016426858311e-06, "loss": 0.7585, "step": 10601 }, { "epoch": 0.95, "grad_norm": 5.250713039130418, "learning_rate": 5.679448605144461e-06, "loss": 0.7582, "step": 10602 }, { "epoch": 0.95, "grad_norm": 6.120912657845478, "learning_rate": 5.678732927523809e-06, "loss": 0.7773, "step": 10603 }, { "epoch": 0.95, "grad_norm": 4.922008188992745, "learning_rate": 5.678017235736093e-06, "loss": 0.8072, "step": 10604 }, { "epoch": 0.95, "grad_norm": 5.384303245746343, "learning_rate": 5.677301529796253e-06, "loss": 0.7226, "step": 10605 }, { "epoch": 0.95, "grad_norm": 7.16553505462511, "learning_rate": 5.676585809719227e-06, "loss": 0.781, "step": 10606 }, { "epoch": 0.95, "grad_norm": 6.257252476048734, "learning_rate": 5.675870075519954e-06, "loss": 0.7069, "step": 10607 }, { "epoch": 0.95, "grad_norm": 5.061989437671772, "learning_rate": 5.675154327213372e-06, "loss": 0.7892, "step": 10608 }, { "epoch": 0.95, "grad_norm": 4.941533213954334, "learning_rate": 5.674438564814423e-06, "loss": 0.7934, "step": 10609 }, { "epoch": 0.95, "grad_norm": 5.422928954500159, "learning_rate": 5.673722788338045e-06, "loss": 0.7303, "step": 10610 }, { "epoch": 0.95, "grad_norm": 5.561990431902749, "learning_rate": 5.673006997799179e-06, "loss": 0.7378, "step": 10611 }, { "epoch": 0.95, "grad_norm": 5.713988289723681, "learning_rate": 5.672291193212769e-06, "loss": 0.7103, "step": 10612 }, { "epoch": 0.95, "grad_norm": 5.426396705556261, "learning_rate": 5.6715753745937505e-06, "loss": 0.7244, "step": 10613 }, { "epoch": 0.95, "grad_norm": 5.48007063886861, "learning_rate": 5.670859541957066e-06, "loss": 0.6828, "step": 10614 }, { "epoch": 0.95, "grad_norm": 4.560177100987123, "learning_rate": 5.670143695317659e-06, "loss": 0.7498, "step": 10615 }, { "epoch": 0.95, "grad_norm": 5.535038728425869, "learning_rate": 5.6694278346904685e-06, "loss": 0.8167, "step": 10616 }, { "epoch": 0.95, "grad_norm": 5.811904873715546, "learning_rate": 5.668711960090438e-06, "loss": 0.7891, "step": 10617 }, { "epoch": 0.95, "grad_norm": 7.0308424635672955, "learning_rate": 5.6679960715325114e-06, "loss": 0.7814, "step": 10618 }, { "epoch": 0.95, "grad_norm": 4.432602147406498, "learning_rate": 5.6672801690316285e-06, "loss": 0.7286, "step": 10619 }, { "epoch": 0.95, "grad_norm": 5.004723888851333, "learning_rate": 5.666564252602734e-06, "loss": 0.807, "step": 10620 }, { "epoch": 0.95, "grad_norm": 5.414302087072265, "learning_rate": 5.66584832226077e-06, "loss": 0.7395, "step": 10621 }, { "epoch": 0.95, "grad_norm": 5.590218691418691, "learning_rate": 5.665132378020682e-06, "loss": 0.7446, "step": 10622 }, { "epoch": 0.95, "grad_norm": 5.4007665810745955, "learning_rate": 5.664416419897409e-06, "loss": 0.8377, "step": 10623 }, { "epoch": 0.95, "grad_norm": 4.647582444977295, "learning_rate": 5.663700447905901e-06, "loss": 0.7309, "step": 10624 }, { "epoch": 0.95, "grad_norm": 6.406149784073318, "learning_rate": 5.662984462061099e-06, "loss": 0.7058, "step": 10625 }, { "epoch": 0.95, "grad_norm": 6.982429903941107, "learning_rate": 5.662268462377948e-06, "loss": 0.7667, "step": 10626 }, { "epoch": 0.95, "grad_norm": 4.794985432104046, "learning_rate": 5.661552448871393e-06, "loss": 0.7255, "step": 10627 }, { "epoch": 0.95, "grad_norm": 5.755769379565185, "learning_rate": 5.66083642155638e-06, "loss": 0.7405, "step": 10628 }, { "epoch": 0.95, "grad_norm": 4.554138044239582, "learning_rate": 5.660120380447854e-06, "loss": 0.7398, "step": 10629 }, { "epoch": 0.95, "grad_norm": 5.370821533988524, "learning_rate": 5.65940432556076e-06, "loss": 0.7442, "step": 10630 }, { "epoch": 0.95, "grad_norm": 6.613554427284437, "learning_rate": 5.658688256910046e-06, "loss": 0.7922, "step": 10631 }, { "epoch": 0.95, "grad_norm": 6.139966233718298, "learning_rate": 5.657972174510655e-06, "loss": 0.7637, "step": 10632 }, { "epoch": 0.95, "grad_norm": 6.067191973318115, "learning_rate": 5.657256078377536e-06, "loss": 0.7067, "step": 10633 }, { "epoch": 0.95, "grad_norm": 5.36515011619705, "learning_rate": 5.656539968525638e-06, "loss": 0.7377, "step": 10634 }, { "epoch": 0.95, "grad_norm": 6.03777964560069, "learning_rate": 5.655823844969905e-06, "loss": 0.7087, "step": 10635 }, { "epoch": 0.95, "grad_norm": 5.241257607384797, "learning_rate": 5.655107707725285e-06, "loss": 0.7427, "step": 10636 }, { "epoch": 0.95, "grad_norm": 4.668711749566408, "learning_rate": 5.654391556806725e-06, "loss": 0.7771, "step": 10637 }, { "epoch": 0.95, "grad_norm": 5.267537926873053, "learning_rate": 5.653675392229178e-06, "loss": 0.7858, "step": 10638 }, { "epoch": 0.95, "grad_norm": 5.7236647959620015, "learning_rate": 5.652959214007585e-06, "loss": 0.7596, "step": 10639 }, { "epoch": 0.95, "grad_norm": 5.8694732343863345, "learning_rate": 5.6522430221569004e-06, "loss": 0.7508, "step": 10640 }, { "epoch": 0.95, "grad_norm": 9.88193012467719, "learning_rate": 5.6515268166920715e-06, "loss": 0.754, "step": 10641 }, { "epoch": 0.95, "grad_norm": 6.489735502573147, "learning_rate": 5.650810597628046e-06, "loss": 0.765, "step": 10642 }, { "epoch": 0.95, "grad_norm": 6.0482321491459805, "learning_rate": 5.650094364979774e-06, "loss": 0.7655, "step": 10643 }, { "epoch": 0.95, "grad_norm": 5.490434511148844, "learning_rate": 5.649378118762207e-06, "loss": 0.7927, "step": 10644 }, { "epoch": 0.95, "grad_norm": 5.9672910682123605, "learning_rate": 5.6486618589902935e-06, "loss": 0.7778, "step": 10645 }, { "epoch": 0.95, "grad_norm": 6.030370270274002, "learning_rate": 5.647945585678982e-06, "loss": 0.693, "step": 10646 }, { "epoch": 0.95, "grad_norm": 6.150857129540686, "learning_rate": 5.647229298843229e-06, "loss": 0.7697, "step": 10647 }, { "epoch": 0.95, "grad_norm": 5.84791791725582, "learning_rate": 5.646512998497981e-06, "loss": 0.7648, "step": 10648 }, { "epoch": 0.95, "grad_norm": 7.520211133255416, "learning_rate": 5.645796684658189e-06, "loss": 0.7883, "step": 10649 }, { "epoch": 0.95, "grad_norm": 4.391184800779915, "learning_rate": 5.645080357338806e-06, "loss": 0.788, "step": 10650 }, { "epoch": 0.95, "grad_norm": 7.7424224870392075, "learning_rate": 5.644364016554782e-06, "loss": 0.7973, "step": 10651 }, { "epoch": 0.95, "grad_norm": 7.046044369398744, "learning_rate": 5.643647662321071e-06, "loss": 0.7175, "step": 10652 }, { "epoch": 0.95, "grad_norm": 4.882666461048117, "learning_rate": 5.642931294652626e-06, "loss": 0.8116, "step": 10653 }, { "epoch": 0.95, "grad_norm": 4.457199183562852, "learning_rate": 5.642214913564397e-06, "loss": 0.7455, "step": 10654 }, { "epoch": 0.95, "grad_norm": 6.097195040072289, "learning_rate": 5.641498519071337e-06, "loss": 0.7887, "step": 10655 }, { "epoch": 0.95, "grad_norm": 6.863772194466594, "learning_rate": 5.6407821111884024e-06, "loss": 0.7393, "step": 10656 }, { "epoch": 0.95, "grad_norm": 6.580266362356395, "learning_rate": 5.6400656899305436e-06, "loss": 0.7109, "step": 10657 }, { "epoch": 0.95, "grad_norm": 6.110254962935882, "learning_rate": 5.639349255312715e-06, "loss": 0.9318, "step": 10658 }, { "epoch": 0.95, "grad_norm": 6.3561319169399315, "learning_rate": 5.638632807349871e-06, "loss": 0.6812, "step": 10659 }, { "epoch": 0.95, "grad_norm": 6.307685948451638, "learning_rate": 5.637916346056966e-06, "loss": 0.7, "step": 10660 }, { "epoch": 0.95, "grad_norm": 5.913633347905796, "learning_rate": 5.637199871448954e-06, "loss": 0.8009, "step": 10661 }, { "epoch": 0.95, "grad_norm": 5.704537892364227, "learning_rate": 5.63648338354079e-06, "loss": 0.8207, "step": 10662 }, { "epoch": 0.95, "grad_norm": 4.17523944710243, "learning_rate": 5.635766882347431e-06, "loss": 0.7511, "step": 10663 }, { "epoch": 0.95, "grad_norm": 4.932949810619823, "learning_rate": 5.635050367883829e-06, "loss": 0.7519, "step": 10664 }, { "epoch": 0.95, "grad_norm": 9.517703119593028, "learning_rate": 5.6343338401649405e-06, "loss": 0.7538, "step": 10665 }, { "epoch": 0.95, "grad_norm": 4.86925452250156, "learning_rate": 5.633617299205724e-06, "loss": 0.7592, "step": 10666 }, { "epoch": 0.95, "grad_norm": 5.5895185413363375, "learning_rate": 5.632900745021133e-06, "loss": 0.7818, "step": 10667 }, { "epoch": 0.95, "grad_norm": 5.060077849033257, "learning_rate": 5.632184177626124e-06, "loss": 0.8032, "step": 10668 }, { "epoch": 0.95, "grad_norm": 6.113490545332831, "learning_rate": 5.631467597035656e-06, "loss": 0.8013, "step": 10669 }, { "epoch": 0.95, "grad_norm": 6.3128102952792755, "learning_rate": 5.6307510032646866e-06, "loss": 0.761, "step": 10670 }, { "epoch": 0.95, "grad_norm": 6.4453703933024915, "learning_rate": 5.630034396328168e-06, "loss": 0.7921, "step": 10671 }, { "epoch": 0.95, "grad_norm": 6.727918782878589, "learning_rate": 5.629317776241062e-06, "loss": 0.7443, "step": 10672 }, { "epoch": 0.95, "grad_norm": 5.345823940572856, "learning_rate": 5.628601143018327e-06, "loss": 0.7632, "step": 10673 }, { "epoch": 0.95, "grad_norm": 5.030813211507775, "learning_rate": 5.627884496674918e-06, "loss": 0.7669, "step": 10674 }, { "epoch": 0.95, "grad_norm": 4.893063987113317, "learning_rate": 5.6271678372257955e-06, "loss": 0.7662, "step": 10675 }, { "epoch": 0.95, "grad_norm": 6.340974433669948, "learning_rate": 5.62645116468592e-06, "loss": 0.7817, "step": 10676 }, { "epoch": 0.95, "grad_norm": 5.784494013256472, "learning_rate": 5.625734479070247e-06, "loss": 0.7037, "step": 10677 }, { "epoch": 0.95, "grad_norm": 4.9243659451852935, "learning_rate": 5.6250177803937365e-06, "loss": 0.8111, "step": 10678 }, { "epoch": 0.95, "grad_norm": 6.089870308805469, "learning_rate": 5.6243010686713496e-06, "loss": 0.8342, "step": 10679 }, { "epoch": 0.95, "grad_norm": 6.285006547919683, "learning_rate": 5.623584343918043e-06, "loss": 0.7553, "step": 10680 }, { "epoch": 0.95, "grad_norm": 4.754903450597129, "learning_rate": 5.622867606148781e-06, "loss": 0.7926, "step": 10681 }, { "epoch": 0.95, "grad_norm": 5.585319833935165, "learning_rate": 5.622150855378521e-06, "loss": 0.725, "step": 10682 }, { "epoch": 0.95, "grad_norm": 5.514578778583079, "learning_rate": 5.621434091622225e-06, "loss": 0.759, "step": 10683 }, { "epoch": 0.95, "grad_norm": 5.131466121453619, "learning_rate": 5.620717314894852e-06, "loss": 0.8013, "step": 10684 }, { "epoch": 0.95, "grad_norm": 6.731256976412902, "learning_rate": 5.620000525211366e-06, "loss": 0.7002, "step": 10685 }, { "epoch": 0.95, "grad_norm": 5.4858986793358575, "learning_rate": 5.619283722586726e-06, "loss": 0.7515, "step": 10686 }, { "epoch": 0.95, "grad_norm": 6.125580683336622, "learning_rate": 5.618566907035894e-06, "loss": 0.7035, "step": 10687 }, { "epoch": 0.95, "grad_norm": 4.976497982957543, "learning_rate": 5.617850078573833e-06, "loss": 0.7486, "step": 10688 }, { "epoch": 0.95, "grad_norm": 6.521918463697802, "learning_rate": 5.617133237215504e-06, "loss": 0.8199, "step": 10689 }, { "epoch": 0.95, "grad_norm": 8.92038726397508, "learning_rate": 5.616416382975871e-06, "loss": 0.7749, "step": 10690 }, { "epoch": 0.95, "grad_norm": 7.334149706984305, "learning_rate": 5.6156995158698955e-06, "loss": 0.7683, "step": 10691 }, { "epoch": 0.95, "grad_norm": 5.108888792430661, "learning_rate": 5.614982635912543e-06, "loss": 0.7878, "step": 10692 }, { "epoch": 0.95, "grad_norm": 5.440014651546038, "learning_rate": 5.614265743118772e-06, "loss": 0.78, "step": 10693 }, { "epoch": 0.95, "grad_norm": 6.055910991030995, "learning_rate": 5.613548837503548e-06, "loss": 0.8293, "step": 10694 }, { "epoch": 0.95, "grad_norm": 5.867393594276292, "learning_rate": 5.61283191908184e-06, "loss": 0.7484, "step": 10695 }, { "epoch": 0.95, "grad_norm": 5.358813530741499, "learning_rate": 5.612114987868603e-06, "loss": 0.804, "step": 10696 }, { "epoch": 0.95, "grad_norm": 5.382498873444532, "learning_rate": 5.6113980438788085e-06, "loss": 0.7935, "step": 10697 }, { "epoch": 0.95, "grad_norm": 6.084619184208191, "learning_rate": 5.610681087127418e-06, "loss": 0.8601, "step": 10698 }, { "epoch": 0.95, "grad_norm": 4.793705620537771, "learning_rate": 5.6099641176293995e-06, "loss": 0.7808, "step": 10699 }, { "epoch": 0.95, "grad_norm": 7.091789453290367, "learning_rate": 5.609247135399713e-06, "loss": 0.7931, "step": 10700 }, { "epoch": 0.95, "grad_norm": 5.595553673038061, "learning_rate": 5.608530140453329e-06, "loss": 0.7687, "step": 10701 }, { "epoch": 0.95, "grad_norm": 7.571177159852979, "learning_rate": 5.6078131328052095e-06, "loss": 0.7446, "step": 10702 }, { "epoch": 0.95, "grad_norm": 4.739434960997699, "learning_rate": 5.607096112470321e-06, "loss": 0.7637, "step": 10703 }, { "epoch": 0.95, "grad_norm": 5.807746198722069, "learning_rate": 5.606379079463633e-06, "loss": 0.7835, "step": 10704 }, { "epoch": 0.95, "grad_norm": 5.877265563074078, "learning_rate": 5.605662033800109e-06, "loss": 0.8166, "step": 10705 }, { "epoch": 0.96, "grad_norm": 10.936338041091139, "learning_rate": 5.604944975494716e-06, "loss": 0.7646, "step": 10706 }, { "epoch": 0.96, "grad_norm": 5.506535647237497, "learning_rate": 5.6042279045624215e-06, "loss": 0.8112, "step": 10707 }, { "epoch": 0.96, "grad_norm": 5.9536408837729065, "learning_rate": 5.603510821018193e-06, "loss": 0.7206, "step": 10708 }, { "epoch": 0.96, "grad_norm": 4.368995281812021, "learning_rate": 5.602793724876997e-06, "loss": 0.7744, "step": 10709 }, { "epoch": 0.96, "grad_norm": 4.775677622857346, "learning_rate": 5.602076616153802e-06, "loss": 0.6652, "step": 10710 }, { "epoch": 0.96, "grad_norm": 7.368150406012541, "learning_rate": 5.601359494863577e-06, "loss": 0.7261, "step": 10711 }, { "epoch": 0.96, "grad_norm": 5.083465653920019, "learning_rate": 5.600642361021289e-06, "loss": 0.7295, "step": 10712 }, { "epoch": 0.96, "grad_norm": 6.5629695235647665, "learning_rate": 5.599925214641908e-06, "loss": 0.8112, "step": 10713 }, { "epoch": 0.96, "grad_norm": 7.84280749341674, "learning_rate": 5.5992080557404015e-06, "loss": 0.777, "step": 10714 }, { "epoch": 0.96, "grad_norm": 6.842479768087824, "learning_rate": 5.598490884331739e-06, "loss": 0.752, "step": 10715 }, { "epoch": 0.96, "grad_norm": 6.998495916147981, "learning_rate": 5.59777370043089e-06, "loss": 0.7384, "step": 10716 }, { "epoch": 0.96, "grad_norm": 4.616983184864074, "learning_rate": 5.5970565040528234e-06, "loss": 0.7267, "step": 10717 }, { "epoch": 0.96, "grad_norm": 4.293705506394972, "learning_rate": 5.5963392952125104e-06, "loss": 0.7871, "step": 10718 }, { "epoch": 0.96, "grad_norm": 6.839736128201519, "learning_rate": 5.59562207392492e-06, "loss": 0.7461, "step": 10719 }, { "epoch": 0.96, "grad_norm": 6.640076475261465, "learning_rate": 5.5949048402050245e-06, "loss": 0.7795, "step": 10720 }, { "epoch": 0.96, "grad_norm": 8.20933092338473, "learning_rate": 5.5941875940677925e-06, "loss": 0.8238, "step": 10721 }, { "epoch": 0.96, "grad_norm": 5.0923548930411355, "learning_rate": 5.593470335528196e-06, "loss": 0.7445, "step": 10722 }, { "epoch": 0.96, "grad_norm": 5.307340813811372, "learning_rate": 5.592753064601205e-06, "loss": 0.7974, "step": 10723 }, { "epoch": 0.96, "grad_norm": 6.555544714894877, "learning_rate": 5.592035781301794e-06, "loss": 0.6808, "step": 10724 }, { "epoch": 0.96, "grad_norm": 5.8248089448274, "learning_rate": 5.59131848564493e-06, "loss": 0.8255, "step": 10725 }, { "epoch": 0.96, "grad_norm": 4.0295610595861575, "learning_rate": 5.59060117764559e-06, "loss": 0.7614, "step": 10726 }, { "epoch": 0.96, "grad_norm": 5.034031392543963, "learning_rate": 5.589883857318741e-06, "loss": 0.7721, "step": 10727 }, { "epoch": 0.96, "grad_norm": 9.122500617665851, "learning_rate": 5.58916652467936e-06, "loss": 0.7802, "step": 10728 }, { "epoch": 0.96, "grad_norm": 5.324792360586684, "learning_rate": 5.588449179742418e-06, "loss": 0.7405, "step": 10729 }, { "epoch": 0.96, "grad_norm": 6.339675133654454, "learning_rate": 5.587731822522888e-06, "loss": 0.7666, "step": 10730 }, { "epoch": 0.96, "grad_norm": 5.757497106349291, "learning_rate": 5.587014453035743e-06, "loss": 0.7605, "step": 10731 }, { "epoch": 0.96, "grad_norm": 6.289306820598123, "learning_rate": 5.586297071295956e-06, "loss": 0.7713, "step": 10732 }, { "epoch": 0.96, "grad_norm": 4.755484171933607, "learning_rate": 5.585579677318504e-06, "loss": 0.7652, "step": 10733 }, { "epoch": 0.96, "grad_norm": 6.42013905598207, "learning_rate": 5.584862271118357e-06, "loss": 0.8028, "step": 10734 }, { "epoch": 0.96, "grad_norm": 4.435379640819886, "learning_rate": 5.584144852710491e-06, "loss": 0.8026, "step": 10735 }, { "epoch": 0.96, "grad_norm": 5.9592135828320965, "learning_rate": 5.5834274221098825e-06, "loss": 0.6856, "step": 10736 }, { "epoch": 0.96, "grad_norm": 6.350021084577158, "learning_rate": 5.582709979331502e-06, "loss": 0.7402, "step": 10737 }, { "epoch": 0.96, "grad_norm": 5.9190001422942355, "learning_rate": 5.581992524390326e-06, "loss": 0.7675, "step": 10738 }, { "epoch": 0.96, "grad_norm": 5.967910954540599, "learning_rate": 5.581275057301333e-06, "loss": 0.735, "step": 10739 }, { "epoch": 0.96, "grad_norm": 4.8934848885417574, "learning_rate": 5.580557578079494e-06, "loss": 0.8004, "step": 10740 }, { "epoch": 0.96, "grad_norm": 4.628239390229365, "learning_rate": 5.579840086739789e-06, "loss": 0.674, "step": 10741 }, { "epoch": 0.96, "grad_norm": 5.448732014005137, "learning_rate": 5.579122583297191e-06, "loss": 0.7722, "step": 10742 }, { "epoch": 0.96, "grad_norm": 4.824158500368036, "learning_rate": 5.578405067766677e-06, "loss": 0.7057, "step": 10743 }, { "epoch": 0.96, "grad_norm": 6.924629681966388, "learning_rate": 5.577687540163223e-06, "loss": 0.7225, "step": 10744 }, { "epoch": 0.96, "grad_norm": 4.763506936378303, "learning_rate": 5.576970000501809e-06, "loss": 0.6747, "step": 10745 }, { "epoch": 0.96, "grad_norm": 6.104354558695034, "learning_rate": 5.576252448797409e-06, "loss": 0.7987, "step": 10746 }, { "epoch": 0.96, "grad_norm": 6.808018785697197, "learning_rate": 5.575534885065e-06, "loss": 0.7984, "step": 10747 }, { "epoch": 0.96, "grad_norm": 4.908433311103883, "learning_rate": 5.574817309319561e-06, "loss": 0.7605, "step": 10748 }, { "epoch": 0.96, "grad_norm": 6.4284983130881255, "learning_rate": 5.574099721576072e-06, "loss": 0.7097, "step": 10749 }, { "epoch": 0.96, "grad_norm": 4.807095208190377, "learning_rate": 5.573382121849505e-06, "loss": 0.764, "step": 10750 }, { "epoch": 0.96, "grad_norm": 4.441683340938526, "learning_rate": 5.572664510154844e-06, "loss": 0.7918, "step": 10751 }, { "epoch": 0.96, "grad_norm": 7.109456285242718, "learning_rate": 5.571946886507064e-06, "loss": 0.8281, "step": 10752 }, { "epoch": 0.96, "grad_norm": 6.193676926402861, "learning_rate": 5.571229250921146e-06, "loss": 0.772, "step": 10753 }, { "epoch": 0.96, "grad_norm": 6.982173230664179, "learning_rate": 5.570511603412069e-06, "loss": 0.6982, "step": 10754 }, { "epoch": 0.96, "grad_norm": 5.357854473239579, "learning_rate": 5.569793943994812e-06, "loss": 0.6939, "step": 10755 }, { "epoch": 0.96, "grad_norm": 4.713659414415944, "learning_rate": 5.569076272684353e-06, "loss": 0.7669, "step": 10756 }, { "epoch": 0.96, "grad_norm": 5.317259298856979, "learning_rate": 5.568358589495674e-06, "loss": 0.7546, "step": 10757 }, { "epoch": 0.96, "grad_norm": 8.892567803185804, "learning_rate": 5.567640894443753e-06, "loss": 0.7802, "step": 10758 }, { "epoch": 0.96, "grad_norm": 4.846412932848237, "learning_rate": 5.566923187543573e-06, "loss": 0.8154, "step": 10759 }, { "epoch": 0.96, "grad_norm": 5.068471552640093, "learning_rate": 5.566205468810111e-06, "loss": 0.7065, "step": 10760 }, { "epoch": 0.96, "grad_norm": 4.6999439367677835, "learning_rate": 5.565487738258351e-06, "loss": 0.7289, "step": 10761 }, { "epoch": 0.96, "grad_norm": 5.474354562243727, "learning_rate": 5.564769995903273e-06, "loss": 0.7344, "step": 10762 }, { "epoch": 0.96, "grad_norm": 9.910927881187835, "learning_rate": 5.564052241759858e-06, "loss": 0.7058, "step": 10763 }, { "epoch": 0.96, "grad_norm": 7.282517001681445, "learning_rate": 5.563334475843087e-06, "loss": 0.8072, "step": 10764 }, { "epoch": 0.96, "grad_norm": 5.3570970787606695, "learning_rate": 5.562616698167944e-06, "loss": 0.7352, "step": 10765 }, { "epoch": 0.96, "grad_norm": 4.863273078112554, "learning_rate": 5.561898908749409e-06, "loss": 0.7442, "step": 10766 }, { "epoch": 0.96, "grad_norm": 6.098503267501091, "learning_rate": 5.561181107602464e-06, "loss": 0.7942, "step": 10767 }, { "epoch": 0.96, "grad_norm": 6.0722410880532784, "learning_rate": 5.560463294742094e-06, "loss": 0.8654, "step": 10768 }, { "epoch": 0.96, "grad_norm": 5.604544241358215, "learning_rate": 5.559745470183278e-06, "loss": 0.8158, "step": 10769 }, { "epoch": 0.96, "grad_norm": 5.542121453878259, "learning_rate": 5.559027633941001e-06, "loss": 0.7228, "step": 10770 }, { "epoch": 0.96, "grad_norm": 5.974083517509441, "learning_rate": 5.55830978603025e-06, "loss": 0.8172, "step": 10771 }, { "epoch": 0.96, "grad_norm": 4.232486534983515, "learning_rate": 5.557591926466002e-06, "loss": 0.7789, "step": 10772 }, { "epoch": 0.96, "grad_norm": 5.828476193029375, "learning_rate": 5.556874055263243e-06, "loss": 0.7887, "step": 10773 }, { "epoch": 0.96, "grad_norm": 6.310737379762359, "learning_rate": 5.556156172436958e-06, "loss": 0.7281, "step": 10774 }, { "epoch": 0.96, "grad_norm": 4.573933299145076, "learning_rate": 5.555438278002132e-06, "loss": 0.7326, "step": 10775 }, { "epoch": 0.96, "grad_norm": 5.138146596991702, "learning_rate": 5.554720371973747e-06, "loss": 0.7809, "step": 10776 }, { "epoch": 0.96, "grad_norm": 5.801364955093416, "learning_rate": 5.55400245436679e-06, "loss": 0.752, "step": 10777 }, { "epoch": 0.96, "grad_norm": 5.6574481048814995, "learning_rate": 5.553284525196246e-06, "loss": 0.8301, "step": 10778 }, { "epoch": 0.96, "grad_norm": 6.929194771244951, "learning_rate": 5.552566584477098e-06, "loss": 0.7973, "step": 10779 }, { "epoch": 0.96, "grad_norm": 6.836063940641461, "learning_rate": 5.551848632224333e-06, "loss": 0.815, "step": 10780 }, { "epoch": 0.96, "grad_norm": 5.435930726399479, "learning_rate": 5.551130668452936e-06, "loss": 0.7325, "step": 10781 }, { "epoch": 0.96, "grad_norm": 4.523674390823477, "learning_rate": 5.550412693177893e-06, "loss": 0.7691, "step": 10782 }, { "epoch": 0.96, "grad_norm": 6.6180469253151895, "learning_rate": 5.5496947064141894e-06, "loss": 0.697, "step": 10783 }, { "epoch": 0.96, "grad_norm": 6.135105493775467, "learning_rate": 5.548976708176815e-06, "loss": 0.7325, "step": 10784 }, { "epoch": 0.96, "grad_norm": 4.7600323438977155, "learning_rate": 5.548258698480754e-06, "loss": 0.7838, "step": 10785 }, { "epoch": 0.96, "grad_norm": 4.343729945456398, "learning_rate": 5.547540677340992e-06, "loss": 0.6605, "step": 10786 }, { "epoch": 0.96, "grad_norm": 5.92056030427151, "learning_rate": 5.546822644772518e-06, "loss": 0.8129, "step": 10787 }, { "epoch": 0.96, "grad_norm": 5.31900559432893, "learning_rate": 5.546104600790317e-06, "loss": 0.6927, "step": 10788 }, { "epoch": 0.96, "grad_norm": 5.093399631653884, "learning_rate": 5.5453865454093804e-06, "loss": 0.7638, "step": 10789 }, { "epoch": 0.96, "grad_norm": 6.5354269520252855, "learning_rate": 5.544668478644694e-06, "loss": 0.7214, "step": 10790 }, { "epoch": 0.96, "grad_norm": 4.602634929394719, "learning_rate": 5.543950400511246e-06, "loss": 0.7239, "step": 10791 }, { "epoch": 0.96, "grad_norm": 6.14763620197193, "learning_rate": 5.543232311024022e-06, "loss": 0.7094, "step": 10792 }, { "epoch": 0.96, "grad_norm": 7.186740873558471, "learning_rate": 5.542514210198016e-06, "loss": 0.7498, "step": 10793 }, { "epoch": 0.96, "grad_norm": 7.625108897839705, "learning_rate": 5.541796098048216e-06, "loss": 0.7043, "step": 10794 }, { "epoch": 0.96, "grad_norm": 5.040329259090135, "learning_rate": 5.5410779745896045e-06, "loss": 0.7452, "step": 10795 }, { "epoch": 0.96, "grad_norm": 4.953571668079244, "learning_rate": 5.540359839837177e-06, "loss": 0.7202, "step": 10796 }, { "epoch": 0.96, "grad_norm": 6.257510274340713, "learning_rate": 5.539641693805922e-06, "loss": 0.734, "step": 10797 }, { "epoch": 0.96, "grad_norm": 7.807550674479319, "learning_rate": 5.538923536510826e-06, "loss": 0.7629, "step": 10798 }, { "epoch": 0.96, "grad_norm": 6.393003910189959, "learning_rate": 5.538205367966883e-06, "loss": 0.7306, "step": 10799 }, { "epoch": 0.96, "grad_norm": 4.467274352010674, "learning_rate": 5.537487188189084e-06, "loss": 0.7198, "step": 10800 }, { "epoch": 0.96, "grad_norm": 6.908547468165139, "learning_rate": 5.536768997192413e-06, "loss": 0.7529, "step": 10801 }, { "epoch": 0.96, "grad_norm": 5.0980365721146, "learning_rate": 5.5360507949918665e-06, "loss": 0.7915, "step": 10802 }, { "epoch": 0.96, "grad_norm": 7.6406987634162675, "learning_rate": 5.535332581602434e-06, "loss": 0.7467, "step": 10803 }, { "epoch": 0.96, "grad_norm": 4.520230773249258, "learning_rate": 5.534614357039106e-06, "loss": 0.7633, "step": 10804 }, { "epoch": 0.96, "grad_norm": 6.149237622316492, "learning_rate": 5.533896121316873e-06, "loss": 0.7358, "step": 10805 }, { "epoch": 0.96, "grad_norm": 6.56208937303041, "learning_rate": 5.533177874450728e-06, "loss": 0.7972, "step": 10806 }, { "epoch": 0.96, "grad_norm": 5.723721530361703, "learning_rate": 5.532459616455664e-06, "loss": 0.7009, "step": 10807 }, { "epoch": 0.96, "grad_norm": 4.907545575773026, "learning_rate": 5.5317413473466695e-06, "loss": 0.7363, "step": 10808 }, { "epoch": 0.96, "grad_norm": 5.491298275741591, "learning_rate": 5.5310230671387414e-06, "loss": 0.8063, "step": 10809 }, { "epoch": 0.96, "grad_norm": 9.92361604961312, "learning_rate": 5.530304775846867e-06, "loss": 0.7895, "step": 10810 }, { "epoch": 0.96, "grad_norm": 4.862532215228409, "learning_rate": 5.529586473486043e-06, "loss": 0.7405, "step": 10811 }, { "epoch": 0.96, "grad_norm": 6.185312047580206, "learning_rate": 5.528868160071261e-06, "loss": 0.7803, "step": 10812 }, { "epoch": 0.96, "grad_norm": 5.961223488597242, "learning_rate": 5.528149835617514e-06, "loss": 0.7018, "step": 10813 }, { "epoch": 0.96, "grad_norm": 7.115070386481663, "learning_rate": 5.527431500139797e-06, "loss": 0.6888, "step": 10814 }, { "epoch": 0.96, "grad_norm": 5.240889203508808, "learning_rate": 5.526713153653102e-06, "loss": 0.7588, "step": 10815 }, { "epoch": 0.96, "grad_norm": 8.409514123785138, "learning_rate": 5.525994796172422e-06, "loss": 0.7608, "step": 10816 }, { "epoch": 0.96, "grad_norm": 4.847199709647511, "learning_rate": 5.525276427712755e-06, "loss": 0.8109, "step": 10817 }, { "epoch": 0.97, "grad_norm": 6.0468838379030485, "learning_rate": 5.5245580482890905e-06, "loss": 0.7985, "step": 10818 }, { "epoch": 0.97, "grad_norm": 7.057783443753944, "learning_rate": 5.523839657916427e-06, "loss": 0.8106, "step": 10819 }, { "epoch": 0.97, "grad_norm": 5.395911694185834, "learning_rate": 5.523121256609757e-06, "loss": 0.8074, "step": 10820 }, { "epoch": 0.97, "grad_norm": 5.6349244125114355, "learning_rate": 5.522402844384077e-06, "loss": 0.7294, "step": 10821 }, { "epoch": 0.97, "grad_norm": 5.382522609416082, "learning_rate": 5.521684421254382e-06, "loss": 0.8, "step": 10822 }, { "epoch": 0.97, "grad_norm": 5.836391953693463, "learning_rate": 5.5209659872356654e-06, "loss": 0.7634, "step": 10823 }, { "epoch": 0.97, "grad_norm": 5.868128572568108, "learning_rate": 5.5202475423429255e-06, "loss": 0.7749, "step": 10824 }, { "epoch": 0.97, "grad_norm": 5.076462503549313, "learning_rate": 5.519529086591159e-06, "loss": 0.754, "step": 10825 }, { "epoch": 0.97, "grad_norm": 6.832570910090234, "learning_rate": 5.5188106199953595e-06, "loss": 0.7709, "step": 10826 }, { "epoch": 0.97, "grad_norm": 6.41740428907209, "learning_rate": 5.5180921425705235e-06, "loss": 0.7645, "step": 10827 }, { "epoch": 0.97, "grad_norm": 7.860705760960257, "learning_rate": 5.51737365433165e-06, "loss": 0.7704, "step": 10828 }, { "epoch": 0.97, "grad_norm": 4.479478864224294, "learning_rate": 5.516655155293735e-06, "loss": 0.7229, "step": 10829 }, { "epoch": 0.97, "grad_norm": 6.2566102146947555, "learning_rate": 5.5159366454717735e-06, "loss": 0.7339, "step": 10830 }, { "epoch": 0.97, "grad_norm": 6.557257423336992, "learning_rate": 5.515218124880765e-06, "loss": 0.7171, "step": 10831 }, { "epoch": 0.97, "grad_norm": 4.852578937963471, "learning_rate": 5.514499593535707e-06, "loss": 0.783, "step": 10832 }, { "epoch": 0.97, "grad_norm": 5.506309609137088, "learning_rate": 5.513781051451595e-06, "loss": 0.6963, "step": 10833 }, { "epoch": 0.97, "grad_norm": 5.770704000006851, "learning_rate": 5.51306249864343e-06, "loss": 0.7856, "step": 10834 }, { "epoch": 0.97, "grad_norm": 4.687536992812467, "learning_rate": 5.51234393512621e-06, "loss": 0.7903, "step": 10835 }, { "epoch": 0.97, "grad_norm": 4.14151277853263, "learning_rate": 5.5116253609149315e-06, "loss": 0.693, "step": 10836 }, { "epoch": 0.97, "grad_norm": 6.397605921093894, "learning_rate": 5.510906776024594e-06, "loss": 0.696, "step": 10837 }, { "epoch": 0.97, "grad_norm": 6.153173229645279, "learning_rate": 5.5101881804701966e-06, "loss": 0.7653, "step": 10838 }, { "epoch": 0.97, "grad_norm": 6.09032519402522, "learning_rate": 5.509469574266737e-06, "loss": 0.7554, "step": 10839 }, { "epoch": 0.97, "grad_norm": 5.200858368297088, "learning_rate": 5.5087509574292165e-06, "loss": 0.7658, "step": 10840 }, { "epoch": 0.97, "grad_norm": 5.323588625124677, "learning_rate": 5.508032329972635e-06, "loss": 0.7119, "step": 10841 }, { "epoch": 0.97, "grad_norm": 5.897500151511188, "learning_rate": 5.507313691911988e-06, "loss": 0.8124, "step": 10842 }, { "epoch": 0.97, "grad_norm": 4.843540263562123, "learning_rate": 5.506595043262282e-06, "loss": 0.7367, "step": 10843 }, { "epoch": 0.97, "grad_norm": 4.940924451862673, "learning_rate": 5.505876384038512e-06, "loss": 0.8115, "step": 10844 }, { "epoch": 0.97, "grad_norm": 6.142674815756071, "learning_rate": 5.505157714255682e-06, "loss": 0.7766, "step": 10845 }, { "epoch": 0.97, "grad_norm": 5.827504295541371, "learning_rate": 5.50443903392879e-06, "loss": 0.7613, "step": 10846 }, { "epoch": 0.97, "grad_norm": 5.877686315815717, "learning_rate": 5.503720343072838e-06, "loss": 0.8235, "step": 10847 }, { "epoch": 0.97, "grad_norm": 4.506388751840898, "learning_rate": 5.5030016417028264e-06, "loss": 0.6973, "step": 10848 }, { "epoch": 0.97, "grad_norm": 6.080662334653133, "learning_rate": 5.5022829298337574e-06, "loss": 0.7377, "step": 10849 }, { "epoch": 0.97, "grad_norm": 5.368887439964926, "learning_rate": 5.501564207480633e-06, "loss": 0.7257, "step": 10850 }, { "epoch": 0.97, "grad_norm": 5.437736372977898, "learning_rate": 5.500845474658454e-06, "loss": 0.7668, "step": 10851 }, { "epoch": 0.97, "grad_norm": 4.595274744317537, "learning_rate": 5.500126731382222e-06, "loss": 0.8354, "step": 10852 }, { "epoch": 0.97, "grad_norm": 7.495852183228257, "learning_rate": 5.499407977666939e-06, "loss": 0.793, "step": 10853 }, { "epoch": 0.97, "grad_norm": 7.398505559717923, "learning_rate": 5.49868921352761e-06, "loss": 0.832, "step": 10854 }, { "epoch": 0.97, "grad_norm": 5.837122273337541, "learning_rate": 5.497970438979235e-06, "loss": 0.7368, "step": 10855 }, { "epoch": 0.97, "grad_norm": 5.633314221489813, "learning_rate": 5.497251654036816e-06, "loss": 0.7704, "step": 10856 }, { "epoch": 0.97, "grad_norm": 4.899177043702602, "learning_rate": 5.496532858715361e-06, "loss": 0.7967, "step": 10857 }, { "epoch": 0.97, "grad_norm": 5.390812539567147, "learning_rate": 5.495814053029867e-06, "loss": 0.669, "step": 10858 }, { "epoch": 0.97, "grad_norm": 5.152273159417039, "learning_rate": 5.495095236995341e-06, "loss": 0.7353, "step": 10859 }, { "epoch": 0.97, "grad_norm": 6.228486809690229, "learning_rate": 5.494376410626788e-06, "loss": 0.7295, "step": 10860 }, { "epoch": 0.97, "grad_norm": 4.6030421015126635, "learning_rate": 5.4936575739392085e-06, "loss": 0.7071, "step": 10861 }, { "epoch": 0.97, "grad_norm": 5.282241887527273, "learning_rate": 5.4929387269476076e-06, "loss": 0.7878, "step": 10862 }, { "epoch": 0.97, "grad_norm": 6.664744075912512, "learning_rate": 5.4922198696669915e-06, "loss": 0.7436, "step": 10863 }, { "epoch": 0.97, "grad_norm": 4.379604978079328, "learning_rate": 5.491501002112363e-06, "loss": 0.7499, "step": 10864 }, { "epoch": 0.97, "grad_norm": 4.711792151969145, "learning_rate": 5.490782124298729e-06, "loss": 0.7216, "step": 10865 }, { "epoch": 0.97, "grad_norm": 6.257201442553998, "learning_rate": 5.490063236241091e-06, "loss": 0.733, "step": 10866 }, { "epoch": 0.97, "grad_norm": 4.698936147947885, "learning_rate": 5.489344337954457e-06, "loss": 0.7341, "step": 10867 }, { "epoch": 0.97, "grad_norm": 6.262660272297028, "learning_rate": 5.488625429453831e-06, "loss": 0.709, "step": 10868 }, { "epoch": 0.97, "grad_norm": 6.091025138917357, "learning_rate": 5.48790651075422e-06, "loss": 0.7364, "step": 10869 }, { "epoch": 0.97, "grad_norm": 6.030706704439861, "learning_rate": 5.487187581870629e-06, "loss": 0.7644, "step": 10870 }, { "epoch": 0.97, "grad_norm": 6.396493711182646, "learning_rate": 5.486468642818064e-06, "loss": 0.7689, "step": 10871 }, { "epoch": 0.97, "grad_norm": 4.521708301865422, "learning_rate": 5.48574969361153e-06, "loss": 0.8386, "step": 10872 }, { "epoch": 0.97, "grad_norm": 6.531950468405093, "learning_rate": 5.485030734266038e-06, "loss": 0.7249, "step": 10873 }, { "epoch": 0.97, "grad_norm": 5.055894533866756, "learning_rate": 5.48431176479659e-06, "loss": 0.7865, "step": 10874 }, { "epoch": 0.97, "grad_norm": 6.994955839876602, "learning_rate": 5.483592785218194e-06, "loss": 0.7545, "step": 10875 }, { "epoch": 0.97, "grad_norm": 4.8720281031726715, "learning_rate": 5.482873795545858e-06, "loss": 0.8024, "step": 10876 }, { "epoch": 0.97, "grad_norm": 5.09959164851131, "learning_rate": 5.482154795794588e-06, "loss": 0.7807, "step": 10877 }, { "epoch": 0.97, "grad_norm": 5.498183640465294, "learning_rate": 5.481435785979394e-06, "loss": 0.7144, "step": 10878 }, { "epoch": 0.97, "grad_norm": 6.570621921208779, "learning_rate": 5.480716766115283e-06, "loss": 0.6933, "step": 10879 }, { "epoch": 0.97, "grad_norm": 4.809443695996478, "learning_rate": 5.479997736217261e-06, "loss": 0.82, "step": 10880 }, { "epoch": 0.97, "grad_norm": 5.91908343473005, "learning_rate": 5.4792786963003376e-06, "loss": 0.7902, "step": 10881 }, { "epoch": 0.97, "grad_norm": 4.911956032275399, "learning_rate": 5.47855964637952e-06, "loss": 0.7451, "step": 10882 }, { "epoch": 0.97, "grad_norm": 5.292592110278131, "learning_rate": 5.4778405864698195e-06, "loss": 0.7956, "step": 10883 }, { "epoch": 0.97, "grad_norm": 6.040662578521395, "learning_rate": 5.477121516586243e-06, "loss": 0.8381, "step": 10884 }, { "epoch": 0.97, "grad_norm": 5.991990085727443, "learning_rate": 5.476402436743799e-06, "loss": 0.7801, "step": 10885 }, { "epoch": 0.97, "grad_norm": 6.050521211636354, "learning_rate": 5.475683346957497e-06, "loss": 0.7659, "step": 10886 }, { "epoch": 0.97, "grad_norm": 5.286247708002262, "learning_rate": 5.4749642472423485e-06, "loss": 0.6867, "step": 10887 }, { "epoch": 0.97, "grad_norm": 6.781821682144777, "learning_rate": 5.47424513761336e-06, "loss": 0.829, "step": 10888 }, { "epoch": 0.97, "grad_norm": 5.25061283162398, "learning_rate": 5.473526018085544e-06, "loss": 0.6939, "step": 10889 }, { "epoch": 0.97, "grad_norm": 5.566651308412545, "learning_rate": 5.472806888673909e-06, "loss": 0.828, "step": 10890 }, { "epoch": 0.97, "grad_norm": 6.085724297098351, "learning_rate": 5.472087749393465e-06, "loss": 0.7881, "step": 10891 }, { "epoch": 0.97, "grad_norm": 5.166223927755239, "learning_rate": 5.471368600259223e-06, "loss": 0.8002, "step": 10892 }, { "epoch": 0.97, "grad_norm": 8.17966785273844, "learning_rate": 5.470649441286194e-06, "loss": 0.799, "step": 10893 }, { "epoch": 0.97, "grad_norm": 6.181368284013485, "learning_rate": 5.469930272489389e-06, "loss": 0.7201, "step": 10894 }, { "epoch": 0.97, "grad_norm": 5.750473902174519, "learning_rate": 5.469211093883819e-06, "loss": 0.7895, "step": 10895 }, { "epoch": 0.97, "grad_norm": 5.409850444267522, "learning_rate": 5.468491905484494e-06, "loss": 0.68, "step": 10896 }, { "epoch": 0.97, "grad_norm": 6.412174205988963, "learning_rate": 5.467772707306427e-06, "loss": 0.7322, "step": 10897 }, { "epoch": 0.97, "grad_norm": 6.133378927836763, "learning_rate": 5.46705349936463e-06, "loss": 0.7358, "step": 10898 }, { "epoch": 0.97, "grad_norm": 5.91671555772656, "learning_rate": 5.466334281674112e-06, "loss": 0.7659, "step": 10899 }, { "epoch": 0.97, "grad_norm": 4.9215159926489624, "learning_rate": 5.465615054249888e-06, "loss": 0.6961, "step": 10900 }, { "epoch": 0.97, "grad_norm": 6.2816955387597595, "learning_rate": 5.46489581710697e-06, "loss": 0.7533, "step": 10901 }, { "epoch": 0.97, "grad_norm": 9.882621229467905, "learning_rate": 5.46417657026037e-06, "loss": 0.8756, "step": 10902 }, { "epoch": 0.97, "grad_norm": 5.708762956651207, "learning_rate": 5.4634573137251e-06, "loss": 0.8209, "step": 10903 }, { "epoch": 0.97, "grad_norm": 5.345043098191782, "learning_rate": 5.462738047516173e-06, "loss": 0.8502, "step": 10904 }, { "epoch": 0.97, "grad_norm": 7.252415881748944, "learning_rate": 5.462018771648604e-06, "loss": 0.7915, "step": 10905 }, { "epoch": 0.97, "grad_norm": 5.984023331478982, "learning_rate": 5.461299486137404e-06, "loss": 0.7453, "step": 10906 }, { "epoch": 0.97, "grad_norm": 4.321797581294392, "learning_rate": 5.460580190997587e-06, "loss": 0.7968, "step": 10907 }, { "epoch": 0.97, "grad_norm": 6.670609197430789, "learning_rate": 5.459860886244171e-06, "loss": 0.743, "step": 10908 }, { "epoch": 0.97, "grad_norm": 5.284889902389466, "learning_rate": 5.459141571892161e-06, "loss": 0.7583, "step": 10909 }, { "epoch": 0.97, "grad_norm": 6.635824894749308, "learning_rate": 5.458422247956579e-06, "loss": 0.7977, "step": 10910 }, { "epoch": 0.97, "grad_norm": 6.600541405788429, "learning_rate": 5.4577029144524355e-06, "loss": 0.7082, "step": 10911 }, { "epoch": 0.97, "grad_norm": 9.067959538016334, "learning_rate": 5.456983571394746e-06, "loss": 0.7831, "step": 10912 }, { "epoch": 0.97, "grad_norm": 5.3605245078371695, "learning_rate": 5.456264218798525e-06, "loss": 0.788, "step": 10913 }, { "epoch": 0.97, "grad_norm": 4.59972656581848, "learning_rate": 5.455544856678788e-06, "loss": 0.7643, "step": 10914 }, { "epoch": 0.97, "grad_norm": 5.276688621234027, "learning_rate": 5.454825485050551e-06, "loss": 0.7258, "step": 10915 }, { "epoch": 0.97, "grad_norm": 5.902128502988064, "learning_rate": 5.4541061039288265e-06, "loss": 0.7898, "step": 10916 }, { "epoch": 0.97, "grad_norm": 5.074511796033352, "learning_rate": 5.453386713328631e-06, "loss": 0.7483, "step": 10917 }, { "epoch": 0.97, "grad_norm": 4.2963896584454675, "learning_rate": 5.452667313264982e-06, "loss": 0.8307, "step": 10918 }, { "epoch": 0.97, "grad_norm": 5.788046973127715, "learning_rate": 5.451947903752895e-06, "loss": 0.82, "step": 10919 }, { "epoch": 0.97, "grad_norm": 4.991260409354889, "learning_rate": 5.4512284848073825e-06, "loss": 0.7517, "step": 10920 }, { "epoch": 0.97, "grad_norm": 4.896170925086359, "learning_rate": 5.450509056443466e-06, "loss": 0.7919, "step": 10921 }, { "epoch": 0.97, "grad_norm": 5.298583060987662, "learning_rate": 5.449789618676159e-06, "loss": 0.8471, "step": 10922 }, { "epoch": 0.97, "grad_norm": 6.153903798141602, "learning_rate": 5.449070171520478e-06, "loss": 0.6738, "step": 10923 }, { "epoch": 0.97, "grad_norm": 5.611644670033036, "learning_rate": 5.448350714991442e-06, "loss": 0.8042, "step": 10924 }, { "epoch": 0.97, "grad_norm": 5.423400673310906, "learning_rate": 5.447631249104065e-06, "loss": 0.7167, "step": 10925 }, { "epoch": 0.97, "grad_norm": 6.964824962476968, "learning_rate": 5.446911773873367e-06, "loss": 0.8151, "step": 10926 }, { "epoch": 0.97, "grad_norm": 5.533647884779483, "learning_rate": 5.4461922893143635e-06, "loss": 0.7879, "step": 10927 }, { "epoch": 0.97, "grad_norm": 5.5947120585872, "learning_rate": 5.445472795442074e-06, "loss": 0.7243, "step": 10928 }, { "epoch": 0.97, "grad_norm": 3.4628424952932098, "learning_rate": 5.444753292271514e-06, "loss": 0.8739, "step": 10929 }, { "epoch": 0.98, "grad_norm": 4.821373319907367, "learning_rate": 5.444033779817706e-06, "loss": 0.7448, "step": 10930 }, { "epoch": 0.98, "grad_norm": 8.54558232942016, "learning_rate": 5.443314258095663e-06, "loss": 0.7809, "step": 10931 }, { "epoch": 0.98, "grad_norm": 7.183940544667082, "learning_rate": 5.442594727120406e-06, "loss": 0.7706, "step": 10932 }, { "epoch": 0.98, "grad_norm": 6.160339184049071, "learning_rate": 5.4418751869069545e-06, "loss": 0.7798, "step": 10933 }, { "epoch": 0.98, "grad_norm": 5.790313198179425, "learning_rate": 5.441155637470325e-06, "loss": 0.7999, "step": 10934 }, { "epoch": 0.98, "grad_norm": 4.22865400726616, "learning_rate": 5.440436078825537e-06, "loss": 0.7152, "step": 10935 }, { "epoch": 0.98, "grad_norm": 6.520589550067894, "learning_rate": 5.439716510987611e-06, "loss": 0.7224, "step": 10936 }, { "epoch": 0.98, "grad_norm": 5.253168197368117, "learning_rate": 5.438996933971568e-06, "loss": 0.8284, "step": 10937 }, { "epoch": 0.98, "grad_norm": 6.00109018867336, "learning_rate": 5.438277347792422e-06, "loss": 0.7435, "step": 10938 }, { "epoch": 0.98, "grad_norm": 3.984674075021841, "learning_rate": 5.437557752465198e-06, "loss": 0.6891, "step": 10939 }, { "epoch": 0.98, "grad_norm": 5.626382171257207, "learning_rate": 5.4368381480049146e-06, "loss": 0.7453, "step": 10940 }, { "epoch": 0.98, "grad_norm": 6.188686310681479, "learning_rate": 5.4361185344265895e-06, "loss": 0.7425, "step": 10941 }, { "epoch": 0.98, "grad_norm": 5.71696672107443, "learning_rate": 5.435398911745246e-06, "loss": 0.7494, "step": 10942 }, { "epoch": 0.98, "grad_norm": 4.761570090407094, "learning_rate": 5.434679279975905e-06, "loss": 0.7178, "step": 10943 }, { "epoch": 0.98, "grad_norm": 5.542822081852594, "learning_rate": 5.433959639133586e-06, "loss": 0.8043, "step": 10944 }, { "epoch": 0.98, "grad_norm": 4.309655126576206, "learning_rate": 5.433239989233308e-06, "loss": 0.7624, "step": 10945 }, { "epoch": 0.98, "grad_norm": 4.3565105270089814, "learning_rate": 5.432520330290095e-06, "loss": 0.7251, "step": 10946 }, { "epoch": 0.98, "grad_norm": 4.064157990392767, "learning_rate": 5.431800662318967e-06, "loss": 0.7245, "step": 10947 }, { "epoch": 0.98, "grad_norm": 6.107981509897668, "learning_rate": 5.431080985334945e-06, "loss": 0.7374, "step": 10948 }, { "epoch": 0.98, "grad_norm": 6.839918550509735, "learning_rate": 5.430361299353054e-06, "loss": 0.7491, "step": 10949 }, { "epoch": 0.98, "grad_norm": 6.249140372363708, "learning_rate": 5.4296416043883115e-06, "loss": 0.7865, "step": 10950 }, { "epoch": 0.98, "grad_norm": 6.5981414290553335, "learning_rate": 5.428921900455741e-06, "loss": 0.8204, "step": 10951 }, { "epoch": 0.98, "grad_norm": 5.868684612888946, "learning_rate": 5.428202187570366e-06, "loss": 0.719, "step": 10952 }, { "epoch": 0.98, "grad_norm": 5.66179288022737, "learning_rate": 5.427482465747209e-06, "loss": 0.6998, "step": 10953 }, { "epoch": 0.98, "grad_norm": 6.203700427451355, "learning_rate": 5.4267627350012895e-06, "loss": 0.7647, "step": 10954 }, { "epoch": 0.98, "grad_norm": 6.703821703943381, "learning_rate": 5.426042995347634e-06, "loss": 0.7659, "step": 10955 }, { "epoch": 0.98, "grad_norm": 5.810313708812205, "learning_rate": 5.425323246801264e-06, "loss": 0.6959, "step": 10956 }, { "epoch": 0.98, "grad_norm": 5.124878584922569, "learning_rate": 5.4246034893772015e-06, "loss": 0.7705, "step": 10957 }, { "epoch": 0.98, "grad_norm": 4.782854270363214, "learning_rate": 5.423883723090473e-06, "loss": 0.6905, "step": 10958 }, { "epoch": 0.98, "grad_norm": 7.106513041089919, "learning_rate": 5.423163947956099e-06, "loss": 0.7214, "step": 10959 }, { "epoch": 0.98, "grad_norm": 6.036914215044615, "learning_rate": 5.422444163989104e-06, "loss": 0.6942, "step": 10960 }, { "epoch": 0.98, "grad_norm": 9.218680754965723, "learning_rate": 5.421724371204512e-06, "loss": 0.8339, "step": 10961 }, { "epoch": 0.98, "grad_norm": 4.829682076883969, "learning_rate": 5.421004569617348e-06, "loss": 0.7902, "step": 10962 }, { "epoch": 0.98, "grad_norm": 4.996729331965973, "learning_rate": 5.420284759242636e-06, "loss": 0.7246, "step": 10963 }, { "epoch": 0.98, "grad_norm": 4.784089209312177, "learning_rate": 5.419564940095398e-06, "loss": 0.75, "step": 10964 }, { "epoch": 0.98, "grad_norm": 8.472496721757127, "learning_rate": 5.418845112190663e-06, "loss": 0.7639, "step": 10965 }, { "epoch": 0.98, "grad_norm": 5.622684587540335, "learning_rate": 5.418125275543453e-06, "loss": 0.7343, "step": 10966 }, { "epoch": 0.98, "grad_norm": 5.792942923617605, "learning_rate": 5.417405430168793e-06, "loss": 0.6959, "step": 10967 }, { "epoch": 0.98, "grad_norm": 7.012266940813307, "learning_rate": 5.4166855760817085e-06, "loss": 0.7104, "step": 10968 }, { "epoch": 0.98, "grad_norm": 6.123479517454359, "learning_rate": 5.4159657132972255e-06, "loss": 0.7414, "step": 10969 }, { "epoch": 0.98, "grad_norm": 6.059458844612427, "learning_rate": 5.415245841830368e-06, "loss": 0.7404, "step": 10970 }, { "epoch": 0.98, "grad_norm": 5.884663455045666, "learning_rate": 5.414525961696165e-06, "loss": 0.7327, "step": 10971 }, { "epoch": 0.98, "grad_norm": 5.4885457002636375, "learning_rate": 5.4138060729096394e-06, "loss": 0.7794, "step": 10972 }, { "epoch": 0.98, "grad_norm": 4.132055494004316, "learning_rate": 5.41308617548582e-06, "loss": 0.7308, "step": 10973 }, { "epoch": 0.98, "grad_norm": 4.731627618879089, "learning_rate": 5.412366269439728e-06, "loss": 0.7224, "step": 10974 }, { "epoch": 0.98, "grad_norm": 5.246486169524868, "learning_rate": 5.411646354786397e-06, "loss": 0.782, "step": 10975 }, { "epoch": 0.98, "grad_norm": 5.924513941975421, "learning_rate": 5.410926431540846e-06, "loss": 0.7091, "step": 10976 }, { "epoch": 0.98, "grad_norm": 4.758059422257872, "learning_rate": 5.410206499718107e-06, "loss": 0.8375, "step": 10977 }, { "epoch": 0.98, "grad_norm": 5.767472676778761, "learning_rate": 5.409486559333207e-06, "loss": 0.7906, "step": 10978 }, { "epoch": 0.98, "grad_norm": 4.811519428828896, "learning_rate": 5.4087666104011705e-06, "loss": 0.7512, "step": 10979 }, { "epoch": 0.98, "grad_norm": 5.769157249553341, "learning_rate": 5.408046652937027e-06, "loss": 0.7469, "step": 10980 }, { "epoch": 0.98, "grad_norm": 6.234970326389391, "learning_rate": 5.407326686955804e-06, "loss": 0.8275, "step": 10981 }, { "epoch": 0.98, "grad_norm": 4.517935192664958, "learning_rate": 5.4066067124725265e-06, "loss": 0.7943, "step": 10982 }, { "epoch": 0.98, "grad_norm": 5.4386855719959115, "learning_rate": 5.405886729502224e-06, "loss": 0.815, "step": 10983 }, { "epoch": 0.98, "grad_norm": 5.429937211251179, "learning_rate": 5.405166738059927e-06, "loss": 0.7624, "step": 10984 }, { "epoch": 0.98, "grad_norm": 6.800785655966383, "learning_rate": 5.40444673816066e-06, "loss": 0.8036, "step": 10985 }, { "epoch": 0.98, "grad_norm": 5.839310660289208, "learning_rate": 5.403726729819453e-06, "loss": 0.7926, "step": 10986 }, { "epoch": 0.98, "grad_norm": 5.9388657699629075, "learning_rate": 5.403006713051336e-06, "loss": 0.7846, "step": 10987 }, { "epoch": 0.98, "grad_norm": 5.101990328000364, "learning_rate": 5.4022866878713365e-06, "loss": 0.7198, "step": 10988 }, { "epoch": 0.98, "grad_norm": 5.1357160780207725, "learning_rate": 5.4015666542944825e-06, "loss": 0.6939, "step": 10989 }, { "epoch": 0.98, "grad_norm": 4.3646086531637724, "learning_rate": 5.400846612335803e-06, "loss": 0.6795, "step": 10990 }, { "epoch": 0.98, "grad_norm": 5.693596334146372, "learning_rate": 5.40012656201033e-06, "loss": 0.6909, "step": 10991 }, { "epoch": 0.98, "grad_norm": 4.769017161535011, "learning_rate": 5.39940650333309e-06, "loss": 0.8169, "step": 10992 }, { "epoch": 0.98, "grad_norm": 4.399699925285489, "learning_rate": 5.398686436319115e-06, "loss": 0.7661, "step": 10993 }, { "epoch": 0.98, "grad_norm": 5.90561808801369, "learning_rate": 5.3979663609834355e-06, "loss": 0.7491, "step": 10994 }, { "epoch": 0.98, "grad_norm": 6.211008260075684, "learning_rate": 5.397246277341076e-06, "loss": 0.7199, "step": 10995 }, { "epoch": 0.98, "grad_norm": 5.215269306267232, "learning_rate": 5.396526185407072e-06, "loss": 0.7719, "step": 10996 }, { "epoch": 0.98, "grad_norm": 4.8419117740802555, "learning_rate": 5.395806085196453e-06, "loss": 0.8293, "step": 10997 }, { "epoch": 0.98, "grad_norm": 5.922609747711222, "learning_rate": 5.395085976724248e-06, "loss": 0.7584, "step": 10998 }, { "epoch": 0.98, "grad_norm": 6.770890993048914, "learning_rate": 5.3943658600054886e-06, "loss": 0.812, "step": 10999 }, { "epoch": 0.98, "grad_norm": 5.252765837460645, "learning_rate": 5.393645735055205e-06, "loss": 0.7558, "step": 11000 }, { "epoch": 0.98, "grad_norm": 7.1003718615310785, "learning_rate": 5.392925601888429e-06, "loss": 0.8175, "step": 11001 }, { "epoch": 0.98, "grad_norm": 6.127926939834495, "learning_rate": 5.392205460520192e-06, "loss": 0.7528, "step": 11002 }, { "epoch": 0.98, "grad_norm": 6.837037990237241, "learning_rate": 5.391485310965524e-06, "loss": 0.7439, "step": 11003 }, { "epoch": 0.98, "grad_norm": 5.350434987831685, "learning_rate": 5.390765153239459e-06, "loss": 0.7745, "step": 11004 }, { "epoch": 0.98, "grad_norm": 3.898594735039204, "learning_rate": 5.390044987357026e-06, "loss": 0.756, "step": 11005 }, { "epoch": 0.98, "grad_norm": 5.2749602056664235, "learning_rate": 5.389324813333259e-06, "loss": 0.7679, "step": 11006 }, { "epoch": 0.98, "grad_norm": 6.065526230824823, "learning_rate": 5.388604631183189e-06, "loss": 0.733, "step": 11007 }, { "epoch": 0.98, "grad_norm": 5.971647564906095, "learning_rate": 5.387884440921846e-06, "loss": 0.6954, "step": 11008 }, { "epoch": 0.98, "grad_norm": 5.654992520017964, "learning_rate": 5.3871642425642675e-06, "loss": 0.7647, "step": 11009 }, { "epoch": 0.98, "grad_norm": 4.299184502539172, "learning_rate": 5.3864440361254836e-06, "loss": 0.7783, "step": 11010 }, { "epoch": 0.98, "grad_norm": 4.626667894031781, "learning_rate": 5.385723821620525e-06, "loss": 0.7197, "step": 11011 }, { "epoch": 0.98, "grad_norm": 5.02334049614243, "learning_rate": 5.385003599064426e-06, "loss": 0.7564, "step": 11012 }, { "epoch": 0.98, "grad_norm": 6.862395593727241, "learning_rate": 5.384283368472223e-06, "loss": 0.8131, "step": 11013 }, { "epoch": 0.98, "grad_norm": 6.618701518775597, "learning_rate": 5.383563129858943e-06, "loss": 0.7718, "step": 11014 }, { "epoch": 0.98, "grad_norm": 4.356015050610118, "learning_rate": 5.382842883239624e-06, "loss": 0.7434, "step": 11015 }, { "epoch": 0.98, "grad_norm": 6.342844156947658, "learning_rate": 5.382122628629298e-06, "loss": 0.7659, "step": 11016 }, { "epoch": 0.98, "grad_norm": 5.35263850274779, "learning_rate": 5.381402366043001e-06, "loss": 0.8834, "step": 11017 }, { "epoch": 0.98, "grad_norm": 6.087155007188754, "learning_rate": 5.3806820954957616e-06, "loss": 0.8347, "step": 11018 }, { "epoch": 0.98, "grad_norm": 7.62816028355693, "learning_rate": 5.379961817002619e-06, "loss": 0.7922, "step": 11019 }, { "epoch": 0.98, "grad_norm": 5.327301937430217, "learning_rate": 5.379241530578605e-06, "loss": 0.7781, "step": 11020 }, { "epoch": 0.98, "grad_norm": 6.5433780651070474, "learning_rate": 5.378521236238754e-06, "loss": 0.7786, "step": 11021 }, { "epoch": 0.98, "grad_norm": 6.531973255540814, "learning_rate": 5.377800933998102e-06, "loss": 0.6978, "step": 11022 }, { "epoch": 0.98, "grad_norm": 6.530768261184267, "learning_rate": 5.3770806238716836e-06, "loss": 0.8534, "step": 11023 }, { "epoch": 0.98, "grad_norm": 6.558703111807603, "learning_rate": 5.37636030587453e-06, "loss": 0.7905, "step": 11024 }, { "epoch": 0.98, "grad_norm": 5.964483616505667, "learning_rate": 5.375639980021682e-06, "loss": 0.7456, "step": 11025 }, { "epoch": 0.98, "grad_norm": 5.574431588536491, "learning_rate": 5.374919646328171e-06, "loss": 0.8059, "step": 11026 }, { "epoch": 0.98, "grad_norm": 5.476183949429598, "learning_rate": 5.3741993048090335e-06, "loss": 0.7621, "step": 11027 }, { "epoch": 0.98, "grad_norm": 5.570655043610124, "learning_rate": 5.373478955479306e-06, "loss": 0.6816, "step": 11028 }, { "epoch": 0.98, "grad_norm": 4.537724318700964, "learning_rate": 5.372758598354022e-06, "loss": 0.79, "step": 11029 }, { "epoch": 0.98, "grad_norm": 5.989313968311101, "learning_rate": 5.37203823344822e-06, "loss": 0.6836, "step": 11030 }, { "epoch": 0.98, "grad_norm": 5.7452197611324065, "learning_rate": 5.371317860776933e-06, "loss": 0.7953, "step": 11031 }, { "epoch": 0.98, "grad_norm": 5.589394212759536, "learning_rate": 5.3705974803552e-06, "loss": 0.7946, "step": 11032 }, { "epoch": 0.98, "grad_norm": 3.8469425344130372, "learning_rate": 5.369877092198056e-06, "loss": 0.7421, "step": 11033 }, { "epoch": 0.98, "grad_norm": 7.100493372293296, "learning_rate": 5.3691566963205366e-06, "loss": 0.7678, "step": 11034 }, { "epoch": 0.98, "grad_norm": 4.967245258979909, "learning_rate": 5.368436292737681e-06, "loss": 0.7573, "step": 11035 }, { "epoch": 0.98, "grad_norm": 5.5789971328599846, "learning_rate": 5.367715881464524e-06, "loss": 0.712, "step": 11036 }, { "epoch": 0.98, "grad_norm": 6.136789739081765, "learning_rate": 5.3669954625161026e-06, "loss": 0.7196, "step": 11037 }, { "epoch": 0.98, "grad_norm": 6.376699761360246, "learning_rate": 5.366275035907456e-06, "loss": 0.7486, "step": 11038 }, { "epoch": 0.98, "grad_norm": 5.053732917571989, "learning_rate": 5.36555460165362e-06, "loss": 0.7453, "step": 11039 }, { "epoch": 0.98, "grad_norm": 5.585110943815493, "learning_rate": 5.364834159769631e-06, "loss": 0.7496, "step": 11040 }, { "epoch": 0.98, "grad_norm": 5.734102219818554, "learning_rate": 5.364113710270528e-06, "loss": 0.718, "step": 11041 }, { "epoch": 0.99, "grad_norm": 4.565223050700412, "learning_rate": 5.36339325317135e-06, "loss": 0.6957, "step": 11042 }, { "epoch": 0.99, "grad_norm": 7.010018216746038, "learning_rate": 5.362672788487132e-06, "loss": 0.738, "step": 11043 }, { "epoch": 0.99, "grad_norm": 6.180877472304474, "learning_rate": 5.3619523162329145e-06, "loss": 0.7165, "step": 11044 }, { "epoch": 0.99, "grad_norm": 6.934431818293741, "learning_rate": 5.361231836423737e-06, "loss": 0.8024, "step": 11045 }, { "epoch": 0.99, "grad_norm": 7.79695194664858, "learning_rate": 5.360511349074634e-06, "loss": 0.7465, "step": 11046 }, { "epoch": 0.99, "grad_norm": 3.871524056988522, "learning_rate": 5.359790854200646e-06, "loss": 0.7095, "step": 11047 }, { "epoch": 0.99, "grad_norm": 4.517145947357526, "learning_rate": 5.359070351816813e-06, "loss": 0.7326, "step": 11048 }, { "epoch": 0.99, "grad_norm": 5.573777801895005, "learning_rate": 5.358349841938171e-06, "loss": 0.7818, "step": 11049 }, { "epoch": 0.99, "grad_norm": 5.494414394083082, "learning_rate": 5.357629324579762e-06, "loss": 0.709, "step": 11050 }, { "epoch": 0.99, "grad_norm": 5.574138597080131, "learning_rate": 5.356908799756624e-06, "loss": 0.7052, "step": 11051 }, { "epoch": 0.99, "grad_norm": 6.746537583242266, "learning_rate": 5.356188267483798e-06, "loss": 0.7231, "step": 11052 }, { "epoch": 0.99, "grad_norm": 4.91583823624544, "learning_rate": 5.3554677277763186e-06, "loss": 0.7404, "step": 11053 }, { "epoch": 0.99, "grad_norm": 4.367306997187707, "learning_rate": 5.35474718064923e-06, "loss": 0.8321, "step": 11054 }, { "epoch": 0.99, "grad_norm": 5.7238502956997275, "learning_rate": 5.354026626117572e-06, "loss": 0.6996, "step": 11055 }, { "epoch": 0.99, "grad_norm": 5.5815414442975175, "learning_rate": 5.353306064196382e-06, "loss": 0.7724, "step": 11056 }, { "epoch": 0.99, "grad_norm": 6.6305107650338515, "learning_rate": 5.352585494900704e-06, "loss": 0.7703, "step": 11057 }, { "epoch": 0.99, "grad_norm": 5.232995229234033, "learning_rate": 5.351864918245574e-06, "loss": 0.711, "step": 11058 }, { "epoch": 0.99, "grad_norm": 6.217542122287174, "learning_rate": 5.351144334246035e-06, "loss": 0.7428, "step": 11059 }, { "epoch": 0.99, "grad_norm": 4.188560929620792, "learning_rate": 5.350423742917125e-06, "loss": 0.7879, "step": 11060 }, { "epoch": 0.99, "grad_norm": 7.214590432888082, "learning_rate": 5.34970314427389e-06, "loss": 0.7533, "step": 11061 }, { "epoch": 0.99, "grad_norm": 5.707725072022295, "learning_rate": 5.348982538331367e-06, "loss": 0.7073, "step": 11062 }, { "epoch": 0.99, "grad_norm": 5.603418241949082, "learning_rate": 5.348261925104597e-06, "loss": 0.7645, "step": 11063 }, { "epoch": 0.99, "grad_norm": 5.536911551471322, "learning_rate": 5.347541304608622e-06, "loss": 0.7006, "step": 11064 }, { "epoch": 0.99, "grad_norm": 6.667297216952301, "learning_rate": 5.3468206768584825e-06, "loss": 0.8391, "step": 11065 }, { "epoch": 0.99, "grad_norm": 5.3362277448489985, "learning_rate": 5.346100041869222e-06, "loss": 0.7298, "step": 11066 }, { "epoch": 0.99, "grad_norm": 5.836542275288702, "learning_rate": 5.345379399655882e-06, "loss": 0.698, "step": 11067 }, { "epoch": 0.99, "grad_norm": 5.020966609390918, "learning_rate": 5.344658750233503e-06, "loss": 0.7337, "step": 11068 }, { "epoch": 0.99, "grad_norm": 5.437600069422203, "learning_rate": 5.343938093617126e-06, "loss": 0.7535, "step": 11069 }, { "epoch": 0.99, "grad_norm": 7.41711096062655, "learning_rate": 5.343217429821795e-06, "loss": 0.7028, "step": 11070 }, { "epoch": 0.99, "grad_norm": 4.875467033523789, "learning_rate": 5.342496758862552e-06, "loss": 0.7131, "step": 11071 }, { "epoch": 0.99, "grad_norm": 6.596263239440667, "learning_rate": 5.3417760807544386e-06, "loss": 0.7458, "step": 11072 }, { "epoch": 0.99, "grad_norm": 5.090267105980639, "learning_rate": 5.341055395512499e-06, "loss": 0.685, "step": 11073 }, { "epoch": 0.99, "grad_norm": 4.369899438742318, "learning_rate": 5.340334703151775e-06, "loss": 0.7364, "step": 11074 }, { "epoch": 0.99, "grad_norm": 5.908078120353521, "learning_rate": 5.339614003687309e-06, "loss": 0.741, "step": 11075 }, { "epoch": 0.99, "grad_norm": 8.698311181713988, "learning_rate": 5.338893297134143e-06, "loss": 0.693, "step": 11076 }, { "epoch": 0.99, "grad_norm": 5.333893968370529, "learning_rate": 5.338172583507324e-06, "loss": 0.744, "step": 11077 }, { "epoch": 0.99, "grad_norm": 5.076349717455652, "learning_rate": 5.3374518628218915e-06, "loss": 0.7614, "step": 11078 }, { "epoch": 0.99, "grad_norm": 5.688289627842928, "learning_rate": 5.33673113509289e-06, "loss": 0.7697, "step": 11079 }, { "epoch": 0.99, "grad_norm": 6.001034098495634, "learning_rate": 5.336010400335366e-06, "loss": 0.7479, "step": 11080 }, { "epoch": 0.99, "grad_norm": 4.293700404860387, "learning_rate": 5.3352896585643585e-06, "loss": 0.7127, "step": 11081 }, { "epoch": 0.99, "grad_norm": 5.320618590815339, "learning_rate": 5.334568909794914e-06, "loss": 0.7957, "step": 11082 }, { "epoch": 0.99, "grad_norm": 4.36013408435718, "learning_rate": 5.333848154042076e-06, "loss": 0.7685, "step": 11083 }, { "epoch": 0.99, "grad_norm": 5.425353350697938, "learning_rate": 5.333127391320889e-06, "loss": 0.7163, "step": 11084 }, { "epoch": 0.99, "grad_norm": 4.500995439363926, "learning_rate": 5.332406621646397e-06, "loss": 0.7429, "step": 11085 }, { "epoch": 0.99, "grad_norm": 3.8887881187478297, "learning_rate": 5.331685845033646e-06, "loss": 0.7087, "step": 11086 }, { "epoch": 0.99, "grad_norm": 4.995144707314016, "learning_rate": 5.330965061497678e-06, "loss": 0.7824, "step": 11087 }, { "epoch": 0.99, "grad_norm": 5.272135191288286, "learning_rate": 5.3302442710535405e-06, "loss": 0.7469, "step": 11088 }, { "epoch": 0.99, "grad_norm": 5.007328459995023, "learning_rate": 5.329523473716276e-06, "loss": 0.7622, "step": 11089 }, { "epoch": 0.99, "grad_norm": 6.44990972062913, "learning_rate": 5.328802669500931e-06, "loss": 0.7835, "step": 11090 }, { "epoch": 0.99, "grad_norm": 5.503068382090159, "learning_rate": 5.328081858422549e-06, "loss": 0.8321, "step": 11091 }, { "epoch": 0.99, "grad_norm": 6.735618697969628, "learning_rate": 5.3273610404961785e-06, "loss": 0.7674, "step": 11092 }, { "epoch": 0.99, "grad_norm": 6.625291475298846, "learning_rate": 5.326640215736863e-06, "loss": 0.7432, "step": 11093 }, { "epoch": 0.99, "grad_norm": 4.90159579411522, "learning_rate": 5.325919384159647e-06, "loss": 0.7451, "step": 11094 }, { "epoch": 0.99, "grad_norm": 5.450107463345857, "learning_rate": 5.32519854577958e-06, "loss": 0.7488, "step": 11095 }, { "epoch": 0.99, "grad_norm": 5.948842562469714, "learning_rate": 5.324477700611704e-06, "loss": 0.7821, "step": 11096 }, { "epoch": 0.99, "grad_norm": 5.441562555587381, "learning_rate": 5.323756848671067e-06, "loss": 0.6984, "step": 11097 }, { "epoch": 0.99, "grad_norm": 4.941430913409872, "learning_rate": 5.323035989972714e-06, "loss": 0.7725, "step": 11098 }, { "epoch": 0.99, "grad_norm": 7.152779379546134, "learning_rate": 5.322315124531692e-06, "loss": 0.7781, "step": 11099 }, { "epoch": 0.99, "grad_norm": 6.455084110953734, "learning_rate": 5.321594252363048e-06, "loss": 0.7319, "step": 11100 }, { "epoch": 0.99, "grad_norm": 6.58101134498056, "learning_rate": 5.320873373481827e-06, "loss": 0.7814, "step": 11101 }, { "epoch": 0.99, "grad_norm": 5.804214450777967, "learning_rate": 5.32015248790308e-06, "loss": 0.7617, "step": 11102 }, { "epoch": 0.99, "grad_norm": 5.496609120332069, "learning_rate": 5.31943159564185e-06, "loss": 0.7617, "step": 11103 }, { "epoch": 0.99, "grad_norm": 4.822165199977477, "learning_rate": 5.318710696713184e-06, "loss": 0.7368, "step": 11104 }, { "epoch": 0.99, "grad_norm": 6.3913241971728745, "learning_rate": 5.317989791132131e-06, "loss": 0.7767, "step": 11105 }, { "epoch": 0.99, "grad_norm": 3.966836248529469, "learning_rate": 5.3172688789137375e-06, "loss": 0.7985, "step": 11106 }, { "epoch": 0.99, "grad_norm": 5.569565415828155, "learning_rate": 5.31654796007305e-06, "loss": 0.7807, "step": 11107 }, { "epoch": 0.99, "grad_norm": 5.855007805784122, "learning_rate": 5.3158270346251175e-06, "loss": 0.6857, "step": 11108 }, { "epoch": 0.99, "grad_norm": 4.7023718898429605, "learning_rate": 5.315106102584987e-06, "loss": 0.7814, "step": 11109 }, { "epoch": 0.99, "grad_norm": 6.9456521524255335, "learning_rate": 5.314385163967708e-06, "loss": 0.7301, "step": 11110 }, { "epoch": 0.99, "grad_norm": 4.674611081080166, "learning_rate": 5.313664218788326e-06, "loss": 0.7883, "step": 11111 }, { "epoch": 0.99, "grad_norm": 4.467399671637663, "learning_rate": 5.312943267061891e-06, "loss": 0.7245, "step": 11112 }, { "epoch": 0.99, "grad_norm": 6.001527424091409, "learning_rate": 5.3122223088034495e-06, "loss": 0.8032, "step": 11113 }, { "epoch": 0.99, "grad_norm": 5.129455901720221, "learning_rate": 5.311501344028052e-06, "loss": 0.6819, "step": 11114 }, { "epoch": 0.99, "grad_norm": 5.97272682739805, "learning_rate": 5.310780372750746e-06, "loss": 0.7385, "step": 11115 }, { "epoch": 0.99, "grad_norm": 6.0324857106532175, "learning_rate": 5.31005939498658e-06, "loss": 0.7178, "step": 11116 }, { "epoch": 0.99, "grad_norm": 6.1917020583694145, "learning_rate": 5.309338410750604e-06, "loss": 0.7157, "step": 11117 }, { "epoch": 0.99, "grad_norm": 5.216383665502591, "learning_rate": 5.3086174200578654e-06, "loss": 0.6789, "step": 11118 }, { "epoch": 0.99, "grad_norm": 5.03592545385452, "learning_rate": 5.3078964229234145e-06, "loss": 0.6922, "step": 11119 }, { "epoch": 0.99, "grad_norm": 5.4534259871065744, "learning_rate": 5.307175419362299e-06, "loss": 0.7069, "step": 11120 }, { "epoch": 0.99, "grad_norm": 7.417561982450537, "learning_rate": 5.306454409389571e-06, "loss": 0.752, "step": 11121 }, { "epoch": 0.99, "grad_norm": 7.202252689188949, "learning_rate": 5.305733393020276e-06, "loss": 0.7489, "step": 11122 }, { "epoch": 0.99, "grad_norm": 5.229794599311198, "learning_rate": 5.305012370269466e-06, "loss": 0.8313, "step": 11123 }, { "epoch": 0.99, "grad_norm": 6.846325936212159, "learning_rate": 5.304291341152193e-06, "loss": 0.779, "step": 11124 }, { "epoch": 0.99, "grad_norm": 4.243116482925004, "learning_rate": 5.303570305683505e-06, "loss": 0.772, "step": 11125 }, { "epoch": 0.99, "grad_norm": 5.784615047089913, "learning_rate": 5.30284926387845e-06, "loss": 0.6988, "step": 11126 }, { "epoch": 0.99, "grad_norm": 9.51757655345791, "learning_rate": 5.3021282157520805e-06, "loss": 0.7724, "step": 11127 }, { "epoch": 0.99, "grad_norm": 6.675569148559299, "learning_rate": 5.301407161319446e-06, "loss": 0.754, "step": 11128 }, { "epoch": 0.99, "grad_norm": 5.335646812850775, "learning_rate": 5.300686100595597e-06, "loss": 0.7535, "step": 11129 }, { "epoch": 0.99, "grad_norm": 6.457738368359142, "learning_rate": 5.299965033595583e-06, "loss": 0.7597, "step": 11130 }, { "epoch": 0.99, "grad_norm": 4.664368820911588, "learning_rate": 5.299243960334458e-06, "loss": 0.6787, "step": 11131 }, { "epoch": 0.99, "grad_norm": 5.508968935688197, "learning_rate": 5.298522880827269e-06, "loss": 0.7746, "step": 11132 }, { "epoch": 0.99, "grad_norm": 5.6085156884599785, "learning_rate": 5.297801795089069e-06, "loss": 0.7747, "step": 11133 }, { "epoch": 0.99, "grad_norm": 5.3766850913658, "learning_rate": 5.297080703134909e-06, "loss": 0.67, "step": 11134 }, { "epoch": 0.99, "grad_norm": 6.349451374275716, "learning_rate": 5.29635960497984e-06, "loss": 0.7498, "step": 11135 }, { "epoch": 0.99, "grad_norm": 4.819946108006879, "learning_rate": 5.295638500638912e-06, "loss": 0.7019, "step": 11136 }, { "epoch": 0.99, "grad_norm": 5.988975181112236, "learning_rate": 5.29491739012718e-06, "loss": 0.8002, "step": 11137 }, { "epoch": 0.99, "grad_norm": 5.425962746352961, "learning_rate": 5.294196273459692e-06, "loss": 0.6879, "step": 11138 }, { "epoch": 0.99, "grad_norm": 6.509832263465465, "learning_rate": 5.293475150651501e-06, "loss": 0.8263, "step": 11139 }, { "epoch": 0.99, "grad_norm": 4.163186818760884, "learning_rate": 5.29275402171766e-06, "loss": 0.7276, "step": 11140 }, { "epoch": 0.99, "grad_norm": 7.2366056447604015, "learning_rate": 5.292032886673219e-06, "loss": 0.6903, "step": 11141 }, { "epoch": 0.99, "grad_norm": 5.024996295916544, "learning_rate": 5.291311745533231e-06, "loss": 0.7363, "step": 11142 }, { "epoch": 0.99, "grad_norm": 6.420552446417939, "learning_rate": 5.290590598312748e-06, "loss": 0.7357, "step": 11143 }, { "epoch": 0.99, "grad_norm": 6.416391416934465, "learning_rate": 5.289869445026823e-06, "loss": 0.8028, "step": 11144 }, { "epoch": 0.99, "grad_norm": 6.83724320236594, "learning_rate": 5.289148285690508e-06, "loss": 0.7402, "step": 11145 }, { "epoch": 0.99, "grad_norm": 3.7418543464112113, "learning_rate": 5.288427120318856e-06, "loss": 0.7111, "step": 11146 }, { "epoch": 0.99, "grad_norm": 4.967061001565642, "learning_rate": 5.28770594892692e-06, "loss": 0.7769, "step": 11147 }, { "epoch": 0.99, "grad_norm": 6.656524098120945, "learning_rate": 5.286984771529752e-06, "loss": 0.8369, "step": 11148 }, { "epoch": 0.99, "grad_norm": 5.496758741244326, "learning_rate": 5.286263588142404e-06, "loss": 0.7336, "step": 11149 }, { "epoch": 0.99, "grad_norm": 4.80131776733186, "learning_rate": 5.285542398779932e-06, "loss": 0.7299, "step": 11150 }, { "epoch": 0.99, "grad_norm": 6.255954373363686, "learning_rate": 5.284821203457387e-06, "loss": 0.7618, "step": 11151 }, { "epoch": 0.99, "grad_norm": 6.6399007238879575, "learning_rate": 5.284100002189824e-06, "loss": 0.7557, "step": 11152 }, { "epoch": 0.99, "grad_norm": 6.646126179423611, "learning_rate": 5.283378794992297e-06, "loss": 0.7998, "step": 11153 }, { "epoch": 1.0, "grad_norm": 5.551077985229171, "learning_rate": 5.282657581879855e-06, "loss": 0.7077, "step": 11154 }, { "epoch": 1.0, "grad_norm": 4.887965551964678, "learning_rate": 5.281936362867557e-06, "loss": 0.784, "step": 11155 }, { "epoch": 1.0, "grad_norm": 5.062014855435554, "learning_rate": 5.281215137970455e-06, "loss": 0.7241, "step": 11156 }, { "epoch": 1.0, "grad_norm": 5.917226639235365, "learning_rate": 5.280493907203601e-06, "loss": 0.7768, "step": 11157 }, { "epoch": 1.0, "grad_norm": 4.889973499642333, "learning_rate": 5.279772670582051e-06, "loss": 0.7557, "step": 11158 }, { "epoch": 1.0, "grad_norm": 5.6137971153273805, "learning_rate": 5.279051428120862e-06, "loss": 0.7933, "step": 11159 }, { "epoch": 1.0, "grad_norm": 6.380429543675351, "learning_rate": 5.278330179835084e-06, "loss": 0.7302, "step": 11160 }, { "epoch": 1.0, "grad_norm": 4.799749573624482, "learning_rate": 5.2776089257397725e-06, "loss": 0.7327, "step": 11161 }, { "epoch": 1.0, "grad_norm": 4.868394833331379, "learning_rate": 5.276887665849982e-06, "loss": 0.8077, "step": 11162 }, { "epoch": 1.0, "grad_norm": 6.4912062246692335, "learning_rate": 5.27616640018077e-06, "loss": 0.7417, "step": 11163 }, { "epoch": 1.0, "grad_norm": 5.834906321563252, "learning_rate": 5.275445128747188e-06, "loss": 0.7388, "step": 11164 }, { "epoch": 1.0, "grad_norm": 4.655867120785422, "learning_rate": 5.274723851564291e-06, "loss": 0.7743, "step": 11165 }, { "epoch": 1.0, "grad_norm": 4.291826937812054, "learning_rate": 5.274002568647136e-06, "loss": 0.7713, "step": 11166 }, { "epoch": 1.0, "grad_norm": 5.568515272696508, "learning_rate": 5.273281280010779e-06, "loss": 0.8011, "step": 11167 }, { "epoch": 1.0, "grad_norm": 7.121924475269006, "learning_rate": 5.2725599856702726e-06, "loss": 0.7356, "step": 11168 }, { "epoch": 1.0, "grad_norm": 4.699011438143868, "learning_rate": 5.271838685640673e-06, "loss": 0.7521, "step": 11169 }, { "epoch": 1.0, "grad_norm": 5.980007856629774, "learning_rate": 5.271117379937036e-06, "loss": 0.7627, "step": 11170 }, { "epoch": 1.0, "grad_norm": 5.66795948985015, "learning_rate": 5.270396068574418e-06, "loss": 0.823, "step": 11171 }, { "epoch": 1.0, "grad_norm": 6.194293718619441, "learning_rate": 5.269674751567875e-06, "loss": 0.718, "step": 11172 }, { "epoch": 1.0, "grad_norm": 4.425510881431933, "learning_rate": 5.26895342893246e-06, "loss": 0.7227, "step": 11173 }, { "epoch": 1.0, "grad_norm": 4.969507683872716, "learning_rate": 5.268232100683233e-06, "loss": 0.75, "step": 11174 }, { "epoch": 1.0, "grad_norm": 5.810486147772585, "learning_rate": 5.2675107668352484e-06, "loss": 0.7721, "step": 11175 }, { "epoch": 1.0, "grad_norm": 5.6796142905639115, "learning_rate": 5.266789427403562e-06, "loss": 0.6626, "step": 11176 }, { "epoch": 1.0, "grad_norm": 6.250851617819861, "learning_rate": 5.26606808240323e-06, "loss": 0.7819, "step": 11177 }, { "epoch": 1.0, "grad_norm": 6.237223959406041, "learning_rate": 5.265346731849311e-06, "loss": 0.6466, "step": 11178 }, { "epoch": 1.0, "grad_norm": 4.409894501679666, "learning_rate": 5.264625375756858e-06, "loss": 0.7991, "step": 11179 }, { "epoch": 1.0, "grad_norm": 7.5321797721443895, "learning_rate": 5.26390401414093e-06, "loss": 0.7389, "step": 11180 }, { "epoch": 1.0, "grad_norm": 4.927430400664432, "learning_rate": 5.263182647016585e-06, "loss": 0.7599, "step": 11181 }, { "epoch": 1.0, "grad_norm": 5.367990838837778, "learning_rate": 5.262461274398879e-06, "loss": 0.7427, "step": 11182 }, { "epoch": 1.0, "grad_norm": 6.673366907806207, "learning_rate": 5.261739896302867e-06, "loss": 0.7264, "step": 11183 }, { "epoch": 1.0, "grad_norm": 5.659779488131939, "learning_rate": 5.261018512743607e-06, "loss": 0.7585, "step": 11184 }, { "epoch": 1.0, "grad_norm": 6.399644321678648, "learning_rate": 5.26029712373616e-06, "loss": 0.7786, "step": 11185 }, { "epoch": 1.0, "grad_norm": 5.3341215573417236, "learning_rate": 5.259575729295579e-06, "loss": 0.6887, "step": 11186 }, { "epoch": 1.0, "grad_norm": 5.153505715991838, "learning_rate": 5.258854329436923e-06, "loss": 0.7236, "step": 11187 }, { "epoch": 1.0, "grad_norm": 7.464931435434329, "learning_rate": 5.258132924175251e-06, "loss": 0.8749, "step": 11188 }, { "epoch": 1.0, "grad_norm": 5.683147710879489, "learning_rate": 5.257411513525619e-06, "loss": 0.7581, "step": 11189 }, { "epoch": 1.0, "grad_norm": 6.181117449786323, "learning_rate": 5.256690097503084e-06, "loss": 0.7166, "step": 11190 }, { "epoch": 1.0, "grad_norm": 5.351594563678765, "learning_rate": 5.255968676122707e-06, "loss": 0.7148, "step": 11191 }, { "epoch": 1.0, "grad_norm": 5.3658055852887445, "learning_rate": 5.255247249399544e-06, "loss": 0.719, "step": 11192 }, { "epoch": 1.0, "grad_norm": 7.754852526676091, "learning_rate": 5.2545258173486525e-06, "loss": 0.7795, "step": 11193 }, { "epoch": 1.0, "grad_norm": 5.84950436200714, "learning_rate": 5.253804379985094e-06, "loss": 0.7463, "step": 11194 }, { "epoch": 1.0, "grad_norm": 7.323464547394593, "learning_rate": 5.253082937323923e-06, "loss": 0.702, "step": 11195 }, { "epoch": 1.0, "grad_norm": 6.255173612874839, "learning_rate": 5.252361489380201e-06, "loss": 0.759, "step": 11196 }, { "epoch": 1.0, "grad_norm": 6.651375860622127, "learning_rate": 5.2516400361689846e-06, "loss": 0.7888, "step": 11197 }, { "epoch": 1.0, "grad_norm": 7.691488313503528, "learning_rate": 5.250918577705335e-06, "loss": 0.7758, "step": 11198 }, { "epoch": 1.0, "grad_norm": 5.339017942335485, "learning_rate": 5.2501971140043085e-06, "loss": 0.8201, "step": 11199 }, { "epoch": 1.0, "grad_norm": 5.588977533193598, "learning_rate": 5.2494756450809644e-06, "loss": 0.7573, "step": 11200 }, { "epoch": 1.0, "grad_norm": 5.49654528048211, "learning_rate": 5.248754170950365e-06, "loss": 0.7584, "step": 11201 }, { "epoch": 1.0, "grad_norm": 6.55074267439653, "learning_rate": 5.248032691627565e-06, "loss": 0.75, "step": 11202 }, { "epoch": 1.0, "grad_norm": 6.4186145457978085, "learning_rate": 5.247311207127625e-06, "loss": 0.8155, "step": 11203 }, { "epoch": 1.0, "grad_norm": 6.817904208077326, "learning_rate": 5.246589717465609e-06, "loss": 0.7919, "step": 11204 }, { "epoch": 1.0, "grad_norm": 5.612027768834955, "learning_rate": 5.24586822265657e-06, "loss": 0.7815, "step": 11205 }, { "epoch": 1.0, "grad_norm": 5.736399329693415, "learning_rate": 5.245146722715571e-06, "loss": 0.8042, "step": 11206 }, { "epoch": 1.0, "grad_norm": 4.609914591005141, "learning_rate": 5.244425217657671e-06, "loss": 0.7256, "step": 11207 }, { "epoch": 1.0, "grad_norm": 5.833565041493093, "learning_rate": 5.243703707497928e-06, "loss": 0.7549, "step": 11208 }, { "epoch": 1.0, "grad_norm": 5.1984447723944385, "learning_rate": 5.242982192251405e-06, "loss": 0.7457, "step": 11209 }, { "epoch": 1.0, "grad_norm": 5.597889396423226, "learning_rate": 5.242260671933162e-06, "loss": 0.7081, "step": 11210 }, { "epoch": 1.0, "grad_norm": 5.748625060787291, "learning_rate": 5.2415391465582574e-06, "loss": 0.6447, "step": 11211 }, { "epoch": 1.0, "grad_norm": 5.062348679614206, "learning_rate": 5.240817616141751e-06, "loss": 0.6636, "step": 11212 }, { "epoch": 1.0, "grad_norm": 5.3993536315792925, "learning_rate": 5.240096080698704e-06, "loss": 0.5966, "step": 11213 }, { "epoch": 1.0, "grad_norm": 8.930451492117777, "learning_rate": 5.23937454024418e-06, "loss": 0.6387, "step": 11214 }, { "epoch": 1.0, "grad_norm": 6.238289855437982, "learning_rate": 5.238652994793234e-06, "loss": 0.653, "step": 11215 }, { "epoch": 1.0, "grad_norm": 6.247595818884971, "learning_rate": 5.2379314443609305e-06, "loss": 0.6438, "step": 11216 }, { "epoch": 1.0, "grad_norm": 5.314688446725929, "learning_rate": 5.237209888962329e-06, "loss": 0.6966, "step": 11217 }, { "epoch": 1.0, "grad_norm": 3.5633923668492025, "learning_rate": 5.236488328612491e-06, "loss": 0.6336, "step": 11218 }, { "epoch": 1.0, "grad_norm": 6.268957245928129, "learning_rate": 5.235766763326475e-06, "loss": 0.6396, "step": 11219 }, { "epoch": 1.0, "grad_norm": 5.611266944803511, "learning_rate": 5.235045193119347e-06, "loss": 0.5682, "step": 11220 }, { "epoch": 1.0, "grad_norm": 6.540538641915241, "learning_rate": 5.234323618006163e-06, "loss": 0.6084, "step": 11221 }, { "epoch": 1.0, "grad_norm": 5.108061926770547, "learning_rate": 5.233602038001988e-06, "loss": 0.6118, "step": 11222 }, { "epoch": 1.0, "grad_norm": 6.024405065718537, "learning_rate": 5.232880453121881e-06, "loss": 0.6483, "step": 11223 }, { "epoch": 1.0, "grad_norm": 5.859788820733571, "learning_rate": 5.232158863380905e-06, "loss": 0.5988, "step": 11224 }, { "epoch": 1.0, "grad_norm": 4.543978678386403, "learning_rate": 5.231437268794123e-06, "loss": 0.628, "step": 11225 }, { "epoch": 1.0, "grad_norm": 4.696374092180809, "learning_rate": 5.230715669376594e-06, "loss": 0.6055, "step": 11226 }, { "epoch": 1.0, "grad_norm": 5.2664878234742565, "learning_rate": 5.229994065143379e-06, "loss": 0.6701, "step": 11227 }, { "epoch": 1.0, "grad_norm": 6.030958593596821, "learning_rate": 5.229272456109541e-06, "loss": 0.6336, "step": 11228 }, { "epoch": 1.0, "grad_norm": 5.497627142481068, "learning_rate": 5.228550842290146e-06, "loss": 0.6671, "step": 11229 }, { "epoch": 1.0, "grad_norm": 5.709481196881171, "learning_rate": 5.227829223700249e-06, "loss": 0.7214, "step": 11230 }, { "epoch": 1.0, "grad_norm": 6.36234947262567, "learning_rate": 5.227107600354917e-06, "loss": 0.5956, "step": 11231 }, { "epoch": 1.0, "grad_norm": 7.723871360136007, "learning_rate": 5.2263859722692125e-06, "loss": 0.6789, "step": 11232 }, { "epoch": 1.0, "grad_norm": 6.284392976485072, "learning_rate": 5.225664339458197e-06, "loss": 0.6497, "step": 11233 }, { "epoch": 1.0, "grad_norm": 6.0378673568909695, "learning_rate": 5.224942701936931e-06, "loss": 0.5965, "step": 11234 }, { "epoch": 1.0, "grad_norm": 6.414616625115518, "learning_rate": 5.224221059720478e-06, "loss": 0.5578, "step": 11235 }, { "epoch": 1.0, "grad_norm": 5.877565732033814, "learning_rate": 5.223499412823904e-06, "loss": 0.5942, "step": 11236 }, { "epoch": 1.0, "grad_norm": 5.309384903287145, "learning_rate": 5.222777761262266e-06, "loss": 0.7337, "step": 11237 }, { "epoch": 1.0, "grad_norm": 6.343336920215003, "learning_rate": 5.222056105050632e-06, "loss": 0.6545, "step": 11238 }, { "epoch": 1.0, "grad_norm": 4.351688957636159, "learning_rate": 5.221334444204063e-06, "loss": 0.6215, "step": 11239 }, { "epoch": 1.0, "grad_norm": 7.007742946798826, "learning_rate": 5.220612778737623e-06, "loss": 0.6234, "step": 11240 }, { "epoch": 1.0, "grad_norm": 6.505872130936188, "learning_rate": 5.219891108666374e-06, "loss": 0.6134, "step": 11241 }, { "epoch": 1.0, "grad_norm": 6.262369688442417, "learning_rate": 5.219169434005379e-06, "loss": 0.6429, "step": 11242 }, { "epoch": 1.0, "grad_norm": 6.6723811399728215, "learning_rate": 5.218447754769702e-06, "loss": 0.5991, "step": 11243 }, { "epoch": 1.0, "grad_norm": 4.563464628838593, "learning_rate": 5.217726070974406e-06, "loss": 0.616, "step": 11244 }, { "epoch": 1.0, "grad_norm": 4.987681432158209, "learning_rate": 5.2170043826345575e-06, "loss": 0.6316, "step": 11245 }, { "epoch": 1.0, "grad_norm": 5.753575118632956, "learning_rate": 5.216282689765215e-06, "loss": 0.6656, "step": 11246 }, { "epoch": 1.0, "grad_norm": 5.573892396098607, "learning_rate": 5.2155609923814465e-06, "loss": 0.6179, "step": 11247 }, { "epoch": 1.0, "grad_norm": 5.381419439276616, "learning_rate": 5.214839290498313e-06, "loss": 0.6519, "step": 11248 }, { "epoch": 1.0, "grad_norm": 4.067877224474092, "learning_rate": 5.214117584130882e-06, "loss": 0.5643, "step": 11249 }, { "epoch": 1.0, "grad_norm": 5.557330105462775, "learning_rate": 5.213395873294213e-06, "loss": 0.6092, "step": 11250 }, { "epoch": 1.0, "grad_norm": 6.034332328434427, "learning_rate": 5.2126741580033735e-06, "loss": 0.6496, "step": 11251 }, { "epoch": 1.0, "grad_norm": 6.355680397065146, "learning_rate": 5.211952438273428e-06, "loss": 0.6821, "step": 11252 }, { "epoch": 1.0, "grad_norm": 6.123372540034472, "learning_rate": 5.211230714119437e-06, "loss": 0.628, "step": 11253 }, { "epoch": 1.0, "grad_norm": 7.029464815059172, "learning_rate": 5.2105089855564695e-06, "loss": 0.6615, "step": 11254 }, { "epoch": 1.0, "grad_norm": 4.764609167748253, "learning_rate": 5.209787252599589e-06, "loss": 0.5431, "step": 11255 }, { "epoch": 1.0, "grad_norm": 6.288690533502914, "learning_rate": 5.209065515263856e-06, "loss": 0.6318, "step": 11256 }, { "epoch": 1.0, "grad_norm": 6.509925678063468, "learning_rate": 5.208343773564338e-06, "loss": 0.6449, "step": 11257 }, { "epoch": 1.0, "grad_norm": 7.33513383864488, "learning_rate": 5.2076220275161026e-06, "loss": 0.5876, "step": 11258 }, { "epoch": 1.0, "grad_norm": 5.979975109683134, "learning_rate": 5.20690027713421e-06, "loss": 0.5983, "step": 11259 }, { "epoch": 1.0, "grad_norm": 6.315631701488811, "learning_rate": 5.206178522433728e-06, "loss": 0.6531, "step": 11260 }, { "epoch": 1.0, "grad_norm": 4.604982178387352, "learning_rate": 5.205456763429721e-06, "loss": 0.6059, "step": 11261 }, { "epoch": 1.0, "grad_norm": 5.986883389002421, "learning_rate": 5.204735000137255e-06, "loss": 0.59, "step": 11262 }, { "epoch": 1.0, "grad_norm": 6.736180465463961, "learning_rate": 5.204013232571393e-06, "loss": 0.7132, "step": 11263 }, { "epoch": 1.0, "grad_norm": 5.712472845251881, "learning_rate": 5.203291460747202e-06, "loss": 0.5942, "step": 11264 }, { "epoch": 1.0, "grad_norm": 7.084005484462115, "learning_rate": 5.202569684679747e-06, "loss": 0.6562, "step": 11265 }, { "epoch": 1.01, "grad_norm": 7.45185463871948, "learning_rate": 5.201847904384092e-06, "loss": 0.6085, "step": 11266 }, { "epoch": 1.01, "grad_norm": 5.676436496535014, "learning_rate": 5.201126119875306e-06, "loss": 0.5681, "step": 11267 }, { "epoch": 1.01, "grad_norm": 8.133483041093939, "learning_rate": 5.200404331168453e-06, "loss": 0.6041, "step": 11268 }, { "epoch": 1.01, "grad_norm": 6.873792717964926, "learning_rate": 5.199682538278596e-06, "loss": 0.6291, "step": 11269 }, { "epoch": 1.01, "grad_norm": 5.15650307295505, "learning_rate": 5.198960741220804e-06, "loss": 0.6082, "step": 11270 }, { "epoch": 1.01, "grad_norm": 5.694406860820382, "learning_rate": 5.198238940010144e-06, "loss": 0.5986, "step": 11271 }, { "epoch": 1.01, "grad_norm": 7.496649946112167, "learning_rate": 5.197517134661678e-06, "loss": 0.5993, "step": 11272 }, { "epoch": 1.01, "grad_norm": 5.718472022820167, "learning_rate": 5.1967953251904755e-06, "loss": 0.6241, "step": 11273 }, { "epoch": 1.01, "grad_norm": 7.20808586463834, "learning_rate": 5.196073511611602e-06, "loss": 0.6282, "step": 11274 }, { "epoch": 1.01, "grad_norm": 7.288236761807121, "learning_rate": 5.195351693940123e-06, "loss": 0.6424, "step": 11275 }, { "epoch": 1.01, "grad_norm": 5.255930015952369, "learning_rate": 5.194629872191106e-06, "loss": 0.6187, "step": 11276 }, { "epoch": 1.01, "grad_norm": 5.469966388699414, "learning_rate": 5.193908046379616e-06, "loss": 0.6474, "step": 11277 }, { "epoch": 1.01, "grad_norm": 6.076805105153063, "learning_rate": 5.193186216520719e-06, "loss": 0.6385, "step": 11278 }, { "epoch": 1.01, "grad_norm": 5.07908578794181, "learning_rate": 5.192464382629484e-06, "loss": 0.6007, "step": 11279 }, { "epoch": 1.01, "grad_norm": 4.811831026601546, "learning_rate": 5.191742544720977e-06, "loss": 0.6071, "step": 11280 }, { "epoch": 1.01, "grad_norm": 5.508781304423288, "learning_rate": 5.191020702810262e-06, "loss": 0.6485, "step": 11281 }, { "epoch": 1.01, "grad_norm": 4.899720311240473, "learning_rate": 5.1902988569124105e-06, "loss": 0.6207, "step": 11282 }, { "epoch": 1.01, "grad_norm": 5.039429607760431, "learning_rate": 5.189577007042487e-06, "loss": 0.5611, "step": 11283 }, { "epoch": 1.01, "grad_norm": 5.954435534763285, "learning_rate": 5.188855153215557e-06, "loss": 0.5884, "step": 11284 }, { "epoch": 1.01, "grad_norm": 6.033904674662508, "learning_rate": 5.18813329544669e-06, "loss": 0.5819, "step": 11285 }, { "epoch": 1.01, "grad_norm": 4.774354861066126, "learning_rate": 5.187411433750953e-06, "loss": 0.6301, "step": 11286 }, { "epoch": 1.01, "grad_norm": 7.414579316868729, "learning_rate": 5.186689568143413e-06, "loss": 0.5818, "step": 11287 }, { "epoch": 1.01, "grad_norm": 5.586596101825275, "learning_rate": 5.185967698639136e-06, "loss": 0.6479, "step": 11288 }, { "epoch": 1.01, "grad_norm": 6.54473033147053, "learning_rate": 5.1852458252531905e-06, "loss": 0.5921, "step": 11289 }, { "epoch": 1.01, "grad_norm": 5.636608948701973, "learning_rate": 5.1845239480006456e-06, "loss": 0.6375, "step": 11290 }, { "epoch": 1.01, "grad_norm": 5.738915246304771, "learning_rate": 5.183802066896567e-06, "loss": 0.6651, "step": 11291 }, { "epoch": 1.01, "grad_norm": 6.1904329214343, "learning_rate": 5.183080181956022e-06, "loss": 0.5846, "step": 11292 }, { "epoch": 1.01, "grad_norm": 5.824754184344088, "learning_rate": 5.18235829319408e-06, "loss": 0.6611, "step": 11293 }, { "epoch": 1.01, "grad_norm": 7.014528694596895, "learning_rate": 5.181636400625807e-06, "loss": 0.6666, "step": 11294 }, { "epoch": 1.01, "grad_norm": 4.571633673316373, "learning_rate": 5.1809145042662715e-06, "loss": 0.5989, "step": 11295 }, { "epoch": 1.01, "grad_norm": 7.318148196236044, "learning_rate": 5.180192604130543e-06, "loss": 0.6382, "step": 11296 }, { "epoch": 1.01, "grad_norm": 5.133725040963876, "learning_rate": 5.179470700233689e-06, "loss": 0.5716, "step": 11297 }, { "epoch": 1.01, "grad_norm": 6.0688069296680425, "learning_rate": 5.178748792590775e-06, "loss": 0.6643, "step": 11298 }, { "epoch": 1.01, "grad_norm": 6.923699030955574, "learning_rate": 5.178026881216873e-06, "loss": 0.6344, "step": 11299 }, { "epoch": 1.01, "grad_norm": 5.84547869303841, "learning_rate": 5.17730496612705e-06, "loss": 0.6338, "step": 11300 }, { "epoch": 1.01, "grad_norm": 8.434718993274839, "learning_rate": 5.1765830473363734e-06, "loss": 0.5738, "step": 11301 }, { "epoch": 1.01, "grad_norm": 6.423533308120808, "learning_rate": 5.175861124859914e-06, "loss": 0.6633, "step": 11302 }, { "epoch": 1.01, "grad_norm": 5.697385366193451, "learning_rate": 5.175139198712737e-06, "loss": 0.5872, "step": 11303 }, { "epoch": 1.01, "grad_norm": 5.891422887680415, "learning_rate": 5.174417268909913e-06, "loss": 0.5506, "step": 11304 }, { "epoch": 1.01, "grad_norm": 7.322485467255316, "learning_rate": 5.17369533546651e-06, "loss": 0.6341, "step": 11305 }, { "epoch": 1.01, "grad_norm": 6.355660539530603, "learning_rate": 5.172973398397599e-06, "loss": 0.6686, "step": 11306 }, { "epoch": 1.01, "grad_norm": 6.871315290234635, "learning_rate": 5.172251457718245e-06, "loss": 0.6038, "step": 11307 }, { "epoch": 1.01, "grad_norm": 5.184692412395197, "learning_rate": 5.171529513443519e-06, "loss": 0.5729, "step": 11308 }, { "epoch": 1.01, "grad_norm": 6.969204663915408, "learning_rate": 5.170807565588493e-06, "loss": 0.6186, "step": 11309 }, { "epoch": 1.01, "grad_norm": 5.569334678967067, "learning_rate": 5.17008561416823e-06, "loss": 0.6076, "step": 11310 }, { "epoch": 1.01, "grad_norm": 5.523142912042274, "learning_rate": 5.169363659197802e-06, "loss": 0.6096, "step": 11311 }, { "epoch": 1.01, "grad_norm": 5.73092529076704, "learning_rate": 5.168641700692283e-06, "loss": 0.5501, "step": 11312 }, { "epoch": 1.01, "grad_norm": 5.8714193386360805, "learning_rate": 5.167919738666735e-06, "loss": 0.6165, "step": 11313 }, { "epoch": 1.01, "grad_norm": 7.258713871809443, "learning_rate": 5.167197773136229e-06, "loss": 0.598, "step": 11314 }, { "epoch": 1.01, "grad_norm": 6.947388269975274, "learning_rate": 5.166475804115837e-06, "loss": 0.5948, "step": 11315 }, { "epoch": 1.01, "grad_norm": 6.736932915945489, "learning_rate": 5.165753831620626e-06, "loss": 0.6708, "step": 11316 }, { "epoch": 1.01, "grad_norm": 5.153098203638437, "learning_rate": 5.165031855665668e-06, "loss": 0.5855, "step": 11317 }, { "epoch": 1.01, "grad_norm": 4.562760381941685, "learning_rate": 5.1643098762660315e-06, "loss": 0.5721, "step": 11318 }, { "epoch": 1.01, "grad_norm": 8.506232744561556, "learning_rate": 5.163587893436785e-06, "loss": 0.6036, "step": 11319 }, { "epoch": 1.01, "grad_norm": 9.770030512475426, "learning_rate": 5.162865907193001e-06, "loss": 0.626, "step": 11320 }, { "epoch": 1.01, "grad_norm": 4.3090432061831825, "learning_rate": 5.162143917549746e-06, "loss": 0.6524, "step": 11321 }, { "epoch": 1.01, "grad_norm": 5.779302475295813, "learning_rate": 5.161421924522093e-06, "loss": 0.5916, "step": 11322 }, { "epoch": 1.01, "grad_norm": 8.833145520561269, "learning_rate": 5.16069992812511e-06, "loss": 0.5944, "step": 11323 }, { "epoch": 1.01, "grad_norm": 5.011355850799018, "learning_rate": 5.1599779283738695e-06, "loss": 0.6226, "step": 11324 }, { "epoch": 1.01, "grad_norm": 7.183863226093159, "learning_rate": 5.159255925283439e-06, "loss": 0.605, "step": 11325 }, { "epoch": 1.01, "grad_norm": 6.0740766451245385, "learning_rate": 5.158533918868891e-06, "loss": 0.5745, "step": 11326 }, { "epoch": 1.01, "grad_norm": 5.878321986070727, "learning_rate": 5.157811909145293e-06, "loss": 0.5789, "step": 11327 }, { "epoch": 1.01, "grad_norm": 5.282948427254995, "learning_rate": 5.157089896127718e-06, "loss": 0.6432, "step": 11328 }, { "epoch": 1.01, "grad_norm": 4.564376770122776, "learning_rate": 5.1563678798312344e-06, "loss": 0.5911, "step": 11329 }, { "epoch": 1.01, "grad_norm": 4.838746964893542, "learning_rate": 5.155645860270914e-06, "loss": 0.6105, "step": 11330 }, { "epoch": 1.01, "grad_norm": 5.7145180033186485, "learning_rate": 5.154923837461828e-06, "loss": 0.5988, "step": 11331 }, { "epoch": 1.01, "grad_norm": 8.442825008992337, "learning_rate": 5.154201811419046e-06, "loss": 0.6685, "step": 11332 }, { "epoch": 1.01, "grad_norm": 5.595385631103522, "learning_rate": 5.153479782157639e-06, "loss": 0.6457, "step": 11333 }, { "epoch": 1.01, "grad_norm": 5.34937284454787, "learning_rate": 5.152757749692676e-06, "loss": 0.5737, "step": 11334 }, { "epoch": 1.01, "grad_norm": 6.591145974024, "learning_rate": 5.152035714039232e-06, "loss": 0.6489, "step": 11335 }, { "epoch": 1.01, "grad_norm": 7.402016566084002, "learning_rate": 5.151313675212374e-06, "loss": 0.6102, "step": 11336 }, { "epoch": 1.01, "grad_norm": 6.1229472353528624, "learning_rate": 5.150591633227174e-06, "loss": 0.6238, "step": 11337 }, { "epoch": 1.01, "grad_norm": 5.231695503645207, "learning_rate": 5.1498695880987046e-06, "loss": 0.5882, "step": 11338 }, { "epoch": 1.01, "grad_norm": 7.151166310107379, "learning_rate": 5.149147539842034e-06, "loss": 0.591, "step": 11339 }, { "epoch": 1.01, "grad_norm": 7.861912067328402, "learning_rate": 5.148425488472238e-06, "loss": 0.6251, "step": 11340 }, { "epoch": 1.01, "grad_norm": 5.348021636205391, "learning_rate": 5.147703434004383e-06, "loss": 0.6583, "step": 11341 }, { "epoch": 1.01, "grad_norm": 5.4612281392657325, "learning_rate": 5.146981376453542e-06, "loss": 0.6122, "step": 11342 }, { "epoch": 1.01, "grad_norm": 5.41400060830754, "learning_rate": 5.1462593158347875e-06, "loss": 0.5711, "step": 11343 }, { "epoch": 1.01, "grad_norm": 7.327502499633114, "learning_rate": 5.14553725216319e-06, "loss": 0.6151, "step": 11344 }, { "epoch": 1.01, "grad_norm": 4.75494020510985, "learning_rate": 5.1448151854538205e-06, "loss": 0.642, "step": 11345 }, { "epoch": 1.01, "grad_norm": 7.484040052999811, "learning_rate": 5.144093115721751e-06, "loss": 0.634, "step": 11346 }, { "epoch": 1.01, "grad_norm": 6.086948507403673, "learning_rate": 5.143371042982055e-06, "loss": 0.634, "step": 11347 }, { "epoch": 1.01, "grad_norm": 5.0331070225793235, "learning_rate": 5.142648967249802e-06, "loss": 0.6161, "step": 11348 }, { "epoch": 1.01, "grad_norm": 7.315633092783068, "learning_rate": 5.141926888540063e-06, "loss": 0.6645, "step": 11349 }, { "epoch": 1.01, "grad_norm": 4.4106214879668295, "learning_rate": 5.141204806867912e-06, "loss": 0.5429, "step": 11350 }, { "epoch": 1.01, "grad_norm": 8.334803539631114, "learning_rate": 5.140482722248419e-06, "loss": 0.6336, "step": 11351 }, { "epoch": 1.01, "grad_norm": 5.394827787517125, "learning_rate": 5.139760634696656e-06, "loss": 0.6273, "step": 11352 }, { "epoch": 1.01, "grad_norm": 6.905956357096545, "learning_rate": 5.139038544227698e-06, "loss": 0.6243, "step": 11353 }, { "epoch": 1.01, "grad_norm": 8.014263228755812, "learning_rate": 5.138316450856613e-06, "loss": 0.6195, "step": 11354 }, { "epoch": 1.01, "grad_norm": 5.836077695173028, "learning_rate": 5.137594354598477e-06, "loss": 0.6169, "step": 11355 }, { "epoch": 1.01, "grad_norm": 5.1367932210217955, "learning_rate": 5.136872255468358e-06, "loss": 0.5456, "step": 11356 }, { "epoch": 1.01, "grad_norm": 5.5981287360020495, "learning_rate": 5.136150153481333e-06, "loss": 0.6165, "step": 11357 }, { "epoch": 1.01, "grad_norm": 7.085463309280587, "learning_rate": 5.1354280486524696e-06, "loss": 0.6242, "step": 11358 }, { "epoch": 1.01, "grad_norm": 6.110363330752762, "learning_rate": 5.134705940996843e-06, "loss": 0.6686, "step": 11359 }, { "epoch": 1.01, "grad_norm": 7.299276794193033, "learning_rate": 5.133983830529524e-06, "loss": 0.6537, "step": 11360 }, { "epoch": 1.01, "grad_norm": 5.325401268716434, "learning_rate": 5.133261717265586e-06, "loss": 0.5777, "step": 11361 }, { "epoch": 1.01, "grad_norm": 9.333297579746171, "learning_rate": 5.132539601220104e-06, "loss": 0.6107, "step": 11362 }, { "epoch": 1.01, "grad_norm": 7.3752934644884505, "learning_rate": 5.1318174824081465e-06, "loss": 0.6551, "step": 11363 }, { "epoch": 1.01, "grad_norm": 5.351167156154887, "learning_rate": 5.131095360844787e-06, "loss": 0.6338, "step": 11364 }, { "epoch": 1.01, "grad_norm": 5.636800416642211, "learning_rate": 5.130373236545099e-06, "loss": 0.6864, "step": 11365 }, { "epoch": 1.01, "grad_norm": 5.51638844958901, "learning_rate": 5.1296511095241565e-06, "loss": 0.6207, "step": 11366 }, { "epoch": 1.01, "grad_norm": 8.746745259314302, "learning_rate": 5.1289289797970295e-06, "loss": 0.6467, "step": 11367 }, { "epoch": 1.01, "grad_norm": 6.854676511438797, "learning_rate": 5.1282068473787925e-06, "loss": 0.7029, "step": 11368 }, { "epoch": 1.01, "grad_norm": 6.69968387974251, "learning_rate": 5.127484712284519e-06, "loss": 0.5428, "step": 11369 }, { "epoch": 1.01, "grad_norm": 8.134382169637025, "learning_rate": 5.126762574529283e-06, "loss": 0.6022, "step": 11370 }, { "epoch": 1.01, "grad_norm": 7.759521435004208, "learning_rate": 5.126040434128153e-06, "loss": 0.6066, "step": 11371 }, { "epoch": 1.01, "grad_norm": 5.393437943141763, "learning_rate": 5.125318291096205e-06, "loss": 0.6939, "step": 11372 }, { "epoch": 1.01, "grad_norm": 6.8047975171466835, "learning_rate": 5.124596145448514e-06, "loss": 0.6618, "step": 11373 }, { "epoch": 1.01, "grad_norm": 6.247199510972823, "learning_rate": 5.12387399720015e-06, "loss": 0.6096, "step": 11374 }, { "epoch": 1.01, "grad_norm": 6.5524838837178825, "learning_rate": 5.123151846366188e-06, "loss": 0.6278, "step": 11375 }, { "epoch": 1.01, "grad_norm": 6.665490253629418, "learning_rate": 5.122429692961701e-06, "loss": 0.6218, "step": 11376 }, { "epoch": 1.01, "grad_norm": 4.194413799344783, "learning_rate": 5.121707537001764e-06, "loss": 0.5666, "step": 11377 }, { "epoch": 1.02, "grad_norm": 7.300511763302451, "learning_rate": 5.120985378501445e-06, "loss": 0.6404, "step": 11378 }, { "epoch": 1.02, "grad_norm": 8.216023387768901, "learning_rate": 5.120263217475825e-06, "loss": 0.6223, "step": 11379 }, { "epoch": 1.02, "grad_norm": 10.961711852198805, "learning_rate": 5.119541053939971e-06, "loss": 0.6037, "step": 11380 }, { "epoch": 1.02, "grad_norm": 7.318922298290036, "learning_rate": 5.118818887908959e-06, "loss": 0.5804, "step": 11381 }, { "epoch": 1.02, "grad_norm": 7.836134337484413, "learning_rate": 5.118096719397865e-06, "loss": 0.5829, "step": 11382 }, { "epoch": 1.02, "grad_norm": 6.489477004067507, "learning_rate": 5.11737454842176e-06, "loss": 0.5761, "step": 11383 }, { "epoch": 1.02, "grad_norm": 6.021797842592924, "learning_rate": 5.116652374995718e-06, "loss": 0.6966, "step": 11384 }, { "epoch": 1.02, "grad_norm": 6.825999992030382, "learning_rate": 5.115930199134813e-06, "loss": 0.6721, "step": 11385 }, { "epoch": 1.02, "grad_norm": 6.123574970274254, "learning_rate": 5.115208020854119e-06, "loss": 0.6216, "step": 11386 }, { "epoch": 1.02, "grad_norm": 5.992222818863251, "learning_rate": 5.114485840168709e-06, "loss": 0.6024, "step": 11387 }, { "epoch": 1.02, "grad_norm": 6.084441262004154, "learning_rate": 5.11376365709366e-06, "loss": 0.5806, "step": 11388 }, { "epoch": 1.02, "grad_norm": 6.849311915104958, "learning_rate": 5.1130414716440425e-06, "loss": 0.609, "step": 11389 }, { "epoch": 1.02, "grad_norm": 5.602094818139735, "learning_rate": 5.112319283834931e-06, "loss": 0.6055, "step": 11390 }, { "epoch": 1.02, "grad_norm": 5.54100686171314, "learning_rate": 5.111597093681402e-06, "loss": 0.6078, "step": 11391 }, { "epoch": 1.02, "grad_norm": 11.611014210205434, "learning_rate": 5.110874901198528e-06, "loss": 0.6585, "step": 11392 }, { "epoch": 1.02, "grad_norm": 6.216896573717639, "learning_rate": 5.110152706401383e-06, "loss": 0.5489, "step": 11393 }, { "epoch": 1.02, "grad_norm": 10.463082972611163, "learning_rate": 5.10943050930504e-06, "loss": 0.6212, "step": 11394 }, { "epoch": 1.02, "grad_norm": 6.602228594882577, "learning_rate": 5.108708309924576e-06, "loss": 0.6237, "step": 11395 }, { "epoch": 1.02, "grad_norm": 7.5767981249845535, "learning_rate": 5.107986108275064e-06, "loss": 0.6055, "step": 11396 }, { "epoch": 1.02, "grad_norm": 8.111452852558513, "learning_rate": 5.107263904371578e-06, "loss": 0.5733, "step": 11397 }, { "epoch": 1.02, "grad_norm": 5.63531002547118, "learning_rate": 5.106541698229195e-06, "loss": 0.633, "step": 11398 }, { "epoch": 1.02, "grad_norm": 8.978541470179241, "learning_rate": 5.105819489862985e-06, "loss": 0.5728, "step": 11399 }, { "epoch": 1.02, "grad_norm": 6.029244991781461, "learning_rate": 5.105097279288025e-06, "loss": 0.6509, "step": 11400 }, { "epoch": 1.02, "grad_norm": 5.529001024073215, "learning_rate": 5.104375066519391e-06, "loss": 0.5786, "step": 11401 }, { "epoch": 1.02, "grad_norm": 5.948032124036982, "learning_rate": 5.103652851572155e-06, "loss": 0.6255, "step": 11402 }, { "epoch": 1.02, "grad_norm": 6.975780918465593, "learning_rate": 5.102930634461393e-06, "loss": 0.6729, "step": 11403 }, { "epoch": 1.02, "grad_norm": 7.316359102916173, "learning_rate": 5.102208415202179e-06, "loss": 0.6679, "step": 11404 }, { "epoch": 1.02, "grad_norm": 5.958832126274536, "learning_rate": 5.101486193809589e-06, "loss": 0.6289, "step": 11405 }, { "epoch": 1.02, "grad_norm": 5.477458264689184, "learning_rate": 5.100763970298697e-06, "loss": 0.6739, "step": 11406 }, { "epoch": 1.02, "grad_norm": 4.656585999986154, "learning_rate": 5.100041744684576e-06, "loss": 0.6204, "step": 11407 }, { "epoch": 1.02, "grad_norm": 4.544282710556619, "learning_rate": 5.099319516982304e-06, "loss": 0.6007, "step": 11408 }, { "epoch": 1.02, "grad_norm": 4.982818665043, "learning_rate": 5.0985972872069545e-06, "loss": 0.6201, "step": 11409 }, { "epoch": 1.02, "grad_norm": 6.001293771451889, "learning_rate": 5.097875055373602e-06, "loss": 0.5226, "step": 11410 }, { "epoch": 1.02, "grad_norm": 5.611818856797479, "learning_rate": 5.0971528214973235e-06, "loss": 0.6213, "step": 11411 }, { "epoch": 1.02, "grad_norm": 7.6068912902888375, "learning_rate": 5.09643058559319e-06, "loss": 0.6869, "step": 11412 }, { "epoch": 1.02, "grad_norm": 6.867969643151915, "learning_rate": 5.095708347676282e-06, "loss": 0.6663, "step": 11413 }, { "epoch": 1.02, "grad_norm": 5.728334604144465, "learning_rate": 5.0949861077616715e-06, "loss": 0.534, "step": 11414 }, { "epoch": 1.02, "grad_norm": 8.294957119915058, "learning_rate": 5.094263865864433e-06, "loss": 0.6413, "step": 11415 }, { "epoch": 1.02, "grad_norm": 7.746751795752345, "learning_rate": 5.0935416219996426e-06, "loss": 0.6369, "step": 11416 }, { "epoch": 1.02, "grad_norm": 6.030213528351936, "learning_rate": 5.092819376182376e-06, "loss": 0.5969, "step": 11417 }, { "epoch": 1.02, "grad_norm": 5.405853745610788, "learning_rate": 5.092097128427709e-06, "loss": 0.6051, "step": 11418 }, { "epoch": 1.02, "grad_norm": 6.235119267744857, "learning_rate": 5.091374878750714e-06, "loss": 0.5606, "step": 11419 }, { "epoch": 1.02, "grad_norm": 6.632487551018236, "learning_rate": 5.09065262716647e-06, "loss": 0.6539, "step": 11420 }, { "epoch": 1.02, "grad_norm": 6.995152608978691, "learning_rate": 5.089930373690052e-06, "loss": 0.6431, "step": 11421 }, { "epoch": 1.02, "grad_norm": 6.768388364564545, "learning_rate": 5.0892081183365325e-06, "loss": 0.6064, "step": 11422 }, { "epoch": 1.02, "grad_norm": 6.3847951495610245, "learning_rate": 5.088485861120991e-06, "loss": 0.6294, "step": 11423 }, { "epoch": 1.02, "grad_norm": 5.314571781682193, "learning_rate": 5.087763602058498e-06, "loss": 0.5334, "step": 11424 }, { "epoch": 1.02, "grad_norm": 8.137603820930277, "learning_rate": 5.087041341164134e-06, "loss": 0.6365, "step": 11425 }, { "epoch": 1.02, "grad_norm": 6.676398265810293, "learning_rate": 5.086319078452973e-06, "loss": 0.6493, "step": 11426 }, { "epoch": 1.02, "grad_norm": 5.531140024359236, "learning_rate": 5.08559681394009e-06, "loss": 0.6098, "step": 11427 }, { "epoch": 1.02, "grad_norm": 4.795685367607865, "learning_rate": 5.084874547640559e-06, "loss": 0.6085, "step": 11428 }, { "epoch": 1.02, "grad_norm": 4.977715780749774, "learning_rate": 5.08415227956946e-06, "loss": 0.5523, "step": 11429 }, { "epoch": 1.02, "grad_norm": 6.701954916975521, "learning_rate": 5.0834300097418656e-06, "loss": 0.6568, "step": 11430 }, { "epoch": 1.02, "grad_norm": 6.825649859262631, "learning_rate": 5.082707738172852e-06, "loss": 0.6338, "step": 11431 }, { "epoch": 1.02, "grad_norm": 7.424576363078336, "learning_rate": 5.081985464877496e-06, "loss": 0.6646, "step": 11432 }, { "epoch": 1.02, "grad_norm": 7.204824288897726, "learning_rate": 5.081263189870873e-06, "loss": 0.6183, "step": 11433 }, { "epoch": 1.02, "grad_norm": 6.462092594874722, "learning_rate": 5.080540913168059e-06, "loss": 0.5577, "step": 11434 }, { "epoch": 1.02, "grad_norm": 6.28645645414889, "learning_rate": 5.079818634784129e-06, "loss": 0.5972, "step": 11435 }, { "epoch": 1.02, "grad_norm": 5.631719639151833, "learning_rate": 5.079096354734161e-06, "loss": 0.6259, "step": 11436 }, { "epoch": 1.02, "grad_norm": 6.667478946581205, "learning_rate": 5.078374073033229e-06, "loss": 0.6103, "step": 11437 }, { "epoch": 1.02, "grad_norm": 5.7740575441181665, "learning_rate": 5.077651789696409e-06, "loss": 0.5899, "step": 11438 }, { "epoch": 1.02, "grad_norm": 7.32525287815923, "learning_rate": 5.07692950473878e-06, "loss": 0.6151, "step": 11439 }, { "epoch": 1.02, "grad_norm": 5.281783797969762, "learning_rate": 5.076207218175415e-06, "loss": 0.5403, "step": 11440 }, { "epoch": 1.02, "grad_norm": 5.774511354060808, "learning_rate": 5.075484930021392e-06, "loss": 0.6538, "step": 11441 }, { "epoch": 1.02, "grad_norm": 8.662274836108477, "learning_rate": 5.074762640291785e-06, "loss": 0.6752, "step": 11442 }, { "epoch": 1.02, "grad_norm": 5.661672031506345, "learning_rate": 5.0740403490016735e-06, "loss": 0.618, "step": 11443 }, { "epoch": 1.02, "grad_norm": 8.245042037497141, "learning_rate": 5.073318056166129e-06, "loss": 0.6574, "step": 11444 }, { "epoch": 1.02, "grad_norm": 8.052691900434517, "learning_rate": 5.0725957618002315e-06, "loss": 0.6663, "step": 11445 }, { "epoch": 1.02, "grad_norm": 6.157634901345381, "learning_rate": 5.071873465919058e-06, "loss": 0.6099, "step": 11446 }, { "epoch": 1.02, "grad_norm": 6.932033893410408, "learning_rate": 5.071151168537683e-06, "loss": 0.6296, "step": 11447 }, { "epoch": 1.02, "grad_norm": 5.973200997771974, "learning_rate": 5.070428869671182e-06, "loss": 0.5977, "step": 11448 }, { "epoch": 1.02, "grad_norm": 5.833855465900155, "learning_rate": 5.069706569334635e-06, "loss": 0.6604, "step": 11449 }, { "epoch": 1.02, "grad_norm": 5.882596259893021, "learning_rate": 5.068984267543113e-06, "loss": 0.6768, "step": 11450 }, { "epoch": 1.02, "grad_norm": 5.770479898384032, "learning_rate": 5.068261964311697e-06, "loss": 0.6783, "step": 11451 }, { "epoch": 1.02, "grad_norm": 6.907892101400199, "learning_rate": 5.067539659655463e-06, "loss": 0.6376, "step": 11452 }, { "epoch": 1.02, "grad_norm": 5.734263194858119, "learning_rate": 5.066817353589484e-06, "loss": 0.6173, "step": 11453 }, { "epoch": 1.02, "grad_norm": 6.398599587220075, "learning_rate": 5.06609504612884e-06, "loss": 0.5926, "step": 11454 }, { "epoch": 1.02, "grad_norm": 5.70067804449616, "learning_rate": 5.065372737288607e-06, "loss": 0.6311, "step": 11455 }, { "epoch": 1.02, "grad_norm": 5.4150553119325755, "learning_rate": 5.064650427083863e-06, "loss": 0.639, "step": 11456 }, { "epoch": 1.02, "grad_norm": 8.442340269120825, "learning_rate": 5.0639281155296806e-06, "loss": 0.687, "step": 11457 }, { "epoch": 1.02, "grad_norm": 8.798305589387832, "learning_rate": 5.063205802641139e-06, "loss": 0.6037, "step": 11458 }, { "epoch": 1.02, "grad_norm": 4.673649266090739, "learning_rate": 5.0624834884333154e-06, "loss": 0.6838, "step": 11459 }, { "epoch": 1.02, "grad_norm": 5.250625613610803, "learning_rate": 5.0617611729212844e-06, "loss": 0.6067, "step": 11460 }, { "epoch": 1.02, "grad_norm": 7.772826373606097, "learning_rate": 5.061038856120126e-06, "loss": 0.6212, "step": 11461 }, { "epoch": 1.02, "grad_norm": 5.594908919090863, "learning_rate": 5.060316538044915e-06, "loss": 0.6241, "step": 11462 }, { "epoch": 1.02, "grad_norm": 5.854159010753713, "learning_rate": 5.059594218710728e-06, "loss": 0.5981, "step": 11463 }, { "epoch": 1.02, "grad_norm": 5.500377314854002, "learning_rate": 5.0588718981326414e-06, "loss": 0.6425, "step": 11464 }, { "epoch": 1.02, "grad_norm": 5.80562941535456, "learning_rate": 5.0581495763257335e-06, "loss": 0.6323, "step": 11465 }, { "epoch": 1.02, "grad_norm": 7.571074823588971, "learning_rate": 5.057427253305081e-06, "loss": 0.6346, "step": 11466 }, { "epoch": 1.02, "grad_norm": 6.722749322848404, "learning_rate": 5.0567049290857585e-06, "loss": 0.5943, "step": 11467 }, { "epoch": 1.02, "grad_norm": 6.481658607449244, "learning_rate": 5.0559826036828475e-06, "loss": 0.5727, "step": 11468 }, { "epoch": 1.02, "grad_norm": 6.321420713106929, "learning_rate": 5.05526027711142e-06, "loss": 0.6046, "step": 11469 }, { "epoch": 1.02, "grad_norm": 5.69915151786212, "learning_rate": 5.054537949386556e-06, "loss": 0.5813, "step": 11470 }, { "epoch": 1.02, "grad_norm": 6.993833275767788, "learning_rate": 5.0538156205233335e-06, "loss": 0.608, "step": 11471 }, { "epoch": 1.02, "grad_norm": 8.420672562897401, "learning_rate": 5.053093290536826e-06, "loss": 0.5879, "step": 11472 }, { "epoch": 1.02, "grad_norm": 6.493765763491909, "learning_rate": 5.052370959442111e-06, "loss": 0.6876, "step": 11473 }, { "epoch": 1.02, "grad_norm": 5.507581170107481, "learning_rate": 5.051648627254269e-06, "loss": 0.63, "step": 11474 }, { "epoch": 1.02, "grad_norm": 8.403338322853598, "learning_rate": 5.050926293988374e-06, "loss": 0.6017, "step": 11475 }, { "epoch": 1.02, "grad_norm": 5.313249061872875, "learning_rate": 5.050203959659504e-06, "loss": 0.5424, "step": 11476 }, { "epoch": 1.02, "grad_norm": 6.5872753999466305, "learning_rate": 5.049481624282737e-06, "loss": 0.5898, "step": 11477 }, { "epoch": 1.02, "grad_norm": 6.183280672574792, "learning_rate": 5.0487592878731504e-06, "loss": 0.6401, "step": 11478 }, { "epoch": 1.02, "grad_norm": 8.261467317979394, "learning_rate": 5.048036950445819e-06, "loss": 0.6742, "step": 11479 }, { "epoch": 1.02, "grad_norm": 5.76108353026768, "learning_rate": 5.047314612015821e-06, "loss": 0.6188, "step": 11480 }, { "epoch": 1.02, "grad_norm": 6.913390375652617, "learning_rate": 5.046592272598236e-06, "loss": 0.5899, "step": 11481 }, { "epoch": 1.02, "grad_norm": 6.628231017864223, "learning_rate": 5.045869932208137e-06, "loss": 0.5729, "step": 11482 }, { "epoch": 1.02, "grad_norm": 5.07457548432494, "learning_rate": 5.045147590860604e-06, "loss": 0.6035, "step": 11483 }, { "epoch": 1.02, "grad_norm": 9.284740504858679, "learning_rate": 5.044425248570716e-06, "loss": 0.559, "step": 11484 }, { "epoch": 1.02, "grad_norm": 5.907460166476627, "learning_rate": 5.043702905353548e-06, "loss": 0.6275, "step": 11485 }, { "epoch": 1.02, "grad_norm": 8.848072912635923, "learning_rate": 5.042980561224176e-06, "loss": 0.6687, "step": 11486 }, { "epoch": 1.02, "grad_norm": 5.621763236752186, "learning_rate": 5.04225821619768e-06, "loss": 0.5943, "step": 11487 }, { "epoch": 1.02, "grad_norm": 7.46178128264674, "learning_rate": 5.0415358702891345e-06, "loss": 0.6173, "step": 11488 }, { "epoch": 1.02, "grad_norm": 7.005711819240018, "learning_rate": 5.0408135235136206e-06, "loss": 0.6857, "step": 11489 }, { "epoch": 1.03, "grad_norm": 7.297334044548831, "learning_rate": 5.0400911758862126e-06, "loss": 0.5889, "step": 11490 }, { "epoch": 1.03, "grad_norm": 5.591909453487586, "learning_rate": 5.0393688274219885e-06, "loss": 0.5977, "step": 11491 }, { "epoch": 1.03, "grad_norm": 6.061440818436598, "learning_rate": 5.038646478136029e-06, "loss": 0.6168, "step": 11492 }, { "epoch": 1.03, "grad_norm": 7.460224554245447, "learning_rate": 5.037924128043407e-06, "loss": 0.5664, "step": 11493 }, { "epoch": 1.03, "grad_norm": 6.60422334316145, "learning_rate": 5.037201777159202e-06, "loss": 0.7056, "step": 11494 }, { "epoch": 1.03, "grad_norm": 5.8994025750970716, "learning_rate": 5.036479425498492e-06, "loss": 0.5328, "step": 11495 }, { "epoch": 1.03, "grad_norm": 5.5350167946378095, "learning_rate": 5.035757073076353e-06, "loss": 0.6421, "step": 11496 }, { "epoch": 1.03, "grad_norm": 8.927885163593757, "learning_rate": 5.035034719907864e-06, "loss": 0.6146, "step": 11497 }, { "epoch": 1.03, "grad_norm": 6.294837036449359, "learning_rate": 5.034312366008101e-06, "loss": 0.6389, "step": 11498 }, { "epoch": 1.03, "grad_norm": 6.900942595591899, "learning_rate": 5.033590011392144e-06, "loss": 0.6887, "step": 11499 }, { "epoch": 1.03, "grad_norm": 5.690394694873084, "learning_rate": 5.032867656075069e-06, "loss": 0.6386, "step": 11500 }, { "epoch": 1.03, "grad_norm": 5.283241499234376, "learning_rate": 5.032145300071952e-06, "loss": 0.665, "step": 11501 }, { "epoch": 1.03, "grad_norm": 6.271389471213352, "learning_rate": 5.031422943397873e-06, "loss": 0.6237, "step": 11502 }, { "epoch": 1.03, "grad_norm": 8.125916459041276, "learning_rate": 5.03070058606791e-06, "loss": 0.6037, "step": 11503 }, { "epoch": 1.03, "grad_norm": 6.703090067136761, "learning_rate": 5.029978228097139e-06, "loss": 0.6328, "step": 11504 }, { "epoch": 1.03, "grad_norm": 4.842789822427345, "learning_rate": 5.0292558695006365e-06, "loss": 0.6048, "step": 11505 }, { "epoch": 1.03, "grad_norm": 9.212363566239267, "learning_rate": 5.028533510293484e-06, "loss": 0.6041, "step": 11506 }, { "epoch": 1.03, "grad_norm": 6.503166832871615, "learning_rate": 5.027811150490757e-06, "loss": 0.655, "step": 11507 }, { "epoch": 1.03, "grad_norm": 6.59167392833282, "learning_rate": 5.027088790107532e-06, "loss": 0.6066, "step": 11508 }, { "epoch": 1.03, "grad_norm": 5.474418554538446, "learning_rate": 5.026366429158887e-06, "loss": 0.5633, "step": 11509 }, { "epoch": 1.03, "grad_norm": 4.172980239802118, "learning_rate": 5.025644067659902e-06, "loss": 0.5761, "step": 11510 }, { "epoch": 1.03, "grad_norm": 7.488017010827383, "learning_rate": 5.024921705625652e-06, "loss": 0.6102, "step": 11511 }, { "epoch": 1.03, "grad_norm": 6.446714887549067, "learning_rate": 5.024199343071218e-06, "loss": 0.5879, "step": 11512 }, { "epoch": 1.03, "grad_norm": 6.789440163970139, "learning_rate": 5.023476980011673e-06, "loss": 0.5838, "step": 11513 }, { "epoch": 1.03, "grad_norm": 5.354716789168824, "learning_rate": 5.0227546164621e-06, "loss": 0.6582, "step": 11514 }, { "epoch": 1.03, "grad_norm": 7.188615956744129, "learning_rate": 5.022032252437573e-06, "loss": 0.6195, "step": 11515 }, { "epoch": 1.03, "grad_norm": 6.089118996545337, "learning_rate": 5.021309887953172e-06, "loss": 0.5577, "step": 11516 }, { "epoch": 1.03, "grad_norm": 5.545948300449825, "learning_rate": 5.020587523023972e-06, "loss": 0.6115, "step": 11517 }, { "epoch": 1.03, "grad_norm": 6.415812148689991, "learning_rate": 5.019865157665053e-06, "loss": 0.5819, "step": 11518 }, { "epoch": 1.03, "grad_norm": 7.439016580300034, "learning_rate": 5.019142791891493e-06, "loss": 0.6101, "step": 11519 }, { "epoch": 1.03, "grad_norm": 5.911579774811184, "learning_rate": 5.018420425718369e-06, "loss": 0.6327, "step": 11520 }, { "epoch": 1.03, "grad_norm": 5.230702755594394, "learning_rate": 5.017698059160759e-06, "loss": 0.648, "step": 11521 }, { "epoch": 1.03, "grad_norm": 7.964705443057674, "learning_rate": 5.016975692233741e-06, "loss": 0.6498, "step": 11522 }, { "epoch": 1.03, "grad_norm": 6.529977629322342, "learning_rate": 5.016253324952391e-06, "loss": 0.6297, "step": 11523 }, { "epoch": 1.03, "grad_norm": 5.572412333393989, "learning_rate": 5.0155309573317905e-06, "loss": 0.5947, "step": 11524 }, { "epoch": 1.03, "grad_norm": 7.187373485687559, "learning_rate": 5.014808589387015e-06, "loss": 0.6067, "step": 11525 }, { "epoch": 1.03, "grad_norm": 5.471144636984977, "learning_rate": 5.014086221133141e-06, "loss": 0.6742, "step": 11526 }, { "epoch": 1.03, "grad_norm": 8.224234039349197, "learning_rate": 5.013363852585249e-06, "loss": 0.6082, "step": 11527 }, { "epoch": 1.03, "grad_norm": 4.0713006743089135, "learning_rate": 5.012641483758416e-06, "loss": 0.552, "step": 11528 }, { "epoch": 1.03, "grad_norm": 6.570666742010075, "learning_rate": 5.01191911466772e-06, "loss": 0.6227, "step": 11529 }, { "epoch": 1.03, "grad_norm": 5.981175160822479, "learning_rate": 5.011196745328239e-06, "loss": 0.6282, "step": 11530 }, { "epoch": 1.03, "grad_norm": 6.271769193810429, "learning_rate": 5.010474375755049e-06, "loss": 0.59, "step": 11531 }, { "epoch": 1.03, "grad_norm": 6.185141740809473, "learning_rate": 5.00975200596323e-06, "loss": 0.6131, "step": 11532 }, { "epoch": 1.03, "grad_norm": 6.643681328053057, "learning_rate": 5.0090296359678595e-06, "loss": 0.6189, "step": 11533 }, { "epoch": 1.03, "grad_norm": 6.128185880585026, "learning_rate": 5.008307265784014e-06, "loss": 0.6229, "step": 11534 }, { "epoch": 1.03, "grad_norm": 6.182380969139973, "learning_rate": 5.007584895426775e-06, "loss": 0.5952, "step": 11535 }, { "epoch": 1.03, "grad_norm": 6.8331041246897035, "learning_rate": 5.0068625249112155e-06, "loss": 0.6158, "step": 11536 }, { "epoch": 1.03, "grad_norm": 6.2907842416158735, "learning_rate": 5.006140154252416e-06, "loss": 0.602, "step": 11537 }, { "epoch": 1.03, "grad_norm": 5.292279184733193, "learning_rate": 5.005417783465456e-06, "loss": 0.5754, "step": 11538 }, { "epoch": 1.03, "grad_norm": 7.59613597818785, "learning_rate": 5.004695412565409e-06, "loss": 0.6877, "step": 11539 }, { "epoch": 1.03, "grad_norm": 8.42116736347381, "learning_rate": 5.003973041567357e-06, "loss": 0.6058, "step": 11540 }, { "epoch": 1.03, "grad_norm": 4.729166899613549, "learning_rate": 5.003250670486377e-06, "loss": 0.5908, "step": 11541 }, { "epoch": 1.03, "grad_norm": 5.799978681526645, "learning_rate": 5.002528299337547e-06, "loss": 0.6326, "step": 11542 }, { "epoch": 1.03, "grad_norm": 6.807465680695557, "learning_rate": 5.0018059281359425e-06, "loss": 0.6224, "step": 11543 }, { "epoch": 1.03, "grad_norm": 5.108132583987435, "learning_rate": 5.001083556896643e-06, "loss": 0.6444, "step": 11544 }, { "epoch": 1.03, "grad_norm": 5.20729466929514, "learning_rate": 5.000361185634728e-06, "loss": 0.6548, "step": 11545 }, { "epoch": 1.03, "grad_norm": 8.10717752010198, "learning_rate": 4.999638814365273e-06, "loss": 0.6755, "step": 11546 }, { "epoch": 1.03, "grad_norm": 6.10324742911467, "learning_rate": 4.9989164431033585e-06, "loss": 0.6205, "step": 11547 }, { "epoch": 1.03, "grad_norm": 6.243421256885166, "learning_rate": 4.998194071864059e-06, "loss": 0.5662, "step": 11548 }, { "epoch": 1.03, "grad_norm": 5.34246403140865, "learning_rate": 4.9974717006624546e-06, "loss": 0.6997, "step": 11549 }, { "epoch": 1.03, "grad_norm": 8.113219008995605, "learning_rate": 4.996749329513624e-06, "loss": 0.6086, "step": 11550 }, { "epoch": 1.03, "grad_norm": 5.707493788583779, "learning_rate": 4.996026958432643e-06, "loss": 0.6095, "step": 11551 }, { "epoch": 1.03, "grad_norm": 4.93636205535711, "learning_rate": 4.995304587434591e-06, "loss": 0.6413, "step": 11552 }, { "epoch": 1.03, "grad_norm": 7.062170286042081, "learning_rate": 4.994582216534547e-06, "loss": 0.6345, "step": 11553 }, { "epoch": 1.03, "grad_norm": 7.102956607353067, "learning_rate": 4.993859845747585e-06, "loss": 0.656, "step": 11554 }, { "epoch": 1.03, "grad_norm": 6.958866486720514, "learning_rate": 4.993137475088787e-06, "loss": 0.6348, "step": 11555 }, { "epoch": 1.03, "grad_norm": 11.820802122633879, "learning_rate": 4.9924151045732274e-06, "loss": 0.572, "step": 11556 }, { "epoch": 1.03, "grad_norm": 8.392307876567111, "learning_rate": 4.991692734215988e-06, "loss": 0.643, "step": 11557 }, { "epoch": 1.03, "grad_norm": 7.145269653077524, "learning_rate": 4.990970364032143e-06, "loss": 0.5575, "step": 11558 }, { "epoch": 1.03, "grad_norm": 5.046670788164138, "learning_rate": 4.990247994036772e-06, "loss": 0.5744, "step": 11559 }, { "epoch": 1.03, "grad_norm": 5.9347660465118866, "learning_rate": 4.989525624244953e-06, "loss": 0.5537, "step": 11560 }, { "epoch": 1.03, "grad_norm": 7.769164539044649, "learning_rate": 4.988803254671763e-06, "loss": 0.6361, "step": 11561 }, { "epoch": 1.03, "grad_norm": 6.40482128982242, "learning_rate": 4.988080885332281e-06, "loss": 0.6284, "step": 11562 }, { "epoch": 1.03, "grad_norm": 5.9590693196092515, "learning_rate": 4.9873585162415846e-06, "loss": 0.6175, "step": 11563 }, { "epoch": 1.03, "grad_norm": 6.751836284169477, "learning_rate": 4.986636147414752e-06, "loss": 0.6143, "step": 11564 }, { "epoch": 1.03, "grad_norm": 4.6238152204696465, "learning_rate": 4.985913778866859e-06, "loss": 0.5998, "step": 11565 }, { "epoch": 1.03, "grad_norm": 6.4365254207761975, "learning_rate": 4.985191410612987e-06, "loss": 0.6015, "step": 11566 }, { "epoch": 1.03, "grad_norm": 5.738063856239536, "learning_rate": 4.98446904266821e-06, "loss": 0.5378, "step": 11567 }, { "epoch": 1.03, "grad_norm": 5.7074920040055535, "learning_rate": 4.98374667504761e-06, "loss": 0.6107, "step": 11568 }, { "epoch": 1.03, "grad_norm": 7.856502618767377, "learning_rate": 4.983024307766261e-06, "loss": 0.6593, "step": 11569 }, { "epoch": 1.03, "grad_norm": 8.979495406386846, "learning_rate": 4.982301940839244e-06, "loss": 0.62, "step": 11570 }, { "epoch": 1.03, "grad_norm": 6.191810845899528, "learning_rate": 4.981579574281633e-06, "loss": 0.5745, "step": 11571 }, { "epoch": 1.03, "grad_norm": 4.752206465400866, "learning_rate": 4.980857208108508e-06, "loss": 0.6443, "step": 11572 }, { "epoch": 1.03, "grad_norm": 7.156432038916066, "learning_rate": 4.980134842334949e-06, "loss": 0.5946, "step": 11573 }, { "epoch": 1.03, "grad_norm": 6.968628794496162, "learning_rate": 4.97941247697603e-06, "loss": 0.6263, "step": 11574 }, { "epoch": 1.03, "grad_norm": 4.558121292003021, "learning_rate": 4.97869011204683e-06, "loss": 0.5709, "step": 11575 }, { "epoch": 1.03, "grad_norm": 6.8931835047507635, "learning_rate": 4.977967747562429e-06, "loss": 0.6132, "step": 11576 }, { "epoch": 1.03, "grad_norm": 5.909841470891709, "learning_rate": 4.977245383537901e-06, "loss": 0.6207, "step": 11577 }, { "epoch": 1.03, "grad_norm": 8.025363744166805, "learning_rate": 4.976523019988327e-06, "loss": 0.6055, "step": 11578 }, { "epoch": 1.03, "grad_norm": 5.645062046137986, "learning_rate": 4.975800656928784e-06, "loss": 0.6273, "step": 11579 }, { "epoch": 1.03, "grad_norm": 6.044601132057996, "learning_rate": 4.975078294374348e-06, "loss": 0.5457, "step": 11580 }, { "epoch": 1.03, "grad_norm": 6.970371495622717, "learning_rate": 4.974355932340099e-06, "loss": 0.6618, "step": 11581 }, { "epoch": 1.03, "grad_norm": 5.174100868524889, "learning_rate": 4.973633570841115e-06, "loss": 0.6487, "step": 11582 }, { "epoch": 1.03, "grad_norm": 5.13765437106652, "learning_rate": 4.972911209892472e-06, "loss": 0.6248, "step": 11583 }, { "epoch": 1.03, "grad_norm": 4.980104733223577, "learning_rate": 4.9721888495092465e-06, "loss": 0.5503, "step": 11584 }, { "epoch": 1.03, "grad_norm": 5.950058306115187, "learning_rate": 4.971466489706518e-06, "loss": 0.6037, "step": 11585 }, { "epoch": 1.03, "grad_norm": 7.033951743911191, "learning_rate": 4.970744130499364e-06, "loss": 0.6054, "step": 11586 }, { "epoch": 1.03, "grad_norm": 5.175963293025318, "learning_rate": 4.970021771902864e-06, "loss": 0.5927, "step": 11587 }, { "epoch": 1.03, "grad_norm": 7.206327114341463, "learning_rate": 4.9692994139320915e-06, "loss": 0.6588, "step": 11588 }, { "epoch": 1.03, "grad_norm": 9.496243306088898, "learning_rate": 4.968577056602128e-06, "loss": 0.6352, "step": 11589 }, { "epoch": 1.03, "grad_norm": 6.609569710816613, "learning_rate": 4.9678546999280495e-06, "loss": 0.5895, "step": 11590 }, { "epoch": 1.03, "grad_norm": 6.681324355487443, "learning_rate": 4.967132343924932e-06, "loss": 0.6258, "step": 11591 }, { "epoch": 1.03, "grad_norm": 5.662207063779923, "learning_rate": 4.966409988607858e-06, "loss": 0.5889, "step": 11592 }, { "epoch": 1.03, "grad_norm": 7.192711181551438, "learning_rate": 4.965687633991899e-06, "loss": 0.6443, "step": 11593 }, { "epoch": 1.03, "grad_norm": 4.63277396377212, "learning_rate": 4.964965280092137e-06, "loss": 0.6249, "step": 11594 }, { "epoch": 1.03, "grad_norm": 5.913799268249351, "learning_rate": 4.964242926923648e-06, "loss": 0.5912, "step": 11595 }, { "epoch": 1.03, "grad_norm": 7.036281536971418, "learning_rate": 4.9635205745015116e-06, "loss": 0.6056, "step": 11596 }, { "epoch": 1.03, "grad_norm": 6.724884028849377, "learning_rate": 4.9627982228408e-06, "loss": 0.6211, "step": 11597 }, { "epoch": 1.03, "grad_norm": 8.415578932219454, "learning_rate": 4.962075871956595e-06, "loss": 0.5708, "step": 11598 }, { "epoch": 1.03, "grad_norm": 7.066689069973688, "learning_rate": 4.961353521863974e-06, "loss": 0.5954, "step": 11599 }, { "epoch": 1.03, "grad_norm": 6.0070667699931155, "learning_rate": 4.960631172578012e-06, "loss": 0.5999, "step": 11600 }, { "epoch": 1.03, "grad_norm": 6.908002225563063, "learning_rate": 4.959908824113789e-06, "loss": 0.6236, "step": 11601 }, { "epoch": 1.04, "grad_norm": 6.746544597030718, "learning_rate": 4.959186476486382e-06, "loss": 0.589, "step": 11602 }, { "epoch": 1.04, "grad_norm": 6.097525999553976, "learning_rate": 4.958464129710866e-06, "loss": 0.6051, "step": 11603 }, { "epoch": 1.04, "grad_norm": 6.713746553368952, "learning_rate": 4.957741783802322e-06, "loss": 0.6213, "step": 11604 }, { "epoch": 1.04, "grad_norm": 5.310710845586217, "learning_rate": 4.957019438775826e-06, "loss": 0.672, "step": 11605 }, { "epoch": 1.04, "grad_norm": 6.450203043718635, "learning_rate": 4.956297094646454e-06, "loss": 0.6049, "step": 11606 }, { "epoch": 1.04, "grad_norm": 7.433116611329401, "learning_rate": 4.955574751429285e-06, "loss": 0.7148, "step": 11607 }, { "epoch": 1.04, "grad_norm": 4.612580659791629, "learning_rate": 4.954852409139396e-06, "loss": 0.5536, "step": 11608 }, { "epoch": 1.04, "grad_norm": 5.545808617002117, "learning_rate": 4.954130067791863e-06, "loss": 0.6656, "step": 11609 }, { "epoch": 1.04, "grad_norm": 7.148509559005961, "learning_rate": 4.953407727401765e-06, "loss": 0.62, "step": 11610 }, { "epoch": 1.04, "grad_norm": 7.3410735902542585, "learning_rate": 4.95268538798418e-06, "loss": 0.6306, "step": 11611 }, { "epoch": 1.04, "grad_norm": 5.706582097823274, "learning_rate": 4.9519630495541835e-06, "loss": 0.545, "step": 11612 }, { "epoch": 1.04, "grad_norm": 6.173032299194765, "learning_rate": 4.951240712126852e-06, "loss": 0.5924, "step": 11613 }, { "epoch": 1.04, "grad_norm": 6.548073661849845, "learning_rate": 4.950518375717264e-06, "loss": 0.5956, "step": 11614 }, { "epoch": 1.04, "grad_norm": 7.888479027226866, "learning_rate": 4.949796040340497e-06, "loss": 0.5896, "step": 11615 }, { "epoch": 1.04, "grad_norm": 8.433440162374461, "learning_rate": 4.949073706011627e-06, "loss": 0.6063, "step": 11616 }, { "epoch": 1.04, "grad_norm": 4.4849077217338165, "learning_rate": 4.948351372745732e-06, "loss": 0.6368, "step": 11617 }, { "epoch": 1.04, "grad_norm": 4.725372815886445, "learning_rate": 4.94762904055789e-06, "loss": 0.5773, "step": 11618 }, { "epoch": 1.04, "grad_norm": 6.038803687674468, "learning_rate": 4.946906709463176e-06, "loss": 0.595, "step": 11619 }, { "epoch": 1.04, "grad_norm": 6.705144894344487, "learning_rate": 4.946184379476668e-06, "loss": 0.5703, "step": 11620 }, { "epoch": 1.04, "grad_norm": 4.878014472896639, "learning_rate": 4.945462050613445e-06, "loss": 0.5613, "step": 11621 }, { "epoch": 1.04, "grad_norm": 5.614552743859249, "learning_rate": 4.94473972288858e-06, "loss": 0.5928, "step": 11622 }, { "epoch": 1.04, "grad_norm": 6.75923764594968, "learning_rate": 4.944017396317154e-06, "loss": 0.6249, "step": 11623 }, { "epoch": 1.04, "grad_norm": 7.006578633704678, "learning_rate": 4.9432950709142415e-06, "loss": 0.6367, "step": 11624 }, { "epoch": 1.04, "grad_norm": 5.589058482318481, "learning_rate": 4.942572746694922e-06, "loss": 0.6534, "step": 11625 }, { "epoch": 1.04, "grad_norm": 6.001916045976451, "learning_rate": 4.941850423674269e-06, "loss": 0.6191, "step": 11626 }, { "epoch": 1.04, "grad_norm": 6.755334636529886, "learning_rate": 4.94112810186736e-06, "loss": 0.5983, "step": 11627 }, { "epoch": 1.04, "grad_norm": 5.081945456418661, "learning_rate": 4.940405781289274e-06, "loss": 0.6493, "step": 11628 }, { "epoch": 1.04, "grad_norm": 8.957893307739983, "learning_rate": 4.939683461955087e-06, "loss": 0.6173, "step": 11629 }, { "epoch": 1.04, "grad_norm": 5.663882749701962, "learning_rate": 4.938961143879875e-06, "loss": 0.6191, "step": 11630 }, { "epoch": 1.04, "grad_norm": 6.139162442312647, "learning_rate": 4.938238827078716e-06, "loss": 0.6602, "step": 11631 }, { "epoch": 1.04, "grad_norm": 4.584409858489053, "learning_rate": 4.937516511566686e-06, "loss": 0.6692, "step": 11632 }, { "epoch": 1.04, "grad_norm": 4.228828919104155, "learning_rate": 4.936794197358863e-06, "loss": 0.6134, "step": 11633 }, { "epoch": 1.04, "grad_norm": 6.684611024638545, "learning_rate": 4.936071884470321e-06, "loss": 0.5829, "step": 11634 }, { "epoch": 1.04, "grad_norm": 5.8859301482553565, "learning_rate": 4.935349572916139e-06, "loss": 0.6087, "step": 11635 }, { "epoch": 1.04, "grad_norm": 5.826469923314895, "learning_rate": 4.934627262711394e-06, "loss": 0.6042, "step": 11636 }, { "epoch": 1.04, "grad_norm": 6.598903196079001, "learning_rate": 4.933904953871161e-06, "loss": 0.6358, "step": 11637 }, { "epoch": 1.04, "grad_norm": 5.250299612901703, "learning_rate": 4.933182646410516e-06, "loss": 0.667, "step": 11638 }, { "epoch": 1.04, "grad_norm": 4.649298014739443, "learning_rate": 4.9324603403445395e-06, "loss": 0.6194, "step": 11639 }, { "epoch": 1.04, "grad_norm": 6.003116751576399, "learning_rate": 4.9317380356883055e-06, "loss": 0.6118, "step": 11640 }, { "epoch": 1.04, "grad_norm": 5.583683552386966, "learning_rate": 4.931015732456889e-06, "loss": 0.5302, "step": 11641 }, { "epoch": 1.04, "grad_norm": 7.669728210104926, "learning_rate": 4.930293430665367e-06, "loss": 0.6306, "step": 11642 }, { "epoch": 1.04, "grad_norm": 10.094003165916055, "learning_rate": 4.929571130328819e-06, "loss": 0.6901, "step": 11643 }, { "epoch": 1.04, "grad_norm": 4.87248056962988, "learning_rate": 4.928848831462319e-06, "loss": 0.577, "step": 11644 }, { "epoch": 1.04, "grad_norm": 5.6630507968400075, "learning_rate": 4.9281265340809435e-06, "loss": 0.5678, "step": 11645 }, { "epoch": 1.04, "grad_norm": 7.88756660955437, "learning_rate": 4.927404238199769e-06, "loss": 0.6071, "step": 11646 }, { "epoch": 1.04, "grad_norm": 5.455358973522802, "learning_rate": 4.926681943833872e-06, "loss": 0.6555, "step": 11647 }, { "epoch": 1.04, "grad_norm": 7.333423319955297, "learning_rate": 4.925959650998329e-06, "loss": 0.6352, "step": 11648 }, { "epoch": 1.04, "grad_norm": 6.718901828710988, "learning_rate": 4.9252373597082165e-06, "loss": 0.5879, "step": 11649 }, { "epoch": 1.04, "grad_norm": 5.374517572077803, "learning_rate": 4.92451506997861e-06, "loss": 0.609, "step": 11650 }, { "epoch": 1.04, "grad_norm": 5.35732517137119, "learning_rate": 4.923792781824586e-06, "loss": 0.5759, "step": 11651 }, { "epoch": 1.04, "grad_norm": 5.229527602133704, "learning_rate": 4.923070495261221e-06, "loss": 0.6201, "step": 11652 }, { "epoch": 1.04, "grad_norm": 5.623533674160671, "learning_rate": 4.922348210303591e-06, "loss": 0.5854, "step": 11653 }, { "epoch": 1.04, "grad_norm": 6.750875083333629, "learning_rate": 4.921625926966774e-06, "loss": 0.5757, "step": 11654 }, { "epoch": 1.04, "grad_norm": 5.754604673351946, "learning_rate": 4.9209036452658415e-06, "loss": 0.5867, "step": 11655 }, { "epoch": 1.04, "grad_norm": 5.631581296535587, "learning_rate": 4.920181365215873e-06, "loss": 0.6255, "step": 11656 }, { "epoch": 1.04, "grad_norm": 7.197460075668641, "learning_rate": 4.919459086831944e-06, "loss": 0.5927, "step": 11657 }, { "epoch": 1.04, "grad_norm": 6.635072325130017, "learning_rate": 4.918736810129129e-06, "loss": 0.6422, "step": 11658 }, { "epoch": 1.04, "grad_norm": 6.162098069424316, "learning_rate": 4.9180145351225064e-06, "loss": 0.6225, "step": 11659 }, { "epoch": 1.04, "grad_norm": 6.101462297015165, "learning_rate": 4.91729226182715e-06, "loss": 0.6434, "step": 11660 }, { "epoch": 1.04, "grad_norm": 7.415433458426369, "learning_rate": 4.916569990258136e-06, "loss": 0.6466, "step": 11661 }, { "epoch": 1.04, "grad_norm": 6.9637416447519085, "learning_rate": 4.9158477204305425e-06, "loss": 0.6318, "step": 11662 }, { "epoch": 1.04, "grad_norm": 6.634312965043066, "learning_rate": 4.915125452359442e-06, "loss": 0.5878, "step": 11663 }, { "epoch": 1.04, "grad_norm": 6.053452036631529, "learning_rate": 4.914403186059912e-06, "loss": 0.6623, "step": 11664 }, { "epoch": 1.04, "grad_norm": 6.216970010796135, "learning_rate": 4.913680921547029e-06, "loss": 0.6405, "step": 11665 }, { "epoch": 1.04, "grad_norm": 7.462414792690141, "learning_rate": 4.9129586588358665e-06, "loss": 0.623, "step": 11666 }, { "epoch": 1.04, "grad_norm": 5.97304119637614, "learning_rate": 4.912236397941502e-06, "loss": 0.6026, "step": 11667 }, { "epoch": 1.04, "grad_norm": 6.8731889667952775, "learning_rate": 4.911514138879011e-06, "loss": 0.6311, "step": 11668 }, { "epoch": 1.04, "grad_norm": 5.354703066798265, "learning_rate": 4.91079188166347e-06, "loss": 0.6191, "step": 11669 }, { "epoch": 1.04, "grad_norm": 5.976543533995944, "learning_rate": 4.91006962630995e-06, "loss": 0.6492, "step": 11670 }, { "epoch": 1.04, "grad_norm": 4.474194872308304, "learning_rate": 4.9093473728335304e-06, "loss": 0.6665, "step": 11671 }, { "epoch": 1.04, "grad_norm": 5.60806012304972, "learning_rate": 4.908625121249288e-06, "loss": 0.6702, "step": 11672 }, { "epoch": 1.04, "grad_norm": 7.698428746998802, "learning_rate": 4.9079028715722936e-06, "loss": 0.5867, "step": 11673 }, { "epoch": 1.04, "grad_norm": 4.888983564660257, "learning_rate": 4.907180623817625e-06, "loss": 0.5738, "step": 11674 }, { "epoch": 1.04, "grad_norm": 6.994362240517239, "learning_rate": 4.906458378000359e-06, "loss": 0.6589, "step": 11675 }, { "epoch": 1.04, "grad_norm": 5.750232005680473, "learning_rate": 4.905736134135569e-06, "loss": 0.6971, "step": 11676 }, { "epoch": 1.04, "grad_norm": 5.079879369956708, "learning_rate": 4.90501389223833e-06, "loss": 0.5927, "step": 11677 }, { "epoch": 1.04, "grad_norm": 4.209765952711047, "learning_rate": 4.9042916523237195e-06, "loss": 0.5919, "step": 11678 }, { "epoch": 1.04, "grad_norm": 5.446984685313848, "learning_rate": 4.90356941440681e-06, "loss": 0.5978, "step": 11679 }, { "epoch": 1.04, "grad_norm": 6.681224008448138, "learning_rate": 4.902847178502677e-06, "loss": 0.6047, "step": 11680 }, { "epoch": 1.04, "grad_norm": 6.384887739390651, "learning_rate": 4.9021249446263985e-06, "loss": 0.5902, "step": 11681 }, { "epoch": 1.04, "grad_norm": 8.243363080994994, "learning_rate": 4.9014027127930455e-06, "loss": 0.6559, "step": 11682 }, { "epoch": 1.04, "grad_norm": 6.405491882290054, "learning_rate": 4.900680483017698e-06, "loss": 0.6092, "step": 11683 }, { "epoch": 1.04, "grad_norm": 6.670552355639677, "learning_rate": 4.899958255315425e-06, "loss": 0.5982, "step": 11684 }, { "epoch": 1.04, "grad_norm": 5.933360717651437, "learning_rate": 4.899236029701307e-06, "loss": 0.6296, "step": 11685 }, { "epoch": 1.04, "grad_norm": 7.71192642301557, "learning_rate": 4.898513806190414e-06, "loss": 0.5617, "step": 11686 }, { "epoch": 1.04, "grad_norm": 7.80469820721532, "learning_rate": 4.897791584797823e-06, "loss": 0.6257, "step": 11687 }, { "epoch": 1.04, "grad_norm": 6.030858516977971, "learning_rate": 4.8970693655386094e-06, "loss": 0.6234, "step": 11688 }, { "epoch": 1.04, "grad_norm": 6.118435147009873, "learning_rate": 4.896347148427847e-06, "loss": 0.5931, "step": 11689 }, { "epoch": 1.04, "grad_norm": 5.20871929849968, "learning_rate": 4.895624933480611e-06, "loss": 0.5933, "step": 11690 }, { "epoch": 1.04, "grad_norm": 6.526167848672538, "learning_rate": 4.894902720711976e-06, "loss": 0.6217, "step": 11691 }, { "epoch": 1.04, "grad_norm": 7.23542568669468, "learning_rate": 4.894180510137016e-06, "loss": 0.6284, "step": 11692 }, { "epoch": 1.04, "grad_norm": 10.75115138509132, "learning_rate": 4.893458301770807e-06, "loss": 0.6082, "step": 11693 }, { "epoch": 1.04, "grad_norm": 5.1761951481333766, "learning_rate": 4.892736095628422e-06, "loss": 0.6078, "step": 11694 }, { "epoch": 1.04, "grad_norm": 5.609762877523205, "learning_rate": 4.892013891724937e-06, "loss": 0.6178, "step": 11695 }, { "epoch": 1.04, "grad_norm": 5.742834671067485, "learning_rate": 4.891291690075423e-06, "loss": 0.5613, "step": 11696 }, { "epoch": 1.04, "grad_norm": 4.481944019115577, "learning_rate": 4.890569490694961e-06, "loss": 0.6127, "step": 11697 }, { "epoch": 1.04, "grad_norm": 11.02079964139869, "learning_rate": 4.88984729359862e-06, "loss": 0.6473, "step": 11698 }, { "epoch": 1.04, "grad_norm": 8.767967686760194, "learning_rate": 4.889125098801474e-06, "loss": 0.6641, "step": 11699 }, { "epoch": 1.04, "grad_norm": 7.643585819764252, "learning_rate": 4.888402906318599e-06, "loss": 0.6108, "step": 11700 }, { "epoch": 1.04, "grad_norm": 6.282890903374749, "learning_rate": 4.88768071616507e-06, "loss": 0.627, "step": 11701 }, { "epoch": 1.04, "grad_norm": 5.615156392001707, "learning_rate": 4.886958528355959e-06, "loss": 0.5702, "step": 11702 }, { "epoch": 1.04, "grad_norm": 5.298007604964225, "learning_rate": 4.886236342906341e-06, "loss": 0.6455, "step": 11703 }, { "epoch": 1.04, "grad_norm": 5.191555906326668, "learning_rate": 4.885514159831292e-06, "loss": 0.5851, "step": 11704 }, { "epoch": 1.04, "grad_norm": 6.2765043292873735, "learning_rate": 4.884791979145882e-06, "loss": 0.5403, "step": 11705 }, { "epoch": 1.04, "grad_norm": 6.066795794683276, "learning_rate": 4.8840698008651875e-06, "loss": 0.5788, "step": 11706 }, { "epoch": 1.04, "grad_norm": 5.681452545157281, "learning_rate": 4.883347625004284e-06, "loss": 0.6565, "step": 11707 }, { "epoch": 1.04, "grad_norm": 5.170712738911591, "learning_rate": 4.882625451578241e-06, "loss": 0.6254, "step": 11708 }, { "epoch": 1.04, "grad_norm": 5.813393733000676, "learning_rate": 4.881903280602137e-06, "loss": 0.6093, "step": 11709 }, { "epoch": 1.04, "grad_norm": 6.975799472219051, "learning_rate": 4.881181112091041e-06, "loss": 0.6506, "step": 11710 }, { "epoch": 1.04, "grad_norm": 6.496539030260688, "learning_rate": 4.880458946060029e-06, "loss": 0.6256, "step": 11711 }, { "epoch": 1.04, "grad_norm": 5.402567592852545, "learning_rate": 4.879736782524178e-06, "loss": 0.6341, "step": 11712 }, { "epoch": 1.04, "grad_norm": 6.015999049182045, "learning_rate": 4.879014621498556e-06, "loss": 0.5773, "step": 11713 }, { "epoch": 1.05, "grad_norm": 6.670364941604027, "learning_rate": 4.878292462998239e-06, "loss": 0.7058, "step": 11714 }, { "epoch": 1.05, "grad_norm": 6.557323492124184, "learning_rate": 4.8775703070383e-06, "loss": 0.6206, "step": 11715 }, { "epoch": 1.05, "grad_norm": 7.0107194420018715, "learning_rate": 4.876848153633814e-06, "loss": 0.6021, "step": 11716 }, { "epoch": 1.05, "grad_norm": 6.040577756339709, "learning_rate": 4.876126002799851e-06, "loss": 0.671, "step": 11717 }, { "epoch": 1.05, "grad_norm": 7.632509162373895, "learning_rate": 4.875403854551487e-06, "loss": 0.6404, "step": 11718 }, { "epoch": 1.05, "grad_norm": 6.286507927051945, "learning_rate": 4.874681708903796e-06, "loss": 0.595, "step": 11719 }, { "epoch": 1.05, "grad_norm": 7.05808090596921, "learning_rate": 4.8739595658718485e-06, "loss": 0.6502, "step": 11720 }, { "epoch": 1.05, "grad_norm": 5.4026720830152, "learning_rate": 4.873237425470719e-06, "loss": 0.6475, "step": 11721 }, { "epoch": 1.05, "grad_norm": 6.3336186252394215, "learning_rate": 4.8725152877154815e-06, "loss": 0.5929, "step": 11722 }, { "epoch": 1.05, "grad_norm": 5.88418267580631, "learning_rate": 4.871793152621208e-06, "loss": 0.6251, "step": 11723 }, { "epoch": 1.05, "grad_norm": 5.9624828129040575, "learning_rate": 4.871071020202971e-06, "loss": 0.6182, "step": 11724 }, { "epoch": 1.05, "grad_norm": 6.804611481216006, "learning_rate": 4.870348890475845e-06, "loss": 0.6636, "step": 11725 }, { "epoch": 1.05, "grad_norm": 7.320686859210225, "learning_rate": 4.869626763454904e-06, "loss": 0.6106, "step": 11726 }, { "epoch": 1.05, "grad_norm": 6.793488497837014, "learning_rate": 4.868904639155215e-06, "loss": 0.5963, "step": 11727 }, { "epoch": 1.05, "grad_norm": 6.022841638407189, "learning_rate": 4.868182517591856e-06, "loss": 0.5903, "step": 11728 }, { "epoch": 1.05, "grad_norm": 6.468389857595162, "learning_rate": 4.867460398779899e-06, "loss": 0.5934, "step": 11729 }, { "epoch": 1.05, "grad_norm": 4.547977899656438, "learning_rate": 4.866738282734415e-06, "loss": 0.6292, "step": 11730 }, { "epoch": 1.05, "grad_norm": 6.274633832878775, "learning_rate": 4.866016169470477e-06, "loss": 0.6275, "step": 11731 }, { "epoch": 1.05, "grad_norm": 5.952128760952303, "learning_rate": 4.86529405900316e-06, "loss": 0.6026, "step": 11732 }, { "epoch": 1.05, "grad_norm": 5.359678376122914, "learning_rate": 4.864571951347532e-06, "loss": 0.6436, "step": 11733 }, { "epoch": 1.05, "grad_norm": 7.307390315880051, "learning_rate": 4.863849846518669e-06, "loss": 0.5927, "step": 11734 }, { "epoch": 1.05, "grad_norm": 5.530438789690461, "learning_rate": 4.863127744531643e-06, "loss": 0.6116, "step": 11735 }, { "epoch": 1.05, "grad_norm": 8.413606672276698, "learning_rate": 4.862405645401524e-06, "loss": 0.6383, "step": 11736 }, { "epoch": 1.05, "grad_norm": 4.857676247374448, "learning_rate": 4.8616835491433865e-06, "loss": 0.6541, "step": 11737 }, { "epoch": 1.05, "grad_norm": 7.364604698564956, "learning_rate": 4.860961455772304e-06, "loss": 0.6116, "step": 11738 }, { "epoch": 1.05, "grad_norm": 6.414958290502444, "learning_rate": 4.8602393653033446e-06, "loss": 0.598, "step": 11739 }, { "epoch": 1.05, "grad_norm": 8.926654397711882, "learning_rate": 4.859517277751581e-06, "loss": 0.6255, "step": 11740 }, { "epoch": 1.05, "grad_norm": 4.841402046486643, "learning_rate": 4.858795193132091e-06, "loss": 0.611, "step": 11741 }, { "epoch": 1.05, "grad_norm": 5.185621988285458, "learning_rate": 4.85807311145994e-06, "loss": 0.5298, "step": 11742 }, { "epoch": 1.05, "grad_norm": 6.757299462400628, "learning_rate": 4.857351032750201e-06, "loss": 0.5998, "step": 11743 }, { "epoch": 1.05, "grad_norm": 7.266290111563711, "learning_rate": 4.856628957017946e-06, "loss": 0.5789, "step": 11744 }, { "epoch": 1.05, "grad_norm": 6.344375036575781, "learning_rate": 4.855906884278251e-06, "loss": 0.5669, "step": 11745 }, { "epoch": 1.05, "grad_norm": 6.053005413706421, "learning_rate": 4.85518481454618e-06, "loss": 0.6027, "step": 11746 }, { "epoch": 1.05, "grad_norm": 7.564944720703027, "learning_rate": 4.854462747836811e-06, "loss": 0.587, "step": 11747 }, { "epoch": 1.05, "grad_norm": 7.1601398947009205, "learning_rate": 4.853740684165214e-06, "loss": 0.6055, "step": 11748 }, { "epoch": 1.05, "grad_norm": 8.50900557851205, "learning_rate": 4.8530186235464585e-06, "loss": 0.6081, "step": 11749 }, { "epoch": 1.05, "grad_norm": 6.5734988469081665, "learning_rate": 4.852296565995618e-06, "loss": 0.6256, "step": 11750 }, { "epoch": 1.05, "grad_norm": 5.85584956246809, "learning_rate": 4.851574511527764e-06, "loss": 0.5939, "step": 11751 }, { "epoch": 1.05, "grad_norm": 4.919077774199888, "learning_rate": 4.850852460157965e-06, "loss": 0.6636, "step": 11752 }, { "epoch": 1.05, "grad_norm": 5.864336067456951, "learning_rate": 4.850130411901296e-06, "loss": 0.6385, "step": 11753 }, { "epoch": 1.05, "grad_norm": 8.15442365548519, "learning_rate": 4.849408366772827e-06, "loss": 0.6357, "step": 11754 }, { "epoch": 1.05, "grad_norm": 5.838671404881356, "learning_rate": 4.848686324787629e-06, "loss": 0.6337, "step": 11755 }, { "epoch": 1.05, "grad_norm": 5.509834530433153, "learning_rate": 4.847964285960771e-06, "loss": 0.6563, "step": 11756 }, { "epoch": 1.05, "grad_norm": 6.368825778401634, "learning_rate": 4.847242250307324e-06, "loss": 0.6343, "step": 11757 }, { "epoch": 1.05, "grad_norm": 4.936999963245161, "learning_rate": 4.846520217842363e-06, "loss": 0.627, "step": 11758 }, { "epoch": 1.05, "grad_norm": 5.7913211824017266, "learning_rate": 4.845798188580955e-06, "loss": 0.5478, "step": 11759 }, { "epoch": 1.05, "grad_norm": 4.632696324831424, "learning_rate": 4.8450761625381735e-06, "loss": 0.6042, "step": 11760 }, { "epoch": 1.05, "grad_norm": 7.6521022926579985, "learning_rate": 4.844354139729087e-06, "loss": 0.6315, "step": 11761 }, { "epoch": 1.05, "grad_norm": 5.3153221456926945, "learning_rate": 4.843632120168767e-06, "loss": 0.6665, "step": 11762 }, { "epoch": 1.05, "grad_norm": 5.074895151985437, "learning_rate": 4.842910103872283e-06, "loss": 0.6203, "step": 11763 }, { "epoch": 1.05, "grad_norm": 6.331818924280271, "learning_rate": 4.8421880908547085e-06, "loss": 0.5445, "step": 11764 }, { "epoch": 1.05, "grad_norm": 8.368291181363428, "learning_rate": 4.841466081131111e-06, "loss": 0.6069, "step": 11765 }, { "epoch": 1.05, "grad_norm": 6.442164322778927, "learning_rate": 4.840744074716562e-06, "loss": 0.6443, "step": 11766 }, { "epoch": 1.05, "grad_norm": 7.401104469258698, "learning_rate": 4.840022071626131e-06, "loss": 0.5857, "step": 11767 }, { "epoch": 1.05, "grad_norm": 6.07560241036891, "learning_rate": 4.83930007187489e-06, "loss": 0.6009, "step": 11768 }, { "epoch": 1.05, "grad_norm": 8.475487922194178, "learning_rate": 4.838578075477907e-06, "loss": 0.5784, "step": 11769 }, { "epoch": 1.05, "grad_norm": 5.352570880922076, "learning_rate": 4.837856082450255e-06, "loss": 0.5647, "step": 11770 }, { "epoch": 1.05, "grad_norm": 6.367518751697547, "learning_rate": 4.837134092807002e-06, "loss": 0.584, "step": 11771 }, { "epoch": 1.05, "grad_norm": 6.734740283243662, "learning_rate": 4.836412106563217e-06, "loss": 0.6306, "step": 11772 }, { "epoch": 1.05, "grad_norm": 5.350988927168498, "learning_rate": 4.83569012373397e-06, "loss": 0.6096, "step": 11773 }, { "epoch": 1.05, "grad_norm": 5.202142261853159, "learning_rate": 4.834968144334334e-06, "loss": 0.5993, "step": 11774 }, { "epoch": 1.05, "grad_norm": 6.336177699501183, "learning_rate": 4.834246168379375e-06, "loss": 0.5938, "step": 11775 }, { "epoch": 1.05, "grad_norm": 6.469469410509182, "learning_rate": 4.8335241958841645e-06, "loss": 0.701, "step": 11776 }, { "epoch": 1.05, "grad_norm": 5.935301079037251, "learning_rate": 4.832802226863773e-06, "loss": 0.599, "step": 11777 }, { "epoch": 1.05, "grad_norm": 5.049831617540954, "learning_rate": 4.832080261333267e-06, "loss": 0.6467, "step": 11778 }, { "epoch": 1.05, "grad_norm": 6.864238274597081, "learning_rate": 4.831358299307718e-06, "loss": 0.5365, "step": 11779 }, { "epoch": 1.05, "grad_norm": 5.338069643283755, "learning_rate": 4.830636340802197e-06, "loss": 0.6272, "step": 11780 }, { "epoch": 1.05, "grad_norm": 7.27295084210903, "learning_rate": 4.82991438583177e-06, "loss": 0.5687, "step": 11781 }, { "epoch": 1.05, "grad_norm": 6.846659664084782, "learning_rate": 4.829192434411508e-06, "loss": 0.6196, "step": 11782 }, { "epoch": 1.05, "grad_norm": 5.668511507025311, "learning_rate": 4.828470486556481e-06, "loss": 0.628, "step": 11783 }, { "epoch": 1.05, "grad_norm": 8.249759398659982, "learning_rate": 4.8277485422817575e-06, "loss": 0.6225, "step": 11784 }, { "epoch": 1.05, "grad_norm": 7.480873922819516, "learning_rate": 4.827026601602404e-06, "loss": 0.6246, "step": 11785 }, { "epoch": 1.05, "grad_norm": 6.076827900538968, "learning_rate": 4.826304664533492e-06, "loss": 0.5912, "step": 11786 }, { "epoch": 1.05, "grad_norm": 5.688156711095468, "learning_rate": 4.82558273109009e-06, "loss": 0.5545, "step": 11787 }, { "epoch": 1.05, "grad_norm": 6.50881643696435, "learning_rate": 4.824860801287266e-06, "loss": 0.5811, "step": 11788 }, { "epoch": 1.05, "grad_norm": 7.582430602939867, "learning_rate": 4.824138875140088e-06, "loss": 0.6281, "step": 11789 }, { "epoch": 1.05, "grad_norm": 5.401939204529119, "learning_rate": 4.823416952663628e-06, "loss": 0.5781, "step": 11790 }, { "epoch": 1.05, "grad_norm": 6.629525836852875, "learning_rate": 4.822695033872951e-06, "loss": 0.6378, "step": 11791 }, { "epoch": 1.05, "grad_norm": 6.996041153766011, "learning_rate": 4.821973118783128e-06, "loss": 0.5829, "step": 11792 }, { "epoch": 1.05, "grad_norm": 6.203014446511726, "learning_rate": 4.821251207409226e-06, "loss": 0.6067, "step": 11793 }, { "epoch": 1.05, "grad_norm": 4.917285312091409, "learning_rate": 4.820529299766313e-06, "loss": 0.5868, "step": 11794 }, { "epoch": 1.05, "grad_norm": 6.455050663404512, "learning_rate": 4.819807395869459e-06, "loss": 0.6627, "step": 11795 }, { "epoch": 1.05, "grad_norm": 5.445390904022384, "learning_rate": 4.819085495733729e-06, "loss": 0.6293, "step": 11796 }, { "epoch": 1.05, "grad_norm": 5.923701501862212, "learning_rate": 4.818363599374194e-06, "loss": 0.647, "step": 11797 }, { "epoch": 1.05, "grad_norm": 7.17997364272325, "learning_rate": 4.817641706805923e-06, "loss": 0.5981, "step": 11798 }, { "epoch": 1.05, "grad_norm": 5.141109792885425, "learning_rate": 4.81691981804398e-06, "loss": 0.6331, "step": 11799 }, { "epoch": 1.05, "grad_norm": 5.254763140363157, "learning_rate": 4.816197933103436e-06, "loss": 0.6249, "step": 11800 }, { "epoch": 1.05, "grad_norm": 5.824578256902001, "learning_rate": 4.815476051999356e-06, "loss": 0.6569, "step": 11801 }, { "epoch": 1.05, "grad_norm": 6.830397383273105, "learning_rate": 4.81475417474681e-06, "loss": 0.6132, "step": 11802 }, { "epoch": 1.05, "grad_norm": 5.6828532902062205, "learning_rate": 4.814032301360865e-06, "loss": 0.6157, "step": 11803 }, { "epoch": 1.05, "grad_norm": 7.39721586283343, "learning_rate": 4.813310431856589e-06, "loss": 0.5733, "step": 11804 }, { "epoch": 1.05, "grad_norm": 5.050004135871844, "learning_rate": 4.8125885662490486e-06, "loss": 0.6101, "step": 11805 }, { "epoch": 1.05, "grad_norm": 4.677923509319573, "learning_rate": 4.811866704553311e-06, "loss": 0.6194, "step": 11806 }, { "epoch": 1.05, "grad_norm": 7.667607783776006, "learning_rate": 4.811144846784444e-06, "loss": 0.6277, "step": 11807 }, { "epoch": 1.05, "grad_norm": 6.402030388676414, "learning_rate": 4.810422992957515e-06, "loss": 0.6421, "step": 11808 }, { "epoch": 1.05, "grad_norm": 6.49257382296834, "learning_rate": 4.80970114308759e-06, "loss": 0.6322, "step": 11809 }, { "epoch": 1.05, "grad_norm": 6.363581181471328, "learning_rate": 4.808979297189738e-06, "loss": 0.6238, "step": 11810 }, { "epoch": 1.05, "grad_norm": 6.81253949584474, "learning_rate": 4.808257455279025e-06, "loss": 0.5717, "step": 11811 }, { "epoch": 1.05, "grad_norm": 5.7011622042862244, "learning_rate": 4.807535617370517e-06, "loss": 0.612, "step": 11812 }, { "epoch": 1.05, "grad_norm": 6.276964419364145, "learning_rate": 4.8068137834792834e-06, "loss": 0.5343, "step": 11813 }, { "epoch": 1.05, "grad_norm": 6.742077638653688, "learning_rate": 4.806091953620387e-06, "loss": 0.5519, "step": 11814 }, { "epoch": 1.05, "grad_norm": 6.85163615848631, "learning_rate": 4.805370127808897e-06, "loss": 0.571, "step": 11815 }, { "epoch": 1.05, "grad_norm": 7.29849572895556, "learning_rate": 4.804648306059879e-06, "loss": 0.6105, "step": 11816 }, { "epoch": 1.05, "grad_norm": 7.2822846444648865, "learning_rate": 4.803926488388399e-06, "loss": 0.6074, "step": 11817 }, { "epoch": 1.05, "grad_norm": 7.515333532615236, "learning_rate": 4.803204674809525e-06, "loss": 0.5521, "step": 11818 }, { "epoch": 1.05, "grad_norm": 7.006201338075945, "learning_rate": 4.802482865338323e-06, "loss": 0.5984, "step": 11819 }, { "epoch": 1.05, "grad_norm": 5.61751221215997, "learning_rate": 4.801761059989858e-06, "loss": 0.5718, "step": 11820 }, { "epoch": 1.05, "grad_norm": 9.082838722199938, "learning_rate": 4.801039258779197e-06, "loss": 0.5864, "step": 11821 }, { "epoch": 1.05, "grad_norm": 6.427134211738039, "learning_rate": 4.800317461721405e-06, "loss": 0.5786, "step": 11822 }, { "epoch": 1.05, "grad_norm": 5.369585005626349, "learning_rate": 4.799595668831549e-06, "loss": 0.6513, "step": 11823 }, { "epoch": 1.05, "grad_norm": 5.166857242038706, "learning_rate": 4.798873880124696e-06, "loss": 0.6642, "step": 11824 }, { "epoch": 1.05, "grad_norm": 5.4010376318464655, "learning_rate": 4.798152095615909e-06, "loss": 0.613, "step": 11825 }, { "epoch": 1.05, "grad_norm": 5.502569614241491, "learning_rate": 4.797430315320254e-06, "loss": 0.6458, "step": 11826 }, { "epoch": 1.06, "grad_norm": 8.002812342884619, "learning_rate": 4.7967085392528e-06, "loss": 0.6212, "step": 11827 }, { "epoch": 1.06, "grad_norm": 5.86572233503092, "learning_rate": 4.79598676742861e-06, "loss": 0.6678, "step": 11828 }, { "epoch": 1.06, "grad_norm": 5.845179507658644, "learning_rate": 4.795264999862747e-06, "loss": 0.6288, "step": 11829 }, { "epoch": 1.06, "grad_norm": 5.642335694734807, "learning_rate": 4.7945432365702795e-06, "loss": 0.5974, "step": 11830 }, { "epoch": 1.06, "grad_norm": 8.08874135073581, "learning_rate": 4.793821477566273e-06, "loss": 0.6038, "step": 11831 }, { "epoch": 1.06, "grad_norm": 5.960528435970326, "learning_rate": 4.793099722865791e-06, "loss": 0.6272, "step": 11832 }, { "epoch": 1.06, "grad_norm": 7.4316293474010315, "learning_rate": 4.792377972483899e-06, "loss": 0.6801, "step": 11833 }, { "epoch": 1.06, "grad_norm": 6.732853082181118, "learning_rate": 4.7916562264356625e-06, "loss": 0.641, "step": 11834 }, { "epoch": 1.06, "grad_norm": 5.455132652691217, "learning_rate": 4.790934484736146e-06, "loss": 0.6481, "step": 11835 }, { "epoch": 1.06, "grad_norm": 5.445517598424585, "learning_rate": 4.790212747400414e-06, "loss": 0.5952, "step": 11836 }, { "epoch": 1.06, "grad_norm": 4.154493168262916, "learning_rate": 4.789491014443531e-06, "loss": 0.6622, "step": 11837 }, { "epoch": 1.06, "grad_norm": 6.88869627008, "learning_rate": 4.788769285880563e-06, "loss": 0.6504, "step": 11838 }, { "epoch": 1.06, "grad_norm": 6.388500239288493, "learning_rate": 4.788047561726573e-06, "loss": 0.6478, "step": 11839 }, { "epoch": 1.06, "grad_norm": 6.861530116780925, "learning_rate": 4.7873258419966265e-06, "loss": 0.6467, "step": 11840 }, { "epoch": 1.06, "grad_norm": 5.726625825366673, "learning_rate": 4.786604126705787e-06, "loss": 0.6143, "step": 11841 }, { "epoch": 1.06, "grad_norm": 5.000359057397192, "learning_rate": 4.785882415869121e-06, "loss": 0.5954, "step": 11842 }, { "epoch": 1.06, "grad_norm": 5.514104502732833, "learning_rate": 4.7851607095016875e-06, "loss": 0.6164, "step": 11843 }, { "epoch": 1.06, "grad_norm": 6.314265722874526, "learning_rate": 4.784439007618556e-06, "loss": 0.6044, "step": 11844 }, { "epoch": 1.06, "grad_norm": 7.6285984236473725, "learning_rate": 4.7837173102347865e-06, "loss": 0.5964, "step": 11845 }, { "epoch": 1.06, "grad_norm": 6.7563929275340096, "learning_rate": 4.782995617365444e-06, "loss": 0.587, "step": 11846 }, { "epoch": 1.06, "grad_norm": 7.1149452965973135, "learning_rate": 4.7822739290255945e-06, "loss": 0.6978, "step": 11847 }, { "epoch": 1.06, "grad_norm": 4.636319833060464, "learning_rate": 4.781552245230299e-06, "loss": 0.5569, "step": 11848 }, { "epoch": 1.06, "grad_norm": 5.943608140169962, "learning_rate": 4.7808305659946215e-06, "loss": 0.5653, "step": 11849 }, { "epoch": 1.06, "grad_norm": 8.329407536551669, "learning_rate": 4.780108891333628e-06, "loss": 0.6082, "step": 11850 }, { "epoch": 1.06, "grad_norm": 7.396040370931967, "learning_rate": 4.779387221262378e-06, "loss": 0.6275, "step": 11851 }, { "epoch": 1.06, "grad_norm": 5.448320071335851, "learning_rate": 4.778665555795937e-06, "loss": 0.5853, "step": 11852 }, { "epoch": 1.06, "grad_norm": 7.3428474972492355, "learning_rate": 4.777943894949369e-06, "loss": 0.5831, "step": 11853 }, { "epoch": 1.06, "grad_norm": 4.89987424343236, "learning_rate": 4.777222238737734e-06, "loss": 0.6278, "step": 11854 }, { "epoch": 1.06, "grad_norm": 6.85224129101676, "learning_rate": 4.776500587176097e-06, "loss": 0.6732, "step": 11855 }, { "epoch": 1.06, "grad_norm": 7.75314435969929, "learning_rate": 4.775778940279524e-06, "loss": 0.6944, "step": 11856 }, { "epoch": 1.06, "grad_norm": 6.233119273525305, "learning_rate": 4.775057298063072e-06, "loss": 0.6348, "step": 11857 }, { "epoch": 1.06, "grad_norm": 6.126632780411375, "learning_rate": 4.774335660541806e-06, "loss": 0.6362, "step": 11858 }, { "epoch": 1.06, "grad_norm": 5.91072813493523, "learning_rate": 4.773614027730789e-06, "loss": 0.5663, "step": 11859 }, { "epoch": 1.06, "grad_norm": 9.123015786199197, "learning_rate": 4.772892399645084e-06, "loss": 0.6333, "step": 11860 }, { "epoch": 1.06, "grad_norm": 10.494177290373202, "learning_rate": 4.772170776299752e-06, "loss": 0.6567, "step": 11861 }, { "epoch": 1.06, "grad_norm": 6.560968873110473, "learning_rate": 4.771449157709857e-06, "loss": 0.6268, "step": 11862 }, { "epoch": 1.06, "grad_norm": 5.186380082156406, "learning_rate": 4.77072754389046e-06, "loss": 0.633, "step": 11863 }, { "epoch": 1.06, "grad_norm": 5.559468478979155, "learning_rate": 4.770005934856623e-06, "loss": 0.5925, "step": 11864 }, { "epoch": 1.06, "grad_norm": 6.2576950730321315, "learning_rate": 4.7692843306234086e-06, "loss": 0.6461, "step": 11865 }, { "epoch": 1.06, "grad_norm": 6.508809671195857, "learning_rate": 4.768562731205879e-06, "loss": 0.6593, "step": 11866 }, { "epoch": 1.06, "grad_norm": 7.196084687330982, "learning_rate": 4.767841136619094e-06, "loss": 0.5616, "step": 11867 }, { "epoch": 1.06, "grad_norm": 6.165914950035639, "learning_rate": 4.76711954687812e-06, "loss": 0.5994, "step": 11868 }, { "epoch": 1.06, "grad_norm": 8.338427517383668, "learning_rate": 4.766397961998013e-06, "loss": 0.6474, "step": 11869 }, { "epoch": 1.06, "grad_norm": 7.291790467353545, "learning_rate": 4.765676381993839e-06, "loss": 0.6168, "step": 11870 }, { "epoch": 1.06, "grad_norm": 6.247941280259271, "learning_rate": 4.764954806880656e-06, "loss": 0.5581, "step": 11871 }, { "epoch": 1.06, "grad_norm": 8.266017154126727, "learning_rate": 4.764233236673526e-06, "loss": 0.6495, "step": 11872 }, { "epoch": 1.06, "grad_norm": 4.162647284284624, "learning_rate": 4.7635116713875115e-06, "loss": 0.6009, "step": 11873 }, { "epoch": 1.06, "grad_norm": 7.009618857339073, "learning_rate": 4.762790111037673e-06, "loss": 0.6226, "step": 11874 }, { "epoch": 1.06, "grad_norm": 5.973823875207583, "learning_rate": 4.76206855563907e-06, "loss": 0.652, "step": 11875 }, { "epoch": 1.06, "grad_norm": 5.754235530560493, "learning_rate": 4.761347005206767e-06, "loss": 0.6005, "step": 11876 }, { "epoch": 1.06, "grad_norm": 6.045635320784792, "learning_rate": 4.760625459755822e-06, "loss": 0.6177, "step": 11877 }, { "epoch": 1.06, "grad_norm": 7.150281436532465, "learning_rate": 4.759903919301297e-06, "loss": 0.5876, "step": 11878 }, { "epoch": 1.06, "grad_norm": 8.518743556133074, "learning_rate": 4.7591823838582505e-06, "loss": 0.6168, "step": 11879 }, { "epoch": 1.06, "grad_norm": 6.892860003121722, "learning_rate": 4.758460853441743e-06, "loss": 0.6755, "step": 11880 }, { "epoch": 1.06, "grad_norm": 6.0778341762537345, "learning_rate": 4.75773932806684e-06, "loss": 0.5735, "step": 11881 }, { "epoch": 1.06, "grad_norm": 8.112329020544912, "learning_rate": 4.757017807748595e-06, "loss": 0.6288, "step": 11882 }, { "epoch": 1.06, "grad_norm": 6.811071585328688, "learning_rate": 4.756296292502072e-06, "loss": 0.6424, "step": 11883 }, { "epoch": 1.06, "grad_norm": 5.897903004446372, "learning_rate": 4.755574782342331e-06, "loss": 0.6563, "step": 11884 }, { "epoch": 1.06, "grad_norm": 7.372758675233637, "learning_rate": 4.754853277284433e-06, "loss": 0.6261, "step": 11885 }, { "epoch": 1.06, "grad_norm": 6.875711513889177, "learning_rate": 4.754131777343432e-06, "loss": 0.6851, "step": 11886 }, { "epoch": 1.06, "grad_norm": 6.64910225667467, "learning_rate": 4.753410282534393e-06, "loss": 0.6213, "step": 11887 }, { "epoch": 1.06, "grad_norm": 6.849204229988389, "learning_rate": 4.752688792872376e-06, "loss": 0.5927, "step": 11888 }, { "epoch": 1.06, "grad_norm": 4.831495853063509, "learning_rate": 4.751967308372436e-06, "loss": 0.5844, "step": 11889 }, { "epoch": 1.06, "grad_norm": 7.207408142039933, "learning_rate": 4.751245829049637e-06, "loss": 0.5605, "step": 11890 }, { "epoch": 1.06, "grad_norm": 7.810504368951274, "learning_rate": 4.750524354919036e-06, "loss": 0.6245, "step": 11891 }, { "epoch": 1.06, "grad_norm": 6.99641686247119, "learning_rate": 4.749802885995693e-06, "loss": 0.6413, "step": 11892 }, { "epoch": 1.06, "grad_norm": 7.546083541445912, "learning_rate": 4.749081422294666e-06, "loss": 0.6235, "step": 11893 }, { "epoch": 1.06, "grad_norm": 5.174597369933894, "learning_rate": 4.748359963831016e-06, "loss": 0.5794, "step": 11894 }, { "epoch": 1.06, "grad_norm": 6.000604768863522, "learning_rate": 4.7476385106198005e-06, "loss": 0.6354, "step": 11895 }, { "epoch": 1.06, "grad_norm": 4.851895648051762, "learning_rate": 4.746917062676078e-06, "loss": 0.6269, "step": 11896 }, { "epoch": 1.06, "grad_norm": 7.139964652166451, "learning_rate": 4.746195620014908e-06, "loss": 0.6353, "step": 11897 }, { "epoch": 1.06, "grad_norm": 7.396252719761897, "learning_rate": 4.7454741826513475e-06, "loss": 0.601, "step": 11898 }, { "epoch": 1.06, "grad_norm": 6.777922978533689, "learning_rate": 4.744752750600459e-06, "loss": 0.611, "step": 11899 }, { "epoch": 1.06, "grad_norm": 7.813611550679431, "learning_rate": 4.744031323877295e-06, "loss": 0.6517, "step": 11900 }, { "epoch": 1.06, "grad_norm": 4.87158070864444, "learning_rate": 4.743309902496918e-06, "loss": 0.6536, "step": 11901 }, { "epoch": 1.06, "grad_norm": 7.978289460246686, "learning_rate": 4.742588486474383e-06, "loss": 0.6274, "step": 11902 }, { "epoch": 1.06, "grad_norm": 5.317852709458828, "learning_rate": 4.741867075824751e-06, "loss": 0.613, "step": 11903 }, { "epoch": 1.06, "grad_norm": 7.622324237581697, "learning_rate": 4.741145670563079e-06, "loss": 0.5851, "step": 11904 }, { "epoch": 1.06, "grad_norm": 7.131163318314807, "learning_rate": 4.740424270704422e-06, "loss": 0.6736, "step": 11905 }, { "epoch": 1.06, "grad_norm": 7.473256692765911, "learning_rate": 4.739702876263841e-06, "loss": 0.6117, "step": 11906 }, { "epoch": 1.06, "grad_norm": 6.3117328819429686, "learning_rate": 4.738981487256394e-06, "loss": 0.6813, "step": 11907 }, { "epoch": 1.06, "grad_norm": 6.839169450647451, "learning_rate": 4.738260103697134e-06, "loss": 0.6269, "step": 11908 }, { "epoch": 1.06, "grad_norm": 7.381631031290111, "learning_rate": 4.737538725601122e-06, "loss": 0.665, "step": 11909 }, { "epoch": 1.06, "grad_norm": 6.281182177184, "learning_rate": 4.736817352983416e-06, "loss": 0.6542, "step": 11910 }, { "epoch": 1.06, "grad_norm": 6.199938607010687, "learning_rate": 4.73609598585907e-06, "loss": 0.6402, "step": 11911 }, { "epoch": 1.06, "grad_norm": 5.331771006385832, "learning_rate": 4.735374624243142e-06, "loss": 0.6492, "step": 11912 }, { "epoch": 1.06, "grad_norm": 8.940182995941031, "learning_rate": 4.734653268150691e-06, "loss": 0.5808, "step": 11913 }, { "epoch": 1.06, "grad_norm": 10.054085035155888, "learning_rate": 4.733931917596772e-06, "loss": 0.635, "step": 11914 }, { "epoch": 1.06, "grad_norm": 5.183574051727937, "learning_rate": 4.73321057259644e-06, "loss": 0.6629, "step": 11915 }, { "epoch": 1.06, "grad_norm": 6.792533890532135, "learning_rate": 4.732489233164753e-06, "loss": 0.545, "step": 11916 }, { "epoch": 1.06, "grad_norm": 5.396796725271783, "learning_rate": 4.731767899316769e-06, "loss": 0.5808, "step": 11917 }, { "epoch": 1.06, "grad_norm": 6.097392051451859, "learning_rate": 4.7310465710675405e-06, "loss": 0.6788, "step": 11918 }, { "epoch": 1.06, "grad_norm": 6.23340952897792, "learning_rate": 4.730325248432127e-06, "loss": 0.6729, "step": 11919 }, { "epoch": 1.06, "grad_norm": 7.876469733306199, "learning_rate": 4.729603931425583e-06, "loss": 0.6362, "step": 11920 }, { "epoch": 1.06, "grad_norm": 5.721322871942779, "learning_rate": 4.728882620062965e-06, "loss": 0.5914, "step": 11921 }, { "epoch": 1.06, "grad_norm": 5.620378790164176, "learning_rate": 4.7281613143593274e-06, "loss": 0.5667, "step": 11922 }, { "epoch": 1.06, "grad_norm": 7.2410195089417275, "learning_rate": 4.727440014329729e-06, "loss": 0.6081, "step": 11923 }, { "epoch": 1.06, "grad_norm": 6.82248528179745, "learning_rate": 4.7267187199892225e-06, "loss": 0.623, "step": 11924 }, { "epoch": 1.06, "grad_norm": 4.945231296291067, "learning_rate": 4.725997431352864e-06, "loss": 0.5736, "step": 11925 }, { "epoch": 1.06, "grad_norm": 5.427891630166787, "learning_rate": 4.725276148435709e-06, "loss": 0.6401, "step": 11926 }, { "epoch": 1.06, "grad_norm": 6.003752325857021, "learning_rate": 4.724554871252813e-06, "loss": 0.5686, "step": 11927 }, { "epoch": 1.06, "grad_norm": 6.555226987020725, "learning_rate": 4.723833599819233e-06, "loss": 0.6172, "step": 11928 }, { "epoch": 1.06, "grad_norm": 5.5114240565046275, "learning_rate": 4.7231123341500195e-06, "loss": 0.6015, "step": 11929 }, { "epoch": 1.06, "grad_norm": 6.785665601692966, "learning_rate": 4.72239107426023e-06, "loss": 0.5882, "step": 11930 }, { "epoch": 1.06, "grad_norm": 7.691055845420415, "learning_rate": 4.721669820164918e-06, "loss": 0.6564, "step": 11931 }, { "epoch": 1.06, "grad_norm": 8.165784557960034, "learning_rate": 4.720948571879141e-06, "loss": 0.6232, "step": 11932 }, { "epoch": 1.06, "grad_norm": 7.5009923279236155, "learning_rate": 4.72022732941795e-06, "loss": 0.6251, "step": 11933 }, { "epoch": 1.06, "grad_norm": 5.776074208951971, "learning_rate": 4.719506092796401e-06, "loss": 0.5962, "step": 11934 }, { "epoch": 1.06, "grad_norm": 6.641402152127873, "learning_rate": 4.718784862029547e-06, "loss": 0.5992, "step": 11935 }, { "epoch": 1.06, "grad_norm": 4.549928946557599, "learning_rate": 4.718063637132445e-06, "loss": 0.655, "step": 11936 }, { "epoch": 1.06, "grad_norm": 8.362204629935281, "learning_rate": 4.717342418120146e-06, "loss": 0.6465, "step": 11937 }, { "epoch": 1.06, "grad_norm": 5.900429216286781, "learning_rate": 4.716621205007705e-06, "loss": 0.6326, "step": 11938 }, { "epoch": 1.07, "grad_norm": 5.901279048389313, "learning_rate": 4.715899997810177e-06, "loss": 0.5977, "step": 11939 }, { "epoch": 1.07, "grad_norm": 7.588033708835188, "learning_rate": 4.715178796542613e-06, "loss": 0.6055, "step": 11940 }, { "epoch": 1.07, "grad_norm": 7.664661152385146, "learning_rate": 4.714457601220068e-06, "loss": 0.6116, "step": 11941 }, { "epoch": 1.07, "grad_norm": 6.115518544087896, "learning_rate": 4.713736411857597e-06, "loss": 0.5694, "step": 11942 }, { "epoch": 1.07, "grad_norm": 6.0910672246913755, "learning_rate": 4.713015228470251e-06, "loss": 0.6039, "step": 11943 }, { "epoch": 1.07, "grad_norm": 5.812483987768947, "learning_rate": 4.712294051073082e-06, "loss": 0.6456, "step": 11944 }, { "epoch": 1.07, "grad_norm": 6.934580418338028, "learning_rate": 4.711572879681145e-06, "loss": 0.6702, "step": 11945 }, { "epoch": 1.07, "grad_norm": 4.325186950965554, "learning_rate": 4.710851714309494e-06, "loss": 0.5976, "step": 11946 }, { "epoch": 1.07, "grad_norm": 4.368591349085769, "learning_rate": 4.710130554973179e-06, "loss": 0.5621, "step": 11947 }, { "epoch": 1.07, "grad_norm": 6.333223989048208, "learning_rate": 4.709409401687253e-06, "loss": 0.6736, "step": 11948 }, { "epoch": 1.07, "grad_norm": 6.370523091843543, "learning_rate": 4.708688254466771e-06, "loss": 0.6514, "step": 11949 }, { "epoch": 1.07, "grad_norm": 6.394385442368738, "learning_rate": 4.707967113326782e-06, "loss": 0.6171, "step": 11950 }, { "epoch": 1.07, "grad_norm": 7.229174179025565, "learning_rate": 4.707245978282341e-06, "loss": 0.6465, "step": 11951 }, { "epoch": 1.07, "grad_norm": 7.3671810740176005, "learning_rate": 4.7065248493485e-06, "loss": 0.6482, "step": 11952 }, { "epoch": 1.07, "grad_norm": 6.403478234840632, "learning_rate": 4.705803726540308e-06, "loss": 0.6063, "step": 11953 }, { "epoch": 1.07, "grad_norm": 6.684188497482172, "learning_rate": 4.705082609872822e-06, "loss": 0.578, "step": 11954 }, { "epoch": 1.07, "grad_norm": 6.092798056155695, "learning_rate": 4.704361499361087e-06, "loss": 0.6437, "step": 11955 }, { "epoch": 1.07, "grad_norm": 5.462642036755844, "learning_rate": 4.7036403950201605e-06, "loss": 0.689, "step": 11956 }, { "epoch": 1.07, "grad_norm": 7.448240886193026, "learning_rate": 4.7029192968650926e-06, "loss": 0.538, "step": 11957 }, { "epoch": 1.07, "grad_norm": 6.104863429324404, "learning_rate": 4.702198204910932e-06, "loss": 0.6002, "step": 11958 }, { "epoch": 1.07, "grad_norm": 5.030524612116136, "learning_rate": 4.701477119172733e-06, "loss": 0.5963, "step": 11959 }, { "epoch": 1.07, "grad_norm": 7.072310648305771, "learning_rate": 4.700756039665544e-06, "loss": 0.6052, "step": 11960 }, { "epoch": 1.07, "grad_norm": 10.157621447263258, "learning_rate": 4.700034966404418e-06, "loss": 0.614, "step": 11961 }, { "epoch": 1.07, "grad_norm": 5.49994185495951, "learning_rate": 4.6993138994044055e-06, "loss": 0.6113, "step": 11962 }, { "epoch": 1.07, "grad_norm": 6.400381930664456, "learning_rate": 4.698592838680556e-06, "loss": 0.5192, "step": 11963 }, { "epoch": 1.07, "grad_norm": 7.310687894089106, "learning_rate": 4.697871784247922e-06, "loss": 0.6134, "step": 11964 }, { "epoch": 1.07, "grad_norm": 5.742090377180881, "learning_rate": 4.6971507361215515e-06, "loss": 0.6166, "step": 11965 }, { "epoch": 1.07, "grad_norm": 8.950683256385949, "learning_rate": 4.696429694316496e-06, "loss": 0.6781, "step": 11966 }, { "epoch": 1.07, "grad_norm": 5.588721792258092, "learning_rate": 4.695708658847808e-06, "loss": 0.6797, "step": 11967 }, { "epoch": 1.07, "grad_norm": 5.15859957091745, "learning_rate": 4.694987629730533e-06, "loss": 0.6667, "step": 11968 }, { "epoch": 1.07, "grad_norm": 6.601892782632707, "learning_rate": 4.6942666069797235e-06, "loss": 0.6441, "step": 11969 }, { "epoch": 1.07, "grad_norm": 6.166572157612888, "learning_rate": 4.693545590610431e-06, "loss": 0.5799, "step": 11970 }, { "epoch": 1.07, "grad_norm": 8.01804035131906, "learning_rate": 4.692824580637704e-06, "loss": 0.7079, "step": 11971 }, { "epoch": 1.07, "grad_norm": 7.277492839877546, "learning_rate": 4.692103577076588e-06, "loss": 0.5903, "step": 11972 }, { "epoch": 1.07, "grad_norm": 7.014439720204969, "learning_rate": 4.691382579942136e-06, "loss": 0.6059, "step": 11973 }, { "epoch": 1.07, "grad_norm": 5.876319343459802, "learning_rate": 4.690661589249399e-06, "loss": 0.609, "step": 11974 }, { "epoch": 1.07, "grad_norm": 8.672792765858931, "learning_rate": 4.689940605013422e-06, "loss": 0.6103, "step": 11975 }, { "epoch": 1.07, "grad_norm": 5.824626051923619, "learning_rate": 4.689219627249255e-06, "loss": 0.654, "step": 11976 }, { "epoch": 1.07, "grad_norm": 5.8295520323059025, "learning_rate": 4.68849865597195e-06, "loss": 0.5675, "step": 11977 }, { "epoch": 1.07, "grad_norm": 5.326408509111778, "learning_rate": 4.687777691196551e-06, "loss": 0.6153, "step": 11978 }, { "epoch": 1.07, "grad_norm": 5.481471536771306, "learning_rate": 4.687056732938111e-06, "loss": 0.6034, "step": 11979 }, { "epoch": 1.07, "grad_norm": 5.2155806010816335, "learning_rate": 4.6863357812116765e-06, "loss": 0.6456, "step": 11980 }, { "epoch": 1.07, "grad_norm": 5.744998030095122, "learning_rate": 4.685614836032294e-06, "loss": 0.59, "step": 11981 }, { "epoch": 1.07, "grad_norm": 5.908646758444147, "learning_rate": 4.684893897415013e-06, "loss": 0.6127, "step": 11982 }, { "epoch": 1.07, "grad_norm": 6.84652498101671, "learning_rate": 4.684172965374883e-06, "loss": 0.585, "step": 11983 }, { "epoch": 1.07, "grad_norm": 5.2165932581120185, "learning_rate": 4.683452039926951e-06, "loss": 0.5844, "step": 11984 }, { "epoch": 1.07, "grad_norm": 6.329453446115876, "learning_rate": 4.682731121086263e-06, "loss": 0.6003, "step": 11985 }, { "epoch": 1.07, "grad_norm": 5.277953752776681, "learning_rate": 4.682010208867872e-06, "loss": 0.5871, "step": 11986 }, { "epoch": 1.07, "grad_norm": 5.83383052371514, "learning_rate": 4.681289303286818e-06, "loss": 0.5969, "step": 11987 }, { "epoch": 1.07, "grad_norm": 5.666121518606537, "learning_rate": 4.6805684043581525e-06, "loss": 0.575, "step": 11988 }, { "epoch": 1.07, "grad_norm": 5.915760763976355, "learning_rate": 4.679847512096921e-06, "loss": 0.5542, "step": 11989 }, { "epoch": 1.07, "grad_norm": 7.5848834101012965, "learning_rate": 4.679126626518174e-06, "loss": 0.6229, "step": 11990 }, { "epoch": 1.07, "grad_norm": 7.1866452223808315, "learning_rate": 4.678405747636954e-06, "loss": 0.5676, "step": 11991 }, { "epoch": 1.07, "grad_norm": 7.0879100419871355, "learning_rate": 4.677684875468309e-06, "loss": 0.5744, "step": 11992 }, { "epoch": 1.07, "grad_norm": 6.127791495541252, "learning_rate": 4.676964010027288e-06, "loss": 0.6335, "step": 11993 }, { "epoch": 1.07, "grad_norm": 6.569321927022114, "learning_rate": 4.676243151328936e-06, "loss": 0.6087, "step": 11994 }, { "epoch": 1.07, "grad_norm": 6.304489267013175, "learning_rate": 4.675522299388298e-06, "loss": 0.603, "step": 11995 }, { "epoch": 1.07, "grad_norm": 5.3111917061422895, "learning_rate": 4.674801454220423e-06, "loss": 0.6719, "step": 11996 }, { "epoch": 1.07, "grad_norm": 7.824395466447082, "learning_rate": 4.674080615840353e-06, "loss": 0.604, "step": 11997 }, { "epoch": 1.07, "grad_norm": 5.538552550467098, "learning_rate": 4.673359784263137e-06, "loss": 0.6104, "step": 11998 }, { "epoch": 1.07, "grad_norm": 5.2883614185236345, "learning_rate": 4.672638959503822e-06, "loss": 0.6115, "step": 11999 }, { "epoch": 1.07, "grad_norm": 5.984832050114474, "learning_rate": 4.671918141577453e-06, "loss": 0.5792, "step": 12000 }, { "epoch": 1.07, "grad_norm": 7.4149865572395655, "learning_rate": 4.6711973304990715e-06, "loss": 0.5777, "step": 12001 }, { "epoch": 1.07, "grad_norm": 5.393975561739815, "learning_rate": 4.670476526283726e-06, "loss": 0.5811, "step": 12002 }, { "epoch": 1.07, "grad_norm": 7.058309309427976, "learning_rate": 4.669755728946462e-06, "loss": 0.6278, "step": 12003 }, { "epoch": 1.07, "grad_norm": 6.237332933206511, "learning_rate": 4.6690349385023236e-06, "loss": 0.6641, "step": 12004 }, { "epoch": 1.07, "grad_norm": 7.595909316875707, "learning_rate": 4.6683141549663555e-06, "loss": 0.5979, "step": 12005 }, { "epoch": 1.07, "grad_norm": 5.8455491370505674, "learning_rate": 4.667593378353604e-06, "loss": 0.5988, "step": 12006 }, { "epoch": 1.07, "grad_norm": 8.44536537519205, "learning_rate": 4.666872608679112e-06, "loss": 0.5795, "step": 12007 }, { "epoch": 1.07, "grad_norm": 6.162995820974451, "learning_rate": 4.666151845957925e-06, "loss": 0.6203, "step": 12008 }, { "epoch": 1.07, "grad_norm": 5.9392579739231515, "learning_rate": 4.665431090205088e-06, "loss": 0.6572, "step": 12009 }, { "epoch": 1.07, "grad_norm": 7.540427555049084, "learning_rate": 4.664710341435643e-06, "loss": 0.6462, "step": 12010 }, { "epoch": 1.07, "grad_norm": 6.0895665213782495, "learning_rate": 4.663989599664635e-06, "loss": 0.5421, "step": 12011 }, { "epoch": 1.07, "grad_norm": 5.9701478819068665, "learning_rate": 4.66326886490711e-06, "loss": 0.5854, "step": 12012 }, { "epoch": 1.07, "grad_norm": 5.825920288168263, "learning_rate": 4.662548137178109e-06, "loss": 0.5791, "step": 12013 }, { "epoch": 1.07, "grad_norm": 6.457803568835402, "learning_rate": 4.661827416492676e-06, "loss": 0.5463, "step": 12014 }, { "epoch": 1.07, "grad_norm": 6.096793792702986, "learning_rate": 4.661106702865858e-06, "loss": 0.6094, "step": 12015 }, { "epoch": 1.07, "grad_norm": 4.8748151496922585, "learning_rate": 4.660385996312694e-06, "loss": 0.6325, "step": 12016 }, { "epoch": 1.07, "grad_norm": 4.868297345700508, "learning_rate": 4.659665296848227e-06, "loss": 0.5743, "step": 12017 }, { "epoch": 1.07, "grad_norm": 5.914928349924997, "learning_rate": 4.658944604487502e-06, "loss": 0.6589, "step": 12018 }, { "epoch": 1.07, "grad_norm": 5.607176621052412, "learning_rate": 4.658223919245563e-06, "loss": 0.6068, "step": 12019 }, { "epoch": 1.07, "grad_norm": 7.598039529525154, "learning_rate": 4.65750324113745e-06, "loss": 0.6663, "step": 12020 }, { "epoch": 1.07, "grad_norm": 5.921123864964928, "learning_rate": 4.656782570178206e-06, "loss": 0.5816, "step": 12021 }, { "epoch": 1.07, "grad_norm": 6.619711791958025, "learning_rate": 4.656061906382876e-06, "loss": 0.6152, "step": 12022 }, { "epoch": 1.07, "grad_norm": 4.9808794448671785, "learning_rate": 4.655341249766499e-06, "loss": 0.6362, "step": 12023 }, { "epoch": 1.07, "grad_norm": 6.660330849402079, "learning_rate": 4.6546206003441195e-06, "loss": 0.6472, "step": 12024 }, { "epoch": 1.07, "grad_norm": 9.36586807806784, "learning_rate": 4.653899958130779e-06, "loss": 0.635, "step": 12025 }, { "epoch": 1.07, "grad_norm": 6.833453452794807, "learning_rate": 4.6531793231415174e-06, "loss": 0.5625, "step": 12026 }, { "epoch": 1.07, "grad_norm": 5.78080109161468, "learning_rate": 4.652458695391379e-06, "loss": 0.677, "step": 12027 }, { "epoch": 1.07, "grad_norm": 5.111010727221864, "learning_rate": 4.651738074895404e-06, "loss": 0.5562, "step": 12028 }, { "epoch": 1.07, "grad_norm": 7.363112814335502, "learning_rate": 4.651017461668636e-06, "loss": 0.5856, "step": 12029 }, { "epoch": 1.07, "grad_norm": 6.653322963230796, "learning_rate": 4.650296855726112e-06, "loss": 0.6553, "step": 12030 }, { "epoch": 1.07, "grad_norm": 5.697609573549606, "learning_rate": 4.6495762570828755e-06, "loss": 0.5515, "step": 12031 }, { "epoch": 1.07, "grad_norm": 9.313159098032665, "learning_rate": 4.648855665753968e-06, "loss": 0.6253, "step": 12032 }, { "epoch": 1.07, "grad_norm": 9.231770424228843, "learning_rate": 4.648135081754428e-06, "loss": 0.6165, "step": 12033 }, { "epoch": 1.07, "grad_norm": 10.392286096453311, "learning_rate": 4.647414505099298e-06, "loss": 0.6418, "step": 12034 }, { "epoch": 1.07, "grad_norm": 8.068453116538683, "learning_rate": 4.646693935803619e-06, "loss": 0.642, "step": 12035 }, { "epoch": 1.07, "grad_norm": 5.029806263517552, "learning_rate": 4.6459733738824295e-06, "loss": 0.6183, "step": 12036 }, { "epoch": 1.07, "grad_norm": 4.756783589564232, "learning_rate": 4.645252819350771e-06, "loss": 0.5826, "step": 12037 }, { "epoch": 1.07, "grad_norm": 6.494720613584122, "learning_rate": 4.644532272223683e-06, "loss": 0.5881, "step": 12038 }, { "epoch": 1.07, "grad_norm": 6.065618648146596, "learning_rate": 4.643811732516205e-06, "loss": 0.6171, "step": 12039 }, { "epoch": 1.07, "grad_norm": 6.146332572856123, "learning_rate": 4.643091200243378e-06, "loss": 0.6171, "step": 12040 }, { "epoch": 1.07, "grad_norm": 5.6333547328821485, "learning_rate": 4.642370675420239e-06, "loss": 0.6158, "step": 12041 }, { "epoch": 1.07, "grad_norm": 6.5961002297373765, "learning_rate": 4.641650158061829e-06, "loss": 0.6489, "step": 12042 }, { "epoch": 1.07, "grad_norm": 5.647150772168137, "learning_rate": 4.64092964818319e-06, "loss": 0.5965, "step": 12043 }, { "epoch": 1.07, "grad_norm": 5.653058234789503, "learning_rate": 4.640209145799356e-06, "loss": 0.6157, "step": 12044 }, { "epoch": 1.07, "grad_norm": 7.4688926001170985, "learning_rate": 4.639488650925369e-06, "loss": 0.6581, "step": 12045 }, { "epoch": 1.07, "grad_norm": 6.558730494480607, "learning_rate": 4.6387681635762655e-06, "loss": 0.6058, "step": 12046 }, { "epoch": 1.07, "grad_norm": 7.334274446442271, "learning_rate": 4.638047683767087e-06, "loss": 0.636, "step": 12047 }, { "epoch": 1.07, "grad_norm": 6.731253096415271, "learning_rate": 4.637327211512869e-06, "loss": 0.5802, "step": 12048 }, { "epoch": 1.07, "grad_norm": 6.41220219880376, "learning_rate": 4.636606746828651e-06, "loss": 0.6648, "step": 12049 }, { "epoch": 1.07, "grad_norm": 7.096633479035297, "learning_rate": 4.635886289729474e-06, "loss": 0.5981, "step": 12050 }, { "epoch": 1.08, "grad_norm": 6.72637533998417, "learning_rate": 4.635165840230371e-06, "loss": 0.5717, "step": 12051 }, { "epoch": 1.08, "grad_norm": 6.496541841386946, "learning_rate": 4.634445398346382e-06, "loss": 0.622, "step": 12052 }, { "epoch": 1.08, "grad_norm": 4.347419132950107, "learning_rate": 4.633724964092546e-06, "loss": 0.624, "step": 12053 }, { "epoch": 1.08, "grad_norm": 5.15919707006026, "learning_rate": 4.633004537483898e-06, "loss": 0.6193, "step": 12054 }, { "epoch": 1.08, "grad_norm": 5.383270221134732, "learning_rate": 4.632284118535477e-06, "loss": 0.6165, "step": 12055 }, { "epoch": 1.08, "grad_norm": 5.795948583368702, "learning_rate": 4.631563707262321e-06, "loss": 0.596, "step": 12056 }, { "epoch": 1.08, "grad_norm": 6.391270540430426, "learning_rate": 4.630843303679464e-06, "loss": 0.6642, "step": 12057 }, { "epoch": 1.08, "grad_norm": 5.002638898551668, "learning_rate": 4.630122907801947e-06, "loss": 0.5754, "step": 12058 }, { "epoch": 1.08, "grad_norm": 6.4147721428471245, "learning_rate": 4.629402519644803e-06, "loss": 0.6472, "step": 12059 }, { "epoch": 1.08, "grad_norm": 7.3653384128528545, "learning_rate": 4.6286821392230695e-06, "loss": 0.5081, "step": 12060 }, { "epoch": 1.08, "grad_norm": 6.095476539842993, "learning_rate": 4.627961766551783e-06, "loss": 0.6619, "step": 12061 }, { "epoch": 1.08, "grad_norm": 4.946561731677316, "learning_rate": 4.627241401645979e-06, "loss": 0.6149, "step": 12062 }, { "epoch": 1.08, "grad_norm": 8.291245431005313, "learning_rate": 4.626521044520696e-06, "loss": 0.5789, "step": 12063 }, { "epoch": 1.08, "grad_norm": 4.96777229375153, "learning_rate": 4.625800695190967e-06, "loss": 0.656, "step": 12064 }, { "epoch": 1.08, "grad_norm": 6.031272621498209, "learning_rate": 4.6250803536718294e-06, "loss": 0.5864, "step": 12065 }, { "epoch": 1.08, "grad_norm": 6.785223336403202, "learning_rate": 4.62436001997832e-06, "loss": 0.5914, "step": 12066 }, { "epoch": 1.08, "grad_norm": 6.414874728157363, "learning_rate": 4.6236396941254704e-06, "loss": 0.6072, "step": 12067 }, { "epoch": 1.08, "grad_norm": 4.699618381456815, "learning_rate": 4.622919376128318e-06, "loss": 0.6361, "step": 12068 }, { "epoch": 1.08, "grad_norm": 7.4551168138029436, "learning_rate": 4.622199066001899e-06, "loss": 0.6375, "step": 12069 }, { "epoch": 1.08, "grad_norm": 5.768849779864432, "learning_rate": 4.621478763761247e-06, "loss": 0.5957, "step": 12070 }, { "epoch": 1.08, "grad_norm": 5.224053711821635, "learning_rate": 4.620758469421396e-06, "loss": 0.6289, "step": 12071 }, { "epoch": 1.08, "grad_norm": 6.745967093039336, "learning_rate": 4.620038182997384e-06, "loss": 0.6312, "step": 12072 }, { "epoch": 1.08, "grad_norm": 5.539237950229194, "learning_rate": 4.619317904504241e-06, "loss": 0.6018, "step": 12073 }, { "epoch": 1.08, "grad_norm": 5.790032094884742, "learning_rate": 4.618597633957003e-06, "loss": 0.6136, "step": 12074 }, { "epoch": 1.08, "grad_norm": 7.340390396466589, "learning_rate": 4.617877371370703e-06, "loss": 0.6594, "step": 12075 }, { "epoch": 1.08, "grad_norm": 8.871324533243085, "learning_rate": 4.6171571167603776e-06, "loss": 0.6071, "step": 12076 }, { "epoch": 1.08, "grad_norm": 7.983024231892863, "learning_rate": 4.616436870141059e-06, "loss": 0.5835, "step": 12077 }, { "epoch": 1.08, "grad_norm": 6.223636743431161, "learning_rate": 4.615716631527779e-06, "loss": 0.5653, "step": 12078 }, { "epoch": 1.08, "grad_norm": 7.996878332110762, "learning_rate": 4.6149964009355745e-06, "loss": 0.6191, "step": 12079 }, { "epoch": 1.08, "grad_norm": 5.851596321400062, "learning_rate": 4.614276178379476e-06, "loss": 0.5474, "step": 12080 }, { "epoch": 1.08, "grad_norm": 5.694528085403803, "learning_rate": 4.613555963874517e-06, "loss": 0.6374, "step": 12081 }, { "epoch": 1.08, "grad_norm": 6.710077790560148, "learning_rate": 4.612835757435733e-06, "loss": 0.6029, "step": 12082 }, { "epoch": 1.08, "grad_norm": 5.032301855615928, "learning_rate": 4.612115559078154e-06, "loss": 0.6318, "step": 12083 }, { "epoch": 1.08, "grad_norm": 6.114690617818625, "learning_rate": 4.611395368816811e-06, "loss": 0.5928, "step": 12084 }, { "epoch": 1.08, "grad_norm": 5.058917552582104, "learning_rate": 4.610675186666742e-06, "loss": 0.6296, "step": 12085 }, { "epoch": 1.08, "grad_norm": 5.951238036531276, "learning_rate": 4.609955012642974e-06, "loss": 0.5849, "step": 12086 }, { "epoch": 1.08, "grad_norm": 5.185561162875943, "learning_rate": 4.609234846760543e-06, "loss": 0.5993, "step": 12087 }, { "epoch": 1.08, "grad_norm": 6.813865926986282, "learning_rate": 4.608514689034477e-06, "loss": 0.5751, "step": 12088 }, { "epoch": 1.08, "grad_norm": 7.206999276422698, "learning_rate": 4.60779453947981e-06, "loss": 0.637, "step": 12089 }, { "epoch": 1.08, "grad_norm": 5.215976501075295, "learning_rate": 4.607074398111572e-06, "loss": 0.5922, "step": 12090 }, { "epoch": 1.08, "grad_norm": 6.887408112606375, "learning_rate": 4.606354264944796e-06, "loss": 0.6145, "step": 12091 }, { "epoch": 1.08, "grad_norm": 7.543329313808365, "learning_rate": 4.605634139994514e-06, "loss": 0.6157, "step": 12092 }, { "epoch": 1.08, "grad_norm": 6.554584925375946, "learning_rate": 4.604914023275754e-06, "loss": 0.6379, "step": 12093 }, { "epoch": 1.08, "grad_norm": 6.453022567297067, "learning_rate": 4.6041939148035484e-06, "loss": 0.6074, "step": 12094 }, { "epoch": 1.08, "grad_norm": 7.323731010552585, "learning_rate": 4.603473814592929e-06, "loss": 0.6222, "step": 12095 }, { "epoch": 1.08, "grad_norm": 7.636118104825007, "learning_rate": 4.6027537226589246e-06, "loss": 0.6914, "step": 12096 }, { "epoch": 1.08, "grad_norm": 6.080530567479221, "learning_rate": 4.602033639016566e-06, "loss": 0.6055, "step": 12097 }, { "epoch": 1.08, "grad_norm": 7.671317788299645, "learning_rate": 4.601313563680886e-06, "loss": 0.5837, "step": 12098 }, { "epoch": 1.08, "grad_norm": 6.583909578517009, "learning_rate": 4.6005934966669094e-06, "loss": 0.5916, "step": 12099 }, { "epoch": 1.08, "grad_norm": 7.981229278583459, "learning_rate": 4.599873437989671e-06, "loss": 0.6388, "step": 12100 }, { "epoch": 1.08, "grad_norm": 6.224771115811171, "learning_rate": 4.599153387664198e-06, "loss": 0.6077, "step": 12101 }, { "epoch": 1.08, "grad_norm": 6.8750182066549, "learning_rate": 4.598433345705521e-06, "loss": 0.6137, "step": 12102 }, { "epoch": 1.08, "grad_norm": 6.248827198351331, "learning_rate": 4.597713312128666e-06, "loss": 0.6419, "step": 12103 }, { "epoch": 1.08, "grad_norm": 5.1422709658653405, "learning_rate": 4.596993286948666e-06, "loss": 0.577, "step": 12104 }, { "epoch": 1.08, "grad_norm": 6.566292236254606, "learning_rate": 4.596273270180549e-06, "loss": 0.6171, "step": 12105 }, { "epoch": 1.08, "grad_norm": 7.428699674564249, "learning_rate": 4.595553261839341e-06, "loss": 0.7029, "step": 12106 }, { "epoch": 1.08, "grad_norm": 7.527981573468961, "learning_rate": 4.594833261940075e-06, "loss": 0.621, "step": 12107 }, { "epoch": 1.08, "grad_norm": 6.978220492474521, "learning_rate": 4.594113270497777e-06, "loss": 0.6035, "step": 12108 }, { "epoch": 1.08, "grad_norm": 6.111645892068383, "learning_rate": 4.593393287527475e-06, "loss": 0.6482, "step": 12109 }, { "epoch": 1.08, "grad_norm": 6.415084100878169, "learning_rate": 4.592673313044198e-06, "loss": 0.6085, "step": 12110 }, { "epoch": 1.08, "grad_norm": 7.583565054801552, "learning_rate": 4.591953347062975e-06, "loss": 0.6171, "step": 12111 }, { "epoch": 1.08, "grad_norm": 6.488551658051941, "learning_rate": 4.5912333895988295e-06, "loss": 0.6039, "step": 12112 }, { "epoch": 1.08, "grad_norm": 6.726993501881336, "learning_rate": 4.5905134406667945e-06, "loss": 0.5843, "step": 12113 }, { "epoch": 1.08, "grad_norm": 6.513465220979658, "learning_rate": 4.589793500281893e-06, "loss": 0.5505, "step": 12114 }, { "epoch": 1.08, "grad_norm": 5.31402293533623, "learning_rate": 4.589073568459153e-06, "loss": 0.6167, "step": 12115 }, { "epoch": 1.08, "grad_norm": 6.184833878208935, "learning_rate": 4.588353645213606e-06, "loss": 0.6717, "step": 12116 }, { "epoch": 1.08, "grad_norm": 6.9358038801035455, "learning_rate": 4.5876337305602726e-06, "loss": 0.6221, "step": 12117 }, { "epoch": 1.08, "grad_norm": 7.133085220955131, "learning_rate": 4.586913824514183e-06, "loss": 0.5934, "step": 12118 }, { "epoch": 1.08, "grad_norm": 8.945281789441415, "learning_rate": 4.586193927090361e-06, "loss": 0.6165, "step": 12119 }, { "epoch": 1.08, "grad_norm": 6.508938074865498, "learning_rate": 4.585474038303835e-06, "loss": 0.6159, "step": 12120 }, { "epoch": 1.08, "grad_norm": 5.449734299850156, "learning_rate": 4.584754158169632e-06, "loss": 0.6682, "step": 12121 }, { "epoch": 1.08, "grad_norm": 6.03471414845222, "learning_rate": 4.584034286702775e-06, "loss": 0.5437, "step": 12122 }, { "epoch": 1.08, "grad_norm": 4.923315564435925, "learning_rate": 4.583314423918293e-06, "loss": 0.5731, "step": 12123 }, { "epoch": 1.08, "grad_norm": 8.767391181473451, "learning_rate": 4.582594569831209e-06, "loss": 0.6819, "step": 12124 }, { "epoch": 1.08, "grad_norm": 5.614699790474262, "learning_rate": 4.5818747244565486e-06, "loss": 0.5999, "step": 12125 }, { "epoch": 1.08, "grad_norm": 5.075181609663835, "learning_rate": 4.581154887809339e-06, "loss": 0.7166, "step": 12126 }, { "epoch": 1.08, "grad_norm": 6.07651540682775, "learning_rate": 4.580435059904602e-06, "loss": 0.5571, "step": 12127 }, { "epoch": 1.08, "grad_norm": 6.394427333263704, "learning_rate": 4.579715240757365e-06, "loss": 0.5815, "step": 12128 }, { "epoch": 1.08, "grad_norm": 5.845883085898629, "learning_rate": 4.578995430382653e-06, "loss": 0.5452, "step": 12129 }, { "epoch": 1.08, "grad_norm": 7.1896648307884625, "learning_rate": 4.578275628795489e-06, "loss": 0.6524, "step": 12130 }, { "epoch": 1.08, "grad_norm": 7.8357344493109515, "learning_rate": 4.577555836010898e-06, "loss": 0.5971, "step": 12131 }, { "epoch": 1.08, "grad_norm": 6.679404785601094, "learning_rate": 4.576836052043903e-06, "loss": 0.5557, "step": 12132 }, { "epoch": 1.08, "grad_norm": 5.708870363318491, "learning_rate": 4.57611627690953e-06, "loss": 0.6347, "step": 12133 }, { "epoch": 1.08, "grad_norm": 6.309345281386245, "learning_rate": 4.5753965106228e-06, "loss": 0.6963, "step": 12134 }, { "epoch": 1.08, "grad_norm": 6.331666062172255, "learning_rate": 4.574676753198737e-06, "loss": 0.6839, "step": 12135 }, { "epoch": 1.08, "grad_norm": 5.2793206163002475, "learning_rate": 4.573957004652368e-06, "loss": 0.7098, "step": 12136 }, { "epoch": 1.08, "grad_norm": 8.603115721946924, "learning_rate": 4.573237264998711e-06, "loss": 0.6989, "step": 12137 }, { "epoch": 1.08, "grad_norm": 6.794590622181864, "learning_rate": 4.572517534252793e-06, "loss": 0.5644, "step": 12138 }, { "epoch": 1.08, "grad_norm": 7.158617160545257, "learning_rate": 4.5717978124296356e-06, "loss": 0.61, "step": 12139 }, { "epoch": 1.08, "grad_norm": 8.536619640600705, "learning_rate": 4.57107809954426e-06, "loss": 0.6448, "step": 12140 }, { "epoch": 1.08, "grad_norm": 7.4351472978341615, "learning_rate": 4.570358395611689e-06, "loss": 0.6132, "step": 12141 }, { "epoch": 1.08, "grad_norm": 6.091325851212444, "learning_rate": 4.569638700646948e-06, "loss": 0.6084, "step": 12142 }, { "epoch": 1.08, "grad_norm": 5.482410429409856, "learning_rate": 4.568919014665055e-06, "loss": 0.6077, "step": 12143 }, { "epoch": 1.08, "grad_norm": 6.254367473884244, "learning_rate": 4.568199337681035e-06, "loss": 0.6737, "step": 12144 }, { "epoch": 1.08, "grad_norm": 4.2087272698140605, "learning_rate": 4.567479669709907e-06, "loss": 0.6127, "step": 12145 }, { "epoch": 1.08, "grad_norm": 6.649835923055252, "learning_rate": 4.566760010766695e-06, "loss": 0.6204, "step": 12146 }, { "epoch": 1.08, "grad_norm": 5.936716943874863, "learning_rate": 4.566040360866417e-06, "loss": 0.5985, "step": 12147 }, { "epoch": 1.08, "grad_norm": 6.183499230430857, "learning_rate": 4.565320720024097e-06, "loss": 0.577, "step": 12148 }, { "epoch": 1.08, "grad_norm": 6.6376103150746495, "learning_rate": 4.564601088254755e-06, "loss": 0.6088, "step": 12149 }, { "epoch": 1.08, "grad_norm": 5.96148309397227, "learning_rate": 4.563881465573411e-06, "loss": 0.6765, "step": 12150 }, { "epoch": 1.08, "grad_norm": 8.942533286014006, "learning_rate": 4.563161851995087e-06, "loss": 0.6497, "step": 12151 }, { "epoch": 1.08, "grad_norm": 7.004761348988787, "learning_rate": 4.562442247534803e-06, "loss": 0.5874, "step": 12152 }, { "epoch": 1.08, "grad_norm": 5.800899992565953, "learning_rate": 4.5617226522075784e-06, "loss": 0.698, "step": 12153 }, { "epoch": 1.08, "grad_norm": 6.2971476643812965, "learning_rate": 4.561003066028434e-06, "loss": 0.5868, "step": 12154 }, { "epoch": 1.08, "grad_norm": 5.901897579845932, "learning_rate": 4.5602834890123895e-06, "loss": 0.6263, "step": 12155 }, { "epoch": 1.08, "grad_norm": 5.35392700242602, "learning_rate": 4.559563921174463e-06, "loss": 0.6335, "step": 12156 }, { "epoch": 1.08, "grad_norm": 6.669500923508975, "learning_rate": 4.558844362529676e-06, "loss": 0.605, "step": 12157 }, { "epoch": 1.08, "grad_norm": 6.414039149801606, "learning_rate": 4.558124813093047e-06, "loss": 0.6027, "step": 12158 }, { "epoch": 1.08, "grad_norm": 7.305014443085208, "learning_rate": 4.5574052728795965e-06, "loss": 0.5806, "step": 12159 }, { "epoch": 1.08, "grad_norm": 5.153035057501803, "learning_rate": 4.5566857419043396e-06, "loss": 0.5582, "step": 12160 }, { "epoch": 1.08, "grad_norm": 6.431489669288865, "learning_rate": 4.555966220182297e-06, "loss": 0.604, "step": 12161 }, { "epoch": 1.08, "grad_norm": 6.1931837147166435, "learning_rate": 4.555246707728487e-06, "loss": 0.6357, "step": 12162 }, { "epoch": 1.09, "grad_norm": 5.760356090643258, "learning_rate": 4.554527204557927e-06, "loss": 0.6039, "step": 12163 }, { "epoch": 1.09, "grad_norm": 6.519162273357288, "learning_rate": 4.553807710685637e-06, "loss": 0.5506, "step": 12164 }, { "epoch": 1.09, "grad_norm": 6.860784977503885, "learning_rate": 4.553088226126635e-06, "loss": 0.6411, "step": 12165 }, { "epoch": 1.09, "grad_norm": 7.696702516199977, "learning_rate": 4.5523687508959366e-06, "loss": 0.5676, "step": 12166 }, { "epoch": 1.09, "grad_norm": 6.2426825677561055, "learning_rate": 4.55164928500856e-06, "loss": 0.6065, "step": 12167 }, { "epoch": 1.09, "grad_norm": 5.903898326270965, "learning_rate": 4.550929828479523e-06, "loss": 0.6366, "step": 12168 }, { "epoch": 1.09, "grad_norm": 4.594958249920871, "learning_rate": 4.550210381323842e-06, "loss": 0.628, "step": 12169 }, { "epoch": 1.09, "grad_norm": 5.992505809305903, "learning_rate": 4.549490943556535e-06, "loss": 0.6768, "step": 12170 }, { "epoch": 1.09, "grad_norm": 5.5161474155536725, "learning_rate": 4.5487715151926175e-06, "loss": 0.7219, "step": 12171 }, { "epoch": 1.09, "grad_norm": 6.9983282134198515, "learning_rate": 4.548052096247106e-06, "loss": 0.6254, "step": 12172 }, { "epoch": 1.09, "grad_norm": 7.33702874784635, "learning_rate": 4.5473326867350195e-06, "loss": 0.6143, "step": 12173 }, { "epoch": 1.09, "grad_norm": 7.545803323322835, "learning_rate": 4.54661328667137e-06, "loss": 0.6498, "step": 12174 }, { "epoch": 1.09, "grad_norm": 8.030333955912607, "learning_rate": 4.545893896071176e-06, "loss": 0.5757, "step": 12175 }, { "epoch": 1.09, "grad_norm": 6.900675270903722, "learning_rate": 4.545174514949452e-06, "loss": 0.6195, "step": 12176 }, { "epoch": 1.09, "grad_norm": 6.433352342393618, "learning_rate": 4.544455143321213e-06, "loss": 0.644, "step": 12177 }, { "epoch": 1.09, "grad_norm": 8.582431569888172, "learning_rate": 4.543735781201476e-06, "loss": 0.5921, "step": 12178 }, { "epoch": 1.09, "grad_norm": 6.856981569103109, "learning_rate": 4.543016428605256e-06, "loss": 0.6685, "step": 12179 }, { "epoch": 1.09, "grad_norm": 7.73768524827604, "learning_rate": 4.542297085547565e-06, "loss": 0.6089, "step": 12180 }, { "epoch": 1.09, "grad_norm": 5.916786377788344, "learning_rate": 4.541577752043424e-06, "loss": 0.6298, "step": 12181 }, { "epoch": 1.09, "grad_norm": 6.408303371534851, "learning_rate": 4.5408584281078396e-06, "loss": 0.6148, "step": 12182 }, { "epoch": 1.09, "grad_norm": 6.953881074790135, "learning_rate": 4.540139113755832e-06, "loss": 0.5911, "step": 12183 }, { "epoch": 1.09, "grad_norm": 8.843315242956763, "learning_rate": 4.539419809002413e-06, "loss": 0.6487, "step": 12184 }, { "epoch": 1.09, "grad_norm": 5.120903965464881, "learning_rate": 4.538700513862596e-06, "loss": 0.5954, "step": 12185 }, { "epoch": 1.09, "grad_norm": 6.719693989812334, "learning_rate": 4.537981228351396e-06, "loss": 0.6533, "step": 12186 }, { "epoch": 1.09, "grad_norm": 7.731159396154621, "learning_rate": 4.537261952483827e-06, "loss": 0.649, "step": 12187 }, { "epoch": 1.09, "grad_norm": 8.2516752935545, "learning_rate": 4.536542686274903e-06, "loss": 0.6613, "step": 12188 }, { "epoch": 1.09, "grad_norm": 6.158957241297204, "learning_rate": 4.535823429739632e-06, "loss": 0.6502, "step": 12189 }, { "epoch": 1.09, "grad_norm": 11.603742605243331, "learning_rate": 4.535104182893031e-06, "loss": 0.6026, "step": 12190 }, { "epoch": 1.09, "grad_norm": 5.524118859322126, "learning_rate": 4.534384945750114e-06, "loss": 0.6513, "step": 12191 }, { "epoch": 1.09, "grad_norm": 5.9518388553144, "learning_rate": 4.533665718325889e-06, "loss": 0.6093, "step": 12192 }, { "epoch": 1.09, "grad_norm": 7.6996137758412075, "learning_rate": 4.532946500635372e-06, "loss": 0.6289, "step": 12193 }, { "epoch": 1.09, "grad_norm": 5.84983464964117, "learning_rate": 4.532227292693574e-06, "loss": 0.6107, "step": 12194 }, { "epoch": 1.09, "grad_norm": 4.844525075675774, "learning_rate": 4.531508094515507e-06, "loss": 0.691, "step": 12195 }, { "epoch": 1.09, "grad_norm": 8.012257968740206, "learning_rate": 4.530788906116182e-06, "loss": 0.6412, "step": 12196 }, { "epoch": 1.09, "grad_norm": 6.061339579801128, "learning_rate": 4.530069727510612e-06, "loss": 0.6613, "step": 12197 }, { "epoch": 1.09, "grad_norm": 6.294308450327937, "learning_rate": 4.529350558713806e-06, "loss": 0.5858, "step": 12198 }, { "epoch": 1.09, "grad_norm": 7.990128359437187, "learning_rate": 4.528631399740778e-06, "loss": 0.6471, "step": 12199 }, { "epoch": 1.09, "grad_norm": 5.237255012733461, "learning_rate": 4.527912250606536e-06, "loss": 0.6424, "step": 12200 }, { "epoch": 1.09, "grad_norm": 5.73558155247318, "learning_rate": 4.527193111326092e-06, "loss": 0.6517, "step": 12201 }, { "epoch": 1.09, "grad_norm": 5.919372977202312, "learning_rate": 4.526473981914459e-06, "loss": 0.6824, "step": 12202 }, { "epoch": 1.09, "grad_norm": 6.085467487156427, "learning_rate": 4.525754862386642e-06, "loss": 0.6169, "step": 12203 }, { "epoch": 1.09, "grad_norm": 7.468404036353225, "learning_rate": 4.525035752757654e-06, "loss": 0.6112, "step": 12204 }, { "epoch": 1.09, "grad_norm": 6.909048819144996, "learning_rate": 4.524316653042504e-06, "loss": 0.5611, "step": 12205 }, { "epoch": 1.09, "grad_norm": 8.08497456737673, "learning_rate": 4.523597563256202e-06, "loss": 0.5609, "step": 12206 }, { "epoch": 1.09, "grad_norm": 5.861875161230403, "learning_rate": 4.52287848341376e-06, "loss": 0.6631, "step": 12207 }, { "epoch": 1.09, "grad_norm": 7.223697605052951, "learning_rate": 4.522159413530182e-06, "loss": 0.586, "step": 12208 }, { "epoch": 1.09, "grad_norm": 8.243593262353837, "learning_rate": 4.521440353620482e-06, "loss": 0.6558, "step": 12209 }, { "epoch": 1.09, "grad_norm": 4.083872121420837, "learning_rate": 4.520721303699664e-06, "loss": 0.6394, "step": 12210 }, { "epoch": 1.09, "grad_norm": 8.480147156170185, "learning_rate": 4.52000226378274e-06, "loss": 0.6396, "step": 12211 }, { "epoch": 1.09, "grad_norm": 8.318491605712856, "learning_rate": 4.519283233884719e-06, "loss": 0.6993, "step": 12212 }, { "epoch": 1.09, "grad_norm": 5.3119167580318765, "learning_rate": 4.518564214020607e-06, "loss": 0.6025, "step": 12213 }, { "epoch": 1.09, "grad_norm": 6.3869704792366155, "learning_rate": 4.517845204205412e-06, "loss": 0.6419, "step": 12214 }, { "epoch": 1.09, "grad_norm": 5.986770597097819, "learning_rate": 4.517126204454143e-06, "loss": 0.6238, "step": 12215 }, { "epoch": 1.09, "grad_norm": 6.611565167168221, "learning_rate": 4.516407214781807e-06, "loss": 0.5898, "step": 12216 }, { "epoch": 1.09, "grad_norm": 7.7562642593691224, "learning_rate": 4.515688235203413e-06, "loss": 0.607, "step": 12217 }, { "epoch": 1.09, "grad_norm": 6.317543305264216, "learning_rate": 4.514969265733965e-06, "loss": 0.5765, "step": 12218 }, { "epoch": 1.09, "grad_norm": 5.69957638326467, "learning_rate": 4.514250306388471e-06, "loss": 0.6346, "step": 12219 }, { "epoch": 1.09, "grad_norm": 7.0719332943096305, "learning_rate": 4.513531357181939e-06, "loss": 0.6357, "step": 12220 }, { "epoch": 1.09, "grad_norm": 7.149432804305595, "learning_rate": 4.512812418129373e-06, "loss": 0.6051, "step": 12221 }, { "epoch": 1.09, "grad_norm": 7.337803659007509, "learning_rate": 4.512093489245782e-06, "loss": 0.642, "step": 12222 }, { "epoch": 1.09, "grad_norm": 6.556745310241483, "learning_rate": 4.511374570546171e-06, "loss": 0.6299, "step": 12223 }, { "epoch": 1.09, "grad_norm": 5.905008658589598, "learning_rate": 4.510655662045544e-06, "loss": 0.6225, "step": 12224 }, { "epoch": 1.09, "grad_norm": 5.556754924980502, "learning_rate": 4.5099367637589105e-06, "loss": 0.6107, "step": 12225 }, { "epoch": 1.09, "grad_norm": 6.310635189962499, "learning_rate": 4.509217875701273e-06, "loss": 0.628, "step": 12226 }, { "epoch": 1.09, "grad_norm": 7.753661544794619, "learning_rate": 4.508498997887637e-06, "loss": 0.5794, "step": 12227 }, { "epoch": 1.09, "grad_norm": 6.384414306004821, "learning_rate": 4.507780130333009e-06, "loss": 0.6091, "step": 12228 }, { "epoch": 1.09, "grad_norm": 7.024731976827323, "learning_rate": 4.507061273052392e-06, "loss": 0.6219, "step": 12229 }, { "epoch": 1.09, "grad_norm": 5.151495166009182, "learning_rate": 4.506342426060792e-06, "loss": 0.599, "step": 12230 }, { "epoch": 1.09, "grad_norm": 5.806505085302793, "learning_rate": 4.505623589373215e-06, "loss": 0.6584, "step": 12231 }, { "epoch": 1.09, "grad_norm": 5.139605767067767, "learning_rate": 4.504904763004661e-06, "loss": 0.6684, "step": 12232 }, { "epoch": 1.09, "grad_norm": 6.766903352706043, "learning_rate": 4.504185946970135e-06, "loss": 0.6728, "step": 12233 }, { "epoch": 1.09, "grad_norm": 5.05974002230052, "learning_rate": 4.5034671412846415e-06, "loss": 0.63, "step": 12234 }, { "epoch": 1.09, "grad_norm": 6.268483846264399, "learning_rate": 4.502748345963185e-06, "loss": 0.6921, "step": 12235 }, { "epoch": 1.09, "grad_norm": 6.5862168821144325, "learning_rate": 4.502029561020767e-06, "loss": 0.6072, "step": 12236 }, { "epoch": 1.09, "grad_norm": 5.738767589100811, "learning_rate": 4.5013107864723915e-06, "loss": 0.5942, "step": 12237 }, { "epoch": 1.09, "grad_norm": 6.420906089489877, "learning_rate": 4.5005920223330625e-06, "loss": 0.7222, "step": 12238 }, { "epoch": 1.09, "grad_norm": 6.600011065174285, "learning_rate": 4.499873268617779e-06, "loss": 0.6387, "step": 12239 }, { "epoch": 1.09, "grad_norm": 7.421544454901337, "learning_rate": 4.499154525341547e-06, "loss": 0.6238, "step": 12240 }, { "epoch": 1.09, "grad_norm": 5.494780592092498, "learning_rate": 4.498435792519369e-06, "loss": 0.621, "step": 12241 }, { "epoch": 1.09, "grad_norm": 8.79119495428578, "learning_rate": 4.497717070166243e-06, "loss": 0.6364, "step": 12242 }, { "epoch": 1.09, "grad_norm": 6.021538092301, "learning_rate": 4.4969983582971735e-06, "loss": 0.611, "step": 12243 }, { "epoch": 1.09, "grad_norm": 5.629380610857261, "learning_rate": 4.4962796569271635e-06, "loss": 0.5972, "step": 12244 }, { "epoch": 1.09, "grad_norm": 7.912074580524507, "learning_rate": 4.495560966071213e-06, "loss": 0.5961, "step": 12245 }, { "epoch": 1.09, "grad_norm": 6.047839209518593, "learning_rate": 4.494842285744321e-06, "loss": 0.7044, "step": 12246 }, { "epoch": 1.09, "grad_norm": 8.166387592508032, "learning_rate": 4.494123615961489e-06, "loss": 0.635, "step": 12247 }, { "epoch": 1.09, "grad_norm": 7.2202462897732005, "learning_rate": 4.493404956737721e-06, "loss": 0.6449, "step": 12248 }, { "epoch": 1.09, "grad_norm": 5.524004144374282, "learning_rate": 4.492686308088013e-06, "loss": 0.568, "step": 12249 }, { "epoch": 1.09, "grad_norm": 6.810940822898996, "learning_rate": 4.491967670027367e-06, "loss": 0.5877, "step": 12250 }, { "epoch": 1.09, "grad_norm": 4.959927580661747, "learning_rate": 4.491249042570785e-06, "loss": 0.6984, "step": 12251 }, { "epoch": 1.09, "grad_norm": 7.7442878349345206, "learning_rate": 4.490530425733265e-06, "loss": 0.5569, "step": 12252 }, { "epoch": 1.09, "grad_norm": 6.034256743866717, "learning_rate": 4.489811819529805e-06, "loss": 0.6391, "step": 12253 }, { "epoch": 1.09, "grad_norm": 6.3128632710510955, "learning_rate": 4.489093223975408e-06, "loss": 0.6075, "step": 12254 }, { "epoch": 1.09, "grad_norm": 4.680369249550314, "learning_rate": 4.488374639085069e-06, "loss": 0.6064, "step": 12255 }, { "epoch": 1.09, "grad_norm": 7.490410664782851, "learning_rate": 4.487656064873791e-06, "loss": 0.5862, "step": 12256 }, { "epoch": 1.09, "grad_norm": 5.408086465010091, "learning_rate": 4.48693750135657e-06, "loss": 0.6774, "step": 12257 }, { "epoch": 1.09, "grad_norm": 5.584965074450606, "learning_rate": 4.486218948548405e-06, "loss": 0.6759, "step": 12258 }, { "epoch": 1.09, "grad_norm": 5.403060273272091, "learning_rate": 4.485500406464293e-06, "loss": 0.6782, "step": 12259 }, { "epoch": 1.09, "grad_norm": 5.5644455433007565, "learning_rate": 4.484781875119237e-06, "loss": 0.5714, "step": 12260 }, { "epoch": 1.09, "grad_norm": 5.897922949233469, "learning_rate": 4.484063354528228e-06, "loss": 0.5999, "step": 12261 }, { "epoch": 1.09, "grad_norm": 5.868035626257705, "learning_rate": 4.4833448447062675e-06, "loss": 0.6042, "step": 12262 }, { "epoch": 1.09, "grad_norm": 5.821417914057071, "learning_rate": 4.482626345668351e-06, "loss": 0.5426, "step": 12263 }, { "epoch": 1.09, "grad_norm": 6.619706956751019, "learning_rate": 4.481907857429478e-06, "loss": 0.5775, "step": 12264 }, { "epoch": 1.09, "grad_norm": 5.1393495085008825, "learning_rate": 4.481189380004642e-06, "loss": 0.6139, "step": 12265 }, { "epoch": 1.09, "grad_norm": 5.726604928409019, "learning_rate": 4.480470913408842e-06, "loss": 0.6257, "step": 12266 }, { "epoch": 1.09, "grad_norm": 6.34945879500513, "learning_rate": 4.479752457657075e-06, "loss": 0.5885, "step": 12267 }, { "epoch": 1.09, "grad_norm": 7.9158874829621215, "learning_rate": 4.479034012764335e-06, "loss": 0.6419, "step": 12268 }, { "epoch": 1.09, "grad_norm": 6.872906185239631, "learning_rate": 4.47831557874562e-06, "loss": 0.549, "step": 12269 }, { "epoch": 1.09, "grad_norm": 7.166174919221646, "learning_rate": 4.477597155615924e-06, "loss": 0.5727, "step": 12270 }, { "epoch": 1.09, "grad_norm": 7.115910284592625, "learning_rate": 4.476878743390244e-06, "loss": 0.5946, "step": 12271 }, { "epoch": 1.09, "grad_norm": 5.808928864151515, "learning_rate": 4.476160342083573e-06, "loss": 0.5792, "step": 12272 }, { "epoch": 1.09, "grad_norm": 6.53654568764681, "learning_rate": 4.47544195171091e-06, "loss": 0.6614, "step": 12273 }, { "epoch": 1.09, "grad_norm": 7.959849949279879, "learning_rate": 4.4747235722872484e-06, "loss": 0.5878, "step": 12274 }, { "epoch": 1.1, "grad_norm": 5.70533665586016, "learning_rate": 4.474005203827579e-06, "loss": 0.5805, "step": 12275 }, { "epoch": 1.1, "grad_norm": 7.305688319614058, "learning_rate": 4.473286846346901e-06, "loss": 0.6237, "step": 12276 }, { "epoch": 1.1, "grad_norm": 6.6664423211525685, "learning_rate": 4.4725684998602055e-06, "loss": 0.6477, "step": 12277 }, { "epoch": 1.1, "grad_norm": 5.927597892470878, "learning_rate": 4.4718501643824874e-06, "loss": 0.6534, "step": 12278 }, { "epoch": 1.1, "grad_norm": 8.052070157125787, "learning_rate": 4.47113183992874e-06, "loss": 0.6414, "step": 12279 }, { "epoch": 1.1, "grad_norm": 5.563652840711037, "learning_rate": 4.470413526513959e-06, "loss": 0.64, "step": 12280 }, { "epoch": 1.1, "grad_norm": 8.074049630588965, "learning_rate": 4.4696952241531344e-06, "loss": 0.6037, "step": 12281 }, { "epoch": 1.1, "grad_norm": 5.896781707317337, "learning_rate": 4.46897693286126e-06, "loss": 0.5893, "step": 12282 }, { "epoch": 1.1, "grad_norm": 8.637428202503258, "learning_rate": 4.468258652653331e-06, "loss": 0.564, "step": 12283 }, { "epoch": 1.1, "grad_norm": 6.727663766201061, "learning_rate": 4.467540383544338e-06, "loss": 0.5789, "step": 12284 }, { "epoch": 1.1, "grad_norm": 6.484280948430802, "learning_rate": 4.466822125549273e-06, "loss": 0.5875, "step": 12285 }, { "epoch": 1.1, "grad_norm": 6.571246326769667, "learning_rate": 4.466103878683128e-06, "loss": 0.7122, "step": 12286 }, { "epoch": 1.1, "grad_norm": 6.293334115070593, "learning_rate": 4.465385642960895e-06, "loss": 0.6711, "step": 12287 }, { "epoch": 1.1, "grad_norm": 7.651989369490887, "learning_rate": 4.464667418397568e-06, "loss": 0.6492, "step": 12288 }, { "epoch": 1.1, "grad_norm": 5.403644646325852, "learning_rate": 4.463949205008134e-06, "loss": 0.6342, "step": 12289 }, { "epoch": 1.1, "grad_norm": 7.080452113057424, "learning_rate": 4.463231002807589e-06, "loss": 0.5927, "step": 12290 }, { "epoch": 1.1, "grad_norm": 6.5355422925202005, "learning_rate": 4.462512811810919e-06, "loss": 0.6339, "step": 12291 }, { "epoch": 1.1, "grad_norm": 6.411445782785753, "learning_rate": 4.461794632033118e-06, "loss": 0.6392, "step": 12292 }, { "epoch": 1.1, "grad_norm": 11.073622358318127, "learning_rate": 4.4610764634891745e-06, "loss": 0.6507, "step": 12293 }, { "epoch": 1.1, "grad_norm": 8.278814292535646, "learning_rate": 4.46035830619408e-06, "loss": 0.6139, "step": 12294 }, { "epoch": 1.1, "grad_norm": 7.547537672072074, "learning_rate": 4.459640160162825e-06, "loss": 0.5857, "step": 12295 }, { "epoch": 1.1, "grad_norm": 6.208388523061698, "learning_rate": 4.458922025410396e-06, "loss": 0.6332, "step": 12296 }, { "epoch": 1.1, "grad_norm": 7.750194341309088, "learning_rate": 4.458203901951787e-06, "loss": 0.6327, "step": 12297 }, { "epoch": 1.1, "grad_norm": 8.558155369040444, "learning_rate": 4.4574857898019845e-06, "loss": 0.6275, "step": 12298 }, { "epoch": 1.1, "grad_norm": 6.65942440733257, "learning_rate": 4.456767688975977e-06, "loss": 0.6796, "step": 12299 }, { "epoch": 1.1, "grad_norm": 7.4589926030428755, "learning_rate": 4.456049599488755e-06, "loss": 0.5629, "step": 12300 }, { "epoch": 1.1, "grad_norm": 5.51413672002122, "learning_rate": 4.455331521355307e-06, "loss": 0.6239, "step": 12301 }, { "epoch": 1.1, "grad_norm": 7.115248376621213, "learning_rate": 4.45461345459062e-06, "loss": 0.6354, "step": 12302 }, { "epoch": 1.1, "grad_norm": 8.002535954580285, "learning_rate": 4.4538953992096845e-06, "loss": 0.6429, "step": 12303 }, { "epoch": 1.1, "grad_norm": 5.431163707315679, "learning_rate": 4.453177355227485e-06, "loss": 0.6138, "step": 12304 }, { "epoch": 1.1, "grad_norm": 4.91134669728509, "learning_rate": 4.452459322659011e-06, "loss": 0.576, "step": 12305 }, { "epoch": 1.1, "grad_norm": 8.351551576368372, "learning_rate": 4.451741301519249e-06, "loss": 0.5982, "step": 12306 }, { "epoch": 1.1, "grad_norm": 5.707691539682638, "learning_rate": 4.4510232918231866e-06, "loss": 0.5871, "step": 12307 }, { "epoch": 1.1, "grad_norm": 6.203384223131686, "learning_rate": 4.450305293585811e-06, "loss": 0.6248, "step": 12308 }, { "epoch": 1.1, "grad_norm": 5.541717880530045, "learning_rate": 4.449587306822109e-06, "loss": 0.6363, "step": 12309 }, { "epoch": 1.1, "grad_norm": 12.296621688930657, "learning_rate": 4.448869331547065e-06, "loss": 0.6302, "step": 12310 }, { "epoch": 1.1, "grad_norm": 7.641099279382813, "learning_rate": 4.448151367775669e-06, "loss": 0.6254, "step": 12311 }, { "epoch": 1.1, "grad_norm": 5.952893194050032, "learning_rate": 4.447433415522904e-06, "loss": 0.6428, "step": 12312 }, { "epoch": 1.1, "grad_norm": 7.003795634632259, "learning_rate": 4.446715474803756e-06, "loss": 0.5882, "step": 12313 }, { "epoch": 1.1, "grad_norm": 6.040308905317815, "learning_rate": 4.445997545633211e-06, "loss": 0.5298, "step": 12314 }, { "epoch": 1.1, "grad_norm": 5.926834470766514, "learning_rate": 4.445279628026254e-06, "loss": 0.6244, "step": 12315 }, { "epoch": 1.1, "grad_norm": 5.559586506491983, "learning_rate": 4.4445617219978685e-06, "loss": 0.5755, "step": 12316 }, { "epoch": 1.1, "grad_norm": 7.203405085628962, "learning_rate": 4.4438438275630435e-06, "loss": 0.6477, "step": 12317 }, { "epoch": 1.1, "grad_norm": 6.251462698610916, "learning_rate": 4.443125944736759e-06, "loss": 0.5764, "step": 12318 }, { "epoch": 1.1, "grad_norm": 5.348878059629113, "learning_rate": 4.442408073534002e-06, "loss": 0.6345, "step": 12319 }, { "epoch": 1.1, "grad_norm": 7.090945574769848, "learning_rate": 4.441690213969753e-06, "loss": 0.5535, "step": 12320 }, { "epoch": 1.1, "grad_norm": 5.202938594131278, "learning_rate": 4.440972366059e-06, "loss": 0.54, "step": 12321 }, { "epoch": 1.1, "grad_norm": 8.506620296000484, "learning_rate": 4.440254529816724e-06, "loss": 0.6361, "step": 12322 }, { "epoch": 1.1, "grad_norm": 7.064689937246912, "learning_rate": 4.439536705257909e-06, "loss": 0.5722, "step": 12323 }, { "epoch": 1.1, "grad_norm": 6.028254494834181, "learning_rate": 4.438818892397538e-06, "loss": 0.7365, "step": 12324 }, { "epoch": 1.1, "grad_norm": 5.858246225220811, "learning_rate": 4.438101091250593e-06, "loss": 0.5999, "step": 12325 }, { "epoch": 1.1, "grad_norm": 6.698644628943827, "learning_rate": 4.437383301832057e-06, "loss": 0.5651, "step": 12326 }, { "epoch": 1.1, "grad_norm": 7.032581522567862, "learning_rate": 4.436665524156914e-06, "loss": 0.6306, "step": 12327 }, { "epoch": 1.1, "grad_norm": 7.7791307370559, "learning_rate": 4.435947758240143e-06, "loss": 0.6821, "step": 12328 }, { "epoch": 1.1, "grad_norm": 6.125175355200564, "learning_rate": 4.435230004096727e-06, "loss": 0.6201, "step": 12329 }, { "epoch": 1.1, "grad_norm": 5.933177633314032, "learning_rate": 4.434512261741649e-06, "loss": 0.5902, "step": 12330 }, { "epoch": 1.1, "grad_norm": 6.44008733821542, "learning_rate": 4.433794531189889e-06, "loss": 0.665, "step": 12331 }, { "epoch": 1.1, "grad_norm": 6.432819785012518, "learning_rate": 4.43307681245643e-06, "loss": 0.6371, "step": 12332 }, { "epoch": 1.1, "grad_norm": 7.584359129538197, "learning_rate": 4.4323591055562486e-06, "loss": 0.559, "step": 12333 }, { "epoch": 1.1, "grad_norm": 8.342848590895008, "learning_rate": 4.431641410504329e-06, "loss": 0.5731, "step": 12334 }, { "epoch": 1.1, "grad_norm": 4.670150368832689, "learning_rate": 4.430923727315649e-06, "loss": 0.5474, "step": 12335 }, { "epoch": 1.1, "grad_norm": 7.614852778674106, "learning_rate": 4.430206056005189e-06, "loss": 0.6394, "step": 12336 }, { "epoch": 1.1, "grad_norm": 5.897686349855462, "learning_rate": 4.429488396587933e-06, "loss": 0.5372, "step": 12337 }, { "epoch": 1.1, "grad_norm": 8.86987137800057, "learning_rate": 4.428770749078855e-06, "loss": 0.6493, "step": 12338 }, { "epoch": 1.1, "grad_norm": 7.322595701834847, "learning_rate": 4.428053113492936e-06, "loss": 0.6548, "step": 12339 }, { "epoch": 1.1, "grad_norm": 6.609767476672049, "learning_rate": 4.427335489845158e-06, "loss": 0.5989, "step": 12340 }, { "epoch": 1.1, "grad_norm": 7.467316270168323, "learning_rate": 4.426617878150496e-06, "loss": 0.6862, "step": 12341 }, { "epoch": 1.1, "grad_norm": 5.99362208302428, "learning_rate": 4.42590027842393e-06, "loss": 0.6091, "step": 12342 }, { "epoch": 1.1, "grad_norm": 5.0991015502895705, "learning_rate": 4.4251826906804395e-06, "loss": 0.6264, "step": 12343 }, { "epoch": 1.1, "grad_norm": 6.780292242503265, "learning_rate": 4.424465114935001e-06, "loss": 0.6215, "step": 12344 }, { "epoch": 1.1, "grad_norm": 5.621525573859071, "learning_rate": 4.4237475512025915e-06, "loss": 0.628, "step": 12345 }, { "epoch": 1.1, "grad_norm": 6.303983320631144, "learning_rate": 4.4230299994981925e-06, "loss": 0.5582, "step": 12346 }, { "epoch": 1.1, "grad_norm": 6.623996830633488, "learning_rate": 4.422312459836778e-06, "loss": 0.6984, "step": 12347 }, { "epoch": 1.1, "grad_norm": 7.654012700535251, "learning_rate": 4.421594932233325e-06, "loss": 0.5855, "step": 12348 }, { "epoch": 1.1, "grad_norm": 7.108672551249345, "learning_rate": 4.420877416702811e-06, "loss": 0.5485, "step": 12349 }, { "epoch": 1.1, "grad_norm": 7.988541788889543, "learning_rate": 4.420159913260214e-06, "loss": 0.6496, "step": 12350 }, { "epoch": 1.1, "grad_norm": 5.0855714822936555, "learning_rate": 4.419442421920507e-06, "loss": 0.5646, "step": 12351 }, { "epoch": 1.1, "grad_norm": 6.528477028728079, "learning_rate": 4.418724942698668e-06, "loss": 0.6512, "step": 12352 }, { "epoch": 1.1, "grad_norm": 6.311732985940998, "learning_rate": 4.418007475609675e-06, "loss": 0.598, "step": 12353 }, { "epoch": 1.1, "grad_norm": 5.129219990402013, "learning_rate": 4.4172900206685e-06, "loss": 0.6463, "step": 12354 }, { "epoch": 1.1, "grad_norm": 4.823236676426535, "learning_rate": 4.416572577890119e-06, "loss": 0.5717, "step": 12355 }, { "epoch": 1.1, "grad_norm": 6.521081307296095, "learning_rate": 4.4158551472895095e-06, "loss": 0.5318, "step": 12356 }, { "epoch": 1.1, "grad_norm": 5.5726882722869755, "learning_rate": 4.4151377288816435e-06, "loss": 0.5633, "step": 12357 }, { "epoch": 1.1, "grad_norm": 5.061898299965168, "learning_rate": 4.4144203226814976e-06, "loss": 0.5421, "step": 12358 }, { "epoch": 1.1, "grad_norm": 7.208486041286212, "learning_rate": 4.413702928704043e-06, "loss": 0.6334, "step": 12359 }, { "epoch": 1.1, "grad_norm": 5.422023332030965, "learning_rate": 4.412985546964257e-06, "loss": 0.6087, "step": 12360 }, { "epoch": 1.1, "grad_norm": 6.220483077826912, "learning_rate": 4.412268177477114e-06, "loss": 0.621, "step": 12361 }, { "epoch": 1.1, "grad_norm": 6.993377165106248, "learning_rate": 4.411550820257584e-06, "loss": 0.6033, "step": 12362 }, { "epoch": 1.1, "grad_norm": 6.37843666510677, "learning_rate": 4.4108334753206415e-06, "loss": 0.6109, "step": 12363 }, { "epoch": 1.1, "grad_norm": 3.881749816634972, "learning_rate": 4.410116142681261e-06, "loss": 0.5963, "step": 12364 }, { "epoch": 1.1, "grad_norm": 6.188729966087592, "learning_rate": 4.409398822354413e-06, "loss": 0.599, "step": 12365 }, { "epoch": 1.1, "grad_norm": 6.949533545731097, "learning_rate": 4.4086815143550714e-06, "loss": 0.6371, "step": 12366 }, { "epoch": 1.1, "grad_norm": 7.230467369545347, "learning_rate": 4.407964218698209e-06, "loss": 0.4971, "step": 12367 }, { "epoch": 1.1, "grad_norm": 5.159056186811219, "learning_rate": 4.407246935398797e-06, "loss": 0.6419, "step": 12368 }, { "epoch": 1.1, "grad_norm": 6.746281725943701, "learning_rate": 4.406529664471806e-06, "loss": 0.6536, "step": 12369 }, { "epoch": 1.1, "grad_norm": 6.586043087650528, "learning_rate": 4.405812405932208e-06, "loss": 0.6357, "step": 12370 }, { "epoch": 1.1, "grad_norm": 6.943258165223436, "learning_rate": 4.405095159794977e-06, "loss": 0.5849, "step": 12371 }, { "epoch": 1.1, "grad_norm": 6.202479419623973, "learning_rate": 4.4043779260750805e-06, "loss": 0.5889, "step": 12372 }, { "epoch": 1.1, "grad_norm": 7.5472318225488895, "learning_rate": 4.40366070478749e-06, "loss": 0.5765, "step": 12373 }, { "epoch": 1.1, "grad_norm": 8.340520822779478, "learning_rate": 4.402943495947177e-06, "loss": 0.6624, "step": 12374 }, { "epoch": 1.1, "grad_norm": 6.486992070680566, "learning_rate": 4.402226299569112e-06, "loss": 0.6118, "step": 12375 }, { "epoch": 1.1, "grad_norm": 5.155550667044679, "learning_rate": 4.4015091156682635e-06, "loss": 0.651, "step": 12376 }, { "epoch": 1.1, "grad_norm": 6.8859233471665275, "learning_rate": 4.4007919442596e-06, "loss": 0.6039, "step": 12377 }, { "epoch": 1.1, "grad_norm": 7.083404134234437, "learning_rate": 4.400074785358094e-06, "loss": 0.5844, "step": 12378 }, { "epoch": 1.1, "grad_norm": 7.045470689755019, "learning_rate": 4.399357638978712e-06, "loss": 0.6164, "step": 12379 }, { "epoch": 1.1, "grad_norm": 5.434743719673973, "learning_rate": 4.3986405051364235e-06, "loss": 0.6193, "step": 12380 }, { "epoch": 1.1, "grad_norm": 4.4099929461144765, "learning_rate": 4.397923383846199e-06, "loss": 0.6019, "step": 12381 }, { "epoch": 1.1, "grad_norm": 6.41881763434502, "learning_rate": 4.3972062751230044e-06, "loss": 0.7064, "step": 12382 }, { "epoch": 1.1, "grad_norm": 5.412402494977789, "learning_rate": 4.396489178981809e-06, "loss": 0.5931, "step": 12383 }, { "epoch": 1.1, "grad_norm": 4.824250187414263, "learning_rate": 4.39577209543758e-06, "loss": 0.6527, "step": 12384 }, { "epoch": 1.1, "grad_norm": 5.57780432670521, "learning_rate": 4.395055024505286e-06, "loss": 0.6619, "step": 12385 }, { "epoch": 1.1, "grad_norm": 6.19687217878122, "learning_rate": 4.394337966199892e-06, "loss": 0.5803, "step": 12386 }, { "epoch": 1.11, "grad_norm": 5.707706656635849, "learning_rate": 4.393620920536369e-06, "loss": 0.5975, "step": 12387 }, { "epoch": 1.11, "grad_norm": 4.818186518169836, "learning_rate": 4.392903887529679e-06, "loss": 0.649, "step": 12388 }, { "epoch": 1.11, "grad_norm": 5.7079894210161815, "learning_rate": 4.392186867194791e-06, "loss": 0.6001, "step": 12389 }, { "epoch": 1.11, "grad_norm": 6.727353704296412, "learning_rate": 4.391469859546674e-06, "loss": 0.6562, "step": 12390 }, { "epoch": 1.11, "grad_norm": 5.428996348307807, "learning_rate": 4.3907528646002885e-06, "loss": 0.5763, "step": 12391 }, { "epoch": 1.11, "grad_norm": 6.242836744676603, "learning_rate": 4.390035882370603e-06, "loss": 0.6677, "step": 12392 }, { "epoch": 1.11, "grad_norm": 8.007000675606493, "learning_rate": 4.3893189128725825e-06, "loss": 0.6115, "step": 12393 }, { "epoch": 1.11, "grad_norm": 9.614788131239115, "learning_rate": 4.388601956121193e-06, "loss": 0.5954, "step": 12394 }, { "epoch": 1.11, "grad_norm": 7.123306355801591, "learning_rate": 4.387885012131398e-06, "loss": 0.6433, "step": 12395 }, { "epoch": 1.11, "grad_norm": 7.057619506010608, "learning_rate": 4.387168080918163e-06, "loss": 0.5624, "step": 12396 }, { "epoch": 1.11, "grad_norm": 8.43527977015133, "learning_rate": 4.386451162496453e-06, "loss": 0.5732, "step": 12397 }, { "epoch": 1.11, "grad_norm": 7.206583386841495, "learning_rate": 4.38573425688123e-06, "loss": 0.6484, "step": 12398 }, { "epoch": 1.11, "grad_norm": 6.66446825977004, "learning_rate": 4.38501736408746e-06, "loss": 0.6717, "step": 12399 }, { "epoch": 1.11, "grad_norm": 5.37790114849956, "learning_rate": 4.384300484130105e-06, "loss": 0.6482, "step": 12400 }, { "epoch": 1.11, "grad_norm": 6.8611866011087494, "learning_rate": 4.3835836170241294e-06, "loss": 0.6369, "step": 12401 }, { "epoch": 1.11, "grad_norm": 6.37478310784572, "learning_rate": 4.382866762784496e-06, "loss": 0.6997, "step": 12402 }, { "epoch": 1.11, "grad_norm": 6.757619642759219, "learning_rate": 4.382149921426168e-06, "loss": 0.6179, "step": 12403 }, { "epoch": 1.11, "grad_norm": 8.101600946205096, "learning_rate": 4.3814330929641084e-06, "loss": 0.6682, "step": 12404 }, { "epoch": 1.11, "grad_norm": 5.777036555503142, "learning_rate": 4.380716277413277e-06, "loss": 0.6391, "step": 12405 }, { "epoch": 1.11, "grad_norm": 6.564618207466893, "learning_rate": 4.379999474788636e-06, "loss": 0.6304, "step": 12406 }, { "epoch": 1.11, "grad_norm": 8.084343800579402, "learning_rate": 4.37928268510515e-06, "loss": 0.6068, "step": 12407 }, { "epoch": 1.11, "grad_norm": 7.34911926123663, "learning_rate": 4.378565908377777e-06, "loss": 0.5924, "step": 12408 }, { "epoch": 1.11, "grad_norm": 7.402555656962099, "learning_rate": 4.37784914462148e-06, "loss": 0.5843, "step": 12409 }, { "epoch": 1.11, "grad_norm": 6.153507789077746, "learning_rate": 4.377132393851221e-06, "loss": 0.5969, "step": 12410 }, { "epoch": 1.11, "grad_norm": 6.803080181587068, "learning_rate": 4.376415656081957e-06, "loss": 0.5766, "step": 12411 }, { "epoch": 1.11, "grad_norm": 5.611108427822724, "learning_rate": 4.375698931328652e-06, "loss": 0.6481, "step": 12412 }, { "epoch": 1.11, "grad_norm": 5.377720683296471, "learning_rate": 4.374982219606265e-06, "loss": 0.6285, "step": 12413 }, { "epoch": 1.11, "grad_norm": 6.566584130384131, "learning_rate": 4.374265520929754e-06, "loss": 0.6397, "step": 12414 }, { "epoch": 1.11, "grad_norm": 6.2380933210489475, "learning_rate": 4.373548835314081e-06, "loss": 0.5738, "step": 12415 }, { "epoch": 1.11, "grad_norm": 7.227957805225629, "learning_rate": 4.3728321627742045e-06, "loss": 0.6052, "step": 12416 }, { "epoch": 1.11, "grad_norm": 7.580587815743619, "learning_rate": 4.372115503325082e-06, "loss": 0.6362, "step": 12417 }, { "epoch": 1.11, "grad_norm": 6.018137962271837, "learning_rate": 4.371398856981676e-06, "loss": 0.6477, "step": 12418 }, { "epoch": 1.11, "grad_norm": 5.302042772310123, "learning_rate": 4.370682223758939e-06, "loss": 0.6299, "step": 12419 }, { "epoch": 1.11, "grad_norm": 4.903882409075015, "learning_rate": 4.369965603671835e-06, "loss": 0.6136, "step": 12420 }, { "epoch": 1.11, "grad_norm": 6.026947219538958, "learning_rate": 4.369248996735317e-06, "loss": 0.5363, "step": 12421 }, { "epoch": 1.11, "grad_norm": 6.394808748258217, "learning_rate": 4.368532402964345e-06, "loss": 0.6638, "step": 12422 }, { "epoch": 1.11, "grad_norm": 6.026583006770837, "learning_rate": 4.367815822373878e-06, "loss": 0.624, "step": 12423 }, { "epoch": 1.11, "grad_norm": 9.682190355272507, "learning_rate": 4.3670992549788695e-06, "loss": 0.687, "step": 12424 }, { "epoch": 1.11, "grad_norm": 6.753024838589873, "learning_rate": 4.366382700794278e-06, "loss": 0.6196, "step": 12425 }, { "epoch": 1.11, "grad_norm": 5.606800317503926, "learning_rate": 4.365666159835061e-06, "loss": 0.6121, "step": 12426 }, { "epoch": 1.11, "grad_norm": 7.740747846959924, "learning_rate": 4.364949632116173e-06, "loss": 0.6053, "step": 12427 }, { "epoch": 1.11, "grad_norm": 6.365163939825341, "learning_rate": 4.364233117652571e-06, "loss": 0.5781, "step": 12428 }, { "epoch": 1.11, "grad_norm": 6.1127559354564385, "learning_rate": 4.36351661645921e-06, "loss": 0.6422, "step": 12429 }, { "epoch": 1.11, "grad_norm": 7.398025596576054, "learning_rate": 4.362800128551047e-06, "loss": 0.68, "step": 12430 }, { "epoch": 1.11, "grad_norm": 4.688255607611663, "learning_rate": 4.362083653943035e-06, "loss": 0.6423, "step": 12431 }, { "epoch": 1.11, "grad_norm": 4.618062843465149, "learning_rate": 4.36136719265013e-06, "loss": 0.5814, "step": 12432 }, { "epoch": 1.11, "grad_norm": 6.3580771582789035, "learning_rate": 4.360650744687287e-06, "loss": 0.6268, "step": 12433 }, { "epoch": 1.11, "grad_norm": 6.873281376717463, "learning_rate": 4.359934310069459e-06, "loss": 0.6502, "step": 12434 }, { "epoch": 1.11, "grad_norm": 6.678365900411953, "learning_rate": 4.359217888811599e-06, "loss": 0.5564, "step": 12435 }, { "epoch": 1.11, "grad_norm": 6.526321240589909, "learning_rate": 4.358501480928664e-06, "loss": 0.5636, "step": 12436 }, { "epoch": 1.11, "grad_norm": 6.848404341323435, "learning_rate": 4.357785086435606e-06, "loss": 0.6099, "step": 12437 }, { "epoch": 1.11, "grad_norm": 7.6531470434488895, "learning_rate": 4.357068705347376e-06, "loss": 0.6007, "step": 12438 }, { "epoch": 1.11, "grad_norm": 5.423178350183519, "learning_rate": 4.3563523376789305e-06, "loss": 0.5997, "step": 12439 }, { "epoch": 1.11, "grad_norm": 6.975362659270571, "learning_rate": 4.355635983445219e-06, "loss": 0.5793, "step": 12440 }, { "epoch": 1.11, "grad_norm": 6.992931754902949, "learning_rate": 4.354919642661196e-06, "loss": 0.6186, "step": 12441 }, { "epoch": 1.11, "grad_norm": 6.944494568500657, "learning_rate": 4.3542033153418125e-06, "loss": 0.6264, "step": 12442 }, { "epoch": 1.11, "grad_norm": 5.645468188325184, "learning_rate": 4.353487001502021e-06, "loss": 0.5628, "step": 12443 }, { "epoch": 1.11, "grad_norm": 5.332459353316634, "learning_rate": 4.352770701156773e-06, "loss": 0.5967, "step": 12444 }, { "epoch": 1.11, "grad_norm": 4.857239640242378, "learning_rate": 4.352054414321017e-06, "loss": 0.5704, "step": 12445 }, { "epoch": 1.11, "grad_norm": 9.711345877767736, "learning_rate": 4.351338141009707e-06, "loss": 0.5747, "step": 12446 }, { "epoch": 1.11, "grad_norm": 5.534222193789139, "learning_rate": 4.350621881237796e-06, "loss": 0.5915, "step": 12447 }, { "epoch": 1.11, "grad_norm": 5.63022087233751, "learning_rate": 4.349905635020228e-06, "loss": 0.6257, "step": 12448 }, { "epoch": 1.11, "grad_norm": 7.648655366447596, "learning_rate": 4.349189402371957e-06, "loss": 0.5991, "step": 12449 }, { "epoch": 1.11, "grad_norm": 5.947573647739892, "learning_rate": 4.348473183307931e-06, "loss": 0.6269, "step": 12450 }, { "epoch": 1.11, "grad_norm": 6.272823555543137, "learning_rate": 4.3477569778431e-06, "loss": 0.6076, "step": 12451 }, { "epoch": 1.11, "grad_norm": 5.5522211252028, "learning_rate": 4.347040785992416e-06, "loss": 0.5709, "step": 12452 }, { "epoch": 1.11, "grad_norm": 6.697756422723997, "learning_rate": 4.346324607770824e-06, "loss": 0.5811, "step": 12453 }, { "epoch": 1.11, "grad_norm": 5.7018061148913155, "learning_rate": 4.345608443193276e-06, "loss": 0.6291, "step": 12454 }, { "epoch": 1.11, "grad_norm": 5.102227921239702, "learning_rate": 4.344892292274717e-06, "loss": 0.6709, "step": 12455 }, { "epoch": 1.11, "grad_norm": 6.276516018848707, "learning_rate": 4.344176155030097e-06, "loss": 0.5638, "step": 12456 }, { "epoch": 1.11, "grad_norm": 5.492320153876466, "learning_rate": 4.343460031474363e-06, "loss": 0.5303, "step": 12457 }, { "epoch": 1.11, "grad_norm": 9.0718688600014, "learning_rate": 4.342743921622464e-06, "loss": 0.6302, "step": 12458 }, { "epoch": 1.11, "grad_norm": 6.261531803497046, "learning_rate": 4.342027825489346e-06, "loss": 0.6338, "step": 12459 }, { "epoch": 1.11, "grad_norm": 6.205018183694389, "learning_rate": 4.341311743089956e-06, "loss": 0.6444, "step": 12460 }, { "epoch": 1.11, "grad_norm": 6.501725863083191, "learning_rate": 4.3405956744392406e-06, "loss": 0.5892, "step": 12461 }, { "epoch": 1.11, "grad_norm": 6.292504809848594, "learning_rate": 4.339879619552149e-06, "loss": 0.5514, "step": 12462 }, { "epoch": 1.11, "grad_norm": 10.19470357076131, "learning_rate": 4.339163578443621e-06, "loss": 0.6051, "step": 12463 }, { "epoch": 1.11, "grad_norm": 6.284857003072968, "learning_rate": 4.338447551128609e-06, "loss": 0.6008, "step": 12464 }, { "epoch": 1.11, "grad_norm": 6.26584153759166, "learning_rate": 4.337731537622054e-06, "loss": 0.5329, "step": 12465 }, { "epoch": 1.11, "grad_norm": 6.015777222956886, "learning_rate": 4.337015537938902e-06, "loss": 0.6349, "step": 12466 }, { "epoch": 1.11, "grad_norm": 4.080442623891034, "learning_rate": 4.336299552094101e-06, "loss": 0.6579, "step": 12467 }, { "epoch": 1.11, "grad_norm": 5.120084567398552, "learning_rate": 4.3355835801025916e-06, "loss": 0.5706, "step": 12468 }, { "epoch": 1.11, "grad_norm": 6.29673921573495, "learning_rate": 4.334867621979319e-06, "loss": 0.5807, "step": 12469 }, { "epoch": 1.11, "grad_norm": 5.904192019295189, "learning_rate": 4.334151677739231e-06, "loss": 0.642, "step": 12470 }, { "epoch": 1.11, "grad_norm": 8.64445017362723, "learning_rate": 4.333435747397267e-06, "loss": 0.6387, "step": 12471 }, { "epoch": 1.11, "grad_norm": 5.899354577504616, "learning_rate": 4.3327198309683714e-06, "loss": 0.6397, "step": 12472 }, { "epoch": 1.11, "grad_norm": 5.6337836643974395, "learning_rate": 4.332003928467489e-06, "loss": 0.5508, "step": 12473 }, { "epoch": 1.11, "grad_norm": 5.620037102957254, "learning_rate": 4.331288039909562e-06, "loss": 0.5864, "step": 12474 }, { "epoch": 1.11, "grad_norm": 6.8953244871441175, "learning_rate": 4.3305721653095315e-06, "loss": 0.6538, "step": 12475 }, { "epoch": 1.11, "grad_norm": 7.687014403385439, "learning_rate": 4.329856304682344e-06, "loss": 0.5875, "step": 12476 }, { "epoch": 1.11, "grad_norm": 10.512247297688809, "learning_rate": 4.329140458042937e-06, "loss": 0.6375, "step": 12477 }, { "epoch": 1.11, "grad_norm": 4.771293271935824, "learning_rate": 4.328424625406252e-06, "loss": 0.5864, "step": 12478 }, { "epoch": 1.11, "grad_norm": 8.652545428663629, "learning_rate": 4.327708806787233e-06, "loss": 0.6564, "step": 12479 }, { "epoch": 1.11, "grad_norm": 6.479175633557988, "learning_rate": 4.326993002200821e-06, "loss": 0.6717, "step": 12480 }, { "epoch": 1.11, "grad_norm": 6.666530378587797, "learning_rate": 4.326277211661955e-06, "loss": 0.5923, "step": 12481 }, { "epoch": 1.11, "grad_norm": 5.800401294752259, "learning_rate": 4.325561435185578e-06, "loss": 0.5945, "step": 12482 }, { "epoch": 1.11, "grad_norm": 7.282074071767701, "learning_rate": 4.3248456727866295e-06, "loss": 0.5863, "step": 12483 }, { "epoch": 1.11, "grad_norm": 6.28403227202662, "learning_rate": 4.324129924480048e-06, "loss": 0.6617, "step": 12484 }, { "epoch": 1.11, "grad_norm": 6.014911094259924, "learning_rate": 4.323414190280774e-06, "loss": 0.6725, "step": 12485 }, { "epoch": 1.11, "grad_norm": 5.804543694542524, "learning_rate": 4.322698470203748e-06, "loss": 0.6394, "step": 12486 }, { "epoch": 1.11, "grad_norm": 7.246969209843704, "learning_rate": 4.3219827642639075e-06, "loss": 0.6243, "step": 12487 }, { "epoch": 1.11, "grad_norm": 5.623589438042041, "learning_rate": 4.321267072476191e-06, "loss": 0.5928, "step": 12488 }, { "epoch": 1.11, "grad_norm": 6.839547263816092, "learning_rate": 4.32055139485554e-06, "loss": 0.6166, "step": 12489 }, { "epoch": 1.11, "grad_norm": 6.038959113012882, "learning_rate": 4.319835731416892e-06, "loss": 0.6802, "step": 12490 }, { "epoch": 1.11, "grad_norm": 6.087174651842763, "learning_rate": 4.3191200821751805e-06, "loss": 0.6025, "step": 12491 }, { "epoch": 1.11, "grad_norm": 5.8796404001446225, "learning_rate": 4.318404447145347e-06, "loss": 0.6087, "step": 12492 }, { "epoch": 1.11, "grad_norm": 7.416923316230232, "learning_rate": 4.3176888263423285e-06, "loss": 0.5934, "step": 12493 }, { "epoch": 1.11, "grad_norm": 5.88999139999343, "learning_rate": 4.316973219781061e-06, "loss": 0.582, "step": 12494 }, { "epoch": 1.11, "grad_norm": 6.632695198496162, "learning_rate": 4.316257627476481e-06, "loss": 0.6619, "step": 12495 }, { "epoch": 1.11, "grad_norm": 4.596205656113535, "learning_rate": 4.315542049443527e-06, "loss": 0.6781, "step": 12496 }, { "epoch": 1.11, "grad_norm": 9.91531155214133, "learning_rate": 4.314826485697133e-06, "loss": 0.6264, "step": 12497 }, { "epoch": 1.11, "grad_norm": 5.933905512762475, "learning_rate": 4.314110936252235e-06, "loss": 0.5342, "step": 12498 }, { "epoch": 1.12, "grad_norm": 5.7134579605899365, "learning_rate": 4.313395401123771e-06, "loss": 0.6118, "step": 12499 }, { "epoch": 1.12, "grad_norm": 5.441317437712676, "learning_rate": 4.312679880326672e-06, "loss": 0.6088, "step": 12500 }, { "epoch": 1.12, "grad_norm": 7.247213545953423, "learning_rate": 4.311964373875877e-06, "loss": 0.6521, "step": 12501 }, { "epoch": 1.12, "grad_norm": 5.653324851007315, "learning_rate": 4.3112488817863186e-06, "loss": 0.5797, "step": 12502 }, { "epoch": 1.12, "grad_norm": 6.0305300304034395, "learning_rate": 4.310533404072931e-06, "loss": 0.5913, "step": 12503 }, { "epoch": 1.12, "grad_norm": 6.7100721957474185, "learning_rate": 4.309817940750649e-06, "loss": 0.5473, "step": 12504 }, { "epoch": 1.12, "grad_norm": 7.789056445119292, "learning_rate": 4.309102491834408e-06, "loss": 0.578, "step": 12505 }, { "epoch": 1.12, "grad_norm": 7.703240101293288, "learning_rate": 4.308387057339138e-06, "loss": 0.6053, "step": 12506 }, { "epoch": 1.12, "grad_norm": 6.266163317888491, "learning_rate": 4.307671637279772e-06, "loss": 0.6054, "step": 12507 }, { "epoch": 1.12, "grad_norm": 8.808345907961963, "learning_rate": 4.306956231671245e-06, "loss": 0.6264, "step": 12508 }, { "epoch": 1.12, "grad_norm": 5.5523556821321565, "learning_rate": 4.30624084052849e-06, "loss": 0.5925, "step": 12509 }, { "epoch": 1.12, "grad_norm": 6.030779240530797, "learning_rate": 4.305525463866438e-06, "loss": 0.6844, "step": 12510 }, { "epoch": 1.12, "grad_norm": 7.265723569847724, "learning_rate": 4.30481010170002e-06, "loss": 0.5844, "step": 12511 }, { "epoch": 1.12, "grad_norm": 7.084113222295643, "learning_rate": 4.3040947540441705e-06, "loss": 0.6148, "step": 12512 }, { "epoch": 1.12, "grad_norm": 6.0876815522526115, "learning_rate": 4.303379420913817e-06, "loss": 0.6332, "step": 12513 }, { "epoch": 1.12, "grad_norm": 6.14428910290338, "learning_rate": 4.302664102323893e-06, "loss": 0.6116, "step": 12514 }, { "epoch": 1.12, "grad_norm": 6.228048183349472, "learning_rate": 4.301948798289329e-06, "loss": 0.6365, "step": 12515 }, { "epoch": 1.12, "grad_norm": 5.937987867433675, "learning_rate": 4.301233508825055e-06, "loss": 0.6187, "step": 12516 }, { "epoch": 1.12, "grad_norm": 6.318244485067579, "learning_rate": 4.300518233946001e-06, "loss": 0.6663, "step": 12517 }, { "epoch": 1.12, "grad_norm": 5.903630660028428, "learning_rate": 4.299802973667098e-06, "loss": 0.6623, "step": 12518 }, { "epoch": 1.12, "grad_norm": 7.511419558976613, "learning_rate": 4.299087728003276e-06, "loss": 0.5997, "step": 12519 }, { "epoch": 1.12, "grad_norm": 5.553843127604773, "learning_rate": 4.29837249696946e-06, "loss": 0.6268, "step": 12520 }, { "epoch": 1.12, "grad_norm": 4.066676864247577, "learning_rate": 4.297657280580582e-06, "loss": 0.5872, "step": 12521 }, { "epoch": 1.12, "grad_norm": 6.774241679291997, "learning_rate": 4.296942078851571e-06, "loss": 0.5809, "step": 12522 }, { "epoch": 1.12, "grad_norm": 6.7297972980097285, "learning_rate": 4.2962268917973535e-06, "loss": 0.6514, "step": 12523 }, { "epoch": 1.12, "grad_norm": 7.240355376812263, "learning_rate": 4.295511719432858e-06, "loss": 0.7017, "step": 12524 }, { "epoch": 1.12, "grad_norm": 6.731004362429373, "learning_rate": 4.294796561773014e-06, "loss": 0.6514, "step": 12525 }, { "epoch": 1.12, "grad_norm": 8.615624176410025, "learning_rate": 4.294081418832746e-06, "loss": 0.6822, "step": 12526 }, { "epoch": 1.12, "grad_norm": 5.632605730172293, "learning_rate": 4.293366290626984e-06, "loss": 0.6165, "step": 12527 }, { "epoch": 1.12, "grad_norm": 6.546942323678026, "learning_rate": 4.292651177170652e-06, "loss": 0.5991, "step": 12528 }, { "epoch": 1.12, "grad_norm": 4.837376174574886, "learning_rate": 4.291936078478678e-06, "loss": 0.5799, "step": 12529 }, { "epoch": 1.12, "grad_norm": 6.4113561550944205, "learning_rate": 4.2912209945659885e-06, "loss": 0.5509, "step": 12530 }, { "epoch": 1.12, "grad_norm": 7.720073173635836, "learning_rate": 4.290505925447507e-06, "loss": 0.5805, "step": 12531 }, { "epoch": 1.12, "grad_norm": 7.073765502608775, "learning_rate": 4.289790871138161e-06, "loss": 0.5361, "step": 12532 }, { "epoch": 1.12, "grad_norm": 5.698559074096666, "learning_rate": 4.2890758316528755e-06, "loss": 0.6139, "step": 12533 }, { "epoch": 1.12, "grad_norm": 7.92010389570392, "learning_rate": 4.2883608070065765e-06, "loss": 0.6643, "step": 12534 }, { "epoch": 1.12, "grad_norm": 5.360420409190641, "learning_rate": 4.287645797214186e-06, "loss": 0.6453, "step": 12535 }, { "epoch": 1.12, "grad_norm": 5.92851103903131, "learning_rate": 4.286930802290629e-06, "loss": 0.6266, "step": 12536 }, { "epoch": 1.12, "grad_norm": 7.446982826261007, "learning_rate": 4.286215822250831e-06, "loss": 0.6104, "step": 12537 }, { "epoch": 1.12, "grad_norm": 5.85985106970029, "learning_rate": 4.285500857109713e-06, "loss": 0.6176, "step": 12538 }, { "epoch": 1.12, "grad_norm": 6.737865135053264, "learning_rate": 4.2847859068822004e-06, "loss": 0.6011, "step": 12539 }, { "epoch": 1.12, "grad_norm": 6.0605909697910585, "learning_rate": 4.284070971583216e-06, "loss": 0.5646, "step": 12540 }, { "epoch": 1.12, "grad_norm": 5.290337526787252, "learning_rate": 4.283356051227682e-06, "loss": 0.6131, "step": 12541 }, { "epoch": 1.12, "grad_norm": 5.870423278533271, "learning_rate": 4.282641145830521e-06, "loss": 0.6623, "step": 12542 }, { "epoch": 1.12, "grad_norm": 5.990075902693554, "learning_rate": 4.281926255406655e-06, "loss": 0.6441, "step": 12543 }, { "epoch": 1.12, "grad_norm": 8.046875197671858, "learning_rate": 4.281211379971006e-06, "loss": 0.5887, "step": 12544 }, { "epoch": 1.12, "grad_norm": 6.644751911767105, "learning_rate": 4.280496519538495e-06, "loss": 0.6915, "step": 12545 }, { "epoch": 1.12, "grad_norm": 5.64651775605895, "learning_rate": 4.279781674124044e-06, "loss": 0.6326, "step": 12546 }, { "epoch": 1.12, "grad_norm": 8.458152262899448, "learning_rate": 4.279066843742573e-06, "loss": 0.6242, "step": 12547 }, { "epoch": 1.12, "grad_norm": 6.716092399522176, "learning_rate": 4.2783520284090056e-06, "loss": 0.6278, "step": 12548 }, { "epoch": 1.12, "grad_norm": 8.495129903061246, "learning_rate": 4.2776372281382555e-06, "loss": 0.6204, "step": 12549 }, { "epoch": 1.12, "grad_norm": 5.859250351027002, "learning_rate": 4.2769224429452485e-06, "loss": 0.6128, "step": 12550 }, { "epoch": 1.12, "grad_norm": 5.6383455903845325, "learning_rate": 4.276207672844901e-06, "loss": 0.6076, "step": 12551 }, { "epoch": 1.12, "grad_norm": 6.54431183311348, "learning_rate": 4.275492917852132e-06, "loss": 0.5916, "step": 12552 }, { "epoch": 1.12, "grad_norm": 7.250890108293123, "learning_rate": 4.274778177981863e-06, "loss": 0.5999, "step": 12553 }, { "epoch": 1.12, "grad_norm": 6.805617472680836, "learning_rate": 4.274063453249011e-06, "loss": 0.6062, "step": 12554 }, { "epoch": 1.12, "grad_norm": 5.434437259733753, "learning_rate": 4.273348743668494e-06, "loss": 0.5893, "step": 12555 }, { "epoch": 1.12, "grad_norm": 6.916842749480253, "learning_rate": 4.272634049255233e-06, "loss": 0.5349, "step": 12556 }, { "epoch": 1.12, "grad_norm": 7.428018979257786, "learning_rate": 4.271919370024142e-06, "loss": 0.6945, "step": 12557 }, { "epoch": 1.12, "grad_norm": 8.645703099745653, "learning_rate": 4.271204705990138e-06, "loss": 0.625, "step": 12558 }, { "epoch": 1.12, "grad_norm": 7.4814715216574, "learning_rate": 4.270490057168142e-06, "loss": 0.6055, "step": 12559 }, { "epoch": 1.12, "grad_norm": 5.940894450436005, "learning_rate": 4.269775423573067e-06, "loss": 0.5758, "step": 12560 }, { "epoch": 1.12, "grad_norm": 5.000167840556448, "learning_rate": 4.269060805219831e-06, "loss": 0.6564, "step": 12561 }, { "epoch": 1.12, "grad_norm": 7.133457918040288, "learning_rate": 4.268346202123352e-06, "loss": 0.6399, "step": 12562 }, { "epoch": 1.12, "grad_norm": 6.872302051587071, "learning_rate": 4.267631614298543e-06, "loss": 0.6193, "step": 12563 }, { "epoch": 1.12, "grad_norm": 5.560978196055702, "learning_rate": 4.266917041760319e-06, "loss": 0.6135, "step": 12564 }, { "epoch": 1.12, "grad_norm": 7.385390873332481, "learning_rate": 4.266202484523597e-06, "loss": 0.5498, "step": 12565 }, { "epoch": 1.12, "grad_norm": 6.487515154735273, "learning_rate": 4.265487942603291e-06, "loss": 0.6646, "step": 12566 }, { "epoch": 1.12, "grad_norm": 6.663119525327174, "learning_rate": 4.264773416014315e-06, "loss": 0.6148, "step": 12567 }, { "epoch": 1.12, "grad_norm": 5.907333322972671, "learning_rate": 4.264058904771584e-06, "loss": 0.6838, "step": 12568 }, { "epoch": 1.12, "grad_norm": 5.62998157967311, "learning_rate": 4.263344408890013e-06, "loss": 0.5541, "step": 12569 }, { "epoch": 1.12, "grad_norm": 6.091646347992674, "learning_rate": 4.262629928384512e-06, "loss": 0.6487, "step": 12570 }, { "epoch": 1.12, "grad_norm": 7.1232226141445265, "learning_rate": 4.261915463269997e-06, "loss": 0.66, "step": 12571 }, { "epoch": 1.12, "grad_norm": 9.716415726282506, "learning_rate": 4.261201013561382e-06, "loss": 0.6201, "step": 12572 }, { "epoch": 1.12, "grad_norm": 4.353528590735751, "learning_rate": 4.260486579273577e-06, "loss": 0.5861, "step": 12573 }, { "epoch": 1.12, "grad_norm": 5.7599399564986875, "learning_rate": 4.259772160421495e-06, "loss": 0.6063, "step": 12574 }, { "epoch": 1.12, "grad_norm": 6.023318942672598, "learning_rate": 4.259057757020048e-06, "loss": 0.6731, "step": 12575 }, { "epoch": 1.12, "grad_norm": 6.20617711537705, "learning_rate": 4.258343369084147e-06, "loss": 0.5863, "step": 12576 }, { "epoch": 1.12, "grad_norm": 6.533455501667167, "learning_rate": 4.257628996628707e-06, "loss": 0.5712, "step": 12577 }, { "epoch": 1.12, "grad_norm": 7.924636033386092, "learning_rate": 4.2569146396686335e-06, "loss": 0.5742, "step": 12578 }, { "epoch": 1.12, "grad_norm": 4.854630442719464, "learning_rate": 4.25620029821884e-06, "loss": 0.5651, "step": 12579 }, { "epoch": 1.12, "grad_norm": 6.4648697030662605, "learning_rate": 4.255485972294236e-06, "loss": 0.5723, "step": 12580 }, { "epoch": 1.12, "grad_norm": 5.779738673282413, "learning_rate": 4.254771661909732e-06, "loss": 0.5691, "step": 12581 }, { "epoch": 1.12, "grad_norm": 8.03151137797915, "learning_rate": 4.254057367080237e-06, "loss": 0.6242, "step": 12582 }, { "epoch": 1.12, "grad_norm": 6.969144080385405, "learning_rate": 4.253343087820662e-06, "loss": 0.5834, "step": 12583 }, { "epoch": 1.12, "grad_norm": 4.957695694540873, "learning_rate": 4.252628824145913e-06, "loss": 0.5794, "step": 12584 }, { "epoch": 1.12, "grad_norm": 7.16738261192193, "learning_rate": 4.251914576070904e-06, "loss": 0.5688, "step": 12585 }, { "epoch": 1.12, "grad_norm": 5.310930878727806, "learning_rate": 4.251200343610537e-06, "loss": 0.61, "step": 12586 }, { "epoch": 1.12, "grad_norm": 5.430806467512137, "learning_rate": 4.2504861267797235e-06, "loss": 0.6435, "step": 12587 }, { "epoch": 1.12, "grad_norm": 5.288129616329784, "learning_rate": 4.249771925593371e-06, "loss": 0.6023, "step": 12588 }, { "epoch": 1.12, "grad_norm": 7.275389170974548, "learning_rate": 4.249057740066387e-06, "loss": 0.6678, "step": 12589 }, { "epoch": 1.12, "grad_norm": 6.59452582144278, "learning_rate": 4.248343570213677e-06, "loss": 0.5462, "step": 12590 }, { "epoch": 1.12, "grad_norm": 6.477370382870268, "learning_rate": 4.247629416050152e-06, "loss": 0.5485, "step": 12591 }, { "epoch": 1.12, "grad_norm": 7.072146654692142, "learning_rate": 4.246915277590713e-06, "loss": 0.7186, "step": 12592 }, { "epoch": 1.12, "grad_norm": 6.6341279317006485, "learning_rate": 4.246201154850268e-06, "loss": 0.5879, "step": 12593 }, { "epoch": 1.12, "grad_norm": 6.031028223532872, "learning_rate": 4.245487047843722e-06, "loss": 0.6045, "step": 12594 }, { "epoch": 1.12, "grad_norm": 8.169968311930544, "learning_rate": 4.244772956585984e-06, "loss": 0.6503, "step": 12595 }, { "epoch": 1.12, "grad_norm": 4.770817414272366, "learning_rate": 4.2440588810919556e-06, "loss": 0.7011, "step": 12596 }, { "epoch": 1.12, "grad_norm": 7.420205228086168, "learning_rate": 4.243344821376541e-06, "loss": 0.5677, "step": 12597 }, { "epoch": 1.12, "grad_norm": 5.539940078599746, "learning_rate": 4.242630777454649e-06, "loss": 0.589, "step": 12598 }, { "epoch": 1.12, "grad_norm": 4.4275348856243895, "learning_rate": 4.2419167493411794e-06, "loss": 0.5345, "step": 12599 }, { "epoch": 1.12, "grad_norm": 5.18953761728353, "learning_rate": 4.2412027370510365e-06, "loss": 0.6245, "step": 12600 }, { "epoch": 1.12, "grad_norm": 5.017957215767398, "learning_rate": 4.2404887405991264e-06, "loss": 0.6089, "step": 12601 }, { "epoch": 1.12, "grad_norm": 8.912039653653796, "learning_rate": 4.23977476000035e-06, "loss": 0.6128, "step": 12602 }, { "epoch": 1.12, "grad_norm": 5.34966727286289, "learning_rate": 4.2390607952696106e-06, "loss": 0.6058, "step": 12603 }, { "epoch": 1.12, "grad_norm": 5.8519158463911705, "learning_rate": 4.238346846421811e-06, "loss": 0.5385, "step": 12604 }, { "epoch": 1.12, "grad_norm": 5.088551482706895, "learning_rate": 4.2376329134718526e-06, "loss": 0.5375, "step": 12605 }, { "epoch": 1.12, "grad_norm": 4.617207121207288, "learning_rate": 4.236918996434639e-06, "loss": 0.6688, "step": 12606 }, { "epoch": 1.12, "grad_norm": 6.357094574082609, "learning_rate": 4.236205095325069e-06, "loss": 0.6014, "step": 12607 }, { "epoch": 1.12, "grad_norm": 5.949445411706624, "learning_rate": 4.235491210158045e-06, "loss": 0.6116, "step": 12608 }, { "epoch": 1.12, "grad_norm": 4.836761575830953, "learning_rate": 4.234777340948468e-06, "loss": 0.5581, "step": 12609 }, { "epoch": 1.12, "grad_norm": 7.125425113484446, "learning_rate": 4.234063487711237e-06, "loss": 0.6425, "step": 12610 }, { "epoch": 1.13, "grad_norm": 4.9552841003218, "learning_rate": 4.2333496504612544e-06, "loss": 0.6238, "step": 12611 }, { "epoch": 1.13, "grad_norm": 7.844297311056438, "learning_rate": 4.232635829213419e-06, "loss": 0.6612, "step": 12612 }, { "epoch": 1.13, "grad_norm": 3.976919915712569, "learning_rate": 4.23192202398263e-06, "loss": 0.5286, "step": 12613 }, { "epoch": 1.13, "grad_norm": 6.410874947924332, "learning_rate": 4.231208234783786e-06, "loss": 0.6068, "step": 12614 }, { "epoch": 1.13, "grad_norm": 7.193329231437338, "learning_rate": 4.230494461631786e-06, "loss": 0.6949, "step": 12615 }, { "epoch": 1.13, "grad_norm": 5.46414875884019, "learning_rate": 4.229780704541531e-06, "loss": 0.6332, "step": 12616 }, { "epoch": 1.13, "grad_norm": 5.962758084194527, "learning_rate": 4.229066963527915e-06, "loss": 0.6674, "step": 12617 }, { "epoch": 1.13, "grad_norm": 8.502094469393777, "learning_rate": 4.228353238605839e-06, "loss": 0.5961, "step": 12618 }, { "epoch": 1.13, "grad_norm": 5.6339336469914665, "learning_rate": 4.227639529790199e-06, "loss": 0.5663, "step": 12619 }, { "epoch": 1.13, "grad_norm": 6.819457661223192, "learning_rate": 4.226925837095894e-06, "loss": 0.561, "step": 12620 }, { "epoch": 1.13, "grad_norm": 8.076832392882768, "learning_rate": 4.226212160537818e-06, "loss": 0.623, "step": 12621 }, { "epoch": 1.13, "grad_norm": 6.962524814103424, "learning_rate": 4.225498500130868e-06, "loss": 0.697, "step": 12622 }, { "epoch": 1.13, "grad_norm": 5.444347984686597, "learning_rate": 4.2247848558899415e-06, "loss": 0.6445, "step": 12623 }, { "epoch": 1.13, "grad_norm": 8.875845333231622, "learning_rate": 4.224071227829932e-06, "loss": 0.5706, "step": 12624 }, { "epoch": 1.13, "grad_norm": 4.2990162082842875, "learning_rate": 4.223357615965737e-06, "loss": 0.5659, "step": 12625 }, { "epoch": 1.13, "grad_norm": 5.1639851791217195, "learning_rate": 4.222644020312251e-06, "loss": 0.6286, "step": 12626 }, { "epoch": 1.13, "grad_norm": 7.142686118827435, "learning_rate": 4.221930440884369e-06, "loss": 0.6713, "step": 12627 }, { "epoch": 1.13, "grad_norm": 6.606556344887824, "learning_rate": 4.221216877696984e-06, "loss": 0.6128, "step": 12628 }, { "epoch": 1.13, "grad_norm": 5.825885758856009, "learning_rate": 4.220503330764993e-06, "loss": 0.5959, "step": 12629 }, { "epoch": 1.13, "grad_norm": 5.919752072333492, "learning_rate": 4.2197898001032864e-06, "loss": 0.5779, "step": 12630 }, { "epoch": 1.13, "grad_norm": 8.315978496799756, "learning_rate": 4.21907628572676e-06, "loss": 0.5516, "step": 12631 }, { "epoch": 1.13, "grad_norm": 7.931908316588876, "learning_rate": 4.218362787650305e-06, "loss": 0.6139, "step": 12632 }, { "epoch": 1.13, "grad_norm": 7.807503741748099, "learning_rate": 4.217649305888816e-06, "loss": 0.558, "step": 12633 }, { "epoch": 1.13, "grad_norm": 6.152737266495149, "learning_rate": 4.216935840457183e-06, "loss": 0.6647, "step": 12634 }, { "epoch": 1.13, "grad_norm": 4.719033698010361, "learning_rate": 4.2162223913703025e-06, "loss": 0.6129, "step": 12635 }, { "epoch": 1.13, "grad_norm": 8.21001507201182, "learning_rate": 4.215508958643062e-06, "loss": 0.625, "step": 12636 }, { "epoch": 1.13, "grad_norm": 4.799218633764392, "learning_rate": 4.214795542290353e-06, "loss": 0.5796, "step": 12637 }, { "epoch": 1.13, "grad_norm": 10.768683716493918, "learning_rate": 4.214082142327066e-06, "loss": 0.6423, "step": 12638 }, { "epoch": 1.13, "grad_norm": 5.5463201947189305, "learning_rate": 4.213368758768096e-06, "loss": 0.5739, "step": 12639 }, { "epoch": 1.13, "grad_norm": 6.240257711424719, "learning_rate": 4.212655391628329e-06, "loss": 0.5774, "step": 12640 }, { "epoch": 1.13, "grad_norm": 7.128317232122081, "learning_rate": 4.211942040922656e-06, "loss": 0.6164, "step": 12641 }, { "epoch": 1.13, "grad_norm": 6.123918484759255, "learning_rate": 4.211228706665969e-06, "loss": 0.6001, "step": 12642 }, { "epoch": 1.13, "grad_norm": 6.460897375480822, "learning_rate": 4.210515388873153e-06, "loss": 0.5799, "step": 12643 }, { "epoch": 1.13, "grad_norm": 9.797370459393301, "learning_rate": 4.209802087559101e-06, "loss": 0.6111, "step": 12644 }, { "epoch": 1.13, "grad_norm": 8.714295799633994, "learning_rate": 4.209088802738699e-06, "loss": 0.5975, "step": 12645 }, { "epoch": 1.13, "grad_norm": 4.953990790805406, "learning_rate": 4.208375534426837e-06, "loss": 0.6299, "step": 12646 }, { "epoch": 1.13, "grad_norm": 6.711421118634388, "learning_rate": 4.207662282638401e-06, "loss": 0.5981, "step": 12647 }, { "epoch": 1.13, "grad_norm": 6.1069776209122, "learning_rate": 4.20694904738828e-06, "loss": 0.5487, "step": 12648 }, { "epoch": 1.13, "grad_norm": 6.166576316330994, "learning_rate": 4.206235828691363e-06, "loss": 0.5666, "step": 12649 }, { "epoch": 1.13, "grad_norm": 7.908183987889166, "learning_rate": 4.205522626562533e-06, "loss": 0.6116, "step": 12650 }, { "epoch": 1.13, "grad_norm": 7.143491140045141, "learning_rate": 4.204809441016677e-06, "loss": 0.6172, "step": 12651 }, { "epoch": 1.13, "grad_norm": 6.271974062529674, "learning_rate": 4.204096272068685e-06, "loss": 0.5864, "step": 12652 }, { "epoch": 1.13, "grad_norm": 9.081076796014594, "learning_rate": 4.203383119733439e-06, "loss": 0.6474, "step": 12653 }, { "epoch": 1.13, "grad_norm": 5.322175724906395, "learning_rate": 4.202669984025825e-06, "loss": 0.6181, "step": 12654 }, { "epoch": 1.13, "grad_norm": 6.058714280042682, "learning_rate": 4.20195686496073e-06, "loss": 0.616, "step": 12655 }, { "epoch": 1.13, "grad_norm": 5.027504728221585, "learning_rate": 4.201243762553038e-06, "loss": 0.6322, "step": 12656 }, { "epoch": 1.13, "grad_norm": 5.575385707803813, "learning_rate": 4.200530676817632e-06, "loss": 0.6112, "step": 12657 }, { "epoch": 1.13, "grad_norm": 4.828332688309542, "learning_rate": 4.199817607769399e-06, "loss": 0.6411, "step": 12658 }, { "epoch": 1.13, "grad_norm": 4.928712119123263, "learning_rate": 4.199104555423219e-06, "loss": 0.5565, "step": 12659 }, { "epoch": 1.13, "grad_norm": 6.726832603140673, "learning_rate": 4.198391519793979e-06, "loss": 0.6786, "step": 12660 }, { "epoch": 1.13, "grad_norm": 8.056123519710118, "learning_rate": 4.197678500896561e-06, "loss": 0.6974, "step": 12661 }, { "epoch": 1.13, "grad_norm": 7.954240246553722, "learning_rate": 4.196965498745845e-06, "loss": 0.569, "step": 12662 }, { "epoch": 1.13, "grad_norm": 5.819523592431655, "learning_rate": 4.196252513356717e-06, "loss": 0.6152, "step": 12663 }, { "epoch": 1.13, "grad_norm": 4.7666273788002345, "learning_rate": 4.1955395447440585e-06, "loss": 0.5805, "step": 12664 }, { "epoch": 1.13, "grad_norm": 8.28439395408195, "learning_rate": 4.194826592922751e-06, "loss": 0.5406, "step": 12665 }, { "epoch": 1.13, "grad_norm": 5.186520893297952, "learning_rate": 4.194113657907672e-06, "loss": 0.5387, "step": 12666 }, { "epoch": 1.13, "grad_norm": 4.89582197075012, "learning_rate": 4.193400739713707e-06, "loss": 0.587, "step": 12667 }, { "epoch": 1.13, "grad_norm": 8.467188976767641, "learning_rate": 4.192687838355736e-06, "loss": 0.6653, "step": 12668 }, { "epoch": 1.13, "grad_norm": 5.037381235541991, "learning_rate": 4.191974953848637e-06, "loss": 0.5686, "step": 12669 }, { "epoch": 1.13, "grad_norm": 6.572959212276394, "learning_rate": 4.191262086207291e-06, "loss": 0.6427, "step": 12670 }, { "epoch": 1.13, "grad_norm": 6.210053869088005, "learning_rate": 4.190549235446579e-06, "loss": 0.5804, "step": 12671 }, { "epoch": 1.13, "grad_norm": 7.326169962240459, "learning_rate": 4.189836401581378e-06, "loss": 0.6197, "step": 12672 }, { "epoch": 1.13, "grad_norm": 6.333692452935304, "learning_rate": 4.189123584626569e-06, "loss": 0.5907, "step": 12673 }, { "epoch": 1.13, "grad_norm": 6.118125662132285, "learning_rate": 4.188410784597029e-06, "loss": 0.5918, "step": 12674 }, { "epoch": 1.13, "grad_norm": 5.681988467837658, "learning_rate": 4.187698001507637e-06, "loss": 0.6622, "step": 12675 }, { "epoch": 1.13, "grad_norm": 7.465401883676141, "learning_rate": 4.186985235373269e-06, "loss": 0.6295, "step": 12676 }, { "epoch": 1.13, "grad_norm": 7.296987835785575, "learning_rate": 4.186272486208805e-06, "loss": 0.6114, "step": 12677 }, { "epoch": 1.13, "grad_norm": 7.639720527971992, "learning_rate": 4.185559754029122e-06, "loss": 0.6302, "step": 12678 }, { "epoch": 1.13, "grad_norm": 4.688682945601475, "learning_rate": 4.184847038849094e-06, "loss": 0.6027, "step": 12679 }, { "epoch": 1.13, "grad_norm": 6.350043471093085, "learning_rate": 4.184134340683598e-06, "loss": 0.6058, "step": 12680 }, { "epoch": 1.13, "grad_norm": 7.458072245147057, "learning_rate": 4.183421659547512e-06, "loss": 0.5657, "step": 12681 }, { "epoch": 1.13, "grad_norm": 6.5493362656049925, "learning_rate": 4.18270899545571e-06, "loss": 0.6538, "step": 12682 }, { "epoch": 1.13, "grad_norm": 7.500736731559442, "learning_rate": 4.181996348423068e-06, "loss": 0.6143, "step": 12683 }, { "epoch": 1.13, "grad_norm": 5.2576764710640855, "learning_rate": 4.181283718464462e-06, "loss": 0.5741, "step": 12684 }, { "epoch": 1.13, "grad_norm": 5.800755571559783, "learning_rate": 4.180571105594764e-06, "loss": 0.6002, "step": 12685 }, { "epoch": 1.13, "grad_norm": 5.471930041886409, "learning_rate": 4.179858509828849e-06, "loss": 0.5819, "step": 12686 }, { "epoch": 1.13, "grad_norm": 5.96610695322445, "learning_rate": 4.179145931181594e-06, "loss": 0.5855, "step": 12687 }, { "epoch": 1.13, "grad_norm": 6.927183113306261, "learning_rate": 4.178433369667868e-06, "loss": 0.6984, "step": 12688 }, { "epoch": 1.13, "grad_norm": 5.70317171687799, "learning_rate": 4.177720825302547e-06, "loss": 0.5914, "step": 12689 }, { "epoch": 1.13, "grad_norm": 8.236083615690184, "learning_rate": 4.177008298100502e-06, "loss": 0.6124, "step": 12690 }, { "epoch": 1.13, "grad_norm": 5.637475939020546, "learning_rate": 4.176295788076608e-06, "loss": 0.6446, "step": 12691 }, { "epoch": 1.13, "grad_norm": 6.770102139228452, "learning_rate": 4.175583295245736e-06, "loss": 0.6591, "step": 12692 }, { "epoch": 1.13, "grad_norm": 7.350057745896262, "learning_rate": 4.174870819622755e-06, "loss": 0.5849, "step": 12693 }, { "epoch": 1.13, "grad_norm": 4.9855297849665865, "learning_rate": 4.174158361222541e-06, "loss": 0.5413, "step": 12694 }, { "epoch": 1.13, "grad_norm": 6.064068768018749, "learning_rate": 4.173445920059961e-06, "loss": 0.6503, "step": 12695 }, { "epoch": 1.13, "grad_norm": 8.202963995744277, "learning_rate": 4.172733496149887e-06, "loss": 0.6136, "step": 12696 }, { "epoch": 1.13, "grad_norm": 6.306456038250808, "learning_rate": 4.172021089507191e-06, "loss": 0.5673, "step": 12697 }, { "epoch": 1.13, "grad_norm": 6.409561043291567, "learning_rate": 4.171308700146741e-06, "loss": 0.6294, "step": 12698 }, { "epoch": 1.13, "grad_norm": 6.697725374062372, "learning_rate": 4.170596328083407e-06, "loss": 0.6039, "step": 12699 }, { "epoch": 1.13, "grad_norm": 7.323796131847582, "learning_rate": 4.169883973332058e-06, "loss": 0.5681, "step": 12700 }, { "epoch": 1.13, "grad_norm": 5.674420703859286, "learning_rate": 4.169171635907563e-06, "loss": 0.5468, "step": 12701 }, { "epoch": 1.13, "grad_norm": 5.239631491299969, "learning_rate": 4.168459315824791e-06, "loss": 0.603, "step": 12702 }, { "epoch": 1.13, "grad_norm": 5.312389100461924, "learning_rate": 4.16774701309861e-06, "loss": 0.5326, "step": 12703 }, { "epoch": 1.13, "grad_norm": 7.379880029324593, "learning_rate": 4.167034727743886e-06, "loss": 0.5908, "step": 12704 }, { "epoch": 1.13, "grad_norm": 4.996491974536102, "learning_rate": 4.1663224597754905e-06, "loss": 0.6279, "step": 12705 }, { "epoch": 1.13, "grad_norm": 5.9410683986714545, "learning_rate": 4.165610209208286e-06, "loss": 0.5961, "step": 12706 }, { "epoch": 1.13, "grad_norm": 6.79877507566606, "learning_rate": 4.1648979760571425e-06, "loss": 0.5601, "step": 12707 }, { "epoch": 1.13, "grad_norm": 5.759308860300633, "learning_rate": 4.164185760336924e-06, "loss": 0.6363, "step": 12708 }, { "epoch": 1.13, "grad_norm": 6.403143590025384, "learning_rate": 4.163473562062498e-06, "loss": 0.6306, "step": 12709 }, { "epoch": 1.13, "grad_norm": 8.088698030591823, "learning_rate": 4.1627613812487284e-06, "loss": 0.5612, "step": 12710 }, { "epoch": 1.13, "grad_norm": 6.25837451932744, "learning_rate": 4.162049217910481e-06, "loss": 0.6393, "step": 12711 }, { "epoch": 1.13, "grad_norm": 7.355127702528974, "learning_rate": 4.161337072062623e-06, "loss": 0.6171, "step": 12712 }, { "epoch": 1.13, "grad_norm": 6.404315763727701, "learning_rate": 4.160624943720015e-06, "loss": 0.5954, "step": 12713 }, { "epoch": 1.13, "grad_norm": 5.757445688024219, "learning_rate": 4.159912832897524e-06, "loss": 0.599, "step": 12714 }, { "epoch": 1.13, "grad_norm": 4.5739824279960635, "learning_rate": 4.159200739610013e-06, "loss": 0.5546, "step": 12715 }, { "epoch": 1.13, "grad_norm": 5.570251189514082, "learning_rate": 4.158488663872346e-06, "loss": 0.6777, "step": 12716 }, { "epoch": 1.13, "grad_norm": 6.3087020230117945, "learning_rate": 4.157776605699383e-06, "loss": 0.6052, "step": 12717 }, { "epoch": 1.13, "grad_norm": 5.516401456893279, "learning_rate": 4.157064565105991e-06, "loss": 0.587, "step": 12718 }, { "epoch": 1.13, "grad_norm": 7.967350149704901, "learning_rate": 4.156352542107029e-06, "loss": 0.6188, "step": 12719 }, { "epoch": 1.13, "grad_norm": 4.654118097714581, "learning_rate": 4.15564053671736e-06, "loss": 0.5781, "step": 12720 }, { "epoch": 1.13, "grad_norm": 6.691401781781373, "learning_rate": 4.154928548951848e-06, "loss": 0.6757, "step": 12721 }, { "epoch": 1.13, "grad_norm": 6.226859324028045, "learning_rate": 4.154216578825351e-06, "loss": 0.5911, "step": 12722 }, { "epoch": 1.14, "grad_norm": 5.3251342668830315, "learning_rate": 4.15350462635273e-06, "loss": 0.5871, "step": 12723 }, { "epoch": 1.14, "grad_norm": 5.143308841401558, "learning_rate": 4.1527926915488445e-06, "loss": 0.6205, "step": 12724 }, { "epoch": 1.14, "grad_norm": 6.699136838272306, "learning_rate": 4.152080774428558e-06, "loss": 0.5602, "step": 12725 }, { "epoch": 1.14, "grad_norm": 6.605763130199257, "learning_rate": 4.1513688750067284e-06, "loss": 0.6026, "step": 12726 }, { "epoch": 1.14, "grad_norm": 5.300114079442129, "learning_rate": 4.150656993298214e-06, "loss": 0.6425, "step": 12727 }, { "epoch": 1.14, "grad_norm": 5.842733429162357, "learning_rate": 4.149945129317877e-06, "loss": 0.5617, "step": 12728 }, { "epoch": 1.14, "grad_norm": 5.380793756538788, "learning_rate": 4.149233283080571e-06, "loss": 0.5706, "step": 12729 }, { "epoch": 1.14, "grad_norm": 5.028300939575589, "learning_rate": 4.148521454601159e-06, "loss": 0.6265, "step": 12730 }, { "epoch": 1.14, "grad_norm": 7.8889565086954665, "learning_rate": 4.147809643894497e-06, "loss": 0.625, "step": 12731 }, { "epoch": 1.14, "grad_norm": 4.865211723973894, "learning_rate": 4.1470978509754416e-06, "loss": 0.5891, "step": 12732 }, { "epoch": 1.14, "grad_norm": 6.808786602730541, "learning_rate": 4.14638607585885e-06, "loss": 0.6265, "step": 12733 }, { "epoch": 1.14, "grad_norm": 5.766070203249128, "learning_rate": 4.1456743185595815e-06, "loss": 0.5744, "step": 12734 }, { "epoch": 1.14, "grad_norm": 8.075150449958093, "learning_rate": 4.14496257909249e-06, "loss": 0.6031, "step": 12735 }, { "epoch": 1.14, "grad_norm": 6.354861107191467, "learning_rate": 4.144250857472434e-06, "loss": 0.6466, "step": 12736 }, { "epoch": 1.14, "grad_norm": 6.527357840624476, "learning_rate": 4.143539153714264e-06, "loss": 0.5781, "step": 12737 }, { "epoch": 1.14, "grad_norm": 5.6616421839879285, "learning_rate": 4.14282746783284e-06, "loss": 0.598, "step": 12738 }, { "epoch": 1.14, "grad_norm": 7.333362555937387, "learning_rate": 4.142115799843015e-06, "loss": 0.6448, "step": 12739 }, { "epoch": 1.14, "grad_norm": 7.453391436371966, "learning_rate": 4.141404149759644e-06, "loss": 0.6512, "step": 12740 }, { "epoch": 1.14, "grad_norm": 5.970562121879597, "learning_rate": 4.1406925175975825e-06, "loss": 0.6456, "step": 12741 }, { "epoch": 1.14, "grad_norm": 6.947067527403919, "learning_rate": 4.139980903371681e-06, "loss": 0.5786, "step": 12742 }, { "epoch": 1.14, "grad_norm": 7.125958269744058, "learning_rate": 4.139269307096795e-06, "loss": 0.6059, "step": 12743 }, { "epoch": 1.14, "grad_norm": 6.047556641680677, "learning_rate": 4.138557728787778e-06, "loss": 0.6695, "step": 12744 }, { "epoch": 1.14, "grad_norm": 5.031019876509816, "learning_rate": 4.137846168459481e-06, "loss": 0.5728, "step": 12745 }, { "epoch": 1.14, "grad_norm": 6.880993271217456, "learning_rate": 4.137134626126757e-06, "loss": 0.6501, "step": 12746 }, { "epoch": 1.14, "grad_norm": 5.081469797923237, "learning_rate": 4.13642310180446e-06, "loss": 0.6063, "step": 12747 }, { "epoch": 1.14, "grad_norm": 5.942432961520711, "learning_rate": 4.135711595507437e-06, "loss": 0.6652, "step": 12748 }, { "epoch": 1.14, "grad_norm": 8.136049596210688, "learning_rate": 4.135000107250543e-06, "loss": 0.6025, "step": 12749 }, { "epoch": 1.14, "grad_norm": 7.350976950096308, "learning_rate": 4.134288637048629e-06, "loss": 0.5891, "step": 12750 }, { "epoch": 1.14, "grad_norm": 4.758320824760471, "learning_rate": 4.133577184916543e-06, "loss": 0.6853, "step": 12751 }, { "epoch": 1.14, "grad_norm": 12.199076974291925, "learning_rate": 4.132865750869134e-06, "loss": 0.6169, "step": 12752 }, { "epoch": 1.14, "grad_norm": 6.944342950291794, "learning_rate": 4.132154334921255e-06, "loss": 0.606, "step": 12753 }, { "epoch": 1.14, "grad_norm": 5.931219516405805, "learning_rate": 4.131442937087754e-06, "loss": 0.63, "step": 12754 }, { "epoch": 1.14, "grad_norm": 6.157822474561904, "learning_rate": 4.130731557383477e-06, "loss": 0.6257, "step": 12755 }, { "epoch": 1.14, "grad_norm": 6.10263260902672, "learning_rate": 4.130020195823277e-06, "loss": 0.5934, "step": 12756 }, { "epoch": 1.14, "grad_norm": 6.75764409861294, "learning_rate": 4.129308852422001e-06, "loss": 0.5929, "step": 12757 }, { "epoch": 1.14, "grad_norm": 11.647357216690482, "learning_rate": 4.128597527194495e-06, "loss": 0.6152, "step": 12758 }, { "epoch": 1.14, "grad_norm": 5.543494380550673, "learning_rate": 4.127886220155606e-06, "loss": 0.591, "step": 12759 }, { "epoch": 1.14, "grad_norm": 4.539793705869565, "learning_rate": 4.127174931320184e-06, "loss": 0.6275, "step": 12760 }, { "epoch": 1.14, "grad_norm": 6.889793883793437, "learning_rate": 4.126463660703073e-06, "loss": 0.5949, "step": 12761 }, { "epoch": 1.14, "grad_norm": 7.092178034925467, "learning_rate": 4.12575240831912e-06, "loss": 0.6176, "step": 12762 }, { "epoch": 1.14, "grad_norm": 7.670853216669609, "learning_rate": 4.125041174183172e-06, "loss": 0.577, "step": 12763 }, { "epoch": 1.14, "grad_norm": 6.859521412804961, "learning_rate": 4.124329958310075e-06, "loss": 0.6117, "step": 12764 }, { "epoch": 1.14, "grad_norm": 6.973185397904458, "learning_rate": 4.12361876071467e-06, "loss": 0.6102, "step": 12765 }, { "epoch": 1.14, "grad_norm": 7.288597881500008, "learning_rate": 4.122907581411803e-06, "loss": 0.5992, "step": 12766 }, { "epoch": 1.14, "grad_norm": 8.436943258155077, "learning_rate": 4.122196420416321e-06, "loss": 0.609, "step": 12767 }, { "epoch": 1.14, "grad_norm": 6.138179883334849, "learning_rate": 4.121485277743066e-06, "loss": 0.5633, "step": 12768 }, { "epoch": 1.14, "grad_norm": 5.128256097209497, "learning_rate": 4.120774153406882e-06, "loss": 0.5615, "step": 12769 }, { "epoch": 1.14, "grad_norm": 5.321727757305374, "learning_rate": 4.120063047422612e-06, "loss": 0.5916, "step": 12770 }, { "epoch": 1.14, "grad_norm": 6.592273565257728, "learning_rate": 4.119351959805099e-06, "loss": 0.5967, "step": 12771 }, { "epoch": 1.14, "grad_norm": 4.6479033584633545, "learning_rate": 4.118640890569185e-06, "loss": 0.6123, "step": 12772 }, { "epoch": 1.14, "grad_norm": 6.095865924443309, "learning_rate": 4.117929839729713e-06, "loss": 0.5944, "step": 12773 }, { "epoch": 1.14, "grad_norm": 5.394881126178612, "learning_rate": 4.117218807301522e-06, "loss": 0.5825, "step": 12774 }, { "epoch": 1.14, "grad_norm": 5.832065361073542, "learning_rate": 4.116507793299458e-06, "loss": 0.5901, "step": 12775 }, { "epoch": 1.14, "grad_norm": 6.236404395882163, "learning_rate": 4.115796797738357e-06, "loss": 0.588, "step": 12776 }, { "epoch": 1.14, "grad_norm": 4.883805925776225, "learning_rate": 4.115085820633061e-06, "loss": 0.6807, "step": 12777 }, { "epoch": 1.14, "grad_norm": 8.295645737960045, "learning_rate": 4.114374861998412e-06, "loss": 0.591, "step": 12778 }, { "epoch": 1.14, "grad_norm": 6.339563205810006, "learning_rate": 4.113663921849249e-06, "loss": 0.6469, "step": 12779 }, { "epoch": 1.14, "grad_norm": 8.045728616981826, "learning_rate": 4.11295300020041e-06, "loss": 0.6168, "step": 12780 }, { "epoch": 1.14, "grad_norm": 6.986106198077401, "learning_rate": 4.112242097066733e-06, "loss": 0.5533, "step": 12781 }, { "epoch": 1.14, "grad_norm": 4.616816668337794, "learning_rate": 4.111531212463058e-06, "loss": 0.6175, "step": 12782 }, { "epoch": 1.14, "grad_norm": 6.867884790340103, "learning_rate": 4.110820346404226e-06, "loss": 0.6197, "step": 12783 }, { "epoch": 1.14, "grad_norm": 7.989078606775965, "learning_rate": 4.110109498905069e-06, "loss": 0.628, "step": 12784 }, { "epoch": 1.14, "grad_norm": 5.447944089010971, "learning_rate": 4.10939866998043e-06, "loss": 0.648, "step": 12785 }, { "epoch": 1.14, "grad_norm": 5.684374747059337, "learning_rate": 4.10868785964514e-06, "loss": 0.6126, "step": 12786 }, { "epoch": 1.14, "grad_norm": 6.550044145885111, "learning_rate": 4.107977067914041e-06, "loss": 0.6205, "step": 12787 }, { "epoch": 1.14, "grad_norm": 5.918509800947694, "learning_rate": 4.107266294801968e-06, "loss": 0.6203, "step": 12788 }, { "epoch": 1.14, "grad_norm": 6.175138519646643, "learning_rate": 4.106555540323755e-06, "loss": 0.584, "step": 12789 }, { "epoch": 1.14, "grad_norm": 6.244555923210068, "learning_rate": 4.105844804494238e-06, "loss": 0.6128, "step": 12790 }, { "epoch": 1.14, "grad_norm": 5.875723140691153, "learning_rate": 4.1051340873282545e-06, "loss": 0.5932, "step": 12791 }, { "epoch": 1.14, "grad_norm": 7.6101211710140495, "learning_rate": 4.104423388840635e-06, "loss": 0.6155, "step": 12792 }, { "epoch": 1.14, "grad_norm": 7.337509126754695, "learning_rate": 4.103712709046219e-06, "loss": 0.6458, "step": 12793 }, { "epoch": 1.14, "grad_norm": 7.806206478829382, "learning_rate": 4.103002047959834e-06, "loss": 0.6489, "step": 12794 }, { "epoch": 1.14, "grad_norm": 4.123203951112772, "learning_rate": 4.102291405596319e-06, "loss": 0.6035, "step": 12795 }, { "epoch": 1.14, "grad_norm": 9.555776568653464, "learning_rate": 4.1015807819705036e-06, "loss": 0.6194, "step": 12796 }, { "epoch": 1.14, "grad_norm": 5.700463455818202, "learning_rate": 4.100870177097222e-06, "loss": 0.6108, "step": 12797 }, { "epoch": 1.14, "grad_norm": 6.059683023690938, "learning_rate": 4.1001595909913075e-06, "loss": 0.5479, "step": 12798 }, { "epoch": 1.14, "grad_norm": 6.127899678297387, "learning_rate": 4.09944902366759e-06, "loss": 0.5469, "step": 12799 }, { "epoch": 1.14, "grad_norm": 7.0156331235176435, "learning_rate": 4.0987384751409015e-06, "loss": 0.6052, "step": 12800 }, { "epoch": 1.14, "grad_norm": 9.402652326663736, "learning_rate": 4.098027945426075e-06, "loss": 0.5805, "step": 12801 }, { "epoch": 1.14, "grad_norm": 6.214074715173855, "learning_rate": 4.097317434537939e-06, "loss": 0.5724, "step": 12802 }, { "epoch": 1.14, "grad_norm": 6.122618038910121, "learning_rate": 4.096606942491324e-06, "loss": 0.6138, "step": 12803 }, { "epoch": 1.14, "grad_norm": 7.324854168897886, "learning_rate": 4.095896469301062e-06, "loss": 0.6904, "step": 12804 }, { "epoch": 1.14, "grad_norm": 4.958021496848178, "learning_rate": 4.09518601498198e-06, "loss": 0.6138, "step": 12805 }, { "epoch": 1.14, "grad_norm": 6.791586359127114, "learning_rate": 4.094475579548909e-06, "loss": 0.6001, "step": 12806 }, { "epoch": 1.14, "grad_norm": 6.627803443853279, "learning_rate": 4.093765163016678e-06, "loss": 0.55, "step": 12807 }, { "epoch": 1.14, "grad_norm": 7.974814155850224, "learning_rate": 4.0930547654001154e-06, "loss": 0.5925, "step": 12808 }, { "epoch": 1.14, "grad_norm": 5.893931761876743, "learning_rate": 4.092344386714047e-06, "loss": 0.6525, "step": 12809 }, { "epoch": 1.14, "grad_norm": 5.581244452895232, "learning_rate": 4.091634026973301e-06, "loss": 0.6254, "step": 12810 }, { "epoch": 1.14, "grad_norm": 8.185131563953298, "learning_rate": 4.090923686192707e-06, "loss": 0.6148, "step": 12811 }, { "epoch": 1.14, "grad_norm": 6.018644560787843, "learning_rate": 4.090213364387089e-06, "loss": 0.5823, "step": 12812 }, { "epoch": 1.14, "grad_norm": 5.273722739574082, "learning_rate": 4.089503061571274e-06, "loss": 0.5465, "step": 12813 }, { "epoch": 1.14, "grad_norm": 5.372631725903289, "learning_rate": 4.08879277776009e-06, "loss": 0.5956, "step": 12814 }, { "epoch": 1.14, "grad_norm": 7.7677059168467295, "learning_rate": 4.088082512968361e-06, "loss": 0.6257, "step": 12815 }, { "epoch": 1.14, "grad_norm": 7.899159156747473, "learning_rate": 4.0873722672109115e-06, "loss": 0.5434, "step": 12816 }, { "epoch": 1.14, "grad_norm": 6.303273939884204, "learning_rate": 4.086662040502569e-06, "loss": 0.5995, "step": 12817 }, { "epoch": 1.14, "grad_norm": 9.790862909693988, "learning_rate": 4.085951832858155e-06, "loss": 0.5866, "step": 12818 }, { "epoch": 1.14, "grad_norm": 8.773710760791847, "learning_rate": 4.085241644292495e-06, "loss": 0.6231, "step": 12819 }, { "epoch": 1.14, "grad_norm": 4.816050416873687, "learning_rate": 4.084531474820413e-06, "loss": 0.539, "step": 12820 }, { "epoch": 1.14, "grad_norm": 6.252981499276761, "learning_rate": 4.083821324456731e-06, "loss": 0.5737, "step": 12821 }, { "epoch": 1.14, "grad_norm": 5.403551955250211, "learning_rate": 4.083111193216274e-06, "loss": 0.6352, "step": 12822 }, { "epoch": 1.14, "grad_norm": 6.499492197440524, "learning_rate": 4.082401081113861e-06, "loss": 0.6833, "step": 12823 }, { "epoch": 1.14, "grad_norm": 7.25817667440091, "learning_rate": 4.081690988164317e-06, "loss": 0.6811, "step": 12824 }, { "epoch": 1.14, "grad_norm": 5.292793425763199, "learning_rate": 4.080980914382461e-06, "loss": 0.5822, "step": 12825 }, { "epoch": 1.14, "grad_norm": 6.241834975960904, "learning_rate": 4.080270859783117e-06, "loss": 0.6337, "step": 12826 }, { "epoch": 1.14, "grad_norm": 6.36673943803984, "learning_rate": 4.079560824381105e-06, "loss": 0.5574, "step": 12827 }, { "epoch": 1.14, "grad_norm": 8.441263619417304, "learning_rate": 4.078850808191243e-06, "loss": 0.5733, "step": 12828 }, { "epoch": 1.14, "grad_norm": 7.339624316272556, "learning_rate": 4.078140811228355e-06, "loss": 0.6111, "step": 12829 }, { "epoch": 1.14, "grad_norm": 5.916008062555011, "learning_rate": 4.077430833507259e-06, "loss": 0.6722, "step": 12830 }, { "epoch": 1.14, "grad_norm": 6.632471359294994, "learning_rate": 4.0767208750427725e-06, "loss": 0.5919, "step": 12831 }, { "epoch": 1.14, "grad_norm": 8.372138304110585, "learning_rate": 4.076010935849716e-06, "loss": 0.6031, "step": 12832 }, { "epoch": 1.14, "grad_norm": 5.029163707120544, "learning_rate": 4.075301015942909e-06, "loss": 0.6016, "step": 12833 }, { "epoch": 1.14, "grad_norm": 5.824890235988416, "learning_rate": 4.074591115337167e-06, "loss": 0.5474, "step": 12834 }, { "epoch": 1.15, "grad_norm": 6.362884615608136, "learning_rate": 4.073881234047309e-06, "loss": 0.5992, "step": 12835 }, { "epoch": 1.15, "grad_norm": 5.333278801761608, "learning_rate": 4.0731713720881526e-06, "loss": 0.5698, "step": 12836 }, { "epoch": 1.15, "grad_norm": 6.208397677427569, "learning_rate": 4.072461529474516e-06, "loss": 0.6053, "step": 12837 }, { "epoch": 1.15, "grad_norm": 6.173229520615358, "learning_rate": 4.071751706221212e-06, "loss": 0.5854, "step": 12838 }, { "epoch": 1.15, "grad_norm": 5.726421716103314, "learning_rate": 4.071041902343057e-06, "loss": 0.5852, "step": 12839 }, { "epoch": 1.15, "grad_norm": 7.311094120248424, "learning_rate": 4.070332117854869e-06, "loss": 0.586, "step": 12840 }, { "epoch": 1.15, "grad_norm": 5.254355299643176, "learning_rate": 4.069622352771462e-06, "loss": 0.5996, "step": 12841 }, { "epoch": 1.15, "grad_norm": 7.5260880288762095, "learning_rate": 4.06891260710765e-06, "loss": 0.6103, "step": 12842 }, { "epoch": 1.15, "grad_norm": 7.6769890931178395, "learning_rate": 4.06820288087825e-06, "loss": 0.593, "step": 12843 }, { "epoch": 1.15, "grad_norm": 5.824417505602332, "learning_rate": 4.0674931740980725e-06, "loss": 0.6334, "step": 12844 }, { "epoch": 1.15, "grad_norm": 5.702695288803621, "learning_rate": 4.066783486781934e-06, "loss": 0.5568, "step": 12845 }, { "epoch": 1.15, "grad_norm": 8.902140913020927, "learning_rate": 4.0660738189446455e-06, "loss": 0.6335, "step": 12846 }, { "epoch": 1.15, "grad_norm": 7.750322244524445, "learning_rate": 4.065364170601022e-06, "loss": 0.5937, "step": 12847 }, { "epoch": 1.15, "grad_norm": 5.219518044125492, "learning_rate": 4.064654541765873e-06, "loss": 0.6074, "step": 12848 }, { "epoch": 1.15, "grad_norm": 6.7976813563133645, "learning_rate": 4.063944932454014e-06, "loss": 0.6504, "step": 12849 }, { "epoch": 1.15, "grad_norm": 6.329784321131278, "learning_rate": 4.063235342680253e-06, "loss": 0.6322, "step": 12850 }, { "epoch": 1.15, "grad_norm": 5.408937774643539, "learning_rate": 4.062525772459406e-06, "loss": 0.6168, "step": 12851 }, { "epoch": 1.15, "grad_norm": 7.655391881049637, "learning_rate": 4.061816221806276e-06, "loss": 0.6282, "step": 12852 }, { "epoch": 1.15, "grad_norm": 6.555664216417251, "learning_rate": 4.0611066907356815e-06, "loss": 0.6407, "step": 12853 }, { "epoch": 1.15, "grad_norm": 6.046652080314439, "learning_rate": 4.060397179262426e-06, "loss": 0.5624, "step": 12854 }, { "epoch": 1.15, "grad_norm": 6.0731467601213325, "learning_rate": 4.059687687401323e-06, "loss": 0.7119, "step": 12855 }, { "epoch": 1.15, "grad_norm": 6.092792978439218, "learning_rate": 4.05897821516718e-06, "loss": 0.6854, "step": 12856 }, { "epoch": 1.15, "grad_norm": 5.027003200515344, "learning_rate": 4.058268762574805e-06, "loss": 0.5935, "step": 12857 }, { "epoch": 1.15, "grad_norm": 10.484743995597071, "learning_rate": 4.057559329639009e-06, "loss": 0.5196, "step": 12858 }, { "epoch": 1.15, "grad_norm": 5.028704907707448, "learning_rate": 4.056849916374597e-06, "loss": 0.6068, "step": 12859 }, { "epoch": 1.15, "grad_norm": 7.9471582758263395, "learning_rate": 4.056140522796377e-06, "loss": 0.5888, "step": 12860 }, { "epoch": 1.15, "grad_norm": 7.323088985046348, "learning_rate": 4.055431148919158e-06, "loss": 0.6431, "step": 12861 }, { "epoch": 1.15, "grad_norm": 5.850591541806382, "learning_rate": 4.054721794757744e-06, "loss": 0.666, "step": 12862 }, { "epoch": 1.15, "grad_norm": 8.466758536432264, "learning_rate": 4.054012460326942e-06, "loss": 0.5766, "step": 12863 }, { "epoch": 1.15, "grad_norm": 6.232755544749327, "learning_rate": 4.05330314564156e-06, "loss": 0.5546, "step": 12864 }, { "epoch": 1.15, "grad_norm": 7.738421201072238, "learning_rate": 4.052593850716401e-06, "loss": 0.6597, "step": 12865 }, { "epoch": 1.15, "grad_norm": 7.324660711368953, "learning_rate": 4.051884575566271e-06, "loss": 0.5845, "step": 12866 }, { "epoch": 1.15, "grad_norm": 5.785817337006844, "learning_rate": 4.0511753202059735e-06, "loss": 0.581, "step": 12867 }, { "epoch": 1.15, "grad_norm": 4.728488083068472, "learning_rate": 4.050466084650314e-06, "loss": 0.6193, "step": 12868 }, { "epoch": 1.15, "grad_norm": 12.262278881169339, "learning_rate": 4.049756868914094e-06, "loss": 0.6783, "step": 12869 }, { "epoch": 1.15, "grad_norm": 6.754396868791832, "learning_rate": 4.049047673012119e-06, "loss": 0.6547, "step": 12870 }, { "epoch": 1.15, "grad_norm": 6.762727559654872, "learning_rate": 4.048338496959192e-06, "loss": 0.6784, "step": 12871 }, { "epoch": 1.15, "grad_norm": 6.124918721979807, "learning_rate": 4.047629340770113e-06, "loss": 0.5947, "step": 12872 }, { "epoch": 1.15, "grad_norm": 5.416438891243214, "learning_rate": 4.046920204459687e-06, "loss": 0.5155, "step": 12873 }, { "epoch": 1.15, "grad_norm": 5.715530101349614, "learning_rate": 4.046211088042715e-06, "loss": 0.5956, "step": 12874 }, { "epoch": 1.15, "grad_norm": 5.68798714187965, "learning_rate": 4.0455019915339964e-06, "loss": 0.6065, "step": 12875 }, { "epoch": 1.15, "grad_norm": 6.710219835867501, "learning_rate": 4.044792914948335e-06, "loss": 0.5652, "step": 12876 }, { "epoch": 1.15, "grad_norm": 6.285799349308264, "learning_rate": 4.0440838583005295e-06, "loss": 0.6255, "step": 12877 }, { "epoch": 1.15, "grad_norm": 6.582549425778738, "learning_rate": 4.043374821605379e-06, "loss": 0.6226, "step": 12878 }, { "epoch": 1.15, "grad_norm": 6.13527730243023, "learning_rate": 4.042665804877683e-06, "loss": 0.5936, "step": 12879 }, { "epoch": 1.15, "grad_norm": 7.998567597244722, "learning_rate": 4.041956808132246e-06, "loss": 0.6136, "step": 12880 }, { "epoch": 1.15, "grad_norm": 4.674853484921768, "learning_rate": 4.041247831383861e-06, "loss": 0.622, "step": 12881 }, { "epoch": 1.15, "grad_norm": 6.61713501277379, "learning_rate": 4.040538874647325e-06, "loss": 0.5933, "step": 12882 }, { "epoch": 1.15, "grad_norm": 12.064019993350454, "learning_rate": 4.039829937937441e-06, "loss": 0.6597, "step": 12883 }, { "epoch": 1.15, "grad_norm": 4.793041162295543, "learning_rate": 4.039121021269005e-06, "loss": 0.5805, "step": 12884 }, { "epoch": 1.15, "grad_norm": 5.63716929067069, "learning_rate": 4.038412124656811e-06, "loss": 0.5674, "step": 12885 }, { "epoch": 1.15, "grad_norm": 7.267135561357943, "learning_rate": 4.03770324811566e-06, "loss": 0.5962, "step": 12886 }, { "epoch": 1.15, "grad_norm": 5.415399790833891, "learning_rate": 4.036994391660346e-06, "loss": 0.685, "step": 12887 }, { "epoch": 1.15, "grad_norm": 6.097844314674362, "learning_rate": 4.0362855553056646e-06, "loss": 0.6498, "step": 12888 }, { "epoch": 1.15, "grad_norm": 5.5044093783752865, "learning_rate": 4.035576739066411e-06, "loss": 0.6295, "step": 12889 }, { "epoch": 1.15, "grad_norm": 8.341189359945046, "learning_rate": 4.034867942957383e-06, "loss": 0.5968, "step": 12890 }, { "epoch": 1.15, "grad_norm": 5.91013106763395, "learning_rate": 4.034159166993371e-06, "loss": 0.5802, "step": 12891 }, { "epoch": 1.15, "grad_norm": 6.17667503415963, "learning_rate": 4.033450411189172e-06, "loss": 0.5983, "step": 12892 }, { "epoch": 1.15, "grad_norm": 6.216968826718032, "learning_rate": 4.03274167555958e-06, "loss": 0.6585, "step": 12893 }, { "epoch": 1.15, "grad_norm": 6.164144173998226, "learning_rate": 4.032032960119388e-06, "loss": 0.6776, "step": 12894 }, { "epoch": 1.15, "grad_norm": 6.00529538806573, "learning_rate": 4.0313242648833865e-06, "loss": 0.5401, "step": 12895 }, { "epoch": 1.15, "grad_norm": 6.556242762324982, "learning_rate": 4.030615589866369e-06, "loss": 0.6407, "step": 12896 }, { "epoch": 1.15, "grad_norm": 5.266595833477032, "learning_rate": 4.029906935083129e-06, "loss": 0.5925, "step": 12897 }, { "epoch": 1.15, "grad_norm": 6.450261018060676, "learning_rate": 4.029198300548457e-06, "loss": 0.5699, "step": 12898 }, { "epoch": 1.15, "grad_norm": 5.1503394899620885, "learning_rate": 4.028489686277145e-06, "loss": 0.587, "step": 12899 }, { "epoch": 1.15, "grad_norm": 6.696315274523511, "learning_rate": 4.0277810922839824e-06, "loss": 0.5791, "step": 12900 }, { "epoch": 1.15, "grad_norm": 6.478380931244562, "learning_rate": 4.02707251858376e-06, "loss": 0.601, "step": 12901 }, { "epoch": 1.15, "grad_norm": 4.938362554736422, "learning_rate": 4.026363965191269e-06, "loss": 0.6834, "step": 12902 }, { "epoch": 1.15, "grad_norm": 7.262734009340027, "learning_rate": 4.025655432121299e-06, "loss": 0.6065, "step": 12903 }, { "epoch": 1.15, "grad_norm": 5.843808971286931, "learning_rate": 4.0249469193886355e-06, "loss": 0.5908, "step": 12904 }, { "epoch": 1.15, "grad_norm": 5.057167342921941, "learning_rate": 4.0242384270080715e-06, "loss": 0.6446, "step": 12905 }, { "epoch": 1.15, "grad_norm": 4.899461529320566, "learning_rate": 4.0235299549943935e-06, "loss": 0.5436, "step": 12906 }, { "epoch": 1.15, "grad_norm": 7.349229949305356, "learning_rate": 4.022821503362388e-06, "loss": 0.6769, "step": 12907 }, { "epoch": 1.15, "grad_norm": 5.624426351035255, "learning_rate": 4.022113072126845e-06, "loss": 0.5597, "step": 12908 }, { "epoch": 1.15, "grad_norm": 6.391669634530487, "learning_rate": 4.021404661302551e-06, "loss": 0.6199, "step": 12909 }, { "epoch": 1.15, "grad_norm": 6.831207460454864, "learning_rate": 4.02069627090429e-06, "loss": 0.6278, "step": 12910 }, { "epoch": 1.15, "grad_norm": 5.873230444466613, "learning_rate": 4.01998790094685e-06, "loss": 0.6232, "step": 12911 }, { "epoch": 1.15, "grad_norm": 5.266884672211902, "learning_rate": 4.0192795514450165e-06, "loss": 0.632, "step": 12912 }, { "epoch": 1.15, "grad_norm": 4.894407259696568, "learning_rate": 4.018571222413575e-06, "loss": 0.6186, "step": 12913 }, { "epoch": 1.15, "grad_norm": 7.042329040379265, "learning_rate": 4.017862913867309e-06, "loss": 0.6311, "step": 12914 }, { "epoch": 1.15, "grad_norm": 6.469520317146092, "learning_rate": 4.017154625821003e-06, "loss": 0.5782, "step": 12915 }, { "epoch": 1.15, "grad_norm": 6.720201659902491, "learning_rate": 4.016446358289444e-06, "loss": 0.6451, "step": 12916 }, { "epoch": 1.15, "grad_norm": 6.834522335972365, "learning_rate": 4.015738111287412e-06, "loss": 0.5956, "step": 12917 }, { "epoch": 1.15, "grad_norm": 7.005359422646475, "learning_rate": 4.015029884829691e-06, "loss": 0.6228, "step": 12918 }, { "epoch": 1.15, "grad_norm": 5.010213049187701, "learning_rate": 4.014321678931065e-06, "loss": 0.6034, "step": 12919 }, { "epoch": 1.15, "grad_norm": 6.318113505934496, "learning_rate": 4.013613493606315e-06, "loss": 0.637, "step": 12920 }, { "epoch": 1.15, "grad_norm": 8.456426201722001, "learning_rate": 4.012905328870223e-06, "loss": 0.5506, "step": 12921 }, { "epoch": 1.15, "grad_norm": 6.325230261444733, "learning_rate": 4.012197184737572e-06, "loss": 0.5617, "step": 12922 }, { "epoch": 1.15, "grad_norm": 5.878872081746013, "learning_rate": 4.011489061223142e-06, "loss": 0.552, "step": 12923 }, { "epoch": 1.15, "grad_norm": 8.447291423814715, "learning_rate": 4.010780958341711e-06, "loss": 0.6592, "step": 12924 }, { "epoch": 1.15, "grad_norm": 7.136925288731995, "learning_rate": 4.010072876108061e-06, "loss": 0.6096, "step": 12925 }, { "epoch": 1.15, "grad_norm": 4.7735551525400854, "learning_rate": 4.009364814536974e-06, "loss": 0.572, "step": 12926 }, { "epoch": 1.15, "grad_norm": 4.610777470220228, "learning_rate": 4.008656773643224e-06, "loss": 0.6069, "step": 12927 }, { "epoch": 1.15, "grad_norm": 5.229757944790915, "learning_rate": 4.007948753441594e-06, "loss": 0.5917, "step": 12928 }, { "epoch": 1.15, "grad_norm": 6.240319092679932, "learning_rate": 4.007240753946862e-06, "loss": 0.591, "step": 12929 }, { "epoch": 1.15, "grad_norm": 4.857702847974442, "learning_rate": 4.006532775173805e-06, "loss": 0.5804, "step": 12930 }, { "epoch": 1.15, "grad_norm": 6.601304181465091, "learning_rate": 4.005824817137199e-06, "loss": 0.6259, "step": 12931 }, { "epoch": 1.15, "grad_norm": 7.028212465461521, "learning_rate": 4.005116879851825e-06, "loss": 0.6131, "step": 12932 }, { "epoch": 1.15, "grad_norm": 7.009572914792403, "learning_rate": 4.004408963332456e-06, "loss": 0.6109, "step": 12933 }, { "epoch": 1.15, "grad_norm": 11.034448736563673, "learning_rate": 4.003701067593871e-06, "loss": 0.6847, "step": 12934 }, { "epoch": 1.15, "grad_norm": 7.018727209609907, "learning_rate": 4.002993192650843e-06, "loss": 0.5612, "step": 12935 }, { "epoch": 1.15, "grad_norm": 7.398749039704398, "learning_rate": 4.0022853385181484e-06, "loss": 0.6514, "step": 12936 }, { "epoch": 1.15, "grad_norm": 5.34905754032123, "learning_rate": 4.001577505210564e-06, "loss": 0.5986, "step": 12937 }, { "epoch": 1.15, "grad_norm": 6.332238507782955, "learning_rate": 4.000869692742862e-06, "loss": 0.6638, "step": 12938 }, { "epoch": 1.15, "grad_norm": 8.236282493853684, "learning_rate": 4.000161901129818e-06, "loss": 0.6232, "step": 12939 }, { "epoch": 1.15, "grad_norm": 6.179038575653843, "learning_rate": 3.999454130386203e-06, "loss": 0.6635, "step": 12940 }, { "epoch": 1.15, "grad_norm": 8.425671142289678, "learning_rate": 3.9987463805267914e-06, "loss": 0.6218, "step": 12941 }, { "epoch": 1.15, "grad_norm": 6.985831364104041, "learning_rate": 3.998038651566359e-06, "loss": 0.6259, "step": 12942 }, { "epoch": 1.15, "grad_norm": 8.799333842306265, "learning_rate": 3.997330943519673e-06, "loss": 0.6574, "step": 12943 }, { "epoch": 1.15, "grad_norm": 6.548162156102045, "learning_rate": 3.996623256401509e-06, "loss": 0.6571, "step": 12944 }, { "epoch": 1.15, "grad_norm": 6.488667078780645, "learning_rate": 3.9959155902266365e-06, "loss": 0.6368, "step": 12945 }, { "epoch": 1.15, "grad_norm": 6.096531459219551, "learning_rate": 3.995207945009827e-06, "loss": 0.5616, "step": 12946 }, { "epoch": 1.16, "grad_norm": 5.135233585678908, "learning_rate": 3.994500320765852e-06, "loss": 0.6185, "step": 12947 }, { "epoch": 1.16, "grad_norm": 5.117003878272828, "learning_rate": 3.993792717509481e-06, "loss": 0.5966, "step": 12948 }, { "epoch": 1.16, "grad_norm": 7.205804767360758, "learning_rate": 3.993085135255482e-06, "loss": 0.5098, "step": 12949 }, { "epoch": 1.16, "grad_norm": 6.247721642017168, "learning_rate": 3.992377574018627e-06, "loss": 0.5973, "step": 12950 }, { "epoch": 1.16, "grad_norm": 8.732337919947808, "learning_rate": 3.991670033813683e-06, "loss": 0.5776, "step": 12951 }, { "epoch": 1.16, "grad_norm": 6.808583985680122, "learning_rate": 3.990962514655421e-06, "loss": 0.619, "step": 12952 }, { "epoch": 1.16, "grad_norm": 6.5386362157793405, "learning_rate": 3.990255016558604e-06, "loss": 0.5711, "step": 12953 }, { "epoch": 1.16, "grad_norm": 8.382117100636847, "learning_rate": 3.989547539538004e-06, "loss": 0.6067, "step": 12954 }, { "epoch": 1.16, "grad_norm": 6.106438004258891, "learning_rate": 3.9888400836083844e-06, "loss": 0.5881, "step": 12955 }, { "epoch": 1.16, "grad_norm": 8.225845523732604, "learning_rate": 3.988132648784514e-06, "loss": 0.6306, "step": 12956 }, { "epoch": 1.16, "grad_norm": 7.076381821596656, "learning_rate": 3.98742523508116e-06, "loss": 0.5598, "step": 12957 }, { "epoch": 1.16, "grad_norm": 5.998279670270793, "learning_rate": 3.9867178425130856e-06, "loss": 0.5854, "step": 12958 }, { "epoch": 1.16, "grad_norm": 7.572892684706871, "learning_rate": 3.986010471095057e-06, "loss": 0.635, "step": 12959 }, { "epoch": 1.16, "grad_norm": 6.4374491011574335, "learning_rate": 3.98530312084184e-06, "loss": 0.6253, "step": 12960 }, { "epoch": 1.16, "grad_norm": 4.559749376764543, "learning_rate": 3.9845957917681975e-06, "loss": 0.5984, "step": 12961 }, { "epoch": 1.16, "grad_norm": 4.315177411202582, "learning_rate": 3.983888483888893e-06, "loss": 0.5957, "step": 12962 }, { "epoch": 1.16, "grad_norm": 8.807879081974322, "learning_rate": 3.983181197218694e-06, "loss": 0.589, "step": 12963 }, { "epoch": 1.16, "grad_norm": 5.821924924069717, "learning_rate": 3.9824739317723585e-06, "loss": 0.6013, "step": 12964 }, { "epoch": 1.16, "grad_norm": 7.952922065383657, "learning_rate": 3.981766687564652e-06, "loss": 0.5928, "step": 12965 }, { "epoch": 1.16, "grad_norm": 6.073007473876, "learning_rate": 3.981059464610338e-06, "loss": 0.6084, "step": 12966 }, { "epoch": 1.16, "grad_norm": 5.2214790207171875, "learning_rate": 3.980352262924175e-06, "loss": 0.6294, "step": 12967 }, { "epoch": 1.16, "grad_norm": 7.011536638037049, "learning_rate": 3.979645082520924e-06, "loss": 0.6033, "step": 12968 }, { "epoch": 1.16, "grad_norm": 6.381083232034313, "learning_rate": 3.978937923415348e-06, "loss": 0.5547, "step": 12969 }, { "epoch": 1.16, "grad_norm": 5.087215600429319, "learning_rate": 3.978230785622208e-06, "loss": 0.5747, "step": 12970 }, { "epoch": 1.16, "grad_norm": 7.719575712256528, "learning_rate": 3.977523669156261e-06, "loss": 0.6574, "step": 12971 }, { "epoch": 1.16, "grad_norm": 6.4682768140732465, "learning_rate": 3.9768165740322685e-06, "loss": 0.6653, "step": 12972 }, { "epoch": 1.16, "grad_norm": 6.085554925423577, "learning_rate": 3.976109500264991e-06, "loss": 0.658, "step": 12973 }, { "epoch": 1.16, "grad_norm": 6.64776042493028, "learning_rate": 3.975402447869183e-06, "loss": 0.5746, "step": 12974 }, { "epoch": 1.16, "grad_norm": 7.133397846551657, "learning_rate": 3.974695416859606e-06, "loss": 0.6128, "step": 12975 }, { "epoch": 1.16, "grad_norm": 5.5579077019004135, "learning_rate": 3.973988407251018e-06, "loss": 0.5986, "step": 12976 }, { "epoch": 1.16, "grad_norm": 6.156625648590385, "learning_rate": 3.9732814190581736e-06, "loss": 0.6228, "step": 12977 }, { "epoch": 1.16, "grad_norm": 5.665695063169347, "learning_rate": 3.972574452295831e-06, "loss": 0.5422, "step": 12978 }, { "epoch": 1.16, "grad_norm": 6.69833412722042, "learning_rate": 3.971867506978748e-06, "loss": 0.6542, "step": 12979 }, { "epoch": 1.16, "grad_norm": 8.791863524189502, "learning_rate": 3.971160583121678e-06, "loss": 0.6429, "step": 12980 }, { "epoch": 1.16, "grad_norm": 6.632376419562274, "learning_rate": 3.97045368073938e-06, "loss": 0.614, "step": 12981 }, { "epoch": 1.16, "grad_norm": 6.809302172332681, "learning_rate": 3.969746799846605e-06, "loss": 0.629, "step": 12982 }, { "epoch": 1.16, "grad_norm": 4.805766213068198, "learning_rate": 3.969039940458109e-06, "loss": 0.5685, "step": 12983 }, { "epoch": 1.16, "grad_norm": 6.943879478451886, "learning_rate": 3.968333102588646e-06, "loss": 0.6437, "step": 12984 }, { "epoch": 1.16, "grad_norm": 6.161374955857747, "learning_rate": 3.967626286252971e-06, "loss": 0.5965, "step": 12985 }, { "epoch": 1.16, "grad_norm": 7.655589665133632, "learning_rate": 3.966919491465837e-06, "loss": 0.5884, "step": 12986 }, { "epoch": 1.16, "grad_norm": 5.438425702386194, "learning_rate": 3.966212718241995e-06, "loss": 0.6151, "step": 12987 }, { "epoch": 1.16, "grad_norm": 4.633190352018585, "learning_rate": 3.965505966596199e-06, "loss": 0.6698, "step": 12988 }, { "epoch": 1.16, "grad_norm": 7.512873862448835, "learning_rate": 3.964799236543201e-06, "loss": 0.5931, "step": 12989 }, { "epoch": 1.16, "grad_norm": 5.74831177559796, "learning_rate": 3.964092528097751e-06, "loss": 0.5918, "step": 12990 }, { "epoch": 1.16, "grad_norm": 8.377836647078722, "learning_rate": 3.9633858412746e-06, "loss": 0.682, "step": 12991 }, { "epoch": 1.16, "grad_norm": 6.464391536541433, "learning_rate": 3.962679176088502e-06, "loss": 0.6789, "step": 12992 }, { "epoch": 1.16, "grad_norm": 6.578944076367026, "learning_rate": 3.961972532554203e-06, "loss": 0.6341, "step": 12993 }, { "epoch": 1.16, "grad_norm": 6.053424478823554, "learning_rate": 3.961265910686454e-06, "loss": 0.5545, "step": 12994 }, { "epoch": 1.16, "grad_norm": 8.79194251902041, "learning_rate": 3.960559310500006e-06, "loss": 0.7113, "step": 12995 }, { "epoch": 1.16, "grad_norm": 6.311557473901445, "learning_rate": 3.959852732009606e-06, "loss": 0.5586, "step": 12996 }, { "epoch": 1.16, "grad_norm": 5.998757266510518, "learning_rate": 3.95914617523e-06, "loss": 0.5903, "step": 12997 }, { "epoch": 1.16, "grad_norm": 6.491434918618766, "learning_rate": 3.958439640175939e-06, "loss": 0.6245, "step": 12998 }, { "epoch": 1.16, "grad_norm": 5.66220241939905, "learning_rate": 3.957733126862171e-06, "loss": 0.6211, "step": 12999 }, { "epoch": 1.16, "grad_norm": 5.814238222288148, "learning_rate": 3.957026635303439e-06, "loss": 0.5416, "step": 13000 }, { "epoch": 1.16, "grad_norm": 5.306995974195617, "learning_rate": 3.956320165514493e-06, "loss": 0.6487, "step": 13001 }, { "epoch": 1.16, "grad_norm": 6.035796972102513, "learning_rate": 3.9556137175100786e-06, "loss": 0.5434, "step": 13002 }, { "epoch": 1.16, "grad_norm": 6.657102822186134, "learning_rate": 3.95490729130494e-06, "loss": 0.6036, "step": 13003 }, { "epoch": 1.16, "grad_norm": 5.838548432768052, "learning_rate": 3.9542008869138226e-06, "loss": 0.5719, "step": 13004 }, { "epoch": 1.16, "grad_norm": 7.717549281554352, "learning_rate": 3.953494504351473e-06, "loss": 0.5642, "step": 13005 }, { "epoch": 1.16, "grad_norm": 6.116543351412588, "learning_rate": 3.952788143632632e-06, "loss": 0.6085, "step": 13006 }, { "epoch": 1.16, "grad_norm": 7.11017652768376, "learning_rate": 3.9520818047720454e-06, "loss": 0.6382, "step": 13007 }, { "epoch": 1.16, "grad_norm": 7.292829767253248, "learning_rate": 3.951375487784457e-06, "loss": 0.6158, "step": 13008 }, { "epoch": 1.16, "grad_norm": 6.424063490813165, "learning_rate": 3.950669192684609e-06, "loss": 0.5952, "step": 13009 }, { "epoch": 1.16, "grad_norm": 7.898971423169459, "learning_rate": 3.949962919487245e-06, "loss": 0.6337, "step": 13010 }, { "epoch": 1.16, "grad_norm": 8.813872882492872, "learning_rate": 3.9492566682071025e-06, "loss": 0.6639, "step": 13011 }, { "epoch": 1.16, "grad_norm": 6.2334693327831, "learning_rate": 3.948550438858928e-06, "loss": 0.6211, "step": 13012 }, { "epoch": 1.16, "grad_norm": 8.002893682889669, "learning_rate": 3.947844231457459e-06, "loss": 0.6904, "step": 13013 }, { "epoch": 1.16, "grad_norm": 8.29305328517966, "learning_rate": 3.947138046017438e-06, "loss": 0.601, "step": 13014 }, { "epoch": 1.16, "grad_norm": 7.963350086670499, "learning_rate": 3.946431882553605e-06, "loss": 0.6072, "step": 13015 }, { "epoch": 1.16, "grad_norm": 5.7294365711590665, "learning_rate": 3.945725741080699e-06, "loss": 0.577, "step": 13016 }, { "epoch": 1.16, "grad_norm": 5.8003413777655854, "learning_rate": 3.945019621613459e-06, "loss": 0.6008, "step": 13017 }, { "epoch": 1.16, "grad_norm": 5.120669689166336, "learning_rate": 3.944313524166624e-06, "loss": 0.5705, "step": 13018 }, { "epoch": 1.16, "grad_norm": 7.846715806069802, "learning_rate": 3.943607448754932e-06, "loss": 0.551, "step": 13019 }, { "epoch": 1.16, "grad_norm": 6.697288165173782, "learning_rate": 3.9429013953931225e-06, "loss": 0.6392, "step": 13020 }, { "epoch": 1.16, "grad_norm": 6.2101551995713375, "learning_rate": 3.94219536409593e-06, "loss": 0.6159, "step": 13021 }, { "epoch": 1.16, "grad_norm": 6.557074014057159, "learning_rate": 3.941489354878092e-06, "loss": 0.5902, "step": 13022 }, { "epoch": 1.16, "grad_norm": 7.324671095183377, "learning_rate": 3.940783367754348e-06, "loss": 0.6556, "step": 13023 }, { "epoch": 1.16, "grad_norm": 8.084648410221194, "learning_rate": 3.940077402739431e-06, "loss": 0.5513, "step": 13024 }, { "epoch": 1.16, "grad_norm": 8.617682893983577, "learning_rate": 3.939371459848077e-06, "loss": 0.6166, "step": 13025 }, { "epoch": 1.16, "grad_norm": 4.515876656907204, "learning_rate": 3.93866553909502e-06, "loss": 0.5537, "step": 13026 }, { "epoch": 1.16, "grad_norm": 6.1047014362838485, "learning_rate": 3.9379596404949946e-06, "loss": 0.5834, "step": 13027 }, { "epoch": 1.16, "grad_norm": 6.650562809176478, "learning_rate": 3.937253764062738e-06, "loss": 0.5607, "step": 13028 }, { "epoch": 1.16, "grad_norm": 6.536342784491075, "learning_rate": 3.936547909812981e-06, "loss": 0.568, "step": 13029 }, { "epoch": 1.16, "grad_norm": 5.342780264509894, "learning_rate": 3.9358420777604574e-06, "loss": 0.6148, "step": 13030 }, { "epoch": 1.16, "grad_norm": 9.428270898556068, "learning_rate": 3.9351362679199e-06, "loss": 0.5843, "step": 13031 }, { "epoch": 1.16, "grad_norm": 6.47963437244726, "learning_rate": 3.934430480306041e-06, "loss": 0.5978, "step": 13032 }, { "epoch": 1.16, "grad_norm": 6.960527303633001, "learning_rate": 3.933724714933612e-06, "loss": 0.6527, "step": 13033 }, { "epoch": 1.16, "grad_norm": 5.04560279964691, "learning_rate": 3.933018971817344e-06, "loss": 0.5302, "step": 13034 }, { "epoch": 1.16, "grad_norm": 8.475576444622297, "learning_rate": 3.932313250971969e-06, "loss": 0.5723, "step": 13035 }, { "epoch": 1.16, "grad_norm": 7.528136085185549, "learning_rate": 3.931607552412217e-06, "loss": 0.615, "step": 13036 }, { "epoch": 1.16, "grad_norm": 7.0575569920201575, "learning_rate": 3.930901876152817e-06, "loss": 0.5799, "step": 13037 }, { "epoch": 1.16, "grad_norm": 5.055816558079321, "learning_rate": 3.930196222208501e-06, "loss": 0.6236, "step": 13038 }, { "epoch": 1.16, "grad_norm": 7.23980310206584, "learning_rate": 3.929490590593994e-06, "loss": 0.6325, "step": 13039 }, { "epoch": 1.16, "grad_norm": 9.26627310604706, "learning_rate": 3.9287849813240265e-06, "loss": 0.5973, "step": 13040 }, { "epoch": 1.16, "grad_norm": 8.77397695484451, "learning_rate": 3.928079394413327e-06, "loss": 0.6377, "step": 13041 }, { "epoch": 1.16, "grad_norm": 6.765782726298847, "learning_rate": 3.927373829876622e-06, "loss": 0.6016, "step": 13042 }, { "epoch": 1.16, "grad_norm": 6.663810923043396, "learning_rate": 3.9266682877286405e-06, "loss": 0.6199, "step": 13043 }, { "epoch": 1.16, "grad_norm": 6.647399375397755, "learning_rate": 3.925962767984107e-06, "loss": 0.5285, "step": 13044 }, { "epoch": 1.16, "grad_norm": 5.595437831850956, "learning_rate": 3.925257270657749e-06, "loss": 0.5957, "step": 13045 }, { "epoch": 1.16, "grad_norm": 6.673434830295425, "learning_rate": 3.924551795764292e-06, "loss": 0.6482, "step": 13046 }, { "epoch": 1.16, "grad_norm": 5.295389970660264, "learning_rate": 3.92384634331846e-06, "loss": 0.556, "step": 13047 }, { "epoch": 1.16, "grad_norm": 5.803718525959643, "learning_rate": 3.92314091333498e-06, "loss": 0.6506, "step": 13048 }, { "epoch": 1.16, "grad_norm": 7.907367962224505, "learning_rate": 3.9224355058285755e-06, "loss": 0.5515, "step": 13049 }, { "epoch": 1.16, "grad_norm": 6.859039437303726, "learning_rate": 3.921730120813968e-06, "loss": 0.5886, "step": 13050 }, { "epoch": 1.16, "grad_norm": 6.419256837303687, "learning_rate": 3.921024758305884e-06, "loss": 0.5744, "step": 13051 }, { "epoch": 1.16, "grad_norm": 5.0350692170564, "learning_rate": 3.920319418319046e-06, "loss": 0.6123, "step": 13052 }, { "epoch": 1.16, "grad_norm": 8.591031276284657, "learning_rate": 3.919614100868177e-06, "loss": 0.6212, "step": 13053 }, { "epoch": 1.16, "grad_norm": 5.4696260864712185, "learning_rate": 3.9189088059679955e-06, "loss": 0.6299, "step": 13054 }, { "epoch": 1.16, "grad_norm": 4.382606749937304, "learning_rate": 3.918203533633225e-06, "loss": 0.5746, "step": 13055 }, { "epoch": 1.16, "grad_norm": 9.360086269628882, "learning_rate": 3.9174982838785875e-06, "loss": 0.6519, "step": 13056 }, { "epoch": 1.16, "grad_norm": 4.825289869499037, "learning_rate": 3.916793056718802e-06, "loss": 0.5773, "step": 13057 }, { "epoch": 1.16, "grad_norm": 6.303396750281862, "learning_rate": 3.916087852168588e-06, "loss": 0.6891, "step": 13058 }, { "epoch": 1.16, "grad_norm": 5.4232250096541605, "learning_rate": 3.915382670242669e-06, "loss": 0.5975, "step": 13059 }, { "epoch": 1.17, "grad_norm": 7.739321471134495, "learning_rate": 3.91467751095576e-06, "loss": 0.556, "step": 13060 }, { "epoch": 1.17, "grad_norm": 4.5234425282383475, "learning_rate": 3.913972374322581e-06, "loss": 0.5785, "step": 13061 }, { "epoch": 1.17, "grad_norm": 7.295995856776078, "learning_rate": 3.913267260357852e-06, "loss": 0.5756, "step": 13062 }, { "epoch": 1.17, "grad_norm": 7.657182058263523, "learning_rate": 3.912562169076287e-06, "loss": 0.6339, "step": 13063 }, { "epoch": 1.17, "grad_norm": 4.5569762811890495, "learning_rate": 3.911857100492605e-06, "loss": 0.5532, "step": 13064 }, { "epoch": 1.17, "grad_norm": 7.769926487014623, "learning_rate": 3.9111520546215255e-06, "loss": 0.5575, "step": 13065 }, { "epoch": 1.17, "grad_norm": 6.35077762964298, "learning_rate": 3.91044703147776e-06, "loss": 0.662, "step": 13066 }, { "epoch": 1.17, "grad_norm": 6.891584086086863, "learning_rate": 3.909742031076029e-06, "loss": 0.5653, "step": 13067 }, { "epoch": 1.17, "grad_norm": 5.9575296102349755, "learning_rate": 3.909037053431044e-06, "loss": 0.5632, "step": 13068 }, { "epoch": 1.17, "grad_norm": 5.588207350387445, "learning_rate": 3.908332098557523e-06, "loss": 0.571, "step": 13069 }, { "epoch": 1.17, "grad_norm": 4.906235502922406, "learning_rate": 3.907627166470176e-06, "loss": 0.6163, "step": 13070 }, { "epoch": 1.17, "grad_norm": 5.770251806959943, "learning_rate": 3.90692225718372e-06, "loss": 0.5602, "step": 13071 }, { "epoch": 1.17, "grad_norm": 7.204008855596645, "learning_rate": 3.90621737071287e-06, "loss": 0.6856, "step": 13072 }, { "epoch": 1.17, "grad_norm": 7.23175817118081, "learning_rate": 3.905512507072336e-06, "loss": 0.6206, "step": 13073 }, { "epoch": 1.17, "grad_norm": 5.738104857263245, "learning_rate": 3.904807666276831e-06, "loss": 0.5794, "step": 13074 }, { "epoch": 1.17, "grad_norm": 5.228177568655165, "learning_rate": 3.904102848341068e-06, "loss": 0.5972, "step": 13075 }, { "epoch": 1.17, "grad_norm": 6.0774810778863575, "learning_rate": 3.903398053279757e-06, "loss": 0.6308, "step": 13076 }, { "epoch": 1.17, "grad_norm": 8.02918837488522, "learning_rate": 3.902693281107611e-06, "loss": 0.6125, "step": 13077 }, { "epoch": 1.17, "grad_norm": 7.284985421628524, "learning_rate": 3.901988531839341e-06, "loss": 0.6074, "step": 13078 }, { "epoch": 1.17, "grad_norm": 5.403344839933403, "learning_rate": 3.901283805489654e-06, "loss": 0.624, "step": 13079 }, { "epoch": 1.17, "grad_norm": 8.689008689432383, "learning_rate": 3.900579102073262e-06, "loss": 0.6386, "step": 13080 }, { "epoch": 1.17, "grad_norm": 6.22590554860552, "learning_rate": 3.899874421604873e-06, "loss": 0.5736, "step": 13081 }, { "epoch": 1.17, "grad_norm": 6.53600547668058, "learning_rate": 3.899169764099199e-06, "loss": 0.633, "step": 13082 }, { "epoch": 1.17, "grad_norm": 7.5288998685311785, "learning_rate": 3.898465129570943e-06, "loss": 0.6216, "step": 13083 }, { "epoch": 1.17, "grad_norm": 8.481900718823557, "learning_rate": 3.897760518034815e-06, "loss": 0.582, "step": 13084 }, { "epoch": 1.17, "grad_norm": 6.05133806209224, "learning_rate": 3.897055929505523e-06, "loss": 0.6241, "step": 13085 }, { "epoch": 1.17, "grad_norm": 9.436327479474544, "learning_rate": 3.896351363997772e-06, "loss": 0.622, "step": 13086 }, { "epoch": 1.17, "grad_norm": 7.435300030243588, "learning_rate": 3.89564682152627e-06, "loss": 0.5921, "step": 13087 }, { "epoch": 1.17, "grad_norm": 6.363143953399953, "learning_rate": 3.894942302105723e-06, "loss": 0.6041, "step": 13088 }, { "epoch": 1.17, "grad_norm": 4.951531391521561, "learning_rate": 3.894237805750834e-06, "loss": 0.6322, "step": 13089 }, { "epoch": 1.17, "grad_norm": 6.554669069377846, "learning_rate": 3.893533332476309e-06, "loss": 0.6279, "step": 13090 }, { "epoch": 1.17, "grad_norm": 6.814255114066726, "learning_rate": 3.892828882296854e-06, "loss": 0.6017, "step": 13091 }, { "epoch": 1.17, "grad_norm": 8.922956837119923, "learning_rate": 3.892124455227171e-06, "loss": 0.6165, "step": 13092 }, { "epoch": 1.17, "grad_norm": 8.617459965961334, "learning_rate": 3.8914200512819626e-06, "loss": 0.6223, "step": 13093 }, { "epoch": 1.17, "grad_norm": 5.203476049972661, "learning_rate": 3.8907156704759344e-06, "loss": 0.66, "step": 13094 }, { "epoch": 1.17, "grad_norm": 7.83257745916291, "learning_rate": 3.8900113128237865e-06, "loss": 0.5459, "step": 13095 }, { "epoch": 1.17, "grad_norm": 7.961540856629339, "learning_rate": 3.889306978340223e-06, "loss": 0.5427, "step": 13096 }, { "epoch": 1.17, "grad_norm": 5.300691447045469, "learning_rate": 3.888602667039943e-06, "loss": 0.6251, "step": 13097 }, { "epoch": 1.17, "grad_norm": 5.727507634393755, "learning_rate": 3.887898378937649e-06, "loss": 0.6074, "step": 13098 }, { "epoch": 1.17, "grad_norm": 4.990901270673559, "learning_rate": 3.88719411404804e-06, "loss": 0.5976, "step": 13099 }, { "epoch": 1.17, "grad_norm": 7.106082400185761, "learning_rate": 3.8864898723858155e-06, "loss": 0.5682, "step": 13100 }, { "epoch": 1.17, "grad_norm": 4.270537214662647, "learning_rate": 3.885785653965678e-06, "loss": 0.6049, "step": 13101 }, { "epoch": 1.17, "grad_norm": 7.003440328559636, "learning_rate": 3.885081458802325e-06, "loss": 0.621, "step": 13102 }, { "epoch": 1.17, "grad_norm": 5.879410440772941, "learning_rate": 3.884377286910454e-06, "loss": 0.5902, "step": 13103 }, { "epoch": 1.17, "grad_norm": 6.366635983973927, "learning_rate": 3.883673138304765e-06, "loss": 0.5961, "step": 13104 }, { "epoch": 1.17, "grad_norm": 6.620583603027561, "learning_rate": 3.882969012999954e-06, "loss": 0.6036, "step": 13105 }, { "epoch": 1.17, "grad_norm": 7.603844786210148, "learning_rate": 3.882264911010719e-06, "loss": 0.5664, "step": 13106 }, { "epoch": 1.17, "grad_norm": 6.845710719703136, "learning_rate": 3.8815608323517555e-06, "loss": 0.564, "step": 13107 }, { "epoch": 1.17, "grad_norm": 6.531370222691904, "learning_rate": 3.8808567770377605e-06, "loss": 0.5582, "step": 13108 }, { "epoch": 1.17, "grad_norm": 7.145407624095916, "learning_rate": 3.88015274508343e-06, "loss": 0.6039, "step": 13109 }, { "epoch": 1.17, "grad_norm": 5.148181286168265, "learning_rate": 3.879448736503458e-06, "loss": 0.6172, "step": 13110 }, { "epoch": 1.17, "grad_norm": 5.755471130672694, "learning_rate": 3.878744751312542e-06, "loss": 0.5624, "step": 13111 }, { "epoch": 1.17, "grad_norm": 6.460231436370971, "learning_rate": 3.87804078952537e-06, "loss": 0.5787, "step": 13112 }, { "epoch": 1.17, "grad_norm": 7.4518605952844705, "learning_rate": 3.877336851156642e-06, "loss": 0.6139, "step": 13113 }, { "epoch": 1.17, "grad_norm": 6.261750079520075, "learning_rate": 3.876632936221047e-06, "loss": 0.5859, "step": 13114 }, { "epoch": 1.17, "grad_norm": 9.406566212529814, "learning_rate": 3.87592904473328e-06, "loss": 0.7079, "step": 13115 }, { "epoch": 1.17, "grad_norm": 7.751445276641179, "learning_rate": 3.875225176708033e-06, "loss": 0.59, "step": 13116 }, { "epoch": 1.17, "grad_norm": 5.7898044581478505, "learning_rate": 3.874521332159996e-06, "loss": 0.5909, "step": 13117 }, { "epoch": 1.17, "grad_norm": 7.799519186078594, "learning_rate": 3.873817511103862e-06, "loss": 0.621, "step": 13118 }, { "epoch": 1.17, "grad_norm": 8.053185341928462, "learning_rate": 3.8731137135543216e-06, "loss": 0.5938, "step": 13119 }, { "epoch": 1.17, "grad_norm": 7.749671556629064, "learning_rate": 3.8724099395260635e-06, "loss": 0.611, "step": 13120 }, { "epoch": 1.17, "grad_norm": 6.109910660447366, "learning_rate": 3.871706189033779e-06, "loss": 0.5638, "step": 13121 }, { "epoch": 1.17, "grad_norm": 5.720315338068922, "learning_rate": 3.8710024620921586e-06, "loss": 0.5806, "step": 13122 }, { "epoch": 1.17, "grad_norm": 5.689276258460644, "learning_rate": 3.870298758715888e-06, "loss": 0.5627, "step": 13123 }, { "epoch": 1.17, "grad_norm": 7.492206940913018, "learning_rate": 3.869595078919655e-06, "loss": 0.6511, "step": 13124 }, { "epoch": 1.17, "grad_norm": 6.316523620767012, "learning_rate": 3.8688914227181534e-06, "loss": 0.6109, "step": 13125 }, { "epoch": 1.17, "grad_norm": 5.502326252774817, "learning_rate": 3.868187790126064e-06, "loss": 0.6025, "step": 13126 }, { "epoch": 1.17, "grad_norm": 8.12602342859233, "learning_rate": 3.867484181158076e-06, "loss": 0.5605, "step": 13127 }, { "epoch": 1.17, "grad_norm": 6.367728858075991, "learning_rate": 3.866780595828874e-06, "loss": 0.6237, "step": 13128 }, { "epoch": 1.17, "grad_norm": 8.67963829277927, "learning_rate": 3.866077034153147e-06, "loss": 0.6456, "step": 13129 }, { "epoch": 1.17, "grad_norm": 8.431035422706968, "learning_rate": 3.8653734961455786e-06, "loss": 0.6159, "step": 13130 }, { "epoch": 1.17, "grad_norm": 8.049136954745777, "learning_rate": 3.864669981820853e-06, "loss": 0.6428, "step": 13131 }, { "epoch": 1.17, "grad_norm": 7.05467948109974, "learning_rate": 3.863966491193656e-06, "loss": 0.576, "step": 13132 }, { "epoch": 1.17, "grad_norm": 8.597968310594995, "learning_rate": 3.863263024278669e-06, "loss": 0.6204, "step": 13133 }, { "epoch": 1.17, "grad_norm": 7.499246525751215, "learning_rate": 3.862559581090578e-06, "loss": 0.6086, "step": 13134 }, { "epoch": 1.17, "grad_norm": 5.325690905350083, "learning_rate": 3.861856161644066e-06, "loss": 0.6034, "step": 13135 }, { "epoch": 1.17, "grad_norm": 8.523044288625295, "learning_rate": 3.8611527659538125e-06, "loss": 0.681, "step": 13136 }, { "epoch": 1.17, "grad_norm": 7.853815249076578, "learning_rate": 3.860449394034501e-06, "loss": 0.5793, "step": 13137 }, { "epoch": 1.17, "grad_norm": 7.609702290793866, "learning_rate": 3.859746045900814e-06, "loss": 0.5884, "step": 13138 }, { "epoch": 1.17, "grad_norm": 7.236393574553493, "learning_rate": 3.859042721567432e-06, "loss": 0.6753, "step": 13139 }, { "epoch": 1.17, "grad_norm": 7.782241058988325, "learning_rate": 3.858339421049033e-06, "loss": 0.6076, "step": 13140 }, { "epoch": 1.17, "grad_norm": 5.019723673610625, "learning_rate": 3.857636144360298e-06, "loss": 0.6538, "step": 13141 }, { "epoch": 1.17, "grad_norm": 7.3155479605752305, "learning_rate": 3.856932891515908e-06, "loss": 0.6119, "step": 13142 }, { "epoch": 1.17, "grad_norm": 6.172754714235043, "learning_rate": 3.856229662530539e-06, "loss": 0.604, "step": 13143 }, { "epoch": 1.17, "grad_norm": 7.3300155384264265, "learning_rate": 3.855526457418871e-06, "loss": 0.6136, "step": 13144 }, { "epoch": 1.17, "grad_norm": 9.474951542008082, "learning_rate": 3.854823276195584e-06, "loss": 0.6301, "step": 13145 }, { "epoch": 1.17, "grad_norm": 8.618798306147506, "learning_rate": 3.8541201188753505e-06, "loss": 0.5933, "step": 13146 }, { "epoch": 1.17, "grad_norm": 6.301752541749486, "learning_rate": 3.853416985472851e-06, "loss": 0.57, "step": 13147 }, { "epoch": 1.17, "grad_norm": 5.194995784976399, "learning_rate": 3.8527138760027625e-06, "loss": 0.566, "step": 13148 }, { "epoch": 1.17, "grad_norm": 4.782089503732792, "learning_rate": 3.852010790479758e-06, "loss": 0.5894, "step": 13149 }, { "epoch": 1.17, "grad_norm": 6.550366673635147, "learning_rate": 3.851307728918513e-06, "loss": 0.5854, "step": 13150 }, { "epoch": 1.17, "grad_norm": 8.009008709232404, "learning_rate": 3.850604691333707e-06, "loss": 0.5922, "step": 13151 }, { "epoch": 1.17, "grad_norm": 6.777689459605355, "learning_rate": 3.849901677740008e-06, "loss": 0.6091, "step": 13152 }, { "epoch": 1.17, "grad_norm": 5.639235064820066, "learning_rate": 3.849198688152093e-06, "loss": 0.6037, "step": 13153 }, { "epoch": 1.17, "grad_norm": 3.7338570093759094, "learning_rate": 3.848495722584637e-06, "loss": 0.5773, "step": 13154 }, { "epoch": 1.17, "grad_norm": 4.932110868013976, "learning_rate": 3.847792781052312e-06, "loss": 0.6116, "step": 13155 }, { "epoch": 1.17, "grad_norm": 5.483975580918394, "learning_rate": 3.8470898635697865e-06, "loss": 0.6359, "step": 13156 }, { "epoch": 1.17, "grad_norm": 5.135931707829017, "learning_rate": 3.846386970151735e-06, "loss": 0.5768, "step": 13157 }, { "epoch": 1.17, "grad_norm": 7.171791283464882, "learning_rate": 3.845684100812831e-06, "loss": 0.6024, "step": 13158 }, { "epoch": 1.17, "grad_norm": 4.77972339164141, "learning_rate": 3.844981255567742e-06, "loss": 0.5674, "step": 13159 }, { "epoch": 1.17, "grad_norm": 7.223079184455958, "learning_rate": 3.844278434431141e-06, "loss": 0.576, "step": 13160 }, { "epoch": 1.17, "grad_norm": 7.041031677132561, "learning_rate": 3.843575637417696e-06, "loss": 0.6083, "step": 13161 }, { "epoch": 1.17, "grad_norm": 8.280997444682933, "learning_rate": 3.842872864542077e-06, "loss": 0.7025, "step": 13162 }, { "epoch": 1.17, "grad_norm": 4.944981007285902, "learning_rate": 3.8421701158189525e-06, "loss": 0.6304, "step": 13163 }, { "epoch": 1.17, "grad_norm": 6.996941525027087, "learning_rate": 3.841467391262993e-06, "loss": 0.6453, "step": 13164 }, { "epoch": 1.17, "grad_norm": 8.057070001192715, "learning_rate": 3.840764690888863e-06, "loss": 0.5732, "step": 13165 }, { "epoch": 1.17, "grad_norm": 6.302275058954713, "learning_rate": 3.840062014711232e-06, "loss": 0.6284, "step": 13166 }, { "epoch": 1.17, "grad_norm": 6.203503656619742, "learning_rate": 3.839359362744766e-06, "loss": 0.5412, "step": 13167 }, { "epoch": 1.17, "grad_norm": 7.094757247561121, "learning_rate": 3.838656735004134e-06, "loss": 0.6401, "step": 13168 }, { "epoch": 1.17, "grad_norm": 5.650541856092161, "learning_rate": 3.837954131503996e-06, "loss": 0.6458, "step": 13169 }, { "epoch": 1.17, "grad_norm": 5.754594358883974, "learning_rate": 3.837251552259021e-06, "loss": 0.5263, "step": 13170 }, { "epoch": 1.17, "grad_norm": 7.265323803836049, "learning_rate": 3.836548997283876e-06, "loss": 0.6305, "step": 13171 }, { "epoch": 1.18, "grad_norm": 4.948198077878267, "learning_rate": 3.835846466593219e-06, "loss": 0.6037, "step": 13172 }, { "epoch": 1.18, "grad_norm": 5.945206844423957, "learning_rate": 3.835143960201719e-06, "loss": 0.6175, "step": 13173 }, { "epoch": 1.18, "grad_norm": 7.245099390056637, "learning_rate": 3.8344414781240395e-06, "loss": 0.582, "step": 13174 }, { "epoch": 1.18, "grad_norm": 5.28294939504522, "learning_rate": 3.83373902037484e-06, "loss": 0.5542, "step": 13175 }, { "epoch": 1.18, "grad_norm": 6.417907208507796, "learning_rate": 3.8330365869687845e-06, "loss": 0.6689, "step": 13176 }, { "epoch": 1.18, "grad_norm": 8.546569193457564, "learning_rate": 3.832334177920535e-06, "loss": 0.684, "step": 13177 }, { "epoch": 1.18, "grad_norm": 6.7596159689019615, "learning_rate": 3.831631793244752e-06, "loss": 0.5488, "step": 13178 }, { "epoch": 1.18, "grad_norm": 9.301007571183177, "learning_rate": 3.830929432956097e-06, "loss": 0.5768, "step": 13179 }, { "epoch": 1.18, "grad_norm": 7.396348941705788, "learning_rate": 3.83022709706923e-06, "loss": 0.578, "step": 13180 }, { "epoch": 1.18, "grad_norm": 6.018489918179628, "learning_rate": 3.829524785598811e-06, "loss": 0.5622, "step": 13181 }, { "epoch": 1.18, "grad_norm": 6.586007370308069, "learning_rate": 3.828822498559498e-06, "loss": 0.6688, "step": 13182 }, { "epoch": 1.18, "grad_norm": 6.608359072932163, "learning_rate": 3.828120235965952e-06, "loss": 0.6159, "step": 13183 }, { "epoch": 1.18, "grad_norm": 7.426969731812003, "learning_rate": 3.827417997832828e-06, "loss": 0.6029, "step": 13184 }, { "epoch": 1.18, "grad_norm": 6.799087998865868, "learning_rate": 3.826715784174786e-06, "loss": 0.6191, "step": 13185 }, { "epoch": 1.18, "grad_norm": 6.530763546105602, "learning_rate": 3.826013595006482e-06, "loss": 0.602, "step": 13186 }, { "epoch": 1.18, "grad_norm": 7.612883333465717, "learning_rate": 3.825311430342575e-06, "loss": 0.6488, "step": 13187 }, { "epoch": 1.18, "grad_norm": 7.42220551112495, "learning_rate": 3.824609290197716e-06, "loss": 0.6287, "step": 13188 }, { "epoch": 1.18, "grad_norm": 8.127347148017519, "learning_rate": 3.823907174586568e-06, "loss": 0.6506, "step": 13189 }, { "epoch": 1.18, "grad_norm": 5.954993209563043, "learning_rate": 3.823205083523779e-06, "loss": 0.5899, "step": 13190 }, { "epoch": 1.18, "grad_norm": 5.179945254715884, "learning_rate": 3.822503017024007e-06, "loss": 0.5687, "step": 13191 }, { "epoch": 1.18, "grad_norm": 7.351674509100959, "learning_rate": 3.821800975101908e-06, "loss": 0.6416, "step": 13192 }, { "epoch": 1.18, "grad_norm": 5.393741220404368, "learning_rate": 3.821098957772132e-06, "loss": 0.611, "step": 13193 }, { "epoch": 1.18, "grad_norm": 8.647179031731183, "learning_rate": 3.820396965049332e-06, "loss": 0.5695, "step": 13194 }, { "epoch": 1.18, "grad_norm": 5.281014566765714, "learning_rate": 3.8196949969481645e-06, "loss": 0.6073, "step": 13195 }, { "epoch": 1.18, "grad_norm": 4.537891772787161, "learning_rate": 3.818993053483278e-06, "loss": 0.5313, "step": 13196 }, { "epoch": 1.18, "grad_norm": 6.590657468638105, "learning_rate": 3.818291134669328e-06, "loss": 0.6193, "step": 13197 }, { "epoch": 1.18, "grad_norm": 10.146222918283696, "learning_rate": 3.817589240520959e-06, "loss": 0.5697, "step": 13198 }, { "epoch": 1.18, "grad_norm": 6.663575967152685, "learning_rate": 3.816887371052827e-06, "loss": 0.5568, "step": 13199 }, { "epoch": 1.18, "grad_norm": 7.361293799322229, "learning_rate": 3.816185526279579e-06, "loss": 0.5968, "step": 13200 }, { "epoch": 1.18, "grad_norm": 5.719444284799625, "learning_rate": 3.815483706215866e-06, "loss": 0.6294, "step": 13201 }, { "epoch": 1.18, "grad_norm": 6.347761300437313, "learning_rate": 3.814781910876337e-06, "loss": 0.627, "step": 13202 }, { "epoch": 1.18, "grad_norm": 11.462790234371068, "learning_rate": 3.814080140275639e-06, "loss": 0.657, "step": 13203 }, { "epoch": 1.18, "grad_norm": 5.0866315872042325, "learning_rate": 3.8133783944284208e-06, "loss": 0.6257, "step": 13204 }, { "epoch": 1.18, "grad_norm": 5.43256800342738, "learning_rate": 3.8126766733493315e-06, "loss": 0.6821, "step": 13205 }, { "epoch": 1.18, "grad_norm": 7.112844067881695, "learning_rate": 3.8119749770530156e-06, "loss": 0.6339, "step": 13206 }, { "epoch": 1.18, "grad_norm": 5.310850292709379, "learning_rate": 3.81127330555412e-06, "loss": 0.5606, "step": 13207 }, { "epoch": 1.18, "grad_norm": 6.125483434917712, "learning_rate": 3.810571658867292e-06, "loss": 0.6158, "step": 13208 }, { "epoch": 1.18, "grad_norm": 8.040806908805983, "learning_rate": 3.8098700370071746e-06, "loss": 0.6371, "step": 13209 }, { "epoch": 1.18, "grad_norm": 5.0604166184261254, "learning_rate": 3.8091684399884145e-06, "loss": 0.5755, "step": 13210 }, { "epoch": 1.18, "grad_norm": 7.413286094195066, "learning_rate": 3.8084668678256566e-06, "loss": 0.6647, "step": 13211 }, { "epoch": 1.18, "grad_norm": 7.391750070465308, "learning_rate": 3.8077653205335424e-06, "loss": 0.6458, "step": 13212 }, { "epoch": 1.18, "grad_norm": 9.11981539978656, "learning_rate": 3.8070637981267154e-06, "loss": 0.6247, "step": 13213 }, { "epoch": 1.18, "grad_norm": 5.462681142292151, "learning_rate": 3.806362300619819e-06, "loss": 0.6049, "step": 13214 }, { "epoch": 1.18, "grad_norm": 6.399418519794362, "learning_rate": 3.805660828027497e-06, "loss": 0.5698, "step": 13215 }, { "epoch": 1.18, "grad_norm": 7.748295216649823, "learning_rate": 3.804959380364388e-06, "loss": 0.5842, "step": 13216 }, { "epoch": 1.18, "grad_norm": 6.884095179822025, "learning_rate": 3.8042579576451354e-06, "loss": 0.6035, "step": 13217 }, { "epoch": 1.18, "grad_norm": 7.505648163349529, "learning_rate": 3.8035565598843803e-06, "loss": 0.602, "step": 13218 }, { "epoch": 1.18, "grad_norm": 5.319434379300883, "learning_rate": 3.8028551870967606e-06, "loss": 0.6509, "step": 13219 }, { "epoch": 1.18, "grad_norm": 5.375517202565116, "learning_rate": 3.802153839296917e-06, "loss": 0.5999, "step": 13220 }, { "epoch": 1.18, "grad_norm": 6.949610461389127, "learning_rate": 3.8014525164994903e-06, "loss": 0.5484, "step": 13221 }, { "epoch": 1.18, "grad_norm": 5.048975103510584, "learning_rate": 3.8007512187191165e-06, "loss": 0.6512, "step": 13222 }, { "epoch": 1.18, "grad_norm": 5.508492638069277, "learning_rate": 3.8000499459704345e-06, "loss": 0.5734, "step": 13223 }, { "epoch": 1.18, "grad_norm": 7.060789261054142, "learning_rate": 3.7993486982680826e-06, "loss": 0.5722, "step": 13224 }, { "epoch": 1.18, "grad_norm": 7.648121968998392, "learning_rate": 3.7986474756266967e-06, "loss": 0.6652, "step": 13225 }, { "epoch": 1.18, "grad_norm": 6.653296374801162, "learning_rate": 3.7979462780609157e-06, "loss": 0.6165, "step": 13226 }, { "epoch": 1.18, "grad_norm": 6.594883576088282, "learning_rate": 3.797245105585372e-06, "loss": 0.5927, "step": 13227 }, { "epoch": 1.18, "grad_norm": 8.01665206685435, "learning_rate": 3.7965439582147034e-06, "loss": 0.56, "step": 13228 }, { "epoch": 1.18, "grad_norm": 5.391254319164415, "learning_rate": 3.795842835963543e-06, "loss": 0.6248, "step": 13229 }, { "epoch": 1.18, "grad_norm": 8.88211341795923, "learning_rate": 3.7951417388465267e-06, "loss": 0.5783, "step": 13230 }, { "epoch": 1.18, "grad_norm": 5.566379749638653, "learning_rate": 3.7944406668782884e-06, "loss": 0.6597, "step": 13231 }, { "epoch": 1.18, "grad_norm": 8.40596420084098, "learning_rate": 3.793739620073461e-06, "loss": 0.646, "step": 13232 }, { "epoch": 1.18, "grad_norm": 4.501862450967066, "learning_rate": 3.7930385984466765e-06, "loss": 0.5636, "step": 13233 }, { "epoch": 1.18, "grad_norm": 5.100769400178658, "learning_rate": 3.7923376020125695e-06, "loss": 0.578, "step": 13234 }, { "epoch": 1.18, "grad_norm": 5.910944398082639, "learning_rate": 3.7916366307857695e-06, "loss": 0.5889, "step": 13235 }, { "epoch": 1.18, "grad_norm": 6.385611246147002, "learning_rate": 3.7909356847809083e-06, "loss": 0.5772, "step": 13236 }, { "epoch": 1.18, "grad_norm": 8.317599667652292, "learning_rate": 3.790234764012618e-06, "loss": 0.5768, "step": 13237 }, { "epoch": 1.18, "grad_norm": 6.338641447885498, "learning_rate": 3.7895338684955265e-06, "loss": 0.6223, "step": 13238 }, { "epoch": 1.18, "grad_norm": 8.14470365668281, "learning_rate": 3.7888329982442645e-06, "loss": 0.5897, "step": 13239 }, { "epoch": 1.18, "grad_norm": 6.535561989130127, "learning_rate": 3.788132153273464e-06, "loss": 0.6137, "step": 13240 }, { "epoch": 1.18, "grad_norm": 5.440169201279031, "learning_rate": 3.7874313335977486e-06, "loss": 0.6441, "step": 13241 }, { "epoch": 1.18, "grad_norm": 7.841297096789114, "learning_rate": 3.7867305392317487e-06, "loss": 0.6267, "step": 13242 }, { "epoch": 1.18, "grad_norm": 6.940754531040744, "learning_rate": 3.786029770190091e-06, "loss": 0.5876, "step": 13243 }, { "epoch": 1.18, "grad_norm": 8.156419852464348, "learning_rate": 3.785329026487405e-06, "loss": 0.6463, "step": 13244 }, { "epoch": 1.18, "grad_norm": 9.980607311821721, "learning_rate": 3.784628308138314e-06, "loss": 0.6243, "step": 13245 }, { "epoch": 1.18, "grad_norm": 6.602694513251037, "learning_rate": 3.7839276151574454e-06, "loss": 0.6361, "step": 13246 }, { "epoch": 1.18, "grad_norm": 8.576985364428046, "learning_rate": 3.7832269475594264e-06, "loss": 0.5945, "step": 13247 }, { "epoch": 1.18, "grad_norm": 5.664315858664254, "learning_rate": 3.7825263053588784e-06, "loss": 0.5689, "step": 13248 }, { "epoch": 1.18, "grad_norm": 6.5470681706782825, "learning_rate": 3.781825688570428e-06, "loss": 0.6106, "step": 13249 }, { "epoch": 1.18, "grad_norm": 4.896469984249298, "learning_rate": 3.781125097208699e-06, "loss": 0.5564, "step": 13250 }, { "epoch": 1.18, "grad_norm": 8.240674168278007, "learning_rate": 3.7804245312883143e-06, "loss": 0.6658, "step": 13251 }, { "epoch": 1.18, "grad_norm": 5.7718166489550695, "learning_rate": 3.779723990823896e-06, "loss": 0.5789, "step": 13252 }, { "epoch": 1.18, "grad_norm": 5.989728306379843, "learning_rate": 3.779023475830068e-06, "loss": 0.7092, "step": 13253 }, { "epoch": 1.18, "grad_norm": 5.500942951599489, "learning_rate": 3.7783229863214503e-06, "loss": 0.605, "step": 13254 }, { "epoch": 1.18, "grad_norm": 7.666505927375248, "learning_rate": 3.7776225223126668e-06, "loss": 0.5962, "step": 13255 }, { "epoch": 1.18, "grad_norm": 6.40636839716343, "learning_rate": 3.776922083818334e-06, "loss": 0.532, "step": 13256 }, { "epoch": 1.18, "grad_norm": 7.630788685669185, "learning_rate": 3.7762216708530752e-06, "loss": 0.6458, "step": 13257 }, { "epoch": 1.18, "grad_norm": 6.035297666169042, "learning_rate": 3.7755212834315086e-06, "loss": 0.5701, "step": 13258 }, { "epoch": 1.18, "grad_norm": 5.378867132589014, "learning_rate": 3.774820921568253e-06, "loss": 0.6499, "step": 13259 }, { "epoch": 1.18, "grad_norm": 7.958589691222815, "learning_rate": 3.774120585277929e-06, "loss": 0.6083, "step": 13260 }, { "epoch": 1.18, "grad_norm": 8.392173707471922, "learning_rate": 3.7734202745751513e-06, "loss": 0.6344, "step": 13261 }, { "epoch": 1.18, "grad_norm": 6.92480870133496, "learning_rate": 3.7727199894745403e-06, "loss": 0.6006, "step": 13262 }, { "epoch": 1.18, "grad_norm": 6.267763790569879, "learning_rate": 3.7720197299907125e-06, "loss": 0.5968, "step": 13263 }, { "epoch": 1.18, "grad_norm": 6.060929570616743, "learning_rate": 3.771319496138282e-06, "loss": 0.6187, "step": 13264 }, { "epoch": 1.18, "grad_norm": 5.104468331594591, "learning_rate": 3.7706192879318677e-06, "loss": 0.5615, "step": 13265 }, { "epoch": 1.18, "grad_norm": 7.191561124696481, "learning_rate": 3.769919105386082e-06, "loss": 0.6241, "step": 13266 }, { "epoch": 1.18, "grad_norm": 8.657674235112134, "learning_rate": 3.769218948515542e-06, "loss": 0.6335, "step": 13267 }, { "epoch": 1.18, "grad_norm": 5.6828150271855735, "learning_rate": 3.7685188173348617e-06, "loss": 0.5148, "step": 13268 }, { "epoch": 1.18, "grad_norm": 4.815823582785311, "learning_rate": 3.767818711858654e-06, "loss": 0.5578, "step": 13269 }, { "epoch": 1.18, "grad_norm": 5.931385042715725, "learning_rate": 3.767118632101533e-06, "loss": 0.5911, "step": 13270 }, { "epoch": 1.18, "grad_norm": 5.36604679466121, "learning_rate": 3.7664185780781086e-06, "loss": 0.5914, "step": 13271 }, { "epoch": 1.18, "grad_norm": 6.526798259526903, "learning_rate": 3.7657185498029953e-06, "loss": 0.5897, "step": 13272 }, { "epoch": 1.18, "grad_norm": 6.087691965343447, "learning_rate": 3.765018547290805e-06, "loss": 0.5558, "step": 13273 }, { "epoch": 1.18, "grad_norm": 7.596228389798004, "learning_rate": 3.7643185705561474e-06, "loss": 0.6214, "step": 13274 }, { "epoch": 1.18, "grad_norm": 4.946643460316142, "learning_rate": 3.7636186196136344e-06, "loss": 0.6083, "step": 13275 }, { "epoch": 1.18, "grad_norm": 6.347269396300193, "learning_rate": 3.762918694477874e-06, "loss": 0.5773, "step": 13276 }, { "epoch": 1.18, "grad_norm": 7.4929287401877485, "learning_rate": 3.762218795163477e-06, "loss": 0.6728, "step": 13277 }, { "epoch": 1.18, "grad_norm": 4.625121906964087, "learning_rate": 3.761518921685052e-06, "loss": 0.5948, "step": 13278 }, { "epoch": 1.18, "grad_norm": 8.412082163470508, "learning_rate": 3.760819074057207e-06, "loss": 0.6219, "step": 13279 }, { "epoch": 1.18, "grad_norm": 7.985843124683444, "learning_rate": 3.76011925229455e-06, "loss": 0.566, "step": 13280 }, { "epoch": 1.18, "grad_norm": 6.548101312657256, "learning_rate": 3.75941945641169e-06, "loss": 0.6511, "step": 13281 }, { "epoch": 1.18, "grad_norm": 7.240070132273118, "learning_rate": 3.75871968642323e-06, "loss": 0.5319, "step": 13282 }, { "epoch": 1.18, "grad_norm": 6.667293647052264, "learning_rate": 3.7580199423437785e-06, "loss": 0.6617, "step": 13283 }, { "epoch": 1.19, "grad_norm": 8.837019475422412, "learning_rate": 3.757320224187943e-06, "loss": 0.6347, "step": 13284 }, { "epoch": 1.19, "grad_norm": 6.0935397922269985, "learning_rate": 3.756620531970325e-06, "loss": 0.6361, "step": 13285 }, { "epoch": 1.19, "grad_norm": 6.115331221320091, "learning_rate": 3.7559208657055303e-06, "loss": 0.6281, "step": 13286 }, { "epoch": 1.19, "grad_norm": 6.837845988772841, "learning_rate": 3.755221225408162e-06, "loss": 0.6325, "step": 13287 }, { "epoch": 1.19, "grad_norm": 7.316747432985996, "learning_rate": 3.7545216110928263e-06, "loss": 0.6276, "step": 13288 }, { "epoch": 1.19, "grad_norm": 5.199342894312467, "learning_rate": 3.753822022774123e-06, "loss": 0.5602, "step": 13289 }, { "epoch": 1.19, "grad_norm": 5.9296696470862615, "learning_rate": 3.753122460466656e-06, "loss": 0.5909, "step": 13290 }, { "epoch": 1.19, "grad_norm": 6.3088975727473064, "learning_rate": 3.7524229241850284e-06, "loss": 0.5641, "step": 13291 }, { "epoch": 1.19, "grad_norm": 6.414238619615227, "learning_rate": 3.7517234139438384e-06, "loss": 0.6554, "step": 13292 }, { "epoch": 1.19, "grad_norm": 6.841049319880134, "learning_rate": 3.751023929757689e-06, "loss": 0.5834, "step": 13293 }, { "epoch": 1.19, "grad_norm": 6.010294441135963, "learning_rate": 3.750324471641181e-06, "loss": 0.639, "step": 13294 }, { "epoch": 1.19, "grad_norm": 9.229695815841394, "learning_rate": 3.749625039608911e-06, "loss": 0.635, "step": 13295 }, { "epoch": 1.19, "grad_norm": 5.540344754532188, "learning_rate": 3.7489256336754808e-06, "loss": 0.5727, "step": 13296 }, { "epoch": 1.19, "grad_norm": 7.151932074001832, "learning_rate": 3.748226253855489e-06, "loss": 0.6235, "step": 13297 }, { "epoch": 1.19, "grad_norm": 6.168453831367233, "learning_rate": 3.747526900163534e-06, "loss": 0.5883, "step": 13298 }, { "epoch": 1.19, "grad_norm": 4.935356159178155, "learning_rate": 3.74682757261421e-06, "loss": 0.5572, "step": 13299 }, { "epoch": 1.19, "grad_norm": 6.239923943658096, "learning_rate": 3.746128271222116e-06, "loss": 0.6167, "step": 13300 }, { "epoch": 1.19, "grad_norm": 6.423374920120001, "learning_rate": 3.74542899600185e-06, "loss": 0.6183, "step": 13301 }, { "epoch": 1.19, "grad_norm": 7.1237377099317225, "learning_rate": 3.7447297469680037e-06, "loss": 0.6318, "step": 13302 }, { "epoch": 1.19, "grad_norm": 6.387916506163475, "learning_rate": 3.744030524135176e-06, "loss": 0.5456, "step": 13303 }, { "epoch": 1.19, "grad_norm": 6.773770048278127, "learning_rate": 3.7433313275179615e-06, "loss": 0.5879, "step": 13304 }, { "epoch": 1.19, "grad_norm": 7.642016470015299, "learning_rate": 3.742632157130952e-06, "loss": 0.5694, "step": 13305 }, { "epoch": 1.19, "grad_norm": 7.599297983853513, "learning_rate": 3.7419330129887433e-06, "loss": 0.6193, "step": 13306 }, { "epoch": 1.19, "grad_norm": 7.6446573905629425, "learning_rate": 3.741233895105929e-06, "loss": 0.65, "step": 13307 }, { "epoch": 1.19, "grad_norm": 7.210638884693305, "learning_rate": 3.740534803497099e-06, "loss": 0.6439, "step": 13308 }, { "epoch": 1.19, "grad_norm": 6.381276249951244, "learning_rate": 3.739835738176847e-06, "loss": 0.5418, "step": 13309 }, { "epoch": 1.19, "grad_norm": 6.767867866686351, "learning_rate": 3.7391366991597645e-06, "loss": 0.5676, "step": 13310 }, { "epoch": 1.19, "grad_norm": 7.296950697007662, "learning_rate": 3.7384376864604422e-06, "loss": 0.5709, "step": 13311 }, { "epoch": 1.19, "grad_norm": 6.285868834222443, "learning_rate": 3.7377387000934718e-06, "loss": 0.5684, "step": 13312 }, { "epoch": 1.19, "grad_norm": 6.763670166663195, "learning_rate": 3.73703974007344e-06, "loss": 0.6617, "step": 13313 }, { "epoch": 1.19, "grad_norm": 7.537799752909669, "learning_rate": 3.7363408064149387e-06, "loss": 0.6376, "step": 13314 }, { "epoch": 1.19, "grad_norm": 5.849846124156815, "learning_rate": 3.735641899132555e-06, "loss": 0.6191, "step": 13315 }, { "epoch": 1.19, "grad_norm": 5.808325279229659, "learning_rate": 3.734943018240877e-06, "loss": 0.5652, "step": 13316 }, { "epoch": 1.19, "grad_norm": 7.392338458294858, "learning_rate": 3.7342441637544946e-06, "loss": 0.6199, "step": 13317 }, { "epoch": 1.19, "grad_norm": 6.812279635082237, "learning_rate": 3.7335453356879925e-06, "loss": 0.6859, "step": 13318 }, { "epoch": 1.19, "grad_norm": 5.402024635774427, "learning_rate": 3.7328465340559572e-06, "loss": 0.5737, "step": 13319 }, { "epoch": 1.19, "grad_norm": 7.44177630983827, "learning_rate": 3.732147758872977e-06, "loss": 0.6287, "step": 13320 }, { "epoch": 1.19, "grad_norm": 8.34228154208312, "learning_rate": 3.731449010153635e-06, "loss": 0.6008, "step": 13321 }, { "epoch": 1.19, "grad_norm": 5.577441400818133, "learning_rate": 3.7307502879125168e-06, "loss": 0.6183, "step": 13322 }, { "epoch": 1.19, "grad_norm": 9.274844485097661, "learning_rate": 3.7300515921642077e-06, "loss": 0.5931, "step": 13323 }, { "epoch": 1.19, "grad_norm": 7.333971070286751, "learning_rate": 3.7293529229232896e-06, "loss": 0.5541, "step": 13324 }, { "epoch": 1.19, "grad_norm": 6.617372528403955, "learning_rate": 3.7286542802043456e-06, "loss": 0.6152, "step": 13325 }, { "epoch": 1.19, "grad_norm": 6.898132804161141, "learning_rate": 3.7279556640219617e-06, "loss": 0.6085, "step": 13326 }, { "epoch": 1.19, "grad_norm": 5.557944291987163, "learning_rate": 3.727257074390718e-06, "loss": 0.6431, "step": 13327 }, { "epoch": 1.19, "grad_norm": 6.096262078492384, "learning_rate": 3.7265585113251933e-06, "loss": 0.58, "step": 13328 }, { "epoch": 1.19, "grad_norm": 5.0923032264784815, "learning_rate": 3.7258599748399717e-06, "loss": 0.596, "step": 13329 }, { "epoch": 1.19, "grad_norm": 6.204784179701877, "learning_rate": 3.7251614649496327e-06, "loss": 0.5687, "step": 13330 }, { "epoch": 1.19, "grad_norm": 6.163593417308658, "learning_rate": 3.724462981668756e-06, "loss": 0.5634, "step": 13331 }, { "epoch": 1.19, "grad_norm": 7.216488183742146, "learning_rate": 3.723764525011921e-06, "loss": 0.6073, "step": 13332 }, { "epoch": 1.19, "grad_norm": 6.908128317353288, "learning_rate": 3.7230660949937077e-06, "loss": 0.5712, "step": 13333 }, { "epoch": 1.19, "grad_norm": 5.395384841171362, "learning_rate": 3.7223676916286924e-06, "loss": 0.5783, "step": 13334 }, { "epoch": 1.19, "grad_norm": 7.206625552311317, "learning_rate": 3.721669314931453e-06, "loss": 0.6437, "step": 13335 }, { "epoch": 1.19, "grad_norm": 5.138890824520557, "learning_rate": 3.720970964916568e-06, "loss": 0.5508, "step": 13336 }, { "epoch": 1.19, "grad_norm": 6.907259011999707, "learning_rate": 3.720272641598613e-06, "loss": 0.5539, "step": 13337 }, { "epoch": 1.19, "grad_norm": 6.77308647027385, "learning_rate": 3.7195743449921636e-06, "loss": 0.5643, "step": 13338 }, { "epoch": 1.19, "grad_norm": 8.050993946611047, "learning_rate": 3.718876075111797e-06, "loss": 0.5897, "step": 13339 }, { "epoch": 1.19, "grad_norm": 5.668025934411838, "learning_rate": 3.718177831972085e-06, "loss": 0.6356, "step": 13340 }, { "epoch": 1.19, "grad_norm": 6.101800675397439, "learning_rate": 3.7174796155876056e-06, "loss": 0.5768, "step": 13341 }, { "epoch": 1.19, "grad_norm": 7.0751932016519925, "learning_rate": 3.7167814259729296e-06, "loss": 0.5779, "step": 13342 }, { "epoch": 1.19, "grad_norm": 5.998333013343968, "learning_rate": 3.7160832631426313e-06, "loss": 0.6689, "step": 13343 }, { "epoch": 1.19, "grad_norm": 8.708210994188647, "learning_rate": 3.7153851271112823e-06, "loss": 0.6379, "step": 13344 }, { "epoch": 1.19, "grad_norm": 6.2749219821951066, "learning_rate": 3.714687017893456e-06, "loss": 0.5954, "step": 13345 }, { "epoch": 1.19, "grad_norm": 6.741420807799858, "learning_rate": 3.713988935503724e-06, "loss": 0.6066, "step": 13346 }, { "epoch": 1.19, "grad_norm": 5.792812730450356, "learning_rate": 3.7132908799566552e-06, "loss": 0.6012, "step": 13347 }, { "epoch": 1.19, "grad_norm": 6.50524200313823, "learning_rate": 3.712592851266822e-06, "loss": 0.64, "step": 13348 }, { "epoch": 1.19, "grad_norm": 7.481621204089206, "learning_rate": 3.7118948494487943e-06, "loss": 0.5935, "step": 13349 }, { "epoch": 1.19, "grad_norm": 8.153970854546364, "learning_rate": 3.71119687451714e-06, "loss": 0.5718, "step": 13350 }, { "epoch": 1.19, "grad_norm": 5.368904877323651, "learning_rate": 3.7104989264864294e-06, "loss": 0.635, "step": 13351 }, { "epoch": 1.19, "grad_norm": 6.820112626328267, "learning_rate": 3.7098010053712285e-06, "loss": 0.6577, "step": 13352 }, { "epoch": 1.19, "grad_norm": 5.3518502113369255, "learning_rate": 3.7091031111861064e-06, "loss": 0.5468, "step": 13353 }, { "epoch": 1.19, "grad_norm": 6.760270766985963, "learning_rate": 3.7084052439456314e-06, "loss": 0.6516, "step": 13354 }, { "epoch": 1.19, "grad_norm": 7.118158925536969, "learning_rate": 3.707707403664367e-06, "loss": 0.5769, "step": 13355 }, { "epoch": 1.19, "grad_norm": 6.894813199152525, "learning_rate": 3.707009590356882e-06, "loss": 0.6032, "step": 13356 }, { "epoch": 1.19, "grad_norm": 6.058749299610843, "learning_rate": 3.7063118040377387e-06, "loss": 0.5395, "step": 13357 }, { "epoch": 1.19, "grad_norm": 7.636230658910119, "learning_rate": 3.7056140447215038e-06, "loss": 0.6086, "step": 13358 }, { "epoch": 1.19, "grad_norm": 8.20518466879551, "learning_rate": 3.7049163124227404e-06, "loss": 0.5478, "step": 13359 }, { "epoch": 1.19, "grad_norm": 6.0024708210386075, "learning_rate": 3.704218607156013e-06, "loss": 0.6487, "step": 13360 }, { "epoch": 1.19, "grad_norm": 6.111633604856421, "learning_rate": 3.7035209289358853e-06, "loss": 0.617, "step": 13361 }, { "epoch": 1.19, "grad_norm": 7.001762036576767, "learning_rate": 3.702823277776918e-06, "loss": 0.6183, "step": 13362 }, { "epoch": 1.19, "grad_norm": 6.728208792333243, "learning_rate": 3.7021256536936745e-06, "loss": 0.5905, "step": 13363 }, { "epoch": 1.19, "grad_norm": 6.367934634851974, "learning_rate": 3.7014280567007156e-06, "loss": 0.6489, "step": 13364 }, { "epoch": 1.19, "grad_norm": 6.041847191060068, "learning_rate": 3.700730486812602e-06, "loss": 0.5555, "step": 13365 }, { "epoch": 1.19, "grad_norm": 7.365171179129494, "learning_rate": 3.7000329440438943e-06, "loss": 0.5594, "step": 13366 }, { "epoch": 1.19, "grad_norm": 7.465580709639025, "learning_rate": 3.699335428409152e-06, "loss": 0.6252, "step": 13367 }, { "epoch": 1.19, "grad_norm": 6.212286361538738, "learning_rate": 3.698637939922934e-06, "loss": 0.6195, "step": 13368 }, { "epoch": 1.19, "grad_norm": 4.7696498958472455, "learning_rate": 3.6979404785997987e-06, "loss": 0.5948, "step": 13369 }, { "epoch": 1.19, "grad_norm": 5.689260460015791, "learning_rate": 3.697243044454307e-06, "loss": 0.5936, "step": 13370 }, { "epoch": 1.19, "grad_norm": 5.390039822540484, "learning_rate": 3.6965456375010123e-06, "loss": 0.6192, "step": 13371 }, { "epoch": 1.19, "grad_norm": 5.815751775174619, "learning_rate": 3.695848257754472e-06, "loss": 0.5975, "step": 13372 }, { "epoch": 1.19, "grad_norm": 5.276173778013968, "learning_rate": 3.6951509052292434e-06, "loss": 0.6367, "step": 13373 }, { "epoch": 1.19, "grad_norm": 7.0723634437266245, "learning_rate": 3.694453579939883e-06, "loss": 0.6596, "step": 13374 }, { "epoch": 1.19, "grad_norm": 7.92943420646439, "learning_rate": 3.6937562819009443e-06, "loss": 0.6453, "step": 13375 }, { "epoch": 1.19, "grad_norm": 6.234335758242693, "learning_rate": 3.6930590111269816e-06, "loss": 0.6025, "step": 13376 }, { "epoch": 1.19, "grad_norm": 5.531777454037257, "learning_rate": 3.692361767632552e-06, "loss": 0.6328, "step": 13377 }, { "epoch": 1.19, "grad_norm": 7.712010850284064, "learning_rate": 3.6916645514322053e-06, "loss": 0.5869, "step": 13378 }, { "epoch": 1.19, "grad_norm": 8.359634302580409, "learning_rate": 3.690967362540496e-06, "loss": 0.6161, "step": 13379 }, { "epoch": 1.19, "grad_norm": 7.481288670367188, "learning_rate": 3.690270200971977e-06, "loss": 0.6286, "step": 13380 }, { "epoch": 1.19, "grad_norm": 5.832034131502601, "learning_rate": 3.689573066741199e-06, "loss": 0.6539, "step": 13381 }, { "epoch": 1.19, "grad_norm": 6.684148107185263, "learning_rate": 3.688875959862712e-06, "loss": 0.6819, "step": 13382 }, { "epoch": 1.19, "grad_norm": 7.166003390579808, "learning_rate": 3.688178880351071e-06, "loss": 0.5682, "step": 13383 }, { "epoch": 1.19, "grad_norm": 7.279794586850889, "learning_rate": 3.6874818282208202e-06, "loss": 0.5893, "step": 13384 }, { "epoch": 1.19, "grad_norm": 10.200082078579573, "learning_rate": 3.686784803486515e-06, "loss": 0.6844, "step": 13385 }, { "epoch": 1.19, "grad_norm": 6.4499319426330315, "learning_rate": 3.686087806162699e-06, "loss": 0.5994, "step": 13386 }, { "epoch": 1.19, "grad_norm": 8.381656677468134, "learning_rate": 3.6853908362639233e-06, "loss": 0.6476, "step": 13387 }, { "epoch": 1.19, "grad_norm": 6.363877402522761, "learning_rate": 3.6846938938047345e-06, "loss": 0.6404, "step": 13388 }, { "epoch": 1.19, "grad_norm": 6.012799199197222, "learning_rate": 3.68399697879968e-06, "loss": 0.5979, "step": 13389 }, { "epoch": 1.19, "grad_norm": 9.688515158515365, "learning_rate": 3.683300091263307e-06, "loss": 0.65, "step": 13390 }, { "epoch": 1.19, "grad_norm": 5.340990871851641, "learning_rate": 3.682603231210161e-06, "loss": 0.5997, "step": 13391 }, { "epoch": 1.19, "grad_norm": 6.180246543453739, "learning_rate": 3.6819063986547865e-06, "loss": 0.5668, "step": 13392 }, { "epoch": 1.19, "grad_norm": 6.525451847076877, "learning_rate": 3.681209593611731e-06, "loss": 0.6236, "step": 13393 }, { "epoch": 1.19, "grad_norm": 5.278110805525595, "learning_rate": 3.680512816095536e-06, "loss": 0.5717, "step": 13394 }, { "epoch": 1.19, "grad_norm": 7.0761307915444, "learning_rate": 3.679816066120746e-06, "loss": 0.6066, "step": 13395 }, { "epoch": 1.2, "grad_norm": 6.4382547642093515, "learning_rate": 3.6791193437019057e-06, "loss": 0.6302, "step": 13396 }, { "epoch": 1.2, "grad_norm": 6.026694764826297, "learning_rate": 3.6784226488535557e-06, "loss": 0.6258, "step": 13397 }, { "epoch": 1.2, "grad_norm": 5.583972786627504, "learning_rate": 3.6777259815902385e-06, "loss": 0.6069, "step": 13398 }, { "epoch": 1.2, "grad_norm": 5.410957446317676, "learning_rate": 3.677029341926498e-06, "loss": 0.6454, "step": 13399 }, { "epoch": 1.2, "grad_norm": 6.642764136223984, "learning_rate": 3.6763327298768713e-06, "loss": 0.5532, "step": 13400 }, { "epoch": 1.2, "grad_norm": 5.806724778840881, "learning_rate": 3.6756361454558987e-06, "loss": 0.5714, "step": 13401 }, { "epoch": 1.2, "grad_norm": 6.109153636810562, "learning_rate": 3.674939588678122e-06, "loss": 0.5934, "step": 13402 }, { "epoch": 1.2, "grad_norm": 7.5558213319209235, "learning_rate": 3.6742430595580803e-06, "loss": 0.5528, "step": 13403 }, { "epoch": 1.2, "grad_norm": 5.396656410559189, "learning_rate": 3.67354655811031e-06, "loss": 0.6158, "step": 13404 }, { "epoch": 1.2, "grad_norm": 7.025110289598913, "learning_rate": 3.672850084349351e-06, "loss": 0.6211, "step": 13405 }, { "epoch": 1.2, "grad_norm": 7.631370099735983, "learning_rate": 3.672153638289741e-06, "loss": 0.5557, "step": 13406 }, { "epoch": 1.2, "grad_norm": 5.436376643303969, "learning_rate": 3.6714572199460145e-06, "loss": 0.6381, "step": 13407 }, { "epoch": 1.2, "grad_norm": 6.360879261903947, "learning_rate": 3.6707608293327092e-06, "loss": 0.6128, "step": 13408 }, { "epoch": 1.2, "grad_norm": 6.688349962289531, "learning_rate": 3.670064466464362e-06, "loss": 0.5485, "step": 13409 }, { "epoch": 1.2, "grad_norm": 7.225358542943175, "learning_rate": 3.6693681313555053e-06, "loss": 0.5611, "step": 13410 }, { "epoch": 1.2, "grad_norm": 7.4853367045870005, "learning_rate": 3.6686718240206744e-06, "loss": 0.6258, "step": 13411 }, { "epoch": 1.2, "grad_norm": 7.117383553738129, "learning_rate": 3.6679755444744055e-06, "loss": 0.5555, "step": 13412 }, { "epoch": 1.2, "grad_norm": 6.55056395502896, "learning_rate": 3.6672792927312307e-06, "loss": 0.562, "step": 13413 }, { "epoch": 1.2, "grad_norm": 5.9510342837356225, "learning_rate": 3.6665830688056797e-06, "loss": 0.5794, "step": 13414 }, { "epoch": 1.2, "grad_norm": 5.7470150233605555, "learning_rate": 3.665886872712287e-06, "loss": 0.5904, "step": 13415 }, { "epoch": 1.2, "grad_norm": 7.416530551095121, "learning_rate": 3.6651907044655854e-06, "loss": 0.6729, "step": 13416 }, { "epoch": 1.2, "grad_norm": 5.785884103178055, "learning_rate": 3.6644945640801035e-06, "loss": 0.6192, "step": 13417 }, { "epoch": 1.2, "grad_norm": 8.47907456455891, "learning_rate": 3.663798451570373e-06, "loss": 0.5795, "step": 13418 }, { "epoch": 1.2, "grad_norm": 6.413815094207633, "learning_rate": 3.6631023669509245e-06, "loss": 0.5701, "step": 13419 }, { "epoch": 1.2, "grad_norm": 7.5730509754455015, "learning_rate": 3.6624063102362854e-06, "loss": 0.63, "step": 13420 }, { "epoch": 1.2, "grad_norm": 4.95195520875387, "learning_rate": 3.6617102814409856e-06, "loss": 0.5744, "step": 13421 }, { "epoch": 1.2, "grad_norm": 5.28903149340452, "learning_rate": 3.6610142805795534e-06, "loss": 0.5378, "step": 13422 }, { "epoch": 1.2, "grad_norm": 6.170294841867409, "learning_rate": 3.660318307666515e-06, "loss": 0.5193, "step": 13423 }, { "epoch": 1.2, "grad_norm": 6.1120929242656095, "learning_rate": 3.659622362716398e-06, "loss": 0.6029, "step": 13424 }, { "epoch": 1.2, "grad_norm": 6.2150607636132404, "learning_rate": 3.65892644574373e-06, "loss": 0.5764, "step": 13425 }, { "epoch": 1.2, "grad_norm": 5.839085036099872, "learning_rate": 3.658230556763034e-06, "loss": 0.6325, "step": 13426 }, { "epoch": 1.2, "grad_norm": 6.790711140060222, "learning_rate": 3.657534695788838e-06, "loss": 0.6223, "step": 13427 }, { "epoch": 1.2, "grad_norm": 7.268448662204009, "learning_rate": 3.6568388628356654e-06, "loss": 0.6811, "step": 13428 }, { "epoch": 1.2, "grad_norm": 6.802607309271588, "learning_rate": 3.65614305791804e-06, "loss": 0.6674, "step": 13429 }, { "epoch": 1.2, "grad_norm": 6.553434981697832, "learning_rate": 3.655447281050485e-06, "loss": 0.5806, "step": 13430 }, { "epoch": 1.2, "grad_norm": 5.75191898522017, "learning_rate": 3.6547515322475226e-06, "loss": 0.5723, "step": 13431 }, { "epoch": 1.2, "grad_norm": 8.420756003216937, "learning_rate": 3.6540558115236766e-06, "loss": 0.6518, "step": 13432 }, { "epoch": 1.2, "grad_norm": 4.8719181280495025, "learning_rate": 3.6533601188934677e-06, "loss": 0.5099, "step": 13433 }, { "epoch": 1.2, "grad_norm": 6.219725796661219, "learning_rate": 3.652664454371418e-06, "loss": 0.6194, "step": 13434 }, { "epoch": 1.2, "grad_norm": 6.1649765486890455, "learning_rate": 3.651968817972046e-06, "loss": 0.6152, "step": 13435 }, { "epoch": 1.2, "grad_norm": 7.401132509805776, "learning_rate": 3.6512732097098735e-06, "loss": 0.6425, "step": 13436 }, { "epoch": 1.2, "grad_norm": 7.475634055010808, "learning_rate": 3.650577629599419e-06, "loss": 0.5959, "step": 13437 }, { "epoch": 1.2, "grad_norm": 5.866006466905289, "learning_rate": 3.6498820776552012e-06, "loss": 0.5129, "step": 13438 }, { "epoch": 1.2, "grad_norm": 5.365268849156915, "learning_rate": 3.649186553891738e-06, "loss": 0.5199, "step": 13439 }, { "epoch": 1.2, "grad_norm": 6.852090716109708, "learning_rate": 3.648491058323548e-06, "loss": 0.6229, "step": 13440 }, { "epoch": 1.2, "grad_norm": 5.545140374887758, "learning_rate": 3.6477955909651465e-06, "loss": 0.5909, "step": 13441 }, { "epoch": 1.2, "grad_norm": 7.007464495513034, "learning_rate": 3.6471001518310533e-06, "loss": 0.68, "step": 13442 }, { "epoch": 1.2, "grad_norm": 5.649516078457325, "learning_rate": 3.6464047409357793e-06, "loss": 0.6019, "step": 13443 }, { "epoch": 1.2, "grad_norm": 6.0638617064300915, "learning_rate": 3.645709358293843e-06, "loss": 0.5948, "step": 13444 }, { "epoch": 1.2, "grad_norm": 6.2767983954504505, "learning_rate": 3.645014003919757e-06, "loss": 0.6186, "step": 13445 }, { "epoch": 1.2, "grad_norm": 6.903209155005053, "learning_rate": 3.644318677828036e-06, "loss": 0.5957, "step": 13446 }, { "epoch": 1.2, "grad_norm": 6.0433584869065236, "learning_rate": 3.643623380033195e-06, "loss": 0.601, "step": 13447 }, { "epoch": 1.2, "grad_norm": 5.279335490355251, "learning_rate": 3.6429281105497443e-06, "loss": 0.6755, "step": 13448 }, { "epoch": 1.2, "grad_norm": 5.745026484132621, "learning_rate": 3.642232869392197e-06, "loss": 0.5892, "step": 13449 }, { "epoch": 1.2, "grad_norm": 5.570097558881129, "learning_rate": 3.641537656575067e-06, "loss": 0.6562, "step": 13450 }, { "epoch": 1.2, "grad_norm": 6.316970160286934, "learning_rate": 3.6408424721128623e-06, "loss": 0.6534, "step": 13451 }, { "epoch": 1.2, "grad_norm": 5.953060113527509, "learning_rate": 3.640147316020094e-06, "loss": 0.6128, "step": 13452 }, { "epoch": 1.2, "grad_norm": 9.334902464848572, "learning_rate": 3.639452188311274e-06, "loss": 0.6058, "step": 13453 }, { "epoch": 1.2, "grad_norm": 6.260760494810699, "learning_rate": 3.6387570890009083e-06, "loss": 0.5569, "step": 13454 }, { "epoch": 1.2, "grad_norm": 5.419175287614351, "learning_rate": 3.6380620181035074e-06, "loss": 0.5966, "step": 13455 }, { "epoch": 1.2, "grad_norm": 6.254001382729129, "learning_rate": 3.637366975633581e-06, "loss": 0.6313, "step": 13456 }, { "epoch": 1.2, "grad_norm": 6.887830111327473, "learning_rate": 3.6366719616056354e-06, "loss": 0.6336, "step": 13457 }, { "epoch": 1.2, "grad_norm": 6.246545503275529, "learning_rate": 3.6359769760341756e-06, "loss": 0.5795, "step": 13458 }, { "epoch": 1.2, "grad_norm": 7.609166419548819, "learning_rate": 3.6352820189337085e-06, "loss": 0.6346, "step": 13459 }, { "epoch": 1.2, "grad_norm": 6.886720766770024, "learning_rate": 3.6345870903187417e-06, "loss": 0.6283, "step": 13460 }, { "epoch": 1.2, "grad_norm": 4.312832307825357, "learning_rate": 3.6338921902037782e-06, "loss": 0.6527, "step": 13461 }, { "epoch": 1.2, "grad_norm": 6.186255809462418, "learning_rate": 3.633197318603323e-06, "loss": 0.5829, "step": 13462 }, { "epoch": 1.2, "grad_norm": 6.46061794843479, "learning_rate": 3.6325024755318826e-06, "loss": 0.5466, "step": 13463 }, { "epoch": 1.2, "grad_norm": 5.696618396058396, "learning_rate": 3.6318076610039563e-06, "loss": 0.6098, "step": 13464 }, { "epoch": 1.2, "grad_norm": 6.39099216724389, "learning_rate": 3.6311128750340495e-06, "loss": 0.5559, "step": 13465 }, { "epoch": 1.2, "grad_norm": 7.715127282462917, "learning_rate": 3.6304181176366647e-06, "loss": 0.6186, "step": 13466 }, { "epoch": 1.2, "grad_norm": 7.430671364930992, "learning_rate": 3.6297233888263004e-06, "loss": 0.5894, "step": 13467 }, { "epoch": 1.2, "grad_norm": 11.32433630042538, "learning_rate": 3.629028688617461e-06, "loss": 0.5864, "step": 13468 }, { "epoch": 1.2, "grad_norm": 5.76485883462344, "learning_rate": 3.628334017024645e-06, "loss": 0.6051, "step": 13469 }, { "epoch": 1.2, "grad_norm": 6.862312266734907, "learning_rate": 3.6276393740623515e-06, "loss": 0.5758, "step": 13470 }, { "epoch": 1.2, "grad_norm": 7.084893719507433, "learning_rate": 3.6269447597450834e-06, "loss": 0.6346, "step": 13471 }, { "epoch": 1.2, "grad_norm": 6.740075927238261, "learning_rate": 3.6262501740873344e-06, "loss": 0.6236, "step": 13472 }, { "epoch": 1.2, "grad_norm": 9.290273262851208, "learning_rate": 3.6255556171036056e-06, "loss": 0.6173, "step": 13473 }, { "epoch": 1.2, "grad_norm": 6.171461344662817, "learning_rate": 3.624861088808392e-06, "loss": 0.5847, "step": 13474 }, { "epoch": 1.2, "grad_norm": 6.9268736223860925, "learning_rate": 3.6241665892161914e-06, "loss": 0.6013, "step": 13475 }, { "epoch": 1.2, "grad_norm": 5.4546088228048335, "learning_rate": 3.6234721183415016e-06, "loss": 0.6107, "step": 13476 }, { "epoch": 1.2, "grad_norm": 7.590112483658299, "learning_rate": 3.6227776761988158e-06, "loss": 0.5984, "step": 13477 }, { "epoch": 1.2, "grad_norm": 5.642075570392588, "learning_rate": 3.6220832628026303e-06, "loss": 0.5921, "step": 13478 }, { "epoch": 1.2, "grad_norm": 6.227625016150862, "learning_rate": 3.6213888781674396e-06, "loss": 0.5815, "step": 13479 }, { "epoch": 1.2, "grad_norm": 5.8834600629175275, "learning_rate": 3.620694522307736e-06, "loss": 0.5707, "step": 13480 }, { "epoch": 1.2, "grad_norm": 7.144045281716197, "learning_rate": 3.6200001952380138e-06, "loss": 0.6746, "step": 13481 }, { "epoch": 1.2, "grad_norm": 6.199366796315545, "learning_rate": 3.6193058969727657e-06, "loss": 0.6035, "step": 13482 }, { "epoch": 1.2, "grad_norm": 7.3313862945462995, "learning_rate": 3.6186116275264834e-06, "loss": 0.643, "step": 13483 }, { "epoch": 1.2, "grad_norm": 8.115140724911324, "learning_rate": 3.6179173869136575e-06, "loss": 0.6173, "step": 13484 }, { "epoch": 1.2, "grad_norm": 6.215081643300225, "learning_rate": 3.617223175148782e-06, "loss": 0.5843, "step": 13485 }, { "epoch": 1.2, "grad_norm": 5.394690674045719, "learning_rate": 3.616528992246343e-06, "loss": 0.5799, "step": 13486 }, { "epoch": 1.2, "grad_norm": 5.8027945380135835, "learning_rate": 3.61583483822083e-06, "loss": 0.5733, "step": 13487 }, { "epoch": 1.2, "grad_norm": 5.743858826347586, "learning_rate": 3.6151407130867346e-06, "loss": 0.5827, "step": 13488 }, { "epoch": 1.2, "grad_norm": 6.614220286873898, "learning_rate": 3.6144466168585447e-06, "loss": 0.578, "step": 13489 }, { "epoch": 1.2, "grad_norm": 5.7753686731642615, "learning_rate": 3.6137525495507456e-06, "loss": 0.5954, "step": 13490 }, { "epoch": 1.2, "grad_norm": 6.685945364385989, "learning_rate": 3.613058511177827e-06, "loss": 0.6542, "step": 13491 }, { "epoch": 1.2, "grad_norm": 6.82382789205328, "learning_rate": 3.6123645017542754e-06, "loss": 0.586, "step": 13492 }, { "epoch": 1.2, "grad_norm": 8.757346902782816, "learning_rate": 3.6116705212945754e-06, "loss": 0.6658, "step": 13493 }, { "epoch": 1.2, "grad_norm": 4.6031511035343415, "learning_rate": 3.610976569813212e-06, "loss": 0.6565, "step": 13494 }, { "epoch": 1.2, "grad_norm": 5.679110651127734, "learning_rate": 3.610282647324672e-06, "loss": 0.5674, "step": 13495 }, { "epoch": 1.2, "grad_norm": 7.303850456199928, "learning_rate": 3.609588753843438e-06, "loss": 0.6317, "step": 13496 }, { "epoch": 1.2, "grad_norm": 6.305525666307039, "learning_rate": 3.6088948893839938e-06, "loss": 0.6372, "step": 13497 }, { "epoch": 1.2, "grad_norm": 9.007270545148677, "learning_rate": 3.6082010539608224e-06, "loss": 0.5913, "step": 13498 }, { "epoch": 1.2, "grad_norm": 9.599923266591107, "learning_rate": 3.6075072475884053e-06, "loss": 0.5965, "step": 13499 }, { "epoch": 1.2, "grad_norm": 5.82696728154465, "learning_rate": 3.6068134702812275e-06, "loss": 0.5871, "step": 13500 }, { "epoch": 1.2, "grad_norm": 5.942248415524232, "learning_rate": 3.606119722053765e-06, "loss": 0.6002, "step": 13501 }, { "epoch": 1.2, "grad_norm": 7.554624312448088, "learning_rate": 3.605426002920502e-06, "loss": 0.5731, "step": 13502 }, { "epoch": 1.2, "grad_norm": 7.018830101239312, "learning_rate": 3.6047323128959155e-06, "loss": 0.6769, "step": 13503 }, { "epoch": 1.2, "grad_norm": 6.817969522441525, "learning_rate": 3.604038651994487e-06, "loss": 0.5928, "step": 13504 }, { "epoch": 1.2, "grad_norm": 5.8294227249977855, "learning_rate": 3.603345020230696e-06, "loss": 0.6481, "step": 13505 }, { "epoch": 1.2, "grad_norm": 7.918528355117688, "learning_rate": 3.6026514176190173e-06, "loss": 0.5975, "step": 13506 }, { "epoch": 1.2, "grad_norm": 6.209877336487939, "learning_rate": 3.60195784417393e-06, "loss": 0.5648, "step": 13507 }, { "epoch": 1.21, "grad_norm": 7.821566509572182, "learning_rate": 3.6012642999099124e-06, "loss": 0.5838, "step": 13508 }, { "epoch": 1.21, "grad_norm": 6.34115095906706, "learning_rate": 3.6005707848414385e-06, "loss": 0.612, "step": 13509 }, { "epoch": 1.21, "grad_norm": 5.034771982717397, "learning_rate": 3.5998772989829856e-06, "loss": 0.5443, "step": 13510 }, { "epoch": 1.21, "grad_norm": 5.747182229983236, "learning_rate": 3.5991838423490265e-06, "loss": 0.6105, "step": 13511 }, { "epoch": 1.21, "grad_norm": 5.691849233078796, "learning_rate": 3.5984904149540375e-06, "loss": 0.5771, "step": 13512 }, { "epoch": 1.21, "grad_norm": 4.85816319229172, "learning_rate": 3.5977970168124925e-06, "loss": 0.565, "step": 13513 }, { "epoch": 1.21, "grad_norm": 5.183793716407959, "learning_rate": 3.5971036479388645e-06, "loss": 0.5822, "step": 13514 }, { "epoch": 1.21, "grad_norm": 5.396954914237455, "learning_rate": 3.596410308347625e-06, "loss": 0.5746, "step": 13515 }, { "epoch": 1.21, "grad_norm": 5.446578996356294, "learning_rate": 3.595716998053246e-06, "loss": 0.5874, "step": 13516 }, { "epoch": 1.21, "grad_norm": 4.913065810572012, "learning_rate": 3.5950237170701985e-06, "loss": 0.6196, "step": 13517 }, { "epoch": 1.21, "grad_norm": 7.238654536215153, "learning_rate": 3.5943304654129556e-06, "loss": 0.5446, "step": 13518 }, { "epoch": 1.21, "grad_norm": 5.608655369086084, "learning_rate": 3.593637243095985e-06, "loss": 0.5951, "step": 13519 }, { "epoch": 1.21, "grad_norm": 6.021294730942938, "learning_rate": 3.5929440501337574e-06, "loss": 0.6996, "step": 13520 }, { "epoch": 1.21, "grad_norm": 5.188487757806333, "learning_rate": 3.5922508865407414e-06, "loss": 0.5199, "step": 13521 }, { "epoch": 1.21, "grad_norm": 7.578262845405902, "learning_rate": 3.5915577523314047e-06, "loss": 0.6462, "step": 13522 }, { "epoch": 1.21, "grad_norm": 8.308587444249335, "learning_rate": 3.5908646475202163e-06, "loss": 0.6338, "step": 13523 }, { "epoch": 1.21, "grad_norm": 7.161080599981999, "learning_rate": 3.5901715721216417e-06, "loss": 0.5935, "step": 13524 }, { "epoch": 1.21, "grad_norm": 6.665958829984157, "learning_rate": 3.589478526150148e-06, "loss": 0.5468, "step": 13525 }, { "epoch": 1.21, "grad_norm": 5.247252408884917, "learning_rate": 3.588785509620202e-06, "loss": 0.5954, "step": 13526 }, { "epoch": 1.21, "grad_norm": 5.2605726063989815, "learning_rate": 3.588092522546267e-06, "loss": 0.6094, "step": 13527 }, { "epoch": 1.21, "grad_norm": 6.125745486744999, "learning_rate": 3.5873995649428085e-06, "loss": 0.6178, "step": 13528 }, { "epoch": 1.21, "grad_norm": 7.212053369477381, "learning_rate": 3.586706636824293e-06, "loss": 0.6391, "step": 13529 }, { "epoch": 1.21, "grad_norm": 5.697577093246554, "learning_rate": 3.5860137382051795e-06, "loss": 0.5617, "step": 13530 }, { "epoch": 1.21, "grad_norm": 7.608779831226991, "learning_rate": 3.5853208690999325e-06, "loss": 0.702, "step": 13531 }, { "epoch": 1.21, "grad_norm": 6.111523340904321, "learning_rate": 3.584628029523014e-06, "loss": 0.6916, "step": 13532 }, { "epoch": 1.21, "grad_norm": 6.048511635879818, "learning_rate": 3.5839352194888867e-06, "loss": 0.5896, "step": 13533 }, { "epoch": 1.21, "grad_norm": 5.316130757934675, "learning_rate": 3.58324243901201e-06, "loss": 0.5785, "step": 13534 }, { "epoch": 1.21, "grad_norm": 4.9789709282241486, "learning_rate": 3.5825496881068437e-06, "loss": 0.6514, "step": 13535 }, { "epoch": 1.21, "grad_norm": 4.772108612526293, "learning_rate": 3.58185696678785e-06, "loss": 0.6201, "step": 13536 }, { "epoch": 1.21, "grad_norm": 8.216509841134839, "learning_rate": 3.5811642750694857e-06, "loss": 0.6363, "step": 13537 }, { "epoch": 1.21, "grad_norm": 5.175431395922183, "learning_rate": 3.58047161296621e-06, "loss": 0.5856, "step": 13538 }, { "epoch": 1.21, "grad_norm": 4.845018754886855, "learning_rate": 3.5797789804924815e-06, "loss": 0.6225, "step": 13539 }, { "epoch": 1.21, "grad_norm": 6.245404901505736, "learning_rate": 3.5790863776627556e-06, "loss": 0.6366, "step": 13540 }, { "epoch": 1.21, "grad_norm": 8.480958676570017, "learning_rate": 3.5783938044914895e-06, "loss": 0.6037, "step": 13541 }, { "epoch": 1.21, "grad_norm": 7.58019185631316, "learning_rate": 3.5777012609931406e-06, "loss": 0.5737, "step": 13542 }, { "epoch": 1.21, "grad_norm": 6.847005554219167, "learning_rate": 3.5770087471821645e-06, "loss": 0.6153, "step": 13543 }, { "epoch": 1.21, "grad_norm": 6.090265085342041, "learning_rate": 3.576316263073012e-06, "loss": 0.6116, "step": 13544 }, { "epoch": 1.21, "grad_norm": 5.673642059023037, "learning_rate": 3.57562380868014e-06, "loss": 0.6067, "step": 13545 }, { "epoch": 1.21, "grad_norm": 6.557212777336027, "learning_rate": 3.5749313840180026e-06, "loss": 0.5878, "step": 13546 }, { "epoch": 1.21, "grad_norm": 8.537432240662312, "learning_rate": 3.574238989101051e-06, "loss": 0.649, "step": 13547 }, { "epoch": 1.21, "grad_norm": 5.958083884256966, "learning_rate": 3.573546623943738e-06, "loss": 0.5846, "step": 13548 }, { "epoch": 1.21, "grad_norm": 7.009278459758131, "learning_rate": 3.5728542885605155e-06, "loss": 0.6578, "step": 13549 }, { "epoch": 1.21, "grad_norm": 7.8317902998830204, "learning_rate": 3.5721619829658338e-06, "loss": 0.6283, "step": 13550 }, { "epoch": 1.21, "grad_norm": 7.19746750980491, "learning_rate": 3.5714697071741444e-06, "loss": 0.5641, "step": 13551 }, { "epoch": 1.21, "grad_norm": 6.064444794587696, "learning_rate": 3.570777461199897e-06, "loss": 0.572, "step": 13552 }, { "epoch": 1.21, "grad_norm": 5.699465718327881, "learning_rate": 3.570085245057539e-06, "loss": 0.5462, "step": 13553 }, { "epoch": 1.21, "grad_norm": 6.358162214064198, "learning_rate": 3.56939305876152e-06, "loss": 0.512, "step": 13554 }, { "epoch": 1.21, "grad_norm": 7.965856438700478, "learning_rate": 3.5687009023262897e-06, "loss": 0.6306, "step": 13555 }, { "epoch": 1.21, "grad_norm": 5.0811818247818525, "learning_rate": 3.568008775766292e-06, "loss": 0.6627, "step": 13556 }, { "epoch": 1.21, "grad_norm": 6.949226354563478, "learning_rate": 3.5673166790959745e-06, "loss": 0.5543, "step": 13557 }, { "epoch": 1.21, "grad_norm": 5.7686978814773, "learning_rate": 3.566624612329787e-06, "loss": 0.6154, "step": 13558 }, { "epoch": 1.21, "grad_norm": 10.008112930725066, "learning_rate": 3.56593257548217e-06, "loss": 0.5387, "step": 13559 }, { "epoch": 1.21, "grad_norm": 5.262690014972771, "learning_rate": 3.5652405685675696e-06, "loss": 0.5828, "step": 13560 }, { "epoch": 1.21, "grad_norm": 5.724215502508707, "learning_rate": 3.564548591600429e-06, "loss": 0.5595, "step": 13561 }, { "epoch": 1.21, "grad_norm": 4.935514420405982, "learning_rate": 3.563856644595195e-06, "loss": 0.5629, "step": 13562 }, { "epoch": 1.21, "grad_norm": 7.136352426902465, "learning_rate": 3.5631647275663073e-06, "loss": 0.5795, "step": 13563 }, { "epoch": 1.21, "grad_norm": 7.691878849404457, "learning_rate": 3.562472840528209e-06, "loss": 0.5861, "step": 13564 }, { "epoch": 1.21, "grad_norm": 7.185483630908752, "learning_rate": 3.5617809834953433e-06, "loss": 0.6019, "step": 13565 }, { "epoch": 1.21, "grad_norm": 5.22843177093469, "learning_rate": 3.5610891564821482e-06, "loss": 0.5446, "step": 13566 }, { "epoch": 1.21, "grad_norm": 5.3476173357644, "learning_rate": 3.560397359503066e-06, "loss": 0.5779, "step": 13567 }, { "epoch": 1.21, "grad_norm": 6.771327529535014, "learning_rate": 3.5597055925725375e-06, "loss": 0.6011, "step": 13568 }, { "epoch": 1.21, "grad_norm": 7.814000158522739, "learning_rate": 3.559013855705e-06, "loss": 0.5922, "step": 13569 }, { "epoch": 1.21, "grad_norm": 6.434577369544952, "learning_rate": 3.5583221489148923e-06, "loss": 0.5568, "step": 13570 }, { "epoch": 1.21, "grad_norm": 6.037034089091607, "learning_rate": 3.5576304722166533e-06, "loss": 0.598, "step": 13571 }, { "epoch": 1.21, "grad_norm": 6.679545792571334, "learning_rate": 3.5569388256247206e-06, "loss": 0.6661, "step": 13572 }, { "epoch": 1.21, "grad_norm": 5.287390696154459, "learning_rate": 3.556247209153528e-06, "loss": 0.5799, "step": 13573 }, { "epoch": 1.21, "grad_norm": 4.731024940803107, "learning_rate": 3.5555556228175126e-06, "loss": 0.6259, "step": 13574 }, { "epoch": 1.21, "grad_norm": 6.298304327728533, "learning_rate": 3.554864066631112e-06, "loss": 0.5847, "step": 13575 }, { "epoch": 1.21, "grad_norm": 6.8888470622305675, "learning_rate": 3.554172540608757e-06, "loss": 0.5767, "step": 13576 }, { "epoch": 1.21, "grad_norm": 7.770712682237114, "learning_rate": 3.553481044764885e-06, "loss": 0.5803, "step": 13577 }, { "epoch": 1.21, "grad_norm": 6.399882620080166, "learning_rate": 3.552789579113929e-06, "loss": 0.7094, "step": 13578 }, { "epoch": 1.21, "grad_norm": 5.27193692060216, "learning_rate": 3.55209814367032e-06, "loss": 0.6033, "step": 13579 }, { "epoch": 1.21, "grad_norm": 7.804444640116811, "learning_rate": 3.5514067384484918e-06, "loss": 0.6298, "step": 13580 }, { "epoch": 1.21, "grad_norm": 6.643739799188149, "learning_rate": 3.5507153634628765e-06, "loss": 0.608, "step": 13581 }, { "epoch": 1.21, "grad_norm": 5.392851998981026, "learning_rate": 3.5500240187279024e-06, "loss": 0.5508, "step": 13582 }, { "epoch": 1.21, "grad_norm": 9.19786163085266, "learning_rate": 3.549332704258002e-06, "loss": 0.634, "step": 13583 }, { "epoch": 1.21, "grad_norm": 6.658542767268432, "learning_rate": 3.5486414200676056e-06, "loss": 0.6057, "step": 13584 }, { "epoch": 1.21, "grad_norm": 7.488448371941349, "learning_rate": 3.54795016617114e-06, "loss": 0.5837, "step": 13585 }, { "epoch": 1.21, "grad_norm": 4.751706048737387, "learning_rate": 3.547258942583037e-06, "loss": 0.6008, "step": 13586 }, { "epoch": 1.21, "grad_norm": 6.161856639633811, "learning_rate": 3.5465677493177202e-06, "loss": 0.6627, "step": 13587 }, { "epoch": 1.21, "grad_norm": 6.083215262675206, "learning_rate": 3.5458765863896193e-06, "loss": 0.5857, "step": 13588 }, { "epoch": 1.21, "grad_norm": 5.252792084557848, "learning_rate": 3.5451854538131593e-06, "loss": 0.6216, "step": 13589 }, { "epoch": 1.21, "grad_norm": 7.4753792648109325, "learning_rate": 3.5444943516027673e-06, "loss": 0.6323, "step": 13590 }, { "epoch": 1.21, "grad_norm": 8.299125672989351, "learning_rate": 3.543803279772869e-06, "loss": 0.6339, "step": 13591 }, { "epoch": 1.21, "grad_norm": 6.870168991739116, "learning_rate": 3.5431122383378867e-06, "loss": 0.6892, "step": 13592 }, { "epoch": 1.21, "grad_norm": 4.749067434263115, "learning_rate": 3.5424212273122466e-06, "loss": 0.6186, "step": 13593 }, { "epoch": 1.21, "grad_norm": 5.46922010491614, "learning_rate": 3.541730246710372e-06, "loss": 0.5789, "step": 13594 }, { "epoch": 1.21, "grad_norm": 6.167847006382334, "learning_rate": 3.541039296546684e-06, "loss": 0.6145, "step": 13595 }, { "epoch": 1.21, "grad_norm": 5.475966029601449, "learning_rate": 3.540348376835606e-06, "loss": 0.5981, "step": 13596 }, { "epoch": 1.21, "grad_norm": 5.505954464377029, "learning_rate": 3.539657487591559e-06, "loss": 0.6835, "step": 13597 }, { "epoch": 1.21, "grad_norm": 7.055002830628653, "learning_rate": 3.5389666288289636e-06, "loss": 0.6123, "step": 13598 }, { "epoch": 1.21, "grad_norm": 6.048872772717313, "learning_rate": 3.538275800562241e-06, "loss": 0.5986, "step": 13599 }, { "epoch": 1.21, "grad_norm": 7.297833155321413, "learning_rate": 3.537585002805809e-06, "loss": 0.6389, "step": 13600 }, { "epoch": 1.21, "grad_norm": 6.108885373183691, "learning_rate": 3.53689423557409e-06, "loss": 0.5926, "step": 13601 }, { "epoch": 1.21, "grad_norm": 8.432027351649202, "learning_rate": 3.536203498881497e-06, "loss": 0.6126, "step": 13602 }, { "epoch": 1.21, "grad_norm": 6.934804474551162, "learning_rate": 3.5355127927424503e-06, "loss": 0.6002, "step": 13603 }, { "epoch": 1.21, "grad_norm": 6.573655443672592, "learning_rate": 3.534822117171368e-06, "loss": 0.6502, "step": 13604 }, { "epoch": 1.21, "grad_norm": 6.3426984798113155, "learning_rate": 3.5341314721826648e-06, "loss": 0.6042, "step": 13605 }, { "epoch": 1.21, "grad_norm": 6.826241625278029, "learning_rate": 3.5334408577907576e-06, "loss": 0.6366, "step": 13606 }, { "epoch": 1.21, "grad_norm": 5.293728297765808, "learning_rate": 3.5327502740100595e-06, "loss": 0.5609, "step": 13607 }, { "epoch": 1.21, "grad_norm": 6.591031327740003, "learning_rate": 3.532059720854986e-06, "loss": 0.6025, "step": 13608 }, { "epoch": 1.21, "grad_norm": 7.109288836358396, "learning_rate": 3.531369198339953e-06, "loss": 0.6073, "step": 13609 }, { "epoch": 1.21, "grad_norm": 8.129298005338526, "learning_rate": 3.5306787064793702e-06, "loss": 0.5673, "step": 13610 }, { "epoch": 1.21, "grad_norm": 6.337513331728844, "learning_rate": 3.5299882452876512e-06, "loss": 0.581, "step": 13611 }, { "epoch": 1.21, "grad_norm": 6.688341191027541, "learning_rate": 3.52929781477921e-06, "loss": 0.5538, "step": 13612 }, { "epoch": 1.21, "grad_norm": 7.032925484353675, "learning_rate": 3.5286074149684547e-06, "loss": 0.6012, "step": 13613 }, { "epoch": 1.21, "grad_norm": 6.621584534829211, "learning_rate": 3.5279170458697974e-06, "loss": 0.6712, "step": 13614 }, { "epoch": 1.21, "grad_norm": 5.6504114617296635, "learning_rate": 3.527226707497651e-06, "loss": 0.6059, "step": 13615 }, { "epoch": 1.21, "grad_norm": 6.382215044998568, "learning_rate": 3.5265363998664195e-06, "loss": 0.5328, "step": 13616 }, { "epoch": 1.21, "grad_norm": 7.055524677340241, "learning_rate": 3.5258461229905127e-06, "loss": 0.5189, "step": 13617 }, { "epoch": 1.21, "grad_norm": 8.677874487686541, "learning_rate": 3.5251558768843407e-06, "loss": 0.6176, "step": 13618 }, { "epoch": 1.21, "grad_norm": 6.709031712703059, "learning_rate": 3.52446566156231e-06, "loss": 0.6207, "step": 13619 }, { "epoch": 1.22, "grad_norm": 7.7522118303487515, "learning_rate": 3.5237754770388266e-06, "loss": 0.6134, "step": 13620 }, { "epoch": 1.22, "grad_norm": 5.842348690106948, "learning_rate": 3.5230853233282973e-06, "loss": 0.5741, "step": 13621 }, { "epoch": 1.22, "grad_norm": 4.712162914931422, "learning_rate": 3.522395200445129e-06, "loss": 0.5906, "step": 13622 }, { "epoch": 1.22, "grad_norm": 5.931746814081168, "learning_rate": 3.5217051084037234e-06, "loss": 0.6257, "step": 13623 }, { "epoch": 1.22, "grad_norm": 8.90105844462885, "learning_rate": 3.521015047218486e-06, "loss": 0.6049, "step": 13624 }, { "epoch": 1.22, "grad_norm": 7.897725953464819, "learning_rate": 3.5203250169038224e-06, "loss": 0.68, "step": 13625 }, { "epoch": 1.22, "grad_norm": 10.05183484412896, "learning_rate": 3.519635017474132e-06, "loss": 0.6454, "step": 13626 }, { "epoch": 1.22, "grad_norm": 10.911588825782172, "learning_rate": 3.5189450489438186e-06, "loss": 0.584, "step": 13627 }, { "epoch": 1.22, "grad_norm": 6.384114681509384, "learning_rate": 3.5182551113272856e-06, "loss": 0.5876, "step": 13628 }, { "epoch": 1.22, "grad_norm": 6.965074899257299, "learning_rate": 3.51756520463893e-06, "loss": 0.5926, "step": 13629 }, { "epoch": 1.22, "grad_norm": 7.104897477094417, "learning_rate": 3.5168753288931573e-06, "loss": 0.595, "step": 13630 }, { "epoch": 1.22, "grad_norm": 5.156096393268672, "learning_rate": 3.516185484104362e-06, "loss": 0.6082, "step": 13631 }, { "epoch": 1.22, "grad_norm": 7.934671736175371, "learning_rate": 3.5154956702869458e-06, "loss": 0.6235, "step": 13632 }, { "epoch": 1.22, "grad_norm": 6.572038244748409, "learning_rate": 3.5148058874553053e-06, "loss": 0.6237, "step": 13633 }, { "epoch": 1.22, "grad_norm": 6.161589079322032, "learning_rate": 3.5141161356238395e-06, "loss": 0.6385, "step": 13634 }, { "epoch": 1.22, "grad_norm": 5.750867208485521, "learning_rate": 3.513426414806947e-06, "loss": 0.6465, "step": 13635 }, { "epoch": 1.22, "grad_norm": 7.432961772962671, "learning_rate": 3.5127367250190214e-06, "loss": 0.5916, "step": 13636 }, { "epoch": 1.22, "grad_norm": 6.531570560101088, "learning_rate": 3.512047066274459e-06, "loss": 0.5797, "step": 13637 }, { "epoch": 1.22, "grad_norm": 5.858078392919785, "learning_rate": 3.511357438587657e-06, "loss": 0.6343, "step": 13638 }, { "epoch": 1.22, "grad_norm": 6.403997021452365, "learning_rate": 3.5106678419730073e-06, "loss": 0.6436, "step": 13639 }, { "epoch": 1.22, "grad_norm": 6.41242625899477, "learning_rate": 3.5099782764449043e-06, "loss": 0.6087, "step": 13640 }, { "epoch": 1.22, "grad_norm": 11.474015051655877, "learning_rate": 3.5092887420177437e-06, "loss": 0.5978, "step": 13641 }, { "epoch": 1.22, "grad_norm": 7.0520918870069105, "learning_rate": 3.508599238705914e-06, "loss": 0.5919, "step": 13642 }, { "epoch": 1.22, "grad_norm": 8.15430365636395, "learning_rate": 3.5079097665238094e-06, "loss": 0.6149, "step": 13643 }, { "epoch": 1.22, "grad_norm": 5.544066321223188, "learning_rate": 3.5072203254858234e-06, "loss": 0.6559, "step": 13644 }, { "epoch": 1.22, "grad_norm": 8.374510285451224, "learning_rate": 3.5065309156063424e-06, "loss": 0.5872, "step": 13645 }, { "epoch": 1.22, "grad_norm": 7.8094833668540895, "learning_rate": 3.5058415368997572e-06, "loss": 0.6453, "step": 13646 }, { "epoch": 1.22, "grad_norm": 5.8337935450445455, "learning_rate": 3.5051521893804576e-06, "loss": 0.6319, "step": 13647 }, { "epoch": 1.22, "grad_norm": 7.462943200175098, "learning_rate": 3.5044628730628333e-06, "loss": 0.6311, "step": 13648 }, { "epoch": 1.22, "grad_norm": 6.042105511355303, "learning_rate": 3.5037735879612704e-06, "loss": 0.6884, "step": 13649 }, { "epoch": 1.22, "grad_norm": 6.643086314276672, "learning_rate": 3.503084334090157e-06, "loss": 0.5331, "step": 13650 }, { "epoch": 1.22, "grad_norm": 5.924219723632321, "learning_rate": 3.502395111463881e-06, "loss": 0.6517, "step": 13651 }, { "epoch": 1.22, "grad_norm": 6.20928582277627, "learning_rate": 3.5017059200968266e-06, "loss": 0.651, "step": 13652 }, { "epoch": 1.22, "grad_norm": 7.469244963902982, "learning_rate": 3.50101676000338e-06, "loss": 0.5803, "step": 13653 }, { "epoch": 1.22, "grad_norm": 8.312863024084436, "learning_rate": 3.5003276311979268e-06, "loss": 0.5989, "step": 13654 }, { "epoch": 1.22, "grad_norm": 6.144202534261614, "learning_rate": 3.499638533694849e-06, "loss": 0.5665, "step": 13655 }, { "epoch": 1.22, "grad_norm": 9.587468724546763, "learning_rate": 3.4989494675085307e-06, "loss": 0.5859, "step": 13656 }, { "epoch": 1.22, "grad_norm": 7.223086322069463, "learning_rate": 3.4982604326533564e-06, "loss": 0.686, "step": 13657 }, { "epoch": 1.22, "grad_norm": 5.117887076690238, "learning_rate": 3.497571429143708e-06, "loss": 0.6427, "step": 13658 }, { "epoch": 1.22, "grad_norm": 6.165763739887072, "learning_rate": 3.496882456993963e-06, "loss": 0.5731, "step": 13659 }, { "epoch": 1.22, "grad_norm": 8.09498622097635, "learning_rate": 3.4961935162185056e-06, "loss": 0.5587, "step": 13660 }, { "epoch": 1.22, "grad_norm": 6.72486533936278, "learning_rate": 3.4955046068317154e-06, "loss": 0.6503, "step": 13661 }, { "epoch": 1.22, "grad_norm": 6.023933415007256, "learning_rate": 3.494815728847972e-06, "loss": 0.5835, "step": 13662 }, { "epoch": 1.22, "grad_norm": 5.011070614499679, "learning_rate": 3.4941268822816533e-06, "loss": 0.5541, "step": 13663 }, { "epoch": 1.22, "grad_norm": 10.591474001354547, "learning_rate": 3.493438067147139e-06, "loss": 0.6553, "step": 13664 }, { "epoch": 1.22, "grad_norm": 7.144185767060184, "learning_rate": 3.492749283458805e-06, "loss": 0.5777, "step": 13665 }, { "epoch": 1.22, "grad_norm": 6.504227521965311, "learning_rate": 3.4920605312310283e-06, "loss": 0.6418, "step": 13666 }, { "epoch": 1.22, "grad_norm": 5.287541750859735, "learning_rate": 3.491371810478188e-06, "loss": 0.6427, "step": 13667 }, { "epoch": 1.22, "grad_norm": 5.608583146730501, "learning_rate": 3.490683121214655e-06, "loss": 0.6035, "step": 13668 }, { "epoch": 1.22, "grad_norm": 5.299171854694549, "learning_rate": 3.489994463454807e-06, "loss": 0.5516, "step": 13669 }, { "epoch": 1.22, "grad_norm": 6.127466173594637, "learning_rate": 3.489305837213019e-06, "loss": 0.581, "step": 13670 }, { "epoch": 1.22, "grad_norm": 6.348778427654451, "learning_rate": 3.488617242503662e-06, "loss": 0.603, "step": 13671 }, { "epoch": 1.22, "grad_norm": 6.398051689099296, "learning_rate": 3.487928679341111e-06, "loss": 0.6764, "step": 13672 }, { "epoch": 1.22, "grad_norm": 8.50113187552315, "learning_rate": 3.487240147739738e-06, "loss": 0.5823, "step": 13673 }, { "epoch": 1.22, "grad_norm": 6.010880745358703, "learning_rate": 3.486551647713914e-06, "loss": 0.6221, "step": 13674 }, { "epoch": 1.22, "grad_norm": 6.224583280749061, "learning_rate": 3.4858631792780084e-06, "loss": 0.5835, "step": 13675 }, { "epoch": 1.22, "grad_norm": 7.538297862491756, "learning_rate": 3.4851747424463933e-06, "loss": 0.6184, "step": 13676 }, { "epoch": 1.22, "grad_norm": 6.626137788212427, "learning_rate": 3.4844863372334386e-06, "loss": 0.5878, "step": 13677 }, { "epoch": 1.22, "grad_norm": 4.758321681791451, "learning_rate": 3.4837979636535125e-06, "loss": 0.5776, "step": 13678 }, { "epoch": 1.22, "grad_norm": 9.067182589008237, "learning_rate": 3.4831096217209824e-06, "loss": 0.5972, "step": 13679 }, { "epoch": 1.22, "grad_norm": 7.910604666708848, "learning_rate": 3.4824213114502194e-06, "loss": 0.5989, "step": 13680 }, { "epoch": 1.22, "grad_norm": 8.002398584513303, "learning_rate": 3.481733032855586e-06, "loss": 0.6592, "step": 13681 }, { "epoch": 1.22, "grad_norm": 6.135466437030478, "learning_rate": 3.4810447859514517e-06, "loss": 0.6162, "step": 13682 }, { "epoch": 1.22, "grad_norm": 6.121029674490818, "learning_rate": 3.48035657075218e-06, "loss": 0.5445, "step": 13683 }, { "epoch": 1.22, "grad_norm": 9.657794510852804, "learning_rate": 3.479668387272138e-06, "loss": 0.5755, "step": 13684 }, { "epoch": 1.22, "grad_norm": 8.002570491172465, "learning_rate": 3.4789802355256886e-06, "loss": 0.6348, "step": 13685 }, { "epoch": 1.22, "grad_norm": 6.863625871417886, "learning_rate": 3.4782921155271955e-06, "loss": 0.6016, "step": 13686 }, { "epoch": 1.22, "grad_norm": 5.748697924626328, "learning_rate": 3.477604027291024e-06, "loss": 0.6247, "step": 13687 }, { "epoch": 1.22, "grad_norm": 5.194435251414398, "learning_rate": 3.4769159708315325e-06, "loss": 0.5957, "step": 13688 }, { "epoch": 1.22, "grad_norm": 7.318623970362775, "learning_rate": 3.4762279461630854e-06, "loss": 0.5877, "step": 13689 }, { "epoch": 1.22, "grad_norm": 4.492506882571419, "learning_rate": 3.4755399533000415e-06, "loss": 0.5818, "step": 13690 }, { "epoch": 1.22, "grad_norm": 5.660255247698129, "learning_rate": 3.4748519922567635e-06, "loss": 0.6284, "step": 13691 }, { "epoch": 1.22, "grad_norm": 6.165666952655602, "learning_rate": 3.4741640630476103e-06, "loss": 0.6211, "step": 13692 }, { "epoch": 1.22, "grad_norm": 6.302844061391524, "learning_rate": 3.4734761656869404e-06, "loss": 0.6277, "step": 13693 }, { "epoch": 1.22, "grad_norm": 6.47509193220591, "learning_rate": 3.4727883001891123e-06, "loss": 0.6217, "step": 13694 }, { "epoch": 1.22, "grad_norm": 5.97898364845394, "learning_rate": 3.4721004665684844e-06, "loss": 0.6012, "step": 13695 }, { "epoch": 1.22, "grad_norm": 6.899759510333936, "learning_rate": 3.471412664839413e-06, "loss": 0.6356, "step": 13696 }, { "epoch": 1.22, "grad_norm": 7.1652801621887585, "learning_rate": 3.470724895016254e-06, "loss": 0.64, "step": 13697 }, { "epoch": 1.22, "grad_norm": 8.554186527615782, "learning_rate": 3.470037157113365e-06, "loss": 0.6346, "step": 13698 }, { "epoch": 1.22, "grad_norm": 3.945789277809698, "learning_rate": 3.4693494511450987e-06, "loss": 0.6249, "step": 13699 }, { "epoch": 1.22, "grad_norm": 7.93985263334273, "learning_rate": 3.468661777125811e-06, "loss": 0.5837, "step": 13700 }, { "epoch": 1.22, "grad_norm": 6.0448620376442825, "learning_rate": 3.467974135069856e-06, "loss": 0.633, "step": 13701 }, { "epoch": 1.22, "grad_norm": 6.021546296705873, "learning_rate": 3.4672865249915864e-06, "loss": 0.6323, "step": 13702 }, { "epoch": 1.22, "grad_norm": 7.319041769079637, "learning_rate": 3.466598946905353e-06, "loss": 0.5491, "step": 13703 }, { "epoch": 1.22, "grad_norm": 4.41857156286032, "learning_rate": 3.4659114008255073e-06, "loss": 0.583, "step": 13704 }, { "epoch": 1.22, "grad_norm": 5.423507891545533, "learning_rate": 3.465223886766403e-06, "loss": 0.6125, "step": 13705 }, { "epoch": 1.22, "grad_norm": 7.897584245984497, "learning_rate": 3.464536404742388e-06, "loss": 0.5999, "step": 13706 }, { "epoch": 1.22, "grad_norm": 7.462102382313217, "learning_rate": 3.4638489547678126e-06, "loss": 0.5913, "step": 13707 }, { "epoch": 1.22, "grad_norm": 8.588243638263608, "learning_rate": 3.4631615368570275e-06, "loss": 0.5783, "step": 13708 }, { "epoch": 1.22, "grad_norm": 5.629166074858269, "learning_rate": 3.4624741510243787e-06, "loss": 0.6265, "step": 13709 }, { "epoch": 1.22, "grad_norm": 8.426405063558105, "learning_rate": 3.4617867972842146e-06, "loss": 0.6383, "step": 13710 }, { "epoch": 1.22, "grad_norm": 5.471963568692138, "learning_rate": 3.4610994756508844e-06, "loss": 0.5508, "step": 13711 }, { "epoch": 1.22, "grad_norm": 6.934229493168135, "learning_rate": 3.4604121861387308e-06, "loss": 0.6287, "step": 13712 }, { "epoch": 1.22, "grad_norm": 7.65844871668796, "learning_rate": 3.4597249287621004e-06, "loss": 0.6279, "step": 13713 }, { "epoch": 1.22, "grad_norm": 8.068638360336003, "learning_rate": 3.4590377035353412e-06, "loss": 0.5806, "step": 13714 }, { "epoch": 1.22, "grad_norm": 5.739169334062597, "learning_rate": 3.458350510472794e-06, "loss": 0.6107, "step": 13715 }, { "epoch": 1.22, "grad_norm": 6.731840569681148, "learning_rate": 3.4576633495888055e-06, "loss": 0.6111, "step": 13716 }, { "epoch": 1.22, "grad_norm": 8.029395218125194, "learning_rate": 3.456976220897715e-06, "loss": 0.6139, "step": 13717 }, { "epoch": 1.22, "grad_norm": 5.560664634787917, "learning_rate": 3.4562891244138673e-06, "loss": 0.611, "step": 13718 }, { "epoch": 1.22, "grad_norm": 8.231727208473416, "learning_rate": 3.455602060151603e-06, "loss": 0.6081, "step": 13719 }, { "epoch": 1.22, "grad_norm": 4.162461830374463, "learning_rate": 3.4549150281252635e-06, "loss": 0.6066, "step": 13720 }, { "epoch": 1.22, "grad_norm": 6.072580154935637, "learning_rate": 3.45422802834919e-06, "loss": 0.5566, "step": 13721 }, { "epoch": 1.22, "grad_norm": 5.852888006938338, "learning_rate": 3.4535410608377206e-06, "loss": 0.6079, "step": 13722 }, { "epoch": 1.22, "grad_norm": 7.238629582356523, "learning_rate": 3.452854125605194e-06, "loss": 0.586, "step": 13723 }, { "epoch": 1.22, "grad_norm": 8.44460878540945, "learning_rate": 3.452167222665951e-06, "loss": 0.6166, "step": 13724 }, { "epoch": 1.22, "grad_norm": 5.071057968557668, "learning_rate": 3.4514803520343266e-06, "loss": 0.6116, "step": 13725 }, { "epoch": 1.22, "grad_norm": 6.440454964130839, "learning_rate": 3.4507935137246584e-06, "loss": 0.6096, "step": 13726 }, { "epoch": 1.22, "grad_norm": 7.352271576079093, "learning_rate": 3.450106707751284e-06, "loss": 0.5797, "step": 13727 }, { "epoch": 1.22, "grad_norm": 4.7382830608654185, "learning_rate": 3.449419934128536e-06, "loss": 0.5818, "step": 13728 }, { "epoch": 1.22, "grad_norm": 6.806600843786048, "learning_rate": 3.4487331928707522e-06, "loss": 0.6335, "step": 13729 }, { "epoch": 1.22, "grad_norm": 5.091271184703984, "learning_rate": 3.4480464839922665e-06, "loss": 0.5975, "step": 13730 }, { "epoch": 1.22, "grad_norm": 4.737315887974828, "learning_rate": 3.447359807507413e-06, "loss": 0.578, "step": 13731 }, { "epoch": 1.23, "grad_norm": 6.621983206801497, "learning_rate": 3.446673163430521e-06, "loss": 0.6085, "step": 13732 }, { "epoch": 1.23, "grad_norm": 5.350291413618931, "learning_rate": 3.445986551775925e-06, "loss": 0.5824, "step": 13733 }, { "epoch": 1.23, "grad_norm": 5.546765446452017, "learning_rate": 3.4452999725579577e-06, "loss": 0.5421, "step": 13734 }, { "epoch": 1.23, "grad_norm": 7.268518222855655, "learning_rate": 3.4446134257909474e-06, "loss": 0.6305, "step": 13735 }, { "epoch": 1.23, "grad_norm": 6.888291633438685, "learning_rate": 3.443926911489226e-06, "loss": 0.599, "step": 13736 }, { "epoch": 1.23, "grad_norm": 6.83516913225478, "learning_rate": 3.443240429667123e-06, "loss": 0.5442, "step": 13737 }, { "epoch": 1.23, "grad_norm": 7.133745252992368, "learning_rate": 3.4425539803389663e-06, "loss": 0.6044, "step": 13738 }, { "epoch": 1.23, "grad_norm": 7.650552730660366, "learning_rate": 3.4418675635190844e-06, "loss": 0.5784, "step": 13739 }, { "epoch": 1.23, "grad_norm": 7.180726383210313, "learning_rate": 3.441181179221806e-06, "loss": 0.571, "step": 13740 }, { "epoch": 1.23, "grad_norm": 5.260247412255486, "learning_rate": 3.4404948274614554e-06, "loss": 0.5982, "step": 13741 }, { "epoch": 1.23, "grad_norm": 5.208784260931408, "learning_rate": 3.4398085082523604e-06, "loss": 0.6217, "step": 13742 }, { "epoch": 1.23, "grad_norm": 7.874968863367118, "learning_rate": 3.4391222216088467e-06, "loss": 0.6362, "step": 13743 }, { "epoch": 1.23, "grad_norm": 7.163676296293012, "learning_rate": 3.4384359675452373e-06, "loss": 0.6549, "step": 13744 }, { "epoch": 1.23, "grad_norm": 4.942995236625064, "learning_rate": 3.437749746075859e-06, "loss": 0.6791, "step": 13745 }, { "epoch": 1.23, "grad_norm": 5.385802563310941, "learning_rate": 3.4370635572150324e-06, "loss": 0.5785, "step": 13746 }, { "epoch": 1.23, "grad_norm": 6.628853323665255, "learning_rate": 3.4363774009770817e-06, "loss": 0.5475, "step": 13747 }, { "epoch": 1.23, "grad_norm": 7.107363138269978, "learning_rate": 3.4356912773763274e-06, "loss": 0.5745, "step": 13748 }, { "epoch": 1.23, "grad_norm": 6.239732325654228, "learning_rate": 3.4350051864270923e-06, "loss": 0.6126, "step": 13749 }, { "epoch": 1.23, "grad_norm": 7.376257752567354, "learning_rate": 3.4343191281436973e-06, "loss": 0.6064, "step": 13750 }, { "epoch": 1.23, "grad_norm": 7.527535559973688, "learning_rate": 3.4336331025404613e-06, "loss": 0.5809, "step": 13751 }, { "epoch": 1.23, "grad_norm": 6.133310473979701, "learning_rate": 3.4329471096317035e-06, "loss": 0.659, "step": 13752 }, { "epoch": 1.23, "grad_norm": 7.316843532550631, "learning_rate": 3.432261149431744e-06, "loss": 0.6286, "step": 13753 }, { "epoch": 1.23, "grad_norm": 5.980576901338581, "learning_rate": 3.4315752219548985e-06, "loss": 0.6268, "step": 13754 }, { "epoch": 1.23, "grad_norm": 6.732802479643587, "learning_rate": 3.430889327215486e-06, "loss": 0.5918, "step": 13755 }, { "epoch": 1.23, "grad_norm": 6.9217898434268434, "learning_rate": 3.4302034652278226e-06, "loss": 0.5873, "step": 13756 }, { "epoch": 1.23, "grad_norm": 7.243299592992757, "learning_rate": 3.4295176360062244e-06, "loss": 0.5632, "step": 13757 }, { "epoch": 1.23, "grad_norm": 8.265570259431275, "learning_rate": 3.4288318395650066e-06, "loss": 0.6179, "step": 13758 }, { "epoch": 1.23, "grad_norm": 7.044740321188384, "learning_rate": 3.4281460759184825e-06, "loss": 0.5506, "step": 13759 }, { "epoch": 1.23, "grad_norm": 5.684671330764707, "learning_rate": 3.4274603450809686e-06, "loss": 0.6246, "step": 13760 }, { "epoch": 1.23, "grad_norm": 7.275206070781751, "learning_rate": 3.426774647066774e-06, "loss": 0.6113, "step": 13761 }, { "epoch": 1.23, "grad_norm": 5.536551062964168, "learning_rate": 3.4260889818902133e-06, "loss": 0.5916, "step": 13762 }, { "epoch": 1.23, "grad_norm": 6.087734492598801, "learning_rate": 3.425403349565599e-06, "loss": 0.5939, "step": 13763 }, { "epoch": 1.23, "grad_norm": 8.554672226656763, "learning_rate": 3.4247177501072414e-06, "loss": 0.5723, "step": 13764 }, { "epoch": 1.23, "grad_norm": 8.814128736950552, "learning_rate": 3.424032183529451e-06, "loss": 0.599, "step": 13765 }, { "epoch": 1.23, "grad_norm": 6.96271757032685, "learning_rate": 3.4233466498465375e-06, "loss": 0.5832, "step": 13766 }, { "epoch": 1.23, "grad_norm": 5.47901840707784, "learning_rate": 3.422661149072809e-06, "loss": 0.5625, "step": 13767 }, { "epoch": 1.23, "grad_norm": 7.073526204867449, "learning_rate": 3.4219756812225763e-06, "loss": 0.6381, "step": 13768 }, { "epoch": 1.23, "grad_norm": 8.283941346070618, "learning_rate": 3.421290246310144e-06, "loss": 0.6649, "step": 13769 }, { "epoch": 1.23, "grad_norm": 5.874826238269134, "learning_rate": 3.420604844349821e-06, "loss": 0.5874, "step": 13770 }, { "epoch": 1.23, "grad_norm": 5.485772770941453, "learning_rate": 3.4199194753559136e-06, "loss": 0.6396, "step": 13771 }, { "epoch": 1.23, "grad_norm": 6.505555517534428, "learning_rate": 3.4192341393427257e-06, "loss": 0.642, "step": 13772 }, { "epoch": 1.23, "grad_norm": 8.079182563210349, "learning_rate": 3.418548836324563e-06, "loss": 0.6432, "step": 13773 }, { "epoch": 1.23, "grad_norm": 7.366760566509017, "learning_rate": 3.4178635663157333e-06, "loss": 0.586, "step": 13774 }, { "epoch": 1.23, "grad_norm": 7.99856690471874, "learning_rate": 3.417178329330535e-06, "loss": 0.6047, "step": 13775 }, { "epoch": 1.23, "grad_norm": 6.09555258764799, "learning_rate": 3.4164931253832724e-06, "loss": 0.6109, "step": 13776 }, { "epoch": 1.23, "grad_norm": 6.201360711269401, "learning_rate": 3.4158079544882474e-06, "loss": 0.5848, "step": 13777 }, { "epoch": 1.23, "grad_norm": 7.516912977370182, "learning_rate": 3.4151228166597627e-06, "loss": 0.6389, "step": 13778 }, { "epoch": 1.23, "grad_norm": 8.46292951364681, "learning_rate": 3.414437711912118e-06, "loss": 0.6119, "step": 13779 }, { "epoch": 1.23, "grad_norm": 5.9028876320684525, "learning_rate": 3.4137526402596135e-06, "loss": 0.6213, "step": 13780 }, { "epoch": 1.23, "grad_norm": 7.272898241021619, "learning_rate": 3.4130676017165497e-06, "loss": 0.5905, "step": 13781 }, { "epoch": 1.23, "grad_norm": 7.056132214435461, "learning_rate": 3.4123825962972236e-06, "loss": 0.5966, "step": 13782 }, { "epoch": 1.23, "grad_norm": 6.614495605729986, "learning_rate": 3.4116976240159337e-06, "loss": 0.5658, "step": 13783 }, { "epoch": 1.23, "grad_norm": 5.370092959658471, "learning_rate": 3.4110126848869798e-06, "loss": 0.5586, "step": 13784 }, { "epoch": 1.23, "grad_norm": 5.658755901739205, "learning_rate": 3.4103277789246537e-06, "loss": 0.6569, "step": 13785 }, { "epoch": 1.23, "grad_norm": 8.359420472165821, "learning_rate": 3.4096429061432544e-06, "loss": 0.6212, "step": 13786 }, { "epoch": 1.23, "grad_norm": 5.398281183844312, "learning_rate": 3.4089580665570775e-06, "loss": 0.5778, "step": 13787 }, { "epoch": 1.23, "grad_norm": 6.049647571079885, "learning_rate": 3.408273260180418e-06, "loss": 0.6448, "step": 13788 }, { "epoch": 1.23, "grad_norm": 7.492346171167204, "learning_rate": 3.4075884870275667e-06, "loss": 0.5659, "step": 13789 }, { "epoch": 1.23, "grad_norm": 7.8041512664030455, "learning_rate": 3.4069037471128173e-06, "loss": 0.6039, "step": 13790 }, { "epoch": 1.23, "grad_norm": 6.394143938803646, "learning_rate": 3.4062190404504654e-06, "loss": 0.5668, "step": 13791 }, { "epoch": 1.23, "grad_norm": 8.711275653626348, "learning_rate": 3.4055343670547993e-06, "loss": 0.5659, "step": 13792 }, { "epoch": 1.23, "grad_norm": 6.03300205176709, "learning_rate": 3.4048497269401114e-06, "loss": 0.6199, "step": 13793 }, { "epoch": 1.23, "grad_norm": 6.264464209572731, "learning_rate": 3.4041651201206925e-06, "loss": 0.575, "step": 13794 }, { "epoch": 1.23, "grad_norm": 7.104780004814482, "learning_rate": 3.4034805466108324e-06, "loss": 0.6275, "step": 13795 }, { "epoch": 1.23, "grad_norm": 7.342228665765283, "learning_rate": 3.4027960064248183e-06, "loss": 0.5858, "step": 13796 }, { "epoch": 1.23, "grad_norm": 7.008383516219498, "learning_rate": 3.402111499576941e-06, "loss": 0.5789, "step": 13797 }, { "epoch": 1.23, "grad_norm": 5.652815789509286, "learning_rate": 3.4014270260814864e-06, "loss": 0.5333, "step": 13798 }, { "epoch": 1.23, "grad_norm": 8.439345461799373, "learning_rate": 3.400742585952741e-06, "loss": 0.6463, "step": 13799 }, { "epoch": 1.23, "grad_norm": 5.091178086429984, "learning_rate": 3.4000581792049937e-06, "loss": 0.5408, "step": 13800 }, { "epoch": 1.23, "grad_norm": 5.886366345203147, "learning_rate": 3.399373805852526e-06, "loss": 0.5939, "step": 13801 }, { "epoch": 1.23, "grad_norm": 8.027574769572873, "learning_rate": 3.3986894659096257e-06, "loss": 0.5597, "step": 13802 }, { "epoch": 1.23, "grad_norm": 6.066205263703978, "learning_rate": 3.3980051593905784e-06, "loss": 0.5938, "step": 13803 }, { "epoch": 1.23, "grad_norm": 7.223139143452273, "learning_rate": 3.3973208863096628e-06, "loss": 0.5391, "step": 13804 }, { "epoch": 1.23, "grad_norm": 6.302200513792785, "learning_rate": 3.396636646681164e-06, "loss": 0.6796, "step": 13805 }, { "epoch": 1.23, "grad_norm": 6.8603332403046915, "learning_rate": 3.3959524405193634e-06, "loss": 0.6222, "step": 13806 }, { "epoch": 1.23, "grad_norm": 5.064098174291945, "learning_rate": 3.3952682678385437e-06, "loss": 0.5643, "step": 13807 }, { "epoch": 1.23, "grad_norm": 9.321930834602918, "learning_rate": 3.3945841286529833e-06, "loss": 0.5889, "step": 13808 }, { "epoch": 1.23, "grad_norm": 7.445381032130245, "learning_rate": 3.3939000229769637e-06, "loss": 0.6274, "step": 13809 }, { "epoch": 1.23, "grad_norm": 5.225767120114856, "learning_rate": 3.3932159508247653e-06, "loss": 0.556, "step": 13810 }, { "epoch": 1.23, "grad_norm": 6.240831379631339, "learning_rate": 3.3925319122106637e-06, "loss": 0.5938, "step": 13811 }, { "epoch": 1.23, "grad_norm": 6.356868316352738, "learning_rate": 3.391847907148938e-06, "loss": 0.6394, "step": 13812 }, { "epoch": 1.23, "grad_norm": 6.591946369639385, "learning_rate": 3.3911639356538666e-06, "loss": 0.5651, "step": 13813 }, { "epoch": 1.23, "grad_norm": 6.730617109324473, "learning_rate": 3.390479997739724e-06, "loss": 0.4826, "step": 13814 }, { "epoch": 1.23, "grad_norm": 5.423344655866369, "learning_rate": 3.3897960934207863e-06, "loss": 0.5878, "step": 13815 }, { "epoch": 1.23, "grad_norm": 7.957952268398784, "learning_rate": 3.3891122227113298e-06, "loss": 0.6509, "step": 13816 }, { "epoch": 1.23, "grad_norm": 7.868810673812395, "learning_rate": 3.3884283856256295e-06, "loss": 0.5637, "step": 13817 }, { "epoch": 1.23, "grad_norm": 5.117163318406728, "learning_rate": 3.3877445821779548e-06, "loss": 0.5561, "step": 13818 }, { "epoch": 1.23, "grad_norm": 5.960162593175425, "learning_rate": 3.387060812382581e-06, "loss": 0.5727, "step": 13819 }, { "epoch": 1.23, "grad_norm": 6.6040776156262435, "learning_rate": 3.386377076253782e-06, "loss": 0.6372, "step": 13820 }, { "epoch": 1.23, "grad_norm": 5.663369105859036, "learning_rate": 3.385693373805827e-06, "loss": 0.5975, "step": 13821 }, { "epoch": 1.23, "grad_norm": 6.065868476541639, "learning_rate": 3.3850097050529867e-06, "loss": 0.6123, "step": 13822 }, { "epoch": 1.23, "grad_norm": 5.558087024508988, "learning_rate": 3.3843260700095336e-06, "loss": 0.6088, "step": 13823 }, { "epoch": 1.23, "grad_norm": 6.83926611704635, "learning_rate": 3.383642468689734e-06, "loss": 0.5856, "step": 13824 }, { "epoch": 1.23, "grad_norm": 7.536724242089029, "learning_rate": 3.382958901107859e-06, "loss": 0.5655, "step": 13825 }, { "epoch": 1.23, "grad_norm": 8.955462900309438, "learning_rate": 3.382275367278176e-06, "loss": 0.5679, "step": 13826 }, { "epoch": 1.23, "grad_norm": 7.423887103565648, "learning_rate": 3.3815918672149505e-06, "loss": 0.5305, "step": 13827 }, { "epoch": 1.23, "grad_norm": 6.803187189764014, "learning_rate": 3.3809084009324512e-06, "loss": 0.6026, "step": 13828 }, { "epoch": 1.23, "grad_norm": 5.972950532994315, "learning_rate": 3.3802249684449445e-06, "loss": 0.5457, "step": 13829 }, { "epoch": 1.23, "grad_norm": 5.139203312561252, "learning_rate": 3.3795415697666935e-06, "loss": 0.6229, "step": 13830 }, { "epoch": 1.23, "grad_norm": 8.429347743674175, "learning_rate": 3.3788582049119622e-06, "loss": 0.5935, "step": 13831 }, { "epoch": 1.23, "grad_norm": 5.716283593459564, "learning_rate": 3.3781748738950186e-06, "loss": 0.6444, "step": 13832 }, { "epoch": 1.23, "grad_norm": 6.851309906996595, "learning_rate": 3.377491576730122e-06, "loss": 0.5633, "step": 13833 }, { "epoch": 1.23, "grad_norm": 6.347437490178266, "learning_rate": 3.3768083134315336e-06, "loss": 0.7057, "step": 13834 }, { "epoch": 1.23, "grad_norm": 6.007478878279024, "learning_rate": 3.376125084013518e-06, "loss": 0.5915, "step": 13835 }, { "epoch": 1.23, "grad_norm": 7.1383571268190105, "learning_rate": 3.375441888490335e-06, "loss": 0.5933, "step": 13836 }, { "epoch": 1.23, "grad_norm": 8.49990888404691, "learning_rate": 3.374758726876245e-06, "loss": 0.6307, "step": 13837 }, { "epoch": 1.23, "grad_norm": 6.1092539465595435, "learning_rate": 3.3740755991855063e-06, "loss": 0.603, "step": 13838 }, { "epoch": 1.23, "grad_norm": 12.438065399448549, "learning_rate": 3.3733925054323802e-06, "loss": 0.6006, "step": 13839 }, { "epoch": 1.23, "grad_norm": 8.386851136374954, "learning_rate": 3.3727094456311227e-06, "loss": 0.582, "step": 13840 }, { "epoch": 1.23, "grad_norm": 5.714682309771767, "learning_rate": 3.372026419795992e-06, "loss": 0.5546, "step": 13841 }, { "epoch": 1.23, "grad_norm": 6.287703828468095, "learning_rate": 3.3713434279412437e-06, "loss": 0.6194, "step": 13842 }, { "epoch": 1.23, "grad_norm": 5.436042608619036, "learning_rate": 3.3706604700811353e-06, "loss": 0.6053, "step": 13843 }, { "epoch": 1.24, "grad_norm": 6.6547446074045675, "learning_rate": 3.3699775462299224e-06, "loss": 0.5812, "step": 13844 }, { "epoch": 1.24, "grad_norm": 6.998792279460274, "learning_rate": 3.3692946564018567e-06, "loss": 0.569, "step": 13845 }, { "epoch": 1.24, "grad_norm": 5.89599515321578, "learning_rate": 3.368611800611197e-06, "loss": 0.6312, "step": 13846 }, { "epoch": 1.24, "grad_norm": 4.720132327775423, "learning_rate": 3.36792897887219e-06, "loss": 0.5919, "step": 13847 }, { "epoch": 1.24, "grad_norm": 6.314852840694335, "learning_rate": 3.367246191199092e-06, "loss": 0.5844, "step": 13848 }, { "epoch": 1.24, "grad_norm": 7.4835013929533165, "learning_rate": 3.3665634376061553e-06, "loss": 0.6061, "step": 13849 }, { "epoch": 1.24, "grad_norm": 4.866757715312992, "learning_rate": 3.3658807181076282e-06, "loss": 0.5646, "step": 13850 }, { "epoch": 1.24, "grad_norm": 5.459109643119882, "learning_rate": 3.3651980327177635e-06, "loss": 0.5896, "step": 13851 }, { "epoch": 1.24, "grad_norm": 6.748618680906519, "learning_rate": 3.364515381450809e-06, "loss": 0.5752, "step": 13852 }, { "epoch": 1.24, "grad_norm": 8.500540691977445, "learning_rate": 3.3638327643210146e-06, "loss": 0.5663, "step": 13853 }, { "epoch": 1.24, "grad_norm": 6.935904536868631, "learning_rate": 3.363150181342629e-06, "loss": 0.5755, "step": 13854 }, { "epoch": 1.24, "grad_norm": 5.79234544378916, "learning_rate": 3.3624676325298974e-06, "loss": 0.6437, "step": 13855 }, { "epoch": 1.24, "grad_norm": 5.910852941472072, "learning_rate": 3.3617851178970675e-06, "loss": 0.5789, "step": 13856 }, { "epoch": 1.24, "grad_norm": 5.748201795773867, "learning_rate": 3.3611026374583867e-06, "loss": 0.5369, "step": 13857 }, { "epoch": 1.24, "grad_norm": 7.355561238021794, "learning_rate": 3.360420191228099e-06, "loss": 0.6014, "step": 13858 }, { "epoch": 1.24, "grad_norm": 7.816483051637275, "learning_rate": 3.359737779220449e-06, "loss": 0.5995, "step": 13859 }, { "epoch": 1.24, "grad_norm": 6.8680668432124, "learning_rate": 3.3590554014496822e-06, "loss": 0.612, "step": 13860 }, { "epoch": 1.24, "grad_norm": 6.7736035368089755, "learning_rate": 3.3583730579300395e-06, "loss": 0.603, "step": 13861 }, { "epoch": 1.24, "grad_norm": 6.021797996647345, "learning_rate": 3.3576907486757624e-06, "loss": 0.5434, "step": 13862 }, { "epoch": 1.24, "grad_norm": 7.023400251767776, "learning_rate": 3.3570084737010955e-06, "loss": 0.5884, "step": 13863 }, { "epoch": 1.24, "grad_norm": 7.706593096474654, "learning_rate": 3.3563262330202794e-06, "loss": 0.6153, "step": 13864 }, { "epoch": 1.24, "grad_norm": 5.911287300481809, "learning_rate": 3.355644026647552e-06, "loss": 0.6135, "step": 13865 }, { "epoch": 1.24, "grad_norm": 4.370784763008989, "learning_rate": 3.3549618545971552e-06, "loss": 0.6348, "step": 13866 }, { "epoch": 1.24, "grad_norm": 7.582992648057642, "learning_rate": 3.3542797168833274e-06, "loss": 0.6024, "step": 13867 }, { "epoch": 1.24, "grad_norm": 6.098431327471657, "learning_rate": 3.353597613520306e-06, "loss": 0.6541, "step": 13868 }, { "epoch": 1.24, "grad_norm": 6.731793667342119, "learning_rate": 3.352915544522329e-06, "loss": 0.5625, "step": 13869 }, { "epoch": 1.24, "grad_norm": 6.176387696693082, "learning_rate": 3.3522335099036336e-06, "loss": 0.6352, "step": 13870 }, { "epoch": 1.24, "grad_norm": 6.167264657918009, "learning_rate": 3.351551509678454e-06, "loss": 0.6581, "step": 13871 }, { "epoch": 1.24, "grad_norm": 7.54501724410477, "learning_rate": 3.3508695438610273e-06, "loss": 0.6405, "step": 13872 }, { "epoch": 1.24, "grad_norm": 7.153118358778522, "learning_rate": 3.3501876124655874e-06, "loss": 0.5866, "step": 13873 }, { "epoch": 1.24, "grad_norm": 6.291358473005948, "learning_rate": 3.3495057155063673e-06, "loss": 0.6125, "step": 13874 }, { "epoch": 1.24, "grad_norm": 8.214560423136342, "learning_rate": 3.348823852997604e-06, "loss": 0.5992, "step": 13875 }, { "epoch": 1.24, "grad_norm": 4.1187240087199255, "learning_rate": 3.348142024953523e-06, "loss": 0.5912, "step": 13876 }, { "epoch": 1.24, "grad_norm": 6.386724419656915, "learning_rate": 3.347460231388361e-06, "loss": 0.6342, "step": 13877 }, { "epoch": 1.24, "grad_norm": 6.835162587074977, "learning_rate": 3.346778472316348e-06, "loss": 0.544, "step": 13878 }, { "epoch": 1.24, "grad_norm": 6.719989010017606, "learning_rate": 3.3460967477517127e-06, "loss": 0.563, "step": 13879 }, { "epoch": 1.24, "grad_norm": 8.573657042383761, "learning_rate": 3.3454150577086876e-06, "loss": 0.5899, "step": 13880 }, { "epoch": 1.24, "grad_norm": 5.649239763244389, "learning_rate": 3.344733402201498e-06, "loss": 0.5953, "step": 13881 }, { "epoch": 1.24, "grad_norm": 6.471414105137186, "learning_rate": 3.344051781244374e-06, "loss": 0.6382, "step": 13882 }, { "epoch": 1.24, "grad_norm": 6.674510955425737, "learning_rate": 3.3433701948515435e-06, "loss": 0.5887, "step": 13883 }, { "epoch": 1.24, "grad_norm": 5.767751906944727, "learning_rate": 3.3426886430372307e-06, "loss": 0.6019, "step": 13884 }, { "epoch": 1.24, "grad_norm": 5.49385661001288, "learning_rate": 3.3420071258156638e-06, "loss": 0.5548, "step": 13885 }, { "epoch": 1.24, "grad_norm": 7.108482465337122, "learning_rate": 3.3413256432010683e-06, "loss": 0.5754, "step": 13886 }, { "epoch": 1.24, "grad_norm": 5.077595090303393, "learning_rate": 3.3406441952076664e-06, "loss": 0.5733, "step": 13887 }, { "epoch": 1.24, "grad_norm": 9.84980552106509, "learning_rate": 3.3399627818496827e-06, "loss": 0.5762, "step": 13888 }, { "epoch": 1.24, "grad_norm": 6.207316334677537, "learning_rate": 3.339281403141342e-06, "loss": 0.576, "step": 13889 }, { "epoch": 1.24, "grad_norm": 5.347552823153011, "learning_rate": 3.3386000590968654e-06, "loss": 0.5775, "step": 13890 }, { "epoch": 1.24, "grad_norm": 6.5630194323878, "learning_rate": 3.3379187497304723e-06, "loss": 0.6647, "step": 13891 }, { "epoch": 1.24, "grad_norm": 6.367205384942764, "learning_rate": 3.337237475056386e-06, "loss": 0.5469, "step": 13892 }, { "epoch": 1.24, "grad_norm": 4.879093139408942, "learning_rate": 3.3365562350888263e-06, "loss": 0.5868, "step": 13893 }, { "epoch": 1.24, "grad_norm": 8.702342431011823, "learning_rate": 3.335875029842012e-06, "loss": 0.6109, "step": 13894 }, { "epoch": 1.24, "grad_norm": 6.516611780673539, "learning_rate": 3.3351938593301613e-06, "loss": 0.5633, "step": 13895 }, { "epoch": 1.24, "grad_norm": 7.024793299945083, "learning_rate": 3.3345127235674944e-06, "loss": 0.5977, "step": 13896 }, { "epoch": 1.24, "grad_norm": 6.48420557814582, "learning_rate": 3.333831622568226e-06, "loss": 0.6032, "step": 13897 }, { "epoch": 1.24, "grad_norm": 6.034627900049218, "learning_rate": 3.3331505563465736e-06, "loss": 0.6097, "step": 13898 }, { "epoch": 1.24, "grad_norm": 6.651525007556082, "learning_rate": 3.3324695249167538e-06, "loss": 0.5998, "step": 13899 }, { "epoch": 1.24, "grad_norm": 7.592706365074424, "learning_rate": 3.3317885282929806e-06, "loss": 0.632, "step": 13900 }, { "epoch": 1.24, "grad_norm": 5.760941586381532, "learning_rate": 3.3311075664894676e-06, "loss": 0.625, "step": 13901 }, { "epoch": 1.24, "grad_norm": 8.367649246904879, "learning_rate": 3.330426639520431e-06, "loss": 0.6341, "step": 13902 }, { "epoch": 1.24, "grad_norm": 5.723750576591715, "learning_rate": 3.3297457474000804e-06, "loss": 0.5599, "step": 13903 }, { "epoch": 1.24, "grad_norm": 6.779393195185581, "learning_rate": 3.3290648901426327e-06, "loss": 0.6007, "step": 13904 }, { "epoch": 1.24, "grad_norm": 5.79752474451026, "learning_rate": 3.3283840677622925e-06, "loss": 0.6244, "step": 13905 }, { "epoch": 1.24, "grad_norm": 6.940958944924962, "learning_rate": 3.3277032802732765e-06, "loss": 0.5736, "step": 13906 }, { "epoch": 1.24, "grad_norm": 7.040517495967791, "learning_rate": 3.3270225276897903e-06, "loss": 0.6132, "step": 13907 }, { "epoch": 1.24, "grad_norm": 6.740321827810527, "learning_rate": 3.326341810026046e-06, "loss": 0.5642, "step": 13908 }, { "epoch": 1.24, "grad_norm": 5.758758085012527, "learning_rate": 3.3256611272962514e-06, "loss": 0.5724, "step": 13909 }, { "epoch": 1.24, "grad_norm": 5.960155427388263, "learning_rate": 3.3249804795146135e-06, "loss": 0.6378, "step": 13910 }, { "epoch": 1.24, "grad_norm": 8.627263009727804, "learning_rate": 3.324299866695339e-06, "loss": 0.6433, "step": 13911 }, { "epoch": 1.24, "grad_norm": 8.965456731788608, "learning_rate": 3.323619288852637e-06, "loss": 0.6392, "step": 13912 }, { "epoch": 1.24, "grad_norm": 5.998165146975378, "learning_rate": 3.3229387460007095e-06, "loss": 0.6126, "step": 13913 }, { "epoch": 1.24, "grad_norm": 5.6404390822475365, "learning_rate": 3.322258238153764e-06, "loss": 0.5596, "step": 13914 }, { "epoch": 1.24, "grad_norm": 6.56688601281048, "learning_rate": 3.3215777653260033e-06, "loss": 0.6059, "step": 13915 }, { "epoch": 1.24, "grad_norm": 6.089792394264766, "learning_rate": 3.3208973275316305e-06, "loss": 0.552, "step": 13916 }, { "epoch": 1.24, "grad_norm": 5.390239928363491, "learning_rate": 3.3202169247848505e-06, "loss": 0.6008, "step": 13917 }, { "epoch": 1.24, "grad_norm": 7.560480473525067, "learning_rate": 3.3195365570998627e-06, "loss": 0.5421, "step": 13918 }, { "epoch": 1.24, "grad_norm": 5.875405081928904, "learning_rate": 3.3188562244908697e-06, "loss": 0.6854, "step": 13919 }, { "epoch": 1.24, "grad_norm": 5.851865931709931, "learning_rate": 3.31817592697207e-06, "loss": 0.6307, "step": 13920 }, { "epoch": 1.24, "grad_norm": 4.597350905407898, "learning_rate": 3.3174956645576644e-06, "loss": 0.6356, "step": 13921 }, { "epoch": 1.24, "grad_norm": 4.689732060899616, "learning_rate": 3.316815437261853e-06, "loss": 0.5917, "step": 13922 }, { "epoch": 1.24, "grad_norm": 8.696663644990037, "learning_rate": 3.3161352450988337e-06, "loss": 0.6373, "step": 13923 }, { "epoch": 1.24, "grad_norm": 4.525005208182841, "learning_rate": 3.3154550880828025e-06, "loss": 0.5755, "step": 13924 }, { "epoch": 1.24, "grad_norm": 7.714016680325924, "learning_rate": 3.314774966227958e-06, "loss": 0.587, "step": 13925 }, { "epoch": 1.24, "grad_norm": 5.581185838256802, "learning_rate": 3.3140948795484952e-06, "loss": 0.6407, "step": 13926 }, { "epoch": 1.24, "grad_norm": 5.9696007597262435, "learning_rate": 3.3134148280586104e-06, "loss": 0.565, "step": 13927 }, { "epoch": 1.24, "grad_norm": 8.226026397727015, "learning_rate": 3.312734811772497e-06, "loss": 0.612, "step": 13928 }, { "epoch": 1.24, "grad_norm": 7.337696780749672, "learning_rate": 3.312054830704348e-06, "loss": 0.6475, "step": 13929 }, { "epoch": 1.24, "grad_norm": 6.901229962925826, "learning_rate": 3.31137488486836e-06, "loss": 0.6021, "step": 13930 }, { "epoch": 1.24, "grad_norm": 6.307436812868264, "learning_rate": 3.310694974278722e-06, "loss": 0.5735, "step": 13931 }, { "epoch": 1.24, "grad_norm": 4.19860041895276, "learning_rate": 3.310015098949629e-06, "loss": 0.62, "step": 13932 }, { "epoch": 1.24, "grad_norm": 6.701268129243264, "learning_rate": 3.3093352588952677e-06, "loss": 0.5944, "step": 13933 }, { "epoch": 1.24, "grad_norm": 6.785709252767324, "learning_rate": 3.308655454129831e-06, "loss": 0.5876, "step": 13934 }, { "epoch": 1.24, "grad_norm": 6.525936507433927, "learning_rate": 3.307975684667507e-06, "loss": 0.5913, "step": 13935 }, { "epoch": 1.24, "grad_norm": 6.0085616853388775, "learning_rate": 3.3072959505224855e-06, "loss": 0.5911, "step": 13936 }, { "epoch": 1.24, "grad_norm": 9.018474483307616, "learning_rate": 3.306616251708955e-06, "loss": 0.6248, "step": 13937 }, { "epoch": 1.24, "grad_norm": 5.494178558542679, "learning_rate": 3.3059365882411004e-06, "loss": 0.5909, "step": 13938 }, { "epoch": 1.24, "grad_norm": 8.937744527601746, "learning_rate": 3.3052569601331096e-06, "loss": 0.6206, "step": 13939 }, { "epoch": 1.24, "grad_norm": 5.665924616097426, "learning_rate": 3.30457736739917e-06, "loss": 0.5967, "step": 13940 }, { "epoch": 1.24, "grad_norm": 5.170349005519378, "learning_rate": 3.303897810053463e-06, "loss": 0.6451, "step": 13941 }, { "epoch": 1.24, "grad_norm": 7.132660501036077, "learning_rate": 3.3032182881101755e-06, "loss": 0.5742, "step": 13942 }, { "epoch": 1.24, "grad_norm": 6.142602706370649, "learning_rate": 3.3025388015834907e-06, "loss": 0.6059, "step": 13943 }, { "epoch": 1.24, "grad_norm": 4.893511869243785, "learning_rate": 3.3018593504875905e-06, "loss": 0.6091, "step": 13944 }, { "epoch": 1.24, "grad_norm": 6.794473494485555, "learning_rate": 3.3011799348366573e-06, "loss": 0.6165, "step": 13945 }, { "epoch": 1.24, "grad_norm": 7.596216671876299, "learning_rate": 3.3005005546448742e-06, "loss": 0.6361, "step": 13946 }, { "epoch": 1.24, "grad_norm": 5.1601907055207645, "learning_rate": 3.2998212099264206e-06, "loss": 0.5645, "step": 13947 }, { "epoch": 1.24, "grad_norm": 7.225906915174299, "learning_rate": 3.299141900695474e-06, "loss": 0.6181, "step": 13948 }, { "epoch": 1.24, "grad_norm": 6.813350845986244, "learning_rate": 3.2984626269662156e-06, "loss": 0.5525, "step": 13949 }, { "epoch": 1.24, "grad_norm": 6.340626314796184, "learning_rate": 3.297783388752824e-06, "loss": 0.5871, "step": 13950 }, { "epoch": 1.24, "grad_norm": 5.8822046604436515, "learning_rate": 3.2971041860694765e-06, "loss": 0.5647, "step": 13951 }, { "epoch": 1.24, "grad_norm": 5.685727765431623, "learning_rate": 3.2964250189303486e-06, "loss": 0.6093, "step": 13952 }, { "epoch": 1.24, "grad_norm": 7.117931502683133, "learning_rate": 3.2957458873496194e-06, "loss": 0.5839, "step": 13953 }, { "epoch": 1.24, "grad_norm": 8.837130059155726, "learning_rate": 3.2950667913414614e-06, "loss": 0.6292, "step": 13954 }, { "epoch": 1.24, "grad_norm": 8.430477627141649, "learning_rate": 3.29438773092005e-06, "loss": 0.6248, "step": 13955 }, { "epoch": 1.25, "grad_norm": 7.248600283537663, "learning_rate": 3.293708706099561e-06, "loss": 0.6677, "step": 13956 }, { "epoch": 1.25, "grad_norm": 10.242730343810818, "learning_rate": 3.293029716894165e-06, "loss": 0.6643, "step": 13957 }, { "epoch": 1.25, "grad_norm": 6.076699233082732, "learning_rate": 3.2923507633180353e-06, "loss": 0.636, "step": 13958 }, { "epoch": 1.25, "grad_norm": 9.333338623987936, "learning_rate": 3.2916718453853445e-06, "loss": 0.5913, "step": 13959 }, { "epoch": 1.25, "grad_norm": 9.467258616390868, "learning_rate": 3.290992963110262e-06, "loss": 0.5665, "step": 13960 }, { "epoch": 1.25, "grad_norm": 7.29823500933735, "learning_rate": 3.2903141165069607e-06, "loss": 0.6299, "step": 13961 }, { "epoch": 1.25, "grad_norm": 6.2532297487837365, "learning_rate": 3.289635305589606e-06, "loss": 0.6508, "step": 13962 }, { "epoch": 1.25, "grad_norm": 5.60076781389035, "learning_rate": 3.28895653037237e-06, "loss": 0.6628, "step": 13963 }, { "epoch": 1.25, "grad_norm": 6.401855859392039, "learning_rate": 3.2882777908694175e-06, "loss": 0.6101, "step": 13964 }, { "epoch": 1.25, "grad_norm": 6.602857243994107, "learning_rate": 3.2875990870949184e-06, "loss": 0.5767, "step": 13965 }, { "epoch": 1.25, "grad_norm": 6.675746954187519, "learning_rate": 3.286920419063039e-06, "loss": 0.5749, "step": 13966 }, { "epoch": 1.25, "grad_norm": 6.651322710025811, "learning_rate": 3.2862417867879432e-06, "loss": 0.5619, "step": 13967 }, { "epoch": 1.25, "grad_norm": 8.106140528568869, "learning_rate": 3.285563190283797e-06, "loss": 0.6639, "step": 13968 }, { "epoch": 1.25, "grad_norm": 5.006326743248452, "learning_rate": 3.2848846295647653e-06, "loss": 0.5506, "step": 13969 }, { "epoch": 1.25, "grad_norm": 9.031778398428461, "learning_rate": 3.28420610464501e-06, "loss": 0.5631, "step": 13970 }, { "epoch": 1.25, "grad_norm": 6.539257698421576, "learning_rate": 3.283527615538695e-06, "loss": 0.5742, "step": 13971 }, { "epoch": 1.25, "grad_norm": 6.33487060869525, "learning_rate": 3.2828491622599836e-06, "loss": 0.5836, "step": 13972 }, { "epoch": 1.25, "grad_norm": 9.950658585131695, "learning_rate": 3.282170744823034e-06, "loss": 0.6077, "step": 13973 }, { "epoch": 1.25, "grad_norm": 8.115665151983794, "learning_rate": 3.281492363242008e-06, "loss": 0.5845, "step": 13974 }, { "epoch": 1.25, "grad_norm": 8.47193439688196, "learning_rate": 3.2808140175310667e-06, "loss": 0.6087, "step": 13975 }, { "epoch": 1.25, "grad_norm": 4.699346551043925, "learning_rate": 3.2801357077043682e-06, "loss": 0.5472, "step": 13976 }, { "epoch": 1.25, "grad_norm": 4.772496905651359, "learning_rate": 3.2794574337760687e-06, "loss": 0.5852, "step": 13977 }, { "epoch": 1.25, "grad_norm": 6.579831838089926, "learning_rate": 3.2787791957603276e-06, "loss": 0.653, "step": 13978 }, { "epoch": 1.25, "grad_norm": 6.406047262141717, "learning_rate": 3.2781009936713025e-06, "loss": 0.5939, "step": 13979 }, { "epoch": 1.25, "grad_norm": 7.740205873365283, "learning_rate": 3.2774228275231467e-06, "loss": 0.6005, "step": 13980 }, { "epoch": 1.25, "grad_norm": 8.050623770906041, "learning_rate": 3.276744697330017e-06, "loss": 0.5578, "step": 13981 }, { "epoch": 1.25, "grad_norm": 6.465004719658058, "learning_rate": 3.2760666031060696e-06, "loss": 0.6001, "step": 13982 }, { "epoch": 1.25, "grad_norm": 10.2661484780485, "learning_rate": 3.275388544865455e-06, "loss": 0.61, "step": 13983 }, { "epoch": 1.25, "grad_norm": 5.9270552099218685, "learning_rate": 3.2747105226223274e-06, "loss": 0.5817, "step": 13984 }, { "epoch": 1.25, "grad_norm": 8.289370457237832, "learning_rate": 3.274032536390841e-06, "loss": 0.6086, "step": 13985 }, { "epoch": 1.25, "grad_norm": 8.133253410738387, "learning_rate": 3.273354586185144e-06, "loss": 0.6088, "step": 13986 }, { "epoch": 1.25, "grad_norm": 4.254965304405587, "learning_rate": 3.272676672019389e-06, "loss": 0.6175, "step": 13987 }, { "epoch": 1.25, "grad_norm": 7.047914348978449, "learning_rate": 3.271998793907727e-06, "loss": 0.5885, "step": 13988 }, { "epoch": 1.25, "grad_norm": 7.129035482451844, "learning_rate": 3.271320951864304e-06, "loss": 0.6039, "step": 13989 }, { "epoch": 1.25, "grad_norm": 6.625092939230338, "learning_rate": 3.270643145903273e-06, "loss": 0.5668, "step": 13990 }, { "epoch": 1.25, "grad_norm": 6.4817455218576, "learning_rate": 3.2699653760387774e-06, "loss": 0.6306, "step": 13991 }, { "epoch": 1.25, "grad_norm": 6.865737669961547, "learning_rate": 3.269287642284966e-06, "loss": 0.589, "step": 13992 }, { "epoch": 1.25, "grad_norm": 6.700508681165714, "learning_rate": 3.268609944655985e-06, "loss": 0.6489, "step": 13993 }, { "epoch": 1.25, "grad_norm": 7.240287628185725, "learning_rate": 3.2679322831659786e-06, "loss": 0.6193, "step": 13994 }, { "epoch": 1.25, "grad_norm": 6.578733561261825, "learning_rate": 3.2672546578290943e-06, "loss": 0.5626, "step": 13995 }, { "epoch": 1.25, "grad_norm": 7.499349563288458, "learning_rate": 3.266577068659473e-06, "loss": 0.678, "step": 13996 }, { "epoch": 1.25, "grad_norm": 7.514611635801977, "learning_rate": 3.2658995156712605e-06, "loss": 0.6471, "step": 13997 }, { "epoch": 1.25, "grad_norm": 6.82055186940561, "learning_rate": 3.2652219988785973e-06, "loss": 0.524, "step": 13998 }, { "epoch": 1.25, "grad_norm": 6.4895029189305085, "learning_rate": 3.264544518295626e-06, "loss": 0.592, "step": 13999 }, { "epoch": 1.25, "grad_norm": 6.223296733041059, "learning_rate": 3.2638670739364865e-06, "loss": 0.6026, "step": 14000 }, { "epoch": 1.25, "grad_norm": 6.7762669518419925, "learning_rate": 3.2631896658153205e-06, "loss": 0.6264, "step": 14001 }, { "epoch": 1.25, "grad_norm": 8.187894812259207, "learning_rate": 3.262512293946267e-06, "loss": 0.6013, "step": 14002 }, { "epoch": 1.25, "grad_norm": 5.285959247229007, "learning_rate": 3.261834958343464e-06, "loss": 0.6019, "step": 14003 }, { "epoch": 1.25, "grad_norm": 6.763980672833974, "learning_rate": 3.2611576590210492e-06, "loss": 0.5354, "step": 14004 }, { "epoch": 1.25, "grad_norm": 4.717877662923218, "learning_rate": 3.2604803959931623e-06, "loss": 0.6162, "step": 14005 }, { "epoch": 1.25, "grad_norm": 7.452128334960911, "learning_rate": 3.2598031692739365e-06, "loss": 0.6122, "step": 14006 }, { "epoch": 1.25, "grad_norm": 7.141084431190575, "learning_rate": 3.2591259788775078e-06, "loss": 0.6436, "step": 14007 }, { "epoch": 1.25, "grad_norm": 6.85355605203259, "learning_rate": 3.2584488248180124e-06, "loss": 0.5693, "step": 14008 }, { "epoch": 1.25, "grad_norm": 6.437154818801018, "learning_rate": 3.2577717071095827e-06, "loss": 0.6302, "step": 14009 }, { "epoch": 1.25, "grad_norm": 10.103423573237823, "learning_rate": 3.2570946257663548e-06, "loss": 0.6152, "step": 14010 }, { "epoch": 1.25, "grad_norm": 6.494533312422914, "learning_rate": 3.2564175808024588e-06, "loss": 0.5978, "step": 14011 }, { "epoch": 1.25, "grad_norm": 7.397635371123472, "learning_rate": 3.255740572232027e-06, "loss": 0.5769, "step": 14012 }, { "epoch": 1.25, "grad_norm": 7.127067363830593, "learning_rate": 3.2550636000691925e-06, "loss": 0.5979, "step": 14013 }, { "epoch": 1.25, "grad_norm": 6.504548071172773, "learning_rate": 3.254386664328083e-06, "loss": 0.6572, "step": 14014 }, { "epoch": 1.25, "grad_norm": 7.3382298123127745, "learning_rate": 3.2537097650228284e-06, "loss": 0.6494, "step": 14015 }, { "epoch": 1.25, "grad_norm": 6.368460014365391, "learning_rate": 3.2530329021675594e-06, "loss": 0.6349, "step": 14016 }, { "epoch": 1.25, "grad_norm": 6.354730723266199, "learning_rate": 3.252356075776402e-06, "loss": 0.6334, "step": 14017 }, { "epoch": 1.25, "grad_norm": 7.6006898669149905, "learning_rate": 3.251679285863484e-06, "loss": 0.608, "step": 14018 }, { "epoch": 1.25, "grad_norm": 5.716010870220275, "learning_rate": 3.251002532442934e-06, "loss": 0.5879, "step": 14019 }, { "epoch": 1.25, "grad_norm": 6.191421136181028, "learning_rate": 3.2503258155288753e-06, "loss": 0.6326, "step": 14020 }, { "epoch": 1.25, "grad_norm": 5.664309025663215, "learning_rate": 3.249649135135432e-06, "loss": 0.6345, "step": 14021 }, { "epoch": 1.25, "grad_norm": 6.252364430084975, "learning_rate": 3.2489724912767307e-06, "loss": 0.653, "step": 14022 }, { "epoch": 1.25, "grad_norm": 7.952643112715204, "learning_rate": 3.2482958839668943e-06, "loss": 0.6471, "step": 14023 }, { "epoch": 1.25, "grad_norm": 6.352516092276112, "learning_rate": 3.247619313220044e-06, "loss": 0.6243, "step": 14024 }, { "epoch": 1.25, "grad_norm": 5.751697129902406, "learning_rate": 3.246942779050303e-06, "loss": 0.5707, "step": 14025 }, { "epoch": 1.25, "grad_norm": 6.154606974028246, "learning_rate": 3.2462662814717937e-06, "loss": 0.5536, "step": 14026 }, { "epoch": 1.25, "grad_norm": 5.775743085263744, "learning_rate": 3.2455898204986337e-06, "loss": 0.6454, "step": 14027 }, { "epoch": 1.25, "grad_norm": 7.162330855616766, "learning_rate": 3.2449133961449442e-06, "loss": 0.5301, "step": 14028 }, { "epoch": 1.25, "grad_norm": 6.88266655719037, "learning_rate": 3.2442370084248452e-06, "loss": 0.5358, "step": 14029 }, { "epoch": 1.25, "grad_norm": 5.922234468299968, "learning_rate": 3.2435606573524526e-06, "loss": 0.5711, "step": 14030 }, { "epoch": 1.25, "grad_norm": 5.370064059684683, "learning_rate": 3.242884342941884e-06, "loss": 0.6056, "step": 14031 }, { "epoch": 1.25, "grad_norm": 6.043947766989689, "learning_rate": 3.2422080652072584e-06, "loss": 0.579, "step": 14032 }, { "epoch": 1.25, "grad_norm": 5.530701200418585, "learning_rate": 3.2415318241626914e-06, "loss": 0.5681, "step": 14033 }, { "epoch": 1.25, "grad_norm": 6.977076706723662, "learning_rate": 3.240855619822294e-06, "loss": 0.618, "step": 14034 }, { "epoch": 1.25, "grad_norm": 4.911226015940273, "learning_rate": 3.2401794522001834e-06, "loss": 0.6203, "step": 14035 }, { "epoch": 1.25, "grad_norm": 7.429399018636236, "learning_rate": 3.2395033213104733e-06, "loss": 0.5781, "step": 14036 }, { "epoch": 1.25, "grad_norm": 5.227652894084293, "learning_rate": 3.2388272271672755e-06, "loss": 0.6354, "step": 14037 }, { "epoch": 1.25, "grad_norm": 9.794424396011177, "learning_rate": 3.2381511697847023e-06, "loss": 0.5291, "step": 14038 }, { "epoch": 1.25, "grad_norm": 6.137166664493985, "learning_rate": 3.2374751491768654e-06, "loss": 0.6176, "step": 14039 }, { "epoch": 1.25, "grad_norm": 5.872228377459595, "learning_rate": 3.2367991653578747e-06, "loss": 0.6223, "step": 14040 }, { "epoch": 1.25, "grad_norm": 6.327793655332198, "learning_rate": 3.2361232183418396e-06, "loss": 0.5805, "step": 14041 }, { "epoch": 1.25, "grad_norm": 5.057526886048322, "learning_rate": 3.2354473081428704e-06, "loss": 0.5553, "step": 14042 }, { "epoch": 1.25, "grad_norm": 6.877476898441335, "learning_rate": 3.2347714347750735e-06, "loss": 0.6284, "step": 14043 }, { "epoch": 1.25, "grad_norm": 8.098282895244523, "learning_rate": 3.234095598252558e-06, "loss": 0.6255, "step": 14044 }, { "epoch": 1.25, "grad_norm": 4.858865527794053, "learning_rate": 3.2334197985894293e-06, "loss": 0.612, "step": 14045 }, { "epoch": 1.25, "grad_norm": 6.838535982802868, "learning_rate": 3.232744035799793e-06, "loss": 0.6503, "step": 14046 }, { "epoch": 1.25, "grad_norm": 6.916111469378982, "learning_rate": 3.232068309897755e-06, "loss": 0.6635, "step": 14047 }, { "epoch": 1.25, "grad_norm": 5.086104763482101, "learning_rate": 3.2313926208974213e-06, "loss": 0.589, "step": 14048 }, { "epoch": 1.25, "grad_norm": 8.212442369289526, "learning_rate": 3.2307169688128924e-06, "loss": 0.5985, "step": 14049 }, { "epoch": 1.25, "grad_norm": 8.0933866702693, "learning_rate": 3.2300413536582708e-06, "loss": 0.6096, "step": 14050 }, { "epoch": 1.25, "grad_norm": 5.9955381500736715, "learning_rate": 3.2293657754476603e-06, "loss": 0.6113, "step": 14051 }, { "epoch": 1.25, "grad_norm": 6.912352665417203, "learning_rate": 3.228690234195162e-06, "loss": 0.6096, "step": 14052 }, { "epoch": 1.25, "grad_norm": 7.017728273888986, "learning_rate": 3.2280147299148754e-06, "loss": 0.6133, "step": 14053 }, { "epoch": 1.25, "grad_norm": 6.118768034781843, "learning_rate": 3.2273392626209005e-06, "loss": 0.639, "step": 14054 }, { "epoch": 1.25, "grad_norm": 6.064692534375806, "learning_rate": 3.2266638323273376e-06, "loss": 0.5613, "step": 14055 }, { "epoch": 1.25, "grad_norm": 6.524256294823813, "learning_rate": 3.2259884390482828e-06, "loss": 0.604, "step": 14056 }, { "epoch": 1.25, "grad_norm": 6.91168380553558, "learning_rate": 3.225313082797834e-06, "loss": 0.5876, "step": 14057 }, { "epoch": 1.25, "grad_norm": 5.871226559791408, "learning_rate": 3.224637763590089e-06, "loss": 0.5646, "step": 14058 }, { "epoch": 1.25, "grad_norm": 6.461172584248612, "learning_rate": 3.2239624814391422e-06, "loss": 0.6178, "step": 14059 }, { "epoch": 1.25, "grad_norm": 5.5957970224309435, "learning_rate": 3.223287236359088e-06, "loss": 0.597, "step": 14060 }, { "epoch": 1.25, "grad_norm": 7.5037045824599815, "learning_rate": 3.2226120283640246e-06, "loss": 0.625, "step": 14061 }, { "epoch": 1.25, "grad_norm": 5.770002719089063, "learning_rate": 3.2219368574680427e-06, "loss": 0.6022, "step": 14062 }, { "epoch": 1.25, "grad_norm": 7.105514596629109, "learning_rate": 3.221261723685233e-06, "loss": 0.5898, "step": 14063 }, { "epoch": 1.25, "grad_norm": 7.349512656303862, "learning_rate": 3.2205866270296894e-06, "loss": 0.5582, "step": 14064 }, { "epoch": 1.25, "grad_norm": 7.228682225740329, "learning_rate": 3.2199115675155047e-06, "loss": 0.6351, "step": 14065 }, { "epoch": 1.25, "grad_norm": 4.717447970681148, "learning_rate": 3.219236545156766e-06, "loss": 0.5666, "step": 14066 }, { "epoch": 1.25, "grad_norm": 7.34503655980662, "learning_rate": 3.2185615599675644e-06, "loss": 0.6204, "step": 14067 }, { "epoch": 1.26, "grad_norm": 6.375518686439419, "learning_rate": 3.217886611961991e-06, "loss": 0.6295, "step": 14068 }, { "epoch": 1.26, "grad_norm": 5.582980458269471, "learning_rate": 3.21721170115413e-06, "loss": 0.6181, "step": 14069 }, { "epoch": 1.26, "grad_norm": 6.4940951793199115, "learning_rate": 3.216536827558071e-06, "loss": 0.6215, "step": 14070 }, { "epoch": 1.26, "grad_norm": 6.352982394230798, "learning_rate": 3.2158619911879006e-06, "loss": 0.6249, "step": 14071 }, { "epoch": 1.26, "grad_norm": 6.557162244642264, "learning_rate": 3.2151871920577027e-06, "loss": 0.5511, "step": 14072 }, { "epoch": 1.26, "grad_norm": 5.585553960495882, "learning_rate": 3.214512430181564e-06, "loss": 0.6552, "step": 14073 }, { "epoch": 1.26, "grad_norm": 6.980666124117477, "learning_rate": 3.213837705573569e-06, "loss": 0.6051, "step": 14074 }, { "epoch": 1.26, "grad_norm": 6.88610994155848, "learning_rate": 3.213163018247799e-06, "loss": 0.6219, "step": 14075 }, { "epoch": 1.26, "grad_norm": 7.165266763434706, "learning_rate": 3.2124883682183383e-06, "loss": 0.5902, "step": 14076 }, { "epoch": 1.26, "grad_norm": 7.008425408333757, "learning_rate": 3.21181375549927e-06, "loss": 0.5713, "step": 14077 }, { "epoch": 1.26, "grad_norm": 5.65602781235058, "learning_rate": 3.2111391801046732e-06, "loss": 0.6194, "step": 14078 }, { "epoch": 1.26, "grad_norm": 6.516092309930541, "learning_rate": 3.2104646420486268e-06, "loss": 0.5737, "step": 14079 }, { "epoch": 1.26, "grad_norm": 6.398987715829651, "learning_rate": 3.209790141345213e-06, "loss": 0.6025, "step": 14080 }, { "epoch": 1.26, "grad_norm": 5.456942850662524, "learning_rate": 3.2091156780085087e-06, "loss": 0.5516, "step": 14081 }, { "epoch": 1.26, "grad_norm": 5.9105840715343785, "learning_rate": 3.2084412520525933e-06, "loss": 0.6267, "step": 14082 }, { "epoch": 1.26, "grad_norm": 4.777697733915076, "learning_rate": 3.2077668634915427e-06, "loss": 0.6089, "step": 14083 }, { "epoch": 1.26, "grad_norm": 6.3263791192739145, "learning_rate": 3.2070925123394346e-06, "loss": 0.5705, "step": 14084 }, { "epoch": 1.26, "grad_norm": 6.640599910622072, "learning_rate": 3.2064181986103428e-06, "loss": 0.6203, "step": 14085 }, { "epoch": 1.26, "grad_norm": 8.381911654619529, "learning_rate": 3.205743922318345e-06, "loss": 0.5969, "step": 14086 }, { "epoch": 1.26, "grad_norm": 9.23345393161523, "learning_rate": 3.2050696834775117e-06, "loss": 0.6113, "step": 14087 }, { "epoch": 1.26, "grad_norm": 5.470943311337223, "learning_rate": 3.2043954821019185e-06, "loss": 0.6052, "step": 14088 }, { "epoch": 1.26, "grad_norm": 8.531897006932326, "learning_rate": 3.203721318205638e-06, "loss": 0.5821, "step": 14089 }, { "epoch": 1.26, "grad_norm": 4.628243889667987, "learning_rate": 3.20304719180274e-06, "loss": 0.6404, "step": 14090 }, { "epoch": 1.26, "grad_norm": 7.020132308061576, "learning_rate": 3.202373102907299e-06, "loss": 0.5998, "step": 14091 }, { "epoch": 1.26, "grad_norm": 4.846589469385847, "learning_rate": 3.2016990515333807e-06, "loss": 0.5683, "step": 14092 }, { "epoch": 1.26, "grad_norm": 6.0800028546283515, "learning_rate": 3.201025037695056e-06, "loss": 0.6133, "step": 14093 }, { "epoch": 1.26, "grad_norm": 8.144582123391693, "learning_rate": 3.200351061406396e-06, "loss": 0.6223, "step": 14094 }, { "epoch": 1.26, "grad_norm": 6.990833408354744, "learning_rate": 3.199677122681465e-06, "loss": 0.5656, "step": 14095 }, { "epoch": 1.26, "grad_norm": 7.110028118299155, "learning_rate": 3.1990032215343316e-06, "loss": 0.6087, "step": 14096 }, { "epoch": 1.26, "grad_norm": 6.326134812119718, "learning_rate": 3.198329357979062e-06, "loss": 0.5586, "step": 14097 }, { "epoch": 1.26, "grad_norm": 8.913633186874835, "learning_rate": 3.197655532029721e-06, "loss": 0.6473, "step": 14098 }, { "epoch": 1.26, "grad_norm": 6.979730222666931, "learning_rate": 3.1969817437003743e-06, "loss": 0.6096, "step": 14099 }, { "epoch": 1.26, "grad_norm": 7.58602061451955, "learning_rate": 3.1963079930050846e-06, "loss": 0.6076, "step": 14100 }, { "epoch": 1.26, "grad_norm": 7.11562058986664, "learning_rate": 3.1956342799579154e-06, "loss": 0.606, "step": 14101 }, { "epoch": 1.26, "grad_norm": 5.6349703063701355, "learning_rate": 3.19496060457293e-06, "loss": 0.6299, "step": 14102 }, { "epoch": 1.26, "grad_norm": 6.244702639160508, "learning_rate": 3.194286966864188e-06, "loss": 0.6417, "step": 14103 }, { "epoch": 1.26, "grad_norm": 5.795554538535375, "learning_rate": 3.1936133668457516e-06, "loss": 0.5842, "step": 14104 }, { "epoch": 1.26, "grad_norm": 6.423737124255248, "learning_rate": 3.192939804531681e-06, "loss": 0.5723, "step": 14105 }, { "epoch": 1.26, "grad_norm": 9.131434711476878, "learning_rate": 3.192266279936035e-06, "loss": 0.5891, "step": 14106 }, { "epoch": 1.26, "grad_norm": 5.115364691362743, "learning_rate": 3.1915927930728695e-06, "loss": 0.5603, "step": 14107 }, { "epoch": 1.26, "grad_norm": 9.13187605486909, "learning_rate": 3.1909193439562445e-06, "loss": 0.6661, "step": 14108 }, { "epoch": 1.26, "grad_norm": 5.672533579132917, "learning_rate": 3.190245932600218e-06, "loss": 0.5161, "step": 14109 }, { "epoch": 1.26, "grad_norm": 6.588612999070721, "learning_rate": 3.189572559018843e-06, "loss": 0.5627, "step": 14110 }, { "epoch": 1.26, "grad_norm": 5.9783156312184165, "learning_rate": 3.1888992232261763e-06, "loss": 0.6071, "step": 14111 }, { "epoch": 1.26, "grad_norm": 7.139964631202621, "learning_rate": 3.1882259252362724e-06, "loss": 0.5318, "step": 14112 }, { "epoch": 1.26, "grad_norm": 8.222890383282024, "learning_rate": 3.187552665063184e-06, "loss": 0.5837, "step": 14113 }, { "epoch": 1.26, "grad_norm": 5.274210314824888, "learning_rate": 3.1868794427209636e-06, "loss": 0.5464, "step": 14114 }, { "epoch": 1.26, "grad_norm": 6.179620590108454, "learning_rate": 3.1862062582236664e-06, "loss": 0.5937, "step": 14115 }, { "epoch": 1.26, "grad_norm": 5.483164541210393, "learning_rate": 3.1855331115853398e-06, "loss": 0.5583, "step": 14116 }, { "epoch": 1.26, "grad_norm": 7.209258549408169, "learning_rate": 3.184860002820036e-06, "loss": 0.593, "step": 14117 }, { "epoch": 1.26, "grad_norm": 7.163140042605447, "learning_rate": 3.1841869319418063e-06, "loss": 0.5967, "step": 14118 }, { "epoch": 1.26, "grad_norm": 6.558458251520648, "learning_rate": 3.183513898964696e-06, "loss": 0.6175, "step": 14119 }, { "epoch": 1.26, "grad_norm": 6.121149138112046, "learning_rate": 3.1828409039027568e-06, "loss": 0.5603, "step": 14120 }, { "epoch": 1.26, "grad_norm": 7.213865146458191, "learning_rate": 3.1821679467700328e-06, "loss": 0.6213, "step": 14121 }, { "epoch": 1.26, "grad_norm": 7.0368464368196495, "learning_rate": 3.1814950275805727e-06, "loss": 0.6484, "step": 14122 }, { "epoch": 1.26, "grad_norm": 6.554890613382404, "learning_rate": 3.18082214634842e-06, "loss": 0.5154, "step": 14123 }, { "epoch": 1.26, "grad_norm": 6.962880022061275, "learning_rate": 3.180149303087622e-06, "loss": 0.5608, "step": 14124 }, { "epoch": 1.26, "grad_norm": 7.644969443239955, "learning_rate": 3.179476497812223e-06, "loss": 0.5356, "step": 14125 }, { "epoch": 1.26, "grad_norm": 7.095671495879797, "learning_rate": 3.178803730536263e-06, "loss": 0.5807, "step": 14126 }, { "epoch": 1.26, "grad_norm": 7.589783169221604, "learning_rate": 3.1781310012737875e-06, "loss": 0.6465, "step": 14127 }, { "epoch": 1.26, "grad_norm": 5.56471163591941, "learning_rate": 3.1774583100388384e-06, "loss": 0.5792, "step": 14128 }, { "epoch": 1.26, "grad_norm": 5.600816897701473, "learning_rate": 3.176785656845455e-06, "loss": 0.555, "step": 14129 }, { "epoch": 1.26, "grad_norm": 5.192400339800826, "learning_rate": 3.176113041707678e-06, "loss": 0.5905, "step": 14130 }, { "epoch": 1.26, "grad_norm": 6.198299329290172, "learning_rate": 3.175440464639548e-06, "loss": 0.6211, "step": 14131 }, { "epoch": 1.26, "grad_norm": 7.834340001191554, "learning_rate": 3.1747679256551017e-06, "loss": 0.6648, "step": 14132 }, { "epoch": 1.26, "grad_norm": 8.267287598992556, "learning_rate": 3.1740954247683776e-06, "loss": 0.6002, "step": 14133 }, { "epoch": 1.26, "grad_norm": 5.302201742955359, "learning_rate": 3.1734229619934152e-06, "loss": 0.5412, "step": 14134 }, { "epoch": 1.26, "grad_norm": 11.219836003502474, "learning_rate": 3.172750537344247e-06, "loss": 0.5841, "step": 14135 }, { "epoch": 1.26, "grad_norm": 4.8893354557429385, "learning_rate": 3.1720781508349088e-06, "loss": 0.5507, "step": 14136 }, { "epoch": 1.26, "grad_norm": 6.008734839533168, "learning_rate": 3.171405802479436e-06, "loss": 0.6141, "step": 14137 }, { "epoch": 1.26, "grad_norm": 6.562817912046149, "learning_rate": 3.170733492291864e-06, "loss": 0.6277, "step": 14138 }, { "epoch": 1.26, "grad_norm": 5.773892380061328, "learning_rate": 3.1700612202862225e-06, "loss": 0.6391, "step": 14139 }, { "epoch": 1.26, "grad_norm": 9.434446282513486, "learning_rate": 3.1693889864765464e-06, "loss": 0.5655, "step": 14140 }, { "epoch": 1.26, "grad_norm": 7.386640838658993, "learning_rate": 3.1687167908768667e-06, "loss": 0.5878, "step": 14141 }, { "epoch": 1.26, "grad_norm": 7.248533301097855, "learning_rate": 3.1680446335012127e-06, "loss": 0.5839, "step": 14142 }, { "epoch": 1.26, "grad_norm": 5.786753471239798, "learning_rate": 3.167372514363615e-06, "loss": 0.6191, "step": 14143 }, { "epoch": 1.26, "grad_norm": 7.007300929580455, "learning_rate": 3.166700433478104e-06, "loss": 0.659, "step": 14144 }, { "epoch": 1.26, "grad_norm": 6.875141880568826, "learning_rate": 3.166028390858705e-06, "loss": 0.5883, "step": 14145 }, { "epoch": 1.26, "grad_norm": 7.199897981137047, "learning_rate": 3.1653563865194474e-06, "loss": 0.6088, "step": 14146 }, { "epoch": 1.26, "grad_norm": 5.762887981197458, "learning_rate": 3.1646844204743585e-06, "loss": 0.5439, "step": 14147 }, { "epoch": 1.26, "grad_norm": 8.308055234903481, "learning_rate": 3.164012492737462e-06, "loss": 0.6333, "step": 14148 }, { "epoch": 1.26, "grad_norm": 8.567579373020022, "learning_rate": 3.163340603322786e-06, "loss": 0.591, "step": 14149 }, { "epoch": 1.26, "grad_norm": 4.454007350599428, "learning_rate": 3.1626687522443508e-06, "loss": 0.5587, "step": 14150 }, { "epoch": 1.26, "grad_norm": 7.24843520550573, "learning_rate": 3.161996939516182e-06, "loss": 0.6222, "step": 14151 }, { "epoch": 1.26, "grad_norm": 5.698131516576222, "learning_rate": 3.1613251651523014e-06, "loss": 0.5988, "step": 14152 }, { "epoch": 1.26, "grad_norm": 5.018300918304478, "learning_rate": 3.1606534291667317e-06, "loss": 0.6014, "step": 14153 }, { "epoch": 1.26, "grad_norm": 6.329832645434805, "learning_rate": 3.1599817315734944e-06, "loss": 0.624, "step": 14154 }, { "epoch": 1.26, "grad_norm": 5.470105756553675, "learning_rate": 3.1593100723866075e-06, "loss": 0.5744, "step": 14155 }, { "epoch": 1.26, "grad_norm": 10.475704580143065, "learning_rate": 3.158638451620092e-06, "loss": 0.6055, "step": 14156 }, { "epoch": 1.26, "grad_norm": 7.275631950307534, "learning_rate": 3.157966869287967e-06, "loss": 0.5892, "step": 14157 }, { "epoch": 1.26, "grad_norm": 8.976031971233075, "learning_rate": 3.157295325404249e-06, "loss": 0.6645, "step": 14158 }, { "epoch": 1.26, "grad_norm": 5.830912851027432, "learning_rate": 3.1566238199829556e-06, "loss": 0.5381, "step": 14159 }, { "epoch": 1.26, "grad_norm": 6.172642167010969, "learning_rate": 3.1559523530381044e-06, "loss": 0.6004, "step": 14160 }, { "epoch": 1.26, "grad_norm": 4.651443111597223, "learning_rate": 3.155280924583708e-06, "loss": 0.6057, "step": 14161 }, { "epoch": 1.26, "grad_norm": 6.809900001974165, "learning_rate": 3.1546095346337834e-06, "loss": 0.578, "step": 14162 }, { "epoch": 1.26, "grad_norm": 7.194748920194539, "learning_rate": 3.1539381832023436e-06, "loss": 0.5593, "step": 14163 }, { "epoch": 1.26, "grad_norm": 7.301999716506646, "learning_rate": 3.1532668703034015e-06, "loss": 0.575, "step": 14164 }, { "epoch": 1.26, "grad_norm": 11.422045344549227, "learning_rate": 3.152595595950968e-06, "loss": 0.6911, "step": 14165 }, { "epoch": 1.26, "grad_norm": 7.754563147395034, "learning_rate": 3.1519243601590555e-06, "loss": 0.5758, "step": 14166 }, { "epoch": 1.26, "grad_norm": 6.04495346712206, "learning_rate": 3.1512531629416765e-06, "loss": 0.6328, "step": 14167 }, { "epoch": 1.26, "grad_norm": 6.268071540827965, "learning_rate": 3.150582004312837e-06, "loss": 0.5732, "step": 14168 }, { "epoch": 1.26, "grad_norm": 6.248758159492901, "learning_rate": 3.149910884286549e-06, "loss": 0.5898, "step": 14169 }, { "epoch": 1.26, "grad_norm": 6.524830157928618, "learning_rate": 3.14923980287682e-06, "loss": 0.5744, "step": 14170 }, { "epoch": 1.26, "grad_norm": 6.833774137232275, "learning_rate": 3.148568760097656e-06, "loss": 0.6075, "step": 14171 }, { "epoch": 1.26, "grad_norm": 8.83540598615519, "learning_rate": 3.147897755963066e-06, "loss": 0.6947, "step": 14172 }, { "epoch": 1.26, "grad_norm": 7.412120982449695, "learning_rate": 3.147226790487053e-06, "loss": 0.6104, "step": 14173 }, { "epoch": 1.26, "grad_norm": 7.441377585468815, "learning_rate": 3.1465558636836235e-06, "loss": 0.5161, "step": 14174 }, { "epoch": 1.26, "grad_norm": 7.557013295369439, "learning_rate": 3.145884975566782e-06, "loss": 0.5673, "step": 14175 }, { "epoch": 1.26, "grad_norm": 7.5969036503025595, "learning_rate": 3.1452141261505302e-06, "loss": 0.5509, "step": 14176 }, { "epoch": 1.26, "grad_norm": 5.387687635941349, "learning_rate": 3.144543315448872e-06, "loss": 0.6066, "step": 14177 }, { "epoch": 1.26, "grad_norm": 5.89059027690157, "learning_rate": 3.1438725434758103e-06, "loss": 0.6495, "step": 14178 }, { "epoch": 1.26, "grad_norm": 5.205531193282285, "learning_rate": 3.1432018102453425e-06, "loss": 0.5976, "step": 14179 }, { "epoch": 1.26, "grad_norm": 6.788578657967702, "learning_rate": 3.142531115771472e-06, "loss": 0.6095, "step": 14180 }, { "epoch": 1.27, "grad_norm": 7.498455488771687, "learning_rate": 3.1418604600681955e-06, "loss": 0.6182, "step": 14181 }, { "epoch": 1.27, "grad_norm": 6.536943268243391, "learning_rate": 3.1411898431495134e-06, "loss": 0.5981, "step": 14182 }, { "epoch": 1.27, "grad_norm": 8.010657010345819, "learning_rate": 3.1405192650294214e-06, "loss": 0.6932, "step": 14183 }, { "epoch": 1.27, "grad_norm": 6.919520878895749, "learning_rate": 3.1398487257219177e-06, "loss": 0.6559, "step": 14184 }, { "epoch": 1.27, "grad_norm": 7.646922695154604, "learning_rate": 3.1391782252409996e-06, "loss": 0.5938, "step": 14185 }, { "epoch": 1.27, "grad_norm": 7.154626532136132, "learning_rate": 3.13850776360066e-06, "loss": 0.584, "step": 14186 }, { "epoch": 1.27, "grad_norm": 5.5671544344804405, "learning_rate": 3.1378373408148937e-06, "loss": 0.5897, "step": 14187 }, { "epoch": 1.27, "grad_norm": 4.990441106710454, "learning_rate": 3.137166956897696e-06, "loss": 0.5819, "step": 14188 }, { "epoch": 1.27, "grad_norm": 5.893635449788463, "learning_rate": 3.136496611863058e-06, "loss": 0.5886, "step": 14189 }, { "epoch": 1.27, "grad_norm": 7.164289525197329, "learning_rate": 3.1358263057249714e-06, "loss": 0.6184, "step": 14190 }, { "epoch": 1.27, "grad_norm": 6.50713273732411, "learning_rate": 3.13515603849743e-06, "loss": 0.6383, "step": 14191 }, { "epoch": 1.27, "grad_norm": 8.381388738024818, "learning_rate": 3.134485810194423e-06, "loss": 0.6764, "step": 14192 }, { "epoch": 1.27, "grad_norm": 6.638633468682515, "learning_rate": 3.1338156208299376e-06, "loss": 0.5935, "step": 14193 }, { "epoch": 1.27, "grad_norm": 6.3553750918849605, "learning_rate": 3.133145470417963e-06, "loss": 0.585, "step": 14194 }, { "epoch": 1.27, "grad_norm": 8.909058332302738, "learning_rate": 3.132475358972491e-06, "loss": 0.5939, "step": 14195 }, { "epoch": 1.27, "grad_norm": 7.556654218771383, "learning_rate": 3.131805286507504e-06, "loss": 0.5823, "step": 14196 }, { "epoch": 1.27, "grad_norm": 6.2262978451907705, "learning_rate": 3.131135253036991e-06, "loss": 0.58, "step": 14197 }, { "epoch": 1.27, "grad_norm": 5.768669259223704, "learning_rate": 3.1304652585749375e-06, "loss": 0.6495, "step": 14198 }, { "epoch": 1.27, "grad_norm": 5.277392208883144, "learning_rate": 3.1297953031353267e-06, "loss": 0.5741, "step": 14199 }, { "epoch": 1.27, "grad_norm": 7.503469595682173, "learning_rate": 3.1291253867321425e-06, "loss": 0.5864, "step": 14200 }, { "epoch": 1.27, "grad_norm": 6.745831337259602, "learning_rate": 3.1284555093793713e-06, "loss": 0.5299, "step": 14201 }, { "epoch": 1.27, "grad_norm": 6.666942037502495, "learning_rate": 3.1277856710909906e-06, "loss": 0.5353, "step": 14202 }, { "epoch": 1.27, "grad_norm": 5.194199978769812, "learning_rate": 3.1271158718809835e-06, "loss": 0.5569, "step": 14203 }, { "epoch": 1.27, "grad_norm": 7.4002925355548905, "learning_rate": 3.126446111763333e-06, "loss": 0.5895, "step": 14204 }, { "epoch": 1.27, "grad_norm": 6.669597814249999, "learning_rate": 3.1257763907520157e-06, "loss": 0.547, "step": 14205 }, { "epoch": 1.27, "grad_norm": 6.13875042033398, "learning_rate": 3.1251067088610134e-06, "loss": 0.5721, "step": 14206 }, { "epoch": 1.27, "grad_norm": 9.160312276270883, "learning_rate": 3.124437066104301e-06, "loss": 0.6371, "step": 14207 }, { "epoch": 1.27, "grad_norm": 5.8240299852373525, "learning_rate": 3.123767462495858e-06, "loss": 0.6185, "step": 14208 }, { "epoch": 1.27, "grad_norm": 5.191085451679857, "learning_rate": 3.1230978980496594e-06, "loss": 0.5798, "step": 14209 }, { "epoch": 1.27, "grad_norm": 8.247850563114849, "learning_rate": 3.1224283727796824e-06, "loss": 0.5724, "step": 14210 }, { "epoch": 1.27, "grad_norm": 5.046461606035232, "learning_rate": 3.1217588866999017e-06, "loss": 0.559, "step": 14211 }, { "epoch": 1.27, "grad_norm": 7.813814197497029, "learning_rate": 3.121089439824291e-06, "loss": 0.6003, "step": 14212 }, { "epoch": 1.27, "grad_norm": 4.984119235874705, "learning_rate": 3.120420032166822e-06, "loss": 0.6263, "step": 14213 }, { "epoch": 1.27, "grad_norm": 7.335981322248142, "learning_rate": 3.1197506637414702e-06, "loss": 0.5611, "step": 14214 }, { "epoch": 1.27, "grad_norm": 5.783742820369118, "learning_rate": 3.119081334562205e-06, "loss": 0.6224, "step": 14215 }, { "epoch": 1.27, "grad_norm": 7.1060029094150226, "learning_rate": 3.1184120446429977e-06, "loss": 0.5845, "step": 14216 }, { "epoch": 1.27, "grad_norm": 4.628802882489749, "learning_rate": 3.1177427939978187e-06, "loss": 0.5838, "step": 14217 }, { "epoch": 1.27, "grad_norm": 5.729326600447655, "learning_rate": 3.1170735826406367e-06, "loss": 0.649, "step": 14218 }, { "epoch": 1.27, "grad_norm": 6.239336196574895, "learning_rate": 3.1164044105854197e-06, "loss": 0.6201, "step": 14219 }, { "epoch": 1.27, "grad_norm": 6.102852234236515, "learning_rate": 3.115735277846137e-06, "loss": 0.5593, "step": 14220 }, { "epoch": 1.27, "grad_norm": 5.995396505325374, "learning_rate": 3.115066184436754e-06, "loss": 0.5682, "step": 14221 }, { "epoch": 1.27, "grad_norm": 7.079799004528221, "learning_rate": 3.1143971303712352e-06, "loss": 0.6187, "step": 14222 }, { "epoch": 1.27, "grad_norm": 6.320089070263943, "learning_rate": 3.113728115663547e-06, "loss": 0.6077, "step": 14223 }, { "epoch": 1.27, "grad_norm": 7.749677453447969, "learning_rate": 3.1130591403276543e-06, "loss": 0.6679, "step": 14224 }, { "epoch": 1.27, "grad_norm": 5.660356567377498, "learning_rate": 3.1123902043775184e-06, "loss": 0.6059, "step": 14225 }, { "epoch": 1.27, "grad_norm": 7.083363663416029, "learning_rate": 3.1117213078271036e-06, "loss": 0.5508, "step": 14226 }, { "epoch": 1.27, "grad_norm": 5.998723469062759, "learning_rate": 3.1110524506903715e-06, "loss": 0.5632, "step": 14227 }, { "epoch": 1.27, "grad_norm": 6.8944294365451695, "learning_rate": 3.1103836329812833e-06, "loss": 0.5906, "step": 14228 }, { "epoch": 1.27, "grad_norm": 6.856703683825819, "learning_rate": 3.1097148547137967e-06, "loss": 0.569, "step": 14229 }, { "epoch": 1.27, "grad_norm": 6.912603394898779, "learning_rate": 3.1090461159018753e-06, "loss": 0.5499, "step": 14230 }, { "epoch": 1.27, "grad_norm": 7.691173160590024, "learning_rate": 3.1083774165594737e-06, "loss": 0.6673, "step": 14231 }, { "epoch": 1.27, "grad_norm": 8.269041304521982, "learning_rate": 3.1077087567005504e-06, "loss": 0.659, "step": 14232 }, { "epoch": 1.27, "grad_norm": 6.8013448668031895, "learning_rate": 3.1070401363390647e-06, "loss": 0.643, "step": 14233 }, { "epoch": 1.27, "grad_norm": 6.419292694077384, "learning_rate": 3.106371555488969e-06, "loss": 0.6305, "step": 14234 }, { "epoch": 1.27, "grad_norm": 8.943833539430145, "learning_rate": 3.105703014164222e-06, "loss": 0.5998, "step": 14235 }, { "epoch": 1.27, "grad_norm": 5.903888829930047, "learning_rate": 3.1050345123787747e-06, "loss": 0.6119, "step": 14236 }, { "epoch": 1.27, "grad_norm": 7.074583214424002, "learning_rate": 3.104366050146582e-06, "loss": 0.5222, "step": 14237 }, { "epoch": 1.27, "grad_norm": 5.9277232880543655, "learning_rate": 3.1036976274815967e-06, "loss": 0.5942, "step": 14238 }, { "epoch": 1.27, "grad_norm": 7.554341856569497, "learning_rate": 3.1030292443977705e-06, "loss": 0.6437, "step": 14239 }, { "epoch": 1.27, "grad_norm": 7.168673845974766, "learning_rate": 3.102360900909055e-06, "loss": 0.6388, "step": 14240 }, { "epoch": 1.27, "grad_norm": 6.025031508591829, "learning_rate": 3.1016925970293997e-06, "loss": 0.6086, "step": 14241 }, { "epoch": 1.27, "grad_norm": 5.045135326347537, "learning_rate": 3.1010243327727535e-06, "loss": 0.5403, "step": 14242 }, { "epoch": 1.27, "grad_norm": 6.986581791264296, "learning_rate": 3.100356108153067e-06, "loss": 0.6597, "step": 14243 }, { "epoch": 1.27, "grad_norm": 8.891436296707225, "learning_rate": 3.0996879231842856e-06, "loss": 0.6157, "step": 14244 }, { "epoch": 1.27, "grad_norm": 6.832163532580829, "learning_rate": 3.0990197778803573e-06, "loss": 0.6091, "step": 14245 }, { "epoch": 1.27, "grad_norm": 6.583140208531847, "learning_rate": 3.0983516722552287e-06, "loss": 0.5786, "step": 14246 }, { "epoch": 1.27, "grad_norm": 6.792662164063253, "learning_rate": 3.0976836063228433e-06, "loss": 0.6062, "step": 14247 }, { "epoch": 1.27, "grad_norm": 6.790451857620161, "learning_rate": 3.097015580097148e-06, "loss": 0.6118, "step": 14248 }, { "epoch": 1.27, "grad_norm": 5.2213016928643565, "learning_rate": 3.0963475935920837e-06, "loss": 0.6149, "step": 14249 }, { "epoch": 1.27, "grad_norm": 6.234415628691221, "learning_rate": 3.0956796468215965e-06, "loss": 0.6413, "step": 14250 }, { "epoch": 1.27, "grad_norm": 6.813345218559186, "learning_rate": 3.095011739799625e-06, "loss": 0.5486, "step": 14251 }, { "epoch": 1.27, "grad_norm": 6.066415365896051, "learning_rate": 3.094343872540111e-06, "loss": 0.5765, "step": 14252 }, { "epoch": 1.27, "grad_norm": 7.434258299681204, "learning_rate": 3.093676045056996e-06, "loss": 0.5298, "step": 14253 }, { "epoch": 1.27, "grad_norm": 6.473804072843131, "learning_rate": 3.0930082573642183e-06, "loss": 0.6463, "step": 14254 }, { "epoch": 1.27, "grad_norm": 10.698363818516853, "learning_rate": 3.0923405094757165e-06, "loss": 0.5869, "step": 14255 }, { "epoch": 1.27, "grad_norm": 7.818556474619349, "learning_rate": 3.0916728014054305e-06, "loss": 0.6414, "step": 14256 }, { "epoch": 1.27, "grad_norm": 8.861939405602447, "learning_rate": 3.091005133167294e-06, "loss": 0.5301, "step": 14257 }, { "epoch": 1.27, "grad_norm": 7.156114565594416, "learning_rate": 3.090337504775246e-06, "loss": 0.6125, "step": 14258 }, { "epoch": 1.27, "grad_norm": 5.917903221441524, "learning_rate": 3.08966991624322e-06, "loss": 0.6235, "step": 14259 }, { "epoch": 1.27, "grad_norm": 5.1613935770679475, "learning_rate": 3.08900236758515e-06, "loss": 0.6246, "step": 14260 }, { "epoch": 1.27, "grad_norm": 5.644894811836973, "learning_rate": 3.088334858814972e-06, "loss": 0.5428, "step": 14261 }, { "epoch": 1.27, "grad_norm": 4.810045082312236, "learning_rate": 3.0876673899466164e-06, "loss": 0.5939, "step": 14262 }, { "epoch": 1.27, "grad_norm": 7.262029851398582, "learning_rate": 3.086999960994016e-06, "loss": 0.5773, "step": 14263 }, { "epoch": 1.27, "grad_norm": 10.195939801569406, "learning_rate": 3.0863325719711047e-06, "loss": 0.5769, "step": 14264 }, { "epoch": 1.27, "grad_norm": 8.168682597372745, "learning_rate": 3.085665222891808e-06, "loss": 0.5828, "step": 14265 }, { "epoch": 1.27, "grad_norm": 6.920976493917427, "learning_rate": 3.0849979137700568e-06, "loss": 0.5562, "step": 14266 }, { "epoch": 1.27, "grad_norm": 7.791298442647365, "learning_rate": 3.084330644619781e-06, "loss": 0.5954, "step": 14267 }, { "epoch": 1.27, "grad_norm": 6.9297839110308175, "learning_rate": 3.0836634154549077e-06, "loss": 0.5653, "step": 14268 }, { "epoch": 1.27, "grad_norm": 5.070154395491449, "learning_rate": 3.082996226289363e-06, "loss": 0.5905, "step": 14269 }, { "epoch": 1.27, "grad_norm": 7.144580455257197, "learning_rate": 3.082329077137075e-06, "loss": 0.5983, "step": 14270 }, { "epoch": 1.27, "grad_norm": 6.232464275014895, "learning_rate": 3.081661968011968e-06, "loss": 0.6038, "step": 14271 }, { "epoch": 1.27, "grad_norm": 8.293164832321724, "learning_rate": 3.080994898927966e-06, "loss": 0.6305, "step": 14272 }, { "epoch": 1.27, "grad_norm": 6.944609032632751, "learning_rate": 3.0803278698989926e-06, "loss": 0.5789, "step": 14273 }, { "epoch": 1.27, "grad_norm": 5.8831910617440295, "learning_rate": 3.0796608809389715e-06, "loss": 0.5642, "step": 14274 }, { "epoch": 1.27, "grad_norm": 6.7962676264187225, "learning_rate": 3.0789939320618233e-06, "loss": 0.5564, "step": 14275 }, { "epoch": 1.27, "grad_norm": 7.467791626918249, "learning_rate": 3.07832702328147e-06, "loss": 0.6439, "step": 14276 }, { "epoch": 1.27, "grad_norm": 6.6361439924557555, "learning_rate": 3.0776601546118325e-06, "loss": 0.5547, "step": 14277 }, { "epoch": 1.27, "grad_norm": 8.808965677239383, "learning_rate": 3.0769933260668284e-06, "loss": 0.6319, "step": 14278 }, { "epoch": 1.27, "grad_norm": 6.667533811502846, "learning_rate": 3.076326537660379e-06, "loss": 0.5855, "step": 14279 }, { "epoch": 1.27, "grad_norm": 8.085992125082479, "learning_rate": 3.075659789406399e-06, "loss": 0.5724, "step": 14280 }, { "epoch": 1.27, "grad_norm": 5.883805384217571, "learning_rate": 3.074993081318807e-06, "loss": 0.653, "step": 14281 }, { "epoch": 1.27, "grad_norm": 7.25627421531941, "learning_rate": 3.0743264134115186e-06, "loss": 0.6408, "step": 14282 }, { "epoch": 1.27, "grad_norm": 7.493316759165138, "learning_rate": 3.0736597856984486e-06, "loss": 0.6284, "step": 14283 }, { "epoch": 1.27, "grad_norm": 7.1024514090059485, "learning_rate": 3.072993198193513e-06, "loss": 0.5951, "step": 14284 }, { "epoch": 1.27, "grad_norm": 4.793914059615104, "learning_rate": 3.0723266509106232e-06, "loss": 0.6171, "step": 14285 }, { "epoch": 1.27, "grad_norm": 8.65980062502108, "learning_rate": 3.0716601438636934e-06, "loss": 0.6547, "step": 14286 }, { "epoch": 1.27, "grad_norm": 6.42235348248907, "learning_rate": 3.070993677066636e-06, "loss": 0.6163, "step": 14287 }, { "epoch": 1.27, "grad_norm": 6.228579719428768, "learning_rate": 3.07032725053336e-06, "loss": 0.593, "step": 14288 }, { "epoch": 1.27, "grad_norm": 6.870585553822756, "learning_rate": 3.069660864277777e-06, "loss": 0.5687, "step": 14289 }, { "epoch": 1.27, "grad_norm": 5.895604003551127, "learning_rate": 3.068994518313797e-06, "loss": 0.5403, "step": 14290 }, { "epoch": 1.27, "grad_norm": 5.819614149707555, "learning_rate": 3.068328212655326e-06, "loss": 0.6028, "step": 14291 }, { "epoch": 1.27, "grad_norm": 6.449255400515749, "learning_rate": 3.067661947316274e-06, "loss": 0.5628, "step": 14292 }, { "epoch": 1.28, "grad_norm": 5.8167478707992535, "learning_rate": 3.0669957223105485e-06, "loss": 0.576, "step": 14293 }, { "epoch": 1.28, "grad_norm": 6.6098646413118765, "learning_rate": 3.066329537652054e-06, "loss": 0.5584, "step": 14294 }, { "epoch": 1.28, "grad_norm": 6.235940136366945, "learning_rate": 3.065663393354694e-06, "loss": 0.6056, "step": 14295 }, { "epoch": 1.28, "grad_norm": 6.845955800152011, "learning_rate": 3.0649972894323744e-06, "loss": 0.5746, "step": 14296 }, { "epoch": 1.28, "grad_norm": 6.659744099532797, "learning_rate": 3.0643312258990007e-06, "loss": 0.5565, "step": 14297 }, { "epoch": 1.28, "grad_norm": 7.092797715473327, "learning_rate": 3.0636652027684717e-06, "loss": 0.5302, "step": 14298 }, { "epoch": 1.28, "grad_norm": 8.528806482700434, "learning_rate": 3.062999220054691e-06, "loss": 0.5916, "step": 14299 }, { "epoch": 1.28, "grad_norm": 7.402670631395741, "learning_rate": 3.0623332777715613e-06, "loss": 0.6222, "step": 14300 }, { "epoch": 1.28, "grad_norm": 8.583312177906585, "learning_rate": 3.0616673759329794e-06, "loss": 0.5671, "step": 14301 }, { "epoch": 1.28, "grad_norm": 8.46379892501733, "learning_rate": 3.061001514552847e-06, "loss": 0.6064, "step": 14302 }, { "epoch": 1.28, "grad_norm": 7.22413564101537, "learning_rate": 3.0603356936450625e-06, "loss": 0.6405, "step": 14303 }, { "epoch": 1.28, "grad_norm": 4.837745172169967, "learning_rate": 3.0596699132235207e-06, "loss": 0.5624, "step": 14304 }, { "epoch": 1.28, "grad_norm": 6.593361176182623, "learning_rate": 3.059004173302121e-06, "loss": 0.5621, "step": 14305 }, { "epoch": 1.28, "grad_norm": 8.614762976905338, "learning_rate": 3.058338473894759e-06, "loss": 0.6393, "step": 14306 }, { "epoch": 1.28, "grad_norm": 9.044602794266224, "learning_rate": 3.0576728150153306e-06, "loss": 0.6063, "step": 14307 }, { "epoch": 1.28, "grad_norm": 7.693069153526909, "learning_rate": 3.0570071966777267e-06, "loss": 0.5407, "step": 14308 }, { "epoch": 1.28, "grad_norm": 5.555061104533352, "learning_rate": 3.056341618895842e-06, "loss": 0.5213, "step": 14309 }, { "epoch": 1.28, "grad_norm": 6.851284808719114, "learning_rate": 3.0556760816835706e-06, "loss": 0.5975, "step": 14310 }, { "epoch": 1.28, "grad_norm": 6.468626144861592, "learning_rate": 3.055010585054802e-06, "loss": 0.5807, "step": 14311 }, { "epoch": 1.28, "grad_norm": 6.923367024490816, "learning_rate": 3.0543451290234283e-06, "loss": 0.5736, "step": 14312 }, { "epoch": 1.28, "grad_norm": 5.4097998292197715, "learning_rate": 3.0536797136033395e-06, "loss": 0.5828, "step": 14313 }, { "epoch": 1.28, "grad_norm": 6.370782621579315, "learning_rate": 3.053014338808424e-06, "loss": 0.6132, "step": 14314 }, { "epoch": 1.28, "grad_norm": 7.721757983250321, "learning_rate": 3.0523490046525693e-06, "loss": 0.6115, "step": 14315 }, { "epoch": 1.28, "grad_norm": 6.493781272618809, "learning_rate": 3.0516837111496663e-06, "loss": 0.5358, "step": 14316 }, { "epoch": 1.28, "grad_norm": 6.6402423824366945, "learning_rate": 3.051018458313597e-06, "loss": 0.5552, "step": 14317 }, { "epoch": 1.28, "grad_norm": 6.622464268004977, "learning_rate": 3.0503532461582495e-06, "loss": 0.61, "step": 14318 }, { "epoch": 1.28, "grad_norm": 6.984473716136694, "learning_rate": 3.0496880746975095e-06, "loss": 0.5739, "step": 14319 }, { "epoch": 1.28, "grad_norm": 6.175806184352043, "learning_rate": 3.049022943945259e-06, "loss": 0.6168, "step": 14320 }, { "epoch": 1.28, "grad_norm": 4.949973474645171, "learning_rate": 3.048357853915381e-06, "loss": 0.5835, "step": 14321 }, { "epoch": 1.28, "grad_norm": 8.757594011401247, "learning_rate": 3.0476928046217618e-06, "loss": 0.5906, "step": 14322 }, { "epoch": 1.28, "grad_norm": 5.4085677896361775, "learning_rate": 3.0470277960782784e-06, "loss": 0.5679, "step": 14323 }, { "epoch": 1.28, "grad_norm": 7.057962868567656, "learning_rate": 3.046362828298812e-06, "loss": 0.591, "step": 14324 }, { "epoch": 1.28, "grad_norm": 6.6696734487736995, "learning_rate": 3.045697901297243e-06, "loss": 0.5843, "step": 14325 }, { "epoch": 1.28, "grad_norm": 5.824674157993956, "learning_rate": 3.045033015087452e-06, "loss": 0.5389, "step": 14326 }, { "epoch": 1.28, "grad_norm": 7.348036588829832, "learning_rate": 3.044368169683314e-06, "loss": 0.6422, "step": 14327 }, { "epoch": 1.28, "grad_norm": 7.157059886998844, "learning_rate": 3.043703365098707e-06, "loss": 0.5722, "step": 14328 }, { "epoch": 1.28, "grad_norm": 5.617081775996938, "learning_rate": 3.0430386013475097e-06, "loss": 0.5986, "step": 14329 }, { "epoch": 1.28, "grad_norm": 5.727959659727873, "learning_rate": 3.042373878443595e-06, "loss": 0.5821, "step": 14330 }, { "epoch": 1.28, "grad_norm": 8.7947060066047, "learning_rate": 3.041709196400839e-06, "loss": 0.5981, "step": 14331 }, { "epoch": 1.28, "grad_norm": 5.9803431739196515, "learning_rate": 3.0410445552331155e-06, "loss": 0.6101, "step": 14332 }, { "epoch": 1.28, "grad_norm": 6.213762409379191, "learning_rate": 3.0403799549542958e-06, "loss": 0.5317, "step": 14333 }, { "epoch": 1.28, "grad_norm": 7.355672485492928, "learning_rate": 3.0397153955782532e-06, "loss": 0.5986, "step": 14334 }, { "epoch": 1.28, "grad_norm": 5.760193223718291, "learning_rate": 3.039050877118859e-06, "loss": 0.6066, "step": 14335 }, { "epoch": 1.28, "grad_norm": 8.789958809959167, "learning_rate": 3.038386399589985e-06, "loss": 0.6223, "step": 14336 }, { "epoch": 1.28, "grad_norm": 6.312145189369302, "learning_rate": 3.0377219630054967e-06, "loss": 0.5362, "step": 14337 }, { "epoch": 1.28, "grad_norm": 6.069933981319586, "learning_rate": 3.037057567379266e-06, "loss": 0.5434, "step": 14338 }, { "epoch": 1.28, "grad_norm": 8.017657864510372, "learning_rate": 3.0363932127251595e-06, "loss": 0.6481, "step": 14339 }, { "epoch": 1.28, "grad_norm": 8.912913069664853, "learning_rate": 3.035728899057044e-06, "loss": 0.6206, "step": 14340 }, { "epoch": 1.28, "grad_norm": 7.545767784314333, "learning_rate": 3.0350646263887875e-06, "loss": 0.6048, "step": 14341 }, { "epoch": 1.28, "grad_norm": 8.602418686082135, "learning_rate": 3.0344003947342527e-06, "loss": 0.5883, "step": 14342 }, { "epoch": 1.28, "grad_norm": 8.064167558461232, "learning_rate": 3.033736204107305e-06, "loss": 0.5877, "step": 14343 }, { "epoch": 1.28, "grad_norm": 6.009610810023753, "learning_rate": 3.0330720545218086e-06, "loss": 0.6243, "step": 14344 }, { "epoch": 1.28, "grad_norm": 5.8157846897559216, "learning_rate": 3.0324079459916243e-06, "loss": 0.5912, "step": 14345 }, { "epoch": 1.28, "grad_norm": 7.8942669618033, "learning_rate": 3.0317438785306164e-06, "loss": 0.5509, "step": 14346 }, { "epoch": 1.28, "grad_norm": 7.046989192574817, "learning_rate": 3.031079852152644e-06, "loss": 0.6456, "step": 14347 }, { "epoch": 1.28, "grad_norm": 5.794560843753916, "learning_rate": 3.030415866871568e-06, "loss": 0.5684, "step": 14348 }, { "epoch": 1.28, "grad_norm": 6.828012648488844, "learning_rate": 3.0297519227012472e-06, "loss": 0.5838, "step": 14349 }, { "epoch": 1.28, "grad_norm": 6.691471404379447, "learning_rate": 3.029088019655542e-06, "loss": 0.6336, "step": 14350 }, { "epoch": 1.28, "grad_norm": 6.036297068939976, "learning_rate": 3.0284241577483074e-06, "loss": 0.6367, "step": 14351 }, { "epoch": 1.28, "grad_norm": 4.8253795336043614, "learning_rate": 3.0277603369934004e-06, "loss": 0.5978, "step": 14352 }, { "epoch": 1.28, "grad_norm": 5.746917094150679, "learning_rate": 3.0270965574046762e-06, "loss": 0.6017, "step": 14353 }, { "epoch": 1.28, "grad_norm": 6.4607631820179, "learning_rate": 3.0264328189959924e-06, "loss": 0.6215, "step": 14354 }, { "epoch": 1.28, "grad_norm": 7.463362677406458, "learning_rate": 3.0257691217812003e-06, "loss": 0.6159, "step": 14355 }, { "epoch": 1.28, "grad_norm": 6.759879548734567, "learning_rate": 3.025105465774154e-06, "loss": 0.5901, "step": 14356 }, { "epoch": 1.28, "grad_norm": 6.697949753698949, "learning_rate": 3.024441850988708e-06, "loss": 0.6006, "step": 14357 }, { "epoch": 1.28, "grad_norm": 5.11725984203276, "learning_rate": 3.02377827743871e-06, "loss": 0.5641, "step": 14358 }, { "epoch": 1.28, "grad_norm": 7.640768898172056, "learning_rate": 3.0231147451380134e-06, "loss": 0.6524, "step": 14359 }, { "epoch": 1.28, "grad_norm": 5.269534441951477, "learning_rate": 3.022451254100468e-06, "loss": 0.5804, "step": 14360 }, { "epoch": 1.28, "grad_norm": 5.296494171047499, "learning_rate": 3.0217878043399205e-06, "loss": 0.6182, "step": 14361 }, { "epoch": 1.28, "grad_norm": 6.384066763854973, "learning_rate": 3.0211243958702207e-06, "loss": 0.5797, "step": 14362 }, { "epoch": 1.28, "grad_norm": 4.869319366820157, "learning_rate": 3.020461028705216e-06, "loss": 0.6399, "step": 14363 }, { "epoch": 1.28, "grad_norm": 6.9306178710307815, "learning_rate": 3.0197977028587523e-06, "loss": 0.5688, "step": 14364 }, { "epoch": 1.28, "grad_norm": 9.102073271754799, "learning_rate": 3.0191344183446756e-06, "loss": 0.6137, "step": 14365 }, { "epoch": 1.28, "grad_norm": 6.755903058489476, "learning_rate": 3.0184711751768284e-06, "loss": 0.6039, "step": 14366 }, { "epoch": 1.28, "grad_norm": 6.99306986163534, "learning_rate": 3.0178079733690575e-06, "loss": 0.5615, "step": 14367 }, { "epoch": 1.28, "grad_norm": 6.821766197420608, "learning_rate": 3.0171448129352025e-06, "loss": 0.5061, "step": 14368 }, { "epoch": 1.28, "grad_norm": 8.011335284526892, "learning_rate": 3.016481693889108e-06, "loss": 0.5738, "step": 14369 }, { "epoch": 1.28, "grad_norm": 8.581667925303389, "learning_rate": 3.0158186162446144e-06, "loss": 0.5803, "step": 14370 }, { "epoch": 1.28, "grad_norm": 6.804071797266645, "learning_rate": 3.015155580015562e-06, "loss": 0.6375, "step": 14371 }, { "epoch": 1.28, "grad_norm": 6.543247062622981, "learning_rate": 3.014492585215789e-06, "loss": 0.5714, "step": 14372 }, { "epoch": 1.28, "grad_norm": 7.497285761895214, "learning_rate": 3.0138296318591365e-06, "loss": 0.5792, "step": 14373 }, { "epoch": 1.28, "grad_norm": 9.791748847589739, "learning_rate": 3.0131667199594396e-06, "loss": 0.5473, "step": 14374 }, { "epoch": 1.28, "grad_norm": 7.962116724997619, "learning_rate": 3.012503849530537e-06, "loss": 0.6473, "step": 14375 }, { "epoch": 1.28, "grad_norm": 8.805485935512662, "learning_rate": 3.011841020586264e-06, "loss": 0.6466, "step": 14376 }, { "epoch": 1.28, "grad_norm": 7.141315008362686, "learning_rate": 3.0111782331404556e-06, "loss": 0.6109, "step": 14377 }, { "epoch": 1.28, "grad_norm": 7.385817080194886, "learning_rate": 3.010515487206945e-06, "loss": 0.5511, "step": 14378 }, { "epoch": 1.28, "grad_norm": 6.531865460087033, "learning_rate": 3.0098527827995705e-06, "loss": 0.5865, "step": 14379 }, { "epoch": 1.28, "grad_norm": 6.254039209004664, "learning_rate": 3.0091901199321582e-06, "loss": 0.5662, "step": 14380 }, { "epoch": 1.28, "grad_norm": 5.193749963469741, "learning_rate": 3.008527498618542e-06, "loss": 0.6018, "step": 14381 }, { "epoch": 1.28, "grad_norm": 5.649871901771433, "learning_rate": 3.007864918872553e-06, "loss": 0.5874, "step": 14382 }, { "epoch": 1.28, "grad_norm": 4.9688691911898735, "learning_rate": 3.007202380708022e-06, "loss": 0.5827, "step": 14383 }, { "epoch": 1.28, "grad_norm": 6.275159180190917, "learning_rate": 3.006539884138777e-06, "loss": 0.6815, "step": 14384 }, { "epoch": 1.28, "grad_norm": 6.673871408210668, "learning_rate": 3.0058774291786452e-06, "loss": 0.5902, "step": 14385 }, { "epoch": 1.28, "grad_norm": 7.117984593874003, "learning_rate": 3.005215015841456e-06, "loss": 0.5869, "step": 14386 }, { "epoch": 1.28, "grad_norm": 6.810975711845501, "learning_rate": 3.0045526441410345e-06, "loss": 0.6097, "step": 14387 }, { "epoch": 1.28, "grad_norm": 8.551732812221912, "learning_rate": 3.0038903140912064e-06, "loss": 0.577, "step": 14388 }, { "epoch": 1.28, "grad_norm": 6.77656003481528, "learning_rate": 3.003228025705797e-06, "loss": 0.5484, "step": 14389 }, { "epoch": 1.28, "grad_norm": 8.072311964229831, "learning_rate": 3.0025657789986294e-06, "loss": 0.6813, "step": 14390 }, { "epoch": 1.28, "grad_norm": 6.091237050436139, "learning_rate": 3.0019035739835265e-06, "loss": 0.5719, "step": 14391 }, { "epoch": 1.28, "grad_norm": 6.02138209346955, "learning_rate": 3.001241410674312e-06, "loss": 0.5339, "step": 14392 }, { "epoch": 1.28, "grad_norm": 7.2785118233909, "learning_rate": 3.000579289084805e-06, "loss": 0.6815, "step": 14393 }, { "epoch": 1.28, "grad_norm": 7.465857273112296, "learning_rate": 2.999917209228829e-06, "loss": 0.5916, "step": 14394 }, { "epoch": 1.28, "grad_norm": 6.394551575051437, "learning_rate": 2.9992551711201977e-06, "loss": 0.629, "step": 14395 }, { "epoch": 1.28, "grad_norm": 6.717746201154778, "learning_rate": 2.998593174772735e-06, "loss": 0.6452, "step": 14396 }, { "epoch": 1.28, "grad_norm": 9.20931204771821, "learning_rate": 2.9979312202002563e-06, "loss": 0.5965, "step": 14397 }, { "epoch": 1.28, "grad_norm": 7.171815444819069, "learning_rate": 2.9972693074165782e-06, "loss": 0.5878, "step": 14398 }, { "epoch": 1.28, "grad_norm": 6.322540400602786, "learning_rate": 2.9966074364355186e-06, "loss": 0.5789, "step": 14399 }, { "epoch": 1.28, "grad_norm": 4.750910988809198, "learning_rate": 2.9959456072708904e-06, "loss": 0.5846, "step": 14400 }, { "epoch": 1.28, "grad_norm": 7.07144267188725, "learning_rate": 2.9952838199365086e-06, "loss": 0.5455, "step": 14401 }, { "epoch": 1.28, "grad_norm": 7.288642631383981, "learning_rate": 2.9946220744461885e-06, "loss": 0.5809, "step": 14402 }, { "epoch": 1.28, "grad_norm": 7.778273456316964, "learning_rate": 2.9939603708137395e-06, "loss": 0.576, "step": 14403 }, { "epoch": 1.28, "grad_norm": 7.020343951142864, "learning_rate": 2.993298709052974e-06, "loss": 0.6374, "step": 14404 }, { "epoch": 1.29, "grad_norm": 6.601210127060374, "learning_rate": 2.992637089177705e-06, "loss": 0.6025, "step": 14405 }, { "epoch": 1.29, "grad_norm": 6.490660037863082, "learning_rate": 2.99197551120174e-06, "loss": 0.6195, "step": 14406 }, { "epoch": 1.29, "grad_norm": 4.544748043002633, "learning_rate": 2.991313975138888e-06, "loss": 0.5627, "step": 14407 }, { "epoch": 1.29, "grad_norm": 7.122196329547084, "learning_rate": 2.9906524810029603e-06, "loss": 0.5759, "step": 14408 }, { "epoch": 1.29, "grad_norm": 7.359433099485161, "learning_rate": 2.9899910288077605e-06, "loss": 0.6045, "step": 14409 }, { "epoch": 1.29, "grad_norm": 6.591937463117704, "learning_rate": 2.989329618567095e-06, "loss": 0.6199, "step": 14410 }, { "epoch": 1.29, "grad_norm": 5.352429490880989, "learning_rate": 2.9886682502947705e-06, "loss": 0.6135, "step": 14411 }, { "epoch": 1.29, "grad_norm": 6.050420548421991, "learning_rate": 2.9880069240045928e-06, "loss": 0.6756, "step": 14412 }, { "epoch": 1.29, "grad_norm": 6.719338475646623, "learning_rate": 2.9873456397103636e-06, "loss": 0.5225, "step": 14413 }, { "epoch": 1.29, "grad_norm": 6.267706208212715, "learning_rate": 2.986684397425886e-06, "loss": 0.6683, "step": 14414 }, { "epoch": 1.29, "grad_norm": 5.277878442931311, "learning_rate": 2.9860231971649635e-06, "loss": 0.5783, "step": 14415 }, { "epoch": 1.29, "grad_norm": 7.180729546338164, "learning_rate": 2.985362038941395e-06, "loss": 0.5667, "step": 14416 }, { "epoch": 1.29, "grad_norm": 8.175315697180206, "learning_rate": 2.9847009227689837e-06, "loss": 0.6434, "step": 14417 }, { "epoch": 1.29, "grad_norm": 5.241542736381951, "learning_rate": 2.984039848661526e-06, "loss": 0.6247, "step": 14418 }, { "epoch": 1.29, "grad_norm": 7.619062741973584, "learning_rate": 2.9833788166328216e-06, "loss": 0.6208, "step": 14419 }, { "epoch": 1.29, "grad_norm": 8.326711253432372, "learning_rate": 2.9827178266966684e-06, "loss": 0.6166, "step": 14420 }, { "epoch": 1.29, "grad_norm": 5.175539236247336, "learning_rate": 2.9820568788668623e-06, "loss": 0.5958, "step": 14421 }, { "epoch": 1.29, "grad_norm": 8.041452148426877, "learning_rate": 2.9813959731571996e-06, "loss": 0.632, "step": 14422 }, { "epoch": 1.29, "grad_norm": 5.8813924779902145, "learning_rate": 2.9807351095814774e-06, "loss": 0.5999, "step": 14423 }, { "epoch": 1.29, "grad_norm": 6.628485086893321, "learning_rate": 2.9800742881534857e-06, "loss": 0.6365, "step": 14424 }, { "epoch": 1.29, "grad_norm": 5.9116879558409465, "learning_rate": 2.97941350888702e-06, "loss": 0.5929, "step": 14425 }, { "epoch": 1.29, "grad_norm": 8.694945382717716, "learning_rate": 2.978752771795872e-06, "loss": 0.6167, "step": 14426 }, { "epoch": 1.29, "grad_norm": 5.5976980693651575, "learning_rate": 2.9780920768938345e-06, "loss": 0.61, "step": 14427 }, { "epoch": 1.29, "grad_norm": 5.075185954413045, "learning_rate": 2.9774314241946957e-06, "loss": 0.5728, "step": 14428 }, { "epoch": 1.29, "grad_norm": 6.049814540447643, "learning_rate": 2.9767708137122462e-06, "loss": 0.5773, "step": 14429 }, { "epoch": 1.29, "grad_norm": 6.247392618575722, "learning_rate": 2.976110245460277e-06, "loss": 0.5983, "step": 14430 }, { "epoch": 1.29, "grad_norm": 5.676823111955589, "learning_rate": 2.9754497194525726e-06, "loss": 0.6403, "step": 14431 }, { "epoch": 1.29, "grad_norm": 6.3580829363464435, "learning_rate": 2.9747892357029216e-06, "loss": 0.5533, "step": 14432 }, { "epoch": 1.29, "grad_norm": 7.563709019204993, "learning_rate": 2.974128794225112e-06, "loss": 0.6295, "step": 14433 }, { "epoch": 1.29, "grad_norm": 7.704241637271939, "learning_rate": 2.9734683950329252e-06, "loss": 0.5879, "step": 14434 }, { "epoch": 1.29, "grad_norm": 7.384434006269605, "learning_rate": 2.9728080381401483e-06, "loss": 0.5758, "step": 14435 }, { "epoch": 1.29, "grad_norm": 5.7199491651219825, "learning_rate": 2.9721477235605646e-06, "loss": 0.6355, "step": 14436 }, { "epoch": 1.29, "grad_norm": 7.177447766636385, "learning_rate": 2.971487451307957e-06, "loss": 0.6201, "step": 14437 }, { "epoch": 1.29, "grad_norm": 6.012642094416646, "learning_rate": 2.970827221396105e-06, "loss": 0.609, "step": 14438 }, { "epoch": 1.29, "grad_norm": 6.2710244661418315, "learning_rate": 2.970167033838791e-06, "loss": 0.602, "step": 14439 }, { "epoch": 1.29, "grad_norm": 5.066599519877523, "learning_rate": 2.9695068886497964e-06, "loss": 0.6141, "step": 14440 }, { "epoch": 1.29, "grad_norm": 7.390458290205408, "learning_rate": 2.968846785842897e-06, "loss": 0.6111, "step": 14441 }, { "epoch": 1.29, "grad_norm": 4.2182718630909495, "learning_rate": 2.968186725431873e-06, "loss": 0.6417, "step": 14442 }, { "epoch": 1.29, "grad_norm": 9.09551022615898, "learning_rate": 2.9675267074305023e-06, "loss": 0.6229, "step": 14443 }, { "epoch": 1.29, "grad_norm": 7.733975075562566, "learning_rate": 2.9668667318525603e-06, "loss": 0.6691, "step": 14444 }, { "epoch": 1.29, "grad_norm": 6.230984917763174, "learning_rate": 2.9662067987118227e-06, "loss": 0.6441, "step": 14445 }, { "epoch": 1.29, "grad_norm": 5.789848139733644, "learning_rate": 2.9655469080220645e-06, "loss": 0.5713, "step": 14446 }, { "epoch": 1.29, "grad_norm": 6.419212430122438, "learning_rate": 2.964887059797058e-06, "loss": 0.5805, "step": 14447 }, { "epoch": 1.29, "grad_norm": 7.4520182895240135, "learning_rate": 2.964227254050578e-06, "loss": 0.5798, "step": 14448 }, { "epoch": 1.29, "grad_norm": 5.226393183192043, "learning_rate": 2.963567490796397e-06, "loss": 0.6302, "step": 14449 }, { "epoch": 1.29, "grad_norm": 6.9274499968145795, "learning_rate": 2.9629077700482835e-06, "loss": 0.6177, "step": 14450 }, { "epoch": 1.29, "grad_norm": 5.949739847549607, "learning_rate": 2.962248091820009e-06, "loss": 0.5949, "step": 14451 }, { "epoch": 1.29, "grad_norm": 7.704920399908984, "learning_rate": 2.9615884561253454e-06, "loss": 0.6517, "step": 14452 }, { "epoch": 1.29, "grad_norm": 7.065451322091057, "learning_rate": 2.9609288629780573e-06, "loss": 0.5872, "step": 14453 }, { "epoch": 1.29, "grad_norm": 5.586936660918267, "learning_rate": 2.9602693123919136e-06, "loss": 0.5914, "step": 14454 }, { "epoch": 1.29, "grad_norm": 5.565660920165601, "learning_rate": 2.95960980438068e-06, "loss": 0.5607, "step": 14455 }, { "epoch": 1.29, "grad_norm": 7.263796195417792, "learning_rate": 2.9589503389581247e-06, "loss": 0.5683, "step": 14456 }, { "epoch": 1.29, "grad_norm": 8.023559193371083, "learning_rate": 2.9582909161380102e-06, "loss": 0.6174, "step": 14457 }, { "epoch": 1.29, "grad_norm": 5.550958159408224, "learning_rate": 2.9576315359341016e-06, "loss": 0.5343, "step": 14458 }, { "epoch": 1.29, "grad_norm": 5.433858968770386, "learning_rate": 2.956972198360163e-06, "loss": 0.5611, "step": 14459 }, { "epoch": 1.29, "grad_norm": 6.3446663607984055, "learning_rate": 2.9563129034299546e-06, "loss": 0.612, "step": 14460 }, { "epoch": 1.29, "grad_norm": 9.53026769134544, "learning_rate": 2.9556536511572386e-06, "loss": 0.5569, "step": 14461 }, { "epoch": 1.29, "grad_norm": 6.2412641910058415, "learning_rate": 2.9549944415557774e-06, "loss": 0.6269, "step": 14462 }, { "epoch": 1.29, "grad_norm": 7.587887939921509, "learning_rate": 2.9543352746393274e-06, "loss": 0.6304, "step": 14463 }, { "epoch": 1.29, "grad_norm": 5.974147737994717, "learning_rate": 2.9536761504216478e-06, "loss": 0.6062, "step": 14464 }, { "epoch": 1.29, "grad_norm": 5.169903989105843, "learning_rate": 2.953017068916499e-06, "loss": 0.6347, "step": 14465 }, { "epoch": 1.29, "grad_norm": 7.32194438683674, "learning_rate": 2.9523580301376364e-06, "loss": 0.5664, "step": 14466 }, { "epoch": 1.29, "grad_norm": 4.4698692992806945, "learning_rate": 2.9516990340988137e-06, "loss": 0.5712, "step": 14467 }, { "epoch": 1.29, "grad_norm": 7.264261181345793, "learning_rate": 2.9510400808137885e-06, "loss": 0.6136, "step": 14468 }, { "epoch": 1.29, "grad_norm": 5.1062475869052495, "learning_rate": 2.9503811702963157e-06, "loss": 0.6122, "step": 14469 }, { "epoch": 1.29, "grad_norm": 5.606051457279306, "learning_rate": 2.9497223025601464e-06, "loss": 0.6225, "step": 14470 }, { "epoch": 1.29, "grad_norm": 6.5665490648723, "learning_rate": 2.949063477619033e-06, "loss": 0.5786, "step": 14471 }, { "epoch": 1.29, "grad_norm": 6.947497018375507, "learning_rate": 2.9484046954867295e-06, "loss": 0.5627, "step": 14472 }, { "epoch": 1.29, "grad_norm": 7.01405433561849, "learning_rate": 2.9477459561769837e-06, "loss": 0.5653, "step": 14473 }, { "epoch": 1.29, "grad_norm": 5.857553890874094, "learning_rate": 2.947087259703548e-06, "loss": 0.6598, "step": 14474 }, { "epoch": 1.29, "grad_norm": 6.299606075557728, "learning_rate": 2.9464286060801696e-06, "loss": 0.6011, "step": 14475 }, { "epoch": 1.29, "grad_norm": 8.264069576661537, "learning_rate": 2.9457699953205964e-06, "loss": 0.5911, "step": 14476 }, { "epoch": 1.29, "grad_norm": 5.814710529979519, "learning_rate": 2.9451114274385763e-06, "loss": 0.5905, "step": 14477 }, { "epoch": 1.29, "grad_norm": 5.62531385999506, "learning_rate": 2.944452902447855e-06, "loss": 0.6191, "step": 14478 }, { "epoch": 1.29, "grad_norm": 6.283886613167699, "learning_rate": 2.943794420362177e-06, "loss": 0.5712, "step": 14479 }, { "epoch": 1.29, "grad_norm": 6.32928636396176, "learning_rate": 2.9431359811952904e-06, "loss": 0.6337, "step": 14480 }, { "epoch": 1.29, "grad_norm": 6.97385638156927, "learning_rate": 2.9424775849609332e-06, "loss": 0.6148, "step": 14481 }, { "epoch": 1.29, "grad_norm": 11.780910325628934, "learning_rate": 2.941819231672851e-06, "loss": 0.6006, "step": 14482 }, { "epoch": 1.29, "grad_norm": 5.213129265179419, "learning_rate": 2.9411609213447847e-06, "loss": 0.538, "step": 14483 }, { "epoch": 1.29, "grad_norm": 4.221343122944482, "learning_rate": 2.940502653990475e-06, "loss": 0.5998, "step": 14484 }, { "epoch": 1.29, "grad_norm": 6.7199667320361, "learning_rate": 2.9398444296236626e-06, "loss": 0.6114, "step": 14485 }, { "epoch": 1.29, "grad_norm": 7.904275901203021, "learning_rate": 2.939186248258086e-06, "loss": 0.6609, "step": 14486 }, { "epoch": 1.29, "grad_norm": 7.375450456953021, "learning_rate": 2.9385281099074837e-06, "loss": 0.6051, "step": 14487 }, { "epoch": 1.29, "grad_norm": 5.340211414965274, "learning_rate": 2.9378700145855926e-06, "loss": 0.5936, "step": 14488 }, { "epoch": 1.29, "grad_norm": 6.714977566095878, "learning_rate": 2.9372119623061486e-06, "loss": 0.6359, "step": 14489 }, { "epoch": 1.29, "grad_norm": 6.775478708262551, "learning_rate": 2.936553953082887e-06, "loss": 0.5088, "step": 14490 }, { "epoch": 1.29, "grad_norm": 5.63295122456318, "learning_rate": 2.935895986929544e-06, "loss": 0.6776, "step": 14491 }, { "epoch": 1.29, "grad_norm": 8.07595971174323, "learning_rate": 2.935238063859851e-06, "loss": 0.6131, "step": 14492 }, { "epoch": 1.29, "grad_norm": 5.497652195115357, "learning_rate": 2.934580183887542e-06, "loss": 0.5429, "step": 14493 }, { "epoch": 1.29, "grad_norm": 4.539707395102691, "learning_rate": 2.9339223470263485e-06, "loss": 0.6445, "step": 14494 }, { "epoch": 1.29, "grad_norm": 5.615063927375149, "learning_rate": 2.933264553290003e-06, "loss": 0.5858, "step": 14495 }, { "epoch": 1.29, "grad_norm": 5.889654117658814, "learning_rate": 2.9326068026922317e-06, "loss": 0.6119, "step": 14496 }, { "epoch": 1.29, "grad_norm": 6.666303950749915, "learning_rate": 2.9319490952467654e-06, "loss": 0.6204, "step": 14497 }, { "epoch": 1.29, "grad_norm": 7.092001272181314, "learning_rate": 2.9312914309673347e-06, "loss": 0.5945, "step": 14498 }, { "epoch": 1.29, "grad_norm": 4.906228194516457, "learning_rate": 2.9306338098676634e-06, "loss": 0.6053, "step": 14499 }, { "epoch": 1.29, "grad_norm": 7.1484423471018115, "learning_rate": 2.92997623196148e-06, "loss": 0.5658, "step": 14500 }, { "epoch": 1.29, "grad_norm": 6.765846149156163, "learning_rate": 2.92931869726251e-06, "loss": 0.5801, "step": 14501 }, { "epoch": 1.29, "grad_norm": 5.932822698194865, "learning_rate": 2.9286612057844764e-06, "loss": 0.5767, "step": 14502 }, { "epoch": 1.29, "grad_norm": 7.995533589782565, "learning_rate": 2.928003757541106e-06, "loss": 0.6221, "step": 14503 }, { "epoch": 1.29, "grad_norm": 5.33556852739582, "learning_rate": 2.9273463525461176e-06, "loss": 0.505, "step": 14504 }, { "epoch": 1.29, "grad_norm": 5.392449928716301, "learning_rate": 2.926688990813235e-06, "loss": 0.6256, "step": 14505 }, { "epoch": 1.29, "grad_norm": 5.172297128117079, "learning_rate": 2.926031672356181e-06, "loss": 0.5515, "step": 14506 }, { "epoch": 1.29, "grad_norm": 6.705695210112105, "learning_rate": 2.925374397188672e-06, "loss": 0.4884, "step": 14507 }, { "epoch": 1.29, "grad_norm": 7.672195736116548, "learning_rate": 2.9247171653244298e-06, "loss": 0.5507, "step": 14508 }, { "epoch": 1.29, "grad_norm": 5.906221206106583, "learning_rate": 2.9240599767771737e-06, "loss": 0.6661, "step": 14509 }, { "epoch": 1.29, "grad_norm": 7.351080215925008, "learning_rate": 2.9234028315606185e-06, "loss": 0.6204, "step": 14510 }, { "epoch": 1.29, "grad_norm": 5.717152182518857, "learning_rate": 2.9227457296884804e-06, "loss": 0.65, "step": 14511 }, { "epoch": 1.29, "grad_norm": 5.265891814117842, "learning_rate": 2.9220886711744757e-06, "loss": 0.5701, "step": 14512 }, { "epoch": 1.29, "grad_norm": 6.4579015549943355, "learning_rate": 2.921431656032321e-06, "loss": 0.5922, "step": 14513 }, { "epoch": 1.29, "grad_norm": 7.850922306700705, "learning_rate": 2.920774684275727e-06, "loss": 0.5385, "step": 14514 }, { "epoch": 1.29, "grad_norm": 5.861687917777732, "learning_rate": 2.920117755918409e-06, "loss": 0.6257, "step": 14515 }, { "epoch": 1.29, "grad_norm": 4.764866341894267, "learning_rate": 2.919460870974078e-06, "loss": 0.5766, "step": 14516 }, { "epoch": 1.3, "grad_norm": 4.905707825057792, "learning_rate": 2.9188040294564436e-06, "loss": 0.5755, "step": 14517 }, { "epoch": 1.3, "grad_norm": 6.171337916830878, "learning_rate": 2.9181472313792204e-06, "loss": 0.5726, "step": 14518 }, { "epoch": 1.3, "grad_norm": 6.833513915800901, "learning_rate": 2.9174904767561118e-06, "loss": 0.5723, "step": 14519 }, { "epoch": 1.3, "grad_norm": 5.5619989722368475, "learning_rate": 2.9168337656008294e-06, "loss": 0.5531, "step": 14520 }, { "epoch": 1.3, "grad_norm": 5.695213895699714, "learning_rate": 2.9161770979270796e-06, "loss": 0.6885, "step": 14521 }, { "epoch": 1.3, "grad_norm": 6.89302230585796, "learning_rate": 2.91552047374857e-06, "loss": 0.5308, "step": 14522 }, { "epoch": 1.3, "grad_norm": 5.161419963348462, "learning_rate": 2.9148638930790053e-06, "loss": 0.6121, "step": 14523 }, { "epoch": 1.3, "grad_norm": 6.163412961631086, "learning_rate": 2.9142073559320925e-06, "loss": 0.6622, "step": 14524 }, { "epoch": 1.3, "grad_norm": 5.349941000318637, "learning_rate": 2.9135508623215335e-06, "loss": 0.5622, "step": 14525 }, { "epoch": 1.3, "grad_norm": 7.152082360602169, "learning_rate": 2.912894412261029e-06, "loss": 0.6355, "step": 14526 }, { "epoch": 1.3, "grad_norm": 7.7727656001951795, "learning_rate": 2.912238005764283e-06, "loss": 0.5992, "step": 14527 }, { "epoch": 1.3, "grad_norm": 6.519862717145605, "learning_rate": 2.911581642844996e-06, "loss": 0.5763, "step": 14528 }, { "epoch": 1.3, "grad_norm": 6.689388518891596, "learning_rate": 2.9109253235168693e-06, "loss": 0.622, "step": 14529 }, { "epoch": 1.3, "grad_norm": 7.020884728616298, "learning_rate": 2.9102690477936012e-06, "loss": 0.5354, "step": 14530 }, { "epoch": 1.3, "grad_norm": 9.929154770888006, "learning_rate": 2.9096128156888924e-06, "loss": 0.6512, "step": 14531 }, { "epoch": 1.3, "grad_norm": 8.276423943087273, "learning_rate": 2.908956627216436e-06, "loss": 0.5882, "step": 14532 }, { "epoch": 1.3, "grad_norm": 5.772407992138237, "learning_rate": 2.9083004823899308e-06, "loss": 0.5735, "step": 14533 }, { "epoch": 1.3, "grad_norm": 5.914403228246642, "learning_rate": 2.9076443812230728e-06, "loss": 0.6578, "step": 14534 }, { "epoch": 1.3, "grad_norm": 7.449237370704516, "learning_rate": 2.906988323729555e-06, "loss": 0.6068, "step": 14535 }, { "epoch": 1.3, "grad_norm": 7.829577667435318, "learning_rate": 2.906332309923075e-06, "loss": 0.6586, "step": 14536 }, { "epoch": 1.3, "grad_norm": 6.910420622835594, "learning_rate": 2.905676339817321e-06, "loss": 0.6323, "step": 14537 }, { "epoch": 1.3, "grad_norm": 4.754830071241391, "learning_rate": 2.905020413425989e-06, "loss": 0.5454, "step": 14538 }, { "epoch": 1.3, "grad_norm": 7.02232816008928, "learning_rate": 2.9043645307627654e-06, "loss": 0.6239, "step": 14539 }, { "epoch": 1.3, "grad_norm": 5.92581087067422, "learning_rate": 2.9037086918413435e-06, "loss": 0.5927, "step": 14540 }, { "epoch": 1.3, "grad_norm": 6.854929770521914, "learning_rate": 2.903052896675412e-06, "loss": 0.6047, "step": 14541 }, { "epoch": 1.3, "grad_norm": 6.69186715056417, "learning_rate": 2.9023971452786582e-06, "loss": 0.5911, "step": 14542 }, { "epoch": 1.3, "grad_norm": 5.341556601943801, "learning_rate": 2.9017414376647713e-06, "loss": 0.5444, "step": 14543 }, { "epoch": 1.3, "grad_norm": 6.171946325891566, "learning_rate": 2.901085773847438e-06, "loss": 0.6274, "step": 14544 }, { "epoch": 1.3, "grad_norm": 7.62275719532823, "learning_rate": 2.9004301538403408e-06, "loss": 0.5848, "step": 14545 }, { "epoch": 1.3, "grad_norm": 6.030926754451648, "learning_rate": 2.899774577657166e-06, "loss": 0.6074, "step": 14546 }, { "epoch": 1.3, "grad_norm": 5.754153915251487, "learning_rate": 2.899119045311598e-06, "loss": 0.6083, "step": 14547 }, { "epoch": 1.3, "grad_norm": 6.372916982167611, "learning_rate": 2.8984635568173184e-06, "loss": 0.6248, "step": 14548 }, { "epoch": 1.3, "grad_norm": 6.442547645089385, "learning_rate": 2.8978081121880123e-06, "loss": 0.6166, "step": 14549 }, { "epoch": 1.3, "grad_norm": 9.39966921393144, "learning_rate": 2.8971527114373566e-06, "loss": 0.6154, "step": 14550 }, { "epoch": 1.3, "grad_norm": 5.000751638331322, "learning_rate": 2.8964973545790322e-06, "loss": 0.6111, "step": 14551 }, { "epoch": 1.3, "grad_norm": 6.33573654289518, "learning_rate": 2.8958420416267186e-06, "loss": 0.599, "step": 14552 }, { "epoch": 1.3, "grad_norm": 7.9136006998574215, "learning_rate": 2.8951867725940967e-06, "loss": 0.586, "step": 14553 }, { "epoch": 1.3, "grad_norm": 5.346644992356539, "learning_rate": 2.89453154749484e-06, "loss": 0.5904, "step": 14554 }, { "epoch": 1.3, "grad_norm": 5.296452160201325, "learning_rate": 2.8938763663426256e-06, "loss": 0.5603, "step": 14555 }, { "epoch": 1.3, "grad_norm": 6.565424717771355, "learning_rate": 2.8932212291511318e-06, "loss": 0.5479, "step": 14556 }, { "epoch": 1.3, "grad_norm": 4.108016342723745, "learning_rate": 2.8925661359340297e-06, "loss": 0.6041, "step": 14557 }, { "epoch": 1.3, "grad_norm": 6.546824388534938, "learning_rate": 2.891911086704994e-06, "loss": 0.5669, "step": 14558 }, { "epoch": 1.3, "grad_norm": 6.596318851477017, "learning_rate": 2.8912560814776973e-06, "loss": 0.7147, "step": 14559 }, { "epoch": 1.3, "grad_norm": 5.8108105698924, "learning_rate": 2.8906011202658122e-06, "loss": 0.6083, "step": 14560 }, { "epoch": 1.3, "grad_norm": 6.401324960002299, "learning_rate": 2.8899462030830093e-06, "loss": 0.6212, "step": 14561 }, { "epoch": 1.3, "grad_norm": 5.82481361165915, "learning_rate": 2.8892913299429605e-06, "loss": 0.5757, "step": 14562 }, { "epoch": 1.3, "grad_norm": 4.8821333942944385, "learning_rate": 2.88863650085933e-06, "loss": 0.5555, "step": 14563 }, { "epoch": 1.3, "grad_norm": 6.918394121809769, "learning_rate": 2.8879817158457895e-06, "loss": 0.6192, "step": 14564 }, { "epoch": 1.3, "grad_norm": 7.538116701325618, "learning_rate": 2.8873269749160052e-06, "loss": 0.5579, "step": 14565 }, { "epoch": 1.3, "grad_norm": 5.78462738697619, "learning_rate": 2.8866722780836443e-06, "loss": 0.6139, "step": 14566 }, { "epoch": 1.3, "grad_norm": 6.938717803554829, "learning_rate": 2.886017625362373e-06, "loss": 0.6685, "step": 14567 }, { "epoch": 1.3, "grad_norm": 5.665252907280941, "learning_rate": 2.885363016765852e-06, "loss": 0.5613, "step": 14568 }, { "epoch": 1.3, "grad_norm": 8.699119144330691, "learning_rate": 2.884708452307749e-06, "loss": 0.6269, "step": 14569 }, { "epoch": 1.3, "grad_norm": 7.355860222905223, "learning_rate": 2.884053932001723e-06, "loss": 0.6519, "step": 14570 }, { "epoch": 1.3, "grad_norm": 8.4993881285262, "learning_rate": 2.883399455861437e-06, "loss": 0.6093, "step": 14571 }, { "epoch": 1.3, "grad_norm": 7.921579768504905, "learning_rate": 2.882745023900552e-06, "loss": 0.5943, "step": 14572 }, { "epoch": 1.3, "grad_norm": 6.592143959612229, "learning_rate": 2.882090636132728e-06, "loss": 0.5384, "step": 14573 }, { "epoch": 1.3, "grad_norm": 6.0174634783975955, "learning_rate": 2.8814362925716244e-06, "loss": 0.5663, "step": 14574 }, { "epoch": 1.3, "grad_norm": 7.292763073411334, "learning_rate": 2.8807819932309e-06, "loss": 0.5984, "step": 14575 }, { "epoch": 1.3, "grad_norm": 5.408687781902184, "learning_rate": 2.8801277381242087e-06, "loss": 0.588, "step": 14576 }, { "epoch": 1.3, "grad_norm": 6.147232410919068, "learning_rate": 2.8794735272652085e-06, "loss": 0.5854, "step": 14577 }, { "epoch": 1.3, "grad_norm": 7.4422059108977985, "learning_rate": 2.8788193606675553e-06, "loss": 0.6145, "step": 14578 }, { "epoch": 1.3, "grad_norm": 7.7427389236091955, "learning_rate": 2.878165238344901e-06, "loss": 0.5833, "step": 14579 }, { "epoch": 1.3, "grad_norm": 5.519504109709133, "learning_rate": 2.877511160310902e-06, "loss": 0.5885, "step": 14580 }, { "epoch": 1.3, "grad_norm": 4.519805323100826, "learning_rate": 2.8768571265792105e-06, "loss": 0.574, "step": 14581 }, { "epoch": 1.3, "grad_norm": 4.1194463678806414, "learning_rate": 2.876203137163477e-06, "loss": 0.4983, "step": 14582 }, { "epoch": 1.3, "grad_norm": 5.975662751876128, "learning_rate": 2.8755491920773506e-06, "loss": 0.5853, "step": 14583 }, { "epoch": 1.3, "grad_norm": 6.286827143671914, "learning_rate": 2.874895291334482e-06, "loss": 0.6089, "step": 14584 }, { "epoch": 1.3, "grad_norm": 5.699779990514072, "learning_rate": 2.87424143494852e-06, "loss": 0.5936, "step": 14585 }, { "epoch": 1.3, "grad_norm": 5.903985672313625, "learning_rate": 2.8735876229331128e-06, "loss": 0.6102, "step": 14586 }, { "epoch": 1.3, "grad_norm": 7.317618274327215, "learning_rate": 2.8729338553019073e-06, "loss": 0.5817, "step": 14587 }, { "epoch": 1.3, "grad_norm": 4.962095243915575, "learning_rate": 2.872280132068551e-06, "loss": 0.554, "step": 14588 }, { "epoch": 1.3, "grad_norm": 5.427462810216966, "learning_rate": 2.8716264532466864e-06, "loss": 0.5741, "step": 14589 }, { "epoch": 1.3, "grad_norm": 6.349386448844346, "learning_rate": 2.8709728188499573e-06, "loss": 0.5996, "step": 14590 }, { "epoch": 1.3, "grad_norm": 8.006896347491942, "learning_rate": 2.870319228892009e-06, "loss": 0.5742, "step": 14591 }, { "epoch": 1.3, "grad_norm": 6.802868827062682, "learning_rate": 2.869665683386482e-06, "loss": 0.5912, "step": 14592 }, { "epoch": 1.3, "grad_norm": 6.258613401389508, "learning_rate": 2.8690121823470196e-06, "loss": 0.5745, "step": 14593 }, { "epoch": 1.3, "grad_norm": 5.714941825342107, "learning_rate": 2.8683587257872625e-06, "loss": 0.6498, "step": 14594 }, { "epoch": 1.3, "grad_norm": 5.828825279431822, "learning_rate": 2.8677053137208464e-06, "loss": 0.6005, "step": 14595 }, { "epoch": 1.3, "grad_norm": 8.641867266009958, "learning_rate": 2.867051946161415e-06, "loss": 0.621, "step": 14596 }, { "epoch": 1.3, "grad_norm": 4.876371214965587, "learning_rate": 2.8663986231226004e-06, "loss": 0.5645, "step": 14597 }, { "epoch": 1.3, "grad_norm": 7.480565581803046, "learning_rate": 2.8657453446180428e-06, "loss": 0.5694, "step": 14598 }, { "epoch": 1.3, "grad_norm": 6.515827463282158, "learning_rate": 2.8650921106613767e-06, "loss": 0.5569, "step": 14599 }, { "epoch": 1.3, "grad_norm": 6.024646566978927, "learning_rate": 2.864438921266238e-06, "loss": 0.6116, "step": 14600 }, { "epoch": 1.3, "grad_norm": 6.728279032478568, "learning_rate": 2.863785776446261e-06, "loss": 0.544, "step": 14601 }, { "epoch": 1.3, "grad_norm": 10.263198572648042, "learning_rate": 2.863132676215076e-06, "loss": 0.6159, "step": 14602 }, { "epoch": 1.3, "grad_norm": 6.271357950055915, "learning_rate": 2.862479620586317e-06, "loss": 0.5733, "step": 14603 }, { "epoch": 1.3, "grad_norm": 7.2246920739570895, "learning_rate": 2.861826609573615e-06, "loss": 0.572, "step": 14604 }, { "epoch": 1.3, "grad_norm": 8.624085305367228, "learning_rate": 2.8611736431906002e-06, "loss": 0.5622, "step": 14605 }, { "epoch": 1.3, "grad_norm": 6.160839399764682, "learning_rate": 2.860520721450901e-06, "loss": 0.5935, "step": 14606 }, { "epoch": 1.3, "grad_norm": 6.257261900799848, "learning_rate": 2.8598678443681486e-06, "loss": 0.5451, "step": 14607 }, { "epoch": 1.3, "grad_norm": 10.222950006718785, "learning_rate": 2.859215011955966e-06, "loss": 0.6286, "step": 14608 }, { "epoch": 1.3, "grad_norm": 6.455460762380976, "learning_rate": 2.858562224227982e-06, "loss": 0.5946, "step": 14609 }, { "epoch": 1.3, "grad_norm": 8.974315092214637, "learning_rate": 2.8579094811978235e-06, "loss": 0.5896, "step": 14610 }, { "epoch": 1.3, "grad_norm": 7.277357614601458, "learning_rate": 2.857256782879112e-06, "loss": 0.5722, "step": 14611 }, { "epoch": 1.3, "grad_norm": 6.145542808188252, "learning_rate": 2.856604129285473e-06, "loss": 0.5576, "step": 14612 }, { "epoch": 1.3, "grad_norm": 7.616496264160208, "learning_rate": 2.8559515204305275e-06, "loss": 0.6199, "step": 14613 }, { "epoch": 1.3, "grad_norm": 6.22966779126883, "learning_rate": 2.855298956327902e-06, "loss": 0.6522, "step": 14614 }, { "epoch": 1.3, "grad_norm": 6.509542506308877, "learning_rate": 2.8546464369912116e-06, "loss": 0.5721, "step": 14615 }, { "epoch": 1.3, "grad_norm": 4.8755631682132226, "learning_rate": 2.8539939624340786e-06, "loss": 0.5684, "step": 14616 }, { "epoch": 1.3, "grad_norm": 6.746198994894159, "learning_rate": 2.8533415326701214e-06, "loss": 0.5874, "step": 14617 }, { "epoch": 1.3, "grad_norm": 7.672804759488389, "learning_rate": 2.8526891477129594e-06, "loss": 0.6105, "step": 14618 }, { "epoch": 1.3, "grad_norm": 7.047693129054449, "learning_rate": 2.8520368075762083e-06, "loss": 0.5997, "step": 14619 }, { "epoch": 1.3, "grad_norm": 6.3873878912263775, "learning_rate": 2.8513845122734867e-06, "loss": 0.5867, "step": 14620 }, { "epoch": 1.3, "grad_norm": 6.323409139562968, "learning_rate": 2.8507322618184063e-06, "loss": 0.5675, "step": 14621 }, { "epoch": 1.3, "grad_norm": 7.041912851952509, "learning_rate": 2.8500800562245833e-06, "loss": 0.5886, "step": 14622 }, { "epoch": 1.3, "grad_norm": 5.924370562154884, "learning_rate": 2.849427895505631e-06, "loss": 0.5911, "step": 14623 }, { "epoch": 1.3, "grad_norm": 7.974260267775128, "learning_rate": 2.8487757796751616e-06, "loss": 0.6882, "step": 14624 }, { "epoch": 1.3, "grad_norm": 6.991992146352666, "learning_rate": 2.848123708746788e-06, "loss": 0.5749, "step": 14625 }, { "epoch": 1.3, "grad_norm": 5.376901198105059, "learning_rate": 2.847471682734118e-06, "loss": 0.624, "step": 14626 }, { "epoch": 1.3, "grad_norm": 8.388046037533638, "learning_rate": 2.8468197016507646e-06, "loss": 0.6, "step": 14627 }, { "epoch": 1.3, "grad_norm": 6.281893888198189, "learning_rate": 2.8461677655103325e-06, "loss": 0.555, "step": 14628 }, { "epoch": 1.31, "grad_norm": 6.49058759571483, "learning_rate": 2.845515874326432e-06, "loss": 0.578, "step": 14629 }, { "epoch": 1.31, "grad_norm": 8.24310675651983, "learning_rate": 2.8448640281126694e-06, "loss": 0.5426, "step": 14630 }, { "epoch": 1.31, "grad_norm": 7.125756863629804, "learning_rate": 2.84421222688265e-06, "loss": 0.5541, "step": 14631 }, { "epoch": 1.31, "grad_norm": 8.50503056284289, "learning_rate": 2.843560470649981e-06, "loss": 0.5256, "step": 14632 }, { "epoch": 1.31, "grad_norm": 6.411022549641236, "learning_rate": 2.8429087594282633e-06, "loss": 0.6339, "step": 14633 }, { "epoch": 1.31, "grad_norm": 6.810738433771384, "learning_rate": 2.842257093231101e-06, "loss": 0.5884, "step": 14634 }, { "epoch": 1.31, "grad_norm": 7.254298457105643, "learning_rate": 2.8416054720720965e-06, "loss": 0.5447, "step": 14635 }, { "epoch": 1.31, "grad_norm": 6.555677488923051, "learning_rate": 2.8409538959648507e-06, "loss": 0.5321, "step": 14636 }, { "epoch": 1.31, "grad_norm": 7.926985056074106, "learning_rate": 2.840302364922964e-06, "loss": 0.5895, "step": 14637 }, { "epoch": 1.31, "grad_norm": 6.8515248514586515, "learning_rate": 2.839650878960039e-06, "loss": 0.6222, "step": 14638 }, { "epoch": 1.31, "grad_norm": 6.1382569063424945, "learning_rate": 2.83899943808967e-06, "loss": 0.4932, "step": 14639 }, { "epoch": 1.31, "grad_norm": 7.213444639645714, "learning_rate": 2.8383480423254533e-06, "loss": 0.5691, "step": 14640 }, { "epoch": 1.31, "grad_norm": 7.516735519860875, "learning_rate": 2.8376966916809873e-06, "loss": 0.5912, "step": 14641 }, { "epoch": 1.31, "grad_norm": 7.22457883484646, "learning_rate": 2.837045386169867e-06, "loss": 0.5543, "step": 14642 }, { "epoch": 1.31, "grad_norm": 6.5706122546570365, "learning_rate": 2.8363941258056882e-06, "loss": 0.5895, "step": 14643 }, { "epoch": 1.31, "grad_norm": 7.1809636592361015, "learning_rate": 2.8357429106020433e-06, "loss": 0.5902, "step": 14644 }, { "epoch": 1.31, "grad_norm": 7.259697021088739, "learning_rate": 2.8350917405725276e-06, "loss": 0.5852, "step": 14645 }, { "epoch": 1.31, "grad_norm": 8.26865289136976, "learning_rate": 2.834440615730729e-06, "loss": 0.6448, "step": 14646 }, { "epoch": 1.31, "grad_norm": 5.3767112702828745, "learning_rate": 2.83378953609024e-06, "loss": 0.645, "step": 14647 }, { "epoch": 1.31, "grad_norm": 5.3077570308320094, "learning_rate": 2.8331385016646505e-06, "loss": 0.6084, "step": 14648 }, { "epoch": 1.31, "grad_norm": 4.611053219388775, "learning_rate": 2.8324875124675486e-06, "loss": 0.5629, "step": 14649 }, { "epoch": 1.31, "grad_norm": 8.945938310722376, "learning_rate": 2.831836568512525e-06, "loss": 0.6268, "step": 14650 }, { "epoch": 1.31, "grad_norm": 4.922159822298804, "learning_rate": 2.8311856698131653e-06, "loss": 0.5808, "step": 14651 }, { "epoch": 1.31, "grad_norm": 5.406849992512114, "learning_rate": 2.8305348163830536e-06, "loss": 0.5604, "step": 14652 }, { "epoch": 1.31, "grad_norm": 7.883642567149509, "learning_rate": 2.829884008235778e-06, "loss": 0.5781, "step": 14653 }, { "epoch": 1.31, "grad_norm": 6.76383644991604, "learning_rate": 2.8292332453849203e-06, "loss": 0.6056, "step": 14654 }, { "epoch": 1.31, "grad_norm": 7.997609083422815, "learning_rate": 2.828582527844064e-06, "loss": 0.6069, "step": 14655 }, { "epoch": 1.31, "grad_norm": 6.443818463123299, "learning_rate": 2.827931855626792e-06, "loss": 0.6437, "step": 14656 }, { "epoch": 1.31, "grad_norm": 5.325701412945984, "learning_rate": 2.8272812287466855e-06, "loss": 0.635, "step": 14657 }, { "epoch": 1.31, "grad_norm": 5.166774160814114, "learning_rate": 2.8266306472173275e-06, "loss": 0.626, "step": 14658 }, { "epoch": 1.31, "grad_norm": 6.898914164405058, "learning_rate": 2.8259801110522938e-06, "loss": 0.6125, "step": 14659 }, { "epoch": 1.31, "grad_norm": 6.054574362914878, "learning_rate": 2.825329620265164e-06, "loss": 0.6048, "step": 14660 }, { "epoch": 1.31, "grad_norm": 4.784374453714737, "learning_rate": 2.8246791748695156e-06, "loss": 0.587, "step": 14661 }, { "epoch": 1.31, "grad_norm": 7.829627942492753, "learning_rate": 2.8240287748789254e-06, "loss": 0.6135, "step": 14662 }, { "epoch": 1.31, "grad_norm": 8.107675920992314, "learning_rate": 2.82337842030697e-06, "loss": 0.6037, "step": 14663 }, { "epoch": 1.31, "grad_norm": 7.045663102114819, "learning_rate": 2.8227281111672243e-06, "loss": 0.5633, "step": 14664 }, { "epoch": 1.31, "grad_norm": 5.5845017407080935, "learning_rate": 2.82207784747326e-06, "loss": 0.5555, "step": 14665 }, { "epoch": 1.31, "grad_norm": 5.884136104029214, "learning_rate": 2.8214276292386506e-06, "loss": 0.557, "step": 14666 }, { "epoch": 1.31, "grad_norm": 8.174991713891545, "learning_rate": 2.8207774564769686e-06, "loss": 0.617, "step": 14667 }, { "epoch": 1.31, "grad_norm": 7.863439447404439, "learning_rate": 2.8201273292017872e-06, "loss": 0.5968, "step": 14668 }, { "epoch": 1.31, "grad_norm": 7.7306874801848195, "learning_rate": 2.8194772474266713e-06, "loss": 0.5876, "step": 14669 }, { "epoch": 1.31, "grad_norm": 5.143109173098023, "learning_rate": 2.8188272111651927e-06, "loss": 0.6464, "step": 14670 }, { "epoch": 1.31, "grad_norm": 7.919870746999614, "learning_rate": 2.8181772204309216e-06, "loss": 0.6045, "step": 14671 }, { "epoch": 1.31, "grad_norm": 5.929705207313971, "learning_rate": 2.8175272752374204e-06, "loss": 0.65, "step": 14672 }, { "epoch": 1.31, "grad_norm": 6.140757151274942, "learning_rate": 2.816877375598258e-06, "loss": 0.5786, "step": 14673 }, { "epoch": 1.31, "grad_norm": 6.791864241591136, "learning_rate": 2.816227521527e-06, "loss": 0.6113, "step": 14674 }, { "epoch": 1.31, "grad_norm": 8.355617659014257, "learning_rate": 2.81557771303721e-06, "loss": 0.5406, "step": 14675 }, { "epoch": 1.31, "grad_norm": 6.917026591837284, "learning_rate": 2.814927950142451e-06, "loss": 0.632, "step": 14676 }, { "epoch": 1.31, "grad_norm": 8.673949072292034, "learning_rate": 2.8142782328562878e-06, "loss": 0.5551, "step": 14677 }, { "epoch": 1.31, "grad_norm": 6.843218696148027, "learning_rate": 2.813628561192279e-06, "loss": 0.582, "step": 14678 }, { "epoch": 1.31, "grad_norm": 6.608672206312245, "learning_rate": 2.8129789351639847e-06, "loss": 0.5622, "step": 14679 }, { "epoch": 1.31, "grad_norm": 7.147133932887482, "learning_rate": 2.8123293547849655e-06, "loss": 0.625, "step": 14680 }, { "epoch": 1.31, "grad_norm": 6.0141314189998125, "learning_rate": 2.8116798200687805e-06, "loss": 0.5535, "step": 14681 }, { "epoch": 1.31, "grad_norm": 7.738965870934562, "learning_rate": 2.8110303310289887e-06, "loss": 0.6859, "step": 14682 }, { "epoch": 1.31, "grad_norm": 6.010945492812928, "learning_rate": 2.8103808876791434e-06, "loss": 0.6316, "step": 14683 }, { "epoch": 1.31, "grad_norm": 6.9653910516612685, "learning_rate": 2.809731490032804e-06, "loss": 0.6269, "step": 14684 }, { "epoch": 1.31, "grad_norm": 5.2763726930059605, "learning_rate": 2.8090821381035207e-06, "loss": 0.5564, "step": 14685 }, { "epoch": 1.31, "grad_norm": 7.798525645534601, "learning_rate": 2.808432831904849e-06, "loss": 0.5869, "step": 14686 }, { "epoch": 1.31, "grad_norm": 6.066442208784562, "learning_rate": 2.8077835714503444e-06, "loss": 0.5589, "step": 14687 }, { "epoch": 1.31, "grad_norm": 8.605745984702406, "learning_rate": 2.807134356753555e-06, "loss": 0.6489, "step": 14688 }, { "epoch": 1.31, "grad_norm": 5.976100456950208, "learning_rate": 2.8064851878280347e-06, "loss": 0.5416, "step": 14689 }, { "epoch": 1.31, "grad_norm": 6.1588069408382, "learning_rate": 2.805836064687334e-06, "loss": 0.5891, "step": 14690 }, { "epoch": 1.31, "grad_norm": 4.546737460867647, "learning_rate": 2.805186987344999e-06, "loss": 0.5644, "step": 14691 }, { "epoch": 1.31, "grad_norm": 6.855988800974197, "learning_rate": 2.804537955814578e-06, "loss": 0.5649, "step": 14692 }, { "epoch": 1.31, "grad_norm": 7.707335917945903, "learning_rate": 2.8038889701096196e-06, "loss": 0.5817, "step": 14693 }, { "epoch": 1.31, "grad_norm": 6.82908368757806, "learning_rate": 2.80324003024367e-06, "loss": 0.629, "step": 14694 }, { "epoch": 1.31, "grad_norm": 6.814775642346032, "learning_rate": 2.8025911362302753e-06, "loss": 0.5701, "step": 14695 }, { "epoch": 1.31, "grad_norm": 8.25854304466767, "learning_rate": 2.8019422880829765e-06, "loss": 0.6092, "step": 14696 }, { "epoch": 1.31, "grad_norm": 8.051217189193633, "learning_rate": 2.801293485815321e-06, "loss": 0.602, "step": 14697 }, { "epoch": 1.31, "grad_norm": 8.323697431113397, "learning_rate": 2.8006447294408466e-06, "loss": 0.5928, "step": 14698 }, { "epoch": 1.31, "grad_norm": 6.203633784829223, "learning_rate": 2.7999960189730977e-06, "loss": 0.6218, "step": 14699 }, { "epoch": 1.31, "grad_norm": 5.724469400235122, "learning_rate": 2.7993473544256134e-06, "loss": 0.5369, "step": 14700 }, { "epoch": 1.31, "grad_norm": 5.834829329359145, "learning_rate": 2.7986987358119338e-06, "loss": 0.614, "step": 14701 }, { "epoch": 1.31, "grad_norm": 8.236452130208386, "learning_rate": 2.7980501631455974e-06, "loss": 0.5982, "step": 14702 }, { "epoch": 1.31, "grad_norm": 8.184207686513725, "learning_rate": 2.7974016364401434e-06, "loss": 0.5663, "step": 14703 }, { "epoch": 1.31, "grad_norm": 6.944426248083473, "learning_rate": 2.7967531557091056e-06, "loss": 0.5291, "step": 14704 }, { "epoch": 1.31, "grad_norm": 6.49335656970426, "learning_rate": 2.79610472096602e-06, "loss": 0.6356, "step": 14705 }, { "epoch": 1.31, "grad_norm": 6.611212994060446, "learning_rate": 2.795456332224421e-06, "loss": 0.5754, "step": 14706 }, { "epoch": 1.31, "grad_norm": 5.815370796531603, "learning_rate": 2.794807989497844e-06, "loss": 0.628, "step": 14707 }, { "epoch": 1.31, "grad_norm": 8.546173578266586, "learning_rate": 2.7941596927998225e-06, "loss": 0.6107, "step": 14708 }, { "epoch": 1.31, "grad_norm": 7.562720550244697, "learning_rate": 2.7935114421438857e-06, "loss": 0.5623, "step": 14709 }, { "epoch": 1.31, "grad_norm": 7.035297560727162, "learning_rate": 2.792863237543565e-06, "loss": 0.5928, "step": 14710 }, { "epoch": 1.31, "grad_norm": 5.110485605084381, "learning_rate": 2.7922150790123924e-06, "loss": 0.5738, "step": 14711 }, { "epoch": 1.31, "grad_norm": 12.16711773786022, "learning_rate": 2.7915669665638926e-06, "loss": 0.6094, "step": 14712 }, { "epoch": 1.31, "grad_norm": 6.995457314588372, "learning_rate": 2.7909189002115967e-06, "loss": 0.6154, "step": 14713 }, { "epoch": 1.31, "grad_norm": 7.474035074897762, "learning_rate": 2.790270879969031e-06, "loss": 0.5786, "step": 14714 }, { "epoch": 1.31, "grad_norm": 7.436356114953808, "learning_rate": 2.789622905849723e-06, "loss": 0.5802, "step": 14715 }, { "epoch": 1.31, "grad_norm": 6.746459084331684, "learning_rate": 2.788974977867195e-06, "loss": 0.5965, "step": 14716 }, { "epoch": 1.31, "grad_norm": 6.2934500579697685, "learning_rate": 2.788327096034972e-06, "loss": 0.6338, "step": 14717 }, { "epoch": 1.31, "grad_norm": 8.886502632857635, "learning_rate": 2.7876792603665765e-06, "loss": 0.6319, "step": 14718 }, { "epoch": 1.31, "grad_norm": 5.670049907811608, "learning_rate": 2.787031470875532e-06, "loss": 0.6219, "step": 14719 }, { "epoch": 1.31, "grad_norm": 5.525783044032521, "learning_rate": 2.786383727575359e-06, "loss": 0.6078, "step": 14720 }, { "epoch": 1.31, "grad_norm": 6.604540455430241, "learning_rate": 2.785736030479581e-06, "loss": 0.5451, "step": 14721 }, { "epoch": 1.31, "grad_norm": 4.982999071691602, "learning_rate": 2.7850883796017116e-06, "loss": 0.5607, "step": 14722 }, { "epoch": 1.31, "grad_norm": 5.303709232434837, "learning_rate": 2.7844407749552713e-06, "loss": 0.6218, "step": 14723 }, { "epoch": 1.31, "grad_norm": 7.273834913219414, "learning_rate": 2.7837932165537786e-06, "loss": 0.5528, "step": 14724 }, { "epoch": 1.31, "grad_norm": 6.976318897762929, "learning_rate": 2.7831457044107477e-06, "loss": 0.585, "step": 14725 }, { "epoch": 1.31, "grad_norm": 6.513638998201363, "learning_rate": 2.782498238539699e-06, "loss": 0.5947, "step": 14726 }, { "epoch": 1.31, "grad_norm": 5.8018197907209546, "learning_rate": 2.7818508189541404e-06, "loss": 0.5404, "step": 14727 }, { "epoch": 1.31, "grad_norm": 8.656951695415131, "learning_rate": 2.7812034456675906e-06, "loss": 0.5563, "step": 14728 }, { "epoch": 1.31, "grad_norm": 6.998817415757714, "learning_rate": 2.780556118693558e-06, "loss": 0.6725, "step": 14729 }, { "epoch": 1.31, "grad_norm": 5.092548108990833, "learning_rate": 2.779908838045556e-06, "loss": 0.658, "step": 14730 }, { "epoch": 1.31, "grad_norm": 6.369574802493496, "learning_rate": 2.7792616037370944e-06, "loss": 0.6019, "step": 14731 }, { "epoch": 1.31, "grad_norm": 6.759855208646145, "learning_rate": 2.778614415781684e-06, "loss": 0.6037, "step": 14732 }, { "epoch": 1.31, "grad_norm": 5.601370856623269, "learning_rate": 2.7779672741928326e-06, "loss": 0.6087, "step": 14733 }, { "epoch": 1.31, "grad_norm": 8.396933851959563, "learning_rate": 2.7773201789840505e-06, "loss": 0.5961, "step": 14734 }, { "epoch": 1.31, "grad_norm": 5.8989775760133325, "learning_rate": 2.7766731301688397e-06, "loss": 0.6035, "step": 14735 }, { "epoch": 1.31, "grad_norm": 4.795462326738627, "learning_rate": 2.7760261277607086e-06, "loss": 0.5768, "step": 14736 }, { "epoch": 1.31, "grad_norm": 6.276172619241128, "learning_rate": 2.7753791717731616e-06, "loss": 0.5711, "step": 14737 }, { "epoch": 1.31, "grad_norm": 6.444923962889785, "learning_rate": 2.774732262219703e-06, "loss": 0.6165, "step": 14738 }, { "epoch": 1.31, "grad_norm": 6.022579838976158, "learning_rate": 2.7740853991138344e-06, "loss": 0.5893, "step": 14739 }, { "epoch": 1.31, "grad_norm": 6.529840957324658, "learning_rate": 2.7734385824690606e-06, "loss": 0.5735, "step": 14740 }, { "epoch": 1.32, "grad_norm": 6.602731305997631, "learning_rate": 2.7727918122988805e-06, "loss": 0.6073, "step": 14741 }, { "epoch": 1.32, "grad_norm": 5.428052904001444, "learning_rate": 2.7721450886167922e-06, "loss": 0.6118, "step": 14742 }, { "epoch": 1.32, "grad_norm": 6.561973206802653, "learning_rate": 2.771498411436295e-06, "loss": 0.6516, "step": 14743 }, { "epoch": 1.32, "grad_norm": 5.499416496957474, "learning_rate": 2.7708517807708894e-06, "loss": 0.5607, "step": 14744 }, { "epoch": 1.32, "grad_norm": 5.5241073251317605, "learning_rate": 2.77020519663407e-06, "loss": 0.5933, "step": 14745 }, { "epoch": 1.32, "grad_norm": 6.646712953813759, "learning_rate": 2.769558659039335e-06, "loss": 0.5981, "step": 14746 }, { "epoch": 1.32, "grad_norm": 7.394998880480625, "learning_rate": 2.768912168000181e-06, "loss": 0.6073, "step": 14747 }, { "epoch": 1.32, "grad_norm": 8.48662922392802, "learning_rate": 2.7682657235300958e-06, "loss": 0.5632, "step": 14748 }, { "epoch": 1.32, "grad_norm": 5.272496587316058, "learning_rate": 2.767619325642577e-06, "loss": 0.6223, "step": 14749 }, { "epoch": 1.32, "grad_norm": 7.751315594580467, "learning_rate": 2.766972974351116e-06, "loss": 0.6282, "step": 14750 }, { "epoch": 1.32, "grad_norm": 6.362730041266211, "learning_rate": 2.7663266696692036e-06, "loss": 0.6068, "step": 14751 }, { "epoch": 1.32, "grad_norm": 5.699258261492003, "learning_rate": 2.7656804116103307e-06, "loss": 0.5838, "step": 14752 }, { "epoch": 1.32, "grad_norm": 8.667695521708877, "learning_rate": 2.765034200187988e-06, "loss": 0.6331, "step": 14753 }, { "epoch": 1.32, "grad_norm": 6.625405394545013, "learning_rate": 2.7643880354156616e-06, "loss": 0.5741, "step": 14754 }, { "epoch": 1.32, "grad_norm": 6.298152923203906, "learning_rate": 2.7637419173068375e-06, "loss": 0.5615, "step": 14755 }, { "epoch": 1.32, "grad_norm": 8.296218566368875, "learning_rate": 2.7630958458750036e-06, "loss": 0.5876, "step": 14756 }, { "epoch": 1.32, "grad_norm": 5.6146593257599955, "learning_rate": 2.7624498211336446e-06, "loss": 0.5932, "step": 14757 }, { "epoch": 1.32, "grad_norm": 6.577184504335389, "learning_rate": 2.761803843096246e-06, "loss": 0.5995, "step": 14758 }, { "epoch": 1.32, "grad_norm": 7.243610449161306, "learning_rate": 2.76115791177629e-06, "loss": 0.6269, "step": 14759 }, { "epoch": 1.32, "grad_norm": 7.871397616866562, "learning_rate": 2.7605120271872622e-06, "loss": 0.6025, "step": 14760 }, { "epoch": 1.32, "grad_norm": 7.849892342980423, "learning_rate": 2.7598661893426393e-06, "loss": 0.6122, "step": 14761 }, { "epoch": 1.32, "grad_norm": 6.038411366504692, "learning_rate": 2.7592203982559033e-06, "loss": 0.6008, "step": 14762 }, { "epoch": 1.32, "grad_norm": 5.145141728314818, "learning_rate": 2.7585746539405352e-06, "loss": 0.6266, "step": 14763 }, { "epoch": 1.32, "grad_norm": 5.871851163533148, "learning_rate": 2.757928956410012e-06, "loss": 0.6055, "step": 14764 }, { "epoch": 1.32, "grad_norm": 5.137863036054402, "learning_rate": 2.757283305677812e-06, "loss": 0.5787, "step": 14765 }, { "epoch": 1.32, "grad_norm": 8.239728993783089, "learning_rate": 2.756637701757413e-06, "loss": 0.6431, "step": 14766 }, { "epoch": 1.32, "grad_norm": 9.774382453594818, "learning_rate": 2.7559921446622877e-06, "loss": 0.553, "step": 14767 }, { "epoch": 1.32, "grad_norm": 5.182062357767533, "learning_rate": 2.7553466344059122e-06, "loss": 0.6345, "step": 14768 }, { "epoch": 1.32, "grad_norm": 7.15307730314865, "learning_rate": 2.754701171001761e-06, "loss": 0.5607, "step": 14769 }, { "epoch": 1.32, "grad_norm": 5.798204522230267, "learning_rate": 2.7540557544633052e-06, "loss": 0.5655, "step": 14770 }, { "epoch": 1.32, "grad_norm": 7.286090903635536, "learning_rate": 2.753410384804016e-06, "loss": 0.5442, "step": 14771 }, { "epoch": 1.32, "grad_norm": 4.604793727675368, "learning_rate": 2.7527650620373654e-06, "loss": 0.6301, "step": 14772 }, { "epoch": 1.32, "grad_norm": 5.5414526826250015, "learning_rate": 2.7521197861768243e-06, "loss": 0.5535, "step": 14773 }, { "epoch": 1.32, "grad_norm": 7.554883922511573, "learning_rate": 2.7514745572358586e-06, "loss": 0.5477, "step": 14774 }, { "epoch": 1.32, "grad_norm": 6.22578239787286, "learning_rate": 2.750829375227937e-06, "loss": 0.594, "step": 14775 }, { "epoch": 1.32, "grad_norm": 6.351891913371103, "learning_rate": 2.750184240166527e-06, "loss": 0.6109, "step": 14776 }, { "epoch": 1.32, "grad_norm": 7.16354643340043, "learning_rate": 2.7495391520650935e-06, "loss": 0.6846, "step": 14777 }, { "epoch": 1.32, "grad_norm": 7.073654236985589, "learning_rate": 2.7488941109371014e-06, "loss": 0.653, "step": 14778 }, { "epoch": 1.32, "grad_norm": 7.007139337164889, "learning_rate": 2.7482491167960175e-06, "loss": 0.6077, "step": 14779 }, { "epoch": 1.32, "grad_norm": 4.93099566361456, "learning_rate": 2.7476041696553e-06, "loss": 0.6047, "step": 14780 }, { "epoch": 1.32, "grad_norm": 6.595393859394191, "learning_rate": 2.7469592695284127e-06, "loss": 0.5795, "step": 14781 }, { "epoch": 1.32, "grad_norm": 7.742193299293692, "learning_rate": 2.7463144164288164e-06, "loss": 0.5943, "step": 14782 }, { "epoch": 1.32, "grad_norm": 6.175508599838531, "learning_rate": 2.7456696103699733e-06, "loss": 0.6143, "step": 14783 }, { "epoch": 1.32, "grad_norm": 5.498281517334388, "learning_rate": 2.7450248513653376e-06, "loss": 0.5935, "step": 14784 }, { "epoch": 1.32, "grad_norm": 5.958046630140257, "learning_rate": 2.744380139428371e-06, "loss": 0.6097, "step": 14785 }, { "epoch": 1.32, "grad_norm": 8.082719549028747, "learning_rate": 2.743735474572531e-06, "loss": 0.5822, "step": 14786 }, { "epoch": 1.32, "grad_norm": 6.5395063146141315, "learning_rate": 2.743090856811269e-06, "loss": 0.5928, "step": 14787 }, { "epoch": 1.32, "grad_norm": 5.874196927153603, "learning_rate": 2.742446286158044e-06, "loss": 0.6611, "step": 14788 }, { "epoch": 1.32, "grad_norm": 6.377205580359138, "learning_rate": 2.7418017626263076e-06, "loss": 0.5349, "step": 14789 }, { "epoch": 1.32, "grad_norm": 5.99683657774061, "learning_rate": 2.7411572862295146e-06, "loss": 0.5622, "step": 14790 }, { "epoch": 1.32, "grad_norm": 5.229712372929733, "learning_rate": 2.7405128569811186e-06, "loss": 0.6094, "step": 14791 }, { "epoch": 1.32, "grad_norm": 5.484233175308531, "learning_rate": 2.739868474894566e-06, "loss": 0.5874, "step": 14792 }, { "epoch": 1.32, "grad_norm": 8.328353405207565, "learning_rate": 2.73922413998331e-06, "loss": 0.603, "step": 14793 }, { "epoch": 1.32, "grad_norm": 9.414343958017348, "learning_rate": 2.7385798522607986e-06, "loss": 0.6205, "step": 14794 }, { "epoch": 1.32, "grad_norm": 6.470764842769586, "learning_rate": 2.737935611740481e-06, "loss": 0.5432, "step": 14795 }, { "epoch": 1.32, "grad_norm": 6.441225151392155, "learning_rate": 2.7372914184358027e-06, "loss": 0.6129, "step": 14796 }, { "epoch": 1.32, "grad_norm": 6.245398172182102, "learning_rate": 2.736647272360213e-06, "loss": 0.6201, "step": 14797 }, { "epoch": 1.32, "grad_norm": 4.807978534495059, "learning_rate": 2.7360031735271557e-06, "loss": 0.5572, "step": 14798 }, { "epoch": 1.32, "grad_norm": 6.097436859760323, "learning_rate": 2.735359121950072e-06, "loss": 0.6186, "step": 14799 }, { "epoch": 1.32, "grad_norm": 7.180228672757757, "learning_rate": 2.7347151176424062e-06, "loss": 0.5514, "step": 14800 }, { "epoch": 1.32, "grad_norm": 6.339108443108028, "learning_rate": 2.734071160617602e-06, "loss": 0.5566, "step": 14801 }, { "epoch": 1.32, "grad_norm": 7.404977027942776, "learning_rate": 2.7334272508891003e-06, "loss": 0.6, "step": 14802 }, { "epoch": 1.32, "grad_norm": 6.597780414270351, "learning_rate": 2.7327833884703402e-06, "loss": 0.5934, "step": 14803 }, { "epoch": 1.32, "grad_norm": 7.213774227256598, "learning_rate": 2.7321395733747646e-06, "loss": 0.5736, "step": 14804 }, { "epoch": 1.32, "grad_norm": 6.440584455531, "learning_rate": 2.7314958056158064e-06, "loss": 0.6044, "step": 14805 }, { "epoch": 1.32, "grad_norm": 6.018778669473016, "learning_rate": 2.7308520852069053e-06, "loss": 0.5847, "step": 14806 }, { "epoch": 1.32, "grad_norm": 4.3466335436718495, "learning_rate": 2.730208412161498e-06, "loss": 0.561, "step": 14807 }, { "epoch": 1.32, "grad_norm": 7.304543026458657, "learning_rate": 2.7295647864930185e-06, "loss": 0.5473, "step": 14808 }, { "epoch": 1.32, "grad_norm": 5.086225833690758, "learning_rate": 2.728921208214903e-06, "loss": 0.578, "step": 14809 }, { "epoch": 1.32, "grad_norm": 5.479213717710926, "learning_rate": 2.728277677340585e-06, "loss": 0.6647, "step": 14810 }, { "epoch": 1.32, "grad_norm": 6.328063822968609, "learning_rate": 2.7276341938834937e-06, "loss": 0.6192, "step": 14811 }, { "epoch": 1.32, "grad_norm": 6.668889812279947, "learning_rate": 2.726990757857064e-06, "loss": 0.5999, "step": 14812 }, { "epoch": 1.32, "grad_norm": 6.7724247836840785, "learning_rate": 2.7263473692747223e-06, "loss": 0.5419, "step": 14813 }, { "epoch": 1.32, "grad_norm": 4.734111494523718, "learning_rate": 2.7257040281499005e-06, "loss": 0.5801, "step": 14814 }, { "epoch": 1.32, "grad_norm": 7.286505761028956, "learning_rate": 2.7250607344960268e-06, "loss": 0.6432, "step": 14815 }, { "epoch": 1.32, "grad_norm": 6.3693239938932305, "learning_rate": 2.7244174883265274e-06, "loss": 0.6629, "step": 14816 }, { "epoch": 1.32, "grad_norm": 6.967319821354633, "learning_rate": 2.7237742896548313e-06, "loss": 0.5392, "step": 14817 }, { "epoch": 1.32, "grad_norm": 6.733791659103921, "learning_rate": 2.723131138494361e-06, "loss": 0.6194, "step": 14818 }, { "epoch": 1.32, "grad_norm": 6.78626179849597, "learning_rate": 2.7224880348585414e-06, "loss": 0.5897, "step": 14819 }, { "epoch": 1.32, "grad_norm": 6.539612011862975, "learning_rate": 2.721844978760797e-06, "loss": 0.5807, "step": 14820 }, { "epoch": 1.32, "grad_norm": 5.263541383388645, "learning_rate": 2.721201970214549e-06, "loss": 0.5478, "step": 14821 }, { "epoch": 1.32, "grad_norm": 7.389528512808675, "learning_rate": 2.720559009233219e-06, "loss": 0.5592, "step": 14822 }, { "epoch": 1.32, "grad_norm": 6.614801336390614, "learning_rate": 2.719916095830231e-06, "loss": 0.5918, "step": 14823 }, { "epoch": 1.32, "grad_norm": 4.759551114349417, "learning_rate": 2.719273230018998e-06, "loss": 0.5447, "step": 14824 }, { "epoch": 1.32, "grad_norm": 7.245692482858888, "learning_rate": 2.7186304118129424e-06, "loss": 0.6108, "step": 14825 }, { "epoch": 1.32, "grad_norm": 8.345345972003651, "learning_rate": 2.7179876412254823e-06, "loss": 0.5697, "step": 14826 }, { "epoch": 1.32, "grad_norm": 8.119038162198306, "learning_rate": 2.7173449182700306e-06, "loss": 0.5401, "step": 14827 }, { "epoch": 1.32, "grad_norm": 5.727097025577731, "learning_rate": 2.7167022429600056e-06, "loss": 0.5886, "step": 14828 }, { "epoch": 1.32, "grad_norm": 7.959095661410646, "learning_rate": 2.7160596153088204e-06, "loss": 0.6122, "step": 14829 }, { "epoch": 1.32, "grad_norm": 8.177043982027541, "learning_rate": 2.7154170353298913e-06, "loss": 0.5792, "step": 14830 }, { "epoch": 1.32, "grad_norm": 8.845368185880517, "learning_rate": 2.714774503036626e-06, "loss": 0.5566, "step": 14831 }, { "epoch": 1.32, "grad_norm": 9.435598406498471, "learning_rate": 2.7141320184424386e-06, "loss": 0.5624, "step": 14832 }, { "epoch": 1.32, "grad_norm": 6.0524040972869235, "learning_rate": 2.7134895815607394e-06, "loss": 0.5856, "step": 14833 }, { "epoch": 1.32, "grad_norm": 5.295827333264731, "learning_rate": 2.712847192404938e-06, "loss": 0.5829, "step": 14834 }, { "epoch": 1.32, "grad_norm": 7.548080884019631, "learning_rate": 2.7122048509884426e-06, "loss": 0.6418, "step": 14835 }, { "epoch": 1.32, "grad_norm": 4.504170876438622, "learning_rate": 2.711562557324662e-06, "loss": 0.6224, "step": 14836 }, { "epoch": 1.32, "grad_norm": 7.824815677358688, "learning_rate": 2.710920311427e-06, "loss": 0.5907, "step": 14837 }, { "epoch": 1.32, "grad_norm": 5.074124970998369, "learning_rate": 2.7102781133088636e-06, "loss": 0.6111, "step": 14838 }, { "epoch": 1.32, "grad_norm": 5.385269707240346, "learning_rate": 2.7096359629836565e-06, "loss": 0.571, "step": 14839 }, { "epoch": 1.32, "grad_norm": 5.907987756311079, "learning_rate": 2.7089938604647835e-06, "loss": 0.5637, "step": 14840 }, { "epoch": 1.32, "grad_norm": 8.917235500073248, "learning_rate": 2.708351805765649e-06, "loss": 0.6052, "step": 14841 }, { "epoch": 1.32, "grad_norm": 5.5405607959482595, "learning_rate": 2.707709798899649e-06, "loss": 0.6531, "step": 14842 }, { "epoch": 1.32, "grad_norm": 4.561913950110396, "learning_rate": 2.70706783988019e-06, "loss": 0.5374, "step": 14843 }, { "epoch": 1.32, "grad_norm": 6.036629029132004, "learning_rate": 2.706425928720665e-06, "loss": 0.5725, "step": 14844 }, { "epoch": 1.32, "grad_norm": 6.5148579941965075, "learning_rate": 2.7057840654344763e-06, "loss": 0.5953, "step": 14845 }, { "epoch": 1.32, "grad_norm": 6.592301552226379, "learning_rate": 2.705142250035021e-06, "loss": 0.5996, "step": 14846 }, { "epoch": 1.32, "grad_norm": 8.875543086061304, "learning_rate": 2.704500482535696e-06, "loss": 0.6649, "step": 14847 }, { "epoch": 1.32, "grad_norm": 6.428378873240603, "learning_rate": 2.7038587629498964e-06, "loss": 0.5955, "step": 14848 }, { "epoch": 1.32, "grad_norm": 7.871809016662366, "learning_rate": 2.703217091291018e-06, "loss": 0.5819, "step": 14849 }, { "epoch": 1.32, "grad_norm": 6.993799559247861, "learning_rate": 2.7025754675724514e-06, "loss": 0.6228, "step": 14850 }, { "epoch": 1.32, "grad_norm": 6.201038021835727, "learning_rate": 2.701933891807591e-06, "loss": 0.6393, "step": 14851 }, { "epoch": 1.32, "grad_norm": 8.472365600709844, "learning_rate": 2.701292364009828e-06, "loss": 0.6347, "step": 14852 }, { "epoch": 1.33, "grad_norm": 5.113169775173034, "learning_rate": 2.7006508841925517e-06, "loss": 0.5422, "step": 14853 }, { "epoch": 1.33, "grad_norm": 6.788040029677572, "learning_rate": 2.7000094523691534e-06, "loss": 0.5632, "step": 14854 }, { "epoch": 1.33, "grad_norm": 5.339962484642328, "learning_rate": 2.6993680685530233e-06, "loss": 0.5927, "step": 14855 }, { "epoch": 1.33, "grad_norm": 6.663984521898269, "learning_rate": 2.698726732757546e-06, "loss": 0.6136, "step": 14856 }, { "epoch": 1.33, "grad_norm": 7.285786812647119, "learning_rate": 2.6980854449961063e-06, "loss": 0.5935, "step": 14857 }, { "epoch": 1.33, "grad_norm": 6.0219423725873344, "learning_rate": 2.697444205282092e-06, "loss": 0.6625, "step": 14858 }, { "epoch": 1.33, "grad_norm": 4.982314647730492, "learning_rate": 2.696803013628887e-06, "loss": 0.551, "step": 14859 }, { "epoch": 1.33, "grad_norm": 8.198696032457127, "learning_rate": 2.696161870049876e-06, "loss": 0.6012, "step": 14860 }, { "epoch": 1.33, "grad_norm": 6.458813993794885, "learning_rate": 2.69552077455844e-06, "loss": 0.5727, "step": 14861 }, { "epoch": 1.33, "grad_norm": 5.0955312843561495, "learning_rate": 2.6948797271679637e-06, "loss": 0.5698, "step": 14862 }, { "epoch": 1.33, "grad_norm": 5.906845967034571, "learning_rate": 2.6942387278918223e-06, "loss": 0.6243, "step": 14863 }, { "epoch": 1.33, "grad_norm": 7.262808666489778, "learning_rate": 2.6935977767433984e-06, "loss": 0.5862, "step": 14864 }, { "epoch": 1.33, "grad_norm": 8.425785697187173, "learning_rate": 2.69295687373607e-06, "loss": 0.644, "step": 14865 }, { "epoch": 1.33, "grad_norm": 8.417202152937195, "learning_rate": 2.6923160188832143e-06, "loss": 0.618, "step": 14866 }, { "epoch": 1.33, "grad_norm": 6.30468578417814, "learning_rate": 2.69167521219821e-06, "loss": 0.5685, "step": 14867 }, { "epoch": 1.33, "grad_norm": 5.836773671429905, "learning_rate": 2.691034453694428e-06, "loss": 0.5823, "step": 14868 }, { "epoch": 1.33, "grad_norm": 5.85690374115841, "learning_rate": 2.690393743385246e-06, "loss": 0.5764, "step": 14869 }, { "epoch": 1.33, "grad_norm": 8.326214894425881, "learning_rate": 2.6897530812840385e-06, "loss": 0.6259, "step": 14870 }, { "epoch": 1.33, "grad_norm": 6.179927007251622, "learning_rate": 2.6891124674041734e-06, "loss": 0.6404, "step": 14871 }, { "epoch": 1.33, "grad_norm": 6.997689948718386, "learning_rate": 2.6884719017590256e-06, "loss": 0.5844, "step": 14872 }, { "epoch": 1.33, "grad_norm": 6.67820598109221, "learning_rate": 2.6878313843619645e-06, "loss": 0.5324, "step": 14873 }, { "epoch": 1.33, "grad_norm": 5.87489964894729, "learning_rate": 2.687190915226362e-06, "loss": 0.5969, "step": 14874 }, { "epoch": 1.33, "grad_norm": 8.635011775030492, "learning_rate": 2.686550494365582e-06, "loss": 0.5798, "step": 14875 }, { "epoch": 1.33, "grad_norm": 5.699149192701553, "learning_rate": 2.6859101217929933e-06, "loss": 0.5864, "step": 14876 }, { "epoch": 1.33, "grad_norm": 6.9868578235616425, "learning_rate": 2.6852697975219633e-06, "loss": 0.5709, "step": 14877 }, { "epoch": 1.33, "grad_norm": 7.85873524701914, "learning_rate": 2.684629521565858e-06, "loss": 0.6283, "step": 14878 }, { "epoch": 1.33, "grad_norm": 6.518228196288253, "learning_rate": 2.6839892939380395e-06, "loss": 0.6298, "step": 14879 }, { "epoch": 1.33, "grad_norm": 7.154881710436993, "learning_rate": 2.683349114651876e-06, "loss": 0.532, "step": 14880 }, { "epoch": 1.33, "grad_norm": 6.154491964828763, "learning_rate": 2.6827089837207232e-06, "loss": 0.5733, "step": 14881 }, { "epoch": 1.33, "grad_norm": 8.032123197322356, "learning_rate": 2.682068901157946e-06, "loss": 0.578, "step": 14882 }, { "epoch": 1.33, "grad_norm": 7.471968901214238, "learning_rate": 2.6814288669769044e-06, "loss": 0.65, "step": 14883 }, { "epoch": 1.33, "grad_norm": 5.659905221004216, "learning_rate": 2.68078888119096e-06, "loss": 0.6641, "step": 14884 }, { "epoch": 1.33, "grad_norm": 4.737069024089865, "learning_rate": 2.680148943813466e-06, "loss": 0.6009, "step": 14885 }, { "epoch": 1.33, "grad_norm": 7.464504386149451, "learning_rate": 2.679509054857783e-06, "loss": 0.6118, "step": 14886 }, { "epoch": 1.33, "grad_norm": 7.903214291785372, "learning_rate": 2.678869214337268e-06, "loss": 0.6874, "step": 14887 }, { "epoch": 1.33, "grad_norm": 7.5478334159138765, "learning_rate": 2.678229422265273e-06, "loss": 0.5732, "step": 14888 }, { "epoch": 1.33, "grad_norm": 5.22720678580852, "learning_rate": 2.6775896786551546e-06, "loss": 0.615, "step": 14889 }, { "epoch": 1.33, "grad_norm": 6.182824572484987, "learning_rate": 2.676949983520266e-06, "loss": 0.5197, "step": 14890 }, { "epoch": 1.33, "grad_norm": 5.83247557256651, "learning_rate": 2.6763103368739583e-06, "loss": 0.5857, "step": 14891 }, { "epoch": 1.33, "grad_norm": 7.135792417422544, "learning_rate": 2.6756707387295842e-06, "loss": 0.5782, "step": 14892 }, { "epoch": 1.33, "grad_norm": 5.384657584487287, "learning_rate": 2.6750311891004947e-06, "loss": 0.5711, "step": 14893 }, { "epoch": 1.33, "grad_norm": 4.601097217274062, "learning_rate": 2.674391688000036e-06, "loss": 0.6077, "step": 14894 }, { "epoch": 1.33, "grad_norm": 6.752391221667987, "learning_rate": 2.673752235441558e-06, "loss": 0.5985, "step": 14895 }, { "epoch": 1.33, "grad_norm": 6.206963686023177, "learning_rate": 2.673112831438407e-06, "loss": 0.5618, "step": 14896 }, { "epoch": 1.33, "grad_norm": 7.066421829978814, "learning_rate": 2.67247347600393e-06, "loss": 0.5947, "step": 14897 }, { "epoch": 1.33, "grad_norm": 6.392067817202814, "learning_rate": 2.671834169151472e-06, "loss": 0.6044, "step": 14898 }, { "epoch": 1.33, "grad_norm": 5.872820903587027, "learning_rate": 2.671194910894379e-06, "loss": 0.5685, "step": 14899 }, { "epoch": 1.33, "grad_norm": 7.024416601915947, "learning_rate": 2.670555701245993e-06, "loss": 0.5723, "step": 14900 }, { "epoch": 1.33, "grad_norm": 6.319269840485124, "learning_rate": 2.6699165402196525e-06, "loss": 0.5984, "step": 14901 }, { "epoch": 1.33, "grad_norm": 6.078437629483275, "learning_rate": 2.669277427828702e-06, "loss": 0.6219, "step": 14902 }, { "epoch": 1.33, "grad_norm": 5.49654835823428, "learning_rate": 2.668638364086481e-06, "loss": 0.5445, "step": 14903 }, { "epoch": 1.33, "grad_norm": 5.21907059086118, "learning_rate": 2.6679993490063284e-06, "loss": 0.5771, "step": 14904 }, { "epoch": 1.33, "grad_norm": 7.5128751133030125, "learning_rate": 2.667360382601582e-06, "loss": 0.5775, "step": 14905 }, { "epoch": 1.33, "grad_norm": 5.782019505766232, "learning_rate": 2.6667214648855816e-06, "loss": 0.5566, "step": 14906 }, { "epoch": 1.33, "grad_norm": 5.599662685924294, "learning_rate": 2.6660825958716586e-06, "loss": 0.607, "step": 14907 }, { "epoch": 1.33, "grad_norm": 6.616690368006348, "learning_rate": 2.6654437755731504e-06, "loss": 0.5393, "step": 14908 }, { "epoch": 1.33, "grad_norm": 9.021852621819734, "learning_rate": 2.6648050040033913e-06, "loss": 0.5972, "step": 14909 }, { "epoch": 1.33, "grad_norm": 6.327744582156377, "learning_rate": 2.664166281175713e-06, "loss": 0.6173, "step": 14910 }, { "epoch": 1.33, "grad_norm": 6.250683455723718, "learning_rate": 2.6635276071034484e-06, "loss": 0.5832, "step": 14911 }, { "epoch": 1.33, "grad_norm": 7.502477834425655, "learning_rate": 2.6628889817999304e-06, "loss": 0.5535, "step": 14912 }, { "epoch": 1.33, "grad_norm": 9.989704569156656, "learning_rate": 2.6622504052784857e-06, "loss": 0.6, "step": 14913 }, { "epoch": 1.33, "grad_norm": 5.819990273621143, "learning_rate": 2.6616118775524436e-06, "loss": 0.5888, "step": 14914 }, { "epoch": 1.33, "grad_norm": 7.5425294998105405, "learning_rate": 2.6609733986351314e-06, "loss": 0.7046, "step": 14915 }, { "epoch": 1.33, "grad_norm": 6.195607551872185, "learning_rate": 2.6603349685398784e-06, "loss": 0.6657, "step": 14916 }, { "epoch": 1.33, "grad_norm": 7.358906523565907, "learning_rate": 2.6596965872800085e-06, "loss": 0.573, "step": 14917 }, { "epoch": 1.33, "grad_norm": 6.379465725423374, "learning_rate": 2.659058254868847e-06, "loss": 0.5539, "step": 14918 }, { "epoch": 1.33, "grad_norm": 6.663860723902731, "learning_rate": 2.65841997131972e-06, "loss": 0.5899, "step": 14919 }, { "epoch": 1.33, "grad_norm": 6.743890561375794, "learning_rate": 2.6577817366459465e-06, "loss": 0.5694, "step": 14920 }, { "epoch": 1.33, "grad_norm": 6.870811013082939, "learning_rate": 2.657143550860849e-06, "loss": 0.6272, "step": 14921 }, { "epoch": 1.33, "grad_norm": 6.210366156964024, "learning_rate": 2.6565054139777504e-06, "loss": 0.5241, "step": 14922 }, { "epoch": 1.33, "grad_norm": 5.701529483097314, "learning_rate": 2.6558673260099676e-06, "loss": 0.5233, "step": 14923 }, { "epoch": 1.33, "grad_norm": 6.161788112765237, "learning_rate": 2.655229286970822e-06, "loss": 0.5792, "step": 14924 }, { "epoch": 1.33, "grad_norm": 5.408423940751369, "learning_rate": 2.6545912968736316e-06, "loss": 0.5879, "step": 14925 }, { "epoch": 1.33, "grad_norm": 6.200491138869476, "learning_rate": 2.65395335573171e-06, "loss": 0.5851, "step": 14926 }, { "epoch": 1.33, "grad_norm": 4.657347574117157, "learning_rate": 2.653315463558375e-06, "loss": 0.6177, "step": 14927 }, { "epoch": 1.33, "grad_norm": 7.708975294143208, "learning_rate": 2.6526776203669403e-06, "loss": 0.6223, "step": 14928 }, { "epoch": 1.33, "grad_norm": 6.588889212974012, "learning_rate": 2.6520398261707182e-06, "loss": 0.6044, "step": 14929 }, { "epoch": 1.33, "grad_norm": 6.259485635524976, "learning_rate": 2.6514020809830237e-06, "loss": 0.5516, "step": 14930 }, { "epoch": 1.33, "grad_norm": 6.461406552469308, "learning_rate": 2.650764384817167e-06, "loss": 0.6083, "step": 14931 }, { "epoch": 1.33, "grad_norm": 6.786909588439307, "learning_rate": 2.6501267376864615e-06, "loss": 0.6741, "step": 14932 }, { "epoch": 1.33, "grad_norm": 7.906024919806796, "learning_rate": 2.6494891396042118e-06, "loss": 0.6542, "step": 14933 }, { "epoch": 1.33, "grad_norm": 7.694823758770364, "learning_rate": 2.6488515905837286e-06, "loss": 0.61, "step": 14934 }, { "epoch": 1.33, "grad_norm": 7.302534323676685, "learning_rate": 2.64821409063832e-06, "loss": 0.6435, "step": 14935 }, { "epoch": 1.33, "grad_norm": 7.240375789465482, "learning_rate": 2.6475766397812917e-06, "loss": 0.5932, "step": 14936 }, { "epoch": 1.33, "grad_norm": 5.171303075468167, "learning_rate": 2.6469392380259495e-06, "loss": 0.5341, "step": 14937 }, { "epoch": 1.33, "grad_norm": 9.519609021624706, "learning_rate": 2.6463018853855994e-06, "loss": 0.5879, "step": 14938 }, { "epoch": 1.33, "grad_norm": 4.678550872665379, "learning_rate": 2.645664581873541e-06, "loss": 0.5613, "step": 14939 }, { "epoch": 1.33, "grad_norm": 5.543382001499024, "learning_rate": 2.6450273275030785e-06, "loss": 0.5942, "step": 14940 }, { "epoch": 1.33, "grad_norm": 5.947793753052208, "learning_rate": 2.6443901222875135e-06, "loss": 0.5502, "step": 14941 }, { "epoch": 1.33, "grad_norm": 7.033229048818087, "learning_rate": 2.6437529662401472e-06, "loss": 0.6015, "step": 14942 }, { "epoch": 1.33, "grad_norm": 8.660840386490642, "learning_rate": 2.6431158593742765e-06, "loss": 0.6714, "step": 14943 }, { "epoch": 1.33, "grad_norm": 5.268532053010118, "learning_rate": 2.642478801703201e-06, "loss": 0.5486, "step": 14944 }, { "epoch": 1.33, "grad_norm": 7.443469512268695, "learning_rate": 2.6418417932402187e-06, "loss": 0.6488, "step": 14945 }, { "epoch": 1.33, "grad_norm": 7.021423210088533, "learning_rate": 2.641204833998623e-06, "loss": 0.5372, "step": 14946 }, { "epoch": 1.33, "grad_norm": 4.894680177919018, "learning_rate": 2.640567923991711e-06, "loss": 0.5992, "step": 14947 }, { "epoch": 1.33, "grad_norm": 5.9262423377777615, "learning_rate": 2.639931063232776e-06, "loss": 0.621, "step": 14948 }, { "epoch": 1.33, "grad_norm": 5.795859631924286, "learning_rate": 2.6392942517351122e-06, "loss": 0.561, "step": 14949 }, { "epoch": 1.33, "grad_norm": 7.030887026689776, "learning_rate": 2.6386574895120125e-06, "loss": 0.629, "step": 14950 }, { "epoch": 1.33, "grad_norm": 6.631985703200043, "learning_rate": 2.638020776576765e-06, "loss": 0.6301, "step": 14951 }, { "epoch": 1.33, "grad_norm": 5.084298465346028, "learning_rate": 2.6373841129426608e-06, "loss": 0.5258, "step": 14952 }, { "epoch": 1.33, "grad_norm": 5.811468073730915, "learning_rate": 2.636747498622989e-06, "loss": 0.5507, "step": 14953 }, { "epoch": 1.33, "grad_norm": 6.738487668097852, "learning_rate": 2.636110933631038e-06, "loss": 0.5966, "step": 14954 }, { "epoch": 1.33, "grad_norm": 5.453849509251942, "learning_rate": 2.6354744179800945e-06, "loss": 0.5577, "step": 14955 }, { "epoch": 1.33, "grad_norm": 8.652524213228832, "learning_rate": 2.6348379516834466e-06, "loss": 0.6559, "step": 14956 }, { "epoch": 1.33, "grad_norm": 7.5460351671449235, "learning_rate": 2.6342015347543743e-06, "loss": 0.6332, "step": 14957 }, { "epoch": 1.33, "grad_norm": 6.22873556405967, "learning_rate": 2.6335651672061657e-06, "loss": 0.596, "step": 14958 }, { "epoch": 1.33, "grad_norm": 6.527449077272407, "learning_rate": 2.6329288490521003e-06, "loss": 0.5649, "step": 14959 }, { "epoch": 1.33, "grad_norm": 8.487027856636644, "learning_rate": 2.6322925803054616e-06, "loss": 0.6292, "step": 14960 }, { "epoch": 1.33, "grad_norm": 5.953362339302169, "learning_rate": 2.63165636097953e-06, "loss": 0.629, "step": 14961 }, { "epoch": 1.33, "grad_norm": 6.163992345226609, "learning_rate": 2.631020191087585e-06, "loss": 0.5731, "step": 14962 }, { "epoch": 1.33, "grad_norm": 5.313934697373516, "learning_rate": 2.630384070642907e-06, "loss": 0.5947, "step": 14963 }, { "epoch": 1.33, "grad_norm": 6.638426384540426, "learning_rate": 2.6297479996587716e-06, "loss": 0.5695, "step": 14964 }, { "epoch": 1.34, "grad_norm": 6.8576512355372525, "learning_rate": 2.629111978148455e-06, "loss": 0.5567, "step": 14965 }, { "epoch": 1.34, "grad_norm": 7.475858904515392, "learning_rate": 2.6284760061252333e-06, "loss": 0.5664, "step": 14966 }, { "epoch": 1.34, "grad_norm": 6.293796164823158, "learning_rate": 2.627840083602382e-06, "loss": 0.6067, "step": 14967 }, { "epoch": 1.34, "grad_norm": 5.9333877721911135, "learning_rate": 2.627204210593174e-06, "loss": 0.6416, "step": 14968 }, { "epoch": 1.34, "grad_norm": 6.565001332473048, "learning_rate": 2.626568387110884e-06, "loss": 0.6011, "step": 14969 }, { "epoch": 1.34, "grad_norm": 5.2289431214038915, "learning_rate": 2.625932613168779e-06, "loss": 0.564, "step": 14970 }, { "epoch": 1.34, "grad_norm": 6.187534975946211, "learning_rate": 2.6252968887801332e-06, "loss": 0.6817, "step": 14971 }, { "epoch": 1.34, "grad_norm": 4.688525302259582, "learning_rate": 2.6246612139582126e-06, "loss": 0.587, "step": 14972 }, { "epoch": 1.34, "grad_norm": 7.303293878161056, "learning_rate": 2.6240255887162873e-06, "loss": 0.6176, "step": 14973 }, { "epoch": 1.34, "grad_norm": 6.122272070299025, "learning_rate": 2.623390013067625e-06, "loss": 0.5874, "step": 14974 }, { "epoch": 1.34, "grad_norm": 8.118440325037517, "learning_rate": 2.6227544870254905e-06, "loss": 0.617, "step": 14975 }, { "epoch": 1.34, "grad_norm": 6.71014516516398, "learning_rate": 2.6221190106031522e-06, "loss": 0.6148, "step": 14976 }, { "epoch": 1.34, "grad_norm": 8.773967283903481, "learning_rate": 2.621483583813871e-06, "loss": 0.5929, "step": 14977 }, { "epoch": 1.34, "grad_norm": 8.589442386950346, "learning_rate": 2.6208482066709108e-06, "loss": 0.5782, "step": 14978 }, { "epoch": 1.34, "grad_norm": 6.926821170266975, "learning_rate": 2.6202128791875337e-06, "loss": 0.6018, "step": 14979 }, { "epoch": 1.34, "grad_norm": 6.577581290948911, "learning_rate": 2.6195776013770015e-06, "loss": 0.5294, "step": 14980 }, { "epoch": 1.34, "grad_norm": 6.066314371728381, "learning_rate": 2.6189423732525733e-06, "loss": 0.5748, "step": 14981 }, { "epoch": 1.34, "grad_norm": 6.115904680420456, "learning_rate": 2.6183071948275106e-06, "loss": 0.5945, "step": 14982 }, { "epoch": 1.34, "grad_norm": 6.453892143219966, "learning_rate": 2.6176720661150678e-06, "loss": 0.5546, "step": 14983 }, { "epoch": 1.34, "grad_norm": 7.010810145640086, "learning_rate": 2.6170369871285026e-06, "loss": 0.5244, "step": 14984 }, { "epoch": 1.34, "grad_norm": 5.6020803644538315, "learning_rate": 2.616401957881075e-06, "loss": 0.5507, "step": 14985 }, { "epoch": 1.34, "grad_norm": 32.35315049942263, "learning_rate": 2.615766978386034e-06, "loss": 0.5614, "step": 14986 }, { "epoch": 1.34, "grad_norm": 5.637888267645923, "learning_rate": 2.6151320486566356e-06, "loss": 0.5861, "step": 14987 }, { "epoch": 1.34, "grad_norm": 6.846547198703827, "learning_rate": 2.614497168706133e-06, "loss": 0.5963, "step": 14988 }, { "epoch": 1.34, "grad_norm": 7.5464066484805805, "learning_rate": 2.61386233854778e-06, "loss": 0.6291, "step": 14989 }, { "epoch": 1.34, "grad_norm": 5.851151587534548, "learning_rate": 2.613227558194823e-06, "loss": 0.6256, "step": 14990 }, { "epoch": 1.34, "grad_norm": 9.090168307076514, "learning_rate": 2.612592827660514e-06, "loss": 0.5906, "step": 14991 }, { "epoch": 1.34, "grad_norm": 6.86564363067613, "learning_rate": 2.611958146958101e-06, "loss": 0.5852, "step": 14992 }, { "epoch": 1.34, "grad_norm": 7.584974338718077, "learning_rate": 2.611323516100832e-06, "loss": 0.5846, "step": 14993 }, { "epoch": 1.34, "grad_norm": 5.3510483812407506, "learning_rate": 2.6106889351019536e-06, "loss": 0.5755, "step": 14994 }, { "epoch": 1.34, "grad_norm": 5.880070226661775, "learning_rate": 2.610054403974714e-06, "loss": 0.5985, "step": 14995 }, { "epoch": 1.34, "grad_norm": 6.58917499912922, "learning_rate": 2.609419922732352e-06, "loss": 0.6258, "step": 14996 }, { "epoch": 1.34, "grad_norm": 7.733671538307503, "learning_rate": 2.6087854913881138e-06, "loss": 0.6137, "step": 14997 }, { "epoch": 1.34, "grad_norm": 6.503375419638901, "learning_rate": 2.608151109955242e-06, "loss": 0.5951, "step": 14998 }, { "epoch": 1.34, "grad_norm": 6.43617296770431, "learning_rate": 2.607516778446978e-06, "loss": 0.5956, "step": 14999 }, { "epoch": 1.34, "grad_norm": 10.933280407214431, "learning_rate": 2.606882496876563e-06, "loss": 0.6426, "step": 15000 }, { "epoch": 1.34, "grad_norm": 5.678015805410801, "learning_rate": 2.606248265257234e-06, "loss": 0.5677, "step": 15001 }, { "epoch": 1.34, "grad_norm": 5.8201231030846685, "learning_rate": 2.6056140836022316e-06, "loss": 0.6013, "step": 15002 }, { "epoch": 1.34, "grad_norm": 6.199073837076986, "learning_rate": 2.60497995192479e-06, "loss": 0.5819, "step": 15003 }, { "epoch": 1.34, "grad_norm": 8.599937861921019, "learning_rate": 2.6043458702381462e-06, "loss": 0.5929, "step": 15004 }, { "epoch": 1.34, "grad_norm": 8.510718689661477, "learning_rate": 2.6037118385555364e-06, "loss": 0.6163, "step": 15005 }, { "epoch": 1.34, "grad_norm": 6.135706669147801, "learning_rate": 2.603077856890194e-06, "loss": 0.5388, "step": 15006 }, { "epoch": 1.34, "grad_norm": 6.626182083814477, "learning_rate": 2.6024439252553523e-06, "loss": 0.5041, "step": 15007 }, { "epoch": 1.34, "grad_norm": 6.522127726227082, "learning_rate": 2.6018100436642457e-06, "loss": 0.6189, "step": 15008 }, { "epoch": 1.34, "grad_norm": 7.822150041899604, "learning_rate": 2.6011762121301e-06, "loss": 0.5773, "step": 15009 }, { "epoch": 1.34, "grad_norm": 5.791055078446454, "learning_rate": 2.6005424306661477e-06, "loss": 0.6113, "step": 15010 }, { "epoch": 1.34, "grad_norm": 7.969259672791818, "learning_rate": 2.599908699285617e-06, "loss": 0.6555, "step": 15011 }, { "epoch": 1.34, "grad_norm": 6.471539674647872, "learning_rate": 2.599275018001736e-06, "loss": 0.5977, "step": 15012 }, { "epoch": 1.34, "grad_norm": 6.271652143690832, "learning_rate": 2.5986413868277316e-06, "loss": 0.5622, "step": 15013 }, { "epoch": 1.34, "grad_norm": 4.630729607107719, "learning_rate": 2.598007805776832e-06, "loss": 0.5439, "step": 15014 }, { "epoch": 1.34, "grad_norm": 7.911740575164361, "learning_rate": 2.5973742748622588e-06, "loss": 0.5849, "step": 15015 }, { "epoch": 1.34, "grad_norm": 7.213653158262906, "learning_rate": 2.5967407940972335e-06, "loss": 0.6521, "step": 15016 }, { "epoch": 1.34, "grad_norm": 9.219568881890458, "learning_rate": 2.5961073634949816e-06, "loss": 0.6145, "step": 15017 }, { "epoch": 1.34, "grad_norm": 6.647279436926166, "learning_rate": 2.5954739830687237e-06, "loss": 0.5669, "step": 15018 }, { "epoch": 1.34, "grad_norm": 5.276974969354231, "learning_rate": 2.594840652831681e-06, "loss": 0.5998, "step": 15019 }, { "epoch": 1.34, "grad_norm": 6.28815274887912, "learning_rate": 2.5942073727970725e-06, "loss": 0.5527, "step": 15020 }, { "epoch": 1.34, "grad_norm": 6.973062460926717, "learning_rate": 2.593574142978118e-06, "loss": 0.5813, "step": 15021 }, { "epoch": 1.34, "grad_norm": 6.01861791763774, "learning_rate": 2.592940963388032e-06, "loss": 0.5817, "step": 15022 }, { "epoch": 1.34, "grad_norm": 7.6031039078446865, "learning_rate": 2.5923078340400316e-06, "loss": 0.623, "step": 15023 }, { "epoch": 1.34, "grad_norm": 7.862935133592644, "learning_rate": 2.591674754947332e-06, "loss": 0.5438, "step": 15024 }, { "epoch": 1.34, "grad_norm": 5.470287336260344, "learning_rate": 2.591041726123148e-06, "loss": 0.5638, "step": 15025 }, { "epoch": 1.34, "grad_norm": 6.136090904155844, "learning_rate": 2.5904087475806947e-06, "loss": 0.5561, "step": 15026 }, { "epoch": 1.34, "grad_norm": 6.712659601776069, "learning_rate": 2.5897758193331802e-06, "loss": 0.5315, "step": 15027 }, { "epoch": 1.34, "grad_norm": 7.470438683292529, "learning_rate": 2.5891429413938184e-06, "loss": 0.5974, "step": 15028 }, { "epoch": 1.34, "grad_norm": 5.411983130879211, "learning_rate": 2.588510113775816e-06, "loss": 0.6085, "step": 15029 }, { "epoch": 1.34, "grad_norm": 5.471130299673768, "learning_rate": 2.5878773364923847e-06, "loss": 0.5814, "step": 15030 }, { "epoch": 1.34, "grad_norm": 7.759775125645258, "learning_rate": 2.587244609556731e-06, "loss": 0.5847, "step": 15031 }, { "epoch": 1.34, "grad_norm": 5.08368902753578, "learning_rate": 2.5866119329820627e-06, "loss": 0.5834, "step": 15032 }, { "epoch": 1.34, "grad_norm": 6.707579446550206, "learning_rate": 2.585979306781584e-06, "loss": 0.5824, "step": 15033 }, { "epoch": 1.34, "grad_norm": 5.434191994820986, "learning_rate": 2.5853467309685038e-06, "loss": 0.5691, "step": 15034 }, { "epoch": 1.34, "grad_norm": 7.445112291513857, "learning_rate": 2.5847142055560206e-06, "loss": 0.6737, "step": 15035 }, { "epoch": 1.34, "grad_norm": 6.7538354251162, "learning_rate": 2.584081730557338e-06, "loss": 0.6161, "step": 15036 }, { "epoch": 1.34, "grad_norm": 6.7107724638505, "learning_rate": 2.583449305985659e-06, "loss": 0.6046, "step": 15037 }, { "epoch": 1.34, "grad_norm": 6.011769211858943, "learning_rate": 2.582816931854184e-06, "loss": 0.637, "step": 15038 }, { "epoch": 1.34, "grad_norm": 5.7605259103256765, "learning_rate": 2.582184608176113e-06, "loss": 0.5333, "step": 15039 }, { "epoch": 1.34, "grad_norm": 5.645792384446202, "learning_rate": 2.5815523349646423e-06, "loss": 0.6127, "step": 15040 }, { "epoch": 1.34, "grad_norm": 5.623099241794625, "learning_rate": 2.5809201122329695e-06, "loss": 0.6204, "step": 15041 }, { "epoch": 1.34, "grad_norm": 5.6186152664203375, "learning_rate": 2.5802879399942927e-06, "loss": 0.6105, "step": 15042 }, { "epoch": 1.34, "grad_norm": 7.271568505140595, "learning_rate": 2.579655818261807e-06, "loss": 0.5445, "step": 15043 }, { "epoch": 1.34, "grad_norm": 6.394076599609442, "learning_rate": 2.5790237470487045e-06, "loss": 0.5355, "step": 15044 }, { "epoch": 1.34, "grad_norm": 5.882987094403889, "learning_rate": 2.578391726368179e-06, "loss": 0.5768, "step": 15045 }, { "epoch": 1.34, "grad_norm": 6.551187680016304, "learning_rate": 2.577759756233425e-06, "loss": 0.5514, "step": 15046 }, { "epoch": 1.34, "grad_norm": 6.39151686690782, "learning_rate": 2.5771278366576296e-06, "loss": 0.5616, "step": 15047 }, { "epoch": 1.34, "grad_norm": 6.519002045097481, "learning_rate": 2.5764959676539845e-06, "loss": 0.5801, "step": 15048 }, { "epoch": 1.34, "grad_norm": 8.648177831576234, "learning_rate": 2.5758641492356785e-06, "loss": 0.652, "step": 15049 }, { "epoch": 1.34, "grad_norm": 8.580263468383123, "learning_rate": 2.5752323814159003e-06, "loss": 0.6266, "step": 15050 }, { "epoch": 1.34, "grad_norm": 5.2734986679626905, "learning_rate": 2.5746006642078354e-06, "loss": 0.5817, "step": 15051 }, { "epoch": 1.34, "grad_norm": 6.37196163340586, "learning_rate": 2.573968997624672e-06, "loss": 0.5801, "step": 15052 }, { "epoch": 1.34, "grad_norm": 9.473849009977808, "learning_rate": 2.5733373816795914e-06, "loss": 0.6621, "step": 15053 }, { "epoch": 1.34, "grad_norm": 7.221721391068999, "learning_rate": 2.572705816385778e-06, "loss": 0.6416, "step": 15054 }, { "epoch": 1.34, "grad_norm": 7.991568160015231, "learning_rate": 2.5720743017564154e-06, "loss": 0.6245, "step": 15055 }, { "epoch": 1.34, "grad_norm": 8.121957352219544, "learning_rate": 2.571442837804685e-06, "loss": 0.5569, "step": 15056 }, { "epoch": 1.34, "grad_norm": 5.876673064140517, "learning_rate": 2.570811424543769e-06, "loss": 0.5641, "step": 15057 }, { "epoch": 1.34, "grad_norm": 6.143619272000288, "learning_rate": 2.5701800619868416e-06, "loss": 0.6047, "step": 15058 }, { "epoch": 1.34, "grad_norm": 6.466920829384702, "learning_rate": 2.5695487501470866e-06, "loss": 0.5494, "step": 15059 }, { "epoch": 1.34, "grad_norm": 10.73799244132306, "learning_rate": 2.568917489037677e-06, "loss": 0.6117, "step": 15060 }, { "epoch": 1.34, "grad_norm": 5.209925804057708, "learning_rate": 2.568286278671791e-06, "loss": 0.5323, "step": 15061 }, { "epoch": 1.34, "grad_norm": 7.318872821892992, "learning_rate": 2.5676551190626037e-06, "loss": 0.5978, "step": 15062 }, { "epoch": 1.34, "grad_norm": 7.421086855477212, "learning_rate": 2.567024010223289e-06, "loss": 0.5872, "step": 15063 }, { "epoch": 1.34, "grad_norm": 5.5260175789479575, "learning_rate": 2.5663929521670196e-06, "loss": 0.5416, "step": 15064 }, { "epoch": 1.34, "grad_norm": 6.534256245182314, "learning_rate": 2.56576194490697e-06, "loss": 0.5644, "step": 15065 }, { "epoch": 1.34, "grad_norm": 5.950004489488865, "learning_rate": 2.565130988456307e-06, "loss": 0.6144, "step": 15066 }, { "epoch": 1.34, "grad_norm": 6.323918232200568, "learning_rate": 2.564500082828202e-06, "loss": 0.6149, "step": 15067 }, { "epoch": 1.34, "grad_norm": 12.454712290364682, "learning_rate": 2.5638692280358247e-06, "loss": 0.6069, "step": 15068 }, { "epoch": 1.34, "grad_norm": 4.8890141665384625, "learning_rate": 2.5632384240923413e-06, "loss": 0.6155, "step": 15069 }, { "epoch": 1.34, "grad_norm": 6.423825434499414, "learning_rate": 2.5626076710109206e-06, "loss": 0.5684, "step": 15070 }, { "epoch": 1.34, "grad_norm": 6.555141525436559, "learning_rate": 2.561976968804728e-06, "loss": 0.5258, "step": 15071 }, { "epoch": 1.34, "grad_norm": 6.557472569521265, "learning_rate": 2.5613463174869268e-06, "loss": 0.572, "step": 15072 }, { "epoch": 1.34, "grad_norm": 6.942653198855258, "learning_rate": 2.56071571707068e-06, "loss": 0.5883, "step": 15073 }, { "epoch": 1.34, "grad_norm": 6.066228497680753, "learning_rate": 2.5600851675691497e-06, "loss": 0.5295, "step": 15074 }, { "epoch": 1.34, "grad_norm": 7.257482539952881, "learning_rate": 2.559454668995498e-06, "loss": 0.6305, "step": 15075 }, { "epoch": 1.34, "grad_norm": 6.963618885494495, "learning_rate": 2.558824221362886e-06, "loss": 0.5803, "step": 15076 }, { "epoch": 1.35, "grad_norm": 6.867750515126046, "learning_rate": 2.5581938246844716e-06, "loss": 0.6215, "step": 15077 }, { "epoch": 1.35, "grad_norm": 7.707095374082219, "learning_rate": 2.557563478973416e-06, "loss": 0.6318, "step": 15078 }, { "epoch": 1.35, "grad_norm": 6.258612707104679, "learning_rate": 2.5569331842428724e-06, "loss": 0.51, "step": 15079 }, { "epoch": 1.35, "grad_norm": 6.975260764979795, "learning_rate": 2.556302940505998e-06, "loss": 0.6015, "step": 15080 }, { "epoch": 1.35, "grad_norm": 4.602050949471063, "learning_rate": 2.555672747775948e-06, "loss": 0.6496, "step": 15081 }, { "epoch": 1.35, "grad_norm": 5.201855613614339, "learning_rate": 2.5550426060658766e-06, "loss": 0.6006, "step": 15082 }, { "epoch": 1.35, "grad_norm": 6.03237458022645, "learning_rate": 2.5544125153889366e-06, "loss": 0.5478, "step": 15083 }, { "epoch": 1.35, "grad_norm": 7.4307416463136, "learning_rate": 2.553782475758281e-06, "loss": 0.5596, "step": 15084 }, { "epoch": 1.35, "grad_norm": 6.045971506143133, "learning_rate": 2.553152487187057e-06, "loss": 0.5829, "step": 15085 }, { "epoch": 1.35, "grad_norm": 5.3383291558589905, "learning_rate": 2.5525225496884193e-06, "loss": 0.5977, "step": 15086 }, { "epoch": 1.35, "grad_norm": 5.667618549117838, "learning_rate": 2.551892663275511e-06, "loss": 0.5491, "step": 15087 }, { "epoch": 1.35, "grad_norm": 7.890037413334313, "learning_rate": 2.5512628279614815e-06, "loss": 0.5819, "step": 15088 }, { "epoch": 1.35, "grad_norm": 5.972036499354017, "learning_rate": 2.5506330437594784e-06, "loss": 0.6112, "step": 15089 }, { "epoch": 1.35, "grad_norm": 6.3076585249205905, "learning_rate": 2.5500033106826472e-06, "loss": 0.5939, "step": 15090 }, { "epoch": 1.35, "grad_norm": 7.362123936571728, "learning_rate": 2.549373628744132e-06, "loss": 0.6547, "step": 15091 }, { "epoch": 1.35, "grad_norm": 6.1706013853349635, "learning_rate": 2.5487439979570745e-06, "loss": 0.6082, "step": 15092 }, { "epoch": 1.35, "grad_norm": 7.792239582298513, "learning_rate": 2.548114418334617e-06, "loss": 0.5981, "step": 15093 }, { "epoch": 1.35, "grad_norm": 6.433942714046304, "learning_rate": 2.547484889889903e-06, "loss": 0.5994, "step": 15094 }, { "epoch": 1.35, "grad_norm": 5.355135321617326, "learning_rate": 2.5468554126360702e-06, "loss": 0.5672, "step": 15095 }, { "epoch": 1.35, "grad_norm": 8.181964188121881, "learning_rate": 2.546225986586258e-06, "loss": 0.6128, "step": 15096 }, { "epoch": 1.35, "grad_norm": 5.855861004094792, "learning_rate": 2.545596611753607e-06, "loss": 0.5797, "step": 15097 }, { "epoch": 1.35, "grad_norm": 8.008918690247997, "learning_rate": 2.5449672881512493e-06, "loss": 0.544, "step": 15098 }, { "epoch": 1.35, "grad_norm": 8.053494888471418, "learning_rate": 2.544338015792324e-06, "loss": 0.6366, "step": 15099 }, { "epoch": 1.35, "grad_norm": 4.346565553391315, "learning_rate": 2.5437087946899653e-06, "loss": 0.5466, "step": 15100 }, { "epoch": 1.35, "grad_norm": 6.399941209298324, "learning_rate": 2.5430796248573054e-06, "loss": 0.5994, "step": 15101 }, { "epoch": 1.35, "grad_norm": 9.62306302035983, "learning_rate": 2.5424505063074777e-06, "loss": 0.6684, "step": 15102 }, { "epoch": 1.35, "grad_norm": 6.536070846245072, "learning_rate": 2.5418214390536133e-06, "loss": 0.591, "step": 15103 }, { "epoch": 1.35, "grad_norm": 6.217676320582467, "learning_rate": 2.541192423108846e-06, "loss": 0.5675, "step": 15104 }, { "epoch": 1.35, "grad_norm": 5.6656228281171215, "learning_rate": 2.540563458486299e-06, "loss": 0.5889, "step": 15105 }, { "epoch": 1.35, "grad_norm": 8.456834594170966, "learning_rate": 2.5399345451991043e-06, "loss": 0.6139, "step": 15106 }, { "epoch": 1.35, "grad_norm": 5.745081997571854, "learning_rate": 2.5393056832603885e-06, "loss": 0.5957, "step": 15107 }, { "epoch": 1.35, "grad_norm": 6.803786018850342, "learning_rate": 2.5386768726832777e-06, "loss": 0.5781, "step": 15108 }, { "epoch": 1.35, "grad_norm": 5.579312862233151, "learning_rate": 2.5380481134808966e-06, "loss": 0.6159, "step": 15109 }, { "epoch": 1.35, "grad_norm": 5.373786471134496, "learning_rate": 2.537419405666372e-06, "loss": 0.528, "step": 15110 }, { "epoch": 1.35, "grad_norm": 5.71752242110261, "learning_rate": 2.536790749252822e-06, "loss": 0.5588, "step": 15111 }, { "epoch": 1.35, "grad_norm": 5.807602277259198, "learning_rate": 2.536162144253371e-06, "loss": 0.5882, "step": 15112 }, { "epoch": 1.35, "grad_norm": 6.380798698086225, "learning_rate": 2.5355335906811396e-06, "loss": 0.5457, "step": 15113 }, { "epoch": 1.35, "grad_norm": 7.117094659304448, "learning_rate": 2.534905088549248e-06, "loss": 0.5977, "step": 15114 }, { "epoch": 1.35, "grad_norm": 6.029045901726208, "learning_rate": 2.534276637870815e-06, "loss": 0.6189, "step": 15115 }, { "epoch": 1.35, "grad_norm": 8.688055846022806, "learning_rate": 2.533648238658956e-06, "loss": 0.581, "step": 15116 }, { "epoch": 1.35, "grad_norm": 6.285153308987967, "learning_rate": 2.533019890926791e-06, "loss": 0.5975, "step": 15117 }, { "epoch": 1.35, "grad_norm": 6.60133590891748, "learning_rate": 2.532391594687431e-06, "loss": 0.5346, "step": 15118 }, { "epoch": 1.35, "grad_norm": 6.091296015613695, "learning_rate": 2.5317633499539916e-06, "loss": 0.5729, "step": 15119 }, { "epoch": 1.35, "grad_norm": 6.542983164513425, "learning_rate": 2.531135156739588e-06, "loss": 0.6016, "step": 15120 }, { "epoch": 1.35, "grad_norm": 6.439800004104856, "learning_rate": 2.5305070150573313e-06, "loss": 0.566, "step": 15121 }, { "epoch": 1.35, "grad_norm": 5.4001016891442255, "learning_rate": 2.529878924920334e-06, "loss": 0.6408, "step": 15122 }, { "epoch": 1.35, "grad_norm": 6.771024439109968, "learning_rate": 2.5292508863417033e-06, "loss": 0.6063, "step": 15123 }, { "epoch": 1.35, "grad_norm": 6.76660501500935, "learning_rate": 2.5286228993345483e-06, "loss": 0.6127, "step": 15124 }, { "epoch": 1.35, "grad_norm": 7.495259273041217, "learning_rate": 2.5279949639119794e-06, "loss": 0.7046, "step": 15125 }, { "epoch": 1.35, "grad_norm": 6.182695056895717, "learning_rate": 2.527367080087102e-06, "loss": 0.5675, "step": 15126 }, { "epoch": 1.35, "grad_norm": 5.5407137440528045, "learning_rate": 2.5267392478730212e-06, "loss": 0.573, "step": 15127 }, { "epoch": 1.35, "grad_norm": 7.7989864279859145, "learning_rate": 2.5261114672828442e-06, "loss": 0.5471, "step": 15128 }, { "epoch": 1.35, "grad_norm": 5.82333478376396, "learning_rate": 2.5254837383296727e-06, "loss": 0.5472, "step": 15129 }, { "epoch": 1.35, "grad_norm": 5.206859513353213, "learning_rate": 2.524856061026607e-06, "loss": 0.5635, "step": 15130 }, { "epoch": 1.35, "grad_norm": 9.13842755804041, "learning_rate": 2.524228435386751e-06, "loss": 0.5927, "step": 15131 }, { "epoch": 1.35, "grad_norm": 5.841479525263, "learning_rate": 2.5236008614232043e-06, "loss": 0.5755, "step": 15132 }, { "epoch": 1.35, "grad_norm": 5.268236230277615, "learning_rate": 2.522973339149066e-06, "loss": 0.5755, "step": 15133 }, { "epoch": 1.35, "grad_norm": 7.838313837026516, "learning_rate": 2.522345868577435e-06, "loss": 0.5983, "step": 15134 }, { "epoch": 1.35, "grad_norm": 4.890199752954614, "learning_rate": 2.5217184497214097e-06, "loss": 0.5774, "step": 15135 }, { "epoch": 1.35, "grad_norm": 5.416211462041913, "learning_rate": 2.521091082594083e-06, "loss": 0.566, "step": 15136 }, { "epoch": 1.35, "grad_norm": 8.711531816201989, "learning_rate": 2.5204637672085507e-06, "loss": 0.5838, "step": 15137 }, { "epoch": 1.35, "grad_norm": 6.036535642758754, "learning_rate": 2.519836503577908e-06, "loss": 0.5484, "step": 15138 }, { "epoch": 1.35, "grad_norm": 5.529757021022951, "learning_rate": 2.519209291715246e-06, "loss": 0.5635, "step": 15139 }, { "epoch": 1.35, "grad_norm": 7.963986755646742, "learning_rate": 2.5185821316336577e-06, "loss": 0.5796, "step": 15140 }, { "epoch": 1.35, "grad_norm": 5.942239394637693, "learning_rate": 2.5179550233462348e-06, "loss": 0.5846, "step": 15141 }, { "epoch": 1.35, "grad_norm": 6.598240777475695, "learning_rate": 2.5173279668660633e-06, "loss": 0.5711, "step": 15142 }, { "epoch": 1.35, "grad_norm": 5.876855292699237, "learning_rate": 2.5167009622062333e-06, "loss": 0.5743, "step": 15143 }, { "epoch": 1.35, "grad_norm": 8.185952094628254, "learning_rate": 2.5160740093798354e-06, "loss": 0.5828, "step": 15144 }, { "epoch": 1.35, "grad_norm": 7.790928437022327, "learning_rate": 2.5154471083999498e-06, "loss": 0.5672, "step": 15145 }, { "epoch": 1.35, "grad_norm": 6.924347638542674, "learning_rate": 2.514820259279665e-06, "loss": 0.6623, "step": 15146 }, { "epoch": 1.35, "grad_norm": 6.609111447317407, "learning_rate": 2.514193462032065e-06, "loss": 0.6353, "step": 15147 }, { "epoch": 1.35, "grad_norm": 6.353823645698439, "learning_rate": 2.5135667166702342e-06, "loss": 0.6047, "step": 15148 }, { "epoch": 1.35, "grad_norm": 6.6942909410068365, "learning_rate": 2.5129400232072513e-06, "loss": 0.6168, "step": 15149 }, { "epoch": 1.35, "grad_norm": 6.6659350574055125, "learning_rate": 2.512313381656199e-06, "loss": 0.584, "step": 15150 }, { "epoch": 1.35, "grad_norm": 5.815683885935327, "learning_rate": 2.511686792030157e-06, "loss": 0.6168, "step": 15151 }, { "epoch": 1.35, "grad_norm": 5.494416073640225, "learning_rate": 2.5110602543422034e-06, "loss": 0.6166, "step": 15152 }, { "epoch": 1.35, "grad_norm": 7.181288755098564, "learning_rate": 2.510433768605417e-06, "loss": 0.5479, "step": 15153 }, { "epoch": 1.35, "grad_norm": 7.427296642082458, "learning_rate": 2.509807334832875e-06, "loss": 0.6153, "step": 15154 }, { "epoch": 1.35, "grad_norm": 11.037165942144263, "learning_rate": 2.5091809530376497e-06, "loss": 0.6499, "step": 15155 }, { "epoch": 1.35, "grad_norm": 5.4383086707381585, "learning_rate": 2.5085546232328174e-06, "loss": 0.5703, "step": 15156 }, { "epoch": 1.35, "grad_norm": 6.984884620471906, "learning_rate": 2.5079283454314506e-06, "loss": 0.6478, "step": 15157 }, { "epoch": 1.35, "grad_norm": 10.699711657513838, "learning_rate": 2.507302119646625e-06, "loss": 0.5949, "step": 15158 }, { "epoch": 1.35, "grad_norm": 7.059173498608641, "learning_rate": 2.506675945891406e-06, "loss": 0.5159, "step": 15159 }, { "epoch": 1.35, "grad_norm": 4.887986805152354, "learning_rate": 2.506049824178866e-06, "loss": 0.5972, "step": 15160 }, { "epoch": 1.35, "grad_norm": 7.5221516682917695, "learning_rate": 2.5054237545220774e-06, "loss": 0.5813, "step": 15161 }, { "epoch": 1.35, "grad_norm": 6.8126096708860935, "learning_rate": 2.5047977369341016e-06, "loss": 0.603, "step": 15162 }, { "epoch": 1.35, "grad_norm": 5.639600276297569, "learning_rate": 2.5041717714280094e-06, "loss": 0.6313, "step": 15163 }, { "epoch": 1.35, "grad_norm": 7.528552371191764, "learning_rate": 2.503545858016866e-06, "loss": 0.5725, "step": 15164 }, { "epoch": 1.35, "grad_norm": 6.503253588545533, "learning_rate": 2.5029199967137353e-06, "loss": 0.5446, "step": 15165 }, { "epoch": 1.35, "grad_norm": 7.683887283700775, "learning_rate": 2.502294187531681e-06, "loss": 0.5603, "step": 15166 }, { "epoch": 1.35, "grad_norm": 8.70973911465323, "learning_rate": 2.5016684304837674e-06, "loss": 0.6029, "step": 15167 }, { "epoch": 1.35, "grad_norm": 6.695495681459116, "learning_rate": 2.5010427255830527e-06, "loss": 0.557, "step": 15168 }, { "epoch": 1.35, "grad_norm": 6.874592663506318, "learning_rate": 2.5004170728425982e-06, "loss": 0.5972, "step": 15169 }, { "epoch": 1.35, "grad_norm": 6.083628078625756, "learning_rate": 2.4997914722754633e-06, "loss": 0.5943, "step": 15170 }, { "epoch": 1.35, "grad_norm": 5.920930501418725, "learning_rate": 2.499165923894706e-06, "loss": 0.5646, "step": 15171 }, { "epoch": 1.35, "grad_norm": 8.716693435086077, "learning_rate": 2.4985404277133835e-06, "loss": 0.6141, "step": 15172 }, { "epoch": 1.35, "grad_norm": 6.129511780228218, "learning_rate": 2.497914983744553e-06, "loss": 0.5668, "step": 15173 }, { "epoch": 1.35, "grad_norm": 7.041043788159, "learning_rate": 2.497289592001268e-06, "loss": 0.6588, "step": 15174 }, { "epoch": 1.35, "grad_norm": 8.621278151386406, "learning_rate": 2.49666425249658e-06, "loss": 0.6242, "step": 15175 }, { "epoch": 1.35, "grad_norm": 8.452270963741837, "learning_rate": 2.496038965243544e-06, "loss": 0.6267, "step": 15176 }, { "epoch": 1.35, "grad_norm": 5.896095680912172, "learning_rate": 2.4954137302552105e-06, "loss": 0.5302, "step": 15177 }, { "epoch": 1.35, "grad_norm": 6.102096453575739, "learning_rate": 2.49478854754463e-06, "loss": 0.5762, "step": 15178 }, { "epoch": 1.35, "grad_norm": 4.7743832690306975, "learning_rate": 2.494163417124853e-06, "loss": 0.5853, "step": 15179 }, { "epoch": 1.35, "grad_norm": 7.804274324619159, "learning_rate": 2.4935383390089283e-06, "loss": 0.5973, "step": 15180 }, { "epoch": 1.35, "grad_norm": 4.9791009615456545, "learning_rate": 2.4929133132099005e-06, "loss": 0.5577, "step": 15181 }, { "epoch": 1.35, "grad_norm": 7.18995043662529, "learning_rate": 2.4922883397408167e-06, "loss": 0.6005, "step": 15182 }, { "epoch": 1.35, "grad_norm": 7.960896437134166, "learning_rate": 2.4916634186147213e-06, "loss": 0.54, "step": 15183 }, { "epoch": 1.35, "grad_norm": 6.203238766005372, "learning_rate": 2.49103854984466e-06, "loss": 0.5711, "step": 15184 }, { "epoch": 1.35, "grad_norm": 6.089274484808255, "learning_rate": 2.4904137334436734e-06, "loss": 0.5719, "step": 15185 }, { "epoch": 1.35, "grad_norm": 6.966501377946458, "learning_rate": 2.489788969424807e-06, "loss": 0.5899, "step": 15186 }, { "epoch": 1.35, "grad_norm": 5.06810843167128, "learning_rate": 2.4891642578010976e-06, "loss": 0.5741, "step": 15187 }, { "epoch": 1.35, "grad_norm": 6.014840194527695, "learning_rate": 2.4885395985855846e-06, "loss": 0.6053, "step": 15188 }, { "epoch": 1.36, "grad_norm": 9.091533799435869, "learning_rate": 2.487914991791307e-06, "loss": 0.6676, "step": 15189 }, { "epoch": 1.36, "grad_norm": 5.480514013306307, "learning_rate": 2.4872904374313026e-06, "loss": 0.5958, "step": 15190 }, { "epoch": 1.36, "grad_norm": 7.832313768401261, "learning_rate": 2.4866659355186087e-06, "loss": 0.5966, "step": 15191 }, { "epoch": 1.36, "grad_norm": 8.43902600327396, "learning_rate": 2.486041486066258e-06, "loss": 0.5907, "step": 15192 }, { "epoch": 1.36, "grad_norm": 6.218295270294223, "learning_rate": 2.485417089087288e-06, "loss": 0.6536, "step": 15193 }, { "epoch": 1.36, "grad_norm": 5.725686499224136, "learning_rate": 2.4847927445947275e-06, "loss": 0.5745, "step": 15194 }, { "epoch": 1.36, "grad_norm": 7.53792531384599, "learning_rate": 2.48416845260161e-06, "loss": 0.6466, "step": 15195 }, { "epoch": 1.36, "grad_norm": 12.290536011196703, "learning_rate": 2.483544213120967e-06, "loss": 0.649, "step": 15196 }, { "epoch": 1.36, "grad_norm": 5.471442704289469, "learning_rate": 2.4829200261658274e-06, "loss": 0.5693, "step": 15197 }, { "epoch": 1.36, "grad_norm": 7.619410033674413, "learning_rate": 2.4822958917492218e-06, "loss": 0.6406, "step": 15198 }, { "epoch": 1.36, "grad_norm": 5.472663103434341, "learning_rate": 2.481671809884174e-06, "loss": 0.5819, "step": 15199 }, { "epoch": 1.36, "grad_norm": 6.675441312725132, "learning_rate": 2.4810477805837117e-06, "loss": 0.5836, "step": 15200 }, { "epoch": 1.36, "grad_norm": 8.63605281070082, "learning_rate": 2.480423803860863e-06, "loss": 0.6051, "step": 15201 }, { "epoch": 1.36, "grad_norm": 6.8649793669594485, "learning_rate": 2.479799879728648e-06, "loss": 0.6207, "step": 15202 }, { "epoch": 1.36, "grad_norm": 5.779396014500936, "learning_rate": 2.4791760082000905e-06, "loss": 0.4816, "step": 15203 }, { "epoch": 1.36, "grad_norm": 7.560770714312319, "learning_rate": 2.4785521892882135e-06, "loss": 0.5826, "step": 15204 }, { "epoch": 1.36, "grad_norm": 5.328733051245722, "learning_rate": 2.47792842300604e-06, "loss": 0.5984, "step": 15205 }, { "epoch": 1.36, "grad_norm": 9.010489201927035, "learning_rate": 2.4773047093665853e-06, "loss": 0.6032, "step": 15206 }, { "epoch": 1.36, "grad_norm": 9.295950973413936, "learning_rate": 2.47668104838287e-06, "loss": 0.6276, "step": 15207 }, { "epoch": 1.36, "grad_norm": 7.715660040938863, "learning_rate": 2.4760574400679122e-06, "loss": 0.5292, "step": 15208 }, { "epoch": 1.36, "grad_norm": 5.787075531924868, "learning_rate": 2.475433884434728e-06, "loss": 0.5754, "step": 15209 }, { "epoch": 1.36, "grad_norm": 6.64869189676575, "learning_rate": 2.4748103814963324e-06, "loss": 0.5973, "step": 15210 }, { "epoch": 1.36, "grad_norm": 8.073917222566672, "learning_rate": 2.4741869312657423e-06, "loss": 0.6225, "step": 15211 }, { "epoch": 1.36, "grad_norm": 6.275015107285138, "learning_rate": 2.4735635337559664e-06, "loss": 0.5623, "step": 15212 }, { "epoch": 1.36, "grad_norm": 6.930327227803697, "learning_rate": 2.4729401889800194e-06, "loss": 0.5907, "step": 15213 }, { "epoch": 1.36, "grad_norm": 8.343836862037112, "learning_rate": 2.472316896950911e-06, "loss": 0.5944, "step": 15214 }, { "epoch": 1.36, "grad_norm": 7.738602962459229, "learning_rate": 2.4716936576816525e-06, "loss": 0.5953, "step": 15215 }, { "epoch": 1.36, "grad_norm": 7.423956381703024, "learning_rate": 2.4710704711852534e-06, "loss": 0.6121, "step": 15216 }, { "epoch": 1.36, "grad_norm": 7.133202722020561, "learning_rate": 2.4704473374747185e-06, "loss": 0.6042, "step": 15217 }, { "epoch": 1.36, "grad_norm": 6.67852738993027, "learning_rate": 2.4698242565630575e-06, "loss": 0.6193, "step": 15218 }, { "epoch": 1.36, "grad_norm": 6.114323389538373, "learning_rate": 2.469201228463272e-06, "loss": 0.5675, "step": 15219 }, { "epoch": 1.36, "grad_norm": 7.842902571651335, "learning_rate": 2.468578253188369e-06, "loss": 0.6039, "step": 15220 }, { "epoch": 1.36, "grad_norm": 8.52332422652029, "learning_rate": 2.467955330751351e-06, "loss": 0.6525, "step": 15221 }, { "epoch": 1.36, "grad_norm": 8.195641671901301, "learning_rate": 2.4673324611652205e-06, "loss": 0.6076, "step": 15222 }, { "epoch": 1.36, "grad_norm": 6.114136118415405, "learning_rate": 2.466709644442978e-06, "loss": 0.603, "step": 15223 }, { "epoch": 1.36, "grad_norm": 6.153566871628214, "learning_rate": 2.466086880597626e-06, "loss": 0.5995, "step": 15224 }, { "epoch": 1.36, "grad_norm": 10.027772010815477, "learning_rate": 2.4654641696421593e-06, "loss": 0.6109, "step": 15225 }, { "epoch": 1.36, "grad_norm": 6.55950447701592, "learning_rate": 2.464841511589578e-06, "loss": 0.5821, "step": 15226 }, { "epoch": 1.36, "grad_norm": 7.220449669566721, "learning_rate": 2.464218906452878e-06, "loss": 0.5819, "step": 15227 }, { "epoch": 1.36, "grad_norm": 7.6840067506996945, "learning_rate": 2.463596354245055e-06, "loss": 0.5775, "step": 15228 }, { "epoch": 1.36, "grad_norm": 7.3292263928144825, "learning_rate": 2.4629738549791025e-06, "loss": 0.5985, "step": 15229 }, { "epoch": 1.36, "grad_norm": 6.7968545978432795, "learning_rate": 2.462351408668018e-06, "loss": 0.5179, "step": 15230 }, { "epoch": 1.36, "grad_norm": 5.7149575788978435, "learning_rate": 2.46172901532479e-06, "loss": 0.5668, "step": 15231 }, { "epoch": 1.36, "grad_norm": 7.463578986003413, "learning_rate": 2.4611066749624085e-06, "loss": 0.5288, "step": 15232 }, { "epoch": 1.36, "grad_norm": 5.660676913955222, "learning_rate": 2.4604843875938643e-06, "loss": 0.6047, "step": 15233 }, { "epoch": 1.36, "grad_norm": 6.971889363626915, "learning_rate": 2.4598621532321478e-06, "loss": 0.5953, "step": 15234 }, { "epoch": 1.36, "grad_norm": 8.996489692496208, "learning_rate": 2.459239971890246e-06, "loss": 0.6341, "step": 15235 }, { "epoch": 1.36, "grad_norm": 7.391706010322378, "learning_rate": 2.458617843581145e-06, "loss": 0.5676, "step": 15236 }, { "epoch": 1.36, "grad_norm": 6.64904920549135, "learning_rate": 2.4579957683178334e-06, "loss": 0.6645, "step": 15237 }, { "epoch": 1.36, "grad_norm": 6.101607846011128, "learning_rate": 2.4573737461132913e-06, "loss": 0.6068, "step": 15238 }, { "epoch": 1.36, "grad_norm": 8.472974958439975, "learning_rate": 2.4567517769805045e-06, "loss": 0.5837, "step": 15239 }, { "epoch": 1.36, "grad_norm": 6.002727672750484, "learning_rate": 2.4561298609324537e-06, "loss": 0.5965, "step": 15240 }, { "epoch": 1.36, "grad_norm": 6.238351714939946, "learning_rate": 2.4555079979821217e-06, "loss": 0.5909, "step": 15241 }, { "epoch": 1.36, "grad_norm": 5.573192174320548, "learning_rate": 2.4548861881424877e-06, "loss": 0.5696, "step": 15242 }, { "epoch": 1.36, "grad_norm": 5.974907494694809, "learning_rate": 2.454264431426533e-06, "loss": 0.5528, "step": 15243 }, { "epoch": 1.36, "grad_norm": 8.920151130774679, "learning_rate": 2.453642727847231e-06, "loss": 0.6124, "step": 15244 }, { "epoch": 1.36, "grad_norm": 8.282453134534977, "learning_rate": 2.453021077417563e-06, "loss": 0.6464, "step": 15245 }, { "epoch": 1.36, "grad_norm": 5.386621769911644, "learning_rate": 2.4523994801505e-06, "loss": 0.5899, "step": 15246 }, { "epoch": 1.36, "grad_norm": 8.739283991891114, "learning_rate": 2.4517779360590183e-06, "loss": 0.5903, "step": 15247 }, { "epoch": 1.36, "grad_norm": 6.7529296515303425, "learning_rate": 2.451156445156092e-06, "loss": 0.5722, "step": 15248 }, { "epoch": 1.36, "grad_norm": 5.828249325743333, "learning_rate": 2.450535007454693e-06, "loss": 0.5434, "step": 15249 }, { "epoch": 1.36, "grad_norm": 5.566982739083851, "learning_rate": 2.4499136229677945e-06, "loss": 0.5937, "step": 15250 }, { "epoch": 1.36, "grad_norm": 5.191293094276491, "learning_rate": 2.4492922917083623e-06, "loss": 0.5916, "step": 15251 }, { "epoch": 1.36, "grad_norm": 6.803469660215911, "learning_rate": 2.448671013689368e-06, "loss": 0.5584, "step": 15252 }, { "epoch": 1.36, "grad_norm": 6.567467562685244, "learning_rate": 2.4480497889237787e-06, "loss": 0.6092, "step": 15253 }, { "epoch": 1.36, "grad_norm": 6.497264268951367, "learning_rate": 2.4474286174245613e-06, "loss": 0.5474, "step": 15254 }, { "epoch": 1.36, "grad_norm": 6.621883618952956, "learning_rate": 2.446807499204682e-06, "loss": 0.6175, "step": 15255 }, { "epoch": 1.36, "grad_norm": 5.799288479778182, "learning_rate": 2.4461864342771065e-06, "loss": 0.5911, "step": 15256 }, { "epoch": 1.36, "grad_norm": 6.7732591686074, "learning_rate": 2.4455654226547942e-06, "loss": 0.5354, "step": 15257 }, { "epoch": 1.36, "grad_norm": 5.923831892757163, "learning_rate": 2.4449444643507098e-06, "loss": 0.5753, "step": 15258 }, { "epoch": 1.36, "grad_norm": 7.004480048193925, "learning_rate": 2.4443235593778157e-06, "loss": 0.5734, "step": 15259 }, { "epoch": 1.36, "grad_norm": 6.222556416330085, "learning_rate": 2.4437027077490695e-06, "loss": 0.5286, "step": 15260 }, { "epoch": 1.36, "grad_norm": 8.804315118268526, "learning_rate": 2.44308190947743e-06, "loss": 0.6087, "step": 15261 }, { "epoch": 1.36, "grad_norm": 7.468055085722266, "learning_rate": 2.4424611645758567e-06, "loss": 0.6433, "step": 15262 }, { "epoch": 1.36, "grad_norm": 8.145688183376379, "learning_rate": 2.4418404730573075e-06, "loss": 0.6547, "step": 15263 }, { "epoch": 1.36, "grad_norm": 6.728588872775224, "learning_rate": 2.4412198349347345e-06, "loss": 0.6374, "step": 15264 }, { "epoch": 1.36, "grad_norm": 7.085884271225814, "learning_rate": 2.4405992502210934e-06, "loss": 0.6224, "step": 15265 }, { "epoch": 1.36, "grad_norm": 5.957724731769276, "learning_rate": 2.439978718929338e-06, "loss": 0.5621, "step": 15266 }, { "epoch": 1.36, "grad_norm": 5.290612868809058, "learning_rate": 2.4393582410724204e-06, "loss": 0.5823, "step": 15267 }, { "epoch": 1.36, "grad_norm": 6.914417888654169, "learning_rate": 2.4387378166632924e-06, "loss": 0.6193, "step": 15268 }, { "epoch": 1.36, "grad_norm": 8.451548941398585, "learning_rate": 2.4381174457149048e-06, "loss": 0.5952, "step": 15269 }, { "epoch": 1.36, "grad_norm": 8.775926480569979, "learning_rate": 2.4374971282402034e-06, "loss": 0.569, "step": 15270 }, { "epoch": 1.36, "grad_norm": 7.277017450134407, "learning_rate": 2.4368768642521374e-06, "loss": 0.6224, "step": 15271 }, { "epoch": 1.36, "grad_norm": 4.72541649284393, "learning_rate": 2.436256653763654e-06, "loss": 0.6092, "step": 15272 }, { "epoch": 1.36, "grad_norm": 7.848091676641214, "learning_rate": 2.435636496787698e-06, "loss": 0.6359, "step": 15273 }, { "epoch": 1.36, "grad_norm": 7.216685639002915, "learning_rate": 2.4350163933372166e-06, "loss": 0.5863, "step": 15274 }, { "epoch": 1.36, "grad_norm": 7.142185281277388, "learning_rate": 2.434396343425149e-06, "loss": 0.5911, "step": 15275 }, { "epoch": 1.36, "grad_norm": 7.268741500318552, "learning_rate": 2.4337763470644403e-06, "loss": 0.6007, "step": 15276 }, { "epoch": 1.36, "grad_norm": 8.054974349139382, "learning_rate": 2.4331564042680293e-06, "loss": 0.5895, "step": 15277 }, { "epoch": 1.36, "grad_norm": 7.995676114512794, "learning_rate": 2.432536515048857e-06, "loss": 0.5892, "step": 15278 }, { "epoch": 1.36, "grad_norm": 7.597081701361043, "learning_rate": 2.431916679419862e-06, "loss": 0.5568, "step": 15279 }, { "epoch": 1.36, "grad_norm": 7.871830852221906, "learning_rate": 2.431296897393982e-06, "loss": 0.6071, "step": 15280 }, { "epoch": 1.36, "grad_norm": 6.080159345129762, "learning_rate": 2.430677168984157e-06, "loss": 0.5899, "step": 15281 }, { "epoch": 1.36, "grad_norm": 6.662499679559327, "learning_rate": 2.4300574942033163e-06, "loss": 0.5914, "step": 15282 }, { "epoch": 1.36, "grad_norm": 5.760886752179844, "learning_rate": 2.4294378730643974e-06, "loss": 0.5604, "step": 15283 }, { "epoch": 1.36, "grad_norm": 6.806902976894375, "learning_rate": 2.428818305580334e-06, "loss": 0.5353, "step": 15284 }, { "epoch": 1.36, "grad_norm": 8.68591667083678, "learning_rate": 2.4281987917640574e-06, "loss": 0.5891, "step": 15285 }, { "epoch": 1.36, "grad_norm": 6.510382642913864, "learning_rate": 2.427579331628499e-06, "loss": 0.6546, "step": 15286 }, { "epoch": 1.36, "grad_norm": 7.063981989818763, "learning_rate": 2.4269599251865904e-06, "loss": 0.5908, "step": 15287 }, { "epoch": 1.36, "grad_norm": 5.810431593444579, "learning_rate": 2.4263405724512566e-06, "loss": 0.6079, "step": 15288 }, { "epoch": 1.36, "grad_norm": 6.955311218971023, "learning_rate": 2.425721273435429e-06, "loss": 0.6037, "step": 15289 }, { "epoch": 1.36, "grad_norm": 5.917594407147409, "learning_rate": 2.4251020281520306e-06, "loss": 0.6191, "step": 15290 }, { "epoch": 1.36, "grad_norm": 5.875170304448491, "learning_rate": 2.424482836613988e-06, "loss": 0.5912, "step": 15291 }, { "epoch": 1.36, "grad_norm": 6.763043923981057, "learning_rate": 2.4238636988342263e-06, "loss": 0.5806, "step": 15292 }, { "epoch": 1.36, "grad_norm": 6.640736941970092, "learning_rate": 2.4232446148256676e-06, "loss": 0.5799, "step": 15293 }, { "epoch": 1.36, "grad_norm": 6.705269967101258, "learning_rate": 2.422625584601237e-06, "loss": 0.5952, "step": 15294 }, { "epoch": 1.36, "grad_norm": 5.366691802345707, "learning_rate": 2.4220066081738515e-06, "loss": 0.5836, "step": 15295 }, { "epoch": 1.36, "grad_norm": 7.723394442898744, "learning_rate": 2.4213876855564315e-06, "loss": 0.5633, "step": 15296 }, { "epoch": 1.36, "grad_norm": 5.898577385562662, "learning_rate": 2.420768816761897e-06, "loss": 0.5816, "step": 15297 }, { "epoch": 1.36, "grad_norm": 6.7546627884783215, "learning_rate": 2.420150001803165e-06, "loss": 0.5665, "step": 15298 }, { "epoch": 1.36, "grad_norm": 7.468498094482104, "learning_rate": 2.4195312406931513e-06, "loss": 0.5902, "step": 15299 }, { "epoch": 1.36, "grad_norm": 5.135838946098599, "learning_rate": 2.4189125334447743e-06, "loss": 0.5823, "step": 15300 }, { "epoch": 1.37, "grad_norm": 6.935080294162843, "learning_rate": 2.4182938800709437e-06, "loss": 0.5759, "step": 15301 }, { "epoch": 1.37, "grad_norm": 6.922522100626762, "learning_rate": 2.4176752805845767e-06, "loss": 0.6347, "step": 15302 }, { "epoch": 1.37, "grad_norm": 8.98889242134265, "learning_rate": 2.4170567349985808e-06, "loss": 0.6486, "step": 15303 }, { "epoch": 1.37, "grad_norm": 6.18932132608457, "learning_rate": 2.4164382433258686e-06, "loss": 0.5088, "step": 15304 }, { "epoch": 1.37, "grad_norm": 6.593226351502218, "learning_rate": 2.4158198055793506e-06, "loss": 0.5722, "step": 15305 }, { "epoch": 1.37, "grad_norm": 6.739175493310133, "learning_rate": 2.415201421771935e-06, "loss": 0.5824, "step": 15306 }, { "epoch": 1.37, "grad_norm": 4.939610527057863, "learning_rate": 2.414583091916531e-06, "loss": 0.5902, "step": 15307 }, { "epoch": 1.37, "grad_norm": 9.752602447881896, "learning_rate": 2.413964816026041e-06, "loss": 0.5649, "step": 15308 }, { "epoch": 1.37, "grad_norm": 4.605156245825359, "learning_rate": 2.413346594113372e-06, "loss": 0.6028, "step": 15309 }, { "epoch": 1.37, "grad_norm": 5.045975814198473, "learning_rate": 2.4127284261914284e-06, "loss": 0.5892, "step": 15310 }, { "epoch": 1.37, "grad_norm": 6.4949813152881175, "learning_rate": 2.4121103122731125e-06, "loss": 0.533, "step": 15311 }, { "epoch": 1.37, "grad_norm": 6.2894929665333805, "learning_rate": 2.411492252371327e-06, "loss": 0.5828, "step": 15312 }, { "epoch": 1.37, "grad_norm": 6.857416106503852, "learning_rate": 2.4108742464989736e-06, "loss": 0.605, "step": 15313 }, { "epoch": 1.37, "grad_norm": 6.777432805594984, "learning_rate": 2.4102562946689486e-06, "loss": 0.6133, "step": 15314 }, { "epoch": 1.37, "grad_norm": 8.357018710650772, "learning_rate": 2.409638396894152e-06, "loss": 0.5693, "step": 15315 }, { "epoch": 1.37, "grad_norm": 6.6441708361764675, "learning_rate": 2.4090205531874806e-06, "loss": 0.5922, "step": 15316 }, { "epoch": 1.37, "grad_norm": 6.914952731825939, "learning_rate": 2.4084027635618328e-06, "loss": 0.5463, "step": 15317 }, { "epoch": 1.37, "grad_norm": 6.209730033277364, "learning_rate": 2.4077850280301e-06, "loss": 0.596, "step": 15318 }, { "epoch": 1.37, "grad_norm": 7.168982101730439, "learning_rate": 2.4071673466051787e-06, "loss": 0.623, "step": 15319 }, { "epoch": 1.37, "grad_norm": 5.661290221556705, "learning_rate": 2.406549719299962e-06, "loss": 0.4822, "step": 15320 }, { "epoch": 1.37, "grad_norm": 6.1348165361027265, "learning_rate": 2.4059321461273388e-06, "loss": 0.665, "step": 15321 }, { "epoch": 1.37, "grad_norm": 5.694043489547387, "learning_rate": 2.405314627100201e-06, "loss": 0.5743, "step": 15322 }, { "epoch": 1.37, "grad_norm": 6.693953094275328, "learning_rate": 2.404697162231438e-06, "loss": 0.5383, "step": 15323 }, { "epoch": 1.37, "grad_norm": 8.232656787234943, "learning_rate": 2.404079751533938e-06, "loss": 0.5917, "step": 15324 }, { "epoch": 1.37, "grad_norm": 6.64696223722908, "learning_rate": 2.403462395020588e-06, "loss": 0.5658, "step": 15325 }, { "epoch": 1.37, "grad_norm": 5.734303504960375, "learning_rate": 2.4028450927042768e-06, "loss": 0.5848, "step": 15326 }, { "epoch": 1.37, "grad_norm": 4.853871438434748, "learning_rate": 2.402227844597884e-06, "loss": 0.5918, "step": 15327 }, { "epoch": 1.37, "grad_norm": 5.897760847704565, "learning_rate": 2.401610650714297e-06, "loss": 0.554, "step": 15328 }, { "epoch": 1.37, "grad_norm": 5.717017386011283, "learning_rate": 2.400993511066396e-06, "loss": 0.5972, "step": 15329 }, { "epoch": 1.37, "grad_norm": 8.57546605546565, "learning_rate": 2.4003764256670646e-06, "loss": 0.5501, "step": 15330 }, { "epoch": 1.37, "grad_norm": 7.181540412844928, "learning_rate": 2.399759394529184e-06, "loss": 0.6282, "step": 15331 }, { "epoch": 1.37, "grad_norm": 5.682201990519442, "learning_rate": 2.39914241766563e-06, "loss": 0.6225, "step": 15332 }, { "epoch": 1.37, "grad_norm": 6.648979245810481, "learning_rate": 2.3985254950892835e-06, "loss": 0.5732, "step": 15333 }, { "epoch": 1.37, "grad_norm": 7.092905332202179, "learning_rate": 2.397908626813018e-06, "loss": 0.5655, "step": 15334 }, { "epoch": 1.37, "grad_norm": 5.763964220157918, "learning_rate": 2.397291812849712e-06, "loss": 0.5635, "step": 15335 }, { "epoch": 1.37, "grad_norm": 7.2968269273116135, "learning_rate": 2.3966750532122397e-06, "loss": 0.5558, "step": 15336 }, { "epoch": 1.37, "grad_norm": 6.088252382147744, "learning_rate": 2.3960583479134742e-06, "loss": 0.6008, "step": 15337 }, { "epoch": 1.37, "grad_norm": 4.704613330760909, "learning_rate": 2.395441696966288e-06, "loss": 0.6051, "step": 15338 }, { "epoch": 1.37, "grad_norm": 5.650957710992998, "learning_rate": 2.394825100383555e-06, "loss": 0.5274, "step": 15339 }, { "epoch": 1.37, "grad_norm": 5.3329988663875945, "learning_rate": 2.3942085581781406e-06, "loss": 0.5693, "step": 15340 }, { "epoch": 1.37, "grad_norm": 5.635462567603978, "learning_rate": 2.3935920703629156e-06, "loss": 0.6006, "step": 15341 }, { "epoch": 1.37, "grad_norm": 7.061540863697198, "learning_rate": 2.3929756369507482e-06, "loss": 0.5899, "step": 15342 }, { "epoch": 1.37, "grad_norm": 10.372057109582956, "learning_rate": 2.392359257954506e-06, "loss": 0.5984, "step": 15343 }, { "epoch": 1.37, "grad_norm": 7.347546018514494, "learning_rate": 2.3917429333870522e-06, "loss": 0.597, "step": 15344 }, { "epoch": 1.37, "grad_norm": 5.412587892037886, "learning_rate": 2.3911266632612556e-06, "loss": 0.6606, "step": 15345 }, { "epoch": 1.37, "grad_norm": 6.699438119522221, "learning_rate": 2.3905104475899764e-06, "loss": 0.609, "step": 15346 }, { "epoch": 1.37, "grad_norm": 7.554015537193053, "learning_rate": 2.389894286386075e-06, "loss": 0.6177, "step": 15347 }, { "epoch": 1.37, "grad_norm": 6.998396420404499, "learning_rate": 2.3892781796624144e-06, "loss": 0.6221, "step": 15348 }, { "epoch": 1.37, "grad_norm": 5.414518164591421, "learning_rate": 2.3886621274318543e-06, "loss": 0.6129, "step": 15349 }, { "epoch": 1.37, "grad_norm": 9.034411970747223, "learning_rate": 2.3880461297072537e-06, "loss": 0.5759, "step": 15350 }, { "epoch": 1.37, "grad_norm": 8.011339051190925, "learning_rate": 2.38743018650147e-06, "loss": 0.5414, "step": 15351 }, { "epoch": 1.37, "grad_norm": 7.298176755807752, "learning_rate": 2.3868142978273613e-06, "loss": 0.5665, "step": 15352 }, { "epoch": 1.37, "grad_norm": 5.95029335361457, "learning_rate": 2.3861984636977796e-06, "loss": 0.5129, "step": 15353 }, { "epoch": 1.37, "grad_norm": 5.139278003435558, "learning_rate": 2.3855826841255815e-06, "loss": 0.5801, "step": 15354 }, { "epoch": 1.37, "grad_norm": 5.481914587899809, "learning_rate": 2.3849669591236187e-06, "loss": 0.5433, "step": 15355 }, { "epoch": 1.37, "grad_norm": 7.003329937576162, "learning_rate": 2.3843512887047436e-06, "loss": 0.6016, "step": 15356 }, { "epoch": 1.37, "grad_norm": 6.278789392069724, "learning_rate": 2.3837356728818095e-06, "loss": 0.5791, "step": 15357 }, { "epoch": 1.37, "grad_norm": 8.305890622215259, "learning_rate": 2.3831201116676618e-06, "loss": 0.5725, "step": 15358 }, { "epoch": 1.37, "grad_norm": 6.71723928232758, "learning_rate": 2.382504605075151e-06, "loss": 0.5518, "step": 15359 }, { "epoch": 1.37, "grad_norm": 6.620527180203994, "learning_rate": 2.3818891531171264e-06, "loss": 0.6403, "step": 15360 }, { "epoch": 1.37, "grad_norm": 6.694182238476861, "learning_rate": 2.3812737558064297e-06, "loss": 0.6112, "step": 15361 }, { "epoch": 1.37, "grad_norm": 5.610122408878612, "learning_rate": 2.3806584131559084e-06, "loss": 0.586, "step": 15362 }, { "epoch": 1.37, "grad_norm": 7.97366642863119, "learning_rate": 2.380043125178407e-06, "loss": 0.5209, "step": 15363 }, { "epoch": 1.37, "grad_norm": 7.098692881674012, "learning_rate": 2.3794278918867674e-06, "loss": 0.5646, "step": 15364 }, { "epoch": 1.37, "grad_norm": 6.119840763162265, "learning_rate": 2.378812713293834e-06, "loss": 0.592, "step": 15365 }, { "epoch": 1.37, "grad_norm": 6.043464338340651, "learning_rate": 2.378197589412442e-06, "loss": 0.6183, "step": 15366 }, { "epoch": 1.37, "grad_norm": 6.357130276274201, "learning_rate": 2.3775825202554347e-06, "loss": 0.5587, "step": 15367 }, { "epoch": 1.37, "grad_norm": 5.898285931740217, "learning_rate": 2.376967505835649e-06, "loss": 0.613, "step": 15368 }, { "epoch": 1.37, "grad_norm": 5.717366665939535, "learning_rate": 2.376352546165922e-06, "loss": 0.5705, "step": 15369 }, { "epoch": 1.37, "grad_norm": 7.633657966154586, "learning_rate": 2.375737641259092e-06, "loss": 0.5725, "step": 15370 }, { "epoch": 1.37, "grad_norm": 6.482106801020406, "learning_rate": 2.3751227911279893e-06, "loss": 0.5965, "step": 15371 }, { "epoch": 1.37, "grad_norm": 6.030511184776018, "learning_rate": 2.3745079957854505e-06, "loss": 0.6123, "step": 15372 }, { "epoch": 1.37, "grad_norm": 9.12259580983422, "learning_rate": 2.3738932552443076e-06, "loss": 0.6157, "step": 15373 }, { "epoch": 1.37, "grad_norm": 6.7840970701545364, "learning_rate": 2.373278569517394e-06, "loss": 0.5876, "step": 15374 }, { "epoch": 1.37, "grad_norm": 6.540579859244908, "learning_rate": 2.3726639386175353e-06, "loss": 0.5639, "step": 15375 }, { "epoch": 1.37, "grad_norm": 7.690702428764871, "learning_rate": 2.3720493625575637e-06, "loss": 0.6178, "step": 15376 }, { "epoch": 1.37, "grad_norm": 6.474259315580586, "learning_rate": 2.371434841350308e-06, "loss": 0.6163, "step": 15377 }, { "epoch": 1.37, "grad_norm": 4.473906071088057, "learning_rate": 2.3708203750085924e-06, "loss": 0.5333, "step": 15378 }, { "epoch": 1.37, "grad_norm": 8.765730001708302, "learning_rate": 2.370205963545243e-06, "loss": 0.5838, "step": 15379 }, { "epoch": 1.37, "grad_norm": 7.688165296720368, "learning_rate": 2.3695916069730846e-06, "loss": 0.6125, "step": 15380 }, { "epoch": 1.37, "grad_norm": 5.757312549881616, "learning_rate": 2.3689773053049414e-06, "loss": 0.5546, "step": 15381 }, { "epoch": 1.37, "grad_norm": 6.426371603490911, "learning_rate": 2.3683630585536357e-06, "loss": 0.5691, "step": 15382 }, { "epoch": 1.37, "grad_norm": 5.866529135455081, "learning_rate": 2.3677488667319887e-06, "loss": 0.6132, "step": 15383 }, { "epoch": 1.37, "grad_norm": 7.060161617542974, "learning_rate": 2.367134729852818e-06, "loss": 0.5181, "step": 15384 }, { "epoch": 1.37, "grad_norm": 5.846192040728541, "learning_rate": 2.366520647928944e-06, "loss": 0.5334, "step": 15385 }, { "epoch": 1.37, "grad_norm": 5.288635724959641, "learning_rate": 2.3659066209731845e-06, "loss": 0.5525, "step": 15386 }, { "epoch": 1.37, "grad_norm": 5.111857908871401, "learning_rate": 2.365292648998356e-06, "loss": 0.5982, "step": 15387 }, { "epoch": 1.37, "grad_norm": 6.917817172827464, "learning_rate": 2.364678732017273e-06, "loss": 0.5808, "step": 15388 }, { "epoch": 1.37, "grad_norm": 4.954370851938825, "learning_rate": 2.3640648700427525e-06, "loss": 0.5286, "step": 15389 }, { "epoch": 1.37, "grad_norm": 7.20788418123953, "learning_rate": 2.363451063087605e-06, "loss": 0.5972, "step": 15390 }, { "epoch": 1.37, "grad_norm": 6.34621200864893, "learning_rate": 2.3628373111646407e-06, "loss": 0.5744, "step": 15391 }, { "epoch": 1.37, "grad_norm": 7.00789610826379, "learning_rate": 2.3622236142866723e-06, "loss": 0.6713, "step": 15392 }, { "epoch": 1.37, "grad_norm": 8.431630774032294, "learning_rate": 2.361609972466509e-06, "loss": 0.552, "step": 15393 }, { "epoch": 1.37, "grad_norm": 7.244941611732809, "learning_rate": 2.36099638571696e-06, "loss": 0.6355, "step": 15394 }, { "epoch": 1.37, "grad_norm": 5.634512206414605, "learning_rate": 2.3603828540508323e-06, "loss": 0.6332, "step": 15395 }, { "epoch": 1.37, "grad_norm": 7.568264080847426, "learning_rate": 2.359769377480933e-06, "loss": 0.558, "step": 15396 }, { "epoch": 1.37, "grad_norm": 5.487139159934689, "learning_rate": 2.3591559560200646e-06, "loss": 0.5964, "step": 15397 }, { "epoch": 1.37, "grad_norm": 5.413770980047048, "learning_rate": 2.358542589681032e-06, "loss": 0.5145, "step": 15398 }, { "epoch": 1.37, "grad_norm": 7.5327009441905615, "learning_rate": 2.357929278476638e-06, "loss": 0.5774, "step": 15399 }, { "epoch": 1.37, "grad_norm": 6.167687249599947, "learning_rate": 2.357316022419685e-06, "loss": 0.6247, "step": 15400 }, { "epoch": 1.37, "grad_norm": 4.6291125412159095, "learning_rate": 2.356702821522972e-06, "loss": 0.5862, "step": 15401 }, { "epoch": 1.37, "grad_norm": 6.660640464945697, "learning_rate": 2.356089675799301e-06, "loss": 0.5985, "step": 15402 }, { "epoch": 1.37, "grad_norm": 5.559025505666036, "learning_rate": 2.355476585261468e-06, "loss": 0.5415, "step": 15403 }, { "epoch": 1.37, "grad_norm": 6.18613491776588, "learning_rate": 2.3548635499222684e-06, "loss": 0.5704, "step": 15404 }, { "epoch": 1.37, "grad_norm": 6.3575616301316975, "learning_rate": 2.354250569794499e-06, "loss": 0.5824, "step": 15405 }, { "epoch": 1.37, "grad_norm": 5.911251681858195, "learning_rate": 2.353637644890954e-06, "loss": 0.5046, "step": 15406 }, { "epoch": 1.37, "grad_norm": 7.586111893535961, "learning_rate": 2.353024775224429e-06, "loss": 0.5441, "step": 15407 }, { "epoch": 1.37, "grad_norm": 8.360442644779864, "learning_rate": 2.3524119608077144e-06, "loss": 0.5954, "step": 15408 }, { "epoch": 1.37, "grad_norm": 8.984293197908606, "learning_rate": 2.3517992016536045e-06, "loss": 0.623, "step": 15409 }, { "epoch": 1.37, "grad_norm": 6.834046757998613, "learning_rate": 2.3511864977748845e-06, "loss": 0.5828, "step": 15410 }, { "epoch": 1.37, "grad_norm": 6.579401963396583, "learning_rate": 2.3505738491843456e-06, "loss": 0.576, "step": 15411 }, { "epoch": 1.37, "grad_norm": 8.324619459382383, "learning_rate": 2.3499612558947752e-06, "loss": 0.6408, "step": 15412 }, { "epoch": 1.37, "grad_norm": 6.456028171105316, "learning_rate": 2.3493487179189606e-06, "loss": 0.5855, "step": 15413 }, { "epoch": 1.38, "grad_norm": 7.705556437707586, "learning_rate": 2.3487362352696863e-06, "loss": 0.5492, "step": 15414 }, { "epoch": 1.38, "grad_norm": 5.052153392578449, "learning_rate": 2.3481238079597392e-06, "loss": 0.6022, "step": 15415 }, { "epoch": 1.38, "grad_norm": 6.370720128488505, "learning_rate": 2.3475114360018973e-06, "loss": 0.5709, "step": 15416 }, { "epoch": 1.38, "grad_norm": 4.288700833671227, "learning_rate": 2.3468991194089465e-06, "loss": 0.593, "step": 15417 }, { "epoch": 1.38, "grad_norm": 5.6337256027231675, "learning_rate": 2.346286858193667e-06, "loss": 0.6327, "step": 15418 }, { "epoch": 1.38, "grad_norm": 6.443995094169895, "learning_rate": 2.345674652368837e-06, "loss": 0.5472, "step": 15419 }, { "epoch": 1.38, "grad_norm": 9.276278782886475, "learning_rate": 2.3450625019472357e-06, "loss": 0.6142, "step": 15420 }, { "epoch": 1.38, "grad_norm": 7.243813205968432, "learning_rate": 2.34445040694164e-06, "loss": 0.5773, "step": 15421 }, { "epoch": 1.38, "grad_norm": 6.626470699259227, "learning_rate": 2.3438383673648284e-06, "loss": 0.5973, "step": 15422 }, { "epoch": 1.38, "grad_norm": 7.73756272025452, "learning_rate": 2.343226383229572e-06, "loss": 0.5508, "step": 15423 }, { "epoch": 1.38, "grad_norm": 6.210751207171804, "learning_rate": 2.342614454548647e-06, "loss": 0.6161, "step": 15424 }, { "epoch": 1.38, "grad_norm": 6.258004050686239, "learning_rate": 2.342002581334826e-06, "loss": 0.6745, "step": 15425 }, { "epoch": 1.38, "grad_norm": 7.1210533913697684, "learning_rate": 2.3413907636008794e-06, "loss": 0.6031, "step": 15426 }, { "epoch": 1.38, "grad_norm": 5.771086723638093, "learning_rate": 2.3407790013595787e-06, "loss": 0.5893, "step": 15427 }, { "epoch": 1.38, "grad_norm": 5.556515788048686, "learning_rate": 2.3401672946236947e-06, "loss": 0.5073, "step": 15428 }, { "epoch": 1.38, "grad_norm": 8.801979019466977, "learning_rate": 2.339555643405991e-06, "loss": 0.5676, "step": 15429 }, { "epoch": 1.38, "grad_norm": 5.194484814837417, "learning_rate": 2.3389440477192377e-06, "loss": 0.6192, "step": 15430 }, { "epoch": 1.38, "grad_norm": 6.9410878290323215, "learning_rate": 2.3383325075761997e-06, "loss": 0.5944, "step": 15431 }, { "epoch": 1.38, "grad_norm": 5.858146362543744, "learning_rate": 2.3377210229896435e-06, "loss": 0.6843, "step": 15432 }, { "epoch": 1.38, "grad_norm": 6.2521525322407445, "learning_rate": 2.3371095939723286e-06, "loss": 0.5454, "step": 15433 }, { "epoch": 1.38, "grad_norm": 5.791849977493365, "learning_rate": 2.3364982205370196e-06, "loss": 0.4897, "step": 15434 }, { "epoch": 1.38, "grad_norm": 7.483243298924606, "learning_rate": 2.335886902696479e-06, "loss": 0.5098, "step": 15435 }, { "epoch": 1.38, "grad_norm": 6.978916666531775, "learning_rate": 2.335275640463463e-06, "loss": 0.5629, "step": 15436 }, { "epoch": 1.38, "grad_norm": 4.806107106827214, "learning_rate": 2.3346644338507323e-06, "loss": 0.6272, "step": 15437 }, { "epoch": 1.38, "grad_norm": 5.955109389342841, "learning_rate": 2.334053282871045e-06, "loss": 0.5806, "step": 15438 }, { "epoch": 1.38, "grad_norm": 7.455806445737048, "learning_rate": 2.3334421875371568e-06, "loss": 0.5499, "step": 15439 }, { "epoch": 1.38, "grad_norm": 9.427897433079394, "learning_rate": 2.3328311478618234e-06, "loss": 0.5387, "step": 15440 }, { "epoch": 1.38, "grad_norm": 7.241899467477099, "learning_rate": 2.3322201638578003e-06, "loss": 0.6177, "step": 15441 }, { "epoch": 1.38, "grad_norm": 6.8667195142477775, "learning_rate": 2.331609235537837e-06, "loss": 0.556, "step": 15442 }, { "epoch": 1.38, "grad_norm": 9.004489979490959, "learning_rate": 2.3309983629146876e-06, "loss": 0.6683, "step": 15443 }, { "epoch": 1.38, "grad_norm": 5.856774800132853, "learning_rate": 2.330387546001102e-06, "loss": 0.5441, "step": 15444 }, { "epoch": 1.38, "grad_norm": 6.064553393196938, "learning_rate": 2.3297767848098306e-06, "loss": 0.5901, "step": 15445 }, { "epoch": 1.38, "grad_norm": 7.18250206131216, "learning_rate": 2.3291660793536227e-06, "loss": 0.6375, "step": 15446 }, { "epoch": 1.38, "grad_norm": 7.40808340597432, "learning_rate": 2.3285554296452223e-06, "loss": 0.5688, "step": 15447 }, { "epoch": 1.38, "grad_norm": 5.8829404380098165, "learning_rate": 2.327944835697378e-06, "loss": 0.6282, "step": 15448 }, { "epoch": 1.38, "grad_norm": 6.556913145435115, "learning_rate": 2.3273342975228323e-06, "loss": 0.5684, "step": 15449 }, { "epoch": 1.38, "grad_norm": 5.335244578939409, "learning_rate": 2.3267238151343303e-06, "loss": 0.6024, "step": 15450 }, { "epoch": 1.38, "grad_norm": 5.256785032683414, "learning_rate": 2.326113388544614e-06, "loss": 0.5477, "step": 15451 }, { "epoch": 1.38, "grad_norm": 7.605041216451519, "learning_rate": 2.325503017766425e-06, "loss": 0.596, "step": 15452 }, { "epoch": 1.38, "grad_norm": 6.674511088449798, "learning_rate": 2.3248927028125053e-06, "loss": 0.5341, "step": 15453 }, { "epoch": 1.38, "grad_norm": 7.8282309775047345, "learning_rate": 2.32428244369559e-06, "loss": 0.6159, "step": 15454 }, { "epoch": 1.38, "grad_norm": 7.423667341313421, "learning_rate": 2.323672240428419e-06, "loss": 0.5927, "step": 15455 }, { "epoch": 1.38, "grad_norm": 8.088741147254344, "learning_rate": 2.323062093023729e-06, "loss": 0.5774, "step": 15456 }, { "epoch": 1.38, "grad_norm": 5.33109055356537, "learning_rate": 2.322452001494256e-06, "loss": 0.5137, "step": 15457 }, { "epoch": 1.38, "grad_norm": 5.705276471148506, "learning_rate": 2.321841965852733e-06, "loss": 0.5588, "step": 15458 }, { "epoch": 1.38, "grad_norm": 5.523881187960127, "learning_rate": 2.321231986111896e-06, "loss": 0.5561, "step": 15459 }, { "epoch": 1.38, "grad_norm": 6.243026912272245, "learning_rate": 2.3206220622844727e-06, "loss": 0.5677, "step": 15460 }, { "epoch": 1.38, "grad_norm": 8.906139166198802, "learning_rate": 2.3200121943831988e-06, "loss": 0.5872, "step": 15461 }, { "epoch": 1.38, "grad_norm": 7.086733877569432, "learning_rate": 2.3194023824207985e-06, "loss": 0.6274, "step": 15462 }, { "epoch": 1.38, "grad_norm": 7.180909379395099, "learning_rate": 2.3187926264100036e-06, "loss": 0.5607, "step": 15463 }, { "epoch": 1.38, "grad_norm": 6.082115094380091, "learning_rate": 2.318182926363541e-06, "loss": 0.5505, "step": 15464 }, { "epoch": 1.38, "grad_norm": 9.929785148549657, "learning_rate": 2.317573282294136e-06, "loss": 0.5879, "step": 15465 }, { "epoch": 1.38, "grad_norm": 8.342091304168124, "learning_rate": 2.316963694214517e-06, "loss": 0.6361, "step": 15466 }, { "epoch": 1.38, "grad_norm": 7.565773379201985, "learning_rate": 2.3163541621374026e-06, "loss": 0.6039, "step": 15467 }, { "epoch": 1.38, "grad_norm": 5.9420630646200205, "learning_rate": 2.3157446860755183e-06, "loss": 0.5898, "step": 15468 }, { "epoch": 1.38, "grad_norm": 5.131885212553709, "learning_rate": 2.3151352660415854e-06, "loss": 0.555, "step": 15469 }, { "epoch": 1.38, "grad_norm": 5.544152499224465, "learning_rate": 2.3145259020483243e-06, "loss": 0.5753, "step": 15470 }, { "epoch": 1.38, "grad_norm": 7.296129048031609, "learning_rate": 2.313916594108453e-06, "loss": 0.5749, "step": 15471 }, { "epoch": 1.38, "grad_norm": 7.133887827002375, "learning_rate": 2.313307342234693e-06, "loss": 0.5811, "step": 15472 }, { "epoch": 1.38, "grad_norm": 6.972358817196699, "learning_rate": 2.3126981464397564e-06, "loss": 0.5918, "step": 15473 }, { "epoch": 1.38, "grad_norm": 5.093790736937827, "learning_rate": 2.312089006736361e-06, "loss": 0.6051, "step": 15474 }, { "epoch": 1.38, "grad_norm": 7.166572416557928, "learning_rate": 2.3114799231372227e-06, "loss": 0.5943, "step": 15475 }, { "epoch": 1.38, "grad_norm": 7.36832515016384, "learning_rate": 2.310870895655052e-06, "loss": 0.5975, "step": 15476 }, { "epoch": 1.38, "grad_norm": 7.3408516501876395, "learning_rate": 2.3102619243025615e-06, "loss": 0.5397, "step": 15477 }, { "epoch": 1.38, "grad_norm": 7.587408409996009, "learning_rate": 2.3096530090924636e-06, "loss": 0.5767, "step": 15478 }, { "epoch": 1.38, "grad_norm": 8.602740051885991, "learning_rate": 2.309044150037468e-06, "loss": 0.5706, "step": 15479 }, { "epoch": 1.38, "grad_norm": 7.9944107133441635, "learning_rate": 2.3084353471502825e-06, "loss": 0.5715, "step": 15480 }, { "epoch": 1.38, "grad_norm": 5.78972806386249, "learning_rate": 2.3078266004436133e-06, "loss": 0.6091, "step": 15481 }, { "epoch": 1.38, "grad_norm": 7.1773874081713185, "learning_rate": 2.307217909930169e-06, "loss": 0.5428, "step": 15482 }, { "epoch": 1.38, "grad_norm": 6.1573500306151745, "learning_rate": 2.3066092756226534e-06, "loss": 0.537, "step": 15483 }, { "epoch": 1.38, "grad_norm": 8.219978077509818, "learning_rate": 2.306000697533771e-06, "loss": 0.5551, "step": 15484 }, { "epoch": 1.38, "grad_norm": 5.537824774983657, "learning_rate": 2.3053921756762254e-06, "loss": 0.5328, "step": 15485 }, { "epoch": 1.38, "grad_norm": 7.878449655424195, "learning_rate": 2.3047837100627155e-06, "loss": 0.5875, "step": 15486 }, { "epoch": 1.38, "grad_norm": 5.614249164376737, "learning_rate": 2.3041753007059436e-06, "loss": 0.5809, "step": 15487 }, { "epoch": 1.38, "grad_norm": 7.143201321680394, "learning_rate": 2.3035669476186084e-06, "loss": 0.5902, "step": 15488 }, { "epoch": 1.38, "grad_norm": 7.728390276737316, "learning_rate": 2.3029586508134082e-06, "loss": 0.5822, "step": 15489 }, { "epoch": 1.38, "grad_norm": 6.7365649591582635, "learning_rate": 2.3023504103030414e-06, "loss": 0.5933, "step": 15490 }, { "epoch": 1.38, "grad_norm": 5.3811986695413365, "learning_rate": 2.3017422261002e-06, "loss": 0.6053, "step": 15491 }, { "epoch": 1.38, "grad_norm": 5.339735703851821, "learning_rate": 2.301134098217583e-06, "loss": 0.6474, "step": 15492 }, { "epoch": 1.38, "grad_norm": 5.560044057227534, "learning_rate": 2.3005260266678787e-06, "loss": 0.5446, "step": 15493 }, { "epoch": 1.38, "grad_norm": 5.738776697734147, "learning_rate": 2.2999180114637827e-06, "loss": 0.488, "step": 15494 }, { "epoch": 1.38, "grad_norm": 6.092076979202961, "learning_rate": 2.299310052617984e-06, "loss": 0.633, "step": 15495 }, { "epoch": 1.38, "grad_norm": 6.517811745656809, "learning_rate": 2.2987021501431745e-06, "loss": 0.5775, "step": 15496 }, { "epoch": 1.38, "grad_norm": 7.287424039074518, "learning_rate": 2.2980943040520414e-06, "loss": 0.5544, "step": 15497 }, { "epoch": 1.38, "grad_norm": 7.3444802632842405, "learning_rate": 2.2974865143572743e-06, "loss": 0.5928, "step": 15498 }, { "epoch": 1.38, "grad_norm": 6.617330863732741, "learning_rate": 2.2968787810715565e-06, "loss": 0.6096, "step": 15499 }, { "epoch": 1.38, "grad_norm": 7.64000918107434, "learning_rate": 2.296271104207574e-06, "loss": 0.5604, "step": 15500 }, { "epoch": 1.38, "grad_norm": 6.88836362968792, "learning_rate": 2.2956634837780117e-06, "loss": 0.5182, "step": 15501 }, { "epoch": 1.38, "grad_norm": 5.972708404050875, "learning_rate": 2.2950559197955513e-06, "loss": 0.6206, "step": 15502 }, { "epoch": 1.38, "grad_norm": 7.396613974311872, "learning_rate": 2.294448412272875e-06, "loss": 0.5324, "step": 15503 }, { "epoch": 1.38, "grad_norm": 5.990108974993849, "learning_rate": 2.293840961222665e-06, "loss": 0.6175, "step": 15504 }, { "epoch": 1.38, "grad_norm": 5.562270066050555, "learning_rate": 2.293233566657598e-06, "loss": 0.6519, "step": 15505 }, { "epoch": 1.38, "grad_norm": 5.478272529548018, "learning_rate": 2.292626228590351e-06, "loss": 0.5372, "step": 15506 }, { "epoch": 1.38, "grad_norm": 5.691622446186954, "learning_rate": 2.292018947033603e-06, "loss": 0.5814, "step": 15507 }, { "epoch": 1.38, "grad_norm": 5.126294082687872, "learning_rate": 2.291411722000028e-06, "loss": 0.5747, "step": 15508 }, { "epoch": 1.38, "grad_norm": 6.242530931900307, "learning_rate": 2.290804553502303e-06, "loss": 0.5796, "step": 15509 }, { "epoch": 1.38, "grad_norm": 5.431727217148969, "learning_rate": 2.2901974415530986e-06, "loss": 0.5723, "step": 15510 }, { "epoch": 1.38, "grad_norm": 6.129591634919501, "learning_rate": 2.289590386165091e-06, "loss": 0.6224, "step": 15511 }, { "epoch": 1.38, "grad_norm": 9.256229210162052, "learning_rate": 2.288983387350946e-06, "loss": 0.5536, "step": 15512 }, { "epoch": 1.38, "grad_norm": 9.766328352293497, "learning_rate": 2.288376445123336e-06, "loss": 0.5686, "step": 15513 }, { "epoch": 1.38, "grad_norm": 7.274481291683681, "learning_rate": 2.287769559494929e-06, "loss": 0.6169, "step": 15514 }, { "epoch": 1.38, "grad_norm": 7.756244146264532, "learning_rate": 2.287162730478393e-06, "loss": 0.6844, "step": 15515 }, { "epoch": 1.38, "grad_norm": 8.662147766753808, "learning_rate": 2.2865559580863944e-06, "loss": 0.5539, "step": 15516 }, { "epoch": 1.38, "grad_norm": 7.354784450713886, "learning_rate": 2.285949242331599e-06, "loss": 0.6123, "step": 15517 }, { "epoch": 1.38, "grad_norm": 6.821915308067003, "learning_rate": 2.2853425832266685e-06, "loss": 0.5976, "step": 15518 }, { "epoch": 1.38, "grad_norm": 9.159308143302507, "learning_rate": 2.284735980784268e-06, "loss": 0.6295, "step": 15519 }, { "epoch": 1.38, "grad_norm": 8.019737960911044, "learning_rate": 2.284129435017055e-06, "loss": 0.6273, "step": 15520 }, { "epoch": 1.38, "grad_norm": 6.802354130475889, "learning_rate": 2.2835229459376934e-06, "loss": 0.5827, "step": 15521 }, { "epoch": 1.38, "grad_norm": 6.5077442296588455, "learning_rate": 2.282916513558841e-06, "loss": 0.6386, "step": 15522 }, { "epoch": 1.38, "grad_norm": 6.8833312564281295, "learning_rate": 2.2823101378931556e-06, "loss": 0.5878, "step": 15523 }, { "epoch": 1.38, "grad_norm": 6.692726915114494, "learning_rate": 2.281703818953297e-06, "loss": 0.589, "step": 15524 }, { "epoch": 1.38, "grad_norm": 8.3904139688218, "learning_rate": 2.2810975567519163e-06, "loss": 0.6323, "step": 15525 }, { "epoch": 1.39, "grad_norm": 5.0787891151632065, "learning_rate": 2.280491351301669e-06, "loss": 0.5446, "step": 15526 }, { "epoch": 1.39, "grad_norm": 5.1552074738629985, "learning_rate": 2.2798852026152096e-06, "loss": 0.6238, "step": 15527 }, { "epoch": 1.39, "grad_norm": 6.930723641617397, "learning_rate": 2.27927911070519e-06, "loss": 0.5932, "step": 15528 }, { "epoch": 1.39, "grad_norm": 6.635212212948513, "learning_rate": 2.2786730755842623e-06, "loss": 0.5469, "step": 15529 }, { "epoch": 1.39, "grad_norm": 7.179465081279692, "learning_rate": 2.2780670972650724e-06, "loss": 0.5811, "step": 15530 }, { "epoch": 1.39, "grad_norm": 6.240229625981796, "learning_rate": 2.2774611757602715e-06, "loss": 0.5524, "step": 15531 }, { "epoch": 1.39, "grad_norm": 7.692640059855339, "learning_rate": 2.276855311082506e-06, "loss": 0.5279, "step": 15532 }, { "epoch": 1.39, "grad_norm": 8.260172160997833, "learning_rate": 2.2762495032444237e-06, "loss": 0.6005, "step": 15533 }, { "epoch": 1.39, "grad_norm": 7.905338256063175, "learning_rate": 2.2756437522586667e-06, "loss": 0.5705, "step": 15534 }, { "epoch": 1.39, "grad_norm": 7.325098392747677, "learning_rate": 2.27503805813788e-06, "loss": 0.6001, "step": 15535 }, { "epoch": 1.39, "grad_norm": 6.61194935433905, "learning_rate": 2.2744324208947083e-06, "loss": 0.6119, "step": 15536 }, { "epoch": 1.39, "grad_norm": 7.051318931948064, "learning_rate": 2.273826840541789e-06, "loss": 0.6012, "step": 15537 }, { "epoch": 1.39, "grad_norm": 8.494731887442004, "learning_rate": 2.2732213170917644e-06, "loss": 0.5907, "step": 15538 }, { "epoch": 1.39, "grad_norm": 6.381620715517347, "learning_rate": 2.2726158505572733e-06, "loss": 0.5676, "step": 15539 }, { "epoch": 1.39, "grad_norm": 6.044214625921926, "learning_rate": 2.272010440950953e-06, "loss": 0.6268, "step": 15540 }, { "epoch": 1.39, "grad_norm": 5.217967044999442, "learning_rate": 2.2714050882854406e-06, "loss": 0.6475, "step": 15541 }, { "epoch": 1.39, "grad_norm": 7.495022078253916, "learning_rate": 2.270799792573373e-06, "loss": 0.5363, "step": 15542 }, { "epoch": 1.39, "grad_norm": 6.691000764878836, "learning_rate": 2.2701945538273817e-06, "loss": 0.5599, "step": 15543 }, { "epoch": 1.39, "grad_norm": 4.639502100495015, "learning_rate": 2.269589372060101e-06, "loss": 0.595, "step": 15544 }, { "epoch": 1.39, "grad_norm": 5.432846793432708, "learning_rate": 2.268984247284162e-06, "loss": 0.5957, "step": 15545 }, { "epoch": 1.39, "grad_norm": 8.150157268118116, "learning_rate": 2.2683791795121967e-06, "loss": 0.6025, "step": 15546 }, { "epoch": 1.39, "grad_norm": 5.609159670886531, "learning_rate": 2.2677741687568354e-06, "loss": 0.5458, "step": 15547 }, { "epoch": 1.39, "grad_norm": 5.939084940215274, "learning_rate": 2.2671692150307027e-06, "loss": 0.6128, "step": 15548 }, { "epoch": 1.39, "grad_norm": 5.60007127048123, "learning_rate": 2.26656431834643e-06, "loss": 0.5781, "step": 15549 }, { "epoch": 1.39, "grad_norm": 5.533569136144716, "learning_rate": 2.2659594787166396e-06, "loss": 0.5655, "step": 15550 }, { "epoch": 1.39, "grad_norm": 7.062001160902101, "learning_rate": 2.2653546961539575e-06, "loss": 0.6179, "step": 15551 }, { "epoch": 1.39, "grad_norm": 5.584899312538942, "learning_rate": 2.2647499706710067e-06, "loss": 0.5854, "step": 15552 }, { "epoch": 1.39, "grad_norm": 6.6641901824816365, "learning_rate": 2.2641453022804105e-06, "loss": 0.5886, "step": 15553 }, { "epoch": 1.39, "grad_norm": 7.580473794486593, "learning_rate": 2.26354069099479e-06, "loss": 0.5737, "step": 15554 }, { "epoch": 1.39, "grad_norm": 7.523667506723249, "learning_rate": 2.262936136826766e-06, "loss": 0.5482, "step": 15555 }, { "epoch": 1.39, "grad_norm": 6.1032255706817224, "learning_rate": 2.262331639788955e-06, "loss": 0.6308, "step": 15556 }, { "epoch": 1.39, "grad_norm": 13.606743325320394, "learning_rate": 2.261727199893975e-06, "loss": 0.5148, "step": 15557 }, { "epoch": 1.39, "grad_norm": 5.163359274854474, "learning_rate": 2.2611228171544437e-06, "loss": 0.5952, "step": 15558 }, { "epoch": 1.39, "grad_norm": 7.480968484136979, "learning_rate": 2.2605184915829747e-06, "loss": 0.5989, "step": 15559 }, { "epoch": 1.39, "grad_norm": 6.140179800106157, "learning_rate": 2.2599142231921835e-06, "loss": 0.5913, "step": 15560 }, { "epoch": 1.39, "grad_norm": 9.196090875471244, "learning_rate": 2.2593100119946836e-06, "loss": 0.6099, "step": 15561 }, { "epoch": 1.39, "grad_norm": 8.128399659222282, "learning_rate": 2.2587058580030853e-06, "loss": 0.6052, "step": 15562 }, { "epoch": 1.39, "grad_norm": 8.205802365533584, "learning_rate": 2.258101761229997e-06, "loss": 0.613, "step": 15563 }, { "epoch": 1.39, "grad_norm": 6.0746849393358575, "learning_rate": 2.25749772168803e-06, "loss": 0.5996, "step": 15564 }, { "epoch": 1.39, "grad_norm": 7.444651498365336, "learning_rate": 2.256893739389792e-06, "loss": 0.6233, "step": 15565 }, { "epoch": 1.39, "grad_norm": 12.49912525461129, "learning_rate": 2.25628981434789e-06, "loss": 0.5772, "step": 15566 }, { "epoch": 1.39, "grad_norm": 7.062196474824254, "learning_rate": 2.255685946574929e-06, "loss": 0.5803, "step": 15567 }, { "epoch": 1.39, "grad_norm": 5.004051699993318, "learning_rate": 2.2550821360835164e-06, "loss": 0.5405, "step": 15568 }, { "epoch": 1.39, "grad_norm": 7.034073059435542, "learning_rate": 2.254478382886251e-06, "loss": 0.5926, "step": 15569 }, { "epoch": 1.39, "grad_norm": 7.657798285503411, "learning_rate": 2.2538746869957364e-06, "loss": 0.5834, "step": 15570 }, { "epoch": 1.39, "grad_norm": 6.394421058167707, "learning_rate": 2.253271048424574e-06, "loss": 0.634, "step": 15571 }, { "epoch": 1.39, "grad_norm": 7.401578132079515, "learning_rate": 2.2526674671853633e-06, "loss": 0.5897, "step": 15572 }, { "epoch": 1.39, "grad_norm": 6.8648145691949685, "learning_rate": 2.2520639432907027e-06, "loss": 0.6078, "step": 15573 }, { "epoch": 1.39, "grad_norm": 7.762580664121214, "learning_rate": 2.251460476753191e-06, "loss": 0.6206, "step": 15574 }, { "epoch": 1.39, "grad_norm": 5.203870606746879, "learning_rate": 2.2508570675854206e-06, "loss": 0.5376, "step": 15575 }, { "epoch": 1.39, "grad_norm": 7.5414261675266, "learning_rate": 2.250253715799991e-06, "loss": 0.5998, "step": 15576 }, { "epoch": 1.39, "grad_norm": 5.344924594060138, "learning_rate": 2.2496504214094903e-06, "loss": 0.5372, "step": 15577 }, { "epoch": 1.39, "grad_norm": 5.3800127822715496, "learning_rate": 2.249047184426515e-06, "loss": 0.6079, "step": 15578 }, { "epoch": 1.39, "grad_norm": 5.565643012796541, "learning_rate": 2.2484440048636546e-06, "loss": 0.6011, "step": 15579 }, { "epoch": 1.39, "grad_norm": 6.65188988426823, "learning_rate": 2.2478408827335e-06, "loss": 0.5752, "step": 15580 }, { "epoch": 1.39, "grad_norm": 7.512822461868368, "learning_rate": 2.247237818048642e-06, "loss": 0.5969, "step": 15581 }, { "epoch": 1.39, "grad_norm": 6.401452121864045, "learning_rate": 2.246634810821664e-06, "loss": 0.6224, "step": 15582 }, { "epoch": 1.39, "grad_norm": 6.966459529198738, "learning_rate": 2.2460318610651542e-06, "loss": 0.5779, "step": 15583 }, { "epoch": 1.39, "grad_norm": 10.063822823480027, "learning_rate": 2.2454289687916986e-06, "loss": 0.6116, "step": 15584 }, { "epoch": 1.39, "grad_norm": 4.7861253753645, "learning_rate": 2.244826134013881e-06, "loss": 0.5043, "step": 15585 }, { "epoch": 1.39, "grad_norm": 10.234652875259922, "learning_rate": 2.244223356744284e-06, "loss": 0.5442, "step": 15586 }, { "epoch": 1.39, "grad_norm": 7.7388166435294385, "learning_rate": 2.243620636995491e-06, "loss": 0.602, "step": 15587 }, { "epoch": 1.39, "grad_norm": 5.817899256573841, "learning_rate": 2.243017974780079e-06, "loss": 0.5816, "step": 15588 }, { "epoch": 1.39, "grad_norm": 7.411744056849063, "learning_rate": 2.2424153701106293e-06, "loss": 0.607, "step": 15589 }, { "epoch": 1.39, "grad_norm": 6.071273668068562, "learning_rate": 2.24181282299972e-06, "loss": 0.5579, "step": 15590 }, { "epoch": 1.39, "grad_norm": 7.318971582486893, "learning_rate": 2.241210333459929e-06, "loss": 0.5208, "step": 15591 }, { "epoch": 1.39, "grad_norm": 6.503929681814502, "learning_rate": 2.240607901503829e-06, "loss": 0.6085, "step": 15592 }, { "epoch": 1.39, "grad_norm": 6.257879264064039, "learning_rate": 2.2400055271439956e-06, "loss": 0.5918, "step": 15593 }, { "epoch": 1.39, "grad_norm": 8.02938744965514, "learning_rate": 2.239403210393005e-06, "loss": 0.5917, "step": 15594 }, { "epoch": 1.39, "grad_norm": 4.677149893825906, "learning_rate": 2.238800951263425e-06, "loss": 0.6166, "step": 15595 }, { "epoch": 1.39, "grad_norm": 6.945114378936415, "learning_rate": 2.238198749767828e-06, "loss": 0.5913, "step": 15596 }, { "epoch": 1.39, "grad_norm": 6.40008784439519, "learning_rate": 2.2375966059187837e-06, "loss": 0.5265, "step": 15597 }, { "epoch": 1.39, "grad_norm": 7.207602497082454, "learning_rate": 2.2369945197288613e-06, "loss": 0.5914, "step": 15598 }, { "epoch": 1.39, "grad_norm": 6.536810123387598, "learning_rate": 2.2363924912106265e-06, "loss": 0.5993, "step": 15599 }, { "epoch": 1.39, "grad_norm": 8.317339480881117, "learning_rate": 2.235790520376649e-06, "loss": 0.5609, "step": 15600 }, { "epoch": 1.39, "grad_norm": 6.059107588038489, "learning_rate": 2.2351886072394885e-06, "loss": 0.6361, "step": 15601 }, { "epoch": 1.39, "grad_norm": 7.545937163873598, "learning_rate": 2.234586751811711e-06, "loss": 0.628, "step": 15602 }, { "epoch": 1.39, "grad_norm": 7.096821089593124, "learning_rate": 2.233984954105879e-06, "loss": 0.6058, "step": 15603 }, { "epoch": 1.39, "grad_norm": 7.062418190807811, "learning_rate": 2.2333832141345537e-06, "loss": 0.5527, "step": 15604 }, { "epoch": 1.39, "grad_norm": 6.795201408707934, "learning_rate": 2.232781531910297e-06, "loss": 0.523, "step": 15605 }, { "epoch": 1.39, "grad_norm": 7.602840990275533, "learning_rate": 2.2321799074456635e-06, "loss": 0.6211, "step": 15606 }, { "epoch": 1.39, "grad_norm": 6.00375411876351, "learning_rate": 2.231578340753215e-06, "loss": 0.5745, "step": 15607 }, { "epoch": 1.39, "grad_norm": 7.669499703899708, "learning_rate": 2.230976831845504e-06, "loss": 0.547, "step": 15608 }, { "epoch": 1.39, "grad_norm": 5.951029857483052, "learning_rate": 2.2303753807350877e-06, "loss": 0.5479, "step": 15609 }, { "epoch": 1.39, "grad_norm": 5.845023928682716, "learning_rate": 2.22977398743452e-06, "loss": 0.5294, "step": 15610 }, { "epoch": 1.39, "grad_norm": 4.723177730187189, "learning_rate": 2.2291726519563533e-06, "loss": 0.5794, "step": 15611 }, { "epoch": 1.39, "grad_norm": 4.656420293056503, "learning_rate": 2.2285713743131416e-06, "loss": 0.5806, "step": 15612 }, { "epoch": 1.39, "grad_norm": 6.4384890700339135, "learning_rate": 2.227970154517431e-06, "loss": 0.6101, "step": 15613 }, { "epoch": 1.39, "grad_norm": 5.299444198511948, "learning_rate": 2.2273689925817734e-06, "loss": 0.5397, "step": 15614 }, { "epoch": 1.39, "grad_norm": 8.226051667954323, "learning_rate": 2.2267678885187153e-06, "loss": 0.5945, "step": 15615 }, { "epoch": 1.39, "grad_norm": 7.186395273686995, "learning_rate": 2.226166842340805e-06, "loss": 0.5707, "step": 15616 }, { "epoch": 1.39, "grad_norm": 7.513929340521346, "learning_rate": 2.225565854060587e-06, "loss": 0.5704, "step": 15617 }, { "epoch": 1.39, "grad_norm": 5.155406688131777, "learning_rate": 2.2249649236906074e-06, "loss": 0.5975, "step": 15618 }, { "epoch": 1.39, "grad_norm": 5.939788142019107, "learning_rate": 2.2243640512434063e-06, "loss": 0.55, "step": 15619 }, { "epoch": 1.39, "grad_norm": 9.952942731327514, "learning_rate": 2.223763236731529e-06, "loss": 0.553, "step": 15620 }, { "epoch": 1.39, "grad_norm": 6.791200638794214, "learning_rate": 2.223162480167512e-06, "loss": 0.6532, "step": 15621 }, { "epoch": 1.39, "grad_norm": 7.674017611742752, "learning_rate": 2.2225617815638976e-06, "loss": 0.6046, "step": 15622 }, { "epoch": 1.39, "grad_norm": 6.003003869390359, "learning_rate": 2.221961140933223e-06, "loss": 0.5808, "step": 15623 }, { "epoch": 1.39, "grad_norm": 5.577460646500969, "learning_rate": 2.2213605582880267e-06, "loss": 0.5306, "step": 15624 }, { "epoch": 1.39, "grad_norm": 5.9164954037989705, "learning_rate": 2.2207600336408445e-06, "loss": 0.5701, "step": 15625 }, { "epoch": 1.39, "grad_norm": 8.175039937353574, "learning_rate": 2.220159567004209e-06, "loss": 0.598, "step": 15626 }, { "epoch": 1.39, "grad_norm": 8.40589863637329, "learning_rate": 2.219559158390654e-06, "loss": 0.5665, "step": 15627 }, { "epoch": 1.39, "grad_norm": 8.289927870697271, "learning_rate": 2.2189588078127133e-06, "loss": 0.6047, "step": 15628 }, { "epoch": 1.39, "grad_norm": 5.843331258542825, "learning_rate": 2.218358515282917e-06, "loss": 0.6215, "step": 15629 }, { "epoch": 1.39, "grad_norm": 6.230380913435165, "learning_rate": 2.217758280813794e-06, "loss": 0.6042, "step": 15630 }, { "epoch": 1.39, "grad_norm": 5.978618259296307, "learning_rate": 2.217158104417877e-06, "loss": 0.5401, "step": 15631 }, { "epoch": 1.39, "grad_norm": 4.91382112469569, "learning_rate": 2.216557986107688e-06, "loss": 0.5482, "step": 15632 }, { "epoch": 1.39, "grad_norm": 7.967820062097949, "learning_rate": 2.2159579258957553e-06, "loss": 0.6014, "step": 15633 }, { "epoch": 1.39, "grad_norm": 5.812314975275016, "learning_rate": 2.2153579237946054e-06, "loss": 0.5818, "step": 15634 }, { "epoch": 1.39, "grad_norm": 7.286475745042007, "learning_rate": 2.2147579798167597e-06, "loss": 0.6168, "step": 15635 }, { "epoch": 1.39, "grad_norm": 8.513088922788546, "learning_rate": 2.2141580939747408e-06, "loss": 0.5796, "step": 15636 }, { "epoch": 1.39, "grad_norm": 7.942028879466706, "learning_rate": 2.2135582662810712e-06, "loss": 0.5637, "step": 15637 }, { "epoch": 1.4, "grad_norm": 6.812554770176907, "learning_rate": 2.212958496748272e-06, "loss": 0.5527, "step": 15638 }, { "epoch": 1.4, "grad_norm": 6.440702737846951, "learning_rate": 2.212358785388859e-06, "loss": 0.5712, "step": 15639 }, { "epoch": 1.4, "grad_norm": 5.194360386412732, "learning_rate": 2.211759132215352e-06, "loss": 0.5831, "step": 15640 }, { "epoch": 1.4, "grad_norm": 7.852375292816661, "learning_rate": 2.211159537240266e-06, "loss": 0.5321, "step": 15641 }, { "epoch": 1.4, "grad_norm": 6.552701099561052, "learning_rate": 2.210560000476118e-06, "loss": 0.5771, "step": 15642 }, { "epoch": 1.4, "grad_norm": 6.615719633723244, "learning_rate": 2.2099605219354207e-06, "loss": 0.559, "step": 15643 }, { "epoch": 1.4, "grad_norm": 5.379730959696422, "learning_rate": 2.20936110163069e-06, "loss": 0.5472, "step": 15644 }, { "epoch": 1.4, "grad_norm": 6.750688988713783, "learning_rate": 2.2087617395744325e-06, "loss": 0.574, "step": 15645 }, { "epoch": 1.4, "grad_norm": 6.485735847937765, "learning_rate": 2.2081624357791616e-06, "loss": 0.5905, "step": 15646 }, { "epoch": 1.4, "grad_norm": 6.02474309702848, "learning_rate": 2.2075631902573853e-06, "loss": 0.6219, "step": 15647 }, { "epoch": 1.4, "grad_norm": 6.003567980708661, "learning_rate": 2.206964003021614e-06, "loss": 0.5279, "step": 15648 }, { "epoch": 1.4, "grad_norm": 6.570689065339225, "learning_rate": 2.20636487408435e-06, "loss": 0.6038, "step": 15649 }, { "epoch": 1.4, "grad_norm": 5.8324203635522, "learning_rate": 2.205765803458102e-06, "loss": 0.6162, "step": 15650 }, { "epoch": 1.4, "grad_norm": 8.251205354420003, "learning_rate": 2.2051667911553755e-06, "loss": 0.5866, "step": 15651 }, { "epoch": 1.4, "grad_norm": 6.708706482612838, "learning_rate": 2.2045678371886695e-06, "loss": 0.5754, "step": 15652 }, { "epoch": 1.4, "grad_norm": 7.044540190275258, "learning_rate": 2.2039689415704884e-06, "loss": 0.5853, "step": 15653 }, { "epoch": 1.4, "grad_norm": 5.513298737234314, "learning_rate": 2.2033701043133322e-06, "loss": 0.6157, "step": 15654 }, { "epoch": 1.4, "grad_norm": 6.251246736442857, "learning_rate": 2.2027713254297006e-06, "loss": 0.5777, "step": 15655 }, { "epoch": 1.4, "grad_norm": 8.60103473466808, "learning_rate": 2.202172604932092e-06, "loss": 0.5561, "step": 15656 }, { "epoch": 1.4, "grad_norm": 6.23671225315828, "learning_rate": 2.2015739428330048e-06, "loss": 0.5985, "step": 15657 }, { "epoch": 1.4, "grad_norm": 6.50414140293372, "learning_rate": 2.2009753391449305e-06, "loss": 0.6258, "step": 15658 }, { "epoch": 1.4, "grad_norm": 8.583250011230986, "learning_rate": 2.2003767938803672e-06, "loss": 0.519, "step": 15659 }, { "epoch": 1.4, "grad_norm": 7.941374611238873, "learning_rate": 2.199778307051807e-06, "loss": 0.5896, "step": 15660 }, { "epoch": 1.4, "grad_norm": 6.337216847467996, "learning_rate": 2.199179878671742e-06, "loss": 0.5852, "step": 15661 }, { "epoch": 1.4, "grad_norm": 7.574492363303739, "learning_rate": 2.1985815087526636e-06, "loss": 0.5817, "step": 15662 }, { "epoch": 1.4, "grad_norm": 5.641587735515801, "learning_rate": 2.1979831973070633e-06, "loss": 0.5903, "step": 15663 }, { "epoch": 1.4, "grad_norm": 8.154703977153485, "learning_rate": 2.1973849443474266e-06, "loss": 0.5262, "step": 15664 }, { "epoch": 1.4, "grad_norm": 7.243722191711865, "learning_rate": 2.1967867498862404e-06, "loss": 0.6406, "step": 15665 }, { "epoch": 1.4, "grad_norm": 6.451266639995774, "learning_rate": 2.1961886139359917e-06, "loss": 0.6095, "step": 15666 }, { "epoch": 1.4, "grad_norm": 7.715373580611428, "learning_rate": 2.1955905365091657e-06, "loss": 0.571, "step": 15667 }, { "epoch": 1.4, "grad_norm": 6.306867963714762, "learning_rate": 2.194992517618245e-06, "loss": 0.5747, "step": 15668 }, { "epoch": 1.4, "grad_norm": 5.643358437955661, "learning_rate": 2.194394557275713e-06, "loss": 0.5979, "step": 15669 }, { "epoch": 1.4, "grad_norm": 6.790702019157372, "learning_rate": 2.1937966554940526e-06, "loss": 0.6099, "step": 15670 }, { "epoch": 1.4, "grad_norm": 6.363658459039709, "learning_rate": 2.1931988122857393e-06, "loss": 0.5417, "step": 15671 }, { "epoch": 1.4, "grad_norm": 6.033870747203033, "learning_rate": 2.1926010276632543e-06, "loss": 0.5721, "step": 15672 }, { "epoch": 1.4, "grad_norm": 7.7262675474074936, "learning_rate": 2.1920033016390744e-06, "loss": 0.5745, "step": 15673 }, { "epoch": 1.4, "grad_norm": 5.842109755115991, "learning_rate": 2.191405634225677e-06, "loss": 0.5761, "step": 15674 }, { "epoch": 1.4, "grad_norm": 5.4228387236894, "learning_rate": 2.190808025435535e-06, "loss": 0.5568, "step": 15675 }, { "epoch": 1.4, "grad_norm": 8.043262038896328, "learning_rate": 2.1902104752811266e-06, "loss": 0.558, "step": 15676 }, { "epoch": 1.4, "grad_norm": 5.907952635859913, "learning_rate": 2.1896129837749203e-06, "loss": 0.5754, "step": 15677 }, { "epoch": 1.4, "grad_norm": 6.720722234861124, "learning_rate": 2.1890155509293866e-06, "loss": 0.5545, "step": 15678 }, { "epoch": 1.4, "grad_norm": 6.4826126466908285, "learning_rate": 2.188418176756997e-06, "loss": 0.5783, "step": 15679 }, { "epoch": 1.4, "grad_norm": 6.306921953174648, "learning_rate": 2.187820861270222e-06, "loss": 0.6009, "step": 15680 }, { "epoch": 1.4, "grad_norm": 6.9531255252659125, "learning_rate": 2.1872236044815264e-06, "loss": 0.5809, "step": 15681 }, { "epoch": 1.4, "grad_norm": 6.397331859769235, "learning_rate": 2.1866264064033793e-06, "loss": 0.691, "step": 15682 }, { "epoch": 1.4, "grad_norm": 6.666996358758681, "learning_rate": 2.1860292670482464e-06, "loss": 0.5479, "step": 15683 }, { "epoch": 1.4, "grad_norm": 7.005446876921415, "learning_rate": 2.1854321864285876e-06, "loss": 0.59, "step": 15684 }, { "epoch": 1.4, "grad_norm": 6.199544261099525, "learning_rate": 2.184835164556869e-06, "loss": 0.6147, "step": 15685 }, { "epoch": 1.4, "grad_norm": 5.510860699884743, "learning_rate": 2.1842382014455505e-06, "loss": 0.5674, "step": 15686 }, { "epoch": 1.4, "grad_norm": 6.624098520666755, "learning_rate": 2.1836412971070937e-06, "loss": 0.5796, "step": 15687 }, { "epoch": 1.4, "grad_norm": 7.091728329368284, "learning_rate": 2.1830444515539584e-06, "loss": 0.5826, "step": 15688 }, { "epoch": 1.4, "grad_norm": 6.829096403026924, "learning_rate": 2.1824476647985993e-06, "loss": 0.5337, "step": 15689 }, { "epoch": 1.4, "grad_norm": 6.1442032222669, "learning_rate": 2.181850936853475e-06, "loss": 0.5498, "step": 15690 }, { "epoch": 1.4, "grad_norm": 6.170694821264278, "learning_rate": 2.181254267731041e-06, "loss": 0.598, "step": 15691 }, { "epoch": 1.4, "grad_norm": 6.5386660279352045, "learning_rate": 2.180657657443753e-06, "loss": 0.5603, "step": 15692 }, { "epoch": 1.4, "grad_norm": 5.726610193244645, "learning_rate": 2.18006110600406e-06, "loss": 0.6021, "step": 15693 }, { "epoch": 1.4, "grad_norm": 4.8886733240856195, "learning_rate": 2.1794646134244164e-06, "loss": 0.5615, "step": 15694 }, { "epoch": 1.4, "grad_norm": 5.96813292034928, "learning_rate": 2.1788681797172733e-06, "loss": 0.5935, "step": 15695 }, { "epoch": 1.4, "grad_norm": 5.291938071584457, "learning_rate": 2.1782718048950773e-06, "loss": 0.5906, "step": 15696 }, { "epoch": 1.4, "grad_norm": 5.690068570967789, "learning_rate": 2.177675488970278e-06, "loss": 0.5583, "step": 15697 }, { "epoch": 1.4, "grad_norm": 5.239412876760367, "learning_rate": 2.177079231955322e-06, "loss": 0.5767, "step": 15698 }, { "epoch": 1.4, "grad_norm": 8.434531571779699, "learning_rate": 2.1764830338626548e-06, "loss": 0.6725, "step": 15699 }, { "epoch": 1.4, "grad_norm": 4.701132057919579, "learning_rate": 2.175886894704721e-06, "loss": 0.5344, "step": 15700 }, { "epoch": 1.4, "grad_norm": 5.1935513105814, "learning_rate": 2.1752908144939656e-06, "loss": 0.6033, "step": 15701 }, { "epoch": 1.4, "grad_norm": 4.9551594759325, "learning_rate": 2.1746947932428263e-06, "loss": 0.5203, "step": 15702 }, { "epoch": 1.4, "grad_norm": 5.614104059658775, "learning_rate": 2.1740988309637455e-06, "loss": 0.6517, "step": 15703 }, { "epoch": 1.4, "grad_norm": 7.683478447472192, "learning_rate": 2.173502927669164e-06, "loss": 0.5855, "step": 15704 }, { "epoch": 1.4, "grad_norm": 6.663120613195911, "learning_rate": 2.1729070833715183e-06, "loss": 0.5707, "step": 15705 }, { "epoch": 1.4, "grad_norm": 7.2468490273477295, "learning_rate": 2.1723112980832477e-06, "loss": 0.6345, "step": 15706 }, { "epoch": 1.4, "grad_norm": 6.652671145829867, "learning_rate": 2.1717155718167847e-06, "loss": 0.5894, "step": 15707 }, { "epoch": 1.4, "grad_norm": 6.017025107059707, "learning_rate": 2.171119904584567e-06, "loss": 0.5864, "step": 15708 }, { "epoch": 1.4, "grad_norm": 5.822947915971672, "learning_rate": 2.1705242963990248e-06, "loss": 0.6026, "step": 15709 }, { "epoch": 1.4, "grad_norm": 5.04697909237207, "learning_rate": 2.1699287472725904e-06, "loss": 0.5976, "step": 15710 }, { "epoch": 1.4, "grad_norm": 8.428641140303187, "learning_rate": 2.1693332572176968e-06, "loss": 0.6328, "step": 15711 }, { "epoch": 1.4, "grad_norm": 7.002559924725337, "learning_rate": 2.168737826246772e-06, "loss": 0.5618, "step": 15712 }, { "epoch": 1.4, "grad_norm": 7.354016262926374, "learning_rate": 2.1681424543722444e-06, "loss": 0.601, "step": 15713 }, { "epoch": 1.4, "grad_norm": 5.777716702022312, "learning_rate": 2.1675471416065436e-06, "loss": 0.5572, "step": 15714 }, { "epoch": 1.4, "grad_norm": 6.4652321295823345, "learning_rate": 2.166951887962091e-06, "loss": 0.5866, "step": 15715 }, { "epoch": 1.4, "grad_norm": 6.269019590306064, "learning_rate": 2.1663566934513136e-06, "loss": 0.5804, "step": 15716 }, { "epoch": 1.4, "grad_norm": 8.501924083466992, "learning_rate": 2.1657615580866347e-06, "loss": 0.5824, "step": 15717 }, { "epoch": 1.4, "grad_norm": 6.169769342951194, "learning_rate": 2.1651664818804767e-06, "loss": 0.6052, "step": 15718 }, { "epoch": 1.4, "grad_norm": 6.734643315845236, "learning_rate": 2.16457146484526e-06, "loss": 0.5928, "step": 15719 }, { "epoch": 1.4, "grad_norm": 6.456288897012107, "learning_rate": 2.1639765069934067e-06, "loss": 0.6104, "step": 15720 }, { "epoch": 1.4, "grad_norm": 7.537893145432655, "learning_rate": 2.1633816083373322e-06, "loss": 0.5696, "step": 15721 }, { "epoch": 1.4, "grad_norm": 8.406328837021045, "learning_rate": 2.1627867688894533e-06, "loss": 0.5779, "step": 15722 }, { "epoch": 1.4, "grad_norm": 7.074076771825386, "learning_rate": 2.1621919886621875e-06, "loss": 0.5929, "step": 15723 }, { "epoch": 1.4, "grad_norm": 5.2061612098053995, "learning_rate": 2.1615972676679483e-06, "loss": 0.5827, "step": 15724 }, { "epoch": 1.4, "grad_norm": 7.0053521727125085, "learning_rate": 2.1610026059191508e-06, "loss": 0.6337, "step": 15725 }, { "epoch": 1.4, "grad_norm": 9.460106959150751, "learning_rate": 2.1604080034282065e-06, "loss": 0.5734, "step": 15726 }, { "epoch": 1.4, "grad_norm": 6.717029827975725, "learning_rate": 2.1598134602075283e-06, "loss": 0.6244, "step": 15727 }, { "epoch": 1.4, "grad_norm": 6.917060068517886, "learning_rate": 2.159218976269522e-06, "loss": 0.5334, "step": 15728 }, { "epoch": 1.4, "grad_norm": 7.242488479037595, "learning_rate": 2.158624551626599e-06, "loss": 0.581, "step": 15729 }, { "epoch": 1.4, "grad_norm": 6.915452496399931, "learning_rate": 2.158030186291166e-06, "loss": 0.5827, "step": 15730 }, { "epoch": 1.4, "grad_norm": 9.728676399644009, "learning_rate": 2.1574358802756283e-06, "loss": 0.5773, "step": 15731 }, { "epoch": 1.4, "grad_norm": 5.909038911851871, "learning_rate": 2.1568416335923924e-06, "loss": 0.6236, "step": 15732 }, { "epoch": 1.4, "grad_norm": 5.800502901551602, "learning_rate": 2.1562474462538623e-06, "loss": 0.5354, "step": 15733 }, { "epoch": 1.4, "grad_norm": 6.583221313467861, "learning_rate": 2.155653318272437e-06, "loss": 0.5954, "step": 15734 }, { "epoch": 1.4, "grad_norm": 6.36200548500084, "learning_rate": 2.155059249660522e-06, "loss": 0.5587, "step": 15735 }, { "epoch": 1.4, "grad_norm": 6.4362806567823085, "learning_rate": 2.154465240430513e-06, "loss": 0.6326, "step": 15736 }, { "epoch": 1.4, "grad_norm": 7.716879021726436, "learning_rate": 2.15387129059481e-06, "loss": 0.5881, "step": 15737 }, { "epoch": 1.4, "grad_norm": 6.177335491118514, "learning_rate": 2.1532774001658113e-06, "loss": 0.5905, "step": 15738 }, { "epoch": 1.4, "grad_norm": 6.420446460076768, "learning_rate": 2.1526835691559127e-06, "loss": 0.6147, "step": 15739 }, { "epoch": 1.4, "grad_norm": 8.857395157657413, "learning_rate": 2.1520897975775107e-06, "loss": 0.5849, "step": 15740 }, { "epoch": 1.4, "grad_norm": 7.173872445865954, "learning_rate": 2.1514960854429954e-06, "loss": 0.5526, "step": 15741 }, { "epoch": 1.4, "grad_norm": 5.246084242185098, "learning_rate": 2.1509024327647614e-06, "loss": 0.5936, "step": 15742 }, { "epoch": 1.4, "grad_norm": 5.401917464956223, "learning_rate": 2.1503088395552004e-06, "loss": 0.5736, "step": 15743 }, { "epoch": 1.4, "grad_norm": 7.169704479552531, "learning_rate": 2.1497153058267006e-06, "loss": 0.5781, "step": 15744 }, { "epoch": 1.4, "grad_norm": 6.215289216012471, "learning_rate": 2.1491218315916525e-06, "loss": 0.6347, "step": 15745 }, { "epoch": 1.4, "grad_norm": 5.709593121073899, "learning_rate": 2.148528416862444e-06, "loss": 0.4937, "step": 15746 }, { "epoch": 1.4, "grad_norm": 5.212548973915593, "learning_rate": 2.147935061651459e-06, "loss": 0.6048, "step": 15747 }, { "epoch": 1.4, "grad_norm": 7.358257068542974, "learning_rate": 2.1473417659710834e-06, "loss": 0.5595, "step": 15748 }, { "epoch": 1.4, "grad_norm": 8.237022647264794, "learning_rate": 2.1467485298337026e-06, "loss": 0.5312, "step": 15749 }, { "epoch": 1.41, "grad_norm": 4.61067774633747, "learning_rate": 2.1461553532516967e-06, "loss": 0.536, "step": 15750 }, { "epoch": 1.41, "grad_norm": 6.477099037610732, "learning_rate": 2.145562236237447e-06, "loss": 0.5704, "step": 15751 }, { "epoch": 1.41, "grad_norm": 6.753480261026587, "learning_rate": 2.144969178803335e-06, "loss": 0.5764, "step": 15752 }, { "epoch": 1.41, "grad_norm": 6.711612695378747, "learning_rate": 2.1443761809617413e-06, "loss": 0.5793, "step": 15753 }, { "epoch": 1.41, "grad_norm": 5.9213645101858035, "learning_rate": 2.1437832427250387e-06, "loss": 0.5378, "step": 15754 }, { "epoch": 1.41, "grad_norm": 7.250016055935236, "learning_rate": 2.1431903641056062e-06, "loss": 0.6086, "step": 15755 }, { "epoch": 1.41, "grad_norm": 7.343369522076429, "learning_rate": 2.142597545115818e-06, "loss": 0.6113, "step": 15756 }, { "epoch": 1.41, "grad_norm": 7.240743741962293, "learning_rate": 2.142004785768049e-06, "loss": 0.574, "step": 15757 }, { "epoch": 1.41, "grad_norm": 7.6337058723541125, "learning_rate": 2.1414120860746714e-06, "loss": 0.6237, "step": 15758 }, { "epoch": 1.41, "grad_norm": 6.455075468246401, "learning_rate": 2.1408194460480576e-06, "loss": 0.5599, "step": 15759 }, { "epoch": 1.41, "grad_norm": 5.215469976761498, "learning_rate": 2.140226865700575e-06, "loss": 0.6488, "step": 15760 }, { "epoch": 1.41, "grad_norm": 8.861684532726306, "learning_rate": 2.1396343450445935e-06, "loss": 0.6567, "step": 15761 }, { "epoch": 1.41, "grad_norm": 6.056689436230939, "learning_rate": 2.139041884092481e-06, "loss": 0.5968, "step": 15762 }, { "epoch": 1.41, "grad_norm": 7.552096246201852, "learning_rate": 2.1384494828566034e-06, "loss": 0.5721, "step": 15763 }, { "epoch": 1.41, "grad_norm": 4.482743736304597, "learning_rate": 2.137857141349328e-06, "loss": 0.5939, "step": 15764 }, { "epoch": 1.41, "grad_norm": 8.20623344835906, "learning_rate": 2.1372648595830157e-06, "loss": 0.6445, "step": 15765 }, { "epoch": 1.41, "grad_norm": 8.14451085174631, "learning_rate": 2.1366726375700315e-06, "loss": 0.5775, "step": 15766 }, { "epoch": 1.41, "grad_norm": 9.369208756980273, "learning_rate": 2.1360804753227337e-06, "loss": 0.6083, "step": 15767 }, { "epoch": 1.41, "grad_norm": 8.722570906548366, "learning_rate": 2.1354883728534843e-06, "loss": 0.5685, "step": 15768 }, { "epoch": 1.41, "grad_norm": 6.435184692637387, "learning_rate": 2.134896330174642e-06, "loss": 0.599, "step": 15769 }, { "epoch": 1.41, "grad_norm": 5.777383324033865, "learning_rate": 2.1343043472985637e-06, "loss": 0.6173, "step": 15770 }, { "epoch": 1.41, "grad_norm": 6.7529198598204045, "learning_rate": 2.1337124242376086e-06, "loss": 0.5856, "step": 15771 }, { "epoch": 1.41, "grad_norm": 8.696326008727045, "learning_rate": 2.133120561004127e-06, "loss": 0.642, "step": 15772 }, { "epoch": 1.41, "grad_norm": 5.904343140770151, "learning_rate": 2.1325287576104765e-06, "loss": 0.5695, "step": 15773 }, { "epoch": 1.41, "grad_norm": 7.802145694173473, "learning_rate": 2.1319370140690083e-06, "loss": 0.5748, "step": 15774 }, { "epoch": 1.41, "grad_norm": 6.543510335563226, "learning_rate": 2.1313453303920735e-06, "loss": 0.5624, "step": 15775 }, { "epoch": 1.41, "grad_norm": 5.874950462129129, "learning_rate": 2.130753706592023e-06, "loss": 0.5592, "step": 15776 }, { "epoch": 1.41, "grad_norm": 5.928576561718683, "learning_rate": 2.130162142681207e-06, "loss": 0.5953, "step": 15777 }, { "epoch": 1.41, "grad_norm": 6.462084499062974, "learning_rate": 2.12957063867197e-06, "loss": 0.6209, "step": 15778 }, { "epoch": 1.41, "grad_norm": 7.031200910093054, "learning_rate": 2.1289791945766613e-06, "loss": 0.5937, "step": 15779 }, { "epoch": 1.41, "grad_norm": 6.686387350906905, "learning_rate": 2.128387810407622e-06, "loss": 0.5429, "step": 15780 }, { "epoch": 1.41, "grad_norm": 7.1027745264469955, "learning_rate": 2.1277964861771992e-06, "loss": 0.5793, "step": 15781 }, { "epoch": 1.41, "grad_norm": 5.949842846990863, "learning_rate": 2.127205221897735e-06, "loss": 0.5458, "step": 15782 }, { "epoch": 1.41, "grad_norm": 6.525160864325934, "learning_rate": 2.1266140175815703e-06, "loss": 0.6098, "step": 15783 }, { "epoch": 1.41, "grad_norm": 7.5970218306002595, "learning_rate": 2.126022873241047e-06, "loss": 0.5805, "step": 15784 }, { "epoch": 1.41, "grad_norm": 8.450251062276902, "learning_rate": 2.1254317888885e-06, "loss": 0.6077, "step": 15785 }, { "epoch": 1.41, "grad_norm": 8.133292069756514, "learning_rate": 2.12484076453627e-06, "loss": 0.6403, "step": 15786 }, { "epoch": 1.41, "grad_norm": 6.153124663258722, "learning_rate": 2.1242498001966922e-06, "loss": 0.576, "step": 15787 }, { "epoch": 1.41, "grad_norm": 7.546884909319679, "learning_rate": 2.1236588958821024e-06, "loss": 0.5899, "step": 15788 }, { "epoch": 1.41, "grad_norm": 8.78339075164476, "learning_rate": 2.1230680516048335e-06, "loss": 0.6482, "step": 15789 }, { "epoch": 1.41, "grad_norm": 10.126058513530888, "learning_rate": 2.122477267377221e-06, "loss": 0.5765, "step": 15790 }, { "epoch": 1.41, "grad_norm": 4.503840680562158, "learning_rate": 2.1218865432115913e-06, "loss": 0.5681, "step": 15791 }, { "epoch": 1.41, "grad_norm": 5.878986778858247, "learning_rate": 2.121295879120278e-06, "loss": 0.5194, "step": 15792 }, { "epoch": 1.41, "grad_norm": 7.538482664872857, "learning_rate": 2.1207052751156103e-06, "loss": 0.5882, "step": 15793 }, { "epoch": 1.41, "grad_norm": 6.290667997470873, "learning_rate": 2.1201147312099126e-06, "loss": 0.5591, "step": 15794 }, { "epoch": 1.41, "grad_norm": 6.50402363550051, "learning_rate": 2.119524247415513e-06, "loss": 0.5148, "step": 15795 }, { "epoch": 1.41, "grad_norm": 8.515651450989647, "learning_rate": 2.1189338237447365e-06, "loss": 0.5599, "step": 15796 }, { "epoch": 1.41, "grad_norm": 11.101668510674969, "learning_rate": 2.1183434602099094e-06, "loss": 0.5111, "step": 15797 }, { "epoch": 1.41, "grad_norm": 6.718096241499279, "learning_rate": 2.1177531568233497e-06, "loss": 0.6009, "step": 15798 }, { "epoch": 1.41, "grad_norm": 5.612136264877197, "learning_rate": 2.117162913597381e-06, "loss": 0.5588, "step": 15799 }, { "epoch": 1.41, "grad_norm": 7.631296961227163, "learning_rate": 2.116572730544323e-06, "loss": 0.6159, "step": 15800 }, { "epoch": 1.41, "grad_norm": 7.143538361540989, "learning_rate": 2.1159826076764946e-06, "loss": 0.6211, "step": 15801 }, { "epoch": 1.41, "grad_norm": 7.1813333258444425, "learning_rate": 2.115392545006213e-06, "loss": 0.5916, "step": 15802 }, { "epoch": 1.41, "grad_norm": 6.846940547652701, "learning_rate": 2.114802542545797e-06, "loss": 0.561, "step": 15803 }, { "epoch": 1.41, "grad_norm": 4.803596312450302, "learning_rate": 2.1142126003075576e-06, "loss": 0.5494, "step": 15804 }, { "epoch": 1.41, "grad_norm": 6.344427521234288, "learning_rate": 2.1136227183038104e-06, "loss": 0.5925, "step": 15805 }, { "epoch": 1.41, "grad_norm": 5.98306049768175, "learning_rate": 2.1130328965468684e-06, "loss": 0.5766, "step": 15806 }, { "epoch": 1.41, "grad_norm": 6.358778063776489, "learning_rate": 2.112443135049044e-06, "loss": 0.5855, "step": 15807 }, { "epoch": 1.41, "grad_norm": 6.112862458615103, "learning_rate": 2.111853433822643e-06, "loss": 0.5819, "step": 15808 }, { "epoch": 1.41, "grad_norm": 7.0109680034700705, "learning_rate": 2.1112637928799766e-06, "loss": 0.5587, "step": 15809 }, { "epoch": 1.41, "grad_norm": 7.251478111528706, "learning_rate": 2.110674212233355e-06, "loss": 0.6339, "step": 15810 }, { "epoch": 1.41, "grad_norm": 5.451209272611212, "learning_rate": 2.1100846918950796e-06, "loss": 0.5748, "step": 15811 }, { "epoch": 1.41, "grad_norm": 4.989117766534107, "learning_rate": 2.109495231877457e-06, "loss": 0.6017, "step": 15812 }, { "epoch": 1.41, "grad_norm": 6.54673761074574, "learning_rate": 2.1089058321927917e-06, "loss": 0.6382, "step": 15813 }, { "epoch": 1.41, "grad_norm": 5.499827680918088, "learning_rate": 2.1083164928533856e-06, "loss": 0.5724, "step": 15814 }, { "epoch": 1.41, "grad_norm": 5.628861307391039, "learning_rate": 2.1077272138715402e-06, "loss": 0.5601, "step": 15815 }, { "epoch": 1.41, "grad_norm": 7.55191049189998, "learning_rate": 2.107137995259557e-06, "loss": 0.6074, "step": 15816 }, { "epoch": 1.41, "grad_norm": 9.54037532679063, "learning_rate": 2.106548837029731e-06, "loss": 0.6261, "step": 15817 }, { "epoch": 1.41, "grad_norm": 6.1551083746490605, "learning_rate": 2.105959739194362e-06, "loss": 0.6214, "step": 15818 }, { "epoch": 1.41, "grad_norm": 6.991594501125826, "learning_rate": 2.105370701765745e-06, "loss": 0.5824, "step": 15819 }, { "epoch": 1.41, "grad_norm": 7.119155241654609, "learning_rate": 2.104781724756176e-06, "loss": 0.6237, "step": 15820 }, { "epoch": 1.41, "grad_norm": 7.70986421733844, "learning_rate": 2.1041928081779488e-06, "loss": 0.5637, "step": 15821 }, { "epoch": 1.41, "grad_norm": 4.527250557326591, "learning_rate": 2.103603952043354e-06, "loss": 0.5685, "step": 15822 }, { "epoch": 1.41, "grad_norm": 8.863754844414323, "learning_rate": 2.1030151563646857e-06, "loss": 0.5964, "step": 15823 }, { "epoch": 1.41, "grad_norm": 4.810719152340684, "learning_rate": 2.1024264211542295e-06, "loss": 0.5651, "step": 15824 }, { "epoch": 1.41, "grad_norm": 7.191539446976396, "learning_rate": 2.101837746424276e-06, "loss": 0.6497, "step": 15825 }, { "epoch": 1.41, "grad_norm": 6.574389521844845, "learning_rate": 2.1012491321871136e-06, "loss": 0.5399, "step": 15826 }, { "epoch": 1.41, "grad_norm": 7.4411305070226215, "learning_rate": 2.1006605784550265e-06, "loss": 0.5617, "step": 15827 }, { "epoch": 1.41, "grad_norm": 7.69950060157889, "learning_rate": 2.1000720852403006e-06, "loss": 0.6183, "step": 15828 }, { "epoch": 1.41, "grad_norm": 7.233064871417518, "learning_rate": 2.0994836525552216e-06, "loss": 0.5475, "step": 15829 }, { "epoch": 1.41, "grad_norm": 7.178073218576332, "learning_rate": 2.0988952804120665e-06, "loss": 0.6401, "step": 15830 }, { "epoch": 1.41, "grad_norm": 11.48091220370895, "learning_rate": 2.09830696882312e-06, "loss": 0.5924, "step": 15831 }, { "epoch": 1.41, "grad_norm": 7.726544970210392, "learning_rate": 2.0977187178006603e-06, "loss": 0.6108, "step": 15832 }, { "epoch": 1.41, "grad_norm": 7.493024074615426, "learning_rate": 2.097130527356967e-06, "loss": 0.6887, "step": 15833 }, { "epoch": 1.41, "grad_norm": 5.77517073545966, "learning_rate": 2.0965423975043163e-06, "loss": 0.5874, "step": 15834 }, { "epoch": 1.41, "grad_norm": 9.376542625033617, "learning_rate": 2.0959543282549864e-06, "loss": 0.6239, "step": 15835 }, { "epoch": 1.41, "grad_norm": 6.084768083218725, "learning_rate": 2.09536631962125e-06, "loss": 0.6112, "step": 15836 }, { "epoch": 1.41, "grad_norm": 6.663233268856237, "learning_rate": 2.09477837161538e-06, "loss": 0.542, "step": 15837 }, { "epoch": 1.41, "grad_norm": 6.462507541565511, "learning_rate": 2.094190484249648e-06, "loss": 0.4877, "step": 15838 }, { "epoch": 1.41, "grad_norm": 5.4968173694307145, "learning_rate": 2.093602657536327e-06, "loss": 0.5857, "step": 15839 }, { "epoch": 1.41, "grad_norm": 5.5866405970213, "learning_rate": 2.0930148914876848e-06, "loss": 0.6428, "step": 15840 }, { "epoch": 1.41, "grad_norm": 5.578651769847526, "learning_rate": 2.092427186115991e-06, "loss": 0.5947, "step": 15841 }, { "epoch": 1.41, "grad_norm": 6.345894580000176, "learning_rate": 2.091839541433513e-06, "loss": 0.5434, "step": 15842 }, { "epoch": 1.41, "grad_norm": 6.009414613587293, "learning_rate": 2.091251957452515e-06, "loss": 0.6036, "step": 15843 }, { "epoch": 1.41, "grad_norm": 6.089714893731891, "learning_rate": 2.0906644341852623e-06, "loss": 0.5901, "step": 15844 }, { "epoch": 1.41, "grad_norm": 7.7328367371378866, "learning_rate": 2.090076971644018e-06, "loss": 0.586, "step": 15845 }, { "epoch": 1.41, "grad_norm": 7.306922764053833, "learning_rate": 2.089489569841044e-06, "loss": 0.6063, "step": 15846 }, { "epoch": 1.41, "grad_norm": 5.967776470254952, "learning_rate": 2.0889022287886034e-06, "loss": 0.5729, "step": 15847 }, { "epoch": 1.41, "grad_norm": 8.349808263351427, "learning_rate": 2.088314948498952e-06, "loss": 0.5579, "step": 15848 }, { "epoch": 1.41, "grad_norm": 6.530259061203962, "learning_rate": 2.087727728984349e-06, "loss": 0.6075, "step": 15849 }, { "epoch": 1.41, "grad_norm": 7.466231216346293, "learning_rate": 2.087140570257054e-06, "loss": 0.6223, "step": 15850 }, { "epoch": 1.41, "grad_norm": 8.006262937775748, "learning_rate": 2.086553472329319e-06, "loss": 0.5263, "step": 15851 }, { "epoch": 1.41, "grad_norm": 6.406007823940519, "learning_rate": 2.0859664352133997e-06, "loss": 0.5913, "step": 15852 }, { "epoch": 1.41, "grad_norm": 6.6262602915417, "learning_rate": 2.085379458921549e-06, "loss": 0.5992, "step": 15853 }, { "epoch": 1.41, "grad_norm": 6.97501010009488, "learning_rate": 2.08479254346602e-06, "loss": 0.6272, "step": 15854 }, { "epoch": 1.41, "grad_norm": 7.007084030741908, "learning_rate": 2.084205688859064e-06, "loss": 0.632, "step": 15855 }, { "epoch": 1.41, "grad_norm": 7.804467956836428, "learning_rate": 2.0836188951129267e-06, "loss": 0.6072, "step": 15856 }, { "epoch": 1.41, "grad_norm": 7.946640947023971, "learning_rate": 2.0830321622398588e-06, "loss": 0.6186, "step": 15857 }, { "epoch": 1.41, "grad_norm": 4.944131918069959, "learning_rate": 2.0824454902521056e-06, "loss": 0.5271, "step": 15858 }, { "epoch": 1.41, "grad_norm": 7.445833340203318, "learning_rate": 2.0818588791619137e-06, "loss": 0.5788, "step": 15859 }, { "epoch": 1.41, "grad_norm": 7.832684914665286, "learning_rate": 2.0812723289815294e-06, "loss": 0.5802, "step": 15860 }, { "epoch": 1.41, "grad_norm": 7.605933292814659, "learning_rate": 2.080685839723191e-06, "loss": 0.5335, "step": 15861 }, { "epoch": 1.42, "grad_norm": 7.127773116154133, "learning_rate": 2.080099411399143e-06, "loss": 0.5977, "step": 15862 }, { "epoch": 1.42, "grad_norm": 7.568861836182093, "learning_rate": 2.079513044021625e-06, "loss": 0.5692, "step": 15863 }, { "epoch": 1.42, "grad_norm": 5.477152709754503, "learning_rate": 2.0789267376028766e-06, "loss": 0.619, "step": 15864 }, { "epoch": 1.42, "grad_norm": 6.287732142187681, "learning_rate": 2.078340492155137e-06, "loss": 0.6034, "step": 15865 }, { "epoch": 1.42, "grad_norm": 5.1855103442060155, "learning_rate": 2.07775430769064e-06, "loss": 0.6194, "step": 15866 }, { "epoch": 1.42, "grad_norm": 7.487852517070933, "learning_rate": 2.0771681842216236e-06, "loss": 0.6228, "step": 15867 }, { "epoch": 1.42, "grad_norm": 8.697018057539774, "learning_rate": 2.0765821217603184e-06, "loss": 0.5573, "step": 15868 }, { "epoch": 1.42, "grad_norm": 6.717883430251962, "learning_rate": 2.075996120318959e-06, "loss": 0.5498, "step": 15869 }, { "epoch": 1.42, "grad_norm": 6.17180001263375, "learning_rate": 2.075410179909778e-06, "loss": 0.5621, "step": 15870 }, { "epoch": 1.42, "grad_norm": 8.368616124983477, "learning_rate": 2.074824300545005e-06, "loss": 0.5997, "step": 15871 }, { "epoch": 1.42, "grad_norm": 7.6142468108445325, "learning_rate": 2.0742384822368677e-06, "loss": 0.6142, "step": 15872 }, { "epoch": 1.42, "grad_norm": 5.969723460376286, "learning_rate": 2.0736527249975975e-06, "loss": 0.6245, "step": 15873 }, { "epoch": 1.42, "grad_norm": 9.426794375040764, "learning_rate": 2.0730670288394155e-06, "loss": 0.5236, "step": 15874 }, { "epoch": 1.42, "grad_norm": 4.641289138735613, "learning_rate": 2.0724813937745496e-06, "loss": 0.5406, "step": 15875 }, { "epoch": 1.42, "grad_norm": 6.830927903281725, "learning_rate": 2.071895819815224e-06, "loss": 0.5971, "step": 15876 }, { "epoch": 1.42, "grad_norm": 8.147245355764186, "learning_rate": 2.07131030697366e-06, "loss": 0.6186, "step": 15877 }, { "epoch": 1.42, "grad_norm": 5.860390016140075, "learning_rate": 2.07072485526208e-06, "loss": 0.5645, "step": 15878 }, { "epoch": 1.42, "grad_norm": 7.524945605625171, "learning_rate": 2.070139464692705e-06, "loss": 0.5161, "step": 15879 }, { "epoch": 1.42, "grad_norm": 5.482882574671355, "learning_rate": 2.0695541352777533e-06, "loss": 0.6248, "step": 15880 }, { "epoch": 1.42, "grad_norm": 9.498791917131, "learning_rate": 2.068968867029439e-06, "loss": 0.5968, "step": 15881 }, { "epoch": 1.42, "grad_norm": 6.3990774763065605, "learning_rate": 2.0683836599599806e-06, "loss": 0.6186, "step": 15882 }, { "epoch": 1.42, "grad_norm": 5.734927371576587, "learning_rate": 2.0677985140815932e-06, "loss": 0.593, "step": 15883 }, { "epoch": 1.42, "grad_norm": 7.91964649850289, "learning_rate": 2.067213429406491e-06, "loss": 0.6225, "step": 15884 }, { "epoch": 1.42, "grad_norm": 4.999450455976784, "learning_rate": 2.066628405946885e-06, "loss": 0.5479, "step": 15885 }, { "epoch": 1.42, "grad_norm": 7.340778814117683, "learning_rate": 2.0660434437149898e-06, "loss": 0.6317, "step": 15886 }, { "epoch": 1.42, "grad_norm": 7.495256066293773, "learning_rate": 2.0654585427230097e-06, "loss": 0.5821, "step": 15887 }, { "epoch": 1.42, "grad_norm": 8.328512374538857, "learning_rate": 2.0648737029831567e-06, "loss": 0.5731, "step": 15888 }, { "epoch": 1.42, "grad_norm": 6.829210866076961, "learning_rate": 2.064288924507637e-06, "loss": 0.5736, "step": 15889 }, { "epoch": 1.42, "grad_norm": 5.824611374698113, "learning_rate": 2.0637042073086566e-06, "loss": 0.558, "step": 15890 }, { "epoch": 1.42, "grad_norm": 8.109226521166494, "learning_rate": 2.063119551398421e-06, "loss": 0.5705, "step": 15891 }, { "epoch": 1.42, "grad_norm": 9.519451920797252, "learning_rate": 2.0625349567891343e-06, "loss": 0.5557, "step": 15892 }, { "epoch": 1.42, "grad_norm": 9.871335058618977, "learning_rate": 2.0619504234929966e-06, "loss": 0.6642, "step": 15893 }, { "epoch": 1.42, "grad_norm": 8.956002981923469, "learning_rate": 2.0613659515222108e-06, "loss": 0.6153, "step": 15894 }, { "epoch": 1.42, "grad_norm": 6.289504458340173, "learning_rate": 2.060781540888973e-06, "loss": 0.5858, "step": 15895 }, { "epoch": 1.42, "grad_norm": 6.265328717777523, "learning_rate": 2.060197191605485e-06, "loss": 0.5732, "step": 15896 }, { "epoch": 1.42, "grad_norm": 12.009683445188044, "learning_rate": 2.0596129036839415e-06, "loss": 0.6305, "step": 15897 }, { "epoch": 1.42, "grad_norm": 6.377841796975436, "learning_rate": 2.05902867713654e-06, "loss": 0.5482, "step": 15898 }, { "epoch": 1.42, "grad_norm": 4.925834181287161, "learning_rate": 2.058444511975475e-06, "loss": 0.5595, "step": 15899 }, { "epoch": 1.42, "grad_norm": 5.140999447968709, "learning_rate": 2.0578604082129383e-06, "loss": 0.616, "step": 15900 }, { "epoch": 1.42, "grad_norm": 7.26401068514973, "learning_rate": 2.0572763658611215e-06, "loss": 0.5854, "step": 15901 }, { "epoch": 1.42, "grad_norm": 10.416149798527723, "learning_rate": 2.056692384932217e-06, "loss": 0.6126, "step": 15902 }, { "epoch": 1.42, "grad_norm": 7.350137118790361, "learning_rate": 2.056108465438412e-06, "loss": 0.6033, "step": 15903 }, { "epoch": 1.42, "grad_norm": 5.260592519656983, "learning_rate": 2.055524607391897e-06, "loss": 0.5771, "step": 15904 }, { "epoch": 1.42, "grad_norm": 6.112199928615497, "learning_rate": 2.0549408108048584e-06, "loss": 0.5537, "step": 15905 }, { "epoch": 1.42, "grad_norm": 6.122640962536826, "learning_rate": 2.0543570756894797e-06, "loss": 0.56, "step": 15906 }, { "epoch": 1.42, "grad_norm": 5.075638964881355, "learning_rate": 2.053773402057946e-06, "loss": 0.5917, "step": 15907 }, { "epoch": 1.42, "grad_norm": 6.97282009949862, "learning_rate": 2.0531897899224424e-06, "loss": 0.6115, "step": 15908 }, { "epoch": 1.42, "grad_norm": 5.334086882654917, "learning_rate": 2.0526062392951464e-06, "loss": 0.5881, "step": 15909 }, { "epoch": 1.42, "grad_norm": 6.290725868814526, "learning_rate": 2.0520227501882405e-06, "loss": 0.5839, "step": 15910 }, { "epoch": 1.42, "grad_norm": 6.131939786175758, "learning_rate": 2.0514393226139033e-06, "loss": 0.6402, "step": 15911 }, { "epoch": 1.42, "grad_norm": 8.939074274030288, "learning_rate": 2.0508559565843155e-06, "loss": 0.5939, "step": 15912 }, { "epoch": 1.42, "grad_norm": 8.724167599367004, "learning_rate": 2.050272652111649e-06, "loss": 0.5389, "step": 15913 }, { "epoch": 1.42, "grad_norm": 7.938776331629604, "learning_rate": 2.0496894092080817e-06, "loss": 0.5883, "step": 15914 }, { "epoch": 1.42, "grad_norm": 8.662818609701498, "learning_rate": 2.049106227885786e-06, "loss": 0.5688, "step": 15915 }, { "epoch": 1.42, "grad_norm": 7.705934058168152, "learning_rate": 2.0485231081569356e-06, "loss": 0.5698, "step": 15916 }, { "epoch": 1.42, "grad_norm": 8.7342496087874, "learning_rate": 2.0479400500337023e-06, "loss": 0.5903, "step": 15917 }, { "epoch": 1.42, "grad_norm": 6.207447837349036, "learning_rate": 2.047357053528257e-06, "loss": 0.6287, "step": 15918 }, { "epoch": 1.42, "grad_norm": 7.153135841280834, "learning_rate": 2.046774118652765e-06, "loss": 0.6111, "step": 15919 }, { "epoch": 1.42, "grad_norm": 5.909478709812742, "learning_rate": 2.046191245419396e-06, "loss": 0.5348, "step": 15920 }, { "epoch": 1.42, "grad_norm": 9.073960735227457, "learning_rate": 2.045608433840316e-06, "loss": 0.611, "step": 15921 }, { "epoch": 1.42, "grad_norm": 8.344778417138222, "learning_rate": 2.0450256839276917e-06, "loss": 0.636, "step": 15922 }, { "epoch": 1.42, "grad_norm": 6.8580318172095724, "learning_rate": 2.0444429956936836e-06, "loss": 0.5987, "step": 15923 }, { "epoch": 1.42, "grad_norm": 6.281737178797946, "learning_rate": 2.043860369150455e-06, "loss": 0.543, "step": 15924 }, { "epoch": 1.42, "grad_norm": 6.164453545096212, "learning_rate": 2.043277804310169e-06, "loss": 0.5762, "step": 15925 }, { "epoch": 1.42, "grad_norm": 7.487062003135259, "learning_rate": 2.042695301184982e-06, "loss": 0.6334, "step": 15926 }, { "epoch": 1.42, "grad_norm": 7.499123742038753, "learning_rate": 2.042112859787054e-06, "loss": 0.5892, "step": 15927 }, { "epoch": 1.42, "grad_norm": 5.770798266330652, "learning_rate": 2.0415304801285435e-06, "loss": 0.6055, "step": 15928 }, { "epoch": 1.42, "grad_norm": 5.15318418995858, "learning_rate": 2.0409481622216044e-06, "loss": 0.6245, "step": 15929 }, { "epoch": 1.42, "grad_norm": 7.013526019282697, "learning_rate": 2.040365906078392e-06, "loss": 0.6079, "step": 15930 }, { "epoch": 1.42, "grad_norm": 8.26226499231029, "learning_rate": 2.039783711711062e-06, "loss": 0.5882, "step": 15931 }, { "epoch": 1.42, "grad_norm": 8.320676343796011, "learning_rate": 2.0392015791317625e-06, "loss": 0.6587, "step": 15932 }, { "epoch": 1.42, "grad_norm": 7.276092686077966, "learning_rate": 2.038619508352646e-06, "loss": 0.5758, "step": 15933 }, { "epoch": 1.42, "grad_norm": 6.22071042921743, "learning_rate": 2.0380374993858616e-06, "loss": 0.6238, "step": 15934 }, { "epoch": 1.42, "grad_norm": 8.794324270514013, "learning_rate": 2.037455552243558e-06, "loss": 0.5867, "step": 15935 }, { "epoch": 1.42, "grad_norm": 8.366755859389485, "learning_rate": 2.0368736669378843e-06, "loss": 0.6461, "step": 15936 }, { "epoch": 1.42, "grad_norm": 6.981158329554912, "learning_rate": 2.036291843480981e-06, "loss": 0.5854, "step": 15937 }, { "epoch": 1.42, "grad_norm": 6.574797131935246, "learning_rate": 2.035710081884997e-06, "loss": 0.5977, "step": 15938 }, { "epoch": 1.42, "grad_norm": 6.170404655659058, "learning_rate": 2.0351283821620713e-06, "loss": 0.5625, "step": 15939 }, { "epoch": 1.42, "grad_norm": 8.191045342917489, "learning_rate": 2.0345467443243483e-06, "loss": 0.621, "step": 15940 }, { "epoch": 1.42, "grad_norm": 6.255107766918525, "learning_rate": 2.033965168383968e-06, "loss": 0.6033, "step": 15941 }, { "epoch": 1.42, "grad_norm": 7.967790026151575, "learning_rate": 2.033383654353069e-06, "loss": 0.5858, "step": 15942 }, { "epoch": 1.42, "grad_norm": 7.412993899795627, "learning_rate": 2.0328022022437905e-06, "loss": 0.5763, "step": 15943 }, { "epoch": 1.42, "grad_norm": 5.285440517820598, "learning_rate": 2.032220812068267e-06, "loss": 0.5607, "step": 15944 }, { "epoch": 1.42, "grad_norm": 7.531616418562098, "learning_rate": 2.0316394838386345e-06, "loss": 0.647, "step": 15945 }, { "epoch": 1.42, "grad_norm": 5.443811596534067, "learning_rate": 2.0310582175670274e-06, "loss": 0.5986, "step": 15946 }, { "epoch": 1.42, "grad_norm": 5.464872370033625, "learning_rate": 2.0304770132655777e-06, "loss": 0.5271, "step": 15947 }, { "epoch": 1.42, "grad_norm": 6.5396999885186595, "learning_rate": 2.0298958709464173e-06, "loss": 0.564, "step": 15948 }, { "epoch": 1.42, "grad_norm": 7.301182256868482, "learning_rate": 2.029314790621678e-06, "loss": 0.6193, "step": 15949 }, { "epoch": 1.42, "grad_norm": 8.062049731388118, "learning_rate": 2.028733772303485e-06, "loss": 0.597, "step": 15950 }, { "epoch": 1.42, "grad_norm": 8.862842157304733, "learning_rate": 2.02815281600397e-06, "loss": 0.5723, "step": 15951 }, { "epoch": 1.42, "grad_norm": 5.4835716760137565, "learning_rate": 2.0275719217352542e-06, "loss": 0.5954, "step": 15952 }, { "epoch": 1.42, "grad_norm": 5.535705313679449, "learning_rate": 2.0269910895094645e-06, "loss": 0.5529, "step": 15953 }, { "epoch": 1.42, "grad_norm": 4.670363611489401, "learning_rate": 2.026410319338726e-06, "loss": 0.6731, "step": 15954 }, { "epoch": 1.42, "grad_norm": 9.311778748298462, "learning_rate": 2.02582961123516e-06, "loss": 0.6234, "step": 15955 }, { "epoch": 1.42, "grad_norm": 6.7934270309874885, "learning_rate": 2.0252489652108885e-06, "loss": 0.5647, "step": 15956 }, { "epoch": 1.42, "grad_norm": 7.5466940418928194, "learning_rate": 2.0246683812780295e-06, "loss": 0.544, "step": 15957 }, { "epoch": 1.42, "grad_norm": 5.498004374543092, "learning_rate": 2.0240878594487012e-06, "loss": 0.4943, "step": 15958 }, { "epoch": 1.42, "grad_norm": 6.362091421214674, "learning_rate": 2.0235073997350226e-06, "loss": 0.5554, "step": 15959 }, { "epoch": 1.42, "grad_norm": 6.362735782348668, "learning_rate": 2.0229270021491083e-06, "loss": 0.6098, "step": 15960 }, { "epoch": 1.42, "grad_norm": 7.797016668315229, "learning_rate": 2.0223466667030727e-06, "loss": 0.561, "step": 15961 }, { "epoch": 1.42, "grad_norm": 8.22249383342666, "learning_rate": 2.021766393409032e-06, "loss": 0.5865, "step": 15962 }, { "epoch": 1.42, "grad_norm": 6.5792888205845275, "learning_rate": 2.0211861822790934e-06, "loss": 0.5735, "step": 15963 }, { "epoch": 1.42, "grad_norm": 5.908960075784189, "learning_rate": 2.0206060333253694e-06, "loss": 0.5397, "step": 15964 }, { "epoch": 1.42, "grad_norm": 5.5144034065023435, "learning_rate": 2.02002594655997e-06, "loss": 0.5681, "step": 15965 }, { "epoch": 1.42, "grad_norm": 6.329578421810544, "learning_rate": 2.019445921995005e-06, "loss": 0.5775, "step": 15966 }, { "epoch": 1.42, "grad_norm": 7.554309050930799, "learning_rate": 2.0188659596425775e-06, "loss": 0.5543, "step": 15967 }, { "epoch": 1.42, "grad_norm": 6.163947760793578, "learning_rate": 2.018286059514794e-06, "loss": 0.614, "step": 15968 }, { "epoch": 1.42, "grad_norm": 4.5336397464142575, "learning_rate": 2.0177062216237608e-06, "loss": 0.623, "step": 15969 }, { "epoch": 1.42, "grad_norm": 5.829518955528446, "learning_rate": 2.0171264459815776e-06, "loss": 0.5652, "step": 15970 }, { "epoch": 1.42, "grad_norm": 5.63283050343638, "learning_rate": 2.0165467326003475e-06, "loss": 0.5572, "step": 15971 }, { "epoch": 1.42, "grad_norm": 6.35501040940004, "learning_rate": 2.0159670814921704e-06, "loss": 0.5861, "step": 15972 }, { "epoch": 1.42, "grad_norm": 5.389063714099731, "learning_rate": 2.0153874926691465e-06, "loss": 0.56, "step": 15973 }, { "epoch": 1.43, "grad_norm": 8.011939506165056, "learning_rate": 2.014807966143372e-06, "loss": 0.5884, "step": 15974 }, { "epoch": 1.43, "grad_norm": 7.411127141636999, "learning_rate": 2.0142285019269446e-06, "loss": 0.5778, "step": 15975 }, { "epoch": 1.43, "grad_norm": 7.454164134292326, "learning_rate": 2.013649100031958e-06, "loss": 0.5369, "step": 15976 }, { "epoch": 1.43, "grad_norm": 7.77670483070315, "learning_rate": 2.013069760470506e-06, "loss": 0.5849, "step": 15977 }, { "epoch": 1.43, "grad_norm": 7.444345996877541, "learning_rate": 2.012490483254681e-06, "loss": 0.6043, "step": 15978 }, { "epoch": 1.43, "grad_norm": 7.955383615343555, "learning_rate": 2.0119112683965753e-06, "loss": 0.6303, "step": 15979 }, { "epoch": 1.43, "grad_norm": 9.93953554011719, "learning_rate": 2.011332115908279e-06, "loss": 0.6254, "step": 15980 }, { "epoch": 1.43, "grad_norm": 5.715957344939878, "learning_rate": 2.0107530258018785e-06, "loss": 0.5797, "step": 15981 }, { "epoch": 1.43, "grad_norm": 7.906450882151817, "learning_rate": 2.0101739980894646e-06, "loss": 0.5731, "step": 15982 }, { "epoch": 1.43, "grad_norm": 7.761568198542431, "learning_rate": 2.0095950327831184e-06, "loss": 0.609, "step": 15983 }, { "epoch": 1.43, "grad_norm": 7.139236744289577, "learning_rate": 2.0090161298949273e-06, "loss": 0.6449, "step": 15984 }, { "epoch": 1.43, "grad_norm": 7.734790040946267, "learning_rate": 2.0084372894369743e-06, "loss": 0.6186, "step": 15985 }, { "epoch": 1.43, "grad_norm": 5.060260444738768, "learning_rate": 2.007858511421341e-06, "loss": 0.5252, "step": 15986 }, { "epoch": 1.43, "grad_norm": 5.471837914167092, "learning_rate": 2.0072797958601093e-06, "loss": 0.5645, "step": 15987 }, { "epoch": 1.43, "grad_norm": 9.889079945000988, "learning_rate": 2.0067011427653594e-06, "loss": 0.5702, "step": 15988 }, { "epoch": 1.43, "grad_norm": 5.580752441840207, "learning_rate": 2.0061225521491657e-06, "loss": 0.6035, "step": 15989 }, { "epoch": 1.43, "grad_norm": 6.852854827390154, "learning_rate": 2.005544024023608e-06, "loss": 0.6082, "step": 15990 }, { "epoch": 1.43, "grad_norm": 8.090219504366223, "learning_rate": 2.0049655584007606e-06, "loss": 0.6227, "step": 15991 }, { "epoch": 1.43, "grad_norm": 10.490579929952798, "learning_rate": 2.004387155292698e-06, "loss": 0.5784, "step": 15992 }, { "epoch": 1.43, "grad_norm": 5.943457950422283, "learning_rate": 2.003808814711494e-06, "loss": 0.5362, "step": 15993 }, { "epoch": 1.43, "grad_norm": 5.591457854030872, "learning_rate": 2.0032305366692208e-06, "loss": 0.6673, "step": 15994 }, { "epoch": 1.43, "grad_norm": 4.47664965051732, "learning_rate": 2.002652321177947e-06, "loss": 0.6692, "step": 15995 }, { "epoch": 1.43, "grad_norm": 6.05004000002149, "learning_rate": 2.0020741682497402e-06, "loss": 0.5771, "step": 15996 }, { "epoch": 1.43, "grad_norm": 7.969202451221911, "learning_rate": 2.0014960778966702e-06, "loss": 0.5766, "step": 15997 }, { "epoch": 1.43, "grad_norm": 7.694087904360678, "learning_rate": 2.000918050130803e-06, "loss": 0.6168, "step": 15998 }, { "epoch": 1.43, "grad_norm": 7.181265260013639, "learning_rate": 2.000340084964203e-06, "loss": 0.6091, "step": 15999 }, { "epoch": 1.43, "grad_norm": 8.354382593445214, "learning_rate": 1.9997621824089354e-06, "loss": 0.6587, "step": 16000 }, { "epoch": 1.43, "grad_norm": 6.814919070473513, "learning_rate": 1.999184342477063e-06, "loss": 0.6367, "step": 16001 }, { "epoch": 1.43, "grad_norm": 8.178997957531067, "learning_rate": 1.998606565180644e-06, "loss": 0.609, "step": 16002 }, { "epoch": 1.43, "grad_norm": 6.1142017619619855, "learning_rate": 1.99802885053174e-06, "loss": 0.5667, "step": 16003 }, { "epoch": 1.43, "grad_norm": 5.1510973925132255, "learning_rate": 1.99745119854241e-06, "loss": 0.5763, "step": 16004 }, { "epoch": 1.43, "grad_norm": 6.415211382322082, "learning_rate": 1.9968736092247104e-06, "loss": 0.6202, "step": 16005 }, { "epoch": 1.43, "grad_norm": 5.617842435271332, "learning_rate": 1.9962960825906973e-06, "loss": 0.5722, "step": 16006 }, { "epoch": 1.43, "grad_norm": 6.425534766451619, "learning_rate": 1.995718618652428e-06, "loss": 0.5958, "step": 16007 }, { "epoch": 1.43, "grad_norm": 7.422413930611282, "learning_rate": 1.995141217421951e-06, "loss": 0.544, "step": 16008 }, { "epoch": 1.43, "grad_norm": 6.908846958222902, "learning_rate": 1.994563878911322e-06, "loss": 0.5651, "step": 16009 }, { "epoch": 1.43, "grad_norm": 6.526125178781459, "learning_rate": 1.9939866031325887e-06, "loss": 0.5526, "step": 16010 }, { "epoch": 1.43, "grad_norm": 5.688337989263283, "learning_rate": 1.9934093900978025e-06, "loss": 0.5786, "step": 16011 }, { "epoch": 1.43, "grad_norm": 5.8149991922480675, "learning_rate": 1.9928322398190105e-06, "loss": 0.5981, "step": 16012 }, { "epoch": 1.43, "grad_norm": 6.979665016270829, "learning_rate": 1.99225515230826e-06, "loss": 0.634, "step": 16013 }, { "epoch": 1.43, "grad_norm": 7.7753638466426835, "learning_rate": 1.991678127577598e-06, "loss": 0.5941, "step": 16014 }, { "epoch": 1.43, "grad_norm": 7.927203650455647, "learning_rate": 1.9911011656390654e-06, "loss": 0.5612, "step": 16015 }, { "epoch": 1.43, "grad_norm": 6.201180940526621, "learning_rate": 1.9905242665047063e-06, "loss": 0.5753, "step": 16016 }, { "epoch": 1.43, "grad_norm": 5.340636232779513, "learning_rate": 1.989947430186563e-06, "loss": 0.5866, "step": 16017 }, { "epoch": 1.43, "grad_norm": 8.068618936842679, "learning_rate": 1.9893706566966747e-06, "loss": 0.6116, "step": 16018 }, { "epoch": 1.43, "grad_norm": 7.001688415482067, "learning_rate": 1.988793946047083e-06, "loss": 0.5993, "step": 16019 }, { "epoch": 1.43, "grad_norm": 7.427248547755618, "learning_rate": 1.988217298249821e-06, "loss": 0.5758, "step": 16020 }, { "epoch": 1.43, "grad_norm": 7.004449054998432, "learning_rate": 1.987640713316927e-06, "loss": 0.5712, "step": 16021 }, { "epoch": 1.43, "grad_norm": 6.648007732663391, "learning_rate": 1.987064191260437e-06, "loss": 0.5927, "step": 16022 }, { "epoch": 1.43, "grad_norm": 6.476487092090985, "learning_rate": 1.9864877320923854e-06, "loss": 0.5639, "step": 16023 }, { "epoch": 1.43, "grad_norm": 11.165538250763875, "learning_rate": 1.9859113358248005e-06, "loss": 0.6691, "step": 16024 }, { "epoch": 1.43, "grad_norm": 6.070824792300607, "learning_rate": 1.985335002469716e-06, "loss": 0.5576, "step": 16025 }, { "epoch": 1.43, "grad_norm": 6.758979443702664, "learning_rate": 1.9847587320391633e-06, "loss": 0.5754, "step": 16026 }, { "epoch": 1.43, "grad_norm": 9.136558313719993, "learning_rate": 1.9841825245451666e-06, "loss": 0.6132, "step": 16027 }, { "epoch": 1.43, "grad_norm": 6.8633714638441825, "learning_rate": 1.983606379999756e-06, "loss": 0.5656, "step": 16028 }, { "epoch": 1.43, "grad_norm": 5.469485251449335, "learning_rate": 1.983030298414956e-06, "loss": 0.611, "step": 16029 }, { "epoch": 1.43, "grad_norm": 6.018749936711844, "learning_rate": 1.982454279802791e-06, "loss": 0.6068, "step": 16030 }, { "epoch": 1.43, "grad_norm": 6.730677162802577, "learning_rate": 1.981878324175285e-06, "loss": 0.5593, "step": 16031 }, { "epoch": 1.43, "grad_norm": 8.506726032742797, "learning_rate": 1.9813024315444614e-06, "loss": 0.6043, "step": 16032 }, { "epoch": 1.43, "grad_norm": 6.172257752684266, "learning_rate": 1.9807266019223365e-06, "loss": 0.6218, "step": 16033 }, { "epoch": 1.43, "grad_norm": 7.587829837895418, "learning_rate": 1.980150835320932e-06, "loss": 0.5282, "step": 16034 }, { "epoch": 1.43, "grad_norm": 5.811627431119983, "learning_rate": 1.9795751317522654e-06, "loss": 0.606, "step": 16035 }, { "epoch": 1.43, "grad_norm": 5.646944549902293, "learning_rate": 1.9789994912283533e-06, "loss": 0.5311, "step": 16036 }, { "epoch": 1.43, "grad_norm": 9.077002930116182, "learning_rate": 1.978423913761211e-06, "loss": 0.5695, "step": 16037 }, { "epoch": 1.43, "grad_norm": 6.081621325272147, "learning_rate": 1.977848399362854e-06, "loss": 0.499, "step": 16038 }, { "epoch": 1.43, "grad_norm": 5.995064415461952, "learning_rate": 1.9772729480452928e-06, "loss": 0.5853, "step": 16039 }, { "epoch": 1.43, "grad_norm": 6.704191357641765, "learning_rate": 1.9766975598205384e-06, "loss": 0.631, "step": 16040 }, { "epoch": 1.43, "grad_norm": 9.318486079708535, "learning_rate": 1.976122234700601e-06, "loss": 0.577, "step": 16041 }, { "epoch": 1.43, "grad_norm": 8.745813370487864, "learning_rate": 1.97554697269749e-06, "loss": 0.6303, "step": 16042 }, { "epoch": 1.43, "grad_norm": 6.259947703411182, "learning_rate": 1.9749717738232116e-06, "loss": 0.62, "step": 16043 }, { "epoch": 1.43, "grad_norm": 8.078871578087316, "learning_rate": 1.974396638089773e-06, "loss": 0.6131, "step": 16044 }, { "epoch": 1.43, "grad_norm": 6.877564616694175, "learning_rate": 1.9738215655091814e-06, "loss": 0.5971, "step": 16045 }, { "epoch": 1.43, "grad_norm": 7.292474836258553, "learning_rate": 1.973246556093435e-06, "loss": 0.564, "step": 16046 }, { "epoch": 1.43, "grad_norm": 6.587989434649652, "learning_rate": 1.9726716098545385e-06, "loss": 0.5672, "step": 16047 }, { "epoch": 1.43, "grad_norm": 6.952588146031541, "learning_rate": 1.9720967268044923e-06, "loss": 0.5326, "step": 16048 }, { "epoch": 1.43, "grad_norm": 5.476657519544199, "learning_rate": 1.9715219069552953e-06, "loss": 0.5513, "step": 16049 }, { "epoch": 1.43, "grad_norm": 7.156090464323989, "learning_rate": 1.970947150318947e-06, "loss": 0.5781, "step": 16050 }, { "epoch": 1.43, "grad_norm": 4.071208452803583, "learning_rate": 1.970372456907445e-06, "loss": 0.5469, "step": 16051 }, { "epoch": 1.43, "grad_norm": 5.610561592928258, "learning_rate": 1.969797826732783e-06, "loss": 0.5579, "step": 16052 }, { "epoch": 1.43, "grad_norm": 5.420699724964787, "learning_rate": 1.969223259806954e-06, "loss": 0.5577, "step": 16053 }, { "epoch": 1.43, "grad_norm": 6.130735590464181, "learning_rate": 1.9686487561419516e-06, "loss": 0.5012, "step": 16054 }, { "epoch": 1.43, "grad_norm": 6.8722203621543, "learning_rate": 1.9680743157497685e-06, "loss": 0.5495, "step": 16055 }, { "epoch": 1.43, "grad_norm": 6.5312657384502435, "learning_rate": 1.967499938642394e-06, "loss": 0.5716, "step": 16056 }, { "epoch": 1.43, "grad_norm": 7.854095566334479, "learning_rate": 1.966925624831817e-06, "loss": 0.6487, "step": 16057 }, { "epoch": 1.43, "grad_norm": 5.962514852774252, "learning_rate": 1.966351374330027e-06, "loss": 0.5631, "step": 16058 }, { "epoch": 1.43, "grad_norm": 4.527788897008529, "learning_rate": 1.9657771871490065e-06, "loss": 0.5794, "step": 16059 }, { "epoch": 1.43, "grad_norm": 7.336361422058109, "learning_rate": 1.965203063300743e-06, "loss": 0.5561, "step": 16060 }, { "epoch": 1.43, "grad_norm": 8.128854984108214, "learning_rate": 1.964629002797219e-06, "loss": 0.5839, "step": 16061 }, { "epoch": 1.43, "grad_norm": 9.191564117125191, "learning_rate": 1.9640550056504177e-06, "loss": 0.5628, "step": 16062 }, { "epoch": 1.43, "grad_norm": 5.902842210308377, "learning_rate": 1.963481071872319e-06, "loss": 0.6322, "step": 16063 }, { "epoch": 1.43, "grad_norm": 6.138003758710454, "learning_rate": 1.962907201474905e-06, "loss": 0.5696, "step": 16064 }, { "epoch": 1.43, "grad_norm": 6.755455774113239, "learning_rate": 1.9623333944701508e-06, "loss": 0.5611, "step": 16065 }, { "epoch": 1.43, "grad_norm": 4.922964259380893, "learning_rate": 1.961759650870034e-06, "loss": 0.6112, "step": 16066 }, { "epoch": 1.43, "grad_norm": 8.33289626854815, "learning_rate": 1.9611859706865326e-06, "loss": 0.5797, "step": 16067 }, { "epoch": 1.43, "grad_norm": 6.652115177844114, "learning_rate": 1.960612353931618e-06, "loss": 0.5427, "step": 16068 }, { "epoch": 1.43, "grad_norm": 8.119048144619292, "learning_rate": 1.960038800617264e-06, "loss": 0.616, "step": 16069 }, { "epoch": 1.43, "grad_norm": 5.7644584434076735, "learning_rate": 1.959465310755442e-06, "loss": 0.6118, "step": 16070 }, { "epoch": 1.43, "grad_norm": 5.912114724017251, "learning_rate": 1.958891884358126e-06, "loss": 0.5262, "step": 16071 }, { "epoch": 1.43, "grad_norm": 7.337982170333464, "learning_rate": 1.9583185214372796e-06, "loss": 0.6201, "step": 16072 }, { "epoch": 1.43, "grad_norm": 6.73587871124126, "learning_rate": 1.9577452220048724e-06, "loss": 0.5678, "step": 16073 }, { "epoch": 1.43, "grad_norm": 5.201379808943579, "learning_rate": 1.9571719860728723e-06, "loss": 0.549, "step": 16074 }, { "epoch": 1.43, "grad_norm": 5.520800806080823, "learning_rate": 1.9565988136532427e-06, "loss": 0.5347, "step": 16075 }, { "epoch": 1.43, "grad_norm": 9.368952313020728, "learning_rate": 1.956025704757948e-06, "loss": 0.6021, "step": 16076 }, { "epoch": 1.43, "grad_norm": 7.558342988822617, "learning_rate": 1.955452659398952e-06, "loss": 0.5796, "step": 16077 }, { "epoch": 1.43, "grad_norm": 7.804483840174626, "learning_rate": 1.9548796775882127e-06, "loss": 0.6016, "step": 16078 }, { "epoch": 1.43, "grad_norm": 5.552701660224498, "learning_rate": 1.954306759337692e-06, "loss": 0.5762, "step": 16079 }, { "epoch": 1.43, "grad_norm": 7.648580824996596, "learning_rate": 1.953733904659347e-06, "loss": 0.5666, "step": 16080 }, { "epoch": 1.43, "grad_norm": 8.245924133529442, "learning_rate": 1.953161113565138e-06, "loss": 0.5816, "step": 16081 }, { "epoch": 1.43, "grad_norm": 7.3165315247797, "learning_rate": 1.9525883860670157e-06, "loss": 0.6286, "step": 16082 }, { "epoch": 1.43, "grad_norm": 4.753637184215156, "learning_rate": 1.9520157221769377e-06, "loss": 0.5689, "step": 16083 }, { "epoch": 1.43, "grad_norm": 7.154671314036131, "learning_rate": 1.951443121906858e-06, "loss": 0.5419, "step": 16084 }, { "epoch": 1.43, "grad_norm": 7.6268725941740785, "learning_rate": 1.9508705852687256e-06, "loss": 0.5936, "step": 16085 }, { "epoch": 1.44, "grad_norm": 7.8525686908671055, "learning_rate": 1.9502981122744917e-06, "loss": 0.6145, "step": 16086 }, { "epoch": 1.44, "grad_norm": 7.118698582413041, "learning_rate": 1.9497257029361065e-06, "loss": 0.5497, "step": 16087 }, { "epoch": 1.44, "grad_norm": 6.50782890728477, "learning_rate": 1.949153357265517e-06, "loss": 0.5827, "step": 16088 }, { "epoch": 1.44, "grad_norm": 7.952054012458648, "learning_rate": 1.94858107527467e-06, "loss": 0.5881, "step": 16089 }, { "epoch": 1.44, "grad_norm": 6.457346033026266, "learning_rate": 1.948008856975512e-06, "loss": 0.5563, "step": 16090 }, { "epoch": 1.44, "grad_norm": 8.159302629396956, "learning_rate": 1.9474367023799838e-06, "loss": 0.5735, "step": 16091 }, { "epoch": 1.44, "grad_norm": 9.278627118449968, "learning_rate": 1.9468646115000294e-06, "loss": 0.6478, "step": 16092 }, { "epoch": 1.44, "grad_norm": 5.722274537841317, "learning_rate": 1.94629258434759e-06, "loss": 0.6644, "step": 16093 }, { "epoch": 1.44, "grad_norm": 4.409989286112338, "learning_rate": 1.9457206209346057e-06, "loss": 0.6096, "step": 16094 }, { "epoch": 1.44, "grad_norm": 6.507476980964579, "learning_rate": 1.945148721273016e-06, "loss": 0.6325, "step": 16095 }, { "epoch": 1.44, "grad_norm": 6.093631343276634, "learning_rate": 1.944576885374755e-06, "loss": 0.5825, "step": 16096 }, { "epoch": 1.44, "grad_norm": 8.418405980313318, "learning_rate": 1.9440051132517625e-06, "loss": 0.6497, "step": 16097 }, { "epoch": 1.44, "grad_norm": 8.087790991613335, "learning_rate": 1.9434334049159687e-06, "loss": 0.5786, "step": 16098 }, { "epoch": 1.44, "grad_norm": 8.029228342610708, "learning_rate": 1.942861760379309e-06, "loss": 0.6018, "step": 16099 }, { "epoch": 1.44, "grad_norm": 6.354159848802613, "learning_rate": 1.9422901796537143e-06, "loss": 0.5942, "step": 16100 }, { "epoch": 1.44, "grad_norm": 6.143911781557417, "learning_rate": 1.9417186627511163e-06, "loss": 0.6158, "step": 16101 }, { "epoch": 1.44, "grad_norm": 5.5863437366695905, "learning_rate": 1.9411472096834454e-06, "loss": 0.6127, "step": 16102 }, { "epoch": 1.44, "grad_norm": 6.57437514664594, "learning_rate": 1.940575820462626e-06, "loss": 0.6055, "step": 16103 }, { "epoch": 1.44, "grad_norm": 7.381167007821189, "learning_rate": 1.9400044951005864e-06, "loss": 0.5259, "step": 16104 }, { "epoch": 1.44, "grad_norm": 4.646824438647992, "learning_rate": 1.939433233609252e-06, "loss": 0.541, "step": 16105 }, { "epoch": 1.44, "grad_norm": 5.310518859822356, "learning_rate": 1.9388620360005456e-06, "loss": 0.6307, "step": 16106 }, { "epoch": 1.44, "grad_norm": 9.302631898596294, "learning_rate": 1.938290902286391e-06, "loss": 0.5204, "step": 16107 }, { "epoch": 1.44, "grad_norm": 7.349506267685692, "learning_rate": 1.9377198324787105e-06, "loss": 0.5459, "step": 16108 }, { "epoch": 1.44, "grad_norm": 7.589995819995555, "learning_rate": 1.93714882658942e-06, "loss": 0.5735, "step": 16109 }, { "epoch": 1.44, "grad_norm": 4.729408798623771, "learning_rate": 1.9365778846304424e-06, "loss": 0.5196, "step": 16110 }, { "epoch": 1.44, "grad_norm": 6.483295237758713, "learning_rate": 1.936007006613691e-06, "loss": 0.6272, "step": 16111 }, { "epoch": 1.44, "grad_norm": 7.027916435351266, "learning_rate": 1.9354361925510827e-06, "loss": 0.5847, "step": 16112 }, { "epoch": 1.44, "grad_norm": 5.829169306150323, "learning_rate": 1.9348654424545332e-06, "loss": 0.5829, "step": 16113 }, { "epoch": 1.44, "grad_norm": 5.095464800969377, "learning_rate": 1.9342947563359547e-06, "loss": 0.5868, "step": 16114 }, { "epoch": 1.44, "grad_norm": 6.01007475858308, "learning_rate": 1.9337241342072617e-06, "loss": 0.6197, "step": 16115 }, { "epoch": 1.44, "grad_norm": 6.314778740530741, "learning_rate": 1.93315357608036e-06, "loss": 0.6107, "step": 16116 }, { "epoch": 1.44, "grad_norm": 7.9648242602804435, "learning_rate": 1.9325830819671616e-06, "loss": 0.5541, "step": 16117 }, { "epoch": 1.44, "grad_norm": 5.83805105346306, "learning_rate": 1.9320126518795736e-06, "loss": 0.5969, "step": 16118 }, { "epoch": 1.44, "grad_norm": 5.433223203516213, "learning_rate": 1.931442285829502e-06, "loss": 0.5595, "step": 16119 }, { "epoch": 1.44, "grad_norm": 6.128100605828463, "learning_rate": 1.9308719838288536e-06, "loss": 0.5469, "step": 16120 }, { "epoch": 1.44, "grad_norm": 6.351078538534099, "learning_rate": 1.930301745889533e-06, "loss": 0.5726, "step": 16121 }, { "epoch": 1.44, "grad_norm": 5.71133699185647, "learning_rate": 1.9297315720234388e-06, "loss": 0.5841, "step": 16122 }, { "epoch": 1.44, "grad_norm": 5.9756983007548135, "learning_rate": 1.929161462242474e-06, "loss": 0.5981, "step": 16123 }, { "epoch": 1.44, "grad_norm": 6.055352926546926, "learning_rate": 1.9285914165585414e-06, "loss": 0.5546, "step": 16124 }, { "epoch": 1.44, "grad_norm": 6.299120709407728, "learning_rate": 1.928021434983534e-06, "loss": 0.5849, "step": 16125 }, { "epoch": 1.44, "grad_norm": 7.6291138094335365, "learning_rate": 1.9274515175293524e-06, "loss": 0.5898, "step": 16126 }, { "epoch": 1.44, "grad_norm": 4.665711381671933, "learning_rate": 1.926881664207891e-06, "loss": 0.5218, "step": 16127 }, { "epoch": 1.44, "grad_norm": 6.582968666889471, "learning_rate": 1.926311875031047e-06, "loss": 0.5204, "step": 16128 }, { "epoch": 1.44, "grad_norm": 5.752641129260513, "learning_rate": 1.9257421500107093e-06, "loss": 0.5791, "step": 16129 }, { "epoch": 1.44, "grad_norm": 7.5674538284703905, "learning_rate": 1.9251724891587715e-06, "loss": 0.6003, "step": 16130 }, { "epoch": 1.44, "grad_norm": 6.5272792370344455, "learning_rate": 1.9246028924871245e-06, "loss": 0.5628, "step": 16131 }, { "epoch": 1.44, "grad_norm": 8.821078697654732, "learning_rate": 1.924033360007657e-06, "loss": 0.618, "step": 16132 }, { "epoch": 1.44, "grad_norm": 9.583921055265614, "learning_rate": 1.9234638917322573e-06, "loss": 0.5472, "step": 16133 }, { "epoch": 1.44, "grad_norm": 6.3151988710740765, "learning_rate": 1.922894487672813e-06, "loss": 0.581, "step": 16134 }, { "epoch": 1.44, "grad_norm": 5.9087635294888985, "learning_rate": 1.9223251478412054e-06, "loss": 0.5424, "step": 16135 }, { "epoch": 1.44, "grad_norm": 7.957929665797019, "learning_rate": 1.9217558722493203e-06, "loss": 0.5851, "step": 16136 }, { "epoch": 1.44, "grad_norm": 6.4928496719962805, "learning_rate": 1.9211866609090402e-06, "loss": 0.518, "step": 16137 }, { "epoch": 1.44, "grad_norm": 5.627151272642593, "learning_rate": 1.9206175138322465e-06, "loss": 0.5874, "step": 16138 }, { "epoch": 1.44, "grad_norm": 5.976620516620107, "learning_rate": 1.92004843103082e-06, "loss": 0.5755, "step": 16139 }, { "epoch": 1.44, "grad_norm": 7.399909894858208, "learning_rate": 1.9194794125166353e-06, "loss": 0.5857, "step": 16140 }, { "epoch": 1.44, "grad_norm": 5.631469273126346, "learning_rate": 1.9189104583015738e-06, "loss": 0.642, "step": 16141 }, { "epoch": 1.44, "grad_norm": 6.349292417263737, "learning_rate": 1.918341568397508e-06, "loss": 0.6745, "step": 16142 }, { "epoch": 1.44, "grad_norm": 9.287650691685396, "learning_rate": 1.9177727428163116e-06, "loss": 0.5674, "step": 16143 }, { "epoch": 1.44, "grad_norm": 5.535127069190486, "learning_rate": 1.9172039815698605e-06, "loss": 0.5596, "step": 16144 }, { "epoch": 1.44, "grad_norm": 7.3608598400116385, "learning_rate": 1.9166352846700247e-06, "loss": 0.594, "step": 16145 }, { "epoch": 1.44, "grad_norm": 7.2952687776653224, "learning_rate": 1.916066652128675e-06, "loss": 0.614, "step": 16146 }, { "epoch": 1.44, "grad_norm": 7.030531196666821, "learning_rate": 1.915498083957682e-06, "loss": 0.5804, "step": 16147 }, { "epoch": 1.44, "grad_norm": 8.831853092974606, "learning_rate": 1.914929580168909e-06, "loss": 0.5958, "step": 16148 }, { "epoch": 1.44, "grad_norm": 9.132433755438306, "learning_rate": 1.9143611407742257e-06, "loss": 0.6192, "step": 16149 }, { "epoch": 1.44, "grad_norm": 6.943563687327087, "learning_rate": 1.913792765785496e-06, "loss": 0.5069, "step": 16150 }, { "epoch": 1.44, "grad_norm": 6.304090945343582, "learning_rate": 1.913224455214584e-06, "loss": 0.5834, "step": 16151 }, { "epoch": 1.44, "grad_norm": 5.586419279145824, "learning_rate": 1.9126562090733507e-06, "loss": 0.5971, "step": 16152 }, { "epoch": 1.44, "grad_norm": 5.050162035846773, "learning_rate": 1.9120880273736603e-06, "loss": 0.5938, "step": 16153 }, { "epoch": 1.44, "grad_norm": 6.563650414849799, "learning_rate": 1.9115199101273695e-06, "loss": 0.5616, "step": 16154 }, { "epoch": 1.44, "grad_norm": 7.636284291413535, "learning_rate": 1.9109518573463354e-06, "loss": 0.6362, "step": 16155 }, { "epoch": 1.44, "grad_norm": 7.198221775188881, "learning_rate": 1.9103838690424166e-06, "loss": 0.5977, "step": 16156 }, { "epoch": 1.44, "grad_norm": 6.273368016075817, "learning_rate": 1.9098159452274678e-06, "loss": 0.5343, "step": 16157 }, { "epoch": 1.44, "grad_norm": 5.93322889929172, "learning_rate": 1.9092480859133443e-06, "loss": 0.5941, "step": 16158 }, { "epoch": 1.44, "grad_norm": 8.30716907413396, "learning_rate": 1.908680291111898e-06, "loss": 0.6152, "step": 16159 }, { "epoch": 1.44, "grad_norm": 6.549750602962268, "learning_rate": 1.9081125608349824e-06, "loss": 0.6336, "step": 16160 }, { "epoch": 1.44, "grad_norm": 6.8441774489818235, "learning_rate": 1.907544895094445e-06, "loss": 0.4872, "step": 16161 }, { "epoch": 1.44, "grad_norm": 7.109103954558313, "learning_rate": 1.9069772939021348e-06, "loss": 0.6046, "step": 16162 }, { "epoch": 1.44, "grad_norm": 8.430142114166337, "learning_rate": 1.9064097572699003e-06, "loss": 0.6049, "step": 16163 }, { "epoch": 1.44, "grad_norm": 6.107053845148975, "learning_rate": 1.905842285209587e-06, "loss": 0.5462, "step": 16164 }, { "epoch": 1.44, "grad_norm": 7.043834041107527, "learning_rate": 1.9052748777330403e-06, "loss": 0.612, "step": 16165 }, { "epoch": 1.44, "grad_norm": 7.526871826125952, "learning_rate": 1.904707534852105e-06, "loss": 0.596, "step": 16166 }, { "epoch": 1.44, "grad_norm": 5.513545024009426, "learning_rate": 1.9041402565786192e-06, "loss": 0.6133, "step": 16167 }, { "epoch": 1.44, "grad_norm": 5.470141525193471, "learning_rate": 1.9035730429244275e-06, "loss": 0.5266, "step": 16168 }, { "epoch": 1.44, "grad_norm": 5.8113304184394, "learning_rate": 1.9030058939013658e-06, "loss": 0.6146, "step": 16169 }, { "epoch": 1.44, "grad_norm": 5.401025910459513, "learning_rate": 1.9024388095212737e-06, "loss": 0.6084, "step": 16170 }, { "epoch": 1.44, "grad_norm": 5.920439319037354, "learning_rate": 1.9018717897959882e-06, "loss": 0.5585, "step": 16171 }, { "epoch": 1.44, "grad_norm": 7.000302358078976, "learning_rate": 1.9013048347373442e-06, "loss": 0.6036, "step": 16172 }, { "epoch": 1.44, "grad_norm": 4.811952167226684, "learning_rate": 1.9007379443571771e-06, "loss": 0.5648, "step": 16173 }, { "epoch": 1.44, "grad_norm": 9.13308017724825, "learning_rate": 1.9001711186673166e-06, "loss": 0.6274, "step": 16174 }, { "epoch": 1.44, "grad_norm": 6.302559392857365, "learning_rate": 1.8996043576795957e-06, "loss": 0.5865, "step": 16175 }, { "epoch": 1.44, "grad_norm": 5.208870529901284, "learning_rate": 1.8990376614058443e-06, "loss": 0.5988, "step": 16176 }, { "epoch": 1.44, "grad_norm": 9.45466974802684, "learning_rate": 1.8984710298578902e-06, "loss": 0.596, "step": 16177 }, { "epoch": 1.44, "grad_norm": 6.253790904557484, "learning_rate": 1.8979044630475629e-06, "loss": 0.6321, "step": 16178 }, { "epoch": 1.44, "grad_norm": 6.139125840940434, "learning_rate": 1.8973379609866848e-06, "loss": 0.5439, "step": 16179 }, { "epoch": 1.44, "grad_norm": 6.641224591318603, "learning_rate": 1.8967715236870826e-06, "loss": 0.5296, "step": 16180 }, { "epoch": 1.44, "grad_norm": 7.346929218555322, "learning_rate": 1.8962051511605783e-06, "loss": 0.5714, "step": 16181 }, { "epoch": 1.44, "grad_norm": 9.506959720583549, "learning_rate": 1.895638843418996e-06, "loss": 0.5658, "step": 16182 }, { "epoch": 1.44, "grad_norm": 7.648380937874914, "learning_rate": 1.8950726004741527e-06, "loss": 0.6261, "step": 16183 }, { "epoch": 1.44, "grad_norm": 6.639583586032304, "learning_rate": 1.8945064223378696e-06, "loss": 0.5467, "step": 16184 }, { "epoch": 1.44, "grad_norm": 5.9475973502144015, "learning_rate": 1.8939403090219633e-06, "loss": 0.5319, "step": 16185 }, { "epoch": 1.44, "grad_norm": 6.505884030895042, "learning_rate": 1.8933742605382532e-06, "loss": 0.6138, "step": 16186 }, { "epoch": 1.44, "grad_norm": 5.6293405090921, "learning_rate": 1.89280827689855e-06, "loss": 0.5402, "step": 16187 }, { "epoch": 1.44, "grad_norm": 6.980753885725871, "learning_rate": 1.8922423581146698e-06, "loss": 0.5944, "step": 16188 }, { "epoch": 1.44, "grad_norm": 4.881411848390724, "learning_rate": 1.8916765041984242e-06, "loss": 0.605, "step": 16189 }, { "epoch": 1.44, "grad_norm": 8.620240360614416, "learning_rate": 1.8911107151616243e-06, "loss": 0.5987, "step": 16190 }, { "epoch": 1.44, "grad_norm": 8.128255522567805, "learning_rate": 1.8905449910160818e-06, "loss": 0.6345, "step": 16191 }, { "epoch": 1.44, "grad_norm": 6.153289749998234, "learning_rate": 1.8899793317736015e-06, "loss": 0.6097, "step": 16192 }, { "epoch": 1.44, "grad_norm": 6.548421330610204, "learning_rate": 1.8894137374459915e-06, "loss": 0.5874, "step": 16193 }, { "epoch": 1.44, "grad_norm": 5.369668652509313, "learning_rate": 1.8888482080450577e-06, "loss": 0.5899, "step": 16194 }, { "epoch": 1.44, "grad_norm": 6.09086283052267, "learning_rate": 1.888282743582604e-06, "loss": 0.6241, "step": 16195 }, { "epoch": 1.44, "grad_norm": 7.302410372012042, "learning_rate": 1.8877173440704354e-06, "loss": 0.5566, "step": 16196 }, { "epoch": 1.44, "grad_norm": 8.219202120431495, "learning_rate": 1.88715200952035e-06, "loss": 0.5274, "step": 16197 }, { "epoch": 1.45, "grad_norm": 6.013474409535862, "learning_rate": 1.8865867399441511e-06, "loss": 0.5986, "step": 16198 }, { "epoch": 1.45, "grad_norm": 7.1722464301011035, "learning_rate": 1.8860215353536338e-06, "loss": 0.5787, "step": 16199 }, { "epoch": 1.45, "grad_norm": 7.844395309159375, "learning_rate": 1.8854563957605976e-06, "loss": 0.62, "step": 16200 }, { "epoch": 1.45, "grad_norm": 5.554476638939635, "learning_rate": 1.8848913211768384e-06, "loss": 0.5497, "step": 16201 }, { "epoch": 1.45, "grad_norm": 5.459544156790797, "learning_rate": 1.8843263116141507e-06, "loss": 0.6117, "step": 16202 }, { "epoch": 1.45, "grad_norm": 4.808625329366765, "learning_rate": 1.883761367084328e-06, "loss": 0.6122, "step": 16203 }, { "epoch": 1.45, "grad_norm": 6.14999665907887, "learning_rate": 1.8831964875991643e-06, "loss": 0.5561, "step": 16204 }, { "epoch": 1.45, "grad_norm": 6.174330758748959, "learning_rate": 1.8826316731704463e-06, "loss": 0.5617, "step": 16205 }, { "epoch": 1.45, "grad_norm": 6.439614200723388, "learning_rate": 1.8820669238099654e-06, "loss": 0.538, "step": 16206 }, { "epoch": 1.45, "grad_norm": 9.89087284063784, "learning_rate": 1.881502239529509e-06, "loss": 0.5783, "step": 16207 }, { "epoch": 1.45, "grad_norm": 7.668635109264112, "learning_rate": 1.8809376203408642e-06, "loss": 0.5666, "step": 16208 }, { "epoch": 1.45, "grad_norm": 5.365650680015462, "learning_rate": 1.8803730662558162e-06, "loss": 0.5833, "step": 16209 }, { "epoch": 1.45, "grad_norm": 8.224612509540579, "learning_rate": 1.8798085772861496e-06, "loss": 0.5793, "step": 16210 }, { "epoch": 1.45, "grad_norm": 6.539691044032317, "learning_rate": 1.8792441534436468e-06, "loss": 0.5974, "step": 16211 }, { "epoch": 1.45, "grad_norm": 6.66181352234482, "learning_rate": 1.8786797947400854e-06, "loss": 0.5775, "step": 16212 }, { "epoch": 1.45, "grad_norm": 7.767900977976966, "learning_rate": 1.8781155011872481e-06, "loss": 0.6676, "step": 16213 }, { "epoch": 1.45, "grad_norm": 8.10846891093178, "learning_rate": 1.8775512727969126e-06, "loss": 0.53, "step": 16214 }, { "epoch": 1.45, "grad_norm": 6.843303839791726, "learning_rate": 1.8769871095808568e-06, "loss": 0.6477, "step": 16215 }, { "epoch": 1.45, "grad_norm": 11.116242735815137, "learning_rate": 1.8764230115508553e-06, "loss": 0.5565, "step": 16216 }, { "epoch": 1.45, "grad_norm": 6.984750152159757, "learning_rate": 1.8758589787186848e-06, "loss": 0.5834, "step": 16217 }, { "epoch": 1.45, "grad_norm": 6.798392813309136, "learning_rate": 1.8752950110961144e-06, "loss": 0.6079, "step": 16218 }, { "epoch": 1.45, "grad_norm": 5.974095943196885, "learning_rate": 1.8747311086949183e-06, "loss": 0.5875, "step": 16219 }, { "epoch": 1.45, "grad_norm": 8.064280177558413, "learning_rate": 1.8741672715268655e-06, "loss": 0.5885, "step": 16220 }, { "epoch": 1.45, "grad_norm": 6.408675590704521, "learning_rate": 1.8736034996037255e-06, "loss": 0.5702, "step": 16221 }, { "epoch": 1.45, "grad_norm": 6.620684192192458, "learning_rate": 1.8730397929372661e-06, "loss": 0.545, "step": 16222 }, { "epoch": 1.45, "grad_norm": 5.715497175690145, "learning_rate": 1.8724761515392543e-06, "loss": 0.5321, "step": 16223 }, { "epoch": 1.45, "grad_norm": 6.924352900239795, "learning_rate": 1.8719125754214523e-06, "loss": 0.5875, "step": 16224 }, { "epoch": 1.45, "grad_norm": 7.657967848338932, "learning_rate": 1.8713490645956267e-06, "loss": 0.5774, "step": 16225 }, { "epoch": 1.45, "grad_norm": 6.6716698882609675, "learning_rate": 1.8707856190735358e-06, "loss": 0.6347, "step": 16226 }, { "epoch": 1.45, "grad_norm": 6.650040207102774, "learning_rate": 1.8702222388669421e-06, "loss": 0.5769, "step": 16227 }, { "epoch": 1.45, "grad_norm": 9.792710190568242, "learning_rate": 1.8696589239876057e-06, "loss": 0.5057, "step": 16228 }, { "epoch": 1.45, "grad_norm": 6.762259665247584, "learning_rate": 1.8690956744472832e-06, "loss": 0.6208, "step": 16229 }, { "epoch": 1.45, "grad_norm": 7.278840283391858, "learning_rate": 1.868532490257734e-06, "loss": 0.5691, "step": 16230 }, { "epoch": 1.45, "grad_norm": 6.285164777848194, "learning_rate": 1.8679693714307096e-06, "loss": 0.5073, "step": 16231 }, { "epoch": 1.45, "grad_norm": 6.828805017336723, "learning_rate": 1.8674063179779657e-06, "loss": 0.5729, "step": 16232 }, { "epoch": 1.45, "grad_norm": 6.066172970964223, "learning_rate": 1.8668433299112544e-06, "loss": 0.5507, "step": 16233 }, { "epoch": 1.45, "grad_norm": 4.665400072061841, "learning_rate": 1.8662804072423268e-06, "loss": 0.5363, "step": 16234 }, { "epoch": 1.45, "grad_norm": 6.870575368774762, "learning_rate": 1.8657175499829333e-06, "loss": 0.6173, "step": 16235 }, { "epoch": 1.45, "grad_norm": 7.252490698573941, "learning_rate": 1.8651547581448237e-06, "loss": 0.6264, "step": 16236 }, { "epoch": 1.45, "grad_norm": 7.638946225178121, "learning_rate": 1.864592031739742e-06, "loss": 0.5423, "step": 16237 }, { "epoch": 1.45, "grad_norm": 6.88989171890227, "learning_rate": 1.864029370779435e-06, "loss": 0.6124, "step": 16238 }, { "epoch": 1.45, "grad_norm": 5.868171815225098, "learning_rate": 1.8634667752756469e-06, "loss": 0.6207, "step": 16239 }, { "epoch": 1.45, "grad_norm": 7.3799433398891185, "learning_rate": 1.8629042452401224e-06, "loss": 0.6282, "step": 16240 }, { "epoch": 1.45, "grad_norm": 9.238081442560404, "learning_rate": 1.8623417806846005e-06, "loss": 0.5808, "step": 16241 }, { "epoch": 1.45, "grad_norm": 5.832227534513327, "learning_rate": 1.8617793816208224e-06, "loss": 0.5961, "step": 16242 }, { "epoch": 1.45, "grad_norm": 5.6071395814177025, "learning_rate": 1.861217048060529e-06, "loss": 0.598, "step": 16243 }, { "epoch": 1.45, "grad_norm": 7.3718730982186855, "learning_rate": 1.860654780015454e-06, "loss": 0.5493, "step": 16244 }, { "epoch": 1.45, "grad_norm": 6.914270501679444, "learning_rate": 1.8600925774973355e-06, "loss": 0.5434, "step": 16245 }, { "epoch": 1.45, "grad_norm": 5.226563034693283, "learning_rate": 1.8595304405179082e-06, "loss": 0.65, "step": 16246 }, { "epoch": 1.45, "grad_norm": 6.953254218395929, "learning_rate": 1.8589683690889055e-06, "loss": 0.5843, "step": 16247 }, { "epoch": 1.45, "grad_norm": 6.9379835556223455, "learning_rate": 1.858406363222059e-06, "loss": 0.5348, "step": 16248 }, { "epoch": 1.45, "grad_norm": 7.907646086263747, "learning_rate": 1.857844422929102e-06, "loss": 0.5619, "step": 16249 }, { "epoch": 1.45, "grad_norm": 7.955682883921274, "learning_rate": 1.8572825482217594e-06, "loss": 0.6393, "step": 16250 }, { "epoch": 1.45, "grad_norm": 8.841507622686544, "learning_rate": 1.8567207391117608e-06, "loss": 0.5502, "step": 16251 }, { "epoch": 1.45, "grad_norm": 7.241821554692742, "learning_rate": 1.856158995610834e-06, "loss": 0.6621, "step": 16252 }, { "epoch": 1.45, "grad_norm": 9.312578413024093, "learning_rate": 1.8555973177307025e-06, "loss": 0.5866, "step": 16253 }, { "epoch": 1.45, "grad_norm": 5.535019360277999, "learning_rate": 1.8550357054830926e-06, "loss": 0.5086, "step": 16254 }, { "epoch": 1.45, "grad_norm": 7.117852987541589, "learning_rate": 1.8544741588797238e-06, "loss": 0.593, "step": 16255 }, { "epoch": 1.45, "grad_norm": 5.748711752900766, "learning_rate": 1.8539126779323197e-06, "loss": 0.5803, "step": 16256 }, { "epoch": 1.45, "grad_norm": 7.967447400939709, "learning_rate": 1.8533512626525969e-06, "loss": 0.5447, "step": 16257 }, { "epoch": 1.45, "grad_norm": 7.98092216328443, "learning_rate": 1.852789913052276e-06, "loss": 0.5763, "step": 16258 }, { "epoch": 1.45, "grad_norm": 6.5204135232556935, "learning_rate": 1.8522286291430723e-06, "loss": 0.5361, "step": 16259 }, { "epoch": 1.45, "grad_norm": 4.9325934787780685, "learning_rate": 1.8516674109367034e-06, "loss": 0.6009, "step": 16260 }, { "epoch": 1.45, "grad_norm": 6.794695591938233, "learning_rate": 1.8511062584448824e-06, "loss": 0.5354, "step": 16261 }, { "epoch": 1.45, "grad_norm": 5.935311666390683, "learning_rate": 1.8505451716793238e-06, "loss": 0.584, "step": 16262 }, { "epoch": 1.45, "grad_norm": 7.791446156505, "learning_rate": 1.8499841506517357e-06, "loss": 0.57, "step": 16263 }, { "epoch": 1.45, "grad_norm": 6.752048910148552, "learning_rate": 1.8494231953738306e-06, "loss": 0.5412, "step": 16264 }, { "epoch": 1.45, "grad_norm": 6.25661586950429, "learning_rate": 1.8488623058573163e-06, "loss": 0.6287, "step": 16265 }, { "epoch": 1.45, "grad_norm": 7.008435066728198, "learning_rate": 1.8483014821139005e-06, "loss": 0.5814, "step": 16266 }, { "epoch": 1.45, "grad_norm": 6.561768660655434, "learning_rate": 1.8477407241552908e-06, "loss": 0.5612, "step": 16267 }, { "epoch": 1.45, "grad_norm": 5.767940836106502, "learning_rate": 1.8471800319931886e-06, "loss": 0.6398, "step": 16268 }, { "epoch": 1.45, "grad_norm": 5.3772597863636085, "learning_rate": 1.8466194056393e-06, "loss": 0.601, "step": 16269 }, { "epoch": 1.45, "grad_norm": 4.307017242632708, "learning_rate": 1.8460588451053246e-06, "loss": 0.6452, "step": 16270 }, { "epoch": 1.45, "grad_norm": 6.140670176685203, "learning_rate": 1.8454983504029633e-06, "loss": 0.559, "step": 16271 }, { "epoch": 1.45, "grad_norm": 5.501170088494278, "learning_rate": 1.8449379215439156e-06, "loss": 0.5778, "step": 16272 }, { "epoch": 1.45, "grad_norm": 5.7414366170538775, "learning_rate": 1.8443775585398793e-06, "loss": 0.5814, "step": 16273 }, { "epoch": 1.45, "grad_norm": 5.480585899924761, "learning_rate": 1.8438172614025523e-06, "loss": 0.5814, "step": 16274 }, { "epoch": 1.45, "grad_norm": 7.171936874183452, "learning_rate": 1.843257030143627e-06, "loss": 0.5545, "step": 16275 }, { "epoch": 1.45, "grad_norm": 9.313044327922912, "learning_rate": 1.8426968647747978e-06, "loss": 0.6089, "step": 16276 }, { "epoch": 1.45, "grad_norm": 6.9524844467010105, "learning_rate": 1.8421367653077571e-06, "loss": 0.6083, "step": 16277 }, { "epoch": 1.45, "grad_norm": 5.388680558907984, "learning_rate": 1.8415767317541954e-06, "loss": 0.4958, "step": 16278 }, { "epoch": 1.45, "grad_norm": 9.612614162241591, "learning_rate": 1.841016764125803e-06, "loss": 0.5944, "step": 16279 }, { "epoch": 1.45, "grad_norm": 6.197071401254663, "learning_rate": 1.8404568624342696e-06, "loss": 0.6057, "step": 16280 }, { "epoch": 1.45, "grad_norm": 8.788672765885801, "learning_rate": 1.8398970266912775e-06, "loss": 0.5863, "step": 16281 }, { "epoch": 1.45, "grad_norm": 6.34432337201427, "learning_rate": 1.839337256908515e-06, "loss": 0.5974, "step": 16282 }, { "epoch": 1.45, "grad_norm": 8.03130411353677, "learning_rate": 1.838777553097667e-06, "loss": 0.5822, "step": 16283 }, { "epoch": 1.45, "grad_norm": 6.654015060021429, "learning_rate": 1.8382179152704133e-06, "loss": 0.5449, "step": 16284 }, { "epoch": 1.45, "grad_norm": 6.124565549144169, "learning_rate": 1.8376583434384359e-06, "loss": 0.5697, "step": 16285 }, { "epoch": 1.45, "grad_norm": 9.105481997250518, "learning_rate": 1.8370988376134152e-06, "loss": 0.6279, "step": 16286 }, { "epoch": 1.45, "grad_norm": 6.6802535292513054, "learning_rate": 1.8365393978070318e-06, "loss": 0.5565, "step": 16287 }, { "epoch": 1.45, "grad_norm": 6.46991484681146, "learning_rate": 1.8359800240309589e-06, "loss": 0.6244, "step": 16288 }, { "epoch": 1.45, "grad_norm": 7.281604875624274, "learning_rate": 1.8354207162968734e-06, "loss": 0.5676, "step": 16289 }, { "epoch": 1.45, "grad_norm": 5.87015468704604, "learning_rate": 1.8348614746164507e-06, "loss": 0.5689, "step": 16290 }, { "epoch": 1.45, "grad_norm": 6.279443504218136, "learning_rate": 1.8343022990013626e-06, "loss": 0.5643, "step": 16291 }, { "epoch": 1.45, "grad_norm": 6.119794713215712, "learning_rate": 1.8337431894632813e-06, "loss": 0.5389, "step": 16292 }, { "epoch": 1.45, "grad_norm": 7.1096547347079975, "learning_rate": 1.8331841460138788e-06, "loss": 0.5856, "step": 16293 }, { "epoch": 1.45, "grad_norm": 7.568462879772909, "learning_rate": 1.8326251686648206e-06, "loss": 0.565, "step": 16294 }, { "epoch": 1.45, "grad_norm": 6.471584211937221, "learning_rate": 1.8320662574277752e-06, "loss": 0.6153, "step": 16295 }, { "epoch": 1.45, "grad_norm": 4.9926222575426875, "learning_rate": 1.8315074123144095e-06, "loss": 0.615, "step": 16296 }, { "epoch": 1.45, "grad_norm": 5.9283244648809745, "learning_rate": 1.830948633336389e-06, "loss": 0.5751, "step": 16297 }, { "epoch": 1.45, "grad_norm": 5.498145594968313, "learning_rate": 1.8303899205053737e-06, "loss": 0.5784, "step": 16298 }, { "epoch": 1.45, "grad_norm": 5.365005395258117, "learning_rate": 1.8298312738330282e-06, "loss": 0.5806, "step": 16299 }, { "epoch": 1.45, "grad_norm": 7.208699418245424, "learning_rate": 1.8292726933310134e-06, "loss": 0.5831, "step": 16300 }, { "epoch": 1.45, "grad_norm": 5.86267833441473, "learning_rate": 1.8287141790109863e-06, "loss": 0.5744, "step": 16301 }, { "epoch": 1.45, "grad_norm": 4.9391496907998285, "learning_rate": 1.8281557308846048e-06, "loss": 0.5898, "step": 16302 }, { "epoch": 1.45, "grad_norm": 6.727900753601115, "learning_rate": 1.8275973489635269e-06, "loss": 0.6217, "step": 16303 }, { "epoch": 1.45, "grad_norm": 7.776533182953649, "learning_rate": 1.8270390332594069e-06, "loss": 0.5477, "step": 16304 }, { "epoch": 1.45, "grad_norm": 8.739776611514607, "learning_rate": 1.8264807837838984e-06, "loss": 0.5997, "step": 16305 }, { "epoch": 1.45, "grad_norm": 7.881110880699467, "learning_rate": 1.8259226005486546e-06, "loss": 0.6152, "step": 16306 }, { "epoch": 1.45, "grad_norm": 5.79267813934058, "learning_rate": 1.8253644835653246e-06, "loss": 0.6278, "step": 16307 }, { "epoch": 1.45, "grad_norm": 5.963118010119635, "learning_rate": 1.8248064328455584e-06, "loss": 0.5867, "step": 16308 }, { "epoch": 1.45, "grad_norm": 7.509784420871031, "learning_rate": 1.8242484484010043e-06, "loss": 0.5875, "step": 16309 }, { "epoch": 1.46, "grad_norm": 8.761721715297359, "learning_rate": 1.823690530243309e-06, "loss": 0.6384, "step": 16310 }, { "epoch": 1.46, "grad_norm": 6.8677910886859275, "learning_rate": 1.823132678384118e-06, "loss": 0.6004, "step": 16311 }, { "epoch": 1.46, "grad_norm": 5.787450160647995, "learning_rate": 1.8225748928350768e-06, "loss": 0.5489, "step": 16312 }, { "epoch": 1.46, "grad_norm": 6.620884479461966, "learning_rate": 1.8220171736078262e-06, "loss": 0.5606, "step": 16313 }, { "epoch": 1.46, "grad_norm": 8.224192459143556, "learning_rate": 1.8214595207140057e-06, "loss": 0.607, "step": 16314 }, { "epoch": 1.46, "grad_norm": 7.3780925978028895, "learning_rate": 1.8209019341652568e-06, "loss": 0.5179, "step": 16315 }, { "epoch": 1.46, "grad_norm": 5.35100096296534, "learning_rate": 1.820344413973218e-06, "loss": 0.5825, "step": 16316 }, { "epoch": 1.46, "grad_norm": 8.018503118362428, "learning_rate": 1.8197869601495266e-06, "loss": 0.5751, "step": 16317 }, { "epoch": 1.46, "grad_norm": 6.205054550624041, "learning_rate": 1.8192295727058173e-06, "loss": 0.6155, "step": 16318 }, { "epoch": 1.46, "grad_norm": 6.170218966387842, "learning_rate": 1.818672251653727e-06, "loss": 0.5774, "step": 16319 }, { "epoch": 1.46, "grad_norm": 7.319257654567645, "learning_rate": 1.8181149970048845e-06, "loss": 0.5992, "step": 16320 }, { "epoch": 1.46, "grad_norm": 5.99747917922349, "learning_rate": 1.8175578087709235e-06, "loss": 0.6031, "step": 16321 }, { "epoch": 1.46, "grad_norm": 7.180349885510405, "learning_rate": 1.8170006869634732e-06, "loss": 0.6224, "step": 16322 }, { "epoch": 1.46, "grad_norm": 7.889343604751983, "learning_rate": 1.8164436315941636e-06, "loss": 0.6623, "step": 16323 }, { "epoch": 1.46, "grad_norm": 5.35165286231498, "learning_rate": 1.8158866426746208e-06, "loss": 0.5968, "step": 16324 }, { "epoch": 1.46, "grad_norm": 4.676287046207306, "learning_rate": 1.8153297202164733e-06, "loss": 0.5593, "step": 16325 }, { "epoch": 1.46, "grad_norm": 7.742830516677, "learning_rate": 1.8147728642313433e-06, "loss": 0.6331, "step": 16326 }, { "epoch": 1.46, "grad_norm": 7.480131439749104, "learning_rate": 1.8142160747308529e-06, "loss": 0.5986, "step": 16327 }, { "epoch": 1.46, "grad_norm": 6.129823294366671, "learning_rate": 1.8136593517266248e-06, "loss": 0.5818, "step": 16328 }, { "epoch": 1.46, "grad_norm": 7.09663871287527, "learning_rate": 1.8131026952302799e-06, "loss": 0.5814, "step": 16329 }, { "epoch": 1.46, "grad_norm": 10.536229061324756, "learning_rate": 1.8125461052534372e-06, "loss": 0.5919, "step": 16330 }, { "epoch": 1.46, "grad_norm": 9.298875508631372, "learning_rate": 1.8119895818077139e-06, "loss": 0.6211, "step": 16331 }, { "epoch": 1.46, "grad_norm": 7.407934867337465, "learning_rate": 1.8114331249047284e-06, "loss": 0.5521, "step": 16332 }, { "epoch": 1.46, "grad_norm": 5.049368171249713, "learning_rate": 1.8108767345560917e-06, "loss": 0.6387, "step": 16333 }, { "epoch": 1.46, "grad_norm": 7.777152072826537, "learning_rate": 1.810320410773419e-06, "loss": 0.5921, "step": 16334 }, { "epoch": 1.46, "grad_norm": 5.915295872546212, "learning_rate": 1.809764153568323e-06, "loss": 0.5879, "step": 16335 }, { "epoch": 1.46, "grad_norm": 8.24097664423097, "learning_rate": 1.8092079629524134e-06, "loss": 0.5875, "step": 16336 }, { "epoch": 1.46, "grad_norm": 8.522546151274087, "learning_rate": 1.8086518389372997e-06, "loss": 0.6018, "step": 16337 }, { "epoch": 1.46, "grad_norm": 5.9793097194203435, "learning_rate": 1.8080957815345917e-06, "loss": 0.6312, "step": 16338 }, { "epoch": 1.46, "grad_norm": 7.3268046494974355, "learning_rate": 1.8075397907558928e-06, "loss": 0.6065, "step": 16339 }, { "epoch": 1.46, "grad_norm": 6.996413440278465, "learning_rate": 1.8069838666128092e-06, "loss": 0.6328, "step": 16340 }, { "epoch": 1.46, "grad_norm": 5.0023634398639105, "learning_rate": 1.806428009116946e-06, "loss": 0.5738, "step": 16341 }, { "epoch": 1.46, "grad_norm": 7.2591221938597625, "learning_rate": 1.8058722182799033e-06, "loss": 0.5711, "step": 16342 }, { "epoch": 1.46, "grad_norm": 8.819464184656283, "learning_rate": 1.8053164941132828e-06, "loss": 0.6165, "step": 16343 }, { "epoch": 1.46, "grad_norm": 5.005922404526672, "learning_rate": 1.804760836628684e-06, "loss": 0.5655, "step": 16344 }, { "epoch": 1.46, "grad_norm": 7.4774953453816915, "learning_rate": 1.8042052458377074e-06, "loss": 0.5583, "step": 16345 }, { "epoch": 1.46, "grad_norm": 7.357042728700943, "learning_rate": 1.8036497217519455e-06, "loss": 0.5772, "step": 16346 }, { "epoch": 1.46, "grad_norm": 6.054178568447883, "learning_rate": 1.8030942643829962e-06, "loss": 0.5189, "step": 16347 }, { "epoch": 1.46, "grad_norm": 7.416366900940011, "learning_rate": 1.8025388737424525e-06, "loss": 0.677, "step": 16348 }, { "epoch": 1.46, "grad_norm": 8.270732316699222, "learning_rate": 1.801983549841908e-06, "loss": 0.6107, "step": 16349 }, { "epoch": 1.46, "grad_norm": 6.4013031746739335, "learning_rate": 1.8014282926929549e-06, "loss": 0.5902, "step": 16350 }, { "epoch": 1.46, "grad_norm": 6.866427259821172, "learning_rate": 1.8008731023071795e-06, "loss": 0.568, "step": 16351 }, { "epoch": 1.46, "grad_norm": 4.7346255860178825, "learning_rate": 1.8003179786961723e-06, "loss": 0.5683, "step": 16352 }, { "epoch": 1.46, "grad_norm": 5.613088524420837, "learning_rate": 1.7997629218715202e-06, "loss": 0.6077, "step": 16353 }, { "epoch": 1.46, "grad_norm": 7.65503981856747, "learning_rate": 1.7992079318448086e-06, "loss": 0.5411, "step": 16354 }, { "epoch": 1.46, "grad_norm": 5.822945030232632, "learning_rate": 1.7986530086276233e-06, "loss": 0.6039, "step": 16355 }, { "epoch": 1.46, "grad_norm": 5.384875719425806, "learning_rate": 1.7980981522315444e-06, "loss": 0.6603, "step": 16356 }, { "epoch": 1.46, "grad_norm": 6.484024236540993, "learning_rate": 1.797543362668156e-06, "loss": 0.5426, "step": 16357 }, { "epoch": 1.46, "grad_norm": 6.887113584972544, "learning_rate": 1.7969886399490349e-06, "loss": 0.5579, "step": 16358 }, { "epoch": 1.46, "grad_norm": 7.014255035696754, "learning_rate": 1.7964339840857613e-06, "loss": 0.6259, "step": 16359 }, { "epoch": 1.46, "grad_norm": 8.542543382145876, "learning_rate": 1.795879395089913e-06, "loss": 0.6329, "step": 16360 }, { "epoch": 1.46, "grad_norm": 7.798775541274467, "learning_rate": 1.7953248729730649e-06, "loss": 0.6272, "step": 16361 }, { "epoch": 1.46, "grad_norm": 6.5989667883147956, "learning_rate": 1.7947704177467922e-06, "loss": 0.5784, "step": 16362 }, { "epoch": 1.46, "grad_norm": 7.907383672694986, "learning_rate": 1.7942160294226696e-06, "loss": 0.6281, "step": 16363 }, { "epoch": 1.46, "grad_norm": 7.857906127203873, "learning_rate": 1.7936617080122647e-06, "loss": 0.5778, "step": 16364 }, { "epoch": 1.46, "grad_norm": 9.107736716023513, "learning_rate": 1.7931074535271504e-06, "loss": 0.5489, "step": 16365 }, { "epoch": 1.46, "grad_norm": 7.090924376955901, "learning_rate": 1.7925532659788946e-06, "loss": 0.5684, "step": 16366 }, { "epoch": 1.46, "grad_norm": 6.130619380261054, "learning_rate": 1.7919991453790658e-06, "loss": 0.6047, "step": 16367 }, { "epoch": 1.46, "grad_norm": 6.131546780775847, "learning_rate": 1.791445091739229e-06, "loss": 0.6121, "step": 16368 }, { "epoch": 1.46, "grad_norm": 4.7780971017423655, "learning_rate": 1.790891105070951e-06, "loss": 0.5838, "step": 16369 }, { "epoch": 1.46, "grad_norm": 5.172190985316813, "learning_rate": 1.7903371853857932e-06, "loss": 0.6455, "step": 16370 }, { "epoch": 1.46, "grad_norm": 5.550905781183034, "learning_rate": 1.7897833326953163e-06, "loss": 0.6034, "step": 16371 }, { "epoch": 1.46, "grad_norm": 6.159074538253677, "learning_rate": 1.7892295470110816e-06, "loss": 0.6053, "step": 16372 }, { "epoch": 1.46, "grad_norm": 5.778393793574685, "learning_rate": 1.7886758283446493e-06, "loss": 0.5864, "step": 16373 }, { "epoch": 1.46, "grad_norm": 7.40541597988544, "learning_rate": 1.7881221767075758e-06, "loss": 0.5494, "step": 16374 }, { "epoch": 1.46, "grad_norm": 6.985553059936738, "learning_rate": 1.7875685921114183e-06, "loss": 0.5458, "step": 16375 }, { "epoch": 1.46, "grad_norm": 8.403783854063095, "learning_rate": 1.7870150745677328e-06, "loss": 0.5148, "step": 16376 }, { "epoch": 1.46, "grad_norm": 8.257542271760794, "learning_rate": 1.7864616240880695e-06, "loss": 0.6432, "step": 16377 }, { "epoch": 1.46, "grad_norm": 10.090677851824813, "learning_rate": 1.7859082406839829e-06, "loss": 0.6054, "step": 16378 }, { "epoch": 1.46, "grad_norm": 5.963683683681956, "learning_rate": 1.7853549243670226e-06, "loss": 0.6161, "step": 16379 }, { "epoch": 1.46, "grad_norm": 7.34587301430023, "learning_rate": 1.784801675148738e-06, "loss": 0.6177, "step": 16380 }, { "epoch": 1.46, "grad_norm": 8.502210959767844, "learning_rate": 1.7842484930406777e-06, "loss": 0.5891, "step": 16381 }, { "epoch": 1.46, "grad_norm": 7.345200328663422, "learning_rate": 1.7836953780543892e-06, "loss": 0.5884, "step": 16382 }, { "epoch": 1.46, "grad_norm": 6.162294791378467, "learning_rate": 1.7831423302014144e-06, "loss": 0.5762, "step": 16383 }, { "epoch": 1.46, "grad_norm": 6.757664762506362, "learning_rate": 1.7825893494933004e-06, "loss": 0.6098, "step": 16384 }, { "epoch": 1.46, "grad_norm": 7.5982109921185685, "learning_rate": 1.7820364359415866e-06, "loss": 0.6148, "step": 16385 }, { "epoch": 1.46, "grad_norm": 6.4861150503482525, "learning_rate": 1.7814835895578143e-06, "loss": 0.5619, "step": 16386 }, { "epoch": 1.46, "grad_norm": 9.010386869416386, "learning_rate": 1.780930810353524e-06, "loss": 0.5517, "step": 16387 }, { "epoch": 1.46, "grad_norm": 7.488620912388788, "learning_rate": 1.780378098340253e-06, "loss": 0.5748, "step": 16388 }, { "epoch": 1.46, "grad_norm": 9.37634697268703, "learning_rate": 1.77982545352954e-06, "loss": 0.5997, "step": 16389 }, { "epoch": 1.46, "grad_norm": 5.747776695134638, "learning_rate": 1.7792728759329175e-06, "loss": 0.5372, "step": 16390 }, { "epoch": 1.46, "grad_norm": 6.74506048532081, "learning_rate": 1.77872036556192e-06, "loss": 0.6079, "step": 16391 }, { "epoch": 1.46, "grad_norm": 6.552135228030642, "learning_rate": 1.7781679224280802e-06, "loss": 0.5907, "step": 16392 }, { "epoch": 1.46, "grad_norm": 7.350695114090226, "learning_rate": 1.7776155465429295e-06, "loss": 0.5805, "step": 16393 }, { "epoch": 1.46, "grad_norm": 6.917067070581846, "learning_rate": 1.7770632379179974e-06, "loss": 0.5582, "step": 16394 }, { "epoch": 1.46, "grad_norm": 6.370524421998532, "learning_rate": 1.7765109965648142e-06, "loss": 0.582, "step": 16395 }, { "epoch": 1.46, "grad_norm": 7.2700719100010165, "learning_rate": 1.7759588224949027e-06, "loss": 0.5602, "step": 16396 }, { "epoch": 1.46, "grad_norm": 5.855570517977772, "learning_rate": 1.7754067157197903e-06, "loss": 0.5916, "step": 16397 }, { "epoch": 1.46, "grad_norm": 7.285386008060553, "learning_rate": 1.7748546762510022e-06, "loss": 0.5792, "step": 16398 }, { "epoch": 1.46, "grad_norm": 6.296600356846596, "learning_rate": 1.7743027041000583e-06, "loss": 0.6275, "step": 16399 }, { "epoch": 1.46, "grad_norm": 5.478797359030748, "learning_rate": 1.7737507992784813e-06, "loss": 0.5353, "step": 16400 }, { "epoch": 1.46, "grad_norm": 8.339159481991633, "learning_rate": 1.7731989617977913e-06, "loss": 0.5457, "step": 16401 }, { "epoch": 1.46, "grad_norm": 5.755782852412119, "learning_rate": 1.7726471916695075e-06, "loss": 0.5983, "step": 16402 }, { "epoch": 1.46, "grad_norm": 6.488101470458473, "learning_rate": 1.7720954889051446e-06, "loss": 0.5713, "step": 16403 }, { "epoch": 1.46, "grad_norm": 8.369793803829351, "learning_rate": 1.7715438535162194e-06, "loss": 0.5355, "step": 16404 }, { "epoch": 1.46, "grad_norm": 6.696974310062657, "learning_rate": 1.770992285514246e-06, "loss": 0.5587, "step": 16405 }, { "epoch": 1.46, "grad_norm": 6.2362624015491805, "learning_rate": 1.770440784910737e-06, "loss": 0.5982, "step": 16406 }, { "epoch": 1.46, "grad_norm": 7.959145627490885, "learning_rate": 1.7698893517172044e-06, "loss": 0.5825, "step": 16407 }, { "epoch": 1.46, "grad_norm": 7.4254572471326235, "learning_rate": 1.7693379859451594e-06, "loss": 0.5254, "step": 16408 }, { "epoch": 1.46, "grad_norm": 5.312904735368366, "learning_rate": 1.7687866876061067e-06, "loss": 0.598, "step": 16409 }, { "epoch": 1.46, "grad_norm": 6.942059761584586, "learning_rate": 1.7682354567115561e-06, "loss": 0.5775, "step": 16410 }, { "epoch": 1.46, "grad_norm": 6.508029409927698, "learning_rate": 1.7676842932730126e-06, "loss": 0.6087, "step": 16411 }, { "epoch": 1.46, "grad_norm": 5.82634930122898, "learning_rate": 1.7671331973019812e-06, "loss": 0.5299, "step": 16412 }, { "epoch": 1.46, "grad_norm": 7.908623143678013, "learning_rate": 1.766582168809966e-06, "loss": 0.6611, "step": 16413 }, { "epoch": 1.46, "grad_norm": 5.988879632618105, "learning_rate": 1.7660312078084651e-06, "loss": 0.5802, "step": 16414 }, { "epoch": 1.46, "grad_norm": 8.511844104203053, "learning_rate": 1.7654803143089822e-06, "loss": 0.6115, "step": 16415 }, { "epoch": 1.46, "grad_norm": 7.095299196093812, "learning_rate": 1.764929488323013e-06, "loss": 0.6084, "step": 16416 }, { "epoch": 1.46, "grad_norm": 5.884386276805763, "learning_rate": 1.764378729862055e-06, "loss": 0.5469, "step": 16417 }, { "epoch": 1.46, "grad_norm": 7.265833856796682, "learning_rate": 1.7638280389376062e-06, "loss": 0.5899, "step": 16418 }, { "epoch": 1.46, "grad_norm": 7.113083319442705, "learning_rate": 1.7632774155611594e-06, "loss": 0.6116, "step": 16419 }, { "epoch": 1.46, "grad_norm": 5.526903764825625, "learning_rate": 1.7627268597442089e-06, "loss": 0.5557, "step": 16420 }, { "epoch": 1.46, "grad_norm": 7.449843735274143, "learning_rate": 1.7621763714982466e-06, "loss": 0.5136, "step": 16421 }, { "epoch": 1.47, "grad_norm": 4.935520975617091, "learning_rate": 1.7616259508347606e-06, "loss": 0.6259, "step": 16422 }, { "epoch": 1.47, "grad_norm": 8.446641740268513, "learning_rate": 1.7610755977652405e-06, "loss": 0.5741, "step": 16423 }, { "epoch": 1.47, "grad_norm": 4.523522043225614, "learning_rate": 1.7605253123011745e-06, "loss": 0.5812, "step": 16424 }, { "epoch": 1.47, "grad_norm": 7.100239845336078, "learning_rate": 1.7599750944540484e-06, "loss": 0.6504, "step": 16425 }, { "epoch": 1.47, "grad_norm": 10.508854736111482, "learning_rate": 1.7594249442353484e-06, "loss": 0.5553, "step": 16426 }, { "epoch": 1.47, "grad_norm": 5.95883471653413, "learning_rate": 1.7588748616565543e-06, "loss": 0.5536, "step": 16427 }, { "epoch": 1.47, "grad_norm": 5.4658683026330745, "learning_rate": 1.7583248467291508e-06, "loss": 0.6096, "step": 16428 }, { "epoch": 1.47, "grad_norm": 7.391304583632329, "learning_rate": 1.7577748994646154e-06, "loss": 0.6084, "step": 16429 }, { "epoch": 1.47, "grad_norm": 6.868654446102574, "learning_rate": 1.7572250198744289e-06, "loss": 0.5564, "step": 16430 }, { "epoch": 1.47, "grad_norm": 6.159560403993126, "learning_rate": 1.7566752079700683e-06, "loss": 0.6009, "step": 16431 }, { "epoch": 1.47, "grad_norm": 6.848815766143723, "learning_rate": 1.7561254637630104e-06, "loss": 0.5567, "step": 16432 }, { "epoch": 1.47, "grad_norm": 6.564617998226331, "learning_rate": 1.7555757872647305e-06, "loss": 0.5348, "step": 16433 }, { "epoch": 1.47, "grad_norm": 7.0063722694897645, "learning_rate": 1.7550261784866996e-06, "loss": 0.5491, "step": 16434 }, { "epoch": 1.47, "grad_norm": 6.641869635127833, "learning_rate": 1.754476637440391e-06, "loss": 0.5988, "step": 16435 }, { "epoch": 1.47, "grad_norm": 5.3912221846705295, "learning_rate": 1.7539271641372745e-06, "loss": 0.5293, "step": 16436 }, { "epoch": 1.47, "grad_norm": 5.379154189457372, "learning_rate": 1.75337775858882e-06, "loss": 0.6103, "step": 16437 }, { "epoch": 1.47, "grad_norm": 6.747307379007315, "learning_rate": 1.752828420806495e-06, "loss": 0.6146, "step": 16438 }, { "epoch": 1.47, "grad_norm": 6.141754531769357, "learning_rate": 1.7522791508017668e-06, "loss": 0.5775, "step": 16439 }, { "epoch": 1.47, "grad_norm": 8.071004271213777, "learning_rate": 1.7517299485860971e-06, "loss": 0.486, "step": 16440 }, { "epoch": 1.47, "grad_norm": 7.82141363190007, "learning_rate": 1.751180814170952e-06, "loss": 0.6774, "step": 16441 }, { "epoch": 1.47, "grad_norm": 8.419992518976178, "learning_rate": 1.7506317475677936e-06, "loss": 0.6162, "step": 16442 }, { "epoch": 1.47, "grad_norm": 6.605130950790347, "learning_rate": 1.75008274878808e-06, "loss": 0.6248, "step": 16443 }, { "epoch": 1.47, "grad_norm": 7.237289316305557, "learning_rate": 1.7495338178432714e-06, "loss": 0.5815, "step": 16444 }, { "epoch": 1.47, "grad_norm": 6.6334659407380725, "learning_rate": 1.7489849547448267e-06, "loss": 0.5891, "step": 16445 }, { "epoch": 1.47, "grad_norm": 8.116689993732846, "learning_rate": 1.7484361595042026e-06, "loss": 0.5295, "step": 16446 }, { "epoch": 1.47, "grad_norm": 6.211655853298133, "learning_rate": 1.7478874321328516e-06, "loss": 0.6426, "step": 16447 }, { "epoch": 1.47, "grad_norm": 5.115320588904381, "learning_rate": 1.747338772642228e-06, "loss": 0.6178, "step": 16448 }, { "epoch": 1.47, "grad_norm": 12.072628340157333, "learning_rate": 1.7467901810437842e-06, "loss": 0.6188, "step": 16449 }, { "epoch": 1.47, "grad_norm": 7.0148729756478065, "learning_rate": 1.7462416573489716e-06, "loss": 0.5996, "step": 16450 }, { "epoch": 1.47, "grad_norm": 5.552429292404611, "learning_rate": 1.7456932015692384e-06, "loss": 0.5699, "step": 16451 }, { "epoch": 1.47, "grad_norm": 6.783157919566073, "learning_rate": 1.745144813716035e-06, "loss": 0.5847, "step": 16452 }, { "epoch": 1.47, "grad_norm": 8.889362861991899, "learning_rate": 1.744596493800803e-06, "loss": 0.6019, "step": 16453 }, { "epoch": 1.47, "grad_norm": 5.9732792788612254, "learning_rate": 1.7440482418349908e-06, "loss": 0.5664, "step": 16454 }, { "epoch": 1.47, "grad_norm": 5.450408625638357, "learning_rate": 1.7435000578300404e-06, "loss": 0.6158, "step": 16455 }, { "epoch": 1.47, "grad_norm": 5.840335244916685, "learning_rate": 1.7429519417973972e-06, "loss": 0.5827, "step": 16456 }, { "epoch": 1.47, "grad_norm": 6.983676486315571, "learning_rate": 1.7424038937484973e-06, "loss": 0.5873, "step": 16457 }, { "epoch": 1.47, "grad_norm": 6.182682482052349, "learning_rate": 1.7418559136947822e-06, "loss": 0.5376, "step": 16458 }, { "epoch": 1.47, "grad_norm": 9.228246701456108, "learning_rate": 1.7413080016476908e-06, "loss": 0.5265, "step": 16459 }, { "epoch": 1.47, "grad_norm": 5.542901373354848, "learning_rate": 1.7407601576186573e-06, "loss": 0.659, "step": 16460 }, { "epoch": 1.47, "grad_norm": 6.100189893437488, "learning_rate": 1.740212381619118e-06, "loss": 0.5765, "step": 16461 }, { "epoch": 1.47, "grad_norm": 7.2278794175283245, "learning_rate": 1.739664673660506e-06, "loss": 0.5939, "step": 16462 }, { "epoch": 1.47, "grad_norm": 5.624046773275481, "learning_rate": 1.7391170337542545e-06, "loss": 0.6329, "step": 16463 }, { "epoch": 1.47, "grad_norm": 5.5140356523056155, "learning_rate": 1.7385694619117933e-06, "loss": 0.588, "step": 16464 }, { "epoch": 1.47, "grad_norm": 6.5590733528292775, "learning_rate": 1.738021958144554e-06, "loss": 0.6249, "step": 16465 }, { "epoch": 1.47, "grad_norm": 6.768323764133486, "learning_rate": 1.737474522463961e-06, "loss": 0.5638, "step": 16466 }, { "epoch": 1.47, "grad_norm": 6.3637754242454925, "learning_rate": 1.7369271548814427e-06, "loss": 0.5967, "step": 16467 }, { "epoch": 1.47, "grad_norm": 6.9021058065854755, "learning_rate": 1.7363798554084233e-06, "loss": 0.5742, "step": 16468 }, { "epoch": 1.47, "grad_norm": 7.388257866318392, "learning_rate": 1.7358326240563278e-06, "loss": 0.5997, "step": 16469 }, { "epoch": 1.47, "grad_norm": 7.920003303595473, "learning_rate": 1.7352854608365798e-06, "loss": 0.6088, "step": 16470 }, { "epoch": 1.47, "grad_norm": 5.591527304914028, "learning_rate": 1.734738365760596e-06, "loss": 0.5296, "step": 16471 }, { "epoch": 1.47, "grad_norm": 7.042667818418202, "learning_rate": 1.7341913388398001e-06, "loss": 0.5629, "step": 16472 }, { "epoch": 1.47, "grad_norm": 9.987595367452592, "learning_rate": 1.7336443800856063e-06, "loss": 0.5593, "step": 16473 }, { "epoch": 1.47, "grad_norm": 7.408967023544354, "learning_rate": 1.7330974895094326e-06, "loss": 0.5314, "step": 16474 }, { "epoch": 1.47, "grad_norm": 5.927844212267987, "learning_rate": 1.7325506671226944e-06, "loss": 0.6658, "step": 16475 }, { "epoch": 1.47, "grad_norm": 5.662738532041472, "learning_rate": 1.7320039129368054e-06, "loss": 0.5508, "step": 16476 }, { "epoch": 1.47, "grad_norm": 7.366333622631743, "learning_rate": 1.731457226963178e-06, "loss": 0.5661, "step": 16477 }, { "epoch": 1.47, "grad_norm": 6.4623796979354635, "learning_rate": 1.730910609213225e-06, "loss": 0.5793, "step": 16478 }, { "epoch": 1.47, "grad_norm": 7.251000645718733, "learning_rate": 1.7303640596983517e-06, "loss": 0.5397, "step": 16479 }, { "epoch": 1.47, "grad_norm": 5.894155544410825, "learning_rate": 1.7298175784299687e-06, "loss": 0.5972, "step": 16480 }, { "epoch": 1.47, "grad_norm": 8.253088409171935, "learning_rate": 1.729271165419482e-06, "loss": 0.5873, "step": 16481 }, { "epoch": 1.47, "grad_norm": 6.826955125219654, "learning_rate": 1.7287248206782969e-06, "loss": 0.5556, "step": 16482 }, { "epoch": 1.47, "grad_norm": 7.705706379338986, "learning_rate": 1.7281785442178172e-06, "loss": 0.5436, "step": 16483 }, { "epoch": 1.47, "grad_norm": 6.229898458422859, "learning_rate": 1.7276323360494468e-06, "loss": 0.5525, "step": 16484 }, { "epoch": 1.47, "grad_norm": 6.911040823939299, "learning_rate": 1.7270861961845853e-06, "loss": 0.6267, "step": 16485 }, { "epoch": 1.47, "grad_norm": 6.364408725427549, "learning_rate": 1.7265401246346297e-06, "loss": 0.5544, "step": 16486 }, { "epoch": 1.47, "grad_norm": 5.660698259834166, "learning_rate": 1.725994121410981e-06, "loss": 0.5935, "step": 16487 }, { "epoch": 1.47, "grad_norm": 6.0981224498678115, "learning_rate": 1.7254481865250343e-06, "loss": 0.5861, "step": 16488 }, { "epoch": 1.47, "grad_norm": 5.594231949013769, "learning_rate": 1.724902319988186e-06, "loss": 0.5656, "step": 16489 }, { "epoch": 1.47, "grad_norm": 5.446473556869346, "learning_rate": 1.7243565218118297e-06, "loss": 0.5833, "step": 16490 }, { "epoch": 1.47, "grad_norm": 6.892282604342074, "learning_rate": 1.7238107920073587e-06, "loss": 0.655, "step": 16491 }, { "epoch": 1.47, "grad_norm": 8.417659055164213, "learning_rate": 1.7232651305861608e-06, "loss": 0.567, "step": 16492 }, { "epoch": 1.47, "grad_norm": 7.6612810327057455, "learning_rate": 1.7227195375596278e-06, "loss": 0.5808, "step": 16493 }, { "epoch": 1.47, "grad_norm": 6.398299180595786, "learning_rate": 1.7221740129391473e-06, "loss": 0.532, "step": 16494 }, { "epoch": 1.47, "grad_norm": 5.89502126144822, "learning_rate": 1.7216285567361057e-06, "loss": 0.5547, "step": 16495 }, { "epoch": 1.47, "grad_norm": 9.069613885552089, "learning_rate": 1.721083168961889e-06, "loss": 0.5336, "step": 16496 }, { "epoch": 1.47, "grad_norm": 5.326908206658528, "learning_rate": 1.7205378496278819e-06, "loss": 0.5878, "step": 16497 }, { "epoch": 1.47, "grad_norm": 8.195943823514401, "learning_rate": 1.719992598745463e-06, "loss": 0.5581, "step": 16498 }, { "epoch": 1.47, "grad_norm": 4.890217353093632, "learning_rate": 1.7194474163260177e-06, "loss": 0.5668, "step": 16499 }, { "epoch": 1.47, "grad_norm": 7.2022225192996, "learning_rate": 1.718902302380922e-06, "loss": 0.5778, "step": 16500 }, { "epoch": 1.47, "grad_norm": 7.0303982239117975, "learning_rate": 1.7183572569215546e-06, "loss": 0.5649, "step": 16501 }, { "epoch": 1.47, "grad_norm": 7.243568532151577, "learning_rate": 1.7178122799592928e-06, "loss": 0.5626, "step": 16502 }, { "epoch": 1.47, "grad_norm": 7.7177653505792065, "learning_rate": 1.7172673715055122e-06, "loss": 0.6472, "step": 16503 }, { "epoch": 1.47, "grad_norm": 6.585908549472535, "learning_rate": 1.7167225315715875e-06, "loss": 0.5777, "step": 16504 }, { "epoch": 1.47, "grad_norm": 7.201407942730368, "learning_rate": 1.7161777601688884e-06, "loss": 0.593, "step": 16505 }, { "epoch": 1.47, "grad_norm": 7.551146527073849, "learning_rate": 1.715633057308787e-06, "loss": 0.564, "step": 16506 }, { "epoch": 1.47, "grad_norm": 5.726890711438248, "learning_rate": 1.7150884230026526e-06, "loss": 0.5585, "step": 16507 }, { "epoch": 1.47, "grad_norm": 5.84556524854544, "learning_rate": 1.714543857261854e-06, "loss": 0.5713, "step": 16508 }, { "epoch": 1.47, "grad_norm": 5.341648573196326, "learning_rate": 1.7139993600977584e-06, "loss": 0.5381, "step": 16509 }, { "epoch": 1.47, "grad_norm": 7.320066142562166, "learning_rate": 1.7134549315217285e-06, "loss": 0.5895, "step": 16510 }, { "epoch": 1.47, "grad_norm": 6.920735426355344, "learning_rate": 1.71291057154513e-06, "loss": 0.6122, "step": 16511 }, { "epoch": 1.47, "grad_norm": 6.448360109151868, "learning_rate": 1.7123662801793238e-06, "loss": 0.5792, "step": 16512 }, { "epoch": 1.47, "grad_norm": 10.719274782276246, "learning_rate": 1.7118220574356726e-06, "loss": 0.6349, "step": 16513 }, { "epoch": 1.47, "grad_norm": 7.767291162002003, "learning_rate": 1.7112779033255362e-06, "loss": 0.6315, "step": 16514 }, { "epoch": 1.47, "grad_norm": 8.247966498288287, "learning_rate": 1.7107338178602695e-06, "loss": 0.6481, "step": 16515 }, { "epoch": 1.47, "grad_norm": 7.334886885675697, "learning_rate": 1.7101898010512314e-06, "loss": 0.5758, "step": 16516 }, { "epoch": 1.47, "grad_norm": 7.10570135098028, "learning_rate": 1.7096458529097782e-06, "loss": 0.644, "step": 16517 }, { "epoch": 1.47, "grad_norm": 8.067835330079424, "learning_rate": 1.7091019734472602e-06, "loss": 0.5651, "step": 16518 }, { "epoch": 1.47, "grad_norm": 5.261835544281759, "learning_rate": 1.708558162675032e-06, "loss": 0.5476, "step": 16519 }, { "epoch": 1.47, "grad_norm": 6.093236247934998, "learning_rate": 1.7080144206044436e-06, "loss": 0.5982, "step": 16520 }, { "epoch": 1.47, "grad_norm": 6.434780478211395, "learning_rate": 1.707470747246845e-06, "loss": 0.6341, "step": 16521 }, { "epoch": 1.47, "grad_norm": 10.396918319603516, "learning_rate": 1.706927142613586e-06, "loss": 0.6313, "step": 16522 }, { "epoch": 1.47, "grad_norm": 6.9451153009657185, "learning_rate": 1.706383606716009e-06, "loss": 0.575, "step": 16523 }, { "epoch": 1.47, "grad_norm": 7.990815018571126, "learning_rate": 1.705840139565461e-06, "loss": 0.5477, "step": 16524 }, { "epoch": 1.47, "grad_norm": 5.336118001988609, "learning_rate": 1.7052967411732868e-06, "loss": 0.6527, "step": 16525 }, { "epoch": 1.47, "grad_norm": 8.381307056280331, "learning_rate": 1.7047534115508275e-06, "loss": 0.6262, "step": 16526 }, { "epoch": 1.47, "grad_norm": 7.39208435460862, "learning_rate": 1.7042101507094245e-06, "loss": 0.5986, "step": 16527 }, { "epoch": 1.47, "grad_norm": 11.190441279039538, "learning_rate": 1.7036669586604181e-06, "loss": 0.5429, "step": 16528 }, { "epoch": 1.47, "grad_norm": 7.83192644969971, "learning_rate": 1.7031238354151459e-06, "loss": 0.6681, "step": 16529 }, { "epoch": 1.47, "grad_norm": 6.951067847134404, "learning_rate": 1.7025807809849415e-06, "loss": 0.5555, "step": 16530 }, { "epoch": 1.47, "grad_norm": 7.559167036066518, "learning_rate": 1.7020377953811417e-06, "loss": 0.5874, "step": 16531 }, { "epoch": 1.47, "grad_norm": 6.37285094076611, "learning_rate": 1.7014948786150808e-06, "loss": 0.5652, "step": 16532 }, { "epoch": 1.47, "grad_norm": 6.91981596470154, "learning_rate": 1.7009520306980908e-06, "loss": 0.6155, "step": 16533 }, { "epoch": 1.47, "grad_norm": 8.937347893790458, "learning_rate": 1.7004092516415028e-06, "loss": 0.6173, "step": 16534 }, { "epoch": 1.48, "grad_norm": 6.0091233593499345, "learning_rate": 1.6998665414566468e-06, "loss": 0.6022, "step": 16535 }, { "epoch": 1.48, "grad_norm": 4.636789076231314, "learning_rate": 1.6993239001548479e-06, "loss": 0.5725, "step": 16536 }, { "epoch": 1.48, "grad_norm": 10.253770329494172, "learning_rate": 1.698781327747434e-06, "loss": 0.5997, "step": 16537 }, { "epoch": 1.48, "grad_norm": 5.428425380032173, "learning_rate": 1.698238824245731e-06, "loss": 0.5878, "step": 16538 }, { "epoch": 1.48, "grad_norm": 4.260750680893849, "learning_rate": 1.6976963896610616e-06, "loss": 0.5474, "step": 16539 }, { "epoch": 1.48, "grad_norm": 7.594576995943618, "learning_rate": 1.697154024004748e-06, "loss": 0.6043, "step": 16540 }, { "epoch": 1.48, "grad_norm": 7.663287193126755, "learning_rate": 1.6966117272881128e-06, "loss": 0.5747, "step": 16541 }, { "epoch": 1.48, "grad_norm": 6.361909326069752, "learning_rate": 1.696069499522473e-06, "loss": 0.5878, "step": 16542 }, { "epoch": 1.48, "grad_norm": 9.697076854712439, "learning_rate": 1.6955273407191458e-06, "loss": 0.5627, "step": 16543 }, { "epoch": 1.48, "grad_norm": 7.80678395347635, "learning_rate": 1.6949852508894487e-06, "loss": 0.5632, "step": 16544 }, { "epoch": 1.48, "grad_norm": 7.486827688184406, "learning_rate": 1.6944432300446962e-06, "loss": 0.5889, "step": 16545 }, { "epoch": 1.48, "grad_norm": 7.02809799268602, "learning_rate": 1.6939012781962027e-06, "loss": 0.5344, "step": 16546 }, { "epoch": 1.48, "grad_norm": 5.378808999105505, "learning_rate": 1.6933593953552797e-06, "loss": 0.5255, "step": 16547 }, { "epoch": 1.48, "grad_norm": 7.431992336574399, "learning_rate": 1.6928175815332393e-06, "loss": 0.5724, "step": 16548 }, { "epoch": 1.48, "grad_norm": 5.152550001983407, "learning_rate": 1.6922758367413877e-06, "loss": 0.5773, "step": 16549 }, { "epoch": 1.48, "grad_norm": 6.393385165646159, "learning_rate": 1.6917341609910343e-06, "loss": 0.5188, "step": 16550 }, { "epoch": 1.48, "grad_norm": 8.068840934993005, "learning_rate": 1.6911925542934854e-06, "loss": 0.5988, "step": 16551 }, { "epoch": 1.48, "grad_norm": 8.280324837267758, "learning_rate": 1.6906510166600453e-06, "loss": 0.5786, "step": 16552 }, { "epoch": 1.48, "grad_norm": 8.207147491305431, "learning_rate": 1.6901095481020186e-06, "loss": 0.5687, "step": 16553 }, { "epoch": 1.48, "grad_norm": 6.527103629119033, "learning_rate": 1.689568148630708e-06, "loss": 0.5611, "step": 16554 }, { "epoch": 1.48, "grad_norm": 7.7652518075511, "learning_rate": 1.6890268182574105e-06, "loss": 0.5898, "step": 16555 }, { "epoch": 1.48, "grad_norm": 7.994604716928091, "learning_rate": 1.6884855569934277e-06, "loss": 0.628, "step": 16556 }, { "epoch": 1.48, "grad_norm": 8.896071386189432, "learning_rate": 1.6879443648500583e-06, "loss": 0.6175, "step": 16557 }, { "epoch": 1.48, "grad_norm": 5.963064498233573, "learning_rate": 1.6874032418385956e-06, "loss": 0.5787, "step": 16558 }, { "epoch": 1.48, "grad_norm": 7.594275067964992, "learning_rate": 1.6868621879703352e-06, "loss": 0.5915, "step": 16559 }, { "epoch": 1.48, "grad_norm": 5.9111448819211185, "learning_rate": 1.6863212032565711e-06, "loss": 0.5795, "step": 16560 }, { "epoch": 1.48, "grad_norm": 6.497332272768373, "learning_rate": 1.685780287708597e-06, "loss": 0.5584, "step": 16561 }, { "epoch": 1.48, "grad_norm": 5.393054669006064, "learning_rate": 1.6852394413376993e-06, "loss": 0.6107, "step": 16562 }, { "epoch": 1.48, "grad_norm": 6.186714646085623, "learning_rate": 1.6846986641551698e-06, "loss": 0.6104, "step": 16563 }, { "epoch": 1.48, "grad_norm": 6.281449912520924, "learning_rate": 1.6841579561722948e-06, "loss": 0.611, "step": 16564 }, { "epoch": 1.48, "grad_norm": 6.992975683782048, "learning_rate": 1.6836173174003612e-06, "loss": 0.6178, "step": 16565 }, { "epoch": 1.48, "grad_norm": 7.38173420698355, "learning_rate": 1.6830767478506526e-06, "loss": 0.6153, "step": 16566 }, { "epoch": 1.48, "grad_norm": 5.013868596119787, "learning_rate": 1.6825362475344554e-06, "loss": 0.5972, "step": 16567 }, { "epoch": 1.48, "grad_norm": 4.969636074282615, "learning_rate": 1.6819958164630467e-06, "loss": 0.5447, "step": 16568 }, { "epoch": 1.48, "grad_norm": 5.234929506250406, "learning_rate": 1.6814554546477097e-06, "loss": 0.5399, "step": 16569 }, { "epoch": 1.48, "grad_norm": 4.983584075996516, "learning_rate": 1.6809151620997223e-06, "loss": 0.5587, "step": 16570 }, { "epoch": 1.48, "grad_norm": 12.099099127040672, "learning_rate": 1.6803749388303635e-06, "loss": 0.578, "step": 16571 }, { "epoch": 1.48, "grad_norm": 9.660101057504896, "learning_rate": 1.6798347848509066e-06, "loss": 0.5615, "step": 16572 }, { "epoch": 1.48, "grad_norm": 6.513756195581352, "learning_rate": 1.6792947001726272e-06, "loss": 0.5342, "step": 16573 }, { "epoch": 1.48, "grad_norm": 6.406667380212826, "learning_rate": 1.6787546848068009e-06, "loss": 0.5942, "step": 16574 }, { "epoch": 1.48, "grad_norm": 5.471590197973633, "learning_rate": 1.6782147387646952e-06, "loss": 0.5446, "step": 16575 }, { "epoch": 1.48, "grad_norm": 7.306045701849128, "learning_rate": 1.6776748620575827e-06, "loss": 0.5858, "step": 16576 }, { "epoch": 1.48, "grad_norm": 6.199609514298581, "learning_rate": 1.6771350546967313e-06, "loss": 0.6053, "step": 16577 }, { "epoch": 1.48, "grad_norm": 4.936926547546596, "learning_rate": 1.6765953166934084e-06, "loss": 0.5273, "step": 16578 }, { "epoch": 1.48, "grad_norm": 6.049296351796455, "learning_rate": 1.6760556480588808e-06, "loss": 0.5647, "step": 16579 }, { "epoch": 1.48, "grad_norm": 6.824906192656742, "learning_rate": 1.675516048804413e-06, "loss": 0.5832, "step": 16580 }, { "epoch": 1.48, "grad_norm": 5.627082339631345, "learning_rate": 1.6749765189412664e-06, "loss": 0.5755, "step": 16581 }, { "epoch": 1.48, "grad_norm": 7.630146473809049, "learning_rate": 1.674437058480703e-06, "loss": 0.6432, "step": 16582 }, { "epoch": 1.48, "grad_norm": 5.651120908544123, "learning_rate": 1.673897667433983e-06, "loss": 0.5461, "step": 16583 }, { "epoch": 1.48, "grad_norm": 4.914929660480168, "learning_rate": 1.6733583458123653e-06, "loss": 0.5926, "step": 16584 }, { "epoch": 1.48, "grad_norm": 6.539824555046824, "learning_rate": 1.6728190936271083e-06, "loss": 0.5693, "step": 16585 }, { "epoch": 1.48, "grad_norm": 6.8287051149684475, "learning_rate": 1.6722799108894649e-06, "loss": 0.5877, "step": 16586 }, { "epoch": 1.48, "grad_norm": 6.955510709179059, "learning_rate": 1.6717407976106924e-06, "loss": 0.5121, "step": 16587 }, { "epoch": 1.48, "grad_norm": 6.663675724114788, "learning_rate": 1.6712017538020403e-06, "loss": 0.5384, "step": 16588 }, { "epoch": 1.48, "grad_norm": 6.750613381587288, "learning_rate": 1.670662779474762e-06, "loss": 0.5577, "step": 16589 }, { "epoch": 1.48, "grad_norm": 6.186775837948213, "learning_rate": 1.6701238746401067e-06, "loss": 0.5424, "step": 16590 }, { "epoch": 1.48, "grad_norm": 5.654051989346489, "learning_rate": 1.6695850393093232e-06, "loss": 0.5128, "step": 16591 }, { "epoch": 1.48, "grad_norm": 7.916845528115406, "learning_rate": 1.6690462734936585e-06, "loss": 0.6194, "step": 16592 }, { "epoch": 1.48, "grad_norm": 6.873349880905711, "learning_rate": 1.66850757720436e-06, "loss": 0.5328, "step": 16593 }, { "epoch": 1.48, "grad_norm": 10.05275849358468, "learning_rate": 1.6679689504526681e-06, "loss": 0.577, "step": 16594 }, { "epoch": 1.48, "grad_norm": 5.759958750937419, "learning_rate": 1.6674303932498276e-06, "loss": 0.5411, "step": 16595 }, { "epoch": 1.48, "grad_norm": 10.178021153286334, "learning_rate": 1.6668919056070791e-06, "loss": 0.622, "step": 16596 }, { "epoch": 1.48, "grad_norm": 9.991715223919822, "learning_rate": 1.666353487535663e-06, "loss": 0.6455, "step": 16597 }, { "epoch": 1.48, "grad_norm": 8.087950414974, "learning_rate": 1.665815139046819e-06, "loss": 0.604, "step": 16598 }, { "epoch": 1.48, "grad_norm": 6.033741306683741, "learning_rate": 1.6652768601517806e-06, "loss": 0.5777, "step": 16599 }, { "epoch": 1.48, "grad_norm": 5.439815337910645, "learning_rate": 1.6647386508617859e-06, "loss": 0.6402, "step": 16600 }, { "epoch": 1.48, "grad_norm": 8.494904963878362, "learning_rate": 1.6642005111880667e-06, "loss": 0.592, "step": 16601 }, { "epoch": 1.48, "grad_norm": 6.457864431489162, "learning_rate": 1.663662441141856e-06, "loss": 0.5947, "step": 16602 }, { "epoch": 1.48, "grad_norm": 5.056866316047348, "learning_rate": 1.663124440734386e-06, "loss": 0.5652, "step": 16603 }, { "epoch": 1.48, "grad_norm": 5.995928368157356, "learning_rate": 1.6625865099768857e-06, "loss": 0.5752, "step": 16604 }, { "epoch": 1.48, "grad_norm": 6.747724772196537, "learning_rate": 1.6620486488805843e-06, "loss": 0.6337, "step": 16605 }, { "epoch": 1.48, "grad_norm": 9.35551256479892, "learning_rate": 1.6615108574567058e-06, "loss": 0.57, "step": 16606 }, { "epoch": 1.48, "grad_norm": 5.103698483276872, "learning_rate": 1.6609731357164771e-06, "loss": 0.5674, "step": 16607 }, { "epoch": 1.48, "grad_norm": 7.79342806235292, "learning_rate": 1.6604354836711223e-06, "loss": 0.5899, "step": 16608 }, { "epoch": 1.48, "grad_norm": 6.62488845513608, "learning_rate": 1.6598979013318627e-06, "loss": 0.5268, "step": 16609 }, { "epoch": 1.48, "grad_norm": 6.910075097509002, "learning_rate": 1.6593603887099197e-06, "loss": 0.611, "step": 16610 }, { "epoch": 1.48, "grad_norm": 6.7007034101346425, "learning_rate": 1.6588229458165146e-06, "loss": 0.5504, "step": 16611 }, { "epoch": 1.48, "grad_norm": 8.599594331958338, "learning_rate": 1.6582855726628616e-06, "loss": 0.5729, "step": 16612 }, { "epoch": 1.48, "grad_norm": 6.1839356863591, "learning_rate": 1.6577482692601792e-06, "loss": 0.6002, "step": 16613 }, { "epoch": 1.48, "grad_norm": 5.704475860980167, "learning_rate": 1.6572110356196824e-06, "loss": 0.6151, "step": 16614 }, { "epoch": 1.48, "grad_norm": 5.201011643137781, "learning_rate": 1.656673871752586e-06, "loss": 0.5659, "step": 16615 }, { "epoch": 1.48, "grad_norm": 6.193811455059978, "learning_rate": 1.6561367776700992e-06, "loss": 0.4949, "step": 16616 }, { "epoch": 1.48, "grad_norm": 6.322774419402176, "learning_rate": 1.6555997533834344e-06, "loss": 0.5575, "step": 16617 }, { "epoch": 1.48, "grad_norm": 7.898296678091713, "learning_rate": 1.6550627989038021e-06, "loss": 0.5205, "step": 16618 }, { "epoch": 1.48, "grad_norm": 6.375989766115637, "learning_rate": 1.6545259142424068e-06, "loss": 0.5927, "step": 16619 }, { "epoch": 1.48, "grad_norm": 7.0032702986742095, "learning_rate": 1.6539890994104575e-06, "loss": 0.5731, "step": 16620 }, { "epoch": 1.48, "grad_norm": 6.14675028719798, "learning_rate": 1.653452354419157e-06, "loss": 0.5696, "step": 16621 }, { "epoch": 1.48, "grad_norm": 5.5899674622885644, "learning_rate": 1.6529156792797103e-06, "loss": 0.558, "step": 16622 }, { "epoch": 1.48, "grad_norm": 7.542103532816081, "learning_rate": 1.6523790740033186e-06, "loss": 0.5681, "step": 16623 }, { "epoch": 1.48, "grad_norm": 5.912798220405645, "learning_rate": 1.6518425386011843e-06, "loss": 0.5484, "step": 16624 }, { "epoch": 1.48, "grad_norm": 7.615110354113473, "learning_rate": 1.6513060730845033e-06, "loss": 0.6139, "step": 16625 }, { "epoch": 1.48, "grad_norm": 5.181576852344464, "learning_rate": 1.6507696774644738e-06, "loss": 0.5209, "step": 16626 }, { "epoch": 1.48, "grad_norm": 5.8786776000173315, "learning_rate": 1.6502333517522934e-06, "loss": 0.5269, "step": 16627 }, { "epoch": 1.48, "grad_norm": 6.1049147940846185, "learning_rate": 1.6496970959591551e-06, "loss": 0.6305, "step": 16628 }, { "epoch": 1.48, "grad_norm": 7.552948581576313, "learning_rate": 1.6491609100962552e-06, "loss": 0.5346, "step": 16629 }, { "epoch": 1.48, "grad_norm": 6.447048407610653, "learning_rate": 1.6486247941747814e-06, "loss": 0.5712, "step": 16630 }, { "epoch": 1.48, "grad_norm": 5.47248307326096, "learning_rate": 1.648088748205927e-06, "loss": 0.5357, "step": 16631 }, { "epoch": 1.48, "grad_norm": 5.0205452795778776, "learning_rate": 1.6475527722008778e-06, "loss": 0.562, "step": 16632 }, { "epoch": 1.48, "grad_norm": 8.03459093078818, "learning_rate": 1.647016866170823e-06, "loss": 0.5862, "step": 16633 }, { "epoch": 1.48, "grad_norm": 6.164269649813392, "learning_rate": 1.6464810301269478e-06, "loss": 0.5868, "step": 16634 }, { "epoch": 1.48, "grad_norm": 6.507291188247103, "learning_rate": 1.6459452640804375e-06, "loss": 0.527, "step": 16635 }, { "epoch": 1.48, "grad_norm": 6.965831526796126, "learning_rate": 1.6454095680424742e-06, "loss": 0.5659, "step": 16636 }, { "epoch": 1.48, "grad_norm": 6.493163489805926, "learning_rate": 1.6448739420242415e-06, "loss": 0.5626, "step": 16637 }, { "epoch": 1.48, "grad_norm": 6.712547380098561, "learning_rate": 1.644338386036916e-06, "loss": 0.6207, "step": 16638 }, { "epoch": 1.48, "grad_norm": 7.490833915331839, "learning_rate": 1.6438029000916777e-06, "loss": 0.6342, "step": 16639 }, { "epoch": 1.48, "grad_norm": 6.854992469299814, "learning_rate": 1.6432674841997043e-06, "loss": 0.6371, "step": 16640 }, { "epoch": 1.48, "grad_norm": 6.902218775789666, "learning_rate": 1.642732138372171e-06, "loss": 0.5702, "step": 16641 }, { "epoch": 1.48, "grad_norm": 6.9455572731226685, "learning_rate": 1.6421968626202517e-06, "loss": 0.6081, "step": 16642 }, { "epoch": 1.48, "grad_norm": 5.689781124202494, "learning_rate": 1.6416616569551215e-06, "loss": 0.5497, "step": 16643 }, { "epoch": 1.48, "grad_norm": 5.960609559904213, "learning_rate": 1.641126521387949e-06, "loss": 0.5892, "step": 16644 }, { "epoch": 1.48, "grad_norm": 6.140138300341864, "learning_rate": 1.6405914559299035e-06, "loss": 0.6215, "step": 16645 }, { "epoch": 1.48, "grad_norm": 9.515282672561462, "learning_rate": 1.6400564605921544e-06, "loss": 0.6107, "step": 16646 }, { "epoch": 1.49, "grad_norm": 7.604401659373851, "learning_rate": 1.639521535385868e-06, "loss": 0.5802, "step": 16647 }, { "epoch": 1.49, "grad_norm": 7.001151857977251, "learning_rate": 1.6389866803222103e-06, "loss": 0.5914, "step": 16648 }, { "epoch": 1.49, "grad_norm": 7.0211447620198095, "learning_rate": 1.6384518954123458e-06, "loss": 0.5712, "step": 16649 }, { "epoch": 1.49, "grad_norm": 5.485660832722303, "learning_rate": 1.637917180667437e-06, "loss": 0.6246, "step": 16650 }, { "epoch": 1.49, "grad_norm": 8.518229386585967, "learning_rate": 1.6373825360986433e-06, "loss": 0.5964, "step": 16651 }, { "epoch": 1.49, "grad_norm": 6.284903333346108, "learning_rate": 1.6368479617171252e-06, "loss": 0.532, "step": 16652 }, { "epoch": 1.49, "grad_norm": 5.0209026568469675, "learning_rate": 1.6363134575340406e-06, "loss": 0.6041, "step": 16653 }, { "epoch": 1.49, "grad_norm": 6.708289448200882, "learning_rate": 1.6357790235605458e-06, "loss": 0.6003, "step": 16654 }, { "epoch": 1.49, "grad_norm": 5.290959956570718, "learning_rate": 1.635244659807797e-06, "loss": 0.6078, "step": 16655 }, { "epoch": 1.49, "grad_norm": 5.72734090756184, "learning_rate": 1.6347103662869484e-06, "loss": 0.594, "step": 16656 }, { "epoch": 1.49, "grad_norm": 4.965067709899691, "learning_rate": 1.6341761430091496e-06, "loss": 0.5733, "step": 16657 }, { "epoch": 1.49, "grad_norm": 6.366230254486659, "learning_rate": 1.6336419899855543e-06, "loss": 0.563, "step": 16658 }, { "epoch": 1.49, "grad_norm": 7.909160635289137, "learning_rate": 1.6331079072273088e-06, "loss": 0.6554, "step": 16659 }, { "epoch": 1.49, "grad_norm": 7.033414196553512, "learning_rate": 1.6325738947455622e-06, "loss": 0.5714, "step": 16660 }, { "epoch": 1.49, "grad_norm": 7.635448458332921, "learning_rate": 1.6320399525514607e-06, "loss": 0.6046, "step": 16661 }, { "epoch": 1.49, "grad_norm": 5.713828785374766, "learning_rate": 1.63150608065615e-06, "loss": 0.5734, "step": 16662 }, { "epoch": 1.49, "grad_norm": 7.271150437677039, "learning_rate": 1.6309722790707743e-06, "loss": 0.6403, "step": 16663 }, { "epoch": 1.49, "grad_norm": 7.025148751273399, "learning_rate": 1.6304385478064733e-06, "loss": 0.5598, "step": 16664 }, { "epoch": 1.49, "grad_norm": 5.808731394193037, "learning_rate": 1.6299048868743877e-06, "loss": 0.5412, "step": 16665 }, { "epoch": 1.49, "grad_norm": 5.229872536117584, "learning_rate": 1.6293712962856573e-06, "loss": 0.5343, "step": 16666 }, { "epoch": 1.49, "grad_norm": 5.336061732774519, "learning_rate": 1.6288377760514196e-06, "loss": 0.6017, "step": 16667 }, { "epoch": 1.49, "grad_norm": 5.610966991128474, "learning_rate": 1.6283043261828108e-06, "loss": 0.5718, "step": 16668 }, { "epoch": 1.49, "grad_norm": 6.2715599096960215, "learning_rate": 1.6277709466909665e-06, "loss": 0.6031, "step": 16669 }, { "epoch": 1.49, "grad_norm": 4.806732465137839, "learning_rate": 1.6272376375870175e-06, "loss": 0.5427, "step": 16670 }, { "epoch": 1.49, "grad_norm": 6.625003950590108, "learning_rate": 1.6267043988820964e-06, "loss": 0.5204, "step": 16671 }, { "epoch": 1.49, "grad_norm": 7.74521689137516, "learning_rate": 1.626171230587335e-06, "loss": 0.5901, "step": 16672 }, { "epoch": 1.49, "grad_norm": 11.117689826062975, "learning_rate": 1.6256381327138592e-06, "loss": 0.6322, "step": 16673 }, { "epoch": 1.49, "grad_norm": 6.797582290533975, "learning_rate": 1.6251051052727972e-06, "loss": 0.622, "step": 16674 }, { "epoch": 1.49, "grad_norm": 6.351792263753973, "learning_rate": 1.6245721482752758e-06, "loss": 0.6261, "step": 16675 }, { "epoch": 1.49, "grad_norm": 6.303232590996054, "learning_rate": 1.6240392617324201e-06, "loss": 0.565, "step": 16676 }, { "epoch": 1.49, "grad_norm": 6.05355647029909, "learning_rate": 1.62350644565535e-06, "loss": 0.555, "step": 16677 }, { "epoch": 1.49, "grad_norm": 6.650389682371294, "learning_rate": 1.6229737000551887e-06, "loss": 0.5439, "step": 16678 }, { "epoch": 1.49, "grad_norm": 6.378009632278634, "learning_rate": 1.6224410249430556e-06, "loss": 0.5732, "step": 16679 }, { "epoch": 1.49, "grad_norm": 6.793531313277027, "learning_rate": 1.6219084203300694e-06, "loss": 0.5668, "step": 16680 }, { "epoch": 1.49, "grad_norm": 5.790090437937187, "learning_rate": 1.6213758862273488e-06, "loss": 0.5826, "step": 16681 }, { "epoch": 1.49, "grad_norm": 6.837965475276673, "learning_rate": 1.6208434226460057e-06, "loss": 0.6326, "step": 16682 }, { "epoch": 1.49, "grad_norm": 8.443416630272074, "learning_rate": 1.6203110295971564e-06, "loss": 0.6027, "step": 16683 }, { "epoch": 1.49, "grad_norm": 6.528876909275584, "learning_rate": 1.6197787070919125e-06, "loss": 0.5794, "step": 16684 }, { "epoch": 1.49, "grad_norm": 7.676104133551258, "learning_rate": 1.6192464551413856e-06, "loss": 0.5879, "step": 16685 }, { "epoch": 1.49, "grad_norm": 5.814349724983146, "learning_rate": 1.6187142737566853e-06, "loss": 0.5529, "step": 16686 }, { "epoch": 1.49, "grad_norm": 8.292522033463866, "learning_rate": 1.6181821629489207e-06, "loss": 0.5873, "step": 16687 }, { "epoch": 1.49, "grad_norm": 6.261182769541119, "learning_rate": 1.617650122729198e-06, "loss": 0.5623, "step": 16688 }, { "epoch": 1.49, "grad_norm": 5.772272625769619, "learning_rate": 1.61711815310862e-06, "loss": 0.6192, "step": 16689 }, { "epoch": 1.49, "grad_norm": 7.407282572254293, "learning_rate": 1.6165862540982925e-06, "loss": 0.4841, "step": 16690 }, { "epoch": 1.49, "grad_norm": 6.67164465366453, "learning_rate": 1.6160544257093169e-06, "loss": 0.6261, "step": 16691 }, { "epoch": 1.49, "grad_norm": 7.991257380542349, "learning_rate": 1.6155226679527947e-06, "loss": 0.5292, "step": 16692 }, { "epoch": 1.49, "grad_norm": 4.951781130407027, "learning_rate": 1.614990980839825e-06, "loss": 0.5387, "step": 16693 }, { "epoch": 1.49, "grad_norm": 10.14783282544295, "learning_rate": 1.6144593643815072e-06, "loss": 0.5465, "step": 16694 }, { "epoch": 1.49, "grad_norm": 7.539908575903127, "learning_rate": 1.613927818588934e-06, "loss": 0.5492, "step": 16695 }, { "epoch": 1.49, "grad_norm": 8.872591144314589, "learning_rate": 1.613396343473202e-06, "loss": 0.5717, "step": 16696 }, { "epoch": 1.49, "grad_norm": 8.507985598624865, "learning_rate": 1.6128649390454054e-06, "loss": 0.6166, "step": 16697 }, { "epoch": 1.49, "grad_norm": 6.872565254484306, "learning_rate": 1.6123336053166356e-06, "loss": 0.6033, "step": 16698 }, { "epoch": 1.49, "grad_norm": 8.072640520381217, "learning_rate": 1.6118023422979824e-06, "loss": 0.6203, "step": 16699 }, { "epoch": 1.49, "grad_norm": 6.614923398617723, "learning_rate": 1.611271150000537e-06, "loss": 0.6769, "step": 16700 }, { "epoch": 1.49, "grad_norm": 5.820166654772655, "learning_rate": 1.6107400284353853e-06, "loss": 0.5871, "step": 16701 }, { "epoch": 1.49, "grad_norm": 5.984714869319991, "learning_rate": 1.6102089776136115e-06, "loss": 0.5826, "step": 16702 }, { "epoch": 1.49, "grad_norm": 4.5894678041458015, "learning_rate": 1.6096779975463017e-06, "loss": 0.5909, "step": 16703 }, { "epoch": 1.49, "grad_norm": 4.909938221599264, "learning_rate": 1.6091470882445392e-06, "loss": 0.5375, "step": 16704 }, { "epoch": 1.49, "grad_norm": 6.898843697070322, "learning_rate": 1.6086162497194053e-06, "loss": 0.5818, "step": 16705 }, { "epoch": 1.49, "grad_norm": 5.794170215688313, "learning_rate": 1.6080854819819802e-06, "loss": 0.5788, "step": 16706 }, { "epoch": 1.49, "grad_norm": 5.79958408572142, "learning_rate": 1.6075547850433438e-06, "loss": 0.6074, "step": 16707 }, { "epoch": 1.49, "grad_norm": 6.464420432732051, "learning_rate": 1.6070241589145707e-06, "loss": 0.6008, "step": 16708 }, { "epoch": 1.49, "grad_norm": 6.914940611028755, "learning_rate": 1.6064936036067375e-06, "loss": 0.6076, "step": 16709 }, { "epoch": 1.49, "grad_norm": 6.458014272574896, "learning_rate": 1.6059631191309188e-06, "loss": 0.5346, "step": 16710 }, { "epoch": 1.49, "grad_norm": 7.355617067042456, "learning_rate": 1.6054327054981872e-06, "loss": 0.5485, "step": 16711 }, { "epoch": 1.49, "grad_norm": 5.566484226785939, "learning_rate": 1.6049023627196142e-06, "loss": 0.5595, "step": 16712 }, { "epoch": 1.49, "grad_norm": 7.597167938616306, "learning_rate": 1.6043720908062704e-06, "loss": 0.6605, "step": 16713 }, { "epoch": 1.49, "grad_norm": 8.263576046207257, "learning_rate": 1.6038418897692216e-06, "loss": 0.5941, "step": 16714 }, { "epoch": 1.49, "grad_norm": 9.39715580968337, "learning_rate": 1.6033117596195374e-06, "loss": 0.5778, "step": 16715 }, { "epoch": 1.49, "grad_norm": 5.848323229540237, "learning_rate": 1.60278170036828e-06, "loss": 0.5315, "step": 16716 }, { "epoch": 1.49, "grad_norm": 6.310809064454719, "learning_rate": 1.6022517120265147e-06, "loss": 0.6096, "step": 16717 }, { "epoch": 1.49, "grad_norm": 7.366407359242103, "learning_rate": 1.6017217946053043e-06, "loss": 0.5357, "step": 16718 }, { "epoch": 1.49, "grad_norm": 6.382373595327632, "learning_rate": 1.601191948115709e-06, "loss": 0.5756, "step": 16719 }, { "epoch": 1.49, "grad_norm": 6.74537348680575, "learning_rate": 1.6006621725687898e-06, "loss": 0.6326, "step": 16720 }, { "epoch": 1.49, "grad_norm": 6.545372676706489, "learning_rate": 1.6001324679756025e-06, "loss": 0.5688, "step": 16721 }, { "epoch": 1.49, "grad_norm": 7.361894963171339, "learning_rate": 1.5996028343472037e-06, "loss": 0.5889, "step": 16722 }, { "epoch": 1.49, "grad_norm": 5.925789878060734, "learning_rate": 1.5990732716946494e-06, "loss": 0.5602, "step": 16723 }, { "epoch": 1.49, "grad_norm": 7.660571515877035, "learning_rate": 1.5985437800289927e-06, "loss": 0.594, "step": 16724 }, { "epoch": 1.49, "grad_norm": 7.053704217982891, "learning_rate": 1.598014359361285e-06, "loss": 0.6119, "step": 16725 }, { "epoch": 1.49, "grad_norm": 8.808648241283187, "learning_rate": 1.5974850097025796e-06, "loss": 0.589, "step": 16726 }, { "epoch": 1.49, "grad_norm": 5.834943724720598, "learning_rate": 1.596955731063921e-06, "loss": 0.5673, "step": 16727 }, { "epoch": 1.49, "grad_norm": 6.826934484310981, "learning_rate": 1.5964265234563598e-06, "loss": 0.5569, "step": 16728 }, { "epoch": 1.49, "grad_norm": 8.48301202766483, "learning_rate": 1.5958973868909405e-06, "loss": 0.535, "step": 16729 }, { "epoch": 1.49, "grad_norm": 7.5721790617803855, "learning_rate": 1.5953683213787103e-06, "loss": 0.588, "step": 16730 }, { "epoch": 1.49, "grad_norm": 6.8207179456687514, "learning_rate": 1.5948393269307089e-06, "loss": 0.5193, "step": 16731 }, { "epoch": 1.49, "grad_norm": 11.28320685927629, "learning_rate": 1.5943104035579788e-06, "loss": 0.663, "step": 16732 }, { "epoch": 1.49, "grad_norm": 5.35600836946967, "learning_rate": 1.5937815512715626e-06, "loss": 0.6383, "step": 16733 }, { "epoch": 1.49, "grad_norm": 5.968123952905001, "learning_rate": 1.5932527700824958e-06, "loss": 0.5437, "step": 16734 }, { "epoch": 1.49, "grad_norm": 5.988569404127991, "learning_rate": 1.5927240600018163e-06, "loss": 0.5514, "step": 16735 }, { "epoch": 1.49, "grad_norm": 4.912546840163095, "learning_rate": 1.5921954210405604e-06, "loss": 0.575, "step": 16736 }, { "epoch": 1.49, "grad_norm": 6.693930891212401, "learning_rate": 1.5916668532097623e-06, "loss": 0.6015, "step": 16737 }, { "epoch": 1.49, "grad_norm": 5.557722772014418, "learning_rate": 1.5911383565204542e-06, "loss": 0.5781, "step": 16738 }, { "epoch": 1.49, "grad_norm": 4.637071915738045, "learning_rate": 1.5906099309836692e-06, "loss": 0.5912, "step": 16739 }, { "epoch": 1.49, "grad_norm": 8.222988690762703, "learning_rate": 1.5900815766104343e-06, "loss": 0.5736, "step": 16740 }, { "epoch": 1.49, "grad_norm": 6.643031930790192, "learning_rate": 1.5895532934117786e-06, "loss": 0.6055, "step": 16741 }, { "epoch": 1.49, "grad_norm": 7.526633741889608, "learning_rate": 1.589025081398729e-06, "loss": 0.6608, "step": 16742 }, { "epoch": 1.49, "grad_norm": 6.92228043208061, "learning_rate": 1.5884969405823108e-06, "loss": 0.5515, "step": 16743 }, { "epoch": 1.49, "grad_norm": 37.175921219515786, "learning_rate": 1.5879688709735503e-06, "loss": 0.5627, "step": 16744 }, { "epoch": 1.49, "grad_norm": 7.545507744545335, "learning_rate": 1.587440872583465e-06, "loss": 0.6245, "step": 16745 }, { "epoch": 1.49, "grad_norm": 5.139268806700995, "learning_rate": 1.5869129454230803e-06, "loss": 0.5421, "step": 16746 }, { "epoch": 1.49, "grad_norm": 5.557413508936121, "learning_rate": 1.5863850895034111e-06, "loss": 0.5396, "step": 16747 }, { "epoch": 1.49, "grad_norm": 6.237603577264708, "learning_rate": 1.585857304835478e-06, "loss": 0.5217, "step": 16748 }, { "epoch": 1.49, "grad_norm": 7.0468979522732464, "learning_rate": 1.5853295914302963e-06, "loss": 0.5956, "step": 16749 }, { "epoch": 1.49, "grad_norm": 5.805988833876837, "learning_rate": 1.584801949298882e-06, "loss": 0.534, "step": 16750 }, { "epoch": 1.49, "grad_norm": 6.318620256762924, "learning_rate": 1.5842743784522474e-06, "loss": 0.5526, "step": 16751 }, { "epoch": 1.49, "grad_norm": 8.470532115060395, "learning_rate": 1.5837468789014065e-06, "loss": 0.5403, "step": 16752 }, { "epoch": 1.49, "grad_norm": 6.220646207366034, "learning_rate": 1.5832194506573668e-06, "loss": 0.5842, "step": 16753 }, { "epoch": 1.49, "grad_norm": 6.692356559529226, "learning_rate": 1.582692093731138e-06, "loss": 0.6114, "step": 16754 }, { "epoch": 1.49, "grad_norm": 6.399612801301306, "learning_rate": 1.5821648081337282e-06, "loss": 0.569, "step": 16755 }, { "epoch": 1.49, "grad_norm": 6.086331036861599, "learning_rate": 1.581637593876143e-06, "loss": 0.5774, "step": 16756 }, { "epoch": 1.49, "grad_norm": 5.699185694144533, "learning_rate": 1.5811104509693885e-06, "loss": 0.6002, "step": 16757 }, { "epoch": 1.49, "grad_norm": 5.816872706272658, "learning_rate": 1.5805833794244646e-06, "loss": 0.6098, "step": 16758 }, { "epoch": 1.5, "grad_norm": 4.935027128919735, "learning_rate": 1.5800563792523753e-06, "loss": 0.5373, "step": 16759 }, { "epoch": 1.5, "grad_norm": 7.711214428057946, "learning_rate": 1.579529450464119e-06, "loss": 0.5794, "step": 16760 }, { "epoch": 1.5, "grad_norm": 8.951590348349548, "learning_rate": 1.5790025930706942e-06, "loss": 0.5302, "step": 16761 }, { "epoch": 1.5, "grad_norm": 6.6799708879233926, "learning_rate": 1.578475807083098e-06, "loss": 0.6207, "step": 16762 }, { "epoch": 1.5, "grad_norm": 7.324378560309559, "learning_rate": 1.577949092512327e-06, "loss": 0.6075, "step": 16763 }, { "epoch": 1.5, "grad_norm": 6.292971885625318, "learning_rate": 1.5774224493693758e-06, "loss": 0.6078, "step": 16764 }, { "epoch": 1.5, "grad_norm": 11.434302272053369, "learning_rate": 1.5768958776652337e-06, "loss": 0.6097, "step": 16765 }, { "epoch": 1.5, "grad_norm": 7.4537356989544605, "learning_rate": 1.5763693774108947e-06, "loss": 0.6137, "step": 16766 }, { "epoch": 1.5, "grad_norm": 6.692422017733832, "learning_rate": 1.5758429486173466e-06, "loss": 0.5395, "step": 16767 }, { "epoch": 1.5, "grad_norm": 6.984120391552226, "learning_rate": 1.5753165912955782e-06, "loss": 0.5974, "step": 16768 }, { "epoch": 1.5, "grad_norm": 7.194015864942682, "learning_rate": 1.574790305456576e-06, "loss": 0.5895, "step": 16769 }, { "epoch": 1.5, "grad_norm": 5.466290861714776, "learning_rate": 1.5742640911113266e-06, "loss": 0.5717, "step": 16770 }, { "epoch": 1.5, "grad_norm": 6.505166767675522, "learning_rate": 1.573737948270811e-06, "loss": 0.5893, "step": 16771 }, { "epoch": 1.5, "grad_norm": 8.956972197127385, "learning_rate": 1.5732118769460114e-06, "loss": 0.6324, "step": 16772 }, { "epoch": 1.5, "grad_norm": 7.81632473599714, "learning_rate": 1.572685877147912e-06, "loss": 0.609, "step": 16773 }, { "epoch": 1.5, "grad_norm": 8.032868449432764, "learning_rate": 1.5721599488874868e-06, "loss": 0.5293, "step": 16774 }, { "epoch": 1.5, "grad_norm": 4.848094488963424, "learning_rate": 1.5716340921757163e-06, "loss": 0.6037, "step": 16775 }, { "epoch": 1.5, "grad_norm": 6.365854374609732, "learning_rate": 1.5711083070235761e-06, "loss": 0.6153, "step": 16776 }, { "epoch": 1.5, "grad_norm": 5.5475550147255355, "learning_rate": 1.570582593442042e-06, "loss": 0.4942, "step": 16777 }, { "epoch": 1.5, "grad_norm": 5.5538132619482665, "learning_rate": 1.570056951442085e-06, "loss": 0.5585, "step": 16778 }, { "epoch": 1.5, "grad_norm": 5.753281795377747, "learning_rate": 1.5695313810346773e-06, "loss": 0.6307, "step": 16779 }, { "epoch": 1.5, "grad_norm": 5.122768767233399, "learning_rate": 1.5690058822307897e-06, "loss": 0.6047, "step": 16780 }, { "epoch": 1.5, "grad_norm": 5.236408478936888, "learning_rate": 1.56848045504139e-06, "loss": 0.5134, "step": 16781 }, { "epoch": 1.5, "grad_norm": 6.8478781119709815, "learning_rate": 1.5679550994774468e-06, "loss": 0.5676, "step": 16782 }, { "epoch": 1.5, "grad_norm": 8.642244717617755, "learning_rate": 1.5674298155499256e-06, "loss": 0.5981, "step": 16783 }, { "epoch": 1.5, "grad_norm": 7.427131849180458, "learning_rate": 1.5669046032697887e-06, "loss": 0.6602, "step": 16784 }, { "epoch": 1.5, "grad_norm": 6.973882235941195, "learning_rate": 1.5663794626479995e-06, "loss": 0.5977, "step": 16785 }, { "epoch": 1.5, "grad_norm": 6.306449840687734, "learning_rate": 1.56585439369552e-06, "loss": 0.5363, "step": 16786 }, { "epoch": 1.5, "grad_norm": 8.490969417576634, "learning_rate": 1.5653293964233091e-06, "loss": 0.4922, "step": 16787 }, { "epoch": 1.5, "grad_norm": 7.21398665990198, "learning_rate": 1.5648044708423266e-06, "loss": 0.6041, "step": 16788 }, { "epoch": 1.5, "grad_norm": 5.725159605917228, "learning_rate": 1.5642796169635266e-06, "loss": 0.5739, "step": 16789 }, { "epoch": 1.5, "grad_norm": 6.434902109849927, "learning_rate": 1.5637548347978672e-06, "loss": 0.5281, "step": 16790 }, { "epoch": 1.5, "grad_norm": 6.611414180750037, "learning_rate": 1.563230124356298e-06, "loss": 0.501, "step": 16791 }, { "epoch": 1.5, "grad_norm": 6.053918409348015, "learning_rate": 1.5627054856497742e-06, "loss": 0.6053, "step": 16792 }, { "epoch": 1.5, "grad_norm": 6.852498829476014, "learning_rate": 1.5621809186892456e-06, "loss": 0.5942, "step": 16793 }, { "epoch": 1.5, "grad_norm": 8.031463005049226, "learning_rate": 1.5616564234856618e-06, "loss": 0.5631, "step": 16794 }, { "epoch": 1.5, "grad_norm": 8.704780300603673, "learning_rate": 1.5611320000499703e-06, "loss": 0.5471, "step": 16795 }, { "epoch": 1.5, "grad_norm": 6.296510254166477, "learning_rate": 1.5606076483931194e-06, "loss": 0.6254, "step": 16796 }, { "epoch": 1.5, "grad_norm": 5.153200659118216, "learning_rate": 1.560083368526049e-06, "loss": 0.6022, "step": 16797 }, { "epoch": 1.5, "grad_norm": 6.8459699849027595, "learning_rate": 1.5595591604597061e-06, "loss": 0.6264, "step": 16798 }, { "epoch": 1.5, "grad_norm": 5.752436644413176, "learning_rate": 1.5590350242050307e-06, "loss": 0.5972, "step": 16799 }, { "epoch": 1.5, "grad_norm": 8.622995591578157, "learning_rate": 1.5585109597729636e-06, "loss": 0.5799, "step": 16800 }, { "epoch": 1.5, "grad_norm": 7.003510220025189, "learning_rate": 1.5579869671744435e-06, "loss": 0.5526, "step": 16801 }, { "epoch": 1.5, "grad_norm": 5.109543097393736, "learning_rate": 1.5574630464204088e-06, "loss": 0.5868, "step": 16802 }, { "epoch": 1.5, "grad_norm": 5.390315666806166, "learning_rate": 1.5569391975217946e-06, "loss": 0.5862, "step": 16803 }, { "epoch": 1.5, "grad_norm": 5.400884180037942, "learning_rate": 1.5564154204895326e-06, "loss": 0.6366, "step": 16804 }, { "epoch": 1.5, "grad_norm": 5.949500842996548, "learning_rate": 1.5558917153345576e-06, "loss": 0.5849, "step": 16805 }, { "epoch": 1.5, "grad_norm": 9.531653189461284, "learning_rate": 1.5553680820678001e-06, "loss": 0.624, "step": 16806 }, { "epoch": 1.5, "grad_norm": 10.105981370431945, "learning_rate": 1.5548445207001905e-06, "loss": 0.5829, "step": 16807 }, { "epoch": 1.5, "grad_norm": 6.409710555357583, "learning_rate": 1.5543210312426572e-06, "loss": 0.6064, "step": 16808 }, { "epoch": 1.5, "grad_norm": 6.204022142372398, "learning_rate": 1.5537976137061277e-06, "loss": 0.6145, "step": 16809 }, { "epoch": 1.5, "grad_norm": 4.610417126751667, "learning_rate": 1.5532742681015246e-06, "loss": 0.6273, "step": 16810 }, { "epoch": 1.5, "grad_norm": 5.666492543800809, "learning_rate": 1.5527509944397722e-06, "loss": 0.5669, "step": 16811 }, { "epoch": 1.5, "grad_norm": 7.014163606065578, "learning_rate": 1.552227792731794e-06, "loss": 0.5074, "step": 16812 }, { "epoch": 1.5, "grad_norm": 7.578866835006075, "learning_rate": 1.5517046629885102e-06, "loss": 0.5974, "step": 16813 }, { "epoch": 1.5, "grad_norm": 6.755308615221877, "learning_rate": 1.55118160522084e-06, "loss": 0.5292, "step": 16814 }, { "epoch": 1.5, "grad_norm": 7.581632136474802, "learning_rate": 1.5506586194397022e-06, "loss": 0.5992, "step": 16815 }, { "epoch": 1.5, "grad_norm": 6.7000648947954335, "learning_rate": 1.5501357056560124e-06, "loss": 0.5871, "step": 16816 }, { "epoch": 1.5, "grad_norm": 6.017812880946282, "learning_rate": 1.5496128638806823e-06, "loss": 0.5247, "step": 16817 }, { "epoch": 1.5, "grad_norm": 6.9238154317379, "learning_rate": 1.5490900941246277e-06, "loss": 0.5859, "step": 16818 }, { "epoch": 1.5, "grad_norm": 5.881708604251993, "learning_rate": 1.5485673963987602e-06, "loss": 0.5535, "step": 16819 }, { "epoch": 1.5, "grad_norm": 7.56207547671571, "learning_rate": 1.5480447707139901e-06, "loss": 0.5455, "step": 16820 }, { "epoch": 1.5, "grad_norm": 6.502301929375096, "learning_rate": 1.5475222170812254e-06, "loss": 0.5373, "step": 16821 }, { "epoch": 1.5, "grad_norm": 6.781356036280376, "learning_rate": 1.5469997355113754e-06, "loss": 0.5741, "step": 16822 }, { "epoch": 1.5, "grad_norm": 7.957798746290355, "learning_rate": 1.5464773260153422e-06, "loss": 0.5484, "step": 16823 }, { "epoch": 1.5, "grad_norm": 5.330582846339879, "learning_rate": 1.5459549886040316e-06, "loss": 0.5426, "step": 16824 }, { "epoch": 1.5, "grad_norm": 6.219110400666478, "learning_rate": 1.5454327232883464e-06, "loss": 0.5879, "step": 16825 }, { "epoch": 1.5, "grad_norm": 6.690894005412815, "learning_rate": 1.5449105300791877e-06, "loss": 0.5791, "step": 16826 }, { "epoch": 1.5, "grad_norm": 7.094725398301818, "learning_rate": 1.5443884089874556e-06, "loss": 0.5417, "step": 16827 }, { "epoch": 1.5, "grad_norm": 7.859573947025127, "learning_rate": 1.5438663600240489e-06, "loss": 0.5899, "step": 16828 }, { "epoch": 1.5, "grad_norm": 7.198966971163168, "learning_rate": 1.5433443831998617e-06, "loss": 0.5603, "step": 16829 }, { "epoch": 1.5, "grad_norm": 7.5852795176589165, "learning_rate": 1.5428224785257905e-06, "loss": 0.5737, "step": 16830 }, { "epoch": 1.5, "grad_norm": 5.850092883001039, "learning_rate": 1.542300646012731e-06, "loss": 0.5121, "step": 16831 }, { "epoch": 1.5, "grad_norm": 7.131816184152208, "learning_rate": 1.5417788856715715e-06, "loss": 0.5183, "step": 16832 }, { "epoch": 1.5, "grad_norm": 6.129416243612443, "learning_rate": 1.541257197513204e-06, "loss": 0.5946, "step": 16833 }, { "epoch": 1.5, "grad_norm": 4.914848007828148, "learning_rate": 1.5407355815485186e-06, "loss": 0.5603, "step": 16834 }, { "epoch": 1.5, "grad_norm": 6.808754844607723, "learning_rate": 1.5402140377884033e-06, "loss": 0.6444, "step": 16835 }, { "epoch": 1.5, "grad_norm": 6.360562096805939, "learning_rate": 1.5396925662437418e-06, "loss": 0.6203, "step": 16836 }, { "epoch": 1.5, "grad_norm": 7.416345933163767, "learning_rate": 1.53917116692542e-06, "loss": 0.5844, "step": 16837 }, { "epoch": 1.5, "grad_norm": 8.215498326348715, "learning_rate": 1.5386498398443212e-06, "loss": 0.6025, "step": 16838 }, { "epoch": 1.5, "grad_norm": 6.457286907001019, "learning_rate": 1.5381285850113265e-06, "loss": 0.5717, "step": 16839 }, { "epoch": 1.5, "grad_norm": 6.929342921416541, "learning_rate": 1.5376074024373177e-06, "loss": 0.5899, "step": 16840 }, { "epoch": 1.5, "grad_norm": 6.6516565144397735, "learning_rate": 1.5370862921331702e-06, "loss": 0.6176, "step": 16841 }, { "epoch": 1.5, "grad_norm": 7.63403378821846, "learning_rate": 1.5365652541097632e-06, "loss": 0.5112, "step": 16842 }, { "epoch": 1.5, "grad_norm": 6.619954474913986, "learning_rate": 1.5360442883779714e-06, "loss": 0.5737, "step": 16843 }, { "epoch": 1.5, "grad_norm": 6.496357411073156, "learning_rate": 1.5355233949486692e-06, "loss": 0.5969, "step": 16844 }, { "epoch": 1.5, "grad_norm": 6.6564859816153, "learning_rate": 1.5350025738327306e-06, "loss": 0.6549, "step": 16845 }, { "epoch": 1.5, "grad_norm": 4.953769198471918, "learning_rate": 1.5344818250410236e-06, "loss": 0.5996, "step": 16846 }, { "epoch": 1.5, "grad_norm": 7.081171506946415, "learning_rate": 1.533961148584421e-06, "loss": 0.5547, "step": 16847 }, { "epoch": 1.5, "grad_norm": 6.497737228076618, "learning_rate": 1.5334405444737872e-06, "loss": 0.5209, "step": 16848 }, { "epoch": 1.5, "grad_norm": 6.530693774119447, "learning_rate": 1.53292001271999e-06, "loss": 0.554, "step": 16849 }, { "epoch": 1.5, "grad_norm": 7.3617373942971, "learning_rate": 1.5323995533338948e-06, "loss": 0.5859, "step": 16850 }, { "epoch": 1.5, "grad_norm": 6.566887394395408, "learning_rate": 1.5318791663263655e-06, "loss": 0.653, "step": 16851 }, { "epoch": 1.5, "grad_norm": 8.126462117665236, "learning_rate": 1.5313588517082629e-06, "loss": 0.5672, "step": 16852 }, { "epoch": 1.5, "grad_norm": 8.362663880686503, "learning_rate": 1.5308386094904503e-06, "loss": 0.643, "step": 16853 }, { "epoch": 1.5, "grad_norm": 6.804395003636447, "learning_rate": 1.5303184396837822e-06, "loss": 0.5192, "step": 16854 }, { "epoch": 1.5, "grad_norm": 5.792810022007835, "learning_rate": 1.5297983422991185e-06, "loss": 0.5922, "step": 16855 }, { "epoch": 1.5, "grad_norm": 6.208169827099368, "learning_rate": 1.5292783173473148e-06, "loss": 0.5378, "step": 16856 }, { "epoch": 1.5, "grad_norm": 14.911308930877057, "learning_rate": 1.5287583648392257e-06, "loss": 0.5516, "step": 16857 }, { "epoch": 1.5, "grad_norm": 5.567176430498715, "learning_rate": 1.5282384847857034e-06, "loss": 0.5936, "step": 16858 }, { "epoch": 1.5, "grad_norm": 6.226493358083275, "learning_rate": 1.5277186771976017e-06, "loss": 0.685, "step": 16859 }, { "epoch": 1.5, "grad_norm": 5.703301127168551, "learning_rate": 1.527198942085768e-06, "loss": 0.5947, "step": 16860 }, { "epoch": 1.5, "grad_norm": 5.00481229006696, "learning_rate": 1.5266792794610502e-06, "loss": 0.5947, "step": 16861 }, { "epoch": 1.5, "grad_norm": 5.339412458804203, "learning_rate": 1.526159689334295e-06, "loss": 0.5903, "step": 16862 }, { "epoch": 1.5, "grad_norm": 5.2605824636914695, "learning_rate": 1.5256401717163493e-06, "loss": 0.5978, "step": 16863 }, { "epoch": 1.5, "grad_norm": 8.589631169591014, "learning_rate": 1.5251207266180563e-06, "loss": 0.5784, "step": 16864 }, { "epoch": 1.5, "grad_norm": 5.845554417266622, "learning_rate": 1.5246013540502585e-06, "loss": 0.5875, "step": 16865 }, { "epoch": 1.5, "grad_norm": 5.320865328208996, "learning_rate": 1.5240820540237977e-06, "loss": 0.5528, "step": 16866 }, { "epoch": 1.5, "grad_norm": 7.946965172803159, "learning_rate": 1.52356282654951e-06, "loss": 0.6102, "step": 16867 }, { "epoch": 1.5, "grad_norm": 7.816021703274899, "learning_rate": 1.5230436716382357e-06, "loss": 0.5848, "step": 16868 }, { "epoch": 1.5, "grad_norm": 5.143598880538589, "learning_rate": 1.52252458930081e-06, "loss": 0.5734, "step": 16869 }, { "epoch": 1.5, "grad_norm": 6.1002160057754375, "learning_rate": 1.5220055795480682e-06, "loss": 0.5517, "step": 16870 }, { "epoch": 1.51, "grad_norm": 5.533570892571663, "learning_rate": 1.5214866423908426e-06, "loss": 0.5923, "step": 16871 }, { "epoch": 1.51, "grad_norm": 8.638158150679322, "learning_rate": 1.5209677778399679e-06, "loss": 0.6162, "step": 16872 }, { "epoch": 1.51, "grad_norm": 7.7771281823540654, "learning_rate": 1.5204489859062698e-06, "loss": 0.6016, "step": 16873 }, { "epoch": 1.51, "grad_norm": 5.137086332278969, "learning_rate": 1.5199302666005806e-06, "loss": 0.5689, "step": 16874 }, { "epoch": 1.51, "grad_norm": 6.236746757028715, "learning_rate": 1.5194116199337244e-06, "loss": 0.6159, "step": 16875 }, { "epoch": 1.51, "grad_norm": 7.421301297833723, "learning_rate": 1.5188930459165285e-06, "loss": 0.516, "step": 16876 }, { "epoch": 1.51, "grad_norm": 6.369528047401714, "learning_rate": 1.5183745445598163e-06, "loss": 0.5652, "step": 16877 }, { "epoch": 1.51, "grad_norm": 6.268908387895806, "learning_rate": 1.5178561158744114e-06, "loss": 0.5954, "step": 16878 }, { "epoch": 1.51, "grad_norm": 5.785568696390738, "learning_rate": 1.5173377598711358e-06, "loss": 0.5547, "step": 16879 }, { "epoch": 1.51, "grad_norm": 4.963414360977335, "learning_rate": 1.5168194765608056e-06, "loss": 0.551, "step": 16880 }, { "epoch": 1.51, "grad_norm": 6.096847788836213, "learning_rate": 1.5163012659542415e-06, "loss": 0.6148, "step": 16881 }, { "epoch": 1.51, "grad_norm": 6.475800259595204, "learning_rate": 1.5157831280622588e-06, "loss": 0.598, "step": 16882 }, { "epoch": 1.51, "grad_norm": 5.516214648830509, "learning_rate": 1.5152650628956728e-06, "loss": 0.5884, "step": 16883 }, { "epoch": 1.51, "grad_norm": 7.4927402721726475, "learning_rate": 1.5147470704652972e-06, "loss": 0.6516, "step": 16884 }, { "epoch": 1.51, "grad_norm": 5.312731820842023, "learning_rate": 1.5142291507819456e-06, "loss": 0.6066, "step": 16885 }, { "epoch": 1.51, "grad_norm": 6.698428053854762, "learning_rate": 1.5137113038564255e-06, "loss": 0.5768, "step": 16886 }, { "epoch": 1.51, "grad_norm": 6.380225913351903, "learning_rate": 1.513193529699547e-06, "loss": 0.6028, "step": 16887 }, { "epoch": 1.51, "grad_norm": 5.970754808949401, "learning_rate": 1.512675828322117e-06, "loss": 0.5253, "step": 16888 }, { "epoch": 1.51, "grad_norm": 8.068637685352222, "learning_rate": 1.5121581997349444e-06, "loss": 0.6, "step": 16889 }, { "epoch": 1.51, "grad_norm": 7.303789554567294, "learning_rate": 1.511640643948829e-06, "loss": 0.5473, "step": 16890 }, { "epoch": 1.51, "grad_norm": 8.353584205687582, "learning_rate": 1.5111231609745757e-06, "loss": 0.6044, "step": 16891 }, { "epoch": 1.51, "grad_norm": 5.94678239147186, "learning_rate": 1.5106057508229883e-06, "loss": 0.5684, "step": 16892 }, { "epoch": 1.51, "grad_norm": 13.28060723767068, "learning_rate": 1.510088413504862e-06, "loss": 0.5193, "step": 16893 }, { "epoch": 1.51, "grad_norm": 6.941891450640458, "learning_rate": 1.5095711490309971e-06, "loss": 0.5588, "step": 16894 }, { "epoch": 1.51, "grad_norm": 6.573108442517075, "learning_rate": 1.509053957412191e-06, "loss": 0.5752, "step": 16895 }, { "epoch": 1.51, "grad_norm": 6.322290896729642, "learning_rate": 1.508536838659238e-06, "loss": 0.5518, "step": 16896 }, { "epoch": 1.51, "grad_norm": 5.698157130322034, "learning_rate": 1.5080197927829327e-06, "loss": 0.6051, "step": 16897 }, { "epoch": 1.51, "grad_norm": 4.949685492741059, "learning_rate": 1.507502819794069e-06, "loss": 0.6003, "step": 16898 }, { "epoch": 1.51, "grad_norm": 6.625093219866395, "learning_rate": 1.5069859197034331e-06, "loss": 0.5506, "step": 16899 }, { "epoch": 1.51, "grad_norm": 7.065878393403962, "learning_rate": 1.5064690925218168e-06, "loss": 0.5771, "step": 16900 }, { "epoch": 1.51, "grad_norm": 6.831908400791802, "learning_rate": 1.505952338260007e-06, "loss": 0.522, "step": 16901 }, { "epoch": 1.51, "grad_norm": 6.368798533246033, "learning_rate": 1.5054356569287915e-06, "loss": 0.5834, "step": 16902 }, { "epoch": 1.51, "grad_norm": 6.587107382856981, "learning_rate": 1.5049190485389542e-06, "loss": 0.5537, "step": 16903 }, { "epoch": 1.51, "grad_norm": 6.1794080740734065, "learning_rate": 1.5044025131012768e-06, "loss": 0.5512, "step": 16904 }, { "epoch": 1.51, "grad_norm": 7.234934825915811, "learning_rate": 1.503886050626543e-06, "loss": 0.6082, "step": 16905 }, { "epoch": 1.51, "grad_norm": 5.058257044554382, "learning_rate": 1.5033696611255295e-06, "loss": 0.5819, "step": 16906 }, { "epoch": 1.51, "grad_norm": 8.229679049709825, "learning_rate": 1.5028533446090177e-06, "loss": 0.5977, "step": 16907 }, { "epoch": 1.51, "grad_norm": 4.947598913005139, "learning_rate": 1.502337101087783e-06, "loss": 0.5931, "step": 16908 }, { "epoch": 1.51, "grad_norm": 7.108274904674371, "learning_rate": 1.5018209305726018e-06, "loss": 0.5713, "step": 16909 }, { "epoch": 1.51, "grad_norm": 6.579044993432209, "learning_rate": 1.501304833074248e-06, "loss": 0.5767, "step": 16910 }, { "epoch": 1.51, "grad_norm": 5.5350739747985465, "learning_rate": 1.500788808603495e-06, "loss": 0.5453, "step": 16911 }, { "epoch": 1.51, "grad_norm": 8.186964577244913, "learning_rate": 1.5002728571711112e-06, "loss": 0.5772, "step": 16912 }, { "epoch": 1.51, "grad_norm": 6.649075751012441, "learning_rate": 1.4997569787878669e-06, "loss": 0.5231, "step": 16913 }, { "epoch": 1.51, "grad_norm": 9.922576033936998, "learning_rate": 1.4992411734645306e-06, "loss": 0.5571, "step": 16914 }, { "epoch": 1.51, "grad_norm": 6.122536903201537, "learning_rate": 1.498725441211868e-06, "loss": 0.6606, "step": 16915 }, { "epoch": 1.51, "grad_norm": 11.676300874363852, "learning_rate": 1.498209782040646e-06, "loss": 0.5837, "step": 16916 }, { "epoch": 1.51, "grad_norm": 7.401980000517567, "learning_rate": 1.4976941959616243e-06, "loss": 0.5333, "step": 16917 }, { "epoch": 1.51, "grad_norm": 6.505126893853667, "learning_rate": 1.4971786829855677e-06, "loss": 0.5397, "step": 16918 }, { "epoch": 1.51, "grad_norm": 5.121363167108908, "learning_rate": 1.4966632431232342e-06, "loss": 0.5616, "step": 16919 }, { "epoch": 1.51, "grad_norm": 8.371938913630846, "learning_rate": 1.4961478763853826e-06, "loss": 0.5583, "step": 16920 }, { "epoch": 1.51, "grad_norm": 6.032093087233527, "learning_rate": 1.495632582782771e-06, "loss": 0.6029, "step": 16921 }, { "epoch": 1.51, "grad_norm": 6.167540307889497, "learning_rate": 1.495117362326155e-06, "loss": 0.6352, "step": 16922 }, { "epoch": 1.51, "grad_norm": 9.281804431728423, "learning_rate": 1.4946022150262895e-06, "loss": 0.6379, "step": 16923 }, { "epoch": 1.51, "grad_norm": 5.6998891308402575, "learning_rate": 1.494087140893925e-06, "loss": 0.5748, "step": 16924 }, { "epoch": 1.51, "grad_norm": 11.021861221179147, "learning_rate": 1.4935721399398134e-06, "loss": 0.5667, "step": 16925 }, { "epoch": 1.51, "grad_norm": 8.205758654106678, "learning_rate": 1.493057212174705e-06, "loss": 0.5488, "step": 16926 }, { "epoch": 1.51, "grad_norm": 6.673298460605711, "learning_rate": 1.4925423576093467e-06, "loss": 0.5189, "step": 16927 }, { "epoch": 1.51, "grad_norm": 5.550696210393093, "learning_rate": 1.4920275762544855e-06, "loss": 0.6093, "step": 16928 }, { "epoch": 1.51, "grad_norm": 6.259836067506893, "learning_rate": 1.491512868120868e-06, "loss": 0.5832, "step": 16929 }, { "epoch": 1.51, "grad_norm": 5.432706063009192, "learning_rate": 1.4909982332192342e-06, "loss": 0.5367, "step": 16930 }, { "epoch": 1.51, "grad_norm": 7.337540694619478, "learning_rate": 1.4904836715603277e-06, "loss": 0.5521, "step": 16931 }, { "epoch": 1.51, "grad_norm": 8.155671258102808, "learning_rate": 1.4899691831548906e-06, "loss": 0.6556, "step": 16932 }, { "epoch": 1.51, "grad_norm": 8.125246501037363, "learning_rate": 1.4894547680136583e-06, "loss": 0.5465, "step": 16933 }, { "epoch": 1.51, "grad_norm": 6.331457651777294, "learning_rate": 1.4889404261473695e-06, "loss": 0.5679, "step": 16934 }, { "epoch": 1.51, "grad_norm": 5.675738305890865, "learning_rate": 1.4884261575667609e-06, "loss": 0.5328, "step": 16935 }, { "epoch": 1.51, "grad_norm": 8.919153645497104, "learning_rate": 1.4879119622825665e-06, "loss": 0.6, "step": 16936 }, { "epoch": 1.51, "grad_norm": 6.928387492889682, "learning_rate": 1.4873978403055177e-06, "loss": 0.604, "step": 16937 }, { "epoch": 1.51, "grad_norm": 6.800001362905879, "learning_rate": 1.486883791646347e-06, "loss": 0.605, "step": 16938 }, { "epoch": 1.51, "grad_norm": 13.127859140706317, "learning_rate": 1.4863698163157824e-06, "loss": 0.6081, "step": 16939 }, { "epoch": 1.51, "grad_norm": 5.54511341384218, "learning_rate": 1.4858559143245544e-06, "loss": 0.5413, "step": 16940 }, { "epoch": 1.51, "grad_norm": 6.357791716089701, "learning_rate": 1.4853420856833873e-06, "loss": 0.5996, "step": 16941 }, { "epoch": 1.51, "grad_norm": 5.999622745628324, "learning_rate": 1.4848283304030092e-06, "loss": 0.5738, "step": 16942 }, { "epoch": 1.51, "grad_norm": 5.81557122094534, "learning_rate": 1.48431464849414e-06, "loss": 0.6007, "step": 16943 }, { "epoch": 1.51, "grad_norm": 5.976561085509542, "learning_rate": 1.4838010399675034e-06, "loss": 0.563, "step": 16944 }, { "epoch": 1.51, "grad_norm": 8.007905199390978, "learning_rate": 1.4832875048338197e-06, "loss": 0.6301, "step": 16945 }, { "epoch": 1.51, "grad_norm": 8.187648323041252, "learning_rate": 1.4827740431038096e-06, "loss": 0.5219, "step": 16946 }, { "epoch": 1.51, "grad_norm": 7.613614816891229, "learning_rate": 1.4822606547881868e-06, "loss": 0.662, "step": 16947 }, { "epoch": 1.51, "grad_norm": 6.135455813000128, "learning_rate": 1.4817473398976694e-06, "loss": 0.5469, "step": 16948 }, { "epoch": 1.51, "grad_norm": 7.154659253153712, "learning_rate": 1.481234098442973e-06, "loss": 0.5628, "step": 16949 }, { "epoch": 1.51, "grad_norm": 6.954271222745732, "learning_rate": 1.480720930434807e-06, "loss": 0.5569, "step": 16950 }, { "epoch": 1.51, "grad_norm": 4.970741520091181, "learning_rate": 1.4802078358838846e-06, "loss": 0.5359, "step": 16951 }, { "epoch": 1.51, "grad_norm": 10.556978732077475, "learning_rate": 1.4796948148009154e-06, "loss": 0.6334, "step": 16952 }, { "epoch": 1.51, "grad_norm": 6.148771128113096, "learning_rate": 1.479181867196608e-06, "loss": 0.6177, "step": 16953 }, { "epoch": 1.51, "grad_norm": 9.529060178580059, "learning_rate": 1.4786689930816684e-06, "loss": 0.5555, "step": 16954 }, { "epoch": 1.51, "grad_norm": 6.2290759468001955, "learning_rate": 1.478156192466803e-06, "loss": 0.6116, "step": 16955 }, { "epoch": 1.51, "grad_norm": 10.441380037035078, "learning_rate": 1.4776434653627135e-06, "loss": 0.6153, "step": 16956 }, { "epoch": 1.51, "grad_norm": 7.374788467635253, "learning_rate": 1.4771308117801026e-06, "loss": 0.6498, "step": 16957 }, { "epoch": 1.51, "grad_norm": 8.40346301075115, "learning_rate": 1.4766182317296712e-06, "loss": 0.575, "step": 16958 }, { "epoch": 1.51, "grad_norm": 8.06572885937927, "learning_rate": 1.4761057252221177e-06, "loss": 0.5665, "step": 16959 }, { "epoch": 1.51, "grad_norm": 5.017676081238112, "learning_rate": 1.4755932922681405e-06, "loss": 0.5842, "step": 16960 }, { "epoch": 1.51, "grad_norm": 6.9089317805024075, "learning_rate": 1.4750809328784366e-06, "loss": 0.5624, "step": 16961 }, { "epoch": 1.51, "grad_norm": 8.170103075492344, "learning_rate": 1.4745686470636983e-06, "loss": 0.5871, "step": 16962 }, { "epoch": 1.51, "grad_norm": 9.35976995624796, "learning_rate": 1.4740564348346175e-06, "loss": 0.5351, "step": 16963 }, { "epoch": 1.51, "grad_norm": 6.474340321253286, "learning_rate": 1.4735442962018875e-06, "loss": 0.5579, "step": 16964 }, { "epoch": 1.51, "grad_norm": 6.77185154503553, "learning_rate": 1.4730322311761969e-06, "loss": 0.5737, "step": 16965 }, { "epoch": 1.51, "grad_norm": 6.330677686238066, "learning_rate": 1.4725202397682349e-06, "loss": 0.5854, "step": 16966 }, { "epoch": 1.51, "grad_norm": 8.833550313847681, "learning_rate": 1.4720083219886876e-06, "loss": 0.6223, "step": 16967 }, { "epoch": 1.51, "grad_norm": 6.901323390193607, "learning_rate": 1.4714964778482421e-06, "loss": 0.613, "step": 16968 }, { "epoch": 1.51, "grad_norm": 6.134137593856659, "learning_rate": 1.4709847073575789e-06, "loss": 0.5695, "step": 16969 }, { "epoch": 1.51, "grad_norm": 4.269011154381314, "learning_rate": 1.4704730105273812e-06, "loss": 0.5585, "step": 16970 }, { "epoch": 1.51, "grad_norm": 8.726669617280312, "learning_rate": 1.4699613873683299e-06, "loss": 0.5795, "step": 16971 }, { "epoch": 1.51, "grad_norm": 6.454584544124313, "learning_rate": 1.4694498378911042e-06, "loss": 0.5705, "step": 16972 }, { "epoch": 1.51, "grad_norm": 7.826911093720265, "learning_rate": 1.4689383621063813e-06, "loss": 0.5251, "step": 16973 }, { "epoch": 1.51, "grad_norm": 7.989212286133505, "learning_rate": 1.4684269600248386e-06, "loss": 0.5691, "step": 16974 }, { "epoch": 1.51, "grad_norm": 6.72731095518736, "learning_rate": 1.4679156316571491e-06, "loss": 0.563, "step": 16975 }, { "epoch": 1.51, "grad_norm": 7.127027033232882, "learning_rate": 1.4674043770139845e-06, "loss": 0.5999, "step": 16976 }, { "epoch": 1.51, "grad_norm": 7.8670026110484255, "learning_rate": 1.4668931961060167e-06, "loss": 0.5319, "step": 16977 }, { "epoch": 1.51, "grad_norm": 7.966683172180466, "learning_rate": 1.4663820889439162e-06, "loss": 0.5767, "step": 16978 }, { "epoch": 1.51, "grad_norm": 7.208040738568394, "learning_rate": 1.4658710555383514e-06, "loss": 0.5613, "step": 16979 }, { "epoch": 1.51, "grad_norm": 7.65608397514103, "learning_rate": 1.4653600958999886e-06, "loss": 0.5928, "step": 16980 }, { "epoch": 1.51, "grad_norm": 6.730181792705907, "learning_rate": 1.4648492100394945e-06, "loss": 0.5538, "step": 16981 }, { "epoch": 1.51, "grad_norm": 8.956717601004273, "learning_rate": 1.4643383979675301e-06, "loss": 0.5375, "step": 16982 }, { "epoch": 1.52, "grad_norm": 6.711230511663297, "learning_rate": 1.4638276596947582e-06, "loss": 0.6045, "step": 16983 }, { "epoch": 1.52, "grad_norm": 6.716785830959988, "learning_rate": 1.4633169952318399e-06, "loss": 0.5689, "step": 16984 }, { "epoch": 1.52, "grad_norm": 7.865416214991, "learning_rate": 1.462806404589434e-06, "loss": 0.554, "step": 16985 }, { "epoch": 1.52, "grad_norm": 7.223765876311126, "learning_rate": 1.4622958877781984e-06, "loss": 0.5815, "step": 16986 }, { "epoch": 1.52, "grad_norm": 5.651384479422669, "learning_rate": 1.4617854448087903e-06, "loss": 0.5987, "step": 16987 }, { "epoch": 1.52, "grad_norm": 7.732375905213915, "learning_rate": 1.461275075691861e-06, "loss": 0.5575, "step": 16988 }, { "epoch": 1.52, "grad_norm": 6.568258258778996, "learning_rate": 1.460764780438066e-06, "loss": 0.5907, "step": 16989 }, { "epoch": 1.52, "grad_norm": 7.772168713986043, "learning_rate": 1.4602545590580542e-06, "loss": 0.5847, "step": 16990 }, { "epoch": 1.52, "grad_norm": 7.903571049075014, "learning_rate": 1.4597444115624765e-06, "loss": 0.5781, "step": 16991 }, { "epoch": 1.52, "grad_norm": 6.697963815901953, "learning_rate": 1.4592343379619811e-06, "loss": 0.5401, "step": 16992 }, { "epoch": 1.52, "grad_norm": 8.207606374399852, "learning_rate": 1.4587243382672144e-06, "loss": 0.5883, "step": 16993 }, { "epoch": 1.52, "grad_norm": 7.010681293929883, "learning_rate": 1.4582144124888236e-06, "loss": 0.5601, "step": 16994 }, { "epoch": 1.52, "grad_norm": 7.924733992958024, "learning_rate": 1.4577045606374495e-06, "loss": 0.569, "step": 16995 }, { "epoch": 1.52, "grad_norm": 6.149419950985524, "learning_rate": 1.4571947827237347e-06, "loss": 0.6002, "step": 16996 }, { "epoch": 1.52, "grad_norm": 6.548513618143878, "learning_rate": 1.4566850787583203e-06, "loss": 0.6148, "step": 16997 }, { "epoch": 1.52, "grad_norm": 5.441147614603664, "learning_rate": 1.4561754487518453e-06, "loss": 0.5637, "step": 16998 }, { "epoch": 1.52, "grad_norm": 5.2704946933655155, "learning_rate": 1.4556658927149486e-06, "loss": 0.556, "step": 16999 }, { "epoch": 1.52, "grad_norm": 6.423241182910366, "learning_rate": 1.4551564106582627e-06, "loss": 0.5969, "step": 17000 }, { "epoch": 1.52, "grad_norm": 5.9610497748127065, "learning_rate": 1.4546470025924236e-06, "loss": 0.6348, "step": 17001 }, { "epoch": 1.52, "grad_norm": 6.374804204856671, "learning_rate": 1.4541376685280645e-06, "loss": 0.5783, "step": 17002 }, { "epoch": 1.52, "grad_norm": 5.287040592895309, "learning_rate": 1.4536284084758158e-06, "loss": 0.5929, "step": 17003 }, { "epoch": 1.52, "grad_norm": 5.9459225754592016, "learning_rate": 1.4531192224463092e-06, "loss": 0.584, "step": 17004 }, { "epoch": 1.52, "grad_norm": 9.381562277913252, "learning_rate": 1.45261011045017e-06, "loss": 0.5811, "step": 17005 }, { "epoch": 1.52, "grad_norm": 6.229638833004457, "learning_rate": 1.452101072498026e-06, "loss": 0.5529, "step": 17006 }, { "epoch": 1.52, "grad_norm": 5.610994267315945, "learning_rate": 1.4515921086005042e-06, "loss": 0.5768, "step": 17007 }, { "epoch": 1.52, "grad_norm": 6.466925359609324, "learning_rate": 1.4510832187682238e-06, "loss": 0.5811, "step": 17008 }, { "epoch": 1.52, "grad_norm": 5.32255262784391, "learning_rate": 1.4505744030118101e-06, "loss": 0.5812, "step": 17009 }, { "epoch": 1.52, "grad_norm": 4.924745742089377, "learning_rate": 1.4500656613418823e-06, "loss": 0.5679, "step": 17010 }, { "epoch": 1.52, "grad_norm": 6.675467232968973, "learning_rate": 1.4495569937690596e-06, "loss": 0.5792, "step": 17011 }, { "epoch": 1.52, "grad_norm": 6.999430014820665, "learning_rate": 1.4490484003039612e-06, "loss": 0.5404, "step": 17012 }, { "epoch": 1.52, "grad_norm": 4.401867955412797, "learning_rate": 1.4485398809571987e-06, "loss": 0.5566, "step": 17013 }, { "epoch": 1.52, "grad_norm": 7.037650000839489, "learning_rate": 1.4480314357393894e-06, "loss": 0.6018, "step": 17014 }, { "epoch": 1.52, "grad_norm": 7.79481737185491, "learning_rate": 1.447523064661145e-06, "loss": 0.5828, "step": 17015 }, { "epoch": 1.52, "grad_norm": 6.246310364582602, "learning_rate": 1.4470147677330765e-06, "loss": 0.6104, "step": 17016 }, { "epoch": 1.52, "grad_norm": 7.248819128479334, "learning_rate": 1.4465065449657938e-06, "loss": 0.6043, "step": 17017 }, { "epoch": 1.52, "grad_norm": 6.37401185373268, "learning_rate": 1.445998396369907e-06, "loss": 0.5951, "step": 17018 }, { "epoch": 1.52, "grad_norm": 6.965007713527907, "learning_rate": 1.4454903219560202e-06, "loss": 0.5866, "step": 17019 }, { "epoch": 1.52, "grad_norm": 6.730367440462978, "learning_rate": 1.4449823217347374e-06, "loss": 0.6012, "step": 17020 }, { "epoch": 1.52, "grad_norm": 9.497564765854772, "learning_rate": 1.444474395716663e-06, "loss": 0.6141, "step": 17021 }, { "epoch": 1.52, "grad_norm": 8.212334247427338, "learning_rate": 1.443966543912399e-06, "loss": 0.5242, "step": 17022 }, { "epoch": 1.52, "grad_norm": 5.866404223139299, "learning_rate": 1.4434587663325461e-06, "loss": 0.5551, "step": 17023 }, { "epoch": 1.52, "grad_norm": 7.111070802165409, "learning_rate": 1.4429510629877024e-06, "loss": 0.577, "step": 17024 }, { "epoch": 1.52, "grad_norm": 8.659516591616146, "learning_rate": 1.4424434338884673e-06, "loss": 0.597, "step": 17025 }, { "epoch": 1.52, "grad_norm": 6.1816492716599205, "learning_rate": 1.4419358790454335e-06, "loss": 0.5434, "step": 17026 }, { "epoch": 1.52, "grad_norm": 8.814873084403002, "learning_rate": 1.4414283984691957e-06, "loss": 0.5728, "step": 17027 }, { "epoch": 1.52, "grad_norm": 5.921360581994144, "learning_rate": 1.4409209921703472e-06, "loss": 0.5589, "step": 17028 }, { "epoch": 1.52, "grad_norm": 8.066792840849494, "learning_rate": 1.440413660159478e-06, "loss": 0.5547, "step": 17029 }, { "epoch": 1.52, "grad_norm": 6.00201394203805, "learning_rate": 1.4399064024471787e-06, "loss": 0.5737, "step": 17030 }, { "epoch": 1.52, "grad_norm": 6.317986570868885, "learning_rate": 1.4393992190440387e-06, "loss": 0.6148, "step": 17031 }, { "epoch": 1.52, "grad_norm": 4.907747874005524, "learning_rate": 1.4388921099606406e-06, "loss": 0.58, "step": 17032 }, { "epoch": 1.52, "grad_norm": 5.510114527045778, "learning_rate": 1.4383850752075722e-06, "loss": 0.5938, "step": 17033 }, { "epoch": 1.52, "grad_norm": 6.330826189766111, "learning_rate": 1.4378781147954146e-06, "loss": 0.6149, "step": 17034 }, { "epoch": 1.52, "grad_norm": 7.533918618248367, "learning_rate": 1.4373712287347503e-06, "loss": 0.5753, "step": 17035 }, { "epoch": 1.52, "grad_norm": 5.501711238818725, "learning_rate": 1.4368644170361596e-06, "loss": 0.5855, "step": 17036 }, { "epoch": 1.52, "grad_norm": 5.731019116024797, "learning_rate": 1.4363576797102209e-06, "loss": 0.5519, "step": 17037 }, { "epoch": 1.52, "grad_norm": 7.399919282005908, "learning_rate": 1.4358510167675127e-06, "loss": 0.5264, "step": 17038 }, { "epoch": 1.52, "grad_norm": 6.7046073530896555, "learning_rate": 1.435344428218608e-06, "loss": 0.5698, "step": 17039 }, { "epoch": 1.52, "grad_norm": 5.659277033431452, "learning_rate": 1.434837914074082e-06, "loss": 0.6135, "step": 17040 }, { "epoch": 1.52, "grad_norm": 7.028677435708255, "learning_rate": 1.434331474344507e-06, "loss": 0.5683, "step": 17041 }, { "epoch": 1.52, "grad_norm": 8.281433100661124, "learning_rate": 1.433825109040453e-06, "loss": 0.627, "step": 17042 }, { "epoch": 1.52, "grad_norm": 5.859346543878757, "learning_rate": 1.433318818172491e-06, "loss": 0.5969, "step": 17043 }, { "epoch": 1.52, "grad_norm": 10.09264460304895, "learning_rate": 1.432812601751189e-06, "loss": 0.5606, "step": 17044 }, { "epoch": 1.52, "grad_norm": 9.056394973278627, "learning_rate": 1.43230645978711e-06, "loss": 0.6208, "step": 17045 }, { "epoch": 1.52, "grad_norm": 8.656508658506924, "learning_rate": 1.4318003922908209e-06, "loss": 0.5709, "step": 17046 }, { "epoch": 1.52, "grad_norm": 7.388810885660052, "learning_rate": 1.4312943992728861e-06, "loss": 0.6635, "step": 17047 }, { "epoch": 1.52, "grad_norm": 4.997425791485858, "learning_rate": 1.4307884807438638e-06, "loss": 0.5217, "step": 17048 }, { "epoch": 1.52, "grad_norm": 4.689121616601459, "learning_rate": 1.4302826367143152e-06, "loss": 0.5607, "step": 17049 }, { "epoch": 1.52, "grad_norm": 8.57077540295167, "learning_rate": 1.4297768671947993e-06, "loss": 0.539, "step": 17050 }, { "epoch": 1.52, "grad_norm": 9.9097316221064, "learning_rate": 1.429271172195874e-06, "loss": 0.5385, "step": 17051 }, { "epoch": 1.52, "grad_norm": 6.644919609914317, "learning_rate": 1.4287655517280914e-06, "loss": 0.5836, "step": 17052 }, { "epoch": 1.52, "grad_norm": 6.89454848095521, "learning_rate": 1.4282600058020075e-06, "loss": 0.5733, "step": 17053 }, { "epoch": 1.52, "grad_norm": 6.425362405136173, "learning_rate": 1.427754534428174e-06, "loss": 0.5261, "step": 17054 }, { "epoch": 1.52, "grad_norm": 7.004830530081999, "learning_rate": 1.427249137617141e-06, "loss": 0.5791, "step": 17055 }, { "epoch": 1.52, "grad_norm": 6.528118408170565, "learning_rate": 1.4267438153794587e-06, "loss": 0.5715, "step": 17056 }, { "epoch": 1.52, "grad_norm": 5.150775696860845, "learning_rate": 1.4262385677256752e-06, "loss": 0.5999, "step": 17057 }, { "epoch": 1.52, "grad_norm": 6.75458757416896, "learning_rate": 1.4257333946663337e-06, "loss": 0.5496, "step": 17058 }, { "epoch": 1.52, "grad_norm": 6.798288314986123, "learning_rate": 1.42522829621198e-06, "loss": 0.5932, "step": 17059 }, { "epoch": 1.52, "grad_norm": 8.47014171041464, "learning_rate": 1.4247232723731568e-06, "loss": 0.5657, "step": 17060 }, { "epoch": 1.52, "grad_norm": 6.539769631878163, "learning_rate": 1.4242183231604062e-06, "loss": 0.5362, "step": 17061 }, { "epoch": 1.52, "grad_norm": 6.090022373630785, "learning_rate": 1.4237134485842686e-06, "loss": 0.5913, "step": 17062 }, { "epoch": 1.52, "grad_norm": 6.395845164162824, "learning_rate": 1.4232086486552787e-06, "loss": 0.5015, "step": 17063 }, { "epoch": 1.52, "grad_norm": 7.100527999433925, "learning_rate": 1.4227039233839773e-06, "loss": 0.5565, "step": 17064 }, { "epoch": 1.52, "grad_norm": 8.364808059342145, "learning_rate": 1.4221992727808958e-06, "loss": 0.5427, "step": 17065 }, { "epoch": 1.52, "grad_norm": 5.333190514829901, "learning_rate": 1.4216946968565692e-06, "loss": 0.6033, "step": 17066 }, { "epoch": 1.52, "grad_norm": 8.795771845250657, "learning_rate": 1.4211901956215296e-06, "loss": 0.639, "step": 17067 }, { "epoch": 1.52, "grad_norm": 7.293848447781189, "learning_rate": 1.4206857690863069e-06, "loss": 0.6258, "step": 17068 }, { "epoch": 1.52, "grad_norm": 5.406113685914998, "learning_rate": 1.4201814172614304e-06, "loss": 0.5444, "step": 17069 }, { "epoch": 1.52, "grad_norm": 6.2203752536094825, "learning_rate": 1.4196771401574283e-06, "loss": 0.6146, "step": 17070 }, { "epoch": 1.52, "grad_norm": 7.890502286589144, "learning_rate": 1.419172937784824e-06, "loss": 0.5686, "step": 17071 }, { "epoch": 1.52, "grad_norm": 7.911029133267699, "learning_rate": 1.4186688101541424e-06, "loss": 0.5746, "step": 17072 }, { "epoch": 1.52, "grad_norm": 6.915699627830184, "learning_rate": 1.4181647572759066e-06, "loss": 0.651, "step": 17073 }, { "epoch": 1.52, "grad_norm": 10.54165062283654, "learning_rate": 1.4176607791606378e-06, "loss": 0.5701, "step": 17074 }, { "epoch": 1.52, "grad_norm": 7.24303961642342, "learning_rate": 1.4171568758188559e-06, "loss": 0.5378, "step": 17075 }, { "epoch": 1.52, "grad_norm": 6.058354805581522, "learning_rate": 1.4166530472610767e-06, "loss": 0.4983, "step": 17076 }, { "epoch": 1.52, "grad_norm": 5.787151245411657, "learning_rate": 1.4161492934978193e-06, "loss": 0.6237, "step": 17077 }, { "epoch": 1.52, "grad_norm": 7.210216407515425, "learning_rate": 1.4156456145395953e-06, "loss": 0.5399, "step": 17078 }, { "epoch": 1.52, "grad_norm": 7.673595822795365, "learning_rate": 1.41514201039692e-06, "loss": 0.5473, "step": 17079 }, { "epoch": 1.52, "grad_norm": 7.11531931345151, "learning_rate": 1.4146384810803037e-06, "loss": 0.5903, "step": 17080 }, { "epoch": 1.52, "grad_norm": 6.473145491893182, "learning_rate": 1.4141350266002579e-06, "loss": 0.617, "step": 17081 }, { "epoch": 1.52, "grad_norm": 4.81752703803079, "learning_rate": 1.41363164696729e-06, "loss": 0.5195, "step": 17082 }, { "epoch": 1.52, "grad_norm": 7.218602758536105, "learning_rate": 1.4131283421919095e-06, "loss": 0.575, "step": 17083 }, { "epoch": 1.52, "grad_norm": 6.949264138943726, "learning_rate": 1.412625112284618e-06, "loss": 0.5167, "step": 17084 }, { "epoch": 1.52, "grad_norm": 5.80423636969925, "learning_rate": 1.4121219572559208e-06, "loss": 0.5598, "step": 17085 }, { "epoch": 1.52, "grad_norm": 5.963377593054064, "learning_rate": 1.4116188771163209e-06, "loss": 0.5924, "step": 17086 }, { "epoch": 1.52, "grad_norm": 6.282565605454972, "learning_rate": 1.4111158718763178e-06, "loss": 0.6284, "step": 17087 }, { "epoch": 1.52, "grad_norm": 8.340652634776813, "learning_rate": 1.4106129415464137e-06, "loss": 0.6034, "step": 17088 }, { "epoch": 1.52, "grad_norm": 7.503411791150942, "learning_rate": 1.410110086137102e-06, "loss": 0.6064, "step": 17089 }, { "epoch": 1.52, "grad_norm": 5.202567400877777, "learning_rate": 1.409607305658882e-06, "loss": 0.6036, "step": 17090 }, { "epoch": 1.52, "grad_norm": 9.54170363205557, "learning_rate": 1.4091046001222452e-06, "loss": 0.5371, "step": 17091 }, { "epoch": 1.52, "grad_norm": 6.181845470674902, "learning_rate": 1.4086019695376857e-06, "loss": 0.5469, "step": 17092 }, { "epoch": 1.52, "grad_norm": 6.247900563601584, "learning_rate": 1.4080994139156951e-06, "loss": 0.5956, "step": 17093 }, { "epoch": 1.52, "grad_norm": 7.203691266473642, "learning_rate": 1.4075969332667634e-06, "loss": 0.5269, "step": 17094 }, { "epoch": 1.53, "grad_norm": 6.292161864616709, "learning_rate": 1.4070945276013798e-06, "loss": 0.5692, "step": 17095 }, { "epoch": 1.53, "grad_norm": 8.72133611494868, "learning_rate": 1.4065921969300283e-06, "loss": 0.5994, "step": 17096 }, { "epoch": 1.53, "grad_norm": 7.712680534444186, "learning_rate": 1.4060899412631952e-06, "loss": 0.5783, "step": 17097 }, { "epoch": 1.53, "grad_norm": 6.3852637369112, "learning_rate": 1.4055877606113638e-06, "loss": 0.596, "step": 17098 }, { "epoch": 1.53, "grad_norm": 6.538009025413003, "learning_rate": 1.4050856549850167e-06, "loss": 0.6015, "step": 17099 }, { "epoch": 1.53, "grad_norm": 7.085812333683136, "learning_rate": 1.404583624394633e-06, "loss": 0.5941, "step": 17100 }, { "epoch": 1.53, "grad_norm": 5.544788132678288, "learning_rate": 1.4040816688506947e-06, "loss": 0.5262, "step": 17101 }, { "epoch": 1.53, "grad_norm": 6.762615506556841, "learning_rate": 1.4035797883636748e-06, "loss": 0.5381, "step": 17102 }, { "epoch": 1.53, "grad_norm": 8.341307263927458, "learning_rate": 1.403077982944051e-06, "loss": 0.5942, "step": 17103 }, { "epoch": 1.53, "grad_norm": 7.613060596095744, "learning_rate": 1.4025762526022967e-06, "loss": 0.5247, "step": 17104 }, { "epoch": 1.53, "grad_norm": 7.486935404739469, "learning_rate": 1.402074597348887e-06, "loss": 0.5158, "step": 17105 }, { "epoch": 1.53, "grad_norm": 7.407385358804876, "learning_rate": 1.4015730171942887e-06, "loss": 0.5187, "step": 17106 }, { "epoch": 1.53, "grad_norm": 7.7347560973513545, "learning_rate": 1.4010715121489738e-06, "loss": 0.5418, "step": 17107 }, { "epoch": 1.53, "grad_norm": 7.475381948460167, "learning_rate": 1.4005700822234103e-06, "loss": 0.5765, "step": 17108 }, { "epoch": 1.53, "grad_norm": 8.123065194099311, "learning_rate": 1.4000687274280628e-06, "loss": 0.5424, "step": 17109 }, { "epoch": 1.53, "grad_norm": 7.005833465060595, "learning_rate": 1.3995674477733966e-06, "loss": 0.5853, "step": 17110 }, { "epoch": 1.53, "grad_norm": 7.422340420733775, "learning_rate": 1.3990662432698754e-06, "loss": 0.5706, "step": 17111 }, { "epoch": 1.53, "grad_norm": 8.434492507514486, "learning_rate": 1.3985651139279604e-06, "loss": 0.6086, "step": 17112 }, { "epoch": 1.53, "grad_norm": 6.831721890267589, "learning_rate": 1.3980640597581118e-06, "loss": 0.5311, "step": 17113 }, { "epoch": 1.53, "grad_norm": 5.768505560551281, "learning_rate": 1.397563080770789e-06, "loss": 0.6147, "step": 17114 }, { "epoch": 1.53, "grad_norm": 5.554759950822776, "learning_rate": 1.3970621769764464e-06, "loss": 0.5395, "step": 17115 }, { "epoch": 1.53, "grad_norm": 6.00255772985404, "learning_rate": 1.3965613483855407e-06, "loss": 0.6163, "step": 17116 }, { "epoch": 1.53, "grad_norm": 5.909666508039577, "learning_rate": 1.396060595008525e-06, "loss": 0.5831, "step": 17117 }, { "epoch": 1.53, "grad_norm": 6.895321846788557, "learning_rate": 1.395559916855852e-06, "loss": 0.6005, "step": 17118 }, { "epoch": 1.53, "grad_norm": 6.048870029571252, "learning_rate": 1.3950593139379737e-06, "loss": 0.5612, "step": 17119 }, { "epoch": 1.53, "grad_norm": 4.923943264471194, "learning_rate": 1.3945587862653364e-06, "loss": 0.5977, "step": 17120 }, { "epoch": 1.53, "grad_norm": 6.3913623112344675, "learning_rate": 1.3940583338483898e-06, "loss": 0.5877, "step": 17121 }, { "epoch": 1.53, "grad_norm": 5.608899038285433, "learning_rate": 1.393557956697577e-06, "loss": 0.5143, "step": 17122 }, { "epoch": 1.53, "grad_norm": 7.311225548446261, "learning_rate": 1.3930576548233444e-06, "loss": 0.5499, "step": 17123 }, { "epoch": 1.53, "grad_norm": 8.356160989808739, "learning_rate": 1.3925574282361337e-06, "loss": 0.5633, "step": 17124 }, { "epoch": 1.53, "grad_norm": 6.524088672353561, "learning_rate": 1.3920572769463863e-06, "loss": 0.5766, "step": 17125 }, { "epoch": 1.53, "grad_norm": 6.633612419111581, "learning_rate": 1.3915572009645424e-06, "loss": 0.53, "step": 17126 }, { "epoch": 1.53, "grad_norm": 5.424412973553313, "learning_rate": 1.3910572003010409e-06, "loss": 0.61, "step": 17127 }, { "epoch": 1.53, "grad_norm": 6.708626771784581, "learning_rate": 1.390557274966316e-06, "loss": 0.5425, "step": 17128 }, { "epoch": 1.53, "grad_norm": 6.825640530283765, "learning_rate": 1.3900574249708026e-06, "loss": 0.5403, "step": 17129 }, { "epoch": 1.53, "grad_norm": 8.565101484358312, "learning_rate": 1.3895576503249353e-06, "loss": 0.5722, "step": 17130 }, { "epoch": 1.53, "grad_norm": 8.473971459554877, "learning_rate": 1.389057951039145e-06, "loss": 0.5888, "step": 17131 }, { "epoch": 1.53, "grad_norm": 7.2858879451788425, "learning_rate": 1.3885583271238623e-06, "loss": 0.5836, "step": 17132 }, { "epoch": 1.53, "grad_norm": 8.183077975943158, "learning_rate": 1.3880587785895172e-06, "loss": 0.5656, "step": 17133 }, { "epoch": 1.53, "grad_norm": 7.873234073700398, "learning_rate": 1.3875593054465354e-06, "loss": 0.5863, "step": 17134 }, { "epoch": 1.53, "grad_norm": 6.914949148826478, "learning_rate": 1.3870599077053398e-06, "loss": 0.604, "step": 17135 }, { "epoch": 1.53, "grad_norm": 5.960098597862141, "learning_rate": 1.3865605853763565e-06, "loss": 0.6009, "step": 17136 }, { "epoch": 1.53, "grad_norm": 6.746782057245046, "learning_rate": 1.3860613384700079e-06, "loss": 0.5696, "step": 17137 }, { "epoch": 1.53, "grad_norm": 6.092434252637906, "learning_rate": 1.3855621669967146e-06, "loss": 0.568, "step": 17138 }, { "epoch": 1.53, "grad_norm": 8.059536043403599, "learning_rate": 1.3850630709668955e-06, "loss": 0.6352, "step": 17139 }, { "epoch": 1.53, "grad_norm": 5.441010218434534, "learning_rate": 1.3845640503909697e-06, "loss": 0.608, "step": 17140 }, { "epoch": 1.53, "grad_norm": 6.52485271955215, "learning_rate": 1.3840651052793502e-06, "loss": 0.5795, "step": 17141 }, { "epoch": 1.53, "grad_norm": 7.119131884496756, "learning_rate": 1.3835662356424523e-06, "loss": 0.544, "step": 17142 }, { "epoch": 1.53, "grad_norm": 6.593199791405027, "learning_rate": 1.3830674414906903e-06, "loss": 0.519, "step": 17143 }, { "epoch": 1.53, "grad_norm": 6.532707441828155, "learning_rate": 1.3825687228344737e-06, "loss": 0.5294, "step": 17144 }, { "epoch": 1.53, "grad_norm": 5.736672632322665, "learning_rate": 1.3820700796842134e-06, "loss": 0.5289, "step": 17145 }, { "epoch": 1.53, "grad_norm": 6.5041837512061, "learning_rate": 1.3815715120503187e-06, "loss": 0.596, "step": 17146 }, { "epoch": 1.53, "grad_norm": 5.36567910027059, "learning_rate": 1.3810730199431926e-06, "loss": 0.5792, "step": 17147 }, { "epoch": 1.53, "grad_norm": 7.35250356153874, "learning_rate": 1.3805746033732436e-06, "loss": 0.5448, "step": 17148 }, { "epoch": 1.53, "grad_norm": 5.967663569395669, "learning_rate": 1.3800762623508718e-06, "loss": 0.5656, "step": 17149 }, { "epoch": 1.53, "grad_norm": 7.128653795764353, "learning_rate": 1.3795779968864809e-06, "loss": 0.5399, "step": 17150 }, { "epoch": 1.53, "grad_norm": 6.468037630911577, "learning_rate": 1.3790798069904704e-06, "loss": 0.5653, "step": 17151 }, { "epoch": 1.53, "grad_norm": 6.252543886713164, "learning_rate": 1.3785816926732386e-06, "loss": 0.5323, "step": 17152 }, { "epoch": 1.53, "grad_norm": 6.652344369408779, "learning_rate": 1.378083653945186e-06, "loss": 0.5912, "step": 17153 }, { "epoch": 1.53, "grad_norm": 6.746934849512741, "learning_rate": 1.377585690816703e-06, "loss": 0.6201, "step": 17154 }, { "epoch": 1.53, "grad_norm": 6.595270129921932, "learning_rate": 1.3770878032981855e-06, "loss": 0.5475, "step": 17155 }, { "epoch": 1.53, "grad_norm": 7.458343033265828, "learning_rate": 1.3765899914000264e-06, "loss": 0.5211, "step": 17156 }, { "epoch": 1.53, "grad_norm": 7.396505445998879, "learning_rate": 1.3760922551326161e-06, "loss": 0.5756, "step": 17157 }, { "epoch": 1.53, "grad_norm": 6.409950084025306, "learning_rate": 1.3755945945063437e-06, "loss": 0.5732, "step": 17158 }, { "epoch": 1.53, "grad_norm": 8.50468168717987, "learning_rate": 1.3750970095315985e-06, "loss": 0.5691, "step": 17159 }, { "epoch": 1.53, "grad_norm": 5.73658086535854, "learning_rate": 1.3745995002187628e-06, "loss": 0.5813, "step": 17160 }, { "epoch": 1.53, "grad_norm": 5.64410403907359, "learning_rate": 1.374102066578224e-06, "loss": 0.5058, "step": 17161 }, { "epoch": 1.53, "grad_norm": 7.6634437558626995, "learning_rate": 1.3736047086203646e-06, "loss": 0.557, "step": 17162 }, { "epoch": 1.53, "grad_norm": 6.693474412219433, "learning_rate": 1.3731074263555644e-06, "loss": 0.599, "step": 17163 }, { "epoch": 1.53, "grad_norm": 8.829400735690452, "learning_rate": 1.3726102197942032e-06, "loss": 0.6393, "step": 17164 }, { "epoch": 1.53, "grad_norm": 6.040320654644621, "learning_rate": 1.3721130889466605e-06, "loss": 0.5803, "step": 17165 }, { "epoch": 1.53, "grad_norm": 7.424055474795878, "learning_rate": 1.371616033823313e-06, "loss": 0.6783, "step": 17166 }, { "epoch": 1.53, "grad_norm": 6.325946275760738, "learning_rate": 1.3711190544345338e-06, "loss": 0.5596, "step": 17167 }, { "epoch": 1.53, "grad_norm": 6.772113740904194, "learning_rate": 1.370622150790697e-06, "loss": 0.6217, "step": 17168 }, { "epoch": 1.53, "grad_norm": 6.1436612950390606, "learning_rate": 1.3701253229021743e-06, "loss": 0.6003, "step": 17169 }, { "epoch": 1.53, "grad_norm": 7.188933023333301, "learning_rate": 1.3696285707793367e-06, "loss": 0.5531, "step": 17170 }, { "epoch": 1.53, "grad_norm": 7.660661952164542, "learning_rate": 1.3691318944325537e-06, "loss": 0.5639, "step": 17171 }, { "epoch": 1.53, "grad_norm": 5.404268850189079, "learning_rate": 1.3686352938721891e-06, "loss": 0.562, "step": 17172 }, { "epoch": 1.53, "grad_norm": 8.356135569509364, "learning_rate": 1.3681387691086106e-06, "loss": 0.5895, "step": 17173 }, { "epoch": 1.53, "grad_norm": 6.963103654957032, "learning_rate": 1.3676423201521816e-06, "loss": 0.5697, "step": 17174 }, { "epoch": 1.53, "grad_norm": 7.664938026634077, "learning_rate": 1.3671459470132642e-06, "loss": 0.5716, "step": 17175 }, { "epoch": 1.53, "grad_norm": 4.934932144087231, "learning_rate": 1.366649649702219e-06, "loss": 0.5673, "step": 17176 }, { "epoch": 1.53, "grad_norm": 6.735893107563485, "learning_rate": 1.3661534282294076e-06, "loss": 0.589, "step": 17177 }, { "epoch": 1.53, "grad_norm": 5.922995519893428, "learning_rate": 1.3656572826051855e-06, "loss": 0.5825, "step": 17178 }, { "epoch": 1.53, "grad_norm": 6.383885688005431, "learning_rate": 1.3651612128399066e-06, "loss": 0.5979, "step": 17179 }, { "epoch": 1.53, "grad_norm": 6.125472813171852, "learning_rate": 1.3646652189439274e-06, "loss": 0.5675, "step": 17180 }, { "epoch": 1.53, "grad_norm": 4.871495666191913, "learning_rate": 1.3641693009276008e-06, "loss": 0.5449, "step": 17181 }, { "epoch": 1.53, "grad_norm": 8.466121559417719, "learning_rate": 1.3636734588012773e-06, "loss": 0.5667, "step": 17182 }, { "epoch": 1.53, "grad_norm": 7.183328942381242, "learning_rate": 1.363177692575307e-06, "loss": 0.6173, "step": 17183 }, { "epoch": 1.53, "grad_norm": 7.326729498920868, "learning_rate": 1.3626820022600396e-06, "loss": 0.6117, "step": 17184 }, { "epoch": 1.53, "grad_norm": 6.440073519740164, "learning_rate": 1.3621863878658175e-06, "loss": 0.5382, "step": 17185 }, { "epoch": 1.53, "grad_norm": 5.709914531512257, "learning_rate": 1.361690849402989e-06, "loss": 0.578, "step": 17186 }, { "epoch": 1.53, "grad_norm": 7.7057942429563955, "learning_rate": 1.3611953868818955e-06, "loss": 0.6453, "step": 17187 }, { "epoch": 1.53, "grad_norm": 6.389504478235555, "learning_rate": 1.3607000003128795e-06, "loss": 0.5591, "step": 17188 }, { "epoch": 1.53, "grad_norm": 7.771795446396589, "learning_rate": 1.3602046897062816e-06, "loss": 0.5513, "step": 17189 }, { "epoch": 1.53, "grad_norm": 5.689915186590739, "learning_rate": 1.359709455072441e-06, "loss": 0.5412, "step": 17190 }, { "epoch": 1.53, "grad_norm": 4.769806607016308, "learning_rate": 1.359214296421693e-06, "loss": 0.5973, "step": 17191 }, { "epoch": 1.53, "grad_norm": 4.580144854601644, "learning_rate": 1.3587192137643724e-06, "loss": 0.6545, "step": 17192 }, { "epoch": 1.53, "grad_norm": 7.355361570421232, "learning_rate": 1.3582242071108137e-06, "loss": 0.5534, "step": 17193 }, { "epoch": 1.53, "grad_norm": 6.442230003203185, "learning_rate": 1.3577292764713495e-06, "loss": 0.5962, "step": 17194 }, { "epoch": 1.53, "grad_norm": 8.299109219735138, "learning_rate": 1.35723442185631e-06, "loss": 0.5474, "step": 17195 }, { "epoch": 1.53, "grad_norm": 11.89131766733263, "learning_rate": 1.356739643276025e-06, "loss": 0.5739, "step": 17196 }, { "epoch": 1.53, "grad_norm": 7.084525599281193, "learning_rate": 1.3562449407408224e-06, "loss": 0.5715, "step": 17197 }, { "epoch": 1.53, "grad_norm": 6.651234459872414, "learning_rate": 1.355750314261025e-06, "loss": 0.5653, "step": 17198 }, { "epoch": 1.53, "grad_norm": 5.593601304039529, "learning_rate": 1.3552557638469598e-06, "loss": 0.59, "step": 17199 }, { "epoch": 1.53, "grad_norm": 5.391088223007171, "learning_rate": 1.3547612895089484e-06, "loss": 0.5686, "step": 17200 }, { "epoch": 1.53, "grad_norm": 6.731818543463911, "learning_rate": 1.354266891257312e-06, "loss": 0.538, "step": 17201 }, { "epoch": 1.53, "grad_norm": 6.941340349079574, "learning_rate": 1.35377256910237e-06, "loss": 0.6421, "step": 17202 }, { "epoch": 1.53, "grad_norm": 7.05003289734198, "learning_rate": 1.3532783230544427e-06, "loss": 0.5449, "step": 17203 }, { "epoch": 1.53, "grad_norm": 7.050969330137939, "learning_rate": 1.3527841531238424e-06, "loss": 0.5109, "step": 17204 }, { "epoch": 1.53, "grad_norm": 7.251876431825372, "learning_rate": 1.3522900593208855e-06, "loss": 0.653, "step": 17205 }, { "epoch": 1.53, "grad_norm": 4.507660076440911, "learning_rate": 1.3517960416558867e-06, "loss": 0.573, "step": 17206 }, { "epoch": 1.54, "grad_norm": 6.9492525280987705, "learning_rate": 1.351302100139155e-06, "loss": 0.5899, "step": 17207 }, { "epoch": 1.54, "grad_norm": 5.737250786599135, "learning_rate": 1.3508082347810015e-06, "loss": 0.6003, "step": 17208 }, { "epoch": 1.54, "grad_norm": 5.176956386321778, "learning_rate": 1.3503144455917338e-06, "loss": 0.5373, "step": 17209 }, { "epoch": 1.54, "grad_norm": 9.233353015812144, "learning_rate": 1.3498207325816616e-06, "loss": 0.621, "step": 17210 }, { "epoch": 1.54, "grad_norm": 6.865537707031853, "learning_rate": 1.349327095761086e-06, "loss": 0.5251, "step": 17211 }, { "epoch": 1.54, "grad_norm": 6.307457729914165, "learning_rate": 1.3488335351403126e-06, "loss": 0.5923, "step": 17212 }, { "epoch": 1.54, "grad_norm": 7.666545530329459, "learning_rate": 1.348340050729643e-06, "loss": 0.5762, "step": 17213 }, { "epoch": 1.54, "grad_norm": 8.083835936859414, "learning_rate": 1.3478466425393777e-06, "loss": 0.588, "step": 17214 }, { "epoch": 1.54, "grad_norm": 5.723388187308514, "learning_rate": 1.3473533105798165e-06, "loss": 0.5957, "step": 17215 }, { "epoch": 1.54, "grad_norm": 5.621775797697638, "learning_rate": 1.3468600548612564e-06, "loss": 0.5555, "step": 17216 }, { "epoch": 1.54, "grad_norm": 7.376757545061219, "learning_rate": 1.3463668753939912e-06, "loss": 0.5441, "step": 17217 }, { "epoch": 1.54, "grad_norm": 6.345131954590711, "learning_rate": 1.3458737721883164e-06, "loss": 0.641, "step": 17218 }, { "epoch": 1.54, "grad_norm": 7.4600938979622065, "learning_rate": 1.3453807452545237e-06, "loss": 0.6177, "step": 17219 }, { "epoch": 1.54, "grad_norm": 6.525025611048279, "learning_rate": 1.3448877946029066e-06, "loss": 0.585, "step": 17220 }, { "epoch": 1.54, "grad_norm": 6.052636035158669, "learning_rate": 1.34439492024375e-06, "loss": 0.4968, "step": 17221 }, { "epoch": 1.54, "grad_norm": 6.2400243982581065, "learning_rate": 1.3439021221873444e-06, "loss": 0.5576, "step": 17222 }, { "epoch": 1.54, "grad_norm": 5.680058238085944, "learning_rate": 1.3434094004439763e-06, "loss": 0.5938, "step": 17223 }, { "epoch": 1.54, "grad_norm": 9.645817839131162, "learning_rate": 1.342916755023928e-06, "loss": 0.5308, "step": 17224 }, { "epoch": 1.54, "grad_norm": 4.3579330983813565, "learning_rate": 1.3424241859374838e-06, "loss": 0.6107, "step": 17225 }, { "epoch": 1.54, "grad_norm": 6.51045558972247, "learning_rate": 1.3419316931949244e-06, "loss": 0.6173, "step": 17226 }, { "epoch": 1.54, "grad_norm": 5.0635030595174415, "learning_rate": 1.3414392768065299e-06, "loss": 0.518, "step": 17227 }, { "epoch": 1.54, "grad_norm": 6.546095559568027, "learning_rate": 1.3409469367825783e-06, "loss": 0.568, "step": 17228 }, { "epoch": 1.54, "grad_norm": 7.427457411431497, "learning_rate": 1.3404546731333479e-06, "loss": 0.5646, "step": 17229 }, { "epoch": 1.54, "grad_norm": 5.735403751908864, "learning_rate": 1.33996248586911e-06, "loss": 0.5889, "step": 17230 }, { "epoch": 1.54, "grad_norm": 5.521213096138269, "learning_rate": 1.3394703750001402e-06, "loss": 0.5328, "step": 17231 }, { "epoch": 1.54, "grad_norm": 6.857847196777849, "learning_rate": 1.3389783405367101e-06, "loss": 0.5799, "step": 17232 }, { "epoch": 1.54, "grad_norm": 5.689310098665093, "learning_rate": 1.3384863824890892e-06, "loss": 0.5833, "step": 17233 }, { "epoch": 1.54, "grad_norm": 6.0469807765847525, "learning_rate": 1.3379945008675471e-06, "loss": 0.5446, "step": 17234 }, { "epoch": 1.54, "grad_norm": 5.385839946422461, "learning_rate": 1.3375026956823511e-06, "loss": 0.575, "step": 17235 }, { "epoch": 1.54, "grad_norm": 8.537239846371557, "learning_rate": 1.3370109669437658e-06, "loss": 0.5768, "step": 17236 }, { "epoch": 1.54, "grad_norm": 8.74254941842663, "learning_rate": 1.336519314662053e-06, "loss": 0.5685, "step": 17237 }, { "epoch": 1.54, "grad_norm": 7.088149501599103, "learning_rate": 1.3360277388474775e-06, "loss": 0.5371, "step": 17238 }, { "epoch": 1.54, "grad_norm": 4.907752948339701, "learning_rate": 1.3355362395102978e-06, "loss": 0.5433, "step": 17239 }, { "epoch": 1.54, "grad_norm": 7.466592954661629, "learning_rate": 1.335044816660775e-06, "loss": 0.5721, "step": 17240 }, { "epoch": 1.54, "grad_norm": 8.643739334134564, "learning_rate": 1.334553470309165e-06, "loss": 0.5485, "step": 17241 }, { "epoch": 1.54, "grad_norm": 6.293671885429159, "learning_rate": 1.3340622004657256e-06, "loss": 0.5873, "step": 17242 }, { "epoch": 1.54, "grad_norm": 8.816699485460827, "learning_rate": 1.3335710071407083e-06, "loss": 0.5576, "step": 17243 }, { "epoch": 1.54, "grad_norm": 6.896603293774081, "learning_rate": 1.3330798903443664e-06, "loss": 0.5881, "step": 17244 }, { "epoch": 1.54, "grad_norm": 6.2898247501867734, "learning_rate": 1.3325888500869521e-06, "loss": 0.5302, "step": 17245 }, { "epoch": 1.54, "grad_norm": 8.212186048251358, "learning_rate": 1.3320978863787138e-06, "loss": 0.6015, "step": 17246 }, { "epoch": 1.54, "grad_norm": 9.689162834221072, "learning_rate": 1.3316069992299008e-06, "loss": 0.6375, "step": 17247 }, { "epoch": 1.54, "grad_norm": 5.776247723007912, "learning_rate": 1.3311161886507567e-06, "loss": 0.5829, "step": 17248 }, { "epoch": 1.54, "grad_norm": 8.125661946561763, "learning_rate": 1.330625454651529e-06, "loss": 0.649, "step": 17249 }, { "epoch": 1.54, "grad_norm": 5.762690563721956, "learning_rate": 1.3301347972424571e-06, "loss": 0.5172, "step": 17250 }, { "epoch": 1.54, "grad_norm": 6.060673459796315, "learning_rate": 1.3296442164337853e-06, "loss": 0.5862, "step": 17251 }, { "epoch": 1.54, "grad_norm": 7.400547592277442, "learning_rate": 1.3291537122357516e-06, "loss": 0.6272, "step": 17252 }, { "epoch": 1.54, "grad_norm": 5.558162792623261, "learning_rate": 1.3286632846585962e-06, "loss": 0.5665, "step": 17253 }, { "epoch": 1.54, "grad_norm": 8.147254434823392, "learning_rate": 1.3281729337125555e-06, "loss": 0.6542, "step": 17254 }, { "epoch": 1.54, "grad_norm": 6.914219439315344, "learning_rate": 1.3276826594078624e-06, "loss": 0.5612, "step": 17255 }, { "epoch": 1.54, "grad_norm": 6.224201794993361, "learning_rate": 1.327192461754751e-06, "loss": 0.5783, "step": 17256 }, { "epoch": 1.54, "grad_norm": 7.477388226571542, "learning_rate": 1.3267023407634545e-06, "loss": 0.5723, "step": 17257 }, { "epoch": 1.54, "grad_norm": 7.617188723293537, "learning_rate": 1.3262122964442015e-06, "loss": 0.5966, "step": 17258 }, { "epoch": 1.54, "grad_norm": 6.988454674425447, "learning_rate": 1.325722328807222e-06, "loss": 0.5792, "step": 17259 }, { "epoch": 1.54, "grad_norm": 7.276064495762684, "learning_rate": 1.3252324378627436e-06, "loss": 0.5763, "step": 17260 }, { "epoch": 1.54, "grad_norm": 6.44593463128524, "learning_rate": 1.3247426236209888e-06, "loss": 0.5306, "step": 17261 }, { "epoch": 1.54, "grad_norm": 5.997806998845987, "learning_rate": 1.3242528860921828e-06, "loss": 0.5863, "step": 17262 }, { "epoch": 1.54, "grad_norm": 8.995027915848794, "learning_rate": 1.32376322528655e-06, "loss": 0.5639, "step": 17263 }, { "epoch": 1.54, "grad_norm": 8.645436398407703, "learning_rate": 1.323273641214307e-06, "loss": 0.5578, "step": 17264 }, { "epoch": 1.54, "grad_norm": 6.742021292393683, "learning_rate": 1.3227841338856756e-06, "loss": 0.5688, "step": 17265 }, { "epoch": 1.54, "grad_norm": 6.391831239357793, "learning_rate": 1.3222947033108723e-06, "loss": 0.5233, "step": 17266 }, { "epoch": 1.54, "grad_norm": 5.986138945531844, "learning_rate": 1.3218053495001143e-06, "loss": 0.5766, "step": 17267 }, { "epoch": 1.54, "grad_norm": 9.704256007936365, "learning_rate": 1.3213160724636131e-06, "loss": 0.6232, "step": 17268 }, { "epoch": 1.54, "grad_norm": 6.060612500962514, "learning_rate": 1.3208268722115825e-06, "loss": 0.5826, "step": 17269 }, { "epoch": 1.54, "grad_norm": 9.653650272126415, "learning_rate": 1.320337748754234e-06, "loss": 0.5968, "step": 17270 }, { "epoch": 1.54, "grad_norm": 5.840766154009693, "learning_rate": 1.3198487021017764e-06, "loss": 0.5103, "step": 17271 }, { "epoch": 1.54, "grad_norm": 7.874350103719004, "learning_rate": 1.3193597322644181e-06, "loss": 0.5664, "step": 17272 }, { "epoch": 1.54, "grad_norm": 6.691861256692753, "learning_rate": 1.3188708392523658e-06, "loss": 0.6094, "step": 17273 }, { "epoch": 1.54, "grad_norm": 7.754711192315602, "learning_rate": 1.318382023075822e-06, "loss": 0.6109, "step": 17274 }, { "epoch": 1.54, "grad_norm": 6.744563464408076, "learning_rate": 1.3178932837449915e-06, "loss": 0.536, "step": 17275 }, { "epoch": 1.54, "grad_norm": 7.29435858995001, "learning_rate": 1.3174046212700741e-06, "loss": 0.6039, "step": 17276 }, { "epoch": 1.54, "grad_norm": 7.574385768081241, "learning_rate": 1.3169160356612709e-06, "loss": 0.604, "step": 17277 }, { "epoch": 1.54, "grad_norm": 5.989069496385774, "learning_rate": 1.3164275269287813e-06, "loss": 0.5794, "step": 17278 }, { "epoch": 1.54, "grad_norm": 5.925875257193215, "learning_rate": 1.3159390950827983e-06, "loss": 0.5883, "step": 17279 }, { "epoch": 1.54, "grad_norm": 6.443585166261667, "learning_rate": 1.3154507401335203e-06, "loss": 0.5362, "step": 17280 }, { "epoch": 1.54, "grad_norm": 6.142624026374915, "learning_rate": 1.3149624620911383e-06, "loss": 0.5591, "step": 17281 }, { "epoch": 1.54, "grad_norm": 8.112807055874939, "learning_rate": 1.314474260965844e-06, "loss": 0.5953, "step": 17282 }, { "epoch": 1.54, "grad_norm": 5.831955221853754, "learning_rate": 1.3139861367678285e-06, "loss": 0.6228, "step": 17283 }, { "epoch": 1.54, "grad_norm": 5.506625451766481, "learning_rate": 1.3134980895072801e-06, "loss": 0.5525, "step": 17284 }, { "epoch": 1.54, "grad_norm": 4.744234970448051, "learning_rate": 1.3130101191943862e-06, "loss": 0.5863, "step": 17285 }, { "epoch": 1.54, "grad_norm": 6.665854295837863, "learning_rate": 1.3125222258393328e-06, "loss": 0.5452, "step": 17286 }, { "epoch": 1.54, "grad_norm": 6.937346679108787, "learning_rate": 1.3120344094523013e-06, "loss": 0.4993, "step": 17287 }, { "epoch": 1.54, "grad_norm": 7.543173454027063, "learning_rate": 1.3115466700434748e-06, "loss": 0.5353, "step": 17288 }, { "epoch": 1.54, "grad_norm": 9.249846991069104, "learning_rate": 1.311059007623034e-06, "loss": 0.5837, "step": 17289 }, { "epoch": 1.54, "grad_norm": 6.607967512185825, "learning_rate": 1.3105714222011573e-06, "loss": 0.5559, "step": 17290 }, { "epoch": 1.54, "grad_norm": 5.438346236725876, "learning_rate": 1.3100839137880233e-06, "loss": 0.5126, "step": 17291 }, { "epoch": 1.54, "grad_norm": 8.363293475057521, "learning_rate": 1.309596482393808e-06, "loss": 0.6378, "step": 17292 }, { "epoch": 1.54, "grad_norm": 6.109516232616605, "learning_rate": 1.3091091280286839e-06, "loss": 0.5302, "step": 17293 }, { "epoch": 1.54, "grad_norm": 7.469059445326008, "learning_rate": 1.3086218507028225e-06, "loss": 0.5718, "step": 17294 }, { "epoch": 1.54, "grad_norm": 7.519727820808889, "learning_rate": 1.3081346504263965e-06, "loss": 0.5635, "step": 17295 }, { "epoch": 1.54, "grad_norm": 7.4090645105864255, "learning_rate": 1.307647527209574e-06, "loss": 0.5512, "step": 17296 }, { "epoch": 1.54, "grad_norm": 8.924462378419442, "learning_rate": 1.3071604810625238e-06, "loss": 0.5898, "step": 17297 }, { "epoch": 1.54, "grad_norm": 6.963865259798091, "learning_rate": 1.306673511995411e-06, "loss": 0.5464, "step": 17298 }, { "epoch": 1.54, "grad_norm": 8.233148354648746, "learning_rate": 1.306186620018402e-06, "loss": 0.5831, "step": 17299 }, { "epoch": 1.54, "grad_norm": 5.95769236680475, "learning_rate": 1.3056998051416564e-06, "loss": 0.5446, "step": 17300 }, { "epoch": 1.54, "grad_norm": 8.990008786889529, "learning_rate": 1.3052130673753371e-06, "loss": 0.5402, "step": 17301 }, { "epoch": 1.54, "grad_norm": 8.18673863882345, "learning_rate": 1.3047264067296033e-06, "loss": 0.6324, "step": 17302 }, { "epoch": 1.54, "grad_norm": 6.862994418072173, "learning_rate": 1.3042398232146137e-06, "loss": 0.5162, "step": 17303 }, { "epoch": 1.54, "grad_norm": 5.717723546229892, "learning_rate": 1.3037533168405237e-06, "loss": 0.5651, "step": 17304 }, { "epoch": 1.54, "grad_norm": 5.257003965634214, "learning_rate": 1.3032668876174903e-06, "loss": 0.5323, "step": 17305 }, { "epoch": 1.54, "grad_norm": 6.329755364792456, "learning_rate": 1.3027805355556638e-06, "loss": 0.5239, "step": 17306 }, { "epoch": 1.54, "grad_norm": 6.556761176061359, "learning_rate": 1.3022942606651978e-06, "loss": 0.5558, "step": 17307 }, { "epoch": 1.54, "grad_norm": 8.90833858982528, "learning_rate": 1.3018080629562403e-06, "loss": 0.6134, "step": 17308 }, { "epoch": 1.54, "grad_norm": 5.44114822285237, "learning_rate": 1.30132194243894e-06, "loss": 0.5715, "step": 17309 }, { "epoch": 1.54, "grad_norm": 6.03933345369698, "learning_rate": 1.3008358991234449e-06, "loss": 0.5932, "step": 17310 }, { "epoch": 1.54, "grad_norm": 8.03662734901584, "learning_rate": 1.3003499330198988e-06, "loss": 0.6465, "step": 17311 }, { "epoch": 1.54, "grad_norm": 5.007990680498799, "learning_rate": 1.2998640441384474e-06, "loss": 0.6266, "step": 17312 }, { "epoch": 1.54, "grad_norm": 6.615670055093644, "learning_rate": 1.299378232489229e-06, "loss": 0.6115, "step": 17313 }, { "epoch": 1.54, "grad_norm": 9.639660647423137, "learning_rate": 1.2988924980823863e-06, "loss": 0.5945, "step": 17314 }, { "epoch": 1.54, "grad_norm": 6.8583074459546856, "learning_rate": 1.2984068409280575e-06, "loss": 0.5284, "step": 17315 }, { "epoch": 1.54, "grad_norm": 6.642721270487775, "learning_rate": 1.2979212610363795e-06, "loss": 0.5048, "step": 17316 }, { "epoch": 1.54, "grad_norm": 7.619949391458259, "learning_rate": 1.2974357584174875e-06, "loss": 0.573, "step": 17317 }, { "epoch": 1.54, "grad_norm": 5.957207118813275, "learning_rate": 1.296950333081517e-06, "loss": 0.6212, "step": 17318 }, { "epoch": 1.55, "grad_norm": 6.130068820172727, "learning_rate": 1.2964649850385973e-06, "loss": 0.5425, "step": 17319 }, { "epoch": 1.55, "grad_norm": 8.397878514607624, "learning_rate": 1.2959797142988606e-06, "loss": 0.5417, "step": 17320 }, { "epoch": 1.55, "grad_norm": 6.147378636626203, "learning_rate": 1.2954945208724373e-06, "loss": 0.5606, "step": 17321 }, { "epoch": 1.55, "grad_norm": 5.4299083388866975, "learning_rate": 1.2950094047694516e-06, "loss": 0.6266, "step": 17322 }, { "epoch": 1.55, "grad_norm": 6.30084481447171, "learning_rate": 1.2945243660000306e-06, "loss": 0.5983, "step": 17323 }, { "epoch": 1.55, "grad_norm": 4.74439112916879, "learning_rate": 1.2940394045742987e-06, "loss": 0.5296, "step": 17324 }, { "epoch": 1.55, "grad_norm": 5.5314200170317225, "learning_rate": 1.2935545205023797e-06, "loss": 0.5449, "step": 17325 }, { "epoch": 1.55, "grad_norm": 7.500080433141965, "learning_rate": 1.293069713794392e-06, "loss": 0.5835, "step": 17326 }, { "epoch": 1.55, "grad_norm": 6.464567088067988, "learning_rate": 1.2925849844604554e-06, "loss": 0.5686, "step": 17327 }, { "epoch": 1.55, "grad_norm": 5.535663860008356, "learning_rate": 1.2921003325106885e-06, "loss": 0.5483, "step": 17328 }, { "epoch": 1.55, "grad_norm": 7.462844500440332, "learning_rate": 1.2916157579552068e-06, "loss": 0.5871, "step": 17329 }, { "epoch": 1.55, "grad_norm": 6.701895253490909, "learning_rate": 1.291131260804127e-06, "loss": 0.5538, "step": 17330 }, { "epoch": 1.55, "grad_norm": 8.303542088568534, "learning_rate": 1.2906468410675576e-06, "loss": 0.582, "step": 17331 }, { "epoch": 1.55, "grad_norm": 5.774927310850524, "learning_rate": 1.2901624987556127e-06, "loss": 0.6156, "step": 17332 }, { "epoch": 1.55, "grad_norm": 6.883386806311535, "learning_rate": 1.289678233878401e-06, "loss": 0.5774, "step": 17333 }, { "epoch": 1.55, "grad_norm": 6.347578328360446, "learning_rate": 1.2891940464460307e-06, "loss": 0.5165, "step": 17334 }, { "epoch": 1.55, "grad_norm": 5.864645939782381, "learning_rate": 1.288709936468608e-06, "loss": 0.5579, "step": 17335 }, { "epoch": 1.55, "grad_norm": 6.8457705574006775, "learning_rate": 1.2882259039562388e-06, "loss": 0.5749, "step": 17336 }, { "epoch": 1.55, "grad_norm": 6.47952245941112, "learning_rate": 1.2877419489190247e-06, "loss": 0.5587, "step": 17337 }, { "epoch": 1.55, "grad_norm": 8.96728662099873, "learning_rate": 1.2872580713670685e-06, "loss": 0.6003, "step": 17338 }, { "epoch": 1.55, "grad_norm": 6.497134620616692, "learning_rate": 1.2867742713104681e-06, "loss": 0.6048, "step": 17339 }, { "epoch": 1.55, "grad_norm": 6.7826070877584135, "learning_rate": 1.286290548759323e-06, "loss": 0.6112, "step": 17340 }, { "epoch": 1.55, "grad_norm": 5.275838887523173, "learning_rate": 1.2858069037237297e-06, "loss": 0.5696, "step": 17341 }, { "epoch": 1.55, "grad_norm": 5.989755664653599, "learning_rate": 1.2853233362137834e-06, "loss": 0.5563, "step": 17342 }, { "epoch": 1.55, "grad_norm": 8.283723876098554, "learning_rate": 1.284839846239579e-06, "loss": 0.5581, "step": 17343 }, { "epoch": 1.55, "grad_norm": 6.087608397177082, "learning_rate": 1.2843564338112053e-06, "loss": 0.5828, "step": 17344 }, { "epoch": 1.55, "grad_norm": 4.6466926673436735, "learning_rate": 1.2838730989387538e-06, "loss": 0.6639, "step": 17345 }, { "epoch": 1.55, "grad_norm": 6.82354726597059, "learning_rate": 1.2833898416323132e-06, "loss": 0.6134, "step": 17346 }, { "epoch": 1.55, "grad_norm": 6.978271596064632, "learning_rate": 1.2829066619019704e-06, "loss": 0.5556, "step": 17347 }, { "epoch": 1.55, "grad_norm": 6.183317712742663, "learning_rate": 1.2824235597578105e-06, "loss": 0.565, "step": 17348 }, { "epoch": 1.55, "grad_norm": 7.209882796896954, "learning_rate": 1.281940535209919e-06, "loss": 0.5736, "step": 17349 }, { "epoch": 1.55, "grad_norm": 9.209196233562606, "learning_rate": 1.2814575882683767e-06, "loss": 0.6018, "step": 17350 }, { "epoch": 1.55, "grad_norm": 7.077723242901754, "learning_rate": 1.2809747189432625e-06, "loss": 0.619, "step": 17351 }, { "epoch": 1.55, "grad_norm": 6.251291602509589, "learning_rate": 1.2804919272446558e-06, "loss": 0.5588, "step": 17352 }, { "epoch": 1.55, "grad_norm": 6.020536803162597, "learning_rate": 1.2800092131826352e-06, "loss": 0.5977, "step": 17353 }, { "epoch": 1.55, "grad_norm": 5.309385119656159, "learning_rate": 1.2795265767672755e-06, "loss": 0.5726, "step": 17354 }, { "epoch": 1.55, "grad_norm": 6.991265310699507, "learning_rate": 1.2790440180086505e-06, "loss": 0.6044, "step": 17355 }, { "epoch": 1.55, "grad_norm": 5.716133921907439, "learning_rate": 1.2785615369168348e-06, "loss": 0.5331, "step": 17356 }, { "epoch": 1.55, "grad_norm": 7.028972751645588, "learning_rate": 1.2780791335018949e-06, "loss": 0.5895, "step": 17357 }, { "epoch": 1.55, "grad_norm": 8.539145636486476, "learning_rate": 1.277596807773903e-06, "loss": 0.6064, "step": 17358 }, { "epoch": 1.55, "grad_norm": 5.226798478437806, "learning_rate": 1.2771145597429258e-06, "loss": 0.628, "step": 17359 }, { "epoch": 1.55, "grad_norm": 6.303066508181731, "learning_rate": 1.2766323894190285e-06, "loss": 0.5412, "step": 17360 }, { "epoch": 1.55, "grad_norm": 5.281463666536227, "learning_rate": 1.276150296812277e-06, "loss": 0.5464, "step": 17361 }, { "epoch": 1.55, "grad_norm": 11.304835011836763, "learning_rate": 1.2756682819327338e-06, "loss": 0.6251, "step": 17362 }, { "epoch": 1.55, "grad_norm": 5.7447746020163795, "learning_rate": 1.2751863447904578e-06, "loss": 0.5236, "step": 17363 }, { "epoch": 1.55, "grad_norm": 7.634700829932257, "learning_rate": 1.274704485395511e-06, "loss": 0.5442, "step": 17364 }, { "epoch": 1.55, "grad_norm": 5.878913949677425, "learning_rate": 1.274222703757948e-06, "loss": 0.58, "step": 17365 }, { "epoch": 1.55, "grad_norm": 6.168336236057548, "learning_rate": 1.2737409998878275e-06, "loss": 0.6197, "step": 17366 }, { "epoch": 1.55, "grad_norm": 5.47938954240068, "learning_rate": 1.2732593737952026e-06, "loss": 0.536, "step": 17367 }, { "epoch": 1.55, "grad_norm": 6.120253382959296, "learning_rate": 1.2727778254901274e-06, "loss": 0.5719, "step": 17368 }, { "epoch": 1.55, "grad_norm": 5.163106629751549, "learning_rate": 1.272296354982654e-06, "loss": 0.5501, "step": 17369 }, { "epoch": 1.55, "grad_norm": 7.076162849052849, "learning_rate": 1.2718149622828284e-06, "loss": 0.604, "step": 17370 }, { "epoch": 1.55, "grad_norm": 7.202840745181803, "learning_rate": 1.2713336474007016e-06, "loss": 0.6497, "step": 17371 }, { "epoch": 1.55, "grad_norm": 6.869114363994082, "learning_rate": 1.2708524103463193e-06, "loss": 0.515, "step": 17372 }, { "epoch": 1.55, "grad_norm": 6.717167747236392, "learning_rate": 1.2703712511297262e-06, "loss": 0.5056, "step": 17373 }, { "epoch": 1.55, "grad_norm": 7.167528924486516, "learning_rate": 1.2698901697609656e-06, "loss": 0.501, "step": 17374 }, { "epoch": 1.55, "grad_norm": 8.188106380164934, "learning_rate": 1.2694091662500802e-06, "loss": 0.5412, "step": 17375 }, { "epoch": 1.55, "grad_norm": 8.475440760250637, "learning_rate": 1.2689282406071068e-06, "loss": 0.5305, "step": 17376 }, { "epoch": 1.55, "grad_norm": 7.134242151850569, "learning_rate": 1.268447392842086e-06, "loss": 0.5694, "step": 17377 }, { "epoch": 1.55, "grad_norm": 7.592762297763197, "learning_rate": 1.2679666229650535e-06, "loss": 0.581, "step": 17378 }, { "epoch": 1.55, "grad_norm": 6.9185869608611545, "learning_rate": 1.2674859309860467e-06, "loss": 0.5688, "step": 17379 }, { "epoch": 1.55, "grad_norm": 9.358404706136367, "learning_rate": 1.2670053169150947e-06, "loss": 0.6441, "step": 17380 }, { "epoch": 1.55, "grad_norm": 7.514340010592223, "learning_rate": 1.2665247807622322e-06, "loss": 0.5796, "step": 17381 }, { "epoch": 1.55, "grad_norm": 8.828243188124922, "learning_rate": 1.2660443225374901e-06, "loss": 0.5848, "step": 17382 }, { "epoch": 1.55, "grad_norm": 7.143889821302738, "learning_rate": 1.2655639422508942e-06, "loss": 0.546, "step": 17383 }, { "epoch": 1.55, "grad_norm": 7.811558151819244, "learning_rate": 1.2650836399124728e-06, "loss": 0.5389, "step": 17384 }, { "epoch": 1.55, "grad_norm": 12.259479844745151, "learning_rate": 1.2646034155322507e-06, "loss": 0.5897, "step": 17385 }, { "epoch": 1.55, "grad_norm": 6.491060810351299, "learning_rate": 1.2641232691202521e-06, "loss": 0.5776, "step": 17386 }, { "epoch": 1.55, "grad_norm": 7.349517452618759, "learning_rate": 1.263643200686499e-06, "loss": 0.6231, "step": 17387 }, { "epoch": 1.55, "grad_norm": 6.957422636358688, "learning_rate": 1.263163210241013e-06, "loss": 0.5766, "step": 17388 }, { "epoch": 1.55, "grad_norm": 4.193821674054475, "learning_rate": 1.2626832977938097e-06, "loss": 0.5987, "step": 17389 }, { "epoch": 1.55, "grad_norm": 6.732039625682774, "learning_rate": 1.2622034633549084e-06, "loss": 0.6051, "step": 17390 }, { "epoch": 1.55, "grad_norm": 6.636372365665808, "learning_rate": 1.2617237069343237e-06, "loss": 0.5391, "step": 17391 }, { "epoch": 1.55, "grad_norm": 5.5792845413978025, "learning_rate": 1.26124402854207e-06, "loss": 0.6774, "step": 17392 }, { "epoch": 1.55, "grad_norm": 7.26516192719665, "learning_rate": 1.2607644281881614e-06, "loss": 0.544, "step": 17393 }, { "epoch": 1.55, "grad_norm": 7.748804044321922, "learning_rate": 1.260284905882605e-06, "loss": 0.5998, "step": 17394 }, { "epoch": 1.55, "grad_norm": 4.897225723365914, "learning_rate": 1.2598054616354128e-06, "loss": 0.6042, "step": 17395 }, { "epoch": 1.55, "grad_norm": 4.442007542996957, "learning_rate": 1.259326095456589e-06, "loss": 0.57, "step": 17396 }, { "epoch": 1.55, "grad_norm": 6.466936854007667, "learning_rate": 1.2588468073561411e-06, "loss": 0.5587, "step": 17397 }, { "epoch": 1.55, "grad_norm": 4.291988331152342, "learning_rate": 1.2583675973440728e-06, "loss": 0.5595, "step": 17398 }, { "epoch": 1.55, "grad_norm": 7.697696545467632, "learning_rate": 1.2578884654303874e-06, "loss": 0.5786, "step": 17399 }, { "epoch": 1.55, "grad_norm": 7.584286456790095, "learning_rate": 1.257409411625085e-06, "loss": 0.549, "step": 17400 }, { "epoch": 1.55, "grad_norm": 5.388720685933137, "learning_rate": 1.2569304359381662e-06, "loss": 0.5232, "step": 17401 }, { "epoch": 1.55, "grad_norm": 5.745303609455814, "learning_rate": 1.2564515383796265e-06, "loss": 0.5561, "step": 17402 }, { "epoch": 1.55, "grad_norm": 6.278913654171453, "learning_rate": 1.2559727189594622e-06, "loss": 0.5353, "step": 17403 }, { "epoch": 1.55, "grad_norm": 6.744580632994773, "learning_rate": 1.2554939776876685e-06, "loss": 0.5977, "step": 17404 }, { "epoch": 1.55, "grad_norm": 5.942346809265603, "learning_rate": 1.2550153145742372e-06, "loss": 0.5931, "step": 17405 }, { "epoch": 1.55, "grad_norm": 17.283645726848174, "learning_rate": 1.2545367296291616e-06, "loss": 0.5445, "step": 17406 }, { "epoch": 1.55, "grad_norm": 4.996382571033899, "learning_rate": 1.2540582228624276e-06, "loss": 0.5851, "step": 17407 }, { "epoch": 1.55, "grad_norm": 6.943684946664891, "learning_rate": 1.2535797942840266e-06, "loss": 0.643, "step": 17408 }, { "epoch": 1.55, "grad_norm": 7.330552704184243, "learning_rate": 1.2531014439039412e-06, "loss": 0.5718, "step": 17409 }, { "epoch": 1.55, "grad_norm": 5.124795186306825, "learning_rate": 1.2526231717321579e-06, "loss": 0.5497, "step": 17410 }, { "epoch": 1.55, "grad_norm": 5.289613483148967, "learning_rate": 1.2521449777786588e-06, "loss": 0.5712, "step": 17411 }, { "epoch": 1.55, "grad_norm": 6.633336080303487, "learning_rate": 1.251666862053426e-06, "loss": 0.6618, "step": 17412 }, { "epoch": 1.55, "grad_norm": 6.493377971288357, "learning_rate": 1.2511888245664383e-06, "loss": 0.5944, "step": 17413 }, { "epoch": 1.55, "grad_norm": 7.442078103266807, "learning_rate": 1.2507108653276763e-06, "loss": 0.6224, "step": 17414 }, { "epoch": 1.55, "grad_norm": 7.554689374024966, "learning_rate": 1.2502329843471123e-06, "loss": 0.5872, "step": 17415 }, { "epoch": 1.55, "grad_norm": 7.854034827507665, "learning_rate": 1.2497551816347225e-06, "loss": 0.5824, "step": 17416 }, { "epoch": 1.55, "grad_norm": 4.933673278112818, "learning_rate": 1.2492774572004807e-06, "loss": 0.5147, "step": 17417 }, { "epoch": 1.55, "grad_norm": 5.696584649091364, "learning_rate": 1.2487998110543582e-06, "loss": 0.568, "step": 17418 }, { "epoch": 1.55, "grad_norm": 5.449716681928235, "learning_rate": 1.2483222432063263e-06, "loss": 0.5656, "step": 17419 }, { "epoch": 1.55, "grad_norm": 6.967172918609351, "learning_rate": 1.2478447536663496e-06, "loss": 0.5308, "step": 17420 }, { "epoch": 1.55, "grad_norm": 6.655498136413138, "learning_rate": 1.2473673424443967e-06, "loss": 0.5702, "step": 17421 }, { "epoch": 1.55, "grad_norm": 4.502569824872035, "learning_rate": 1.2468900095504344e-06, "loss": 0.596, "step": 17422 }, { "epoch": 1.55, "grad_norm": 7.005621091119339, "learning_rate": 1.2464127549944217e-06, "loss": 0.5719, "step": 17423 }, { "epoch": 1.55, "grad_norm": 7.728514011822037, "learning_rate": 1.245935578786323e-06, "loss": 0.6022, "step": 17424 }, { "epoch": 1.55, "grad_norm": 6.074323556187885, "learning_rate": 1.2454584809360975e-06, "loss": 0.5902, "step": 17425 }, { "epoch": 1.55, "grad_norm": 8.5492543447658, "learning_rate": 1.244981461453706e-06, "loss": 0.648, "step": 17426 }, { "epoch": 1.55, "grad_norm": 6.911501679909859, "learning_rate": 1.2445045203491012e-06, "loss": 0.5763, "step": 17427 }, { "epoch": 1.55, "grad_norm": 10.95835956950578, "learning_rate": 1.2440276576322401e-06, "loss": 0.5599, "step": 17428 }, { "epoch": 1.55, "grad_norm": 5.913698574888189, "learning_rate": 1.2435508733130763e-06, "loss": 0.5852, "step": 17429 }, { "epoch": 1.55, "grad_norm": 7.552328851650199, "learning_rate": 1.2430741674015618e-06, "loss": 0.5842, "step": 17430 }, { "epoch": 1.56, "grad_norm": 4.714268508975373, "learning_rate": 1.2425975399076462e-06, "loss": 0.5796, "step": 17431 }, { "epoch": 1.56, "grad_norm": 5.39943832367043, "learning_rate": 1.2421209908412802e-06, "loss": 0.624, "step": 17432 }, { "epoch": 1.56, "grad_norm": 7.40268151745162, "learning_rate": 1.2416445202124067e-06, "loss": 0.5699, "step": 17433 }, { "epoch": 1.56, "grad_norm": 6.88948449275006, "learning_rate": 1.241168128030974e-06, "loss": 0.5188, "step": 17434 }, { "epoch": 1.56, "grad_norm": 6.117668067626755, "learning_rate": 1.2406918143069247e-06, "loss": 0.538, "step": 17435 }, { "epoch": 1.56, "grad_norm": 9.140514177026661, "learning_rate": 1.2402155790502025e-06, "loss": 0.596, "step": 17436 }, { "epoch": 1.56, "grad_norm": 5.986916924592516, "learning_rate": 1.2397394222707444e-06, "loss": 0.5605, "step": 17437 }, { "epoch": 1.56, "grad_norm": 8.320222467861766, "learning_rate": 1.2392633439784912e-06, "loss": 0.5507, "step": 17438 }, { "epoch": 1.56, "grad_norm": 6.793995661779951, "learning_rate": 1.2387873441833815e-06, "loss": 0.5757, "step": 17439 }, { "epoch": 1.56, "grad_norm": 8.3288312413563, "learning_rate": 1.2383114228953475e-06, "loss": 0.6319, "step": 17440 }, { "epoch": 1.56, "grad_norm": 7.387145179973341, "learning_rate": 1.2378355801243247e-06, "loss": 0.5471, "step": 17441 }, { "epoch": 1.56, "grad_norm": 7.069976446105957, "learning_rate": 1.2373598158802451e-06, "loss": 0.5754, "step": 17442 }, { "epoch": 1.56, "grad_norm": 8.17768159878173, "learning_rate": 1.2368841301730395e-06, "loss": 0.6187, "step": 17443 }, { "epoch": 1.56, "grad_norm": 8.32442714186497, "learning_rate": 1.236408523012636e-06, "loss": 0.5877, "step": 17444 }, { "epoch": 1.56, "grad_norm": 7.137038139855707, "learning_rate": 1.2359329944089644e-06, "loss": 0.5948, "step": 17445 }, { "epoch": 1.56, "grad_norm": 7.804529743742463, "learning_rate": 1.2354575443719468e-06, "loss": 0.643, "step": 17446 }, { "epoch": 1.56, "grad_norm": 6.115533200021045, "learning_rate": 1.234982172911509e-06, "loss": 0.5612, "step": 17447 }, { "epoch": 1.56, "grad_norm": 7.74410593598114, "learning_rate": 1.234506880037573e-06, "loss": 0.5465, "step": 17448 }, { "epoch": 1.56, "grad_norm": 7.7760410769887045, "learning_rate": 1.2340316657600593e-06, "loss": 0.5672, "step": 17449 }, { "epoch": 1.56, "grad_norm": 5.362668923162735, "learning_rate": 1.2335565300888874e-06, "loss": 0.541, "step": 17450 }, { "epoch": 1.56, "grad_norm": 8.32970869856787, "learning_rate": 1.2330814730339763e-06, "loss": 0.6055, "step": 17451 }, { "epoch": 1.56, "grad_norm": 5.177203715461129, "learning_rate": 1.2326064946052397e-06, "loss": 0.6262, "step": 17452 }, { "epoch": 1.56, "grad_norm": 8.61977247998519, "learning_rate": 1.2321315948125905e-06, "loss": 0.5412, "step": 17453 }, { "epoch": 1.56, "grad_norm": 8.971913365378189, "learning_rate": 1.231656773665943e-06, "loss": 0.5452, "step": 17454 }, { "epoch": 1.56, "grad_norm": 8.409040111201957, "learning_rate": 1.2311820311752076e-06, "loss": 0.6313, "step": 17455 }, { "epoch": 1.56, "grad_norm": 6.284515083630078, "learning_rate": 1.230707367350294e-06, "loss": 0.5906, "step": 17456 }, { "epoch": 1.56, "grad_norm": 7.789747707709738, "learning_rate": 1.2302327822011096e-06, "loss": 0.5443, "step": 17457 }, { "epoch": 1.56, "grad_norm": 7.2682858013212215, "learning_rate": 1.2297582757375614e-06, "loss": 0.5602, "step": 17458 }, { "epoch": 1.56, "grad_norm": 8.185725919193953, "learning_rate": 1.2292838479695512e-06, "loss": 0.591, "step": 17459 }, { "epoch": 1.56, "grad_norm": 6.4373965714299475, "learning_rate": 1.2288094989069827e-06, "loss": 0.5762, "step": 17460 }, { "epoch": 1.56, "grad_norm": 8.510182736549691, "learning_rate": 1.2283352285597578e-06, "loss": 0.5755, "step": 17461 }, { "epoch": 1.56, "grad_norm": 6.678599073827171, "learning_rate": 1.2278610369377748e-06, "loss": 0.5818, "step": 17462 }, { "epoch": 1.56, "grad_norm": 11.04693049636378, "learning_rate": 1.2273869240509311e-06, "loss": 0.5812, "step": 17463 }, { "epoch": 1.56, "grad_norm": 7.297345108869949, "learning_rate": 1.2269128899091259e-06, "loss": 0.5661, "step": 17464 }, { "epoch": 1.56, "grad_norm": 5.0312790094692135, "learning_rate": 1.226438934522251e-06, "loss": 0.5528, "step": 17465 }, { "epoch": 1.56, "grad_norm": 6.419629614039998, "learning_rate": 1.2259650579001976e-06, "loss": 0.5702, "step": 17466 }, { "epoch": 1.56, "grad_norm": 9.261425522074216, "learning_rate": 1.2254912600528591e-06, "loss": 0.6362, "step": 17467 }, { "epoch": 1.56, "grad_norm": 7.643826913833802, "learning_rate": 1.2250175409901237e-06, "loss": 0.5456, "step": 17468 }, { "epoch": 1.56, "grad_norm": 11.500597059103457, "learning_rate": 1.2245439007218807e-06, "loss": 0.557, "step": 17469 }, { "epoch": 1.56, "grad_norm": 5.146138494860286, "learning_rate": 1.2240703392580155e-06, "loss": 0.5582, "step": 17470 }, { "epoch": 1.56, "grad_norm": 7.000505571331699, "learning_rate": 1.2235968566084145e-06, "loss": 0.609, "step": 17471 }, { "epoch": 1.56, "grad_norm": 7.246654394445658, "learning_rate": 1.2231234527829572e-06, "loss": 0.6229, "step": 17472 }, { "epoch": 1.56, "grad_norm": 7.188415570503029, "learning_rate": 1.222650127791527e-06, "loss": 0.5606, "step": 17473 }, { "epoch": 1.56, "grad_norm": 7.417222533053564, "learning_rate": 1.2221768816440028e-06, "loss": 0.5509, "step": 17474 }, { "epoch": 1.56, "grad_norm": 7.064451617865004, "learning_rate": 1.221703714350263e-06, "loss": 0.6459, "step": 17475 }, { "epoch": 1.56, "grad_norm": 5.104306178049994, "learning_rate": 1.2212306259201839e-06, "loss": 0.5722, "step": 17476 }, { "epoch": 1.56, "grad_norm": 6.921234408154279, "learning_rate": 1.2207576163636414e-06, "loss": 0.6274, "step": 17477 }, { "epoch": 1.56, "grad_norm": 5.5989764362581305, "learning_rate": 1.220284685690506e-06, "loss": 0.5347, "step": 17478 }, { "epoch": 1.56, "grad_norm": 6.808123218471105, "learning_rate": 1.2198118339106502e-06, "loss": 0.5984, "step": 17479 }, { "epoch": 1.56, "grad_norm": 5.892510637659492, "learning_rate": 1.219339061033945e-06, "loss": 0.5075, "step": 17480 }, { "epoch": 1.56, "grad_norm": 6.83172668768152, "learning_rate": 1.2188663670702567e-06, "loss": 0.6643, "step": 17481 }, { "epoch": 1.56, "grad_norm": 6.828689804456462, "learning_rate": 1.218393752029452e-06, "loss": 0.5223, "step": 17482 }, { "epoch": 1.56, "grad_norm": 6.3693019239925945, "learning_rate": 1.217921215921396e-06, "loss": 0.5822, "step": 17483 }, { "epoch": 1.56, "grad_norm": 6.323963211257537, "learning_rate": 1.2174487587559536e-06, "loss": 0.6023, "step": 17484 }, { "epoch": 1.56, "grad_norm": 7.28000771124247, "learning_rate": 1.2169763805429835e-06, "loss": 0.6106, "step": 17485 }, { "epoch": 1.56, "grad_norm": 8.192305334998114, "learning_rate": 1.2165040812923469e-06, "loss": 0.5892, "step": 17486 }, { "epoch": 1.56, "grad_norm": 6.3409938334387, "learning_rate": 1.216031861013901e-06, "loss": 0.5737, "step": 17487 }, { "epoch": 1.56, "grad_norm": 6.192748356193479, "learning_rate": 1.215559719717504e-06, "loss": 0.6072, "step": 17488 }, { "epoch": 1.56, "grad_norm": 6.703313470632493, "learning_rate": 1.21508765741301e-06, "loss": 0.6082, "step": 17489 }, { "epoch": 1.56, "grad_norm": 7.40457828702366, "learning_rate": 1.2146156741102738e-06, "loss": 0.5864, "step": 17490 }, { "epoch": 1.56, "grad_norm": 7.683124187525168, "learning_rate": 1.2141437698191438e-06, "loss": 0.61, "step": 17491 }, { "epoch": 1.56, "grad_norm": 6.5592190676857, "learning_rate": 1.2136719445494722e-06, "loss": 0.6083, "step": 17492 }, { "epoch": 1.56, "grad_norm": 6.723956615389475, "learning_rate": 1.2132001983111063e-06, "loss": 0.5614, "step": 17493 }, { "epoch": 1.56, "grad_norm": 7.456372449913669, "learning_rate": 1.2127285311138953e-06, "loss": 0.5535, "step": 17494 }, { "epoch": 1.56, "grad_norm": 6.651094708277122, "learning_rate": 1.21225694296768e-06, "loss": 0.5762, "step": 17495 }, { "epoch": 1.56, "grad_norm": 6.40011041661267, "learning_rate": 1.211785433882307e-06, "loss": 0.6046, "step": 17496 }, { "epoch": 1.56, "grad_norm": 5.806765664935959, "learning_rate": 1.211314003867618e-06, "loss": 0.6059, "step": 17497 }, { "epoch": 1.56, "grad_norm": 9.9896509502385, "learning_rate": 1.2108426529334505e-06, "loss": 0.5961, "step": 17498 }, { "epoch": 1.56, "grad_norm": 7.433581216076977, "learning_rate": 1.2103713810896444e-06, "loss": 0.5485, "step": 17499 }, { "epoch": 1.56, "grad_norm": 4.716122719120951, "learning_rate": 1.209900188346037e-06, "loss": 0.5595, "step": 17500 }, { "epoch": 1.56, "grad_norm": 7.735565398442674, "learning_rate": 1.2094290747124625e-06, "loss": 0.6159, "step": 17501 }, { "epoch": 1.56, "grad_norm": 6.077466605776167, "learning_rate": 1.2089580401987565e-06, "loss": 0.5383, "step": 17502 }, { "epoch": 1.56, "grad_norm": 6.223327832765259, "learning_rate": 1.2084870848147478e-06, "loss": 0.5611, "step": 17503 }, { "epoch": 1.56, "grad_norm": 6.072638303510687, "learning_rate": 1.2080162085702684e-06, "loss": 0.5714, "step": 17504 }, { "epoch": 1.56, "grad_norm": 6.86199714333141, "learning_rate": 1.2075454114751456e-06, "loss": 0.6115, "step": 17505 }, { "epoch": 1.56, "grad_norm": 7.563242160266035, "learning_rate": 1.2070746935392075e-06, "loss": 0.5824, "step": 17506 }, { "epoch": 1.56, "grad_norm": 8.373450207118786, "learning_rate": 1.2066040547722786e-06, "loss": 0.713, "step": 17507 }, { "epoch": 1.56, "grad_norm": 8.124507342068487, "learning_rate": 1.2061334951841842e-06, "loss": 0.5252, "step": 17508 }, { "epoch": 1.56, "grad_norm": 7.101838980803422, "learning_rate": 1.2056630147847443e-06, "loss": 0.5963, "step": 17509 }, { "epoch": 1.56, "grad_norm": 7.658888657759445, "learning_rate": 1.2051926135837782e-06, "loss": 0.5933, "step": 17510 }, { "epoch": 1.56, "grad_norm": 5.852504894577199, "learning_rate": 1.2047222915911061e-06, "loss": 0.5454, "step": 17511 }, { "epoch": 1.56, "grad_norm": 5.810147318295622, "learning_rate": 1.2042520488165444e-06, "loss": 0.5448, "step": 17512 }, { "epoch": 1.56, "grad_norm": 4.985444086672907, "learning_rate": 1.203781885269909e-06, "loss": 0.5109, "step": 17513 }, { "epoch": 1.56, "grad_norm": 8.111466516927749, "learning_rate": 1.2033118009610129e-06, "loss": 0.549, "step": 17514 }, { "epoch": 1.56, "grad_norm": 7.253520658131106, "learning_rate": 1.2028417958996697e-06, "loss": 0.5829, "step": 17515 }, { "epoch": 1.56, "grad_norm": 7.610722961520685, "learning_rate": 1.2023718700956871e-06, "loss": 0.5672, "step": 17516 }, { "epoch": 1.56, "grad_norm": 5.587370140444301, "learning_rate": 1.2019020235588746e-06, "loss": 0.5798, "step": 17517 }, { "epoch": 1.56, "grad_norm": 6.822915148311635, "learning_rate": 1.2014322562990399e-06, "loss": 0.5741, "step": 17518 }, { "epoch": 1.56, "grad_norm": 7.676255404325774, "learning_rate": 1.2009625683259884e-06, "loss": 0.5556, "step": 17519 }, { "epoch": 1.56, "grad_norm": 6.624171115592358, "learning_rate": 1.2004929596495235e-06, "loss": 0.5897, "step": 17520 }, { "epoch": 1.56, "grad_norm": 5.2320123978786555, "learning_rate": 1.200023430279449e-06, "loss": 0.5597, "step": 17521 }, { "epoch": 1.56, "grad_norm": 5.451904420178755, "learning_rate": 1.1995539802255613e-06, "loss": 0.5959, "step": 17522 }, { "epoch": 1.56, "grad_norm": 6.187249608200956, "learning_rate": 1.1990846094976633e-06, "loss": 0.569, "step": 17523 }, { "epoch": 1.56, "grad_norm": 7.700284464525787, "learning_rate": 1.1986153181055487e-06, "loss": 0.5978, "step": 17524 }, { "epoch": 1.56, "grad_norm": 7.373417187084846, "learning_rate": 1.1981461060590149e-06, "loss": 0.5556, "step": 17525 }, { "epoch": 1.56, "grad_norm": 7.83430413630013, "learning_rate": 1.1976769733678545e-06, "loss": 0.5487, "step": 17526 }, { "epoch": 1.56, "grad_norm": 5.4292427010407, "learning_rate": 1.1972079200418607e-06, "loss": 0.5729, "step": 17527 }, { "epoch": 1.56, "grad_norm": 5.34457904747408, "learning_rate": 1.1967389460908252e-06, "loss": 0.5698, "step": 17528 }, { "epoch": 1.56, "grad_norm": 5.928357290386004, "learning_rate": 1.1962700515245334e-06, "loss": 0.4968, "step": 17529 }, { "epoch": 1.56, "grad_norm": 4.966534594500497, "learning_rate": 1.195801236352775e-06, "loss": 0.596, "step": 17530 }, { "epoch": 1.56, "grad_norm": 5.049498188129611, "learning_rate": 1.1953325005853338e-06, "loss": 0.5768, "step": 17531 }, { "epoch": 1.56, "grad_norm": 7.14840821757045, "learning_rate": 1.1948638442319948e-06, "loss": 0.5957, "step": 17532 }, { "epoch": 1.56, "grad_norm": 15.742418485475719, "learning_rate": 1.1943952673025405e-06, "loss": 0.527, "step": 17533 }, { "epoch": 1.56, "grad_norm": 7.361481873804907, "learning_rate": 1.193926769806752e-06, "loss": 0.5711, "step": 17534 }, { "epoch": 1.56, "grad_norm": 5.8058257334515115, "learning_rate": 1.1934583517544051e-06, "loss": 0.5481, "step": 17535 }, { "epoch": 1.56, "grad_norm": 5.522003222664767, "learning_rate": 1.1929900131552797e-06, "loss": 0.6394, "step": 17536 }, { "epoch": 1.56, "grad_norm": 8.568055990091468, "learning_rate": 1.1925217540191514e-06, "loss": 0.5829, "step": 17537 }, { "epoch": 1.56, "grad_norm": 5.6179381809101185, "learning_rate": 1.192053574355792e-06, "loss": 0.5401, "step": 17538 }, { "epoch": 1.56, "grad_norm": 7.079608716629765, "learning_rate": 1.191585474174975e-06, "loss": 0.6404, "step": 17539 }, { "epoch": 1.56, "grad_norm": 6.0144933926428905, "learning_rate": 1.1911174534864712e-06, "loss": 0.6223, "step": 17540 }, { "epoch": 1.56, "grad_norm": 8.931475641912185, "learning_rate": 1.19064951230005e-06, "loss": 0.6207, "step": 17541 }, { "epoch": 1.56, "grad_norm": 8.48372971843655, "learning_rate": 1.190181650625477e-06, "loss": 0.611, "step": 17542 }, { "epoch": 1.57, "grad_norm": 7.6976954213756885, "learning_rate": 1.1897138684725185e-06, "loss": 0.5677, "step": 17543 }, { "epoch": 1.57, "grad_norm": 8.203647708357257, "learning_rate": 1.189246165850939e-06, "loss": 0.604, "step": 17544 }, { "epoch": 1.57, "grad_norm": 5.739744933272041, "learning_rate": 1.1887785427705002e-06, "loss": 0.5736, "step": 17545 }, { "epoch": 1.57, "grad_norm": 5.4963304568911315, "learning_rate": 1.1883109992409626e-06, "loss": 0.5364, "step": 17546 }, { "epoch": 1.57, "grad_norm": 5.199572128192072, "learning_rate": 1.1878435352720868e-06, "loss": 0.5595, "step": 17547 }, { "epoch": 1.57, "grad_norm": 9.619231243239213, "learning_rate": 1.1873761508736276e-06, "loss": 0.6181, "step": 17548 }, { "epoch": 1.57, "grad_norm": 5.685566374448985, "learning_rate": 1.1869088460553419e-06, "loss": 0.5892, "step": 17549 }, { "epoch": 1.57, "grad_norm": 4.62412880939974, "learning_rate": 1.186441620826983e-06, "loss": 0.5654, "step": 17550 }, { "epoch": 1.57, "grad_norm": 4.944455648565771, "learning_rate": 1.1859744751983038e-06, "loss": 0.5602, "step": 17551 }, { "epoch": 1.57, "grad_norm": 8.046929538831908, "learning_rate": 1.1855074091790564e-06, "loss": 0.5862, "step": 17552 }, { "epoch": 1.57, "grad_norm": 6.609677379300007, "learning_rate": 1.185040422778987e-06, "loss": 0.5675, "step": 17553 }, { "epoch": 1.57, "grad_norm": 8.178996898822414, "learning_rate": 1.1845735160078453e-06, "loss": 0.5181, "step": 17554 }, { "epoch": 1.57, "grad_norm": 6.293241749510595, "learning_rate": 1.184106688875375e-06, "loss": 0.5253, "step": 17555 }, { "epoch": 1.57, "grad_norm": 5.738574059653685, "learning_rate": 1.1836399413913202e-06, "loss": 0.4986, "step": 17556 }, { "epoch": 1.57, "grad_norm": 9.072559090882352, "learning_rate": 1.1831732735654244e-06, "loss": 0.5652, "step": 17557 }, { "epoch": 1.57, "grad_norm": 8.005683916670149, "learning_rate": 1.1827066854074276e-06, "loss": 0.5655, "step": 17558 }, { "epoch": 1.57, "grad_norm": 9.709550710540134, "learning_rate": 1.1822401769270686e-06, "loss": 0.5438, "step": 17559 }, { "epoch": 1.57, "grad_norm": 6.951344911305748, "learning_rate": 1.1817737481340873e-06, "loss": 0.5661, "step": 17560 }, { "epoch": 1.57, "grad_norm": 7.75094450919303, "learning_rate": 1.181307399038215e-06, "loss": 0.5858, "step": 17561 }, { "epoch": 1.57, "grad_norm": 6.577197074944826, "learning_rate": 1.180841129649189e-06, "loss": 0.5904, "step": 17562 }, { "epoch": 1.57, "grad_norm": 5.664382703152952, "learning_rate": 1.18037493997674e-06, "loss": 0.541, "step": 17563 }, { "epoch": 1.57, "grad_norm": 6.44626506938711, "learning_rate": 1.1799088300305988e-06, "loss": 0.528, "step": 17564 }, { "epoch": 1.57, "grad_norm": 11.149435392406595, "learning_rate": 1.179442799820496e-06, "loss": 0.636, "step": 17565 }, { "epoch": 1.57, "grad_norm": 10.308166721249735, "learning_rate": 1.1789768493561588e-06, "loss": 0.5913, "step": 17566 }, { "epoch": 1.57, "grad_norm": 7.762513803425698, "learning_rate": 1.1785109786473115e-06, "loss": 0.6022, "step": 17567 }, { "epoch": 1.57, "grad_norm": 7.936050500553025, "learning_rate": 1.178045187703678e-06, "loss": 0.5888, "step": 17568 }, { "epoch": 1.57, "grad_norm": 6.545817291591111, "learning_rate": 1.1775794765349808e-06, "loss": 0.5215, "step": 17569 }, { "epoch": 1.57, "grad_norm": 6.083466547408499, "learning_rate": 1.1771138451509411e-06, "loss": 0.5443, "step": 17570 }, { "epoch": 1.57, "grad_norm": 8.765782575109482, "learning_rate": 1.1766482935612777e-06, "loss": 0.5207, "step": 17571 }, { "epoch": 1.57, "grad_norm": 11.025180648890347, "learning_rate": 1.1761828217757086e-06, "loss": 0.5459, "step": 17572 }, { "epoch": 1.57, "grad_norm": 8.490475243845772, "learning_rate": 1.1757174298039504e-06, "loss": 0.5996, "step": 17573 }, { "epoch": 1.57, "grad_norm": 7.020848604936575, "learning_rate": 1.175252117655714e-06, "loss": 0.5472, "step": 17574 }, { "epoch": 1.57, "grad_norm": 5.860657648245051, "learning_rate": 1.174786885340714e-06, "loss": 0.5577, "step": 17575 }, { "epoch": 1.57, "grad_norm": 8.077163190648616, "learning_rate": 1.1743217328686606e-06, "loss": 0.5521, "step": 17576 }, { "epoch": 1.57, "grad_norm": 8.586000214711108, "learning_rate": 1.173856660249263e-06, "loss": 0.5786, "step": 17577 }, { "epoch": 1.57, "grad_norm": 6.298788892657059, "learning_rate": 1.1733916674922297e-06, "loss": 0.5813, "step": 17578 }, { "epoch": 1.57, "grad_norm": 6.879600066371928, "learning_rate": 1.1729267546072643e-06, "loss": 0.5538, "step": 17579 }, { "epoch": 1.57, "grad_norm": 6.0006610919841, "learning_rate": 1.1724619216040711e-06, "loss": 0.5493, "step": 17580 }, { "epoch": 1.57, "grad_norm": 6.0419483445649895, "learning_rate": 1.1719971684923547e-06, "loss": 0.5685, "step": 17581 }, { "epoch": 1.57, "grad_norm": 6.697016448544396, "learning_rate": 1.1715324952818131e-06, "loss": 0.558, "step": 17582 }, { "epoch": 1.57, "grad_norm": 8.934118603147919, "learning_rate": 1.1710679019821464e-06, "loss": 0.5632, "step": 17583 }, { "epoch": 1.57, "grad_norm": 5.466383918998832, "learning_rate": 1.1706033886030522e-06, "loss": 0.5253, "step": 17584 }, { "epoch": 1.57, "grad_norm": 5.232780734584359, "learning_rate": 1.170138955154227e-06, "loss": 0.525, "step": 17585 }, { "epoch": 1.57, "grad_norm": 7.767842190564642, "learning_rate": 1.1696746016453625e-06, "loss": 0.5371, "step": 17586 }, { "epoch": 1.57, "grad_norm": 6.7705543079910795, "learning_rate": 1.1692103280861527e-06, "loss": 0.5996, "step": 17587 }, { "epoch": 1.57, "grad_norm": 6.22515234073975, "learning_rate": 1.1687461344862877e-06, "loss": 0.5639, "step": 17588 }, { "epoch": 1.57, "grad_norm": 7.536203549360837, "learning_rate": 1.1682820208554568e-06, "loss": 0.6654, "step": 17589 }, { "epoch": 1.57, "grad_norm": 6.666714806595545, "learning_rate": 1.167817987203348e-06, "loss": 0.5511, "step": 17590 }, { "epoch": 1.57, "grad_norm": 6.553629238873237, "learning_rate": 1.167354033539647e-06, "loss": 0.5833, "step": 17591 }, { "epoch": 1.57, "grad_norm": 5.778856454918171, "learning_rate": 1.1668901598740361e-06, "loss": 0.5411, "step": 17592 }, { "epoch": 1.57, "grad_norm": 6.991463373254179, "learning_rate": 1.166426366216199e-06, "loss": 0.6322, "step": 17593 }, { "epoch": 1.57, "grad_norm": 4.94847358974116, "learning_rate": 1.1659626525758162e-06, "loss": 0.5848, "step": 17594 }, { "epoch": 1.57, "grad_norm": 10.455772706609835, "learning_rate": 1.1654990189625675e-06, "loss": 0.6667, "step": 17595 }, { "epoch": 1.57, "grad_norm": 8.732808264126477, "learning_rate": 1.1650354653861285e-06, "loss": 0.5502, "step": 17596 }, { "epoch": 1.57, "grad_norm": 5.892109443238059, "learning_rate": 1.1645719918561754e-06, "loss": 0.5485, "step": 17597 }, { "epoch": 1.57, "grad_norm": 5.623143240164863, "learning_rate": 1.1641085983823842e-06, "loss": 0.6079, "step": 17598 }, { "epoch": 1.57, "grad_norm": 7.174589788914577, "learning_rate": 1.1636452849744234e-06, "loss": 0.6394, "step": 17599 }, { "epoch": 1.57, "grad_norm": 5.706087256025584, "learning_rate": 1.1631820516419666e-06, "loss": 0.6194, "step": 17600 }, { "epoch": 1.57, "grad_norm": 5.6481938292610545, "learning_rate": 1.1627188983946818e-06, "loss": 0.5826, "step": 17601 }, { "epoch": 1.57, "grad_norm": 9.440743521523116, "learning_rate": 1.1622558252422362e-06, "loss": 0.5945, "step": 17602 }, { "epoch": 1.57, "grad_norm": 5.530970971570583, "learning_rate": 1.161792832194296e-06, "loss": 0.5367, "step": 17603 }, { "epoch": 1.57, "grad_norm": 6.134415825938056, "learning_rate": 1.1613299192605266e-06, "loss": 0.5751, "step": 17604 }, { "epoch": 1.57, "grad_norm": 7.461679353721445, "learning_rate": 1.1608670864505866e-06, "loss": 0.5266, "step": 17605 }, { "epoch": 1.57, "grad_norm": 8.441428249777484, "learning_rate": 1.1604043337741389e-06, "loss": 0.6099, "step": 17606 }, { "epoch": 1.57, "grad_norm": 7.151995179630965, "learning_rate": 1.159941661240842e-06, "loss": 0.5817, "step": 17607 }, { "epoch": 1.57, "grad_norm": 6.509525239267581, "learning_rate": 1.1594790688603536e-06, "loss": 0.5275, "step": 17608 }, { "epoch": 1.57, "grad_norm": 7.229352783757999, "learning_rate": 1.159016556642329e-06, "loss": 0.5845, "step": 17609 }, { "epoch": 1.57, "grad_norm": 5.703699121671413, "learning_rate": 1.1585541245964233e-06, "loss": 0.5707, "step": 17610 }, { "epoch": 1.57, "grad_norm": 5.133714358673982, "learning_rate": 1.1580917727322877e-06, "loss": 0.5852, "step": 17611 }, { "epoch": 1.57, "grad_norm": 6.10137159819938, "learning_rate": 1.157629501059571e-06, "loss": 0.6194, "step": 17612 }, { "epoch": 1.57, "grad_norm": 6.401974627595709, "learning_rate": 1.1571673095879243e-06, "loss": 0.5648, "step": 17613 }, { "epoch": 1.57, "grad_norm": 6.709783962686572, "learning_rate": 1.1567051983269944e-06, "loss": 0.6162, "step": 17614 }, { "epoch": 1.57, "grad_norm": 7.301432933364805, "learning_rate": 1.1562431672864266e-06, "loss": 0.54, "step": 17615 }, { "epoch": 1.57, "grad_norm": 6.070898424920377, "learning_rate": 1.1557812164758653e-06, "loss": 0.5079, "step": 17616 }, { "epoch": 1.57, "grad_norm": 6.737304784229033, "learning_rate": 1.1553193459049539e-06, "loss": 0.5557, "step": 17617 }, { "epoch": 1.57, "grad_norm": 7.868875271528416, "learning_rate": 1.1548575555833296e-06, "loss": 0.5449, "step": 17618 }, { "epoch": 1.57, "grad_norm": 6.655474048772448, "learning_rate": 1.1543958455206332e-06, "loss": 0.6246, "step": 17619 }, { "epoch": 1.57, "grad_norm": 6.6244002371188895, "learning_rate": 1.1539342157265016e-06, "loss": 0.5731, "step": 17620 }, { "epoch": 1.57, "grad_norm": 7.394729089621897, "learning_rate": 1.1534726662105711e-06, "loss": 0.5732, "step": 17621 }, { "epoch": 1.57, "grad_norm": 6.166296960212958, "learning_rate": 1.1530111969824741e-06, "loss": 0.6375, "step": 17622 }, { "epoch": 1.57, "grad_norm": 5.118623491137184, "learning_rate": 1.1525498080518454e-06, "loss": 0.5683, "step": 17623 }, { "epoch": 1.57, "grad_norm": 6.55332377402804, "learning_rate": 1.1520884994283143e-06, "loss": 0.5322, "step": 17624 }, { "epoch": 1.57, "grad_norm": 7.436402084787508, "learning_rate": 1.1516272711215071e-06, "loss": 0.5557, "step": 17625 }, { "epoch": 1.57, "grad_norm": 5.474515025132489, "learning_rate": 1.1511661231410525e-06, "loss": 0.5171, "step": 17626 }, { "epoch": 1.57, "grad_norm": 7.86304459797412, "learning_rate": 1.1507050554965765e-06, "loss": 0.5451, "step": 17627 }, { "epoch": 1.57, "grad_norm": 5.889198987651089, "learning_rate": 1.1502440681977028e-06, "loss": 0.5785, "step": 17628 }, { "epoch": 1.57, "grad_norm": 5.9069599294165265, "learning_rate": 1.1497831612540533e-06, "loss": 0.5159, "step": 17629 }, { "epoch": 1.57, "grad_norm": 6.543425686435022, "learning_rate": 1.1493223346752497e-06, "loss": 0.5505, "step": 17630 }, { "epoch": 1.57, "grad_norm": 5.757359246944983, "learning_rate": 1.1488615884709087e-06, "loss": 0.6159, "step": 17631 }, { "epoch": 1.57, "grad_norm": 5.439803658969097, "learning_rate": 1.1484009226506476e-06, "loss": 0.6027, "step": 17632 }, { "epoch": 1.57, "grad_norm": 7.140376393213283, "learning_rate": 1.1479403372240827e-06, "loss": 0.6429, "step": 17633 }, { "epoch": 1.57, "grad_norm": 7.6127737813441945, "learning_rate": 1.1474798322008268e-06, "loss": 0.6145, "step": 17634 }, { "epoch": 1.57, "grad_norm": 4.636709319676939, "learning_rate": 1.1470194075904933e-06, "loss": 0.5427, "step": 17635 }, { "epoch": 1.57, "grad_norm": 7.26971096420603, "learning_rate": 1.146559063402693e-06, "loss": 0.6114, "step": 17636 }, { "epoch": 1.57, "grad_norm": 7.89730471010234, "learning_rate": 1.1460987996470312e-06, "loss": 0.6108, "step": 17637 }, { "epoch": 1.57, "grad_norm": 7.648310567035245, "learning_rate": 1.1456386163331196e-06, "loss": 0.61, "step": 17638 }, { "epoch": 1.57, "grad_norm": 8.068494283055411, "learning_rate": 1.145178513470559e-06, "loss": 0.6556, "step": 17639 }, { "epoch": 1.57, "grad_norm": 5.305359839361662, "learning_rate": 1.1447184910689551e-06, "loss": 0.544, "step": 17640 }, { "epoch": 1.57, "grad_norm": 5.956723305108273, "learning_rate": 1.1442585491379093e-06, "loss": 0.5757, "step": 17641 }, { "epoch": 1.57, "grad_norm": 5.7889438822487245, "learning_rate": 1.1437986876870233e-06, "loss": 0.5868, "step": 17642 }, { "epoch": 1.57, "grad_norm": 6.696042383043067, "learning_rate": 1.1433389067258954e-06, "loss": 0.5977, "step": 17643 }, { "epoch": 1.57, "grad_norm": 7.448711761256851, "learning_rate": 1.1428792062641208e-06, "loss": 0.6048, "step": 17644 }, { "epoch": 1.57, "grad_norm": 7.640758342254516, "learning_rate": 1.1424195863112958e-06, "loss": 0.564, "step": 17645 }, { "epoch": 1.57, "grad_norm": 10.838259614073339, "learning_rate": 1.1419600468770136e-06, "loss": 0.6264, "step": 17646 }, { "epoch": 1.57, "grad_norm": 9.124060894674399, "learning_rate": 1.1415005879708667e-06, "loss": 0.5439, "step": 17647 }, { "epoch": 1.57, "grad_norm": 5.216321286460809, "learning_rate": 1.1410412096024455e-06, "loss": 0.573, "step": 17648 }, { "epoch": 1.57, "grad_norm": 8.351468318380226, "learning_rate": 1.1405819117813388e-06, "loss": 0.5775, "step": 17649 }, { "epoch": 1.57, "grad_norm": 6.888094900736607, "learning_rate": 1.1401226945171317e-06, "loss": 0.7039, "step": 17650 }, { "epoch": 1.57, "grad_norm": 6.096366194892592, "learning_rate": 1.1396635578194105e-06, "loss": 0.5539, "step": 17651 }, { "epoch": 1.57, "grad_norm": 6.5777847621281404, "learning_rate": 1.1392045016977582e-06, "loss": 0.6413, "step": 17652 }, { "epoch": 1.57, "grad_norm": 6.339265038727438, "learning_rate": 1.1387455261617587e-06, "loss": 0.5195, "step": 17653 }, { "epoch": 1.57, "grad_norm": 6.772291480156985, "learning_rate": 1.1382866312209884e-06, "loss": 0.4871, "step": 17654 }, { "epoch": 1.58, "grad_norm": 7.556524185158354, "learning_rate": 1.1378278168850281e-06, "loss": 0.5538, "step": 17655 }, { "epoch": 1.58, "grad_norm": 5.2158788639142015, "learning_rate": 1.1373690831634554e-06, "loss": 0.5623, "step": 17656 }, { "epoch": 1.58, "grad_norm": 5.292218152188463, "learning_rate": 1.1369104300658424e-06, "loss": 0.5719, "step": 17657 }, { "epoch": 1.58, "grad_norm": 6.811378328784184, "learning_rate": 1.1364518576017646e-06, "loss": 0.5493, "step": 17658 }, { "epoch": 1.58, "grad_norm": 5.279648308308718, "learning_rate": 1.135993365780793e-06, "loss": 0.5831, "step": 17659 }, { "epoch": 1.58, "grad_norm": 6.4619990909974785, "learning_rate": 1.1355349546124977e-06, "loss": 0.5972, "step": 17660 }, { "epoch": 1.58, "grad_norm": 5.132199600284785, "learning_rate": 1.1350766241064492e-06, "loss": 0.5293, "step": 17661 }, { "epoch": 1.58, "grad_norm": 5.4525953187982354, "learning_rate": 1.13461837427221e-06, "loss": 0.5937, "step": 17662 }, { "epoch": 1.58, "grad_norm": 8.799719374880995, "learning_rate": 1.1341602051193478e-06, "loss": 0.6274, "step": 17663 }, { "epoch": 1.58, "grad_norm": 6.946026697565971, "learning_rate": 1.1337021166574247e-06, "loss": 0.5322, "step": 17664 }, { "epoch": 1.58, "grad_norm": 5.9911416949183876, "learning_rate": 1.1332441088960032e-06, "loss": 0.6066, "step": 17665 }, { "epoch": 1.58, "grad_norm": 6.402167870042317, "learning_rate": 1.1327861818446428e-06, "loss": 0.5597, "step": 17666 }, { "epoch": 1.58, "grad_norm": 6.016841521583817, "learning_rate": 1.1323283355129034e-06, "loss": 0.5952, "step": 17667 }, { "epoch": 1.58, "grad_norm": 6.357475724652351, "learning_rate": 1.1318705699103383e-06, "loss": 0.5984, "step": 17668 }, { "epoch": 1.58, "grad_norm": 5.2110813929138775, "learning_rate": 1.1314128850465056e-06, "loss": 0.5688, "step": 17669 }, { "epoch": 1.58, "grad_norm": 6.725419305558041, "learning_rate": 1.1309552809309553e-06, "loss": 0.4859, "step": 17670 }, { "epoch": 1.58, "grad_norm": 5.934242950524787, "learning_rate": 1.1304977575732406e-06, "loss": 0.546, "step": 17671 }, { "epoch": 1.58, "grad_norm": 6.091958764988245, "learning_rate": 1.130040314982911e-06, "loss": 0.5939, "step": 17672 }, { "epoch": 1.58, "grad_norm": 7.48677561714069, "learning_rate": 1.1295829531695146e-06, "loss": 0.5472, "step": 17673 }, { "epoch": 1.58, "grad_norm": 7.294167758087319, "learning_rate": 1.1291256721425998e-06, "loss": 0.6545, "step": 17674 }, { "epoch": 1.58, "grad_norm": 4.851636419862357, "learning_rate": 1.1286684719117075e-06, "loss": 0.6196, "step": 17675 }, { "epoch": 1.58, "grad_norm": 6.691897401135834, "learning_rate": 1.128211352486383e-06, "loss": 0.6005, "step": 17676 }, { "epoch": 1.58, "grad_norm": 10.849557928834944, "learning_rate": 1.1277543138761677e-06, "loss": 0.5473, "step": 17677 }, { "epoch": 1.58, "grad_norm": 7.017934869629899, "learning_rate": 1.127297356090601e-06, "loss": 0.5922, "step": 17678 }, { "epoch": 1.58, "grad_norm": 4.5809124151661615, "learning_rate": 1.1268404791392212e-06, "loss": 0.5994, "step": 17679 }, { "epoch": 1.58, "grad_norm": 7.69663292637957, "learning_rate": 1.126383683031565e-06, "loss": 0.5861, "step": 17680 }, { "epoch": 1.58, "grad_norm": 5.710745142889105, "learning_rate": 1.1259269677771656e-06, "loss": 0.5377, "step": 17681 }, { "epoch": 1.58, "grad_norm": 5.922999184560165, "learning_rate": 1.1254703333855577e-06, "loss": 0.5694, "step": 17682 }, { "epoch": 1.58, "grad_norm": 4.667874140926641, "learning_rate": 1.1250137798662703e-06, "loss": 0.5236, "step": 17683 }, { "epoch": 1.58, "grad_norm": 7.88009997981002, "learning_rate": 1.124557307228834e-06, "loss": 0.6224, "step": 17684 }, { "epoch": 1.58, "grad_norm": 8.318224975180792, "learning_rate": 1.124100915482777e-06, "loss": 0.5704, "step": 17685 }, { "epoch": 1.58, "grad_norm": 6.653214432750371, "learning_rate": 1.1236446046376247e-06, "loss": 0.6105, "step": 17686 }, { "epoch": 1.58, "grad_norm": 5.795984918594723, "learning_rate": 1.1231883747029042e-06, "loss": 0.5865, "step": 17687 }, { "epoch": 1.58, "grad_norm": 6.313947423645107, "learning_rate": 1.122732225688135e-06, "loss": 0.5343, "step": 17688 }, { "epoch": 1.58, "grad_norm": 6.593911455145975, "learning_rate": 1.1222761576028395e-06, "loss": 0.5655, "step": 17689 }, { "epoch": 1.58, "grad_norm": 8.452867654013565, "learning_rate": 1.1218201704565369e-06, "loss": 0.5362, "step": 17690 }, { "epoch": 1.58, "grad_norm": 6.6077937599457695, "learning_rate": 1.1213642642587452e-06, "loss": 0.6037, "step": 17691 }, { "epoch": 1.58, "grad_norm": 7.06267875882567, "learning_rate": 1.1209084390189807e-06, "loss": 0.5483, "step": 17692 }, { "epoch": 1.58, "grad_norm": 5.621951030899895, "learning_rate": 1.1204526947467587e-06, "loss": 0.5802, "step": 17693 }, { "epoch": 1.58, "grad_norm": 6.372409619993127, "learning_rate": 1.119997031451589e-06, "loss": 0.5555, "step": 17694 }, { "epoch": 1.58, "grad_norm": 7.956070895443782, "learning_rate": 1.1195414491429846e-06, "loss": 0.5284, "step": 17695 }, { "epoch": 1.58, "grad_norm": 9.874166519113821, "learning_rate": 1.1190859478304556e-06, "loss": 0.6203, "step": 17696 }, { "epoch": 1.58, "grad_norm": 7.956959852078902, "learning_rate": 1.1186305275235064e-06, "loss": 0.5907, "step": 17697 }, { "epoch": 1.58, "grad_norm": 7.920914129514998, "learning_rate": 1.118175188231646e-06, "loss": 0.6417, "step": 17698 }, { "epoch": 1.58, "grad_norm": 5.59495518621229, "learning_rate": 1.1177199299643765e-06, "loss": 0.5589, "step": 17699 }, { "epoch": 1.58, "grad_norm": 9.482105263370439, "learning_rate": 1.1172647527312036e-06, "loss": 0.6238, "step": 17700 }, { "epoch": 1.58, "grad_norm": 6.481394261149703, "learning_rate": 1.1168096565416241e-06, "loss": 0.5802, "step": 17701 }, { "epoch": 1.58, "grad_norm": 7.789736639306126, "learning_rate": 1.1163546414051392e-06, "loss": 0.6241, "step": 17702 }, { "epoch": 1.58, "grad_norm": 7.193404325897423, "learning_rate": 1.115899707331246e-06, "loss": 0.569, "step": 17703 }, { "epoch": 1.58, "grad_norm": 6.100987165619187, "learning_rate": 1.11544485432944e-06, "loss": 0.5206, "step": 17704 }, { "epoch": 1.58, "grad_norm": 6.328007613583612, "learning_rate": 1.1149900824092164e-06, "loss": 0.547, "step": 17705 }, { "epoch": 1.58, "grad_norm": 9.561106162539094, "learning_rate": 1.1145353915800677e-06, "loss": 0.5553, "step": 17706 }, { "epoch": 1.58, "grad_norm": 6.678303277414667, "learning_rate": 1.114080781851482e-06, "loss": 0.5616, "step": 17707 }, { "epoch": 1.58, "grad_norm": 11.551402481705045, "learning_rate": 1.1136262532329506e-06, "loss": 0.5104, "step": 17708 }, { "epoch": 1.58, "grad_norm": 5.060719489729847, "learning_rate": 1.1131718057339597e-06, "loss": 0.6152, "step": 17709 }, { "epoch": 1.58, "grad_norm": 6.293374329352161, "learning_rate": 1.1127174393639972e-06, "loss": 0.5854, "step": 17710 }, { "epoch": 1.58, "grad_norm": 7.487383975367874, "learning_rate": 1.1122631541325434e-06, "loss": 0.5251, "step": 17711 }, { "epoch": 1.58, "grad_norm": 4.42005142352032, "learning_rate": 1.1118089500490825e-06, "loss": 0.5867, "step": 17712 }, { "epoch": 1.58, "grad_norm": 6.9773956462611215, "learning_rate": 1.1113548271230961e-06, "loss": 0.55, "step": 17713 }, { "epoch": 1.58, "grad_norm": 6.7930934978371456, "learning_rate": 1.1109007853640603e-06, "loss": 0.5151, "step": 17714 }, { "epoch": 1.58, "grad_norm": 7.551132356965308, "learning_rate": 1.1104468247814531e-06, "loss": 0.543, "step": 17715 }, { "epoch": 1.58, "grad_norm": 7.270968295741264, "learning_rate": 1.1099929453847513e-06, "loss": 0.5592, "step": 17716 }, { "epoch": 1.58, "grad_norm": 5.536533418728756, "learning_rate": 1.109539147183427e-06, "loss": 0.6386, "step": 17717 }, { "epoch": 1.58, "grad_norm": 8.064194912985668, "learning_rate": 1.1090854301869535e-06, "loss": 0.5601, "step": 17718 }, { "epoch": 1.58, "grad_norm": 6.259881509723264, "learning_rate": 1.108631794404802e-06, "loss": 0.556, "step": 17719 }, { "epoch": 1.58, "grad_norm": 6.887729594770233, "learning_rate": 1.1081782398464385e-06, "loss": 0.5206, "step": 17720 }, { "epoch": 1.58, "grad_norm": 5.841013845276308, "learning_rate": 1.1077247665213315e-06, "loss": 0.5894, "step": 17721 }, { "epoch": 1.58, "grad_norm": 5.412811913463078, "learning_rate": 1.1072713744389457e-06, "loss": 0.5794, "step": 17722 }, { "epoch": 1.58, "grad_norm": 9.952991823220742, "learning_rate": 1.106818063608745e-06, "loss": 0.5901, "step": 17723 }, { "epoch": 1.58, "grad_norm": 5.7409951695840755, "learning_rate": 1.1063648340401917e-06, "loss": 0.5957, "step": 17724 }, { "epoch": 1.58, "grad_norm": 5.801133686735875, "learning_rate": 1.1059116857427465e-06, "loss": 0.5703, "step": 17725 }, { "epoch": 1.58, "grad_norm": 6.708503282693633, "learning_rate": 1.1054586187258675e-06, "loss": 0.5421, "step": 17726 }, { "epoch": 1.58, "grad_norm": 5.624638061538166, "learning_rate": 1.1050056329990088e-06, "loss": 0.5714, "step": 17727 }, { "epoch": 1.58, "grad_norm": 6.523456884517037, "learning_rate": 1.1045527285716285e-06, "loss": 0.5089, "step": 17728 }, { "epoch": 1.58, "grad_norm": 8.34611735204096, "learning_rate": 1.1040999054531782e-06, "loss": 0.5162, "step": 17729 }, { "epoch": 1.58, "grad_norm": 6.539722577345801, "learning_rate": 1.1036471636531109e-06, "loss": 0.5436, "step": 17730 }, { "epoch": 1.58, "grad_norm": 9.439643711616217, "learning_rate": 1.1031945031808766e-06, "loss": 0.5824, "step": 17731 }, { "epoch": 1.58, "grad_norm": 6.490443016983458, "learning_rate": 1.102741924045924e-06, "loss": 0.5805, "step": 17732 }, { "epoch": 1.58, "grad_norm": 5.867945960677296, "learning_rate": 1.1022894262576977e-06, "loss": 0.5828, "step": 17733 }, { "epoch": 1.58, "grad_norm": 6.898182935104365, "learning_rate": 1.1018370098256438e-06, "loss": 0.5542, "step": 17734 }, { "epoch": 1.58, "grad_norm": 7.35400791048111, "learning_rate": 1.1013846747592054e-06, "loss": 0.6802, "step": 17735 }, { "epoch": 1.58, "grad_norm": 7.256036469806392, "learning_rate": 1.1009324210678241e-06, "loss": 0.5645, "step": 17736 }, { "epoch": 1.58, "grad_norm": 5.392053705164908, "learning_rate": 1.1004802487609412e-06, "loss": 0.5175, "step": 17737 }, { "epoch": 1.58, "grad_norm": 6.918740236424694, "learning_rate": 1.100028157847992e-06, "loss": 0.5794, "step": 17738 }, { "epoch": 1.58, "grad_norm": 6.726331262929888, "learning_rate": 1.0995761483384154e-06, "loss": 0.5734, "step": 17739 }, { "epoch": 1.58, "grad_norm": 6.07235344206029, "learning_rate": 1.0991242202416436e-06, "loss": 0.5432, "step": 17740 }, { "epoch": 1.58, "grad_norm": 6.5209443307153565, "learning_rate": 1.0986723735671112e-06, "loss": 0.5768, "step": 17741 }, { "epoch": 1.58, "grad_norm": 6.189145490494776, "learning_rate": 1.0982206083242486e-06, "loss": 0.5434, "step": 17742 }, { "epoch": 1.58, "grad_norm": 5.233959160485008, "learning_rate": 1.0977689245224866e-06, "loss": 0.5461, "step": 17743 }, { "epoch": 1.58, "grad_norm": 8.252156029758718, "learning_rate": 1.0973173221712525e-06, "loss": 0.5704, "step": 17744 }, { "epoch": 1.58, "grad_norm": 7.109437680917873, "learning_rate": 1.0968658012799738e-06, "loss": 0.5485, "step": 17745 }, { "epoch": 1.58, "grad_norm": 8.413060250308108, "learning_rate": 1.0964143618580724e-06, "loss": 0.5912, "step": 17746 }, { "epoch": 1.58, "grad_norm": 8.593120382772229, "learning_rate": 1.0959630039149727e-06, "loss": 0.5295, "step": 17747 }, { "epoch": 1.58, "grad_norm": 6.544339664796516, "learning_rate": 1.0955117274600952e-06, "loss": 0.5104, "step": 17748 }, { "epoch": 1.58, "grad_norm": 6.058131026871833, "learning_rate": 1.0950605325028603e-06, "loss": 0.5737, "step": 17749 }, { "epoch": 1.58, "grad_norm": 6.296662434684804, "learning_rate": 1.0946094190526857e-06, "loss": 0.5233, "step": 17750 }, { "epoch": 1.58, "grad_norm": 7.839372883811948, "learning_rate": 1.0941583871189859e-06, "loss": 0.5891, "step": 17751 }, { "epoch": 1.58, "grad_norm": 5.90961878181536, "learning_rate": 1.0937074367111761e-06, "loss": 0.5487, "step": 17752 }, { "epoch": 1.58, "grad_norm": 5.885026000082986, "learning_rate": 1.0932565678386686e-06, "loss": 0.6232, "step": 17753 }, { "epoch": 1.58, "grad_norm": 5.478300776562269, "learning_rate": 1.0928057805108766e-06, "loss": 0.5585, "step": 17754 }, { "epoch": 1.58, "grad_norm": 7.374362122676819, "learning_rate": 1.0923550747372058e-06, "loss": 0.6231, "step": 17755 }, { "epoch": 1.58, "grad_norm": 6.708365307632975, "learning_rate": 1.091904450527065e-06, "loss": 0.5851, "step": 17756 }, { "epoch": 1.58, "grad_norm": 6.6851067766590315, "learning_rate": 1.0914539078898617e-06, "loss": 0.5702, "step": 17757 }, { "epoch": 1.58, "grad_norm": 6.2750267580916805, "learning_rate": 1.0910034468349966e-06, "loss": 0.6326, "step": 17758 }, { "epoch": 1.58, "grad_norm": 9.650695614065741, "learning_rate": 1.090553067371875e-06, "loss": 0.5245, "step": 17759 }, { "epoch": 1.58, "grad_norm": 4.860630450871063, "learning_rate": 1.090102769509896e-06, "loss": 0.594, "step": 17760 }, { "epoch": 1.58, "grad_norm": 6.574096636205051, "learning_rate": 1.089652553258459e-06, "loss": 0.5299, "step": 17761 }, { "epoch": 1.58, "grad_norm": 8.332461574230216, "learning_rate": 1.0892024186269617e-06, "loss": 0.5467, "step": 17762 }, { "epoch": 1.58, "grad_norm": 6.4375445676229495, "learning_rate": 1.0887523656248007e-06, "loss": 0.5871, "step": 17763 }, { "epoch": 1.58, "grad_norm": 9.261211107713168, "learning_rate": 1.0883023942613675e-06, "loss": 0.5681, "step": 17764 }, { "epoch": 1.58, "grad_norm": 6.196270508884942, "learning_rate": 1.0878525045460547e-06, "loss": 0.6373, "step": 17765 }, { "epoch": 1.58, "grad_norm": 6.064010773737531, "learning_rate": 1.087402696488254e-06, "loss": 0.5424, "step": 17766 }, { "epoch": 1.58, "grad_norm": 6.243937581007242, "learning_rate": 1.0869529700973535e-06, "loss": 0.6306, "step": 17767 }, { "epoch": 1.59, "grad_norm": 5.904216207153296, "learning_rate": 1.0865033253827411e-06, "loss": 0.5861, "step": 17768 }, { "epoch": 1.59, "grad_norm": 6.990579666496277, "learning_rate": 1.0860537623538008e-06, "loss": 0.6083, "step": 17769 }, { "epoch": 1.59, "grad_norm": 7.096826427565851, "learning_rate": 1.085604281019918e-06, "loss": 0.5337, "step": 17770 }, { "epoch": 1.59, "grad_norm": 7.28618132287405, "learning_rate": 1.0851548813904717e-06, "loss": 0.6189, "step": 17771 }, { "epoch": 1.59, "grad_norm": 7.163644500256609, "learning_rate": 1.0847055634748443e-06, "loss": 0.5671, "step": 17772 }, { "epoch": 1.59, "grad_norm": 8.928538379805731, "learning_rate": 1.0842563272824135e-06, "loss": 0.5324, "step": 17773 }, { "epoch": 1.59, "grad_norm": 5.312560945268219, "learning_rate": 1.083807172822557e-06, "loss": 0.5815, "step": 17774 }, { "epoch": 1.59, "grad_norm": 7.872664348330219, "learning_rate": 1.083358100104649e-06, "loss": 0.5656, "step": 17775 }, { "epoch": 1.59, "grad_norm": 5.787312677290027, "learning_rate": 1.082909109138065e-06, "loss": 0.608, "step": 17776 }, { "epoch": 1.59, "grad_norm": 6.635507936883284, "learning_rate": 1.0824601999321737e-06, "loss": 0.6158, "step": 17777 }, { "epoch": 1.59, "grad_norm": 6.263178158344926, "learning_rate": 1.0820113724963466e-06, "loss": 0.5486, "step": 17778 }, { "epoch": 1.59, "grad_norm": 6.521753862970267, "learning_rate": 1.0815626268399514e-06, "loss": 0.5775, "step": 17779 }, { "epoch": 1.59, "grad_norm": 7.8979017475594135, "learning_rate": 1.0811139629723554e-06, "loss": 0.5326, "step": 17780 }, { "epoch": 1.59, "grad_norm": 7.222733420324145, "learning_rate": 1.0806653809029232e-06, "loss": 0.6071, "step": 17781 }, { "epoch": 1.59, "grad_norm": 10.383767741238573, "learning_rate": 1.080216880641019e-06, "loss": 0.5978, "step": 17782 }, { "epoch": 1.59, "grad_norm": 8.308116932836388, "learning_rate": 1.0797684621960036e-06, "loss": 0.624, "step": 17783 }, { "epoch": 1.59, "grad_norm": 6.902020971840928, "learning_rate": 1.079320125577235e-06, "loss": 0.5757, "step": 17784 }, { "epoch": 1.59, "grad_norm": 7.344343498406879, "learning_rate": 1.0788718707940733e-06, "loss": 0.5659, "step": 17785 }, { "epoch": 1.59, "grad_norm": 7.570604203909546, "learning_rate": 1.0784236978558732e-06, "loss": 0.5649, "step": 17786 }, { "epoch": 1.59, "grad_norm": 7.255248243184729, "learning_rate": 1.0779756067719904e-06, "loss": 0.5186, "step": 17787 }, { "epoch": 1.59, "grad_norm": 7.747212977056284, "learning_rate": 1.077527597551778e-06, "loss": 0.6498, "step": 17788 }, { "epoch": 1.59, "grad_norm": 8.002220535377601, "learning_rate": 1.0770796702045883e-06, "loss": 0.5705, "step": 17789 }, { "epoch": 1.59, "grad_norm": 6.135299744019238, "learning_rate": 1.076631824739768e-06, "loss": 0.5447, "step": 17790 }, { "epoch": 1.59, "grad_norm": 8.453854530225575, "learning_rate": 1.0761840611666663e-06, "loss": 0.5993, "step": 17791 }, { "epoch": 1.59, "grad_norm": 6.969544962737254, "learning_rate": 1.0757363794946296e-06, "loss": 0.5941, "step": 17792 }, { "epoch": 1.59, "grad_norm": 8.68358230543144, "learning_rate": 1.0752887797330013e-06, "loss": 0.5784, "step": 17793 }, { "epoch": 1.59, "grad_norm": 9.061466529685534, "learning_rate": 1.0748412618911252e-06, "loss": 0.5841, "step": 17794 }, { "epoch": 1.59, "grad_norm": 6.722506629732372, "learning_rate": 1.0743938259783432e-06, "loss": 0.5721, "step": 17795 }, { "epoch": 1.59, "grad_norm": 6.5981482440850465, "learning_rate": 1.0739464720039917e-06, "loss": 0.5717, "step": 17796 }, { "epoch": 1.59, "grad_norm": 7.801759063294678, "learning_rate": 1.0734991999774114e-06, "loss": 0.6548, "step": 17797 }, { "epoch": 1.59, "grad_norm": 5.806590692207091, "learning_rate": 1.0730520099079344e-06, "loss": 0.5441, "step": 17798 }, { "epoch": 1.59, "grad_norm": 5.635888149628336, "learning_rate": 1.0726049018048973e-06, "loss": 0.5414, "step": 17799 }, { "epoch": 1.59, "grad_norm": 6.098682450096499, "learning_rate": 1.072157875677632e-06, "loss": 0.4946, "step": 17800 }, { "epoch": 1.59, "grad_norm": 7.710716472867745, "learning_rate": 1.071710931535469e-06, "loss": 0.576, "step": 17801 }, { "epoch": 1.59, "grad_norm": 9.193788776004538, "learning_rate": 1.071264069387739e-06, "loss": 0.5718, "step": 17802 }, { "epoch": 1.59, "grad_norm": 7.408977051020424, "learning_rate": 1.0708172892437663e-06, "loss": 0.565, "step": 17803 }, { "epoch": 1.59, "grad_norm": 6.888604011482237, "learning_rate": 1.070370591112878e-06, "loss": 0.5446, "step": 17804 }, { "epoch": 1.59, "grad_norm": 5.917138532387253, "learning_rate": 1.0699239750043977e-06, "loss": 0.5455, "step": 17805 }, { "epoch": 1.59, "grad_norm": 7.499181678780615, "learning_rate": 1.0694774409276482e-06, "loss": 0.5622, "step": 17806 }, { "epoch": 1.59, "grad_norm": 7.001052303073451, "learning_rate": 1.0690309888919486e-06, "loss": 0.6349, "step": 17807 }, { "epoch": 1.59, "grad_norm": 7.8295293123916565, "learning_rate": 1.0685846189066208e-06, "loss": 0.6075, "step": 17808 }, { "epoch": 1.59, "grad_norm": 6.780082007565848, "learning_rate": 1.0681383309809773e-06, "loss": 0.5788, "step": 17809 }, { "epoch": 1.59, "grad_norm": 5.107598562288877, "learning_rate": 1.067692125124336e-06, "loss": 0.513, "step": 17810 }, { "epoch": 1.59, "grad_norm": 7.4331468625276464, "learning_rate": 1.0672460013460107e-06, "loss": 0.6001, "step": 17811 }, { "epoch": 1.59, "grad_norm": 4.754641019969943, "learning_rate": 1.066799959655312e-06, "loss": 0.5313, "step": 17812 }, { "epoch": 1.59, "grad_norm": 7.504273239894298, "learning_rate": 1.0663540000615497e-06, "loss": 0.6187, "step": 17813 }, { "epoch": 1.59, "grad_norm": 6.30204082150503, "learning_rate": 1.0659081225740337e-06, "loss": 0.5977, "step": 17814 }, { "epoch": 1.59, "grad_norm": 5.5580178233010535, "learning_rate": 1.0654623272020714e-06, "loss": 0.5918, "step": 17815 }, { "epoch": 1.59, "grad_norm": 6.705899725047185, "learning_rate": 1.065016613954965e-06, "loss": 0.5866, "step": 17816 }, { "epoch": 1.59, "grad_norm": 6.18351206553548, "learning_rate": 1.0645709828420197e-06, "loss": 0.5814, "step": 17817 }, { "epoch": 1.59, "grad_norm": 6.868224854281957, "learning_rate": 1.0641254338725366e-06, "loss": 0.5602, "step": 17818 }, { "epoch": 1.59, "grad_norm": 6.160273082915747, "learning_rate": 1.0636799670558152e-06, "loss": 0.5908, "step": 17819 }, { "epoch": 1.59, "grad_norm": 6.777197151151375, "learning_rate": 1.0632345824011558e-06, "loss": 0.6076, "step": 17820 }, { "epoch": 1.59, "grad_norm": 8.996773313510069, "learning_rate": 1.0627892799178519e-06, "loss": 0.5623, "step": 17821 }, { "epoch": 1.59, "grad_norm": 5.592931544479642, "learning_rate": 1.062344059615199e-06, "loss": 0.5519, "step": 17822 }, { "epoch": 1.59, "grad_norm": 7.879203915935024, "learning_rate": 1.0618989215024906e-06, "loss": 0.5959, "step": 17823 }, { "epoch": 1.59, "grad_norm": 4.287736841737791, "learning_rate": 1.0614538655890184e-06, "loss": 0.5449, "step": 17824 }, { "epoch": 1.59, "grad_norm": 5.3857028850049105, "learning_rate": 1.061008891884071e-06, "loss": 0.5308, "step": 17825 }, { "epoch": 1.59, "grad_norm": 8.013516535495173, "learning_rate": 1.060564000396938e-06, "loss": 0.6342, "step": 17826 }, { "epoch": 1.59, "grad_norm": 5.570478011760584, "learning_rate": 1.0601191911369035e-06, "loss": 0.5189, "step": 17827 }, { "epoch": 1.59, "grad_norm": 7.387882034458008, "learning_rate": 1.059674464113254e-06, "loss": 0.6003, "step": 17828 }, { "epoch": 1.59, "grad_norm": 8.381526059383503, "learning_rate": 1.059229819335269e-06, "loss": 0.5862, "step": 17829 }, { "epoch": 1.59, "grad_norm": 5.7110127841859075, "learning_rate": 1.0587852568122313e-06, "loss": 0.5873, "step": 17830 }, { "epoch": 1.59, "grad_norm": 8.566206534977958, "learning_rate": 1.0583407765534208e-06, "loss": 0.5806, "step": 17831 }, { "epoch": 1.59, "grad_norm": 8.878977940324667, "learning_rate": 1.0578963785681146e-06, "loss": 0.5767, "step": 17832 }, { "epoch": 1.59, "grad_norm": 8.281668846928035, "learning_rate": 1.0574520628655893e-06, "loss": 0.6024, "step": 17833 }, { "epoch": 1.59, "grad_norm": 6.196724762141909, "learning_rate": 1.0570078294551173e-06, "loss": 0.6052, "step": 17834 }, { "epoch": 1.59, "grad_norm": 6.139092853789234, "learning_rate": 1.0565636783459716e-06, "loss": 0.5689, "step": 17835 }, { "epoch": 1.59, "grad_norm": 6.692549126222678, "learning_rate": 1.056119609547423e-06, "loss": 0.594, "step": 17836 }, { "epoch": 1.59, "grad_norm": 6.882844845287688, "learning_rate": 1.0556756230687404e-06, "loss": 0.5634, "step": 17837 }, { "epoch": 1.59, "grad_norm": 8.633520614034097, "learning_rate": 1.0552317189191918e-06, "loss": 0.6378, "step": 17838 }, { "epoch": 1.59, "grad_norm": 6.547676428937197, "learning_rate": 1.0547878971080434e-06, "loss": 0.6131, "step": 17839 }, { "epoch": 1.59, "grad_norm": 5.868919060663789, "learning_rate": 1.0543441576445573e-06, "loss": 0.5583, "step": 17840 }, { "epoch": 1.59, "grad_norm": 6.9106206197607305, "learning_rate": 1.0539005005379948e-06, "loss": 0.5847, "step": 17841 }, { "epoch": 1.59, "grad_norm": 6.336278129303098, "learning_rate": 1.053456925797618e-06, "loss": 0.5285, "step": 17842 }, { "epoch": 1.59, "grad_norm": 5.9401043502473145, "learning_rate": 1.0530134334326852e-06, "loss": 0.5644, "step": 17843 }, { "epoch": 1.59, "grad_norm": 6.170904236183107, "learning_rate": 1.0525700234524527e-06, "loss": 0.5996, "step": 17844 }, { "epoch": 1.59, "grad_norm": 7.070065680514238, "learning_rate": 1.0521266958661763e-06, "loss": 0.5936, "step": 17845 }, { "epoch": 1.59, "grad_norm": 7.0022738450111195, "learning_rate": 1.0516834506831108e-06, "loss": 0.5893, "step": 17846 }, { "epoch": 1.59, "grad_norm": 6.317311933933317, "learning_rate": 1.0512402879125055e-06, "loss": 0.5829, "step": 17847 }, { "epoch": 1.59, "grad_norm": 6.412783155747906, "learning_rate": 1.0507972075636114e-06, "loss": 0.5976, "step": 17848 }, { "epoch": 1.59, "grad_norm": 7.1555088405656, "learning_rate": 1.050354209645677e-06, "loss": 0.5349, "step": 17849 }, { "epoch": 1.59, "grad_norm": 9.001218295400314, "learning_rate": 1.049911294167949e-06, "loss": 0.6117, "step": 17850 }, { "epoch": 1.59, "grad_norm": 8.197922673965673, "learning_rate": 1.0494684611396723e-06, "loss": 0.5884, "step": 17851 }, { "epoch": 1.59, "grad_norm": 7.447160999464562, "learning_rate": 1.0490257105700912e-06, "loss": 0.6193, "step": 17852 }, { "epoch": 1.59, "grad_norm": 6.750135448385365, "learning_rate": 1.0485830424684445e-06, "loss": 0.5654, "step": 17853 }, { "epoch": 1.59, "grad_norm": 4.9668117626147446, "learning_rate": 1.048140456843973e-06, "loss": 0.5597, "step": 17854 }, { "epoch": 1.59, "grad_norm": 8.24674428608199, "learning_rate": 1.0476979537059169e-06, "loss": 0.5391, "step": 17855 }, { "epoch": 1.59, "grad_norm": 9.302652039303435, "learning_rate": 1.0472555330635093e-06, "loss": 0.5168, "step": 17856 }, { "epoch": 1.59, "grad_norm": 7.269103699811801, "learning_rate": 1.0468131949259858e-06, "loss": 0.5416, "step": 17857 }, { "epoch": 1.59, "grad_norm": 7.543041449120049, "learning_rate": 1.0463709393025794e-06, "loss": 0.596, "step": 17858 }, { "epoch": 1.59, "grad_norm": 6.1948759877539725, "learning_rate": 1.045928766202523e-06, "loss": 0.5908, "step": 17859 }, { "epoch": 1.59, "grad_norm": 7.8516659451070705, "learning_rate": 1.0454866756350429e-06, "loss": 0.5849, "step": 17860 }, { "epoch": 1.59, "grad_norm": 5.786807343905919, "learning_rate": 1.0450446676093679e-06, "loss": 0.6407, "step": 17861 }, { "epoch": 1.59, "grad_norm": 5.64601993650089, "learning_rate": 1.0446027421347243e-06, "loss": 0.6478, "step": 17862 }, { "epoch": 1.59, "grad_norm": 6.6596466746364955, "learning_rate": 1.0441608992203366e-06, "loss": 0.6206, "step": 17863 }, { "epoch": 1.59, "grad_norm": 7.33169474420827, "learning_rate": 1.043719138875427e-06, "loss": 0.5401, "step": 17864 }, { "epoch": 1.59, "grad_norm": 5.8527383994451165, "learning_rate": 1.0432774611092178e-06, "loss": 0.5835, "step": 17865 }, { "epoch": 1.59, "grad_norm": 8.117905511179607, "learning_rate": 1.0428358659309245e-06, "loss": 0.6231, "step": 17866 }, { "epoch": 1.59, "grad_norm": 7.059053953460864, "learning_rate": 1.0423943533497671e-06, "loss": 0.5188, "step": 17867 }, { "epoch": 1.59, "grad_norm": 7.541342704647159, "learning_rate": 1.0419529233749603e-06, "loss": 0.6044, "step": 17868 }, { "epoch": 1.59, "grad_norm": 5.816980523470719, "learning_rate": 1.0415115760157196e-06, "loss": 0.5509, "step": 17869 }, { "epoch": 1.59, "grad_norm": 5.994279221239179, "learning_rate": 1.0410703112812542e-06, "loss": 0.5982, "step": 17870 }, { "epoch": 1.59, "grad_norm": 6.553976484046335, "learning_rate": 1.0406291291807769e-06, "loss": 0.5404, "step": 17871 }, { "epoch": 1.59, "grad_norm": 5.949487129269515, "learning_rate": 1.0401880297234963e-06, "loss": 0.6214, "step": 17872 }, { "epoch": 1.59, "grad_norm": 5.093023167912436, "learning_rate": 1.0397470129186177e-06, "loss": 0.5863, "step": 17873 }, { "epoch": 1.59, "grad_norm": 10.150629838619027, "learning_rate": 1.0393060787753472e-06, "loss": 0.5641, "step": 17874 }, { "epoch": 1.59, "grad_norm": 7.257900662946631, "learning_rate": 1.0388652273028887e-06, "loss": 0.5688, "step": 17875 }, { "epoch": 1.59, "grad_norm": 7.165158257296282, "learning_rate": 1.0384244585104435e-06, "loss": 0.5484, "step": 17876 }, { "epoch": 1.59, "grad_norm": 6.327781142237763, "learning_rate": 1.0379837724072122e-06, "loss": 0.5791, "step": 17877 }, { "epoch": 1.59, "grad_norm": 9.143195088751693, "learning_rate": 1.0375431690023946e-06, "loss": 0.492, "step": 17878 }, { "epoch": 1.59, "grad_norm": 5.691840694717053, "learning_rate": 1.0371026483051843e-06, "loss": 0.5307, "step": 17879 }, { "epoch": 1.6, "grad_norm": 6.395680615334547, "learning_rate": 1.0366622103247775e-06, "loss": 0.6097, "step": 17880 }, { "epoch": 1.6, "grad_norm": 6.331842125765225, "learning_rate": 1.0362218550703674e-06, "loss": 0.56, "step": 17881 }, { "epoch": 1.6, "grad_norm": 7.340207433401166, "learning_rate": 1.0357815825511457e-06, "loss": 0.5532, "step": 17882 }, { "epoch": 1.6, "grad_norm": 8.11571916083722, "learning_rate": 1.0353413927763034e-06, "loss": 0.5661, "step": 17883 }, { "epoch": 1.6, "grad_norm": 6.573559146599973, "learning_rate": 1.0349012857550256e-06, "loss": 0.5606, "step": 17884 }, { "epoch": 1.6, "grad_norm": 6.851948934406516, "learning_rate": 1.0344612614965017e-06, "loss": 0.5749, "step": 17885 }, { "epoch": 1.6, "grad_norm": 5.727476626141845, "learning_rate": 1.0340213200099124e-06, "loss": 0.5869, "step": 17886 }, { "epoch": 1.6, "grad_norm": 6.048040298937964, "learning_rate": 1.0335814613044437e-06, "loss": 0.5422, "step": 17887 }, { "epoch": 1.6, "grad_norm": 5.767757136300132, "learning_rate": 1.033141685389275e-06, "loss": 0.574, "step": 17888 }, { "epoch": 1.6, "grad_norm": 6.986327850397047, "learning_rate": 1.0327019922735865e-06, "loss": 0.5545, "step": 17889 }, { "epoch": 1.6, "grad_norm": 7.171196785220543, "learning_rate": 1.0322623819665561e-06, "loss": 0.5628, "step": 17890 }, { "epoch": 1.6, "grad_norm": 8.154903852106468, "learning_rate": 1.0318228544773601e-06, "loss": 0.6194, "step": 17891 }, { "epoch": 1.6, "grad_norm": 5.905868906887148, "learning_rate": 1.031383409815171e-06, "loss": 0.6186, "step": 17892 }, { "epoch": 1.6, "grad_norm": 8.204149627428391, "learning_rate": 1.0309440479891614e-06, "loss": 0.5553, "step": 17893 }, { "epoch": 1.6, "grad_norm": 8.439976331496068, "learning_rate": 1.030504769008503e-06, "loss": 0.6057, "step": 17894 }, { "epoch": 1.6, "grad_norm": 6.669718611817225, "learning_rate": 1.030065572882365e-06, "loss": 0.5796, "step": 17895 }, { "epoch": 1.6, "grad_norm": 6.687794437756498, "learning_rate": 1.029626459619915e-06, "loss": 0.6103, "step": 17896 }, { "epoch": 1.6, "grad_norm": 10.852417348612583, "learning_rate": 1.0291874292303162e-06, "loss": 0.5679, "step": 17897 }, { "epoch": 1.6, "grad_norm": 4.906574637240102, "learning_rate": 1.0287484817227355e-06, "loss": 0.5795, "step": 17898 }, { "epoch": 1.6, "grad_norm": 5.491049326842565, "learning_rate": 1.0283096171063323e-06, "loss": 0.5738, "step": 17899 }, { "epoch": 1.6, "grad_norm": 6.494878796091423, "learning_rate": 1.0278708353902672e-06, "loss": 0.5763, "step": 17900 }, { "epoch": 1.6, "grad_norm": 5.263773414060625, "learning_rate": 1.0274321365837004e-06, "loss": 0.5503, "step": 17901 }, { "epoch": 1.6, "grad_norm": 8.973000855070655, "learning_rate": 1.0269935206957877e-06, "loss": 0.5439, "step": 17902 }, { "epoch": 1.6, "grad_norm": 6.856203949969184, "learning_rate": 1.0265549877356845e-06, "loss": 0.5877, "step": 17903 }, { "epoch": 1.6, "grad_norm": 6.625037269434562, "learning_rate": 1.0261165377125455e-06, "loss": 0.5882, "step": 17904 }, { "epoch": 1.6, "grad_norm": 5.257673586606543, "learning_rate": 1.0256781706355195e-06, "loss": 0.5704, "step": 17905 }, { "epoch": 1.6, "grad_norm": 4.667115250922777, "learning_rate": 1.0252398865137585e-06, "loss": 0.5305, "step": 17906 }, { "epoch": 1.6, "grad_norm": 7.81372116738008, "learning_rate": 1.0248016853564103e-06, "loss": 0.5911, "step": 17907 }, { "epoch": 1.6, "grad_norm": 5.91788836998909, "learning_rate": 1.024363567172621e-06, "loss": 0.5817, "step": 17908 }, { "epoch": 1.6, "grad_norm": 6.754402470034206, "learning_rate": 1.0239255319715375e-06, "loss": 0.5946, "step": 17909 }, { "epoch": 1.6, "grad_norm": 7.224993216483908, "learning_rate": 1.0234875797622996e-06, "loss": 0.5629, "step": 17910 }, { "epoch": 1.6, "grad_norm": 6.556795619134239, "learning_rate": 1.0230497105540499e-06, "loss": 0.5456, "step": 17911 }, { "epoch": 1.6, "grad_norm": 6.007123396013225, "learning_rate": 1.0226119243559296e-06, "loss": 0.542, "step": 17912 }, { "epoch": 1.6, "grad_norm": 6.6920822989340225, "learning_rate": 1.0221742211770736e-06, "loss": 0.5731, "step": 17913 }, { "epoch": 1.6, "grad_norm": 7.714460393338287, "learning_rate": 1.0217366010266195e-06, "loss": 0.5324, "step": 17914 }, { "epoch": 1.6, "grad_norm": 6.5964535056578875, "learning_rate": 1.0212990639137016e-06, "loss": 0.5147, "step": 17915 }, { "epoch": 1.6, "grad_norm": 5.932281550265375, "learning_rate": 1.020861609847454e-06, "loss": 0.6094, "step": 17916 }, { "epoch": 1.6, "grad_norm": 5.427404379885244, "learning_rate": 1.020424238837005e-06, "loss": 0.57, "step": 17917 }, { "epoch": 1.6, "grad_norm": 6.941842004995757, "learning_rate": 1.0199869508914845e-06, "loss": 0.5406, "step": 17918 }, { "epoch": 1.6, "grad_norm": 5.909774034379287, "learning_rate": 1.0195497460200204e-06, "loss": 0.5572, "step": 17919 }, { "epoch": 1.6, "grad_norm": 6.162914029904658, "learning_rate": 1.0191126242317384e-06, "loss": 0.5873, "step": 17920 }, { "epoch": 1.6, "grad_norm": 7.824884703504038, "learning_rate": 1.0186755855357622e-06, "loss": 0.5778, "step": 17921 }, { "epoch": 1.6, "grad_norm": 8.776991746339222, "learning_rate": 1.0182386299412155e-06, "loss": 0.5782, "step": 17922 }, { "epoch": 1.6, "grad_norm": 5.660437497236797, "learning_rate": 1.0178017574572163e-06, "loss": 0.5608, "step": 17923 }, { "epoch": 1.6, "grad_norm": 5.335076318755636, "learning_rate": 1.017364968092885e-06, "loss": 0.5019, "step": 17924 }, { "epoch": 1.6, "grad_norm": 6.015201904414893, "learning_rate": 1.0169282618573372e-06, "loss": 0.5379, "step": 17925 }, { "epoch": 1.6, "grad_norm": 6.391434608219474, "learning_rate": 1.0164916387596895e-06, "loss": 0.5552, "step": 17926 }, { "epoch": 1.6, "grad_norm": 6.670931095303166, "learning_rate": 1.0160550988090567e-06, "loss": 0.5532, "step": 17927 }, { "epoch": 1.6, "grad_norm": 6.475913534876052, "learning_rate": 1.0156186420145481e-06, "loss": 0.5439, "step": 17928 }, { "epoch": 1.6, "grad_norm": 7.801711744677141, "learning_rate": 1.0151822683852751e-06, "loss": 0.5882, "step": 17929 }, { "epoch": 1.6, "grad_norm": 10.58160274986975, "learning_rate": 1.0147459779303448e-06, "loss": 0.5873, "step": 17930 }, { "epoch": 1.6, "grad_norm": 7.874222126969351, "learning_rate": 1.0143097706588644e-06, "loss": 0.5666, "step": 17931 }, { "epoch": 1.6, "grad_norm": 6.824714289315292, "learning_rate": 1.0138736465799392e-06, "loss": 0.5587, "step": 17932 }, { "epoch": 1.6, "grad_norm": 6.175451958735273, "learning_rate": 1.0134376057026723e-06, "loss": 0.5381, "step": 17933 }, { "epoch": 1.6, "grad_norm": 5.164983455541102, "learning_rate": 1.0130016480361648e-06, "loss": 0.6112, "step": 17934 }, { "epoch": 1.6, "grad_norm": 4.966470028820992, "learning_rate": 1.0125657735895183e-06, "loss": 0.5614, "step": 17935 }, { "epoch": 1.6, "grad_norm": 6.765476108518543, "learning_rate": 1.0121299823718273e-06, "loss": 0.6289, "step": 17936 }, { "epoch": 1.6, "grad_norm": 8.040670272872225, "learning_rate": 1.0116942743921894e-06, "loss": 0.6106, "step": 17937 }, { "epoch": 1.6, "grad_norm": 4.924396206151737, "learning_rate": 1.0112586496596993e-06, "loss": 0.5606, "step": 17938 }, { "epoch": 1.6, "grad_norm": 7.144947512052589, "learning_rate": 1.0108231081834496e-06, "loss": 0.5773, "step": 17939 }, { "epoch": 1.6, "grad_norm": 6.762917369082842, "learning_rate": 1.0103876499725318e-06, "loss": 0.5784, "step": 17940 }, { "epoch": 1.6, "grad_norm": 6.339976656452219, "learning_rate": 1.0099522750360358e-06, "loss": 0.5999, "step": 17941 }, { "epoch": 1.6, "grad_norm": 5.69336274950625, "learning_rate": 1.0095169833830481e-06, "loss": 0.4993, "step": 17942 }, { "epoch": 1.6, "grad_norm": 6.883742156638753, "learning_rate": 1.009081775022653e-06, "loss": 0.5567, "step": 17943 }, { "epoch": 1.6, "grad_norm": 5.108165981320566, "learning_rate": 1.0086466499639359e-06, "loss": 0.5578, "step": 17944 }, { "epoch": 1.6, "grad_norm": 7.898492172591261, "learning_rate": 1.0082116082159787e-06, "loss": 0.5635, "step": 17945 }, { "epoch": 1.6, "grad_norm": 9.116390084168836, "learning_rate": 1.0077766497878626e-06, "loss": 0.5573, "step": 17946 }, { "epoch": 1.6, "grad_norm": 7.773513822313401, "learning_rate": 1.007341774688666e-06, "loss": 0.5879, "step": 17947 }, { "epoch": 1.6, "grad_norm": 8.716741745423962, "learning_rate": 1.0069069829274674e-06, "loss": 0.5305, "step": 17948 }, { "epoch": 1.6, "grad_norm": 5.466805175680484, "learning_rate": 1.00647227451334e-06, "loss": 0.5644, "step": 17949 }, { "epoch": 1.6, "grad_norm": 5.602322894069889, "learning_rate": 1.0060376494553576e-06, "loss": 0.5627, "step": 17950 }, { "epoch": 1.6, "grad_norm": 6.657535894055436, "learning_rate": 1.0056031077625928e-06, "loss": 0.5719, "step": 17951 }, { "epoch": 1.6, "grad_norm": 7.8299182180177045, "learning_rate": 1.005168649444116e-06, "loss": 0.6256, "step": 17952 }, { "epoch": 1.6, "grad_norm": 8.283424314138635, "learning_rate": 1.0047342745089945e-06, "loss": 0.5263, "step": 17953 }, { "epoch": 1.6, "grad_norm": 6.596214865853448, "learning_rate": 1.0042999829662969e-06, "loss": 0.5497, "step": 17954 }, { "epoch": 1.6, "grad_norm": 6.47954095715288, "learning_rate": 1.0038657748250857e-06, "loss": 0.5773, "step": 17955 }, { "epoch": 1.6, "grad_norm": 7.279826020334558, "learning_rate": 1.0034316500944264e-06, "loss": 0.5422, "step": 17956 }, { "epoch": 1.6, "grad_norm": 5.239460555781768, "learning_rate": 1.0029976087833775e-06, "loss": 0.5782, "step": 17957 }, { "epoch": 1.6, "grad_norm": 6.743679715730344, "learning_rate": 1.002563650901e-06, "loss": 0.5259, "step": 17958 }, { "epoch": 1.6, "grad_norm": 6.983877867469228, "learning_rate": 1.0021297764563525e-06, "loss": 0.5879, "step": 17959 }, { "epoch": 1.6, "grad_norm": 8.988793410527816, "learning_rate": 1.0016959854584907e-06, "loss": 0.6043, "step": 17960 }, { "epoch": 1.6, "grad_norm": 8.616293180661032, "learning_rate": 1.0012622779164705e-06, "loss": 0.5924, "step": 17961 }, { "epoch": 1.6, "grad_norm": 7.354137099705865, "learning_rate": 1.0008286538393413e-06, "loss": 0.5641, "step": 17962 }, { "epoch": 1.6, "grad_norm": 5.492538431726884, "learning_rate": 1.0003951132361567e-06, "loss": 0.549, "step": 17963 }, { "epoch": 1.6, "grad_norm": 6.395931494875349, "learning_rate": 9.99961656115964e-07, "loss": 0.6514, "step": 17964 }, { "epoch": 1.6, "grad_norm": 6.2551491776408, "learning_rate": 9.995282824878126e-07, "loss": 0.547, "step": 17965 }, { "epoch": 1.6, "grad_norm": 6.803667568057199, "learning_rate": 9.990949923607475e-07, "loss": 0.6259, "step": 17966 }, { "epoch": 1.6, "grad_norm": 5.99188088864589, "learning_rate": 9.986617857438142e-07, "loss": 0.5972, "step": 17967 }, { "epoch": 1.6, "grad_norm": 6.48495625254666, "learning_rate": 9.982286626460513e-07, "loss": 0.6347, "step": 17968 }, { "epoch": 1.6, "grad_norm": 5.423958116520265, "learning_rate": 9.977956230765012e-07, "loss": 0.5277, "step": 17969 }, { "epoch": 1.6, "grad_norm": 8.649402742421797, "learning_rate": 9.97362667044205e-07, "loss": 0.5798, "step": 17970 }, { "epoch": 1.6, "grad_norm": 6.517187609907048, "learning_rate": 9.969297945581952e-07, "loss": 0.4766, "step": 17971 }, { "epoch": 1.6, "grad_norm": 10.055484090328658, "learning_rate": 9.964970056275103e-07, "loss": 0.531, "step": 17972 }, { "epoch": 1.6, "grad_norm": 7.560072759646931, "learning_rate": 9.96064300261182e-07, "loss": 0.5643, "step": 17973 }, { "epoch": 1.6, "grad_norm": 5.448675832494569, "learning_rate": 9.956316784682447e-07, "loss": 0.5663, "step": 17974 }, { "epoch": 1.6, "grad_norm": 8.01787937007255, "learning_rate": 9.951991402577255e-07, "loss": 0.5199, "step": 17975 }, { "epoch": 1.6, "grad_norm": 7.083516418009018, "learning_rate": 9.947666856386539e-07, "loss": 0.5869, "step": 17976 }, { "epoch": 1.6, "grad_norm": 6.54914215738966, "learning_rate": 9.943343146200562e-07, "loss": 0.5817, "step": 17977 }, { "epoch": 1.6, "grad_norm": 6.444040045450929, "learning_rate": 9.939020272109573e-07, "loss": 0.5433, "step": 17978 }, { "epoch": 1.6, "grad_norm": 5.932660118985743, "learning_rate": 9.934698234203806e-07, "loss": 0.5879, "step": 17979 }, { "epoch": 1.6, "grad_norm": 7.030407642824599, "learning_rate": 9.930377032573484e-07, "loss": 0.5981, "step": 17980 }, { "epoch": 1.6, "grad_norm": 6.192101452125295, "learning_rate": 9.92605666730878e-07, "loss": 0.5833, "step": 17981 }, { "epoch": 1.6, "grad_norm": 8.465799717657243, "learning_rate": 9.92173713849988e-07, "loss": 0.602, "step": 17982 }, { "epoch": 1.6, "grad_norm": 4.497635446413826, "learning_rate": 9.917418446236948e-07, "loss": 0.5102, "step": 17983 }, { "epoch": 1.6, "grad_norm": 7.6673657941356685, "learning_rate": 9.913100590610142e-07, "loss": 0.5859, "step": 17984 }, { "epoch": 1.6, "grad_norm": 7.509090966173564, "learning_rate": 9.908783571709557e-07, "loss": 0.5493, "step": 17985 }, { "epoch": 1.6, "grad_norm": 5.5947902460955, "learning_rate": 9.904467389625316e-07, "loss": 0.5423, "step": 17986 }, { "epoch": 1.6, "grad_norm": 7.855177744175048, "learning_rate": 9.900152044447526e-07, "loss": 0.5456, "step": 17987 }, { "epoch": 1.6, "grad_norm": 4.8348466250041255, "learning_rate": 9.895837536266228e-07, "loss": 0.5553, "step": 17988 }, { "epoch": 1.6, "grad_norm": 5.057372022037554, "learning_rate": 9.891523865171499e-07, "loss": 0.5495, "step": 17989 }, { "epoch": 1.6, "grad_norm": 6.519113164080511, "learning_rate": 9.88721103125337e-07, "loss": 0.6254, "step": 17990 }, { "epoch": 1.6, "grad_norm": 6.723823069202138, "learning_rate": 9.882899034601867e-07, "loss": 0.5083, "step": 17991 }, { "epoch": 1.61, "grad_norm": 6.369328881633095, "learning_rate": 9.878587875307004e-07, "loss": 0.5325, "step": 17992 }, { "epoch": 1.61, "grad_norm": 8.651705855498967, "learning_rate": 9.874277553458739e-07, "loss": 0.5902, "step": 17993 }, { "epoch": 1.61, "grad_norm": 7.267116287098495, "learning_rate": 9.869968069147063e-07, "loss": 0.5903, "step": 17994 }, { "epoch": 1.61, "grad_norm": 7.1115460899195035, "learning_rate": 9.865659422461915e-07, "loss": 0.5779, "step": 17995 }, { "epoch": 1.61, "grad_norm": 5.742900131274435, "learning_rate": 9.861351613493237e-07, "loss": 0.533, "step": 17996 }, { "epoch": 1.61, "grad_norm": 5.939148698926264, "learning_rate": 9.857044642330938e-07, "loss": 0.5221, "step": 17997 }, { "epoch": 1.61, "grad_norm": 8.352423426737401, "learning_rate": 9.852738509064935e-07, "loss": 0.5826, "step": 17998 }, { "epoch": 1.61, "grad_norm": 7.074680716908424, "learning_rate": 9.848433213785096e-07, "loss": 0.5723, "step": 17999 }, { "epoch": 1.61, "grad_norm": 7.011741785447262, "learning_rate": 9.844128756581267e-07, "loss": 0.5814, "step": 18000 }, { "epoch": 1.61, "grad_norm": 5.052003390364944, "learning_rate": 9.839825137543313e-07, "loss": 0.5808, "step": 18001 }, { "epoch": 1.61, "grad_norm": 7.321803259024591, "learning_rate": 9.835522356761062e-07, "loss": 0.5946, "step": 18002 }, { "epoch": 1.61, "grad_norm": 7.463009381255687, "learning_rate": 9.83122041432432e-07, "loss": 0.5535, "step": 18003 }, { "epoch": 1.61, "grad_norm": 8.080380687985928, "learning_rate": 9.826919310322885e-07, "loss": 0.6401, "step": 18004 }, { "epoch": 1.61, "grad_norm": 9.839408560340061, "learning_rate": 9.822619044846542e-07, "loss": 0.6069, "step": 18005 }, { "epoch": 1.61, "grad_norm": 7.6711660311823495, "learning_rate": 9.818319617985033e-07, "loss": 0.5354, "step": 18006 }, { "epoch": 1.61, "grad_norm": 10.270535068295798, "learning_rate": 9.814021029828103e-07, "loss": 0.5814, "step": 18007 }, { "epoch": 1.61, "grad_norm": 5.11920750412422, "learning_rate": 9.809723280465472e-07, "loss": 0.5658, "step": 18008 }, { "epoch": 1.61, "grad_norm": 7.1753964861662745, "learning_rate": 9.80542636998686e-07, "loss": 0.6164, "step": 18009 }, { "epoch": 1.61, "grad_norm": 6.426947887606235, "learning_rate": 9.801130298481943e-07, "loss": 0.5783, "step": 18010 }, { "epoch": 1.61, "grad_norm": 7.121693078933553, "learning_rate": 9.79683506604041e-07, "loss": 0.5818, "step": 18011 }, { "epoch": 1.61, "grad_norm": 6.229544697844796, "learning_rate": 9.792540672751894e-07, "loss": 0.5771, "step": 18012 }, { "epoch": 1.61, "grad_norm": 7.372668040645134, "learning_rate": 9.788247118706046e-07, "loss": 0.6475, "step": 18013 }, { "epoch": 1.61, "grad_norm": 9.209672970319012, "learning_rate": 9.783954403992469e-07, "loss": 0.6316, "step": 18014 }, { "epoch": 1.61, "grad_norm": 4.5970458846866, "learning_rate": 9.779662528700767e-07, "loss": 0.6519, "step": 18015 }, { "epoch": 1.61, "grad_norm": 8.894697246666857, "learning_rate": 9.775371492920533e-07, "loss": 0.5726, "step": 18016 }, { "epoch": 1.61, "grad_norm": 7.130659306484971, "learning_rate": 9.771081296741324e-07, "loss": 0.5552, "step": 18017 }, { "epoch": 1.61, "grad_norm": 7.870486430407558, "learning_rate": 9.766791940252713e-07, "loss": 0.629, "step": 18018 }, { "epoch": 1.61, "grad_norm": 6.8728440336023064, "learning_rate": 9.762503423544189e-07, "loss": 0.5838, "step": 18019 }, { "epoch": 1.61, "grad_norm": 5.884969695351848, "learning_rate": 9.758215746705297e-07, "loss": 0.5177, "step": 18020 }, { "epoch": 1.61, "grad_norm": 6.672025775242217, "learning_rate": 9.753928909825517e-07, "loss": 0.4861, "step": 18021 }, { "epoch": 1.61, "grad_norm": 10.288569374391328, "learning_rate": 9.749642912994333e-07, "loss": 0.6394, "step": 18022 }, { "epoch": 1.61, "grad_norm": 8.740721509826953, "learning_rate": 9.74535775630121e-07, "loss": 0.5529, "step": 18023 }, { "epoch": 1.61, "grad_norm": 8.375824903345414, "learning_rate": 9.741073439835602e-07, "loss": 0.6015, "step": 18024 }, { "epoch": 1.61, "grad_norm": 11.260175340558872, "learning_rate": 9.736789963686905e-07, "loss": 0.637, "step": 18025 }, { "epoch": 1.61, "grad_norm": 5.380123289173198, "learning_rate": 9.73250732794454e-07, "loss": 0.6042, "step": 18026 }, { "epoch": 1.61, "grad_norm": 6.995545044959576, "learning_rate": 9.728225532697904e-07, "loss": 0.5772, "step": 18027 }, { "epoch": 1.61, "grad_norm": 5.93833155522414, "learning_rate": 9.723944578036377e-07, "loss": 0.5632, "step": 18028 }, { "epoch": 1.61, "grad_norm": 6.066695618865758, "learning_rate": 9.719664464049288e-07, "loss": 0.5477, "step": 18029 }, { "epoch": 1.61, "grad_norm": 6.232785468837888, "learning_rate": 9.715385190826e-07, "loss": 0.5712, "step": 18030 }, { "epoch": 1.61, "grad_norm": 8.82081922891611, "learning_rate": 9.71110675845583e-07, "loss": 0.6058, "step": 18031 }, { "epoch": 1.61, "grad_norm": 5.3196415182183285, "learning_rate": 9.706829167028065e-07, "loss": 0.6185, "step": 18032 }, { "epoch": 1.61, "grad_norm": 6.96483592228943, "learning_rate": 9.702552416632e-07, "loss": 0.5336, "step": 18033 }, { "epoch": 1.61, "grad_norm": 8.206738380919841, "learning_rate": 9.698276507356902e-07, "loss": 0.6332, "step": 18034 }, { "epoch": 1.61, "grad_norm": 6.805018256680471, "learning_rate": 9.694001439292023e-07, "loss": 0.5762, "step": 18035 }, { "epoch": 1.61, "grad_norm": 5.172714142509349, "learning_rate": 9.689727212526595e-07, "loss": 0.5694, "step": 18036 }, { "epoch": 1.61, "grad_norm": 6.316395020481393, "learning_rate": 9.68545382714985e-07, "loss": 0.5591, "step": 18037 }, { "epoch": 1.61, "grad_norm": 6.50723767082141, "learning_rate": 9.68118128325095e-07, "loss": 0.5189, "step": 18038 }, { "epoch": 1.61, "grad_norm": 5.4464019216834965, "learning_rate": 9.676909580919104e-07, "loss": 0.5395, "step": 18039 }, { "epoch": 1.61, "grad_norm": 6.456851870142474, "learning_rate": 9.672638720243455e-07, "loss": 0.5606, "step": 18040 }, { "epoch": 1.61, "grad_norm": 6.27135875505036, "learning_rate": 9.668368701313163e-07, "loss": 0.5923, "step": 18041 }, { "epoch": 1.61, "grad_norm": 6.2691206161530575, "learning_rate": 9.664099524217357e-07, "loss": 0.6302, "step": 18042 }, { "epoch": 1.61, "grad_norm": 7.840339803473557, "learning_rate": 9.659831189045131e-07, "loss": 0.5394, "step": 18043 }, { "epoch": 1.61, "grad_norm": 7.773929094086008, "learning_rate": 9.655563695885595e-07, "loss": 0.5973, "step": 18044 }, { "epoch": 1.61, "grad_norm": 7.582149149519686, "learning_rate": 9.6512970448278e-07, "loss": 0.6163, "step": 18045 }, { "epoch": 1.61, "grad_norm": 5.987381089511987, "learning_rate": 9.647031235960823e-07, "loss": 0.5758, "step": 18046 }, { "epoch": 1.61, "grad_norm": 8.91635882671805, "learning_rate": 9.642766269373694e-07, "loss": 0.5619, "step": 18047 }, { "epoch": 1.61, "grad_norm": 6.3212839491229165, "learning_rate": 9.638502145155437e-07, "loss": 0.5565, "step": 18048 }, { "epoch": 1.61, "grad_norm": 6.509118485109363, "learning_rate": 9.634238863395063e-07, "loss": 0.6023, "step": 18049 }, { "epoch": 1.61, "grad_norm": 7.281743591361238, "learning_rate": 9.629976424181559e-07, "loss": 0.6121, "step": 18050 }, { "epoch": 1.61, "grad_norm": 6.140922322496242, "learning_rate": 9.625714827603883e-07, "loss": 0.5938, "step": 18051 }, { "epoch": 1.61, "grad_norm": 9.61034415215165, "learning_rate": 9.621454073750985e-07, "loss": 0.6023, "step": 18052 }, { "epoch": 1.61, "grad_norm": 7.656095541778473, "learning_rate": 9.617194162711807e-07, "loss": 0.6243, "step": 18053 }, { "epoch": 1.61, "grad_norm": 9.367966395941323, "learning_rate": 9.612935094575266e-07, "loss": 0.5512, "step": 18054 }, { "epoch": 1.61, "grad_norm": 7.176830376566611, "learning_rate": 9.608676869430256e-07, "loss": 0.5224, "step": 18055 }, { "epoch": 1.61, "grad_norm": 7.931181996276936, "learning_rate": 9.604419487365678e-07, "loss": 0.6333, "step": 18056 }, { "epoch": 1.61, "grad_norm": 6.405671132052591, "learning_rate": 9.600162948470375e-07, "loss": 0.6084, "step": 18057 }, { "epoch": 1.61, "grad_norm": 6.732338912349537, "learning_rate": 9.595907252833186e-07, "loss": 0.5166, "step": 18058 }, { "epoch": 1.61, "grad_norm": 7.1826508953102906, "learning_rate": 9.591652400542944e-07, "loss": 0.5534, "step": 18059 }, { "epoch": 1.61, "grad_norm": 6.293707759915436, "learning_rate": 9.587398391688474e-07, "loss": 0.5868, "step": 18060 }, { "epoch": 1.61, "grad_norm": 6.513590001232611, "learning_rate": 9.583145226358553e-07, "loss": 0.5036, "step": 18061 }, { "epoch": 1.61, "grad_norm": 6.757857985886947, "learning_rate": 9.578892904641968e-07, "loss": 0.5454, "step": 18062 }, { "epoch": 1.61, "grad_norm": 6.210660811821863, "learning_rate": 9.574641426627485e-07, "loss": 0.5832, "step": 18063 }, { "epoch": 1.61, "grad_norm": 7.941386405073426, "learning_rate": 9.570390792403817e-07, "loss": 0.5554, "step": 18064 }, { "epoch": 1.61, "grad_norm": 7.283088427749256, "learning_rate": 9.566141002059704e-07, "loss": 0.5792, "step": 18065 }, { "epoch": 1.61, "grad_norm": 5.903695155945653, "learning_rate": 9.561892055683853e-07, "loss": 0.541, "step": 18066 }, { "epoch": 1.61, "grad_norm": 8.400972708696255, "learning_rate": 9.557643953364941e-07, "loss": 0.5204, "step": 18067 }, { "epoch": 1.61, "grad_norm": 5.356171884780086, "learning_rate": 9.553396695191658e-07, "loss": 0.5825, "step": 18068 }, { "epoch": 1.61, "grad_norm": 6.623470942125871, "learning_rate": 9.549150281252633e-07, "loss": 0.6098, "step": 18069 }, { "epoch": 1.61, "grad_norm": 7.520050788493921, "learning_rate": 9.54490471163651e-07, "loss": 0.5763, "step": 18070 }, { "epoch": 1.61, "grad_norm": 4.968596769330583, "learning_rate": 9.540659986431915e-07, "loss": 0.6135, "step": 18071 }, { "epoch": 1.61, "grad_norm": 8.469512765493613, "learning_rate": 9.536416105727425e-07, "loss": 0.5946, "step": 18072 }, { "epoch": 1.61, "grad_norm": 7.12278582604844, "learning_rate": 9.532173069611639e-07, "loss": 0.615, "step": 18073 }, { "epoch": 1.61, "grad_norm": 6.958245490925318, "learning_rate": 9.527930878173114e-07, "loss": 0.5816, "step": 18074 }, { "epoch": 1.61, "grad_norm": 8.316766192917656, "learning_rate": 9.523689531500413e-07, "loss": 0.6127, "step": 18075 }, { "epoch": 1.61, "grad_norm": 7.193514553598102, "learning_rate": 9.519449029682043e-07, "loss": 0.5993, "step": 18076 }, { "epoch": 1.61, "grad_norm": 5.963136570665678, "learning_rate": 9.515209372806516e-07, "loss": 0.6115, "step": 18077 }, { "epoch": 1.61, "grad_norm": 6.952080538996575, "learning_rate": 9.510970560962341e-07, "loss": 0.544, "step": 18078 }, { "epoch": 1.61, "grad_norm": 5.212647188095568, "learning_rate": 9.506732594237983e-07, "loss": 0.619, "step": 18079 }, { "epoch": 1.61, "grad_norm": 6.80202706092716, "learning_rate": 9.502495472721901e-07, "loss": 0.5578, "step": 18080 }, { "epoch": 1.61, "grad_norm": 8.666234992907185, "learning_rate": 9.498259196502551e-07, "loss": 0.558, "step": 18081 }, { "epoch": 1.61, "grad_norm": 6.382497703328379, "learning_rate": 9.494023765668331e-07, "loss": 0.5952, "step": 18082 }, { "epoch": 1.61, "grad_norm": 8.032225442150004, "learning_rate": 9.489789180307663e-07, "loss": 0.6117, "step": 18083 }, { "epoch": 1.61, "grad_norm": 9.018436103334455, "learning_rate": 9.485555440508926e-07, "loss": 0.5011, "step": 18084 }, { "epoch": 1.61, "grad_norm": 5.806528618356726, "learning_rate": 9.481322546360506e-07, "loss": 0.5781, "step": 18085 }, { "epoch": 1.61, "grad_norm": 6.460995935337576, "learning_rate": 9.47709049795073e-07, "loss": 0.5743, "step": 18086 }, { "epoch": 1.61, "grad_norm": 7.754529860536604, "learning_rate": 9.472859295367948e-07, "loss": 0.5801, "step": 18087 }, { "epoch": 1.61, "grad_norm": 6.895373505525724, "learning_rate": 9.468628938700486e-07, "loss": 0.5663, "step": 18088 }, { "epoch": 1.61, "grad_norm": 7.586870168147743, "learning_rate": 9.464399428036624e-07, "loss": 0.5567, "step": 18089 }, { "epoch": 1.61, "grad_norm": 6.792852011115068, "learning_rate": 9.460170763464649e-07, "loss": 0.5966, "step": 18090 }, { "epoch": 1.61, "grad_norm": 6.760744210161751, "learning_rate": 9.455942945072832e-07, "loss": 0.5565, "step": 18091 }, { "epoch": 1.61, "grad_norm": 5.396715946723238, "learning_rate": 9.451715972949411e-07, "loss": 0.5727, "step": 18092 }, { "epoch": 1.61, "grad_norm": 8.58112569948942, "learning_rate": 9.447489847182623e-07, "loss": 0.5698, "step": 18093 }, { "epoch": 1.61, "grad_norm": 7.372931367343568, "learning_rate": 9.443264567860689e-07, "loss": 0.5097, "step": 18094 }, { "epoch": 1.61, "grad_norm": 7.172780107970153, "learning_rate": 9.439040135071775e-07, "loss": 0.5947, "step": 18095 }, { "epoch": 1.61, "grad_norm": 5.582566467700685, "learning_rate": 9.434816548904075e-07, "loss": 0.5538, "step": 18096 }, { "epoch": 1.61, "grad_norm": 8.554207934452519, "learning_rate": 9.430593809445737e-07, "loss": 0.545, "step": 18097 }, { "epoch": 1.61, "grad_norm": 8.129507860853522, "learning_rate": 9.426371916784915e-07, "loss": 0.6629, "step": 18098 }, { "epoch": 1.61, "grad_norm": 7.465862128530161, "learning_rate": 9.422150871009722e-07, "loss": 0.6073, "step": 18099 }, { "epoch": 1.61, "grad_norm": 5.1068664945842315, "learning_rate": 9.417930672208275e-07, "loss": 0.5436, "step": 18100 }, { "epoch": 1.61, "grad_norm": 7.111132390917083, "learning_rate": 9.413711320468655e-07, "loss": 0.5927, "step": 18101 }, { "epoch": 1.61, "grad_norm": 8.194102502326075, "learning_rate": 9.409492815878913e-07, "loss": 0.5917, "step": 18102 }, { "epoch": 1.61, "grad_norm": 8.904928591343761, "learning_rate": 9.405275158527122e-07, "loss": 0.5543, "step": 18103 }, { "epoch": 1.62, "grad_norm": 6.794164152941984, "learning_rate": 9.401058348501308e-07, "loss": 0.5024, "step": 18104 }, { "epoch": 1.62, "grad_norm": 6.56897720431886, "learning_rate": 9.396842385889488e-07, "loss": 0.5491, "step": 18105 }, { "epoch": 1.62, "grad_norm": 8.179582842668646, "learning_rate": 9.39262727077967e-07, "loss": 0.5528, "step": 18106 }, { "epoch": 1.62, "grad_norm": 6.793682072915202, "learning_rate": 9.388413003259838e-07, "loss": 0.6067, "step": 18107 }, { "epoch": 1.62, "grad_norm": 9.306617016808637, "learning_rate": 9.38419958341793e-07, "loss": 0.5805, "step": 18108 }, { "epoch": 1.62, "grad_norm": 6.500871052048031, "learning_rate": 9.379987011341917e-07, "loss": 0.6502, "step": 18109 }, { "epoch": 1.62, "grad_norm": 7.760892511905849, "learning_rate": 9.375775287119715e-07, "loss": 0.5615, "step": 18110 }, { "epoch": 1.62, "grad_norm": 8.650118440272678, "learning_rate": 9.371564410839235e-07, "loss": 0.6076, "step": 18111 }, { "epoch": 1.62, "grad_norm": 8.412104921769325, "learning_rate": 9.367354382588378e-07, "loss": 0.5963, "step": 18112 }, { "epoch": 1.62, "grad_norm": 5.0778008201289815, "learning_rate": 9.363145202455026e-07, "loss": 0.615, "step": 18113 }, { "epoch": 1.62, "grad_norm": 8.983094967584496, "learning_rate": 9.358936870527025e-07, "loss": 0.6083, "step": 18114 }, { "epoch": 1.62, "grad_norm": 6.88732145503271, "learning_rate": 9.354729386892198e-07, "loss": 0.5522, "step": 18115 }, { "epoch": 1.62, "grad_norm": 6.546466854323706, "learning_rate": 9.350522751638386e-07, "loss": 0.5408, "step": 18116 }, { "epoch": 1.62, "grad_norm": 6.809403545064443, "learning_rate": 9.346316964853391e-07, "loss": 0.5425, "step": 18117 }, { "epoch": 1.62, "grad_norm": 6.663529293114206, "learning_rate": 9.342112026625e-07, "loss": 0.5507, "step": 18118 }, { "epoch": 1.62, "grad_norm": 6.41937619631052, "learning_rate": 9.33790793704098e-07, "loss": 0.6171, "step": 18119 }, { "epoch": 1.62, "grad_norm": 8.204743906589467, "learning_rate": 9.3337046961891e-07, "loss": 0.6251, "step": 18120 }, { "epoch": 1.62, "grad_norm": 8.289674382428082, "learning_rate": 9.329502304157057e-07, "loss": 0.592, "step": 18121 }, { "epoch": 1.62, "grad_norm": 7.308616186988823, "learning_rate": 9.32530076103259e-07, "loss": 0.549, "step": 18122 }, { "epoch": 1.62, "grad_norm": 7.4421348802028575, "learning_rate": 9.321100066903394e-07, "loss": 0.5451, "step": 18123 }, { "epoch": 1.62, "grad_norm": 6.5302337458914455, "learning_rate": 9.316900221857151e-07, "loss": 0.6012, "step": 18124 }, { "epoch": 1.62, "grad_norm": 6.524583398293805, "learning_rate": 9.312701225981518e-07, "loss": 0.5795, "step": 18125 }, { "epoch": 1.62, "grad_norm": 5.593520559160312, "learning_rate": 9.308503079364156e-07, "loss": 0.5382, "step": 18126 }, { "epoch": 1.62, "grad_norm": 5.445094326605866, "learning_rate": 9.304305782092666e-07, "loss": 0.5515, "step": 18127 }, { "epoch": 1.62, "grad_norm": 6.303265705376453, "learning_rate": 9.300109334254676e-07, "loss": 0.5719, "step": 18128 }, { "epoch": 1.62, "grad_norm": 8.090124058031757, "learning_rate": 9.295913735937778e-07, "loss": 0.5305, "step": 18129 }, { "epoch": 1.62, "grad_norm": 6.682520079975063, "learning_rate": 9.291718987229531e-07, "loss": 0.5346, "step": 18130 }, { "epoch": 1.62, "grad_norm": 7.4746586132778035, "learning_rate": 9.287525088217503e-07, "loss": 0.5723, "step": 18131 }, { "epoch": 1.62, "grad_norm": 5.945535703779306, "learning_rate": 9.283332038989223e-07, "loss": 0.6007, "step": 18132 }, { "epoch": 1.62, "grad_norm": 6.425709410167282, "learning_rate": 9.27913983963224e-07, "loss": 0.5145, "step": 18133 }, { "epoch": 1.62, "grad_norm": 6.893429475420057, "learning_rate": 9.27494849023402e-07, "loss": 0.6322, "step": 18134 }, { "epoch": 1.62, "grad_norm": 5.993502580428424, "learning_rate": 9.270757990882062e-07, "loss": 0.5916, "step": 18135 }, { "epoch": 1.62, "grad_norm": 5.6011847511657775, "learning_rate": 9.266568341663834e-07, "loss": 0.5758, "step": 18136 }, { "epoch": 1.62, "grad_norm": 7.46842099981995, "learning_rate": 9.262379542666789e-07, "loss": 0.6099, "step": 18137 }, { "epoch": 1.62, "grad_norm": 7.7779002170033396, "learning_rate": 9.258191593978361e-07, "loss": 0.6607, "step": 18138 }, { "epoch": 1.62, "grad_norm": 7.280392844724336, "learning_rate": 9.25400449568597e-07, "loss": 0.5746, "step": 18139 }, { "epoch": 1.62, "grad_norm": 5.997898779026832, "learning_rate": 9.249818247876985e-07, "loss": 0.5594, "step": 18140 }, { "epoch": 1.62, "grad_norm": 6.430487917373639, "learning_rate": 9.245632850638808e-07, "loss": 0.6142, "step": 18141 }, { "epoch": 1.62, "grad_norm": 7.775979933853435, "learning_rate": 9.241448304058797e-07, "loss": 0.6309, "step": 18142 }, { "epoch": 1.62, "grad_norm": 7.0164742740793615, "learning_rate": 9.2372646082243e-07, "loss": 0.5896, "step": 18143 }, { "epoch": 1.62, "grad_norm": 8.06522074831444, "learning_rate": 9.233081763222629e-07, "loss": 0.5374, "step": 18144 }, { "epoch": 1.62, "grad_norm": 8.90661086655355, "learning_rate": 9.228899769141092e-07, "loss": 0.6205, "step": 18145 }, { "epoch": 1.62, "grad_norm": 8.862815489606257, "learning_rate": 9.224718626067003e-07, "loss": 0.595, "step": 18146 }, { "epoch": 1.62, "grad_norm": 6.364263405771982, "learning_rate": 9.220538334087603e-07, "loss": 0.5547, "step": 18147 }, { "epoch": 1.62, "grad_norm": 5.647371388757861, "learning_rate": 9.216358893290156e-07, "loss": 0.5403, "step": 18148 }, { "epoch": 1.62, "grad_norm": 5.4180001227946075, "learning_rate": 9.212180303761902e-07, "loss": 0.5645, "step": 18149 }, { "epoch": 1.62, "grad_norm": 7.180771357419384, "learning_rate": 9.208002565590063e-07, "loss": 0.6358, "step": 18150 }, { "epoch": 1.62, "grad_norm": 5.777220386331927, "learning_rate": 9.203825678861855e-07, "loss": 0.5419, "step": 18151 }, { "epoch": 1.62, "grad_norm": 6.947930827615376, "learning_rate": 9.199649643664427e-07, "loss": 0.5598, "step": 18152 }, { "epoch": 1.62, "grad_norm": 7.542144097294394, "learning_rate": 9.195474460084957e-07, "loss": 0.5338, "step": 18153 }, { "epoch": 1.62, "grad_norm": 7.6298807296404645, "learning_rate": 9.191300128210601e-07, "loss": 0.5691, "step": 18154 }, { "epoch": 1.62, "grad_norm": 5.815459108308268, "learning_rate": 9.187126648128486e-07, "loss": 0.5316, "step": 18155 }, { "epoch": 1.62, "grad_norm": 4.93702853052063, "learning_rate": 9.182954019925722e-07, "loss": 0.5578, "step": 18156 }, { "epoch": 1.62, "grad_norm": 5.2538920253433465, "learning_rate": 9.178782243689422e-07, "loss": 0.5497, "step": 18157 }, { "epoch": 1.62, "grad_norm": 7.481762684575561, "learning_rate": 9.174611319506627e-07, "loss": 0.6307, "step": 18158 }, { "epoch": 1.62, "grad_norm": 7.274444107403817, "learning_rate": 9.170441247464429e-07, "loss": 0.5542, "step": 18159 }, { "epoch": 1.62, "grad_norm": 7.146270320364464, "learning_rate": 9.166272027649842e-07, "loss": 0.5264, "step": 18160 }, { "epoch": 1.62, "grad_norm": 7.319799178275437, "learning_rate": 9.162103660149907e-07, "loss": 0.5936, "step": 18161 }, { "epoch": 1.62, "grad_norm": 9.345474281761662, "learning_rate": 9.157936145051616e-07, "loss": 0.5404, "step": 18162 }, { "epoch": 1.62, "grad_norm": 7.591932417497094, "learning_rate": 9.153769482441971e-07, "loss": 0.5788, "step": 18163 }, { "epoch": 1.62, "grad_norm": 7.3519645834185745, "learning_rate": 9.149603672407948e-07, "loss": 0.5785, "step": 18164 }, { "epoch": 1.62, "grad_norm": 8.629589389749933, "learning_rate": 9.145438715036476e-07, "loss": 0.571, "step": 18165 }, { "epoch": 1.62, "grad_norm": 7.90974301994723, "learning_rate": 9.141274610414502e-07, "loss": 0.6099, "step": 18166 }, { "epoch": 1.62, "grad_norm": 8.058936816321117, "learning_rate": 9.137111358628936e-07, "loss": 0.5566, "step": 18167 }, { "epoch": 1.62, "grad_norm": 8.54178005201007, "learning_rate": 9.132948959766685e-07, "loss": 0.569, "step": 18168 }, { "epoch": 1.62, "grad_norm": 6.083389351051305, "learning_rate": 9.12878741391463e-07, "loss": 0.591, "step": 18169 }, { "epoch": 1.62, "grad_norm": 5.937526517419829, "learning_rate": 9.124626721159641e-07, "loss": 0.5185, "step": 18170 }, { "epoch": 1.62, "grad_norm": 6.642668794253535, "learning_rate": 9.120466881588541e-07, "loss": 0.6023, "step": 18171 }, { "epoch": 1.62, "grad_norm": 7.5647297583179665, "learning_rate": 9.116307895288184e-07, "loss": 0.5742, "step": 18172 }, { "epoch": 1.62, "grad_norm": 5.550828858481529, "learning_rate": 9.112149762345351e-07, "loss": 0.5813, "step": 18173 }, { "epoch": 1.62, "grad_norm": 7.251916190084358, "learning_rate": 9.107992482846851e-07, "loss": 0.6446, "step": 18174 }, { "epoch": 1.62, "grad_norm": 4.755232465377591, "learning_rate": 9.103836056879456e-07, "loss": 0.4867, "step": 18175 }, { "epoch": 1.62, "grad_norm": 9.11630851057307, "learning_rate": 9.099680484529922e-07, "loss": 0.5677, "step": 18176 }, { "epoch": 1.62, "grad_norm": 6.4569925344765995, "learning_rate": 9.095525765884999e-07, "loss": 0.5735, "step": 18177 }, { "epoch": 1.62, "grad_norm": 6.0885229444357, "learning_rate": 9.091371901031387e-07, "loss": 0.5577, "step": 18178 }, { "epoch": 1.62, "grad_norm": 7.111844569047185, "learning_rate": 9.087218890055794e-07, "loss": 0.5698, "step": 18179 }, { "epoch": 1.62, "grad_norm": 6.5841871307804745, "learning_rate": 9.08306673304491e-07, "loss": 0.5376, "step": 18180 }, { "epoch": 1.62, "grad_norm": 6.39593420742997, "learning_rate": 9.078915430085406e-07, "loss": 0.5835, "step": 18181 }, { "epoch": 1.62, "grad_norm": 6.65293981552305, "learning_rate": 9.074764981263922e-07, "loss": 0.5567, "step": 18182 }, { "epoch": 1.62, "grad_norm": 5.672798146785269, "learning_rate": 9.070615386667108e-07, "loss": 0.5872, "step": 18183 }, { "epoch": 1.62, "grad_norm": 8.071761118214333, "learning_rate": 9.066466646381549e-07, "loss": 0.5935, "step": 18184 }, { "epoch": 1.62, "grad_norm": 6.96932499253134, "learning_rate": 9.062318760493861e-07, "loss": 0.5416, "step": 18185 }, { "epoch": 1.62, "grad_norm": 5.768353946733788, "learning_rate": 9.058171729090626e-07, "loss": 0.5886, "step": 18186 }, { "epoch": 1.62, "grad_norm": 5.6956942295698685, "learning_rate": 9.054025552258384e-07, "loss": 0.5377, "step": 18187 }, { "epoch": 1.62, "grad_norm": 5.7887822762732215, "learning_rate": 9.049880230083686e-07, "loss": 0.6197, "step": 18188 }, { "epoch": 1.62, "grad_norm": 8.413489361931383, "learning_rate": 9.045735762653062e-07, "loss": 0.5703, "step": 18189 }, { "epoch": 1.62, "grad_norm": 6.861512754231773, "learning_rate": 9.041592150053025e-07, "loss": 0.6017, "step": 18190 }, { "epoch": 1.62, "grad_norm": 6.568912488610636, "learning_rate": 9.037449392370046e-07, "loss": 0.5553, "step": 18191 }, { "epoch": 1.62, "grad_norm": 8.638129074091095, "learning_rate": 9.033307489690602e-07, "loss": 0.6256, "step": 18192 }, { "epoch": 1.62, "grad_norm": 7.07565636234224, "learning_rate": 9.029166442101151e-07, "loss": 0.5528, "step": 18193 }, { "epoch": 1.62, "grad_norm": 6.172719327698289, "learning_rate": 9.025026249688124e-07, "loss": 0.6111, "step": 18194 }, { "epoch": 1.62, "grad_norm": 6.35347233747857, "learning_rate": 9.02088691253794e-07, "loss": 0.5738, "step": 18195 }, { "epoch": 1.62, "grad_norm": 6.545685696964957, "learning_rate": 9.01674843073701e-07, "loss": 0.5307, "step": 18196 }, { "epoch": 1.62, "grad_norm": 5.778536255055442, "learning_rate": 9.012610804371691e-07, "loss": 0.5791, "step": 18197 }, { "epoch": 1.62, "grad_norm": 6.150687859433288, "learning_rate": 9.008474033528364e-07, "loss": 0.5596, "step": 18198 }, { "epoch": 1.62, "grad_norm": 6.665846221871463, "learning_rate": 9.004338118293371e-07, "loss": 0.5095, "step": 18199 }, { "epoch": 1.62, "grad_norm": 6.2974351778042745, "learning_rate": 9.000203058753043e-07, "loss": 0.5658, "step": 18200 }, { "epoch": 1.62, "grad_norm": 4.938608079163612, "learning_rate": 8.9960688549937e-07, "loss": 0.585, "step": 18201 }, { "epoch": 1.62, "grad_norm": 6.878645190647172, "learning_rate": 8.991935507101607e-07, "loss": 0.4916, "step": 18202 }, { "epoch": 1.62, "grad_norm": 7.44400088175061, "learning_rate": 8.987803015163071e-07, "loss": 0.5172, "step": 18203 }, { "epoch": 1.62, "grad_norm": 6.929406495873715, "learning_rate": 8.983671379264313e-07, "loss": 0.5805, "step": 18204 }, { "epoch": 1.62, "grad_norm": 7.483117842506898, "learning_rate": 8.979540599491598e-07, "loss": 0.5957, "step": 18205 }, { "epoch": 1.62, "grad_norm": 4.781265618241893, "learning_rate": 8.975410675931134e-07, "loss": 0.572, "step": 18206 }, { "epoch": 1.62, "grad_norm": 7.4633718299878975, "learning_rate": 8.971281608669131e-07, "loss": 0.5789, "step": 18207 }, { "epoch": 1.62, "grad_norm": 5.806068469258499, "learning_rate": 8.967153397791778e-07, "loss": 0.5418, "step": 18208 }, { "epoch": 1.62, "grad_norm": 7.028874673802735, "learning_rate": 8.963026043385243e-07, "loss": 0.5481, "step": 18209 }, { "epoch": 1.62, "grad_norm": 6.839980543359812, "learning_rate": 8.958899545535659e-07, "loss": 0.5443, "step": 18210 }, { "epoch": 1.62, "grad_norm": 5.907919751102488, "learning_rate": 8.954773904329172e-07, "loss": 0.5791, "step": 18211 }, { "epoch": 1.62, "grad_norm": 6.4091860759876695, "learning_rate": 8.95064911985189e-07, "loss": 0.555, "step": 18212 }, { "epoch": 1.62, "grad_norm": 6.105995657521063, "learning_rate": 8.946525192189909e-07, "loss": 0.5658, "step": 18213 }, { "epoch": 1.62, "grad_norm": 6.083985179423467, "learning_rate": 8.942402121429311e-07, "loss": 0.5575, "step": 18214 }, { "epoch": 1.62, "grad_norm": 8.157613019451317, "learning_rate": 8.938279907656172e-07, "loss": 0.523, "step": 18215 }, { "epoch": 1.63, "grad_norm": 7.404431203815068, "learning_rate": 8.934158550956507e-07, "loss": 0.5383, "step": 18216 }, { "epoch": 1.63, "grad_norm": 7.06904448834443, "learning_rate": 8.930038051416345e-07, "loss": 0.5917, "step": 18217 }, { "epoch": 1.63, "grad_norm": 6.411463415487744, "learning_rate": 8.925918409121692e-07, "loss": 0.536, "step": 18218 }, { "epoch": 1.63, "grad_norm": 8.44280171565088, "learning_rate": 8.921799624158545e-07, "loss": 0.5371, "step": 18219 }, { "epoch": 1.63, "grad_norm": 6.989123998941415, "learning_rate": 8.917681696612868e-07, "loss": 0.5949, "step": 18220 }, { "epoch": 1.63, "grad_norm": 7.411109279427316, "learning_rate": 8.913564626570614e-07, "loss": 0.5828, "step": 18221 }, { "epoch": 1.63, "grad_norm": 11.50710360909609, "learning_rate": 8.909448414117739e-07, "loss": 0.6403, "step": 18222 }, { "epoch": 1.63, "grad_norm": 8.171957785944814, "learning_rate": 8.905333059340126e-07, "loss": 0.5338, "step": 18223 }, { "epoch": 1.63, "grad_norm": 9.368755365464496, "learning_rate": 8.901218562323693e-07, "loss": 0.6161, "step": 18224 }, { "epoch": 1.63, "grad_norm": 6.627411603439027, "learning_rate": 8.897104923154314e-07, "loss": 0.5799, "step": 18225 }, { "epoch": 1.63, "grad_norm": 7.5215441214076915, "learning_rate": 8.892992141917855e-07, "loss": 0.5806, "step": 18226 }, { "epoch": 1.63, "grad_norm": 6.254197858888724, "learning_rate": 8.88888021870018e-07, "loss": 0.5572, "step": 18227 }, { "epoch": 1.63, "grad_norm": 8.07503258611836, "learning_rate": 8.88476915358708e-07, "loss": 0.5766, "step": 18228 }, { "epoch": 1.63, "grad_norm": 5.243080125410569, "learning_rate": 8.880658946664389e-07, "loss": 0.5344, "step": 18229 }, { "epoch": 1.63, "grad_norm": 5.724137989824178, "learning_rate": 8.876549598017902e-07, "loss": 0.5555, "step": 18230 }, { "epoch": 1.63, "grad_norm": 10.768252537941144, "learning_rate": 8.87244110773337e-07, "loss": 0.6036, "step": 18231 }, { "epoch": 1.63, "grad_norm": 5.927711266582771, "learning_rate": 8.868333475896562e-07, "loss": 0.5998, "step": 18232 }, { "epoch": 1.63, "grad_norm": 7.3889397044285, "learning_rate": 8.864226702593221e-07, "loss": 0.5324, "step": 18233 }, { "epoch": 1.63, "grad_norm": 6.516824043163474, "learning_rate": 8.860120787909054e-07, "loss": 0.5646, "step": 18234 }, { "epoch": 1.63, "grad_norm": 7.159444074707801, "learning_rate": 8.85601573192979e-07, "loss": 0.5946, "step": 18235 }, { "epoch": 1.63, "grad_norm": 6.050766597138478, "learning_rate": 8.851911534741075e-07, "loss": 0.5491, "step": 18236 }, { "epoch": 1.63, "grad_norm": 7.330062601257174, "learning_rate": 8.847808196428598e-07, "loss": 0.6027, "step": 18237 }, { "epoch": 1.63, "grad_norm": 6.45215967532486, "learning_rate": 8.843705717078005e-07, "loss": 0.5824, "step": 18238 }, { "epoch": 1.63, "grad_norm": 5.742406344323576, "learning_rate": 8.839604096774918e-07, "loss": 0.5417, "step": 18239 }, { "epoch": 1.63, "grad_norm": 9.617684973221731, "learning_rate": 8.835503335604972e-07, "loss": 0.6004, "step": 18240 }, { "epoch": 1.63, "grad_norm": 9.355380586833276, "learning_rate": 8.831403433653735e-07, "loss": 0.6032, "step": 18241 }, { "epoch": 1.63, "grad_norm": 5.985660832061222, "learning_rate": 8.827304391006791e-07, "loss": 0.6157, "step": 18242 }, { "epoch": 1.63, "grad_norm": 6.875050018215632, "learning_rate": 8.823206207749702e-07, "loss": 0.5809, "step": 18243 }, { "epoch": 1.63, "grad_norm": 9.104459750617547, "learning_rate": 8.81910888396802e-07, "loss": 0.6527, "step": 18244 }, { "epoch": 1.63, "grad_norm": 7.280077704248827, "learning_rate": 8.815012419747243e-07, "loss": 0.5491, "step": 18245 }, { "epoch": 1.63, "grad_norm": 7.965060210899694, "learning_rate": 8.810916815172888e-07, "loss": 0.6062, "step": 18246 }, { "epoch": 1.63, "grad_norm": 5.943564385547581, "learning_rate": 8.806822070330456e-07, "loss": 0.5667, "step": 18247 }, { "epoch": 1.63, "grad_norm": 6.408258668217703, "learning_rate": 8.802728185305387e-07, "loss": 0.515, "step": 18248 }, { "epoch": 1.63, "grad_norm": 6.423691706315809, "learning_rate": 8.798635160183145e-07, "loss": 0.6134, "step": 18249 }, { "epoch": 1.63, "grad_norm": 6.431795502419906, "learning_rate": 8.794542995049166e-07, "loss": 0.567, "step": 18250 }, { "epoch": 1.63, "grad_norm": 6.685827884063977, "learning_rate": 8.790451689988866e-07, "loss": 0.5943, "step": 18251 }, { "epoch": 1.63, "grad_norm": 7.130129509872973, "learning_rate": 8.786361245087638e-07, "loss": 0.5533, "step": 18252 }, { "epoch": 1.63, "grad_norm": 5.670509791079872, "learning_rate": 8.782271660430875e-07, "loss": 0.5534, "step": 18253 }, { "epoch": 1.63, "grad_norm": 5.294371334691596, "learning_rate": 8.778182936103913e-07, "loss": 0.5361, "step": 18254 }, { "epoch": 1.63, "grad_norm": 5.864417934365739, "learning_rate": 8.774095072192107e-07, "loss": 0.5456, "step": 18255 }, { "epoch": 1.63, "grad_norm": 6.787434693839971, "learning_rate": 8.770008068780789e-07, "loss": 0.5451, "step": 18256 }, { "epoch": 1.63, "grad_norm": 6.0494658661463685, "learning_rate": 8.765921925955256e-07, "loss": 0.5724, "step": 18257 }, { "epoch": 1.63, "grad_norm": 6.315313633618835, "learning_rate": 8.761836643800814e-07, "loss": 0.504, "step": 18258 }, { "epoch": 1.63, "grad_norm": 7.466149561554944, "learning_rate": 8.75775222240271e-07, "loss": 0.5926, "step": 18259 }, { "epoch": 1.63, "grad_norm": 7.430287093721745, "learning_rate": 8.75366866184622e-07, "loss": 0.5838, "step": 18260 }, { "epoch": 1.63, "grad_norm": 6.413733393376896, "learning_rate": 8.749585962216555e-07, "loss": 0.611, "step": 18261 }, { "epoch": 1.63, "grad_norm": 8.230063366444163, "learning_rate": 8.745504123598953e-07, "loss": 0.5847, "step": 18262 }, { "epoch": 1.63, "grad_norm": 6.682211424666847, "learning_rate": 8.741423146078604e-07, "loss": 0.5727, "step": 18263 }, { "epoch": 1.63, "grad_norm": 6.275878777534016, "learning_rate": 8.737343029740685e-07, "loss": 0.56, "step": 18264 }, { "epoch": 1.63, "grad_norm": 7.115965077793975, "learning_rate": 8.733263774670375e-07, "loss": 0.5803, "step": 18265 }, { "epoch": 1.63, "grad_norm": 6.358341814071191, "learning_rate": 8.72918538095282e-07, "loss": 0.5739, "step": 18266 }, { "epoch": 1.63, "grad_norm": 6.28456975753307, "learning_rate": 8.725107848673126e-07, "loss": 0.5502, "step": 18267 }, { "epoch": 1.63, "grad_norm": 6.686623893231531, "learning_rate": 8.721031177916417e-07, "loss": 0.5896, "step": 18268 }, { "epoch": 1.63, "grad_norm": 4.973361288364852, "learning_rate": 8.716955368767783e-07, "loss": 0.5404, "step": 18269 }, { "epoch": 1.63, "grad_norm": 5.459976452132031, "learning_rate": 8.712880421312297e-07, "loss": 0.5348, "step": 18270 }, { "epoch": 1.63, "grad_norm": 6.804935385000885, "learning_rate": 8.708806335635012e-07, "loss": 0.6035, "step": 18271 }, { "epoch": 1.63, "grad_norm": 9.144761355081727, "learning_rate": 8.704733111820985e-07, "loss": 0.5538, "step": 18272 }, { "epoch": 1.63, "grad_norm": 5.552658767156932, "learning_rate": 8.70066074995522e-07, "loss": 0.5763, "step": 18273 }, { "epoch": 1.63, "grad_norm": 7.439231166025338, "learning_rate": 8.6965892501227e-07, "loss": 0.5602, "step": 18274 }, { "epoch": 1.63, "grad_norm": 8.355927689980527, "learning_rate": 8.69251861240843e-07, "loss": 0.61, "step": 18275 }, { "epoch": 1.63, "grad_norm": 7.6491556802078415, "learning_rate": 8.688448836897373e-07, "loss": 0.5976, "step": 18276 }, { "epoch": 1.63, "grad_norm": 6.6985071160082565, "learning_rate": 8.684379923674474e-07, "loss": 0.6608, "step": 18277 }, { "epoch": 1.63, "grad_norm": 5.910953575720706, "learning_rate": 8.680311872824665e-07, "loss": 0.5628, "step": 18278 }, { "epoch": 1.63, "grad_norm": 7.26750072139701, "learning_rate": 8.676244684432866e-07, "loss": 0.5808, "step": 18279 }, { "epoch": 1.63, "grad_norm": 6.95859683877297, "learning_rate": 8.672178358583955e-07, "loss": 0.5811, "step": 18280 }, { "epoch": 1.63, "grad_norm": 6.447412100279043, "learning_rate": 8.668112895362812e-07, "loss": 0.6012, "step": 18281 }, { "epoch": 1.63, "grad_norm": 7.9770528790982596, "learning_rate": 8.664048294854299e-07, "loss": 0.5857, "step": 18282 }, { "epoch": 1.63, "grad_norm": 8.00016604063552, "learning_rate": 8.659984557143248e-07, "loss": 0.6173, "step": 18283 }, { "epoch": 1.63, "grad_norm": 9.09395605703525, "learning_rate": 8.655921682314489e-07, "loss": 0.5814, "step": 18284 }, { "epoch": 1.63, "grad_norm": 9.654424856580553, "learning_rate": 8.651859670452839e-07, "loss": 0.6362, "step": 18285 }, { "epoch": 1.63, "grad_norm": 8.086065815868347, "learning_rate": 8.647798521643053e-07, "loss": 0.6027, "step": 18286 }, { "epoch": 1.63, "grad_norm": 6.996278365937551, "learning_rate": 8.643738235969923e-07, "loss": 0.5732, "step": 18287 }, { "epoch": 1.63, "grad_norm": 10.683845737971797, "learning_rate": 8.639678813518176e-07, "loss": 0.6267, "step": 18288 }, { "epoch": 1.63, "grad_norm": 8.447386372645871, "learning_rate": 8.635620254372562e-07, "loss": 0.5446, "step": 18289 }, { "epoch": 1.63, "grad_norm": 6.609530180147943, "learning_rate": 8.631562558617784e-07, "loss": 0.6245, "step": 18290 }, { "epoch": 1.63, "grad_norm": 4.91763858528652, "learning_rate": 8.627505726338542e-07, "loss": 0.5398, "step": 18291 }, { "epoch": 1.63, "grad_norm": 9.5815907877699, "learning_rate": 8.623449757619529e-07, "loss": 0.6624, "step": 18292 }, { "epoch": 1.63, "grad_norm": 5.270017800213312, "learning_rate": 8.619394652545376e-07, "loss": 0.5495, "step": 18293 }, { "epoch": 1.63, "grad_norm": 7.991917250804576, "learning_rate": 8.615340411200734e-07, "loss": 0.5894, "step": 18294 }, { "epoch": 1.63, "grad_norm": 7.465456364022198, "learning_rate": 8.611287033670234e-07, "loss": 0.5374, "step": 18295 }, { "epoch": 1.63, "grad_norm": 5.387797504034625, "learning_rate": 8.607234520038477e-07, "loss": 0.5616, "step": 18296 }, { "epoch": 1.63, "grad_norm": 5.996561667529507, "learning_rate": 8.603182870390048e-07, "loss": 0.5509, "step": 18297 }, { "epoch": 1.63, "grad_norm": 8.091799103368901, "learning_rate": 8.599132084809536e-07, "loss": 0.6327, "step": 18298 }, { "epoch": 1.63, "grad_norm": 11.983132548471609, "learning_rate": 8.595082163381458e-07, "loss": 0.56, "step": 18299 }, { "epoch": 1.63, "grad_norm": 9.926060371433811, "learning_rate": 8.591033106190372e-07, "loss": 0.5755, "step": 18300 }, { "epoch": 1.63, "grad_norm": 5.970776273723415, "learning_rate": 8.586984913320778e-07, "loss": 0.6005, "step": 18301 }, { "epoch": 1.63, "grad_norm": 4.68789766673741, "learning_rate": 8.582937584857198e-07, "loss": 0.5841, "step": 18302 }, { "epoch": 1.63, "grad_norm": 4.882176588253618, "learning_rate": 8.578891120884081e-07, "loss": 0.5497, "step": 18303 }, { "epoch": 1.63, "grad_norm": 8.244517265503795, "learning_rate": 8.574845521485897e-07, "loss": 0.5553, "step": 18304 }, { "epoch": 1.63, "grad_norm": 7.54278847806572, "learning_rate": 8.570800786747107e-07, "loss": 0.5874, "step": 18305 }, { "epoch": 1.63, "grad_norm": 8.557764684020256, "learning_rate": 8.566756916752117e-07, "loss": 0.6201, "step": 18306 }, { "epoch": 1.63, "grad_norm": 7.631090945695476, "learning_rate": 8.56271391158533e-07, "loss": 0.57, "step": 18307 }, { "epoch": 1.63, "grad_norm": 6.631327422169233, "learning_rate": 8.55867177133115e-07, "loss": 0.5737, "step": 18308 }, { "epoch": 1.63, "grad_norm": 7.265102732421731, "learning_rate": 8.55463049607394e-07, "loss": 0.5393, "step": 18309 }, { "epoch": 1.63, "grad_norm": 5.67828985027148, "learning_rate": 8.550590085898053e-07, "loss": 0.5777, "step": 18310 }, { "epoch": 1.63, "grad_norm": 6.504979533999024, "learning_rate": 8.546550540887838e-07, "loss": 0.5829, "step": 18311 }, { "epoch": 1.63, "grad_norm": 7.584739560634968, "learning_rate": 8.542511861127589e-07, "loss": 0.6253, "step": 18312 }, { "epoch": 1.63, "grad_norm": 8.037830336119118, "learning_rate": 8.538474046701611e-07, "loss": 0.5596, "step": 18313 }, { "epoch": 1.63, "grad_norm": 11.11895971433573, "learning_rate": 8.534437097694192e-07, "loss": 0.5497, "step": 18314 }, { "epoch": 1.63, "grad_norm": 4.26677665529349, "learning_rate": 8.530401014189588e-07, "loss": 0.5064, "step": 18315 }, { "epoch": 1.63, "grad_norm": 5.102938683446105, "learning_rate": 8.526365796272057e-07, "loss": 0.5786, "step": 18316 }, { "epoch": 1.63, "grad_norm": 5.323490014530909, "learning_rate": 8.522331444025805e-07, "loss": 0.5206, "step": 18317 }, { "epoch": 1.63, "grad_norm": 8.945711117075765, "learning_rate": 8.51829795753506e-07, "loss": 0.6376, "step": 18318 }, { "epoch": 1.63, "grad_norm": 5.805545505385139, "learning_rate": 8.514265336883993e-07, "loss": 0.5946, "step": 18319 }, { "epoch": 1.63, "grad_norm": 7.115220529627622, "learning_rate": 8.510233582156779e-07, "loss": 0.5462, "step": 18320 }, { "epoch": 1.63, "grad_norm": 6.6669012259309595, "learning_rate": 8.506202693437582e-07, "loss": 0.5461, "step": 18321 }, { "epoch": 1.63, "grad_norm": 8.527211049594577, "learning_rate": 8.502172670810532e-07, "loss": 0.6009, "step": 18322 }, { "epoch": 1.63, "grad_norm": 7.380680942774459, "learning_rate": 8.498143514359764e-07, "loss": 0.5918, "step": 18323 }, { "epoch": 1.63, "grad_norm": 4.948377079877318, "learning_rate": 8.494115224169347e-07, "loss": 0.6099, "step": 18324 }, { "epoch": 1.63, "grad_norm": 7.4111374338024145, "learning_rate": 8.490087800323376e-07, "loss": 0.6559, "step": 18325 }, { "epoch": 1.63, "grad_norm": 5.000457531444563, "learning_rate": 8.486061242905924e-07, "loss": 0.5705, "step": 18326 }, { "epoch": 1.63, "grad_norm": 7.980226250514791, "learning_rate": 8.482035552001028e-07, "loss": 0.5882, "step": 18327 }, { "epoch": 1.64, "grad_norm": 5.99171010435227, "learning_rate": 8.478010727692714e-07, "loss": 0.5775, "step": 18328 }, { "epoch": 1.64, "grad_norm": 6.892311478463849, "learning_rate": 8.473986770065007e-07, "loss": 0.5754, "step": 18329 }, { "epoch": 1.64, "grad_norm": 5.643841575072553, "learning_rate": 8.469963679201887e-07, "loss": 0.6252, "step": 18330 }, { "epoch": 1.64, "grad_norm": 6.424929581527143, "learning_rate": 8.465941455187316e-07, "loss": 0.5828, "step": 18331 }, { "epoch": 1.64, "grad_norm": 5.208605091484272, "learning_rate": 8.461920098105253e-07, "loss": 0.5443, "step": 18332 }, { "epoch": 1.64, "grad_norm": 5.916766019255351, "learning_rate": 8.457899608039644e-07, "loss": 0.5783, "step": 18333 }, { "epoch": 1.64, "grad_norm": 10.508676987467702, "learning_rate": 8.453879985074409e-07, "loss": 0.5832, "step": 18334 }, { "epoch": 1.64, "grad_norm": 8.693719207924325, "learning_rate": 8.449861229293438e-07, "loss": 0.562, "step": 18335 }, { "epoch": 1.64, "grad_norm": 6.797911418691826, "learning_rate": 8.445843340780641e-07, "loss": 0.5117, "step": 18336 }, { "epoch": 1.64, "grad_norm": 5.7738113844680665, "learning_rate": 8.441826319619844e-07, "loss": 0.4941, "step": 18337 }, { "epoch": 1.64, "grad_norm": 8.391707892142685, "learning_rate": 8.43781016589491e-07, "loss": 0.57, "step": 18338 }, { "epoch": 1.64, "grad_norm": 6.301899553051192, "learning_rate": 8.433794879689672e-07, "loss": 0.5887, "step": 18339 }, { "epoch": 1.64, "grad_norm": 7.102860058531695, "learning_rate": 8.429780461087939e-07, "loss": 0.6253, "step": 18340 }, { "epoch": 1.64, "grad_norm": 7.100805384983346, "learning_rate": 8.425766910173499e-07, "loss": 0.5561, "step": 18341 }, { "epoch": 1.64, "grad_norm": 7.9389241001993, "learning_rate": 8.421754227030143e-07, "loss": 0.5569, "step": 18342 }, { "epoch": 1.64, "grad_norm": 8.768141404192999, "learning_rate": 8.417742411741603e-07, "loss": 0.5864, "step": 18343 }, { "epoch": 1.64, "grad_norm": 8.90398598080573, "learning_rate": 8.413731464391622e-07, "loss": 0.5808, "step": 18344 }, { "epoch": 1.64, "grad_norm": 7.083274152815814, "learning_rate": 8.409721385063935e-07, "loss": 0.5402, "step": 18345 }, { "epoch": 1.64, "grad_norm": 5.7148916109339485, "learning_rate": 8.405712173842229e-07, "loss": 0.5446, "step": 18346 }, { "epoch": 1.64, "grad_norm": 7.253897270564336, "learning_rate": 8.401703830810182e-07, "loss": 0.5212, "step": 18347 }, { "epoch": 1.64, "grad_norm": 6.810033767274006, "learning_rate": 8.397696356051471e-07, "loss": 0.5609, "step": 18348 }, { "epoch": 1.64, "grad_norm": 9.323178434173952, "learning_rate": 8.393689749649758e-07, "loss": 0.611, "step": 18349 }, { "epoch": 1.64, "grad_norm": 6.9527752556389775, "learning_rate": 8.389684011688642e-07, "loss": 0.5718, "step": 18350 }, { "epoch": 1.64, "grad_norm": 7.955998385340085, "learning_rate": 8.385679142251746e-07, "loss": 0.5334, "step": 18351 }, { "epoch": 1.64, "grad_norm": 8.080138389775241, "learning_rate": 8.381675141422663e-07, "loss": 0.5473, "step": 18352 }, { "epoch": 1.64, "grad_norm": 6.914079198764076, "learning_rate": 8.377672009284965e-07, "loss": 0.5867, "step": 18353 }, { "epoch": 1.64, "grad_norm": 11.82875431786201, "learning_rate": 8.373669745922219e-07, "loss": 0.6017, "step": 18354 }, { "epoch": 1.64, "grad_norm": 8.648855566181956, "learning_rate": 8.369668351417965e-07, "loss": 0.5278, "step": 18355 }, { "epoch": 1.64, "grad_norm": 8.665460835922133, "learning_rate": 8.365667825855706e-07, "loss": 0.5637, "step": 18356 }, { "epoch": 1.64, "grad_norm": 6.53132647307799, "learning_rate": 8.361668169318954e-07, "loss": 0.5692, "step": 18357 }, { "epoch": 1.64, "grad_norm": 5.70737034368289, "learning_rate": 8.357669381891192e-07, "loss": 0.5326, "step": 18358 }, { "epoch": 1.64, "grad_norm": 7.141488774714178, "learning_rate": 8.353671463655894e-07, "loss": 0.5387, "step": 18359 }, { "epoch": 1.64, "grad_norm": 7.567339501490505, "learning_rate": 8.349674414696496e-07, "loss": 0.6217, "step": 18360 }, { "epoch": 1.64, "grad_norm": 7.757725841017735, "learning_rate": 8.345678235096428e-07, "loss": 0.5118, "step": 18361 }, { "epoch": 1.64, "grad_norm": 5.937835327762397, "learning_rate": 8.341682924939115e-07, "loss": 0.6571, "step": 18362 }, { "epoch": 1.64, "grad_norm": 6.117620642865933, "learning_rate": 8.337688484307932e-07, "loss": 0.5836, "step": 18363 }, { "epoch": 1.64, "grad_norm": 6.398479725230202, "learning_rate": 8.333694913286261e-07, "loss": 0.4731, "step": 18364 }, { "epoch": 1.64, "grad_norm": 6.260003169560137, "learning_rate": 8.329702211957463e-07, "loss": 0.5406, "step": 18365 }, { "epoch": 1.64, "grad_norm": 8.253706875146898, "learning_rate": 8.325710380404872e-07, "loss": 0.5858, "step": 18366 }, { "epoch": 1.64, "grad_norm": 7.797217830115273, "learning_rate": 8.321719418711816e-07, "loss": 0.6039, "step": 18367 }, { "epoch": 1.64, "grad_norm": 8.769105305474817, "learning_rate": 8.317729326961598e-07, "loss": 0.6089, "step": 18368 }, { "epoch": 1.64, "grad_norm": 5.077064818402232, "learning_rate": 8.313740105237494e-07, "loss": 0.5426, "step": 18369 }, { "epoch": 1.64, "grad_norm": 7.480427270604181, "learning_rate": 8.309751753622769e-07, "loss": 0.5913, "step": 18370 }, { "epoch": 1.64, "grad_norm": 5.386080714696423, "learning_rate": 8.305764272200678e-07, "loss": 0.5179, "step": 18371 }, { "epoch": 1.64, "grad_norm": 7.8064521341152195, "learning_rate": 8.301777661054449e-07, "loss": 0.6527, "step": 18372 }, { "epoch": 1.64, "grad_norm": 7.334846924589883, "learning_rate": 8.297791920267295e-07, "loss": 0.607, "step": 18373 }, { "epoch": 1.64, "grad_norm": 7.071571096892972, "learning_rate": 8.29380704992242e-07, "loss": 0.5353, "step": 18374 }, { "epoch": 1.64, "grad_norm": 6.114224187000775, "learning_rate": 8.289823050102985e-07, "loss": 0.5656, "step": 18375 }, { "epoch": 1.64, "grad_norm": 5.945071232802345, "learning_rate": 8.285839920892141e-07, "loss": 0.5793, "step": 18376 }, { "epoch": 1.64, "grad_norm": 7.699113478821209, "learning_rate": 8.281857662373033e-07, "loss": 0.6275, "step": 18377 }, { "epoch": 1.64, "grad_norm": 6.743398708774488, "learning_rate": 8.277876274628788e-07, "loss": 0.5668, "step": 18378 }, { "epoch": 1.64, "grad_norm": 5.375104460711118, "learning_rate": 8.273895757742506e-07, "loss": 0.5446, "step": 18379 }, { "epoch": 1.64, "grad_norm": 6.531842992497854, "learning_rate": 8.269916111797266e-07, "loss": 0.5505, "step": 18380 }, { "epoch": 1.64, "grad_norm": 5.649366647491276, "learning_rate": 8.265937336876156e-07, "loss": 0.5271, "step": 18381 }, { "epoch": 1.64, "grad_norm": 7.109736115688753, "learning_rate": 8.2619594330622e-07, "loss": 0.5538, "step": 18382 }, { "epoch": 1.64, "grad_norm": 7.294945830809467, "learning_rate": 8.25798240043843e-07, "loss": 0.5528, "step": 18383 }, { "epoch": 1.64, "grad_norm": 9.259944562229338, "learning_rate": 8.254006239087864e-07, "loss": 0.5818, "step": 18384 }, { "epoch": 1.64, "grad_norm": 7.394268344918695, "learning_rate": 8.250030949093502e-07, "loss": 0.5325, "step": 18385 }, { "epoch": 1.64, "grad_norm": 7.868022885956684, "learning_rate": 8.246056530538304e-07, "loss": 0.5926, "step": 18386 }, { "epoch": 1.64, "grad_norm": 8.239883290625055, "learning_rate": 8.242082983505256e-07, "loss": 0.6212, "step": 18387 }, { "epoch": 1.64, "grad_norm": 8.042459110384563, "learning_rate": 8.238110308077274e-07, "loss": 0.6344, "step": 18388 }, { "epoch": 1.64, "grad_norm": 6.876540589125333, "learning_rate": 8.234138504337274e-07, "loss": 0.589, "step": 18389 }, { "epoch": 1.64, "grad_norm": 7.044362738141847, "learning_rate": 8.230167572368164e-07, "loss": 0.5972, "step": 18390 }, { "epoch": 1.64, "grad_norm": 6.965982345219756, "learning_rate": 8.226197512252832e-07, "loss": 0.5222, "step": 18391 }, { "epoch": 1.64, "grad_norm": 6.384640688243956, "learning_rate": 8.222228324074144e-07, "loss": 0.6227, "step": 18392 }, { "epoch": 1.64, "grad_norm": 5.78366962350662, "learning_rate": 8.218260007914947e-07, "loss": 0.5409, "step": 18393 }, { "epoch": 1.64, "grad_norm": 6.657027019780211, "learning_rate": 8.214292563858084e-07, "loss": 0.544, "step": 18394 }, { "epoch": 1.64, "grad_norm": 6.180439904615951, "learning_rate": 8.21032599198634e-07, "loss": 0.5869, "step": 18395 }, { "epoch": 1.64, "grad_norm": 6.494839270125743, "learning_rate": 8.206360292382525e-07, "loss": 0.555, "step": 18396 }, { "epoch": 1.64, "grad_norm": 8.421076162304619, "learning_rate": 8.202395465129414e-07, "loss": 0.5447, "step": 18397 }, { "epoch": 1.64, "grad_norm": 6.59237618498641, "learning_rate": 8.198431510309762e-07, "loss": 0.6146, "step": 18398 }, { "epoch": 1.64, "grad_norm": 7.190719618868607, "learning_rate": 8.194468428006319e-07, "loss": 0.5597, "step": 18399 }, { "epoch": 1.64, "grad_norm": 7.509380054737271, "learning_rate": 8.190506218301786e-07, "loss": 0.5445, "step": 18400 }, { "epoch": 1.64, "grad_norm": 5.623266655006669, "learning_rate": 8.186544881278874e-07, "loss": 0.5398, "step": 18401 }, { "epoch": 1.64, "grad_norm": 8.033686402570199, "learning_rate": 8.182584417020267e-07, "loss": 0.605, "step": 18402 }, { "epoch": 1.64, "grad_norm": 7.68802285528086, "learning_rate": 8.178624825608645e-07, "loss": 0.5394, "step": 18403 }, { "epoch": 1.64, "grad_norm": 8.398796979763894, "learning_rate": 8.174666107126627e-07, "loss": 0.5697, "step": 18404 }, { "epoch": 1.64, "grad_norm": 9.312847106730391, "learning_rate": 8.170708261656857e-07, "loss": 0.6253, "step": 18405 }, { "epoch": 1.64, "grad_norm": 5.987843709007214, "learning_rate": 8.166751289281965e-07, "loss": 0.619, "step": 18406 }, { "epoch": 1.64, "grad_norm": 6.57136475148598, "learning_rate": 8.16279519008451e-07, "loss": 0.5493, "step": 18407 }, { "epoch": 1.64, "grad_norm": 7.324105977867103, "learning_rate": 8.158839964147086e-07, "loss": 0.6139, "step": 18408 }, { "epoch": 1.64, "grad_norm": 6.60270649238533, "learning_rate": 8.154885611552244e-07, "loss": 0.5928, "step": 18409 }, { "epoch": 1.64, "grad_norm": 6.22559608870969, "learning_rate": 8.150932132382528e-07, "loss": 0.583, "step": 18410 }, { "epoch": 1.64, "grad_norm": 6.44855956952106, "learning_rate": 8.146979526720456e-07, "loss": 0.5986, "step": 18411 }, { "epoch": 1.64, "grad_norm": 5.9899424013144635, "learning_rate": 8.143027794648544e-07, "loss": 0.5674, "step": 18412 }, { "epoch": 1.64, "grad_norm": 6.154382386812026, "learning_rate": 8.139076936249246e-07, "loss": 0.5701, "step": 18413 }, { "epoch": 1.64, "grad_norm": 7.030091264898395, "learning_rate": 8.135126951605044e-07, "loss": 0.5131, "step": 18414 }, { "epoch": 1.64, "grad_norm": 5.780913175496152, "learning_rate": 8.131177840798388e-07, "loss": 0.5146, "step": 18415 }, { "epoch": 1.64, "grad_norm": 6.413298266505573, "learning_rate": 8.127229603911696e-07, "loss": 0.5451, "step": 18416 }, { "epoch": 1.64, "grad_norm": 8.690138379896133, "learning_rate": 8.123282241027403e-07, "loss": 0.5163, "step": 18417 }, { "epoch": 1.64, "grad_norm": 7.54807653231746, "learning_rate": 8.119335752227875e-07, "loss": 0.5594, "step": 18418 }, { "epoch": 1.64, "grad_norm": 6.1192575435019, "learning_rate": 8.115390137595503e-07, "loss": 0.5914, "step": 18419 }, { "epoch": 1.64, "grad_norm": 5.96023587962027, "learning_rate": 8.111445397212625e-07, "loss": 0.5595, "step": 18420 }, { "epoch": 1.64, "grad_norm": 7.33857444804644, "learning_rate": 8.107501531161593e-07, "loss": 0.6191, "step": 18421 }, { "epoch": 1.64, "grad_norm": 6.4072587021959295, "learning_rate": 8.10355853952472e-07, "loss": 0.513, "step": 18422 }, { "epoch": 1.64, "grad_norm": 7.959006438093087, "learning_rate": 8.099616422384316e-07, "loss": 0.547, "step": 18423 }, { "epoch": 1.64, "grad_norm": 9.481280122107533, "learning_rate": 8.095675179822654e-07, "loss": 0.543, "step": 18424 }, { "epoch": 1.64, "grad_norm": 7.57089957219226, "learning_rate": 8.09173481192202e-07, "loss": 0.5833, "step": 18425 }, { "epoch": 1.64, "grad_norm": 5.719521127469208, "learning_rate": 8.08779531876463e-07, "loss": 0.6178, "step": 18426 }, { "epoch": 1.64, "grad_norm": 5.755967958378412, "learning_rate": 8.083856700432724e-07, "loss": 0.5795, "step": 18427 }, { "epoch": 1.64, "grad_norm": 6.742435810393466, "learning_rate": 8.079918957008514e-07, "loss": 0.5873, "step": 18428 }, { "epoch": 1.64, "grad_norm": 4.810896209349775, "learning_rate": 8.075982088574197e-07, "loss": 0.6162, "step": 18429 }, { "epoch": 1.64, "grad_norm": 7.047397681986977, "learning_rate": 8.072046095211938e-07, "loss": 0.6128, "step": 18430 }, { "epoch": 1.64, "grad_norm": 7.753924884547794, "learning_rate": 8.068110977003912e-07, "loss": 0.6073, "step": 18431 }, { "epoch": 1.64, "grad_norm": 10.163625071799478, "learning_rate": 8.064176734032236e-07, "loss": 0.6688, "step": 18432 }, { "epoch": 1.64, "grad_norm": 6.011963361798186, "learning_rate": 8.060243366379028e-07, "loss": 0.5849, "step": 18433 }, { "epoch": 1.64, "grad_norm": 6.358761392769661, "learning_rate": 8.056310874126383e-07, "loss": 0.5552, "step": 18434 }, { "epoch": 1.64, "grad_norm": 6.193508048280206, "learning_rate": 8.052379257356401e-07, "loss": 0.5183, "step": 18435 }, { "epoch": 1.64, "grad_norm": 6.317180704207342, "learning_rate": 8.048448516151136e-07, "loss": 0.5916, "step": 18436 }, { "epoch": 1.64, "grad_norm": 9.623279080583254, "learning_rate": 8.044518650592637e-07, "loss": 0.5934, "step": 18437 }, { "epoch": 1.64, "grad_norm": 7.5029563716056185, "learning_rate": 8.040589660762937e-07, "loss": 0.6271, "step": 18438 }, { "epoch": 1.64, "grad_norm": 7.120324634867505, "learning_rate": 8.036661546744035e-07, "loss": 0.6195, "step": 18439 }, { "epoch": 1.65, "grad_norm": 6.198340049825372, "learning_rate": 8.032734308617917e-07, "loss": 0.6135, "step": 18440 }, { "epoch": 1.65, "grad_norm": 5.999704104701878, "learning_rate": 8.028807946466566e-07, "loss": 0.5485, "step": 18441 }, { "epoch": 1.65, "grad_norm": 10.640950897463302, "learning_rate": 8.024882460371935e-07, "loss": 0.6243, "step": 18442 }, { "epoch": 1.65, "grad_norm": 7.474663814946242, "learning_rate": 8.020957850415962e-07, "loss": 0.5873, "step": 18443 }, { "epoch": 1.65, "grad_norm": 7.981735709323859, "learning_rate": 8.017034116680567e-07, "loss": 0.5842, "step": 18444 }, { "epoch": 1.65, "grad_norm": 7.0281706129418025, "learning_rate": 8.013111259247635e-07, "loss": 0.5734, "step": 18445 }, { "epoch": 1.65, "grad_norm": 7.315713533005397, "learning_rate": 8.00918927819907e-07, "loss": 0.6075, "step": 18446 }, { "epoch": 1.65, "grad_norm": 7.490775916356115, "learning_rate": 8.005268173616703e-07, "loss": 0.5974, "step": 18447 }, { "epoch": 1.65, "grad_norm": 5.1232602399488405, "learning_rate": 8.001347945582399e-07, "loss": 0.5679, "step": 18448 }, { "epoch": 1.65, "grad_norm": 6.39486736688238, "learning_rate": 7.99742859417798e-07, "loss": 0.575, "step": 18449 }, { "epoch": 1.65, "grad_norm": 6.411909072468878, "learning_rate": 7.993510119485254e-07, "loss": 0.5741, "step": 18450 }, { "epoch": 1.65, "grad_norm": 9.133180080065307, "learning_rate": 7.989592521586026e-07, "loss": 0.5506, "step": 18451 }, { "epoch": 1.65, "grad_norm": 7.603410467080666, "learning_rate": 7.985675800562042e-07, "loss": 0.5638, "step": 18452 }, { "epoch": 1.65, "grad_norm": 7.179075120207025, "learning_rate": 7.981759956495067e-07, "loss": 0.5614, "step": 18453 }, { "epoch": 1.65, "grad_norm": 6.1022844683869915, "learning_rate": 7.977844989466827e-07, "loss": 0.5352, "step": 18454 }, { "epoch": 1.65, "grad_norm": 6.703531469440243, "learning_rate": 7.973930899559052e-07, "loss": 0.5491, "step": 18455 }, { "epoch": 1.65, "grad_norm": 7.452095987625953, "learning_rate": 7.970017686853432e-07, "loss": 0.5444, "step": 18456 }, { "epoch": 1.65, "grad_norm": 11.929247271004897, "learning_rate": 7.966105351431664e-07, "loss": 0.5453, "step": 18457 }, { "epoch": 1.65, "grad_norm": 6.9246923717523075, "learning_rate": 7.962193893375375e-07, "loss": 0.6027, "step": 18458 }, { "epoch": 1.65, "grad_norm": 5.593179876701987, "learning_rate": 7.958283312766235e-07, "loss": 0.5647, "step": 18459 }, { "epoch": 1.65, "grad_norm": 8.234437584685127, "learning_rate": 7.954373609685867e-07, "loss": 0.6105, "step": 18460 }, { "epoch": 1.65, "grad_norm": 5.196807662606069, "learning_rate": 7.950464784215861e-07, "loss": 0.5822, "step": 18461 }, { "epoch": 1.65, "grad_norm": 7.915972218287418, "learning_rate": 7.946556836437818e-07, "loss": 0.6111, "step": 18462 }, { "epoch": 1.65, "grad_norm": 6.461879084297356, "learning_rate": 7.942649766433308e-07, "loss": 0.608, "step": 18463 }, { "epoch": 1.65, "grad_norm": 10.332750622298514, "learning_rate": 7.938743574283886e-07, "loss": 0.6431, "step": 18464 }, { "epoch": 1.65, "grad_norm": 6.184942410920042, "learning_rate": 7.93483826007107e-07, "loss": 0.5329, "step": 18465 }, { "epoch": 1.65, "grad_norm": 6.414006023388483, "learning_rate": 7.930933823876386e-07, "loss": 0.5826, "step": 18466 }, { "epoch": 1.65, "grad_norm": 5.189780697896888, "learning_rate": 7.927030265781327e-07, "loss": 0.6814, "step": 18467 }, { "epoch": 1.65, "grad_norm": 6.006386787402832, "learning_rate": 7.923127585867374e-07, "loss": 0.5934, "step": 18468 }, { "epoch": 1.65, "grad_norm": 9.238356359596235, "learning_rate": 7.919225784215984e-07, "loss": 0.5701, "step": 18469 }, { "epoch": 1.65, "grad_norm": 7.326573433735392, "learning_rate": 7.915324860908613e-07, "loss": 0.5945, "step": 18470 }, { "epoch": 1.65, "grad_norm": 5.872185336640947, "learning_rate": 7.911424816026658e-07, "loss": 0.5914, "step": 18471 }, { "epoch": 1.65, "grad_norm": 6.775453461371875, "learning_rate": 7.907525649651537e-07, "loss": 0.631, "step": 18472 }, { "epoch": 1.65, "grad_norm": 7.267934552548118, "learning_rate": 7.90362736186464e-07, "loss": 0.5714, "step": 18473 }, { "epoch": 1.65, "grad_norm": 6.145931557851759, "learning_rate": 7.899729952747332e-07, "loss": 0.6003, "step": 18474 }, { "epoch": 1.65, "grad_norm": 7.703159097242164, "learning_rate": 7.895833422380977e-07, "loss": 0.6376, "step": 18475 }, { "epoch": 1.65, "grad_norm": 4.8372389857358975, "learning_rate": 7.89193777084688e-07, "loss": 0.5405, "step": 18476 }, { "epoch": 1.65, "grad_norm": 7.081276596007639, "learning_rate": 7.888042998226376e-07, "loss": 0.5466, "step": 18477 }, { "epoch": 1.65, "grad_norm": 9.762203965758758, "learning_rate": 7.884149104600741e-07, "loss": 0.51, "step": 18478 }, { "epoch": 1.65, "grad_norm": 6.113591453398985, "learning_rate": 7.880256090051258e-07, "loss": 0.5533, "step": 18479 }, { "epoch": 1.65, "grad_norm": 6.866316368024405, "learning_rate": 7.876363954659194e-07, "loss": 0.5866, "step": 18480 }, { "epoch": 1.65, "grad_norm": 7.0532276095778625, "learning_rate": 7.872472698505779e-07, "loss": 0.5627, "step": 18481 }, { "epoch": 1.65, "grad_norm": 7.088422148583732, "learning_rate": 7.868582321672252e-07, "loss": 0.6264, "step": 18482 }, { "epoch": 1.65, "grad_norm": 10.004520893253094, "learning_rate": 7.864692824239794e-07, "loss": 0.621, "step": 18483 }, { "epoch": 1.65, "grad_norm": 6.291895097514731, "learning_rate": 7.860804206289597e-07, "loss": 0.5691, "step": 18484 }, { "epoch": 1.65, "grad_norm": 7.59977913718888, "learning_rate": 7.856916467902825e-07, "loss": 0.5368, "step": 18485 }, { "epoch": 1.65, "grad_norm": 6.252789079921171, "learning_rate": 7.853029609160634e-07, "loss": 0.5964, "step": 18486 }, { "epoch": 1.65, "grad_norm": 8.68925027825286, "learning_rate": 7.849143630144146e-07, "loss": 0.5587, "step": 18487 }, { "epoch": 1.65, "grad_norm": 5.432049502858594, "learning_rate": 7.845258530934491e-07, "loss": 0.5983, "step": 18488 }, { "epoch": 1.65, "grad_norm": 7.4365609766453495, "learning_rate": 7.841374311612731e-07, "loss": 0.5912, "step": 18489 }, { "epoch": 1.65, "grad_norm": 5.6075287678113295, "learning_rate": 7.83749097225997e-07, "loss": 0.5826, "step": 18490 }, { "epoch": 1.65, "grad_norm": 8.467804311962306, "learning_rate": 7.833608512957241e-07, "loss": 0.6157, "step": 18491 }, { "epoch": 1.65, "grad_norm": 6.249071809458059, "learning_rate": 7.829726933785591e-07, "loss": 0.6396, "step": 18492 }, { "epoch": 1.65, "grad_norm": 9.02085045649851, "learning_rate": 7.82584623482604e-07, "loss": 0.6119, "step": 18493 }, { "epoch": 1.65, "grad_norm": 7.772931637112214, "learning_rate": 7.821966416159588e-07, "loss": 0.5112, "step": 18494 }, { "epoch": 1.65, "grad_norm": 5.560831521670898, "learning_rate": 7.818087477867231e-07, "loss": 0.5692, "step": 18495 }, { "epoch": 1.65, "grad_norm": 7.90167753048076, "learning_rate": 7.814209420029911e-07, "loss": 0.521, "step": 18496 }, { "epoch": 1.65, "grad_norm": 7.830930956939064, "learning_rate": 7.81033224272858e-07, "loss": 0.5248, "step": 18497 }, { "epoch": 1.65, "grad_norm": 6.896053930836321, "learning_rate": 7.806455946044173e-07, "loss": 0.5823, "step": 18498 }, { "epoch": 1.65, "grad_norm": 4.502397172353447, "learning_rate": 7.802580530057597e-07, "loss": 0.5971, "step": 18499 }, { "epoch": 1.65, "grad_norm": 7.210820175915279, "learning_rate": 7.798705994849737e-07, "loss": 0.5363, "step": 18500 }, { "epoch": 1.65, "grad_norm": 5.413295166889457, "learning_rate": 7.794832340501484e-07, "loss": 0.5768, "step": 18501 }, { "epoch": 1.65, "grad_norm": 6.921624855741802, "learning_rate": 7.790959567093664e-07, "loss": 0.5316, "step": 18502 }, { "epoch": 1.65, "grad_norm": 6.261767629419919, "learning_rate": 7.787087674707134e-07, "loss": 0.5857, "step": 18503 }, { "epoch": 1.65, "grad_norm": 6.702468537812182, "learning_rate": 7.78321666342271e-07, "loss": 0.638, "step": 18504 }, { "epoch": 1.65, "grad_norm": 10.504380768477803, "learning_rate": 7.779346533321175e-07, "loss": 0.618, "step": 18505 }, { "epoch": 1.65, "grad_norm": 6.413499594140099, "learning_rate": 7.775477284483318e-07, "loss": 0.6134, "step": 18506 }, { "epoch": 1.65, "grad_norm": 8.344582191540109, "learning_rate": 7.771608916989904e-07, "loss": 0.5579, "step": 18507 }, { "epoch": 1.65, "grad_norm": 6.726693913995241, "learning_rate": 7.767741430921688e-07, "loss": 0.5534, "step": 18508 }, { "epoch": 1.65, "grad_norm": 7.723186845811823, "learning_rate": 7.763874826359369e-07, "loss": 0.5765, "step": 18509 }, { "epoch": 1.65, "grad_norm": 6.870819049650064, "learning_rate": 7.760009103383665e-07, "loss": 0.5589, "step": 18510 }, { "epoch": 1.65, "grad_norm": 5.467456378735843, "learning_rate": 7.756144262075272e-07, "loss": 0.5985, "step": 18511 }, { "epoch": 1.65, "grad_norm": 9.622277009121818, "learning_rate": 7.752280302514853e-07, "loss": 0.6046, "step": 18512 }, { "epoch": 1.65, "grad_norm": 5.941247032868286, "learning_rate": 7.748417224783062e-07, "loss": 0.5295, "step": 18513 }, { "epoch": 1.65, "grad_norm": 7.132259224279332, "learning_rate": 7.744555028960543e-07, "loss": 0.5811, "step": 18514 }, { "epoch": 1.65, "grad_norm": 7.834414961003755, "learning_rate": 7.74069371512789e-07, "loss": 0.5906, "step": 18515 }, { "epoch": 1.65, "grad_norm": 5.463538861285599, "learning_rate": 7.736833283365702e-07, "loss": 0.5498, "step": 18516 }, { "epoch": 1.65, "grad_norm": 6.902282305239494, "learning_rate": 7.732973733754573e-07, "loss": 0.5644, "step": 18517 }, { "epoch": 1.65, "grad_norm": 7.2951555160481245, "learning_rate": 7.729115066375065e-07, "loss": 0.6084, "step": 18518 }, { "epoch": 1.65, "grad_norm": 6.755316027174562, "learning_rate": 7.725257281307691e-07, "loss": 0.5538, "step": 18519 }, { "epoch": 1.65, "grad_norm": 7.674924485166208, "learning_rate": 7.721400378632999e-07, "loss": 0.6013, "step": 18520 }, { "epoch": 1.65, "grad_norm": 7.69693025873178, "learning_rate": 7.717544358431489e-07, "loss": 0.5791, "step": 18521 }, { "epoch": 1.65, "grad_norm": 8.398408769112057, "learning_rate": 7.713689220783637e-07, "loss": 0.5703, "step": 18522 }, { "epoch": 1.65, "grad_norm": 5.401809854124345, "learning_rate": 7.709834965769919e-07, "loss": 0.5405, "step": 18523 }, { "epoch": 1.65, "grad_norm": 5.861215483641893, "learning_rate": 7.70598159347078e-07, "loss": 0.5569, "step": 18524 }, { "epoch": 1.65, "grad_norm": 8.172857486881899, "learning_rate": 7.702129103966649e-07, "loss": 0.5503, "step": 18525 }, { "epoch": 1.65, "grad_norm": 5.072229724791138, "learning_rate": 7.698277497337947e-07, "loss": 0.5852, "step": 18526 }, { "epoch": 1.65, "grad_norm": 6.615578688952974, "learning_rate": 7.694426773665076e-07, "loss": 0.4889, "step": 18527 }, { "epoch": 1.65, "grad_norm": 7.8136153243673165, "learning_rate": 7.690576933028382e-07, "loss": 0.5178, "step": 18528 }, { "epoch": 1.65, "grad_norm": 6.376655723428289, "learning_rate": 7.68672797550824e-07, "loss": 0.5437, "step": 18529 }, { "epoch": 1.65, "grad_norm": 6.667955408410495, "learning_rate": 7.682879901184986e-07, "loss": 0.555, "step": 18530 }, { "epoch": 1.65, "grad_norm": 7.960045254437673, "learning_rate": 7.679032710138945e-07, "loss": 0.554, "step": 18531 }, { "epoch": 1.65, "grad_norm": 7.4586732033704, "learning_rate": 7.675186402450424e-07, "loss": 0.5475, "step": 18532 }, { "epoch": 1.65, "grad_norm": 8.425712057531369, "learning_rate": 7.671340978199682e-07, "loss": 0.5436, "step": 18533 }, { "epoch": 1.65, "grad_norm": 8.520293919042784, "learning_rate": 7.667496437467015e-07, "loss": 0.5539, "step": 18534 }, { "epoch": 1.65, "grad_norm": 6.271113254641803, "learning_rate": 7.663652780332642e-07, "loss": 0.5848, "step": 18535 }, { "epoch": 1.65, "grad_norm": 6.7972004587419494, "learning_rate": 7.6598100068768e-07, "loss": 0.5975, "step": 18536 }, { "epoch": 1.65, "grad_norm": 6.722343714256844, "learning_rate": 7.655968117179702e-07, "loss": 0.5459, "step": 18537 }, { "epoch": 1.65, "grad_norm": 7.794889242191422, "learning_rate": 7.652127111321539e-07, "loss": 0.5832, "step": 18538 }, { "epoch": 1.65, "grad_norm": 6.309449431028304, "learning_rate": 7.648286989382481e-07, "loss": 0.5542, "step": 18539 }, { "epoch": 1.65, "grad_norm": 7.712729513139768, "learning_rate": 7.64444775144269e-07, "loss": 0.5926, "step": 18540 }, { "epoch": 1.65, "grad_norm": 9.881349408506232, "learning_rate": 7.640609397582294e-07, "loss": 0.5904, "step": 18541 }, { "epoch": 1.65, "grad_norm": 7.353552072120926, "learning_rate": 7.636771927881404e-07, "loss": 0.5574, "step": 18542 }, { "epoch": 1.65, "grad_norm": 5.1315001725763425, "learning_rate": 7.632935342420128e-07, "loss": 0.5645, "step": 18543 }, { "epoch": 1.65, "grad_norm": 7.619942365285354, "learning_rate": 7.629099641278542e-07, "loss": 0.5992, "step": 18544 }, { "epoch": 1.65, "grad_norm": 6.279536057247027, "learning_rate": 7.62526482453671e-07, "loss": 0.5587, "step": 18545 }, { "epoch": 1.65, "grad_norm": 5.677882106829552, "learning_rate": 7.621430892274689e-07, "loss": 0.5187, "step": 18546 }, { "epoch": 1.65, "grad_norm": 7.431041271984883, "learning_rate": 7.617597844572494e-07, "loss": 0.6119, "step": 18547 }, { "epoch": 1.65, "grad_norm": 7.42173809337769, "learning_rate": 7.613765681510116e-07, "loss": 0.5893, "step": 18548 }, { "epoch": 1.65, "grad_norm": 8.215803908928306, "learning_rate": 7.609934403167552e-07, "loss": 0.5892, "step": 18549 }, { "epoch": 1.65, "grad_norm": 6.592797953327762, "learning_rate": 7.606104009624776e-07, "loss": 0.5404, "step": 18550 }, { "epoch": 1.65, "grad_norm": 7.277501009061264, "learning_rate": 7.602274500961737e-07, "loss": 0.5549, "step": 18551 }, { "epoch": 1.66, "grad_norm": 7.343844922343488, "learning_rate": 7.598445877258365e-07, "loss": 0.5004, "step": 18552 }, { "epoch": 1.66, "grad_norm": 6.105406915340262, "learning_rate": 7.594618138594595e-07, "loss": 0.5705, "step": 18553 }, { "epoch": 1.66, "grad_norm": 8.647893967063476, "learning_rate": 7.590791285050292e-07, "loss": 0.5926, "step": 18554 }, { "epoch": 1.66, "grad_norm": 7.6957850584052645, "learning_rate": 7.586965316705347e-07, "loss": 0.5453, "step": 18555 }, { "epoch": 1.66, "grad_norm": 5.636153183401164, "learning_rate": 7.583140233639613e-07, "loss": 0.6279, "step": 18556 }, { "epoch": 1.66, "grad_norm": 8.877912695616361, "learning_rate": 7.579316035932943e-07, "loss": 0.5933, "step": 18557 }, { "epoch": 1.66, "grad_norm": 5.772342885794856, "learning_rate": 7.575492723665157e-07, "loss": 0.5501, "step": 18558 }, { "epoch": 1.66, "grad_norm": 6.434665221273716, "learning_rate": 7.57167029691604e-07, "loss": 0.5629, "step": 18559 }, { "epoch": 1.66, "grad_norm": 6.5926326407758635, "learning_rate": 7.567848755765394e-07, "loss": 0.5608, "step": 18560 }, { "epoch": 1.66, "grad_norm": 7.471635996649574, "learning_rate": 7.564028100292987e-07, "loss": 0.5194, "step": 18561 }, { "epoch": 1.66, "grad_norm": 6.753792910954868, "learning_rate": 7.56020833057855e-07, "loss": 0.5801, "step": 18562 }, { "epoch": 1.66, "grad_norm": 5.604729982440136, "learning_rate": 7.556389446701829e-07, "loss": 0.5661, "step": 18563 }, { "epoch": 1.66, "grad_norm": 6.359980825236722, "learning_rate": 7.552571448742518e-07, "loss": 0.5824, "step": 18564 }, { "epoch": 1.66, "grad_norm": 7.705366826042654, "learning_rate": 7.548754336780328e-07, "loss": 0.5585, "step": 18565 }, { "epoch": 1.66, "grad_norm": 6.511369826601275, "learning_rate": 7.544938110894934e-07, "loss": 0.5417, "step": 18566 }, { "epoch": 1.66, "grad_norm": 9.0263730328385, "learning_rate": 7.541122771165965e-07, "loss": 0.5613, "step": 18567 }, { "epoch": 1.66, "grad_norm": 6.912206738248883, "learning_rate": 7.537308317673081e-07, "loss": 0.5843, "step": 18568 }, { "epoch": 1.66, "grad_norm": 6.9347954118101605, "learning_rate": 7.53349475049589e-07, "loss": 0.5683, "step": 18569 }, { "epoch": 1.66, "grad_norm": 9.534064835232995, "learning_rate": 7.529682069714e-07, "loss": 0.5509, "step": 18570 }, { "epoch": 1.66, "grad_norm": 6.008849574292464, "learning_rate": 7.525870275406999e-07, "loss": 0.5723, "step": 18571 }, { "epoch": 1.66, "grad_norm": 4.422661883713512, "learning_rate": 7.522059367654427e-07, "loss": 0.6093, "step": 18572 }, { "epoch": 1.66, "grad_norm": 7.673561217653939, "learning_rate": 7.518249346535838e-07, "loss": 0.5752, "step": 18573 }, { "epoch": 1.66, "grad_norm": 8.066847278067392, "learning_rate": 7.514440212130764e-07, "loss": 0.5904, "step": 18574 }, { "epoch": 1.66, "grad_norm": 9.237768216850789, "learning_rate": 7.510631964518706e-07, "loss": 0.5894, "step": 18575 }, { "epoch": 1.66, "grad_norm": 6.200674847406414, "learning_rate": 7.506824603779173e-07, "loss": 0.4793, "step": 18576 }, { "epoch": 1.66, "grad_norm": 8.754645973365312, "learning_rate": 7.503018129991601e-07, "loss": 0.6063, "step": 18577 }, { "epoch": 1.66, "grad_norm": 7.615162151614659, "learning_rate": 7.499212543235468e-07, "loss": 0.6375, "step": 18578 }, { "epoch": 1.66, "grad_norm": 7.328288127089694, "learning_rate": 7.495407843590192e-07, "loss": 0.5686, "step": 18579 }, { "epoch": 1.66, "grad_norm": 7.290477639710003, "learning_rate": 7.491604031135191e-07, "loss": 0.5565, "step": 18580 }, { "epoch": 1.66, "grad_norm": 5.515284576952235, "learning_rate": 7.487801105949865e-07, "loss": 0.5527, "step": 18581 }, { "epoch": 1.66, "grad_norm": 5.836041542520796, "learning_rate": 7.48399906811359e-07, "loss": 0.5229, "step": 18582 }, { "epoch": 1.66, "grad_norm": 6.513283737398512, "learning_rate": 7.480197917705729e-07, "loss": 0.5749, "step": 18583 }, { "epoch": 1.66, "grad_norm": 6.015498933113641, "learning_rate": 7.476397654805629e-07, "loss": 0.6021, "step": 18584 }, { "epoch": 1.66, "grad_norm": 6.244230955940849, "learning_rate": 7.472598279492588e-07, "loss": 0.5485, "step": 18585 }, { "epoch": 1.66, "grad_norm": 7.155943675251089, "learning_rate": 7.468799791845926e-07, "loss": 0.5685, "step": 18586 }, { "epoch": 1.66, "grad_norm": 6.822337924308729, "learning_rate": 7.465002191944931e-07, "loss": 0.5514, "step": 18587 }, { "epoch": 1.66, "grad_norm": 6.294443635171104, "learning_rate": 7.461205479868861e-07, "loss": 0.5667, "step": 18588 }, { "epoch": 1.66, "grad_norm": 7.995757602430947, "learning_rate": 7.457409655696974e-07, "loss": 0.5448, "step": 18589 }, { "epoch": 1.66, "grad_norm": 6.2407626508897875, "learning_rate": 7.453614719508495e-07, "loss": 0.613, "step": 18590 }, { "epoch": 1.66, "grad_norm": 7.669333371211093, "learning_rate": 7.449820671382641e-07, "loss": 0.4954, "step": 18591 }, { "epoch": 1.66, "grad_norm": 7.00261761749898, "learning_rate": 7.446027511398584e-07, "loss": 0.5046, "step": 18592 }, { "epoch": 1.66, "grad_norm": 5.54298594135585, "learning_rate": 7.442235239635514e-07, "loss": 0.6041, "step": 18593 }, { "epoch": 1.66, "grad_norm": 6.610275450575934, "learning_rate": 7.438443856172578e-07, "loss": 0.5612, "step": 18594 }, { "epoch": 1.66, "grad_norm": 6.03283612323505, "learning_rate": 7.434653361088922e-07, "loss": 0.6092, "step": 18595 }, { "epoch": 1.66, "grad_norm": 8.837800825025695, "learning_rate": 7.430863754463663e-07, "loss": 0.5989, "step": 18596 }, { "epoch": 1.66, "grad_norm": 8.328324395523541, "learning_rate": 7.427075036375903e-07, "loss": 0.5673, "step": 18597 }, { "epoch": 1.66, "grad_norm": 7.577505155465997, "learning_rate": 7.423287206904712e-07, "loss": 0.567, "step": 18598 }, { "epoch": 1.66, "grad_norm": 7.104704592604466, "learning_rate": 7.419500266129159e-07, "loss": 0.5739, "step": 18599 }, { "epoch": 1.66, "grad_norm": 8.449294448627578, "learning_rate": 7.415714214128283e-07, "loss": 0.5674, "step": 18600 }, { "epoch": 1.66, "grad_norm": 5.762521253783584, "learning_rate": 7.41192905098112e-07, "loss": 0.5794, "step": 18601 }, { "epoch": 1.66, "grad_norm": 5.932503510482977, "learning_rate": 7.408144776766674e-07, "loss": 0.5584, "step": 18602 }, { "epoch": 1.66, "grad_norm": 7.864705191898024, "learning_rate": 7.404361391563936e-07, "loss": 0.5684, "step": 18603 }, { "epoch": 1.66, "grad_norm": 6.675134821013098, "learning_rate": 7.400578895451876e-07, "loss": 0.532, "step": 18604 }, { "epoch": 1.66, "grad_norm": 6.226041500383883, "learning_rate": 7.396797288509428e-07, "loss": 0.5226, "step": 18605 }, { "epoch": 1.66, "grad_norm": 5.906564392208084, "learning_rate": 7.393016570815536e-07, "loss": 0.5608, "step": 18606 }, { "epoch": 1.66, "grad_norm": 8.659609487294826, "learning_rate": 7.389236742449113e-07, "loss": 0.5328, "step": 18607 }, { "epoch": 1.66, "grad_norm": 5.049722274616988, "learning_rate": 7.38545780348906e-07, "loss": 0.5175, "step": 18608 }, { "epoch": 1.66, "grad_norm": 5.7377850034518225, "learning_rate": 7.381679754014254e-07, "loss": 0.5437, "step": 18609 }, { "epoch": 1.66, "grad_norm": 5.733821648543844, "learning_rate": 7.377902594103559e-07, "loss": 0.589, "step": 18610 }, { "epoch": 1.66, "grad_norm": 7.999109437840631, "learning_rate": 7.37412632383579e-07, "loss": 0.6328, "step": 18611 }, { "epoch": 1.66, "grad_norm": 6.623477265812949, "learning_rate": 7.370350943289795e-07, "loss": 0.5535, "step": 18612 }, { "epoch": 1.66, "grad_norm": 8.357716136220102, "learning_rate": 7.366576452544361e-07, "loss": 0.5365, "step": 18613 }, { "epoch": 1.66, "grad_norm": 8.942351944531023, "learning_rate": 7.362802851678275e-07, "loss": 0.5213, "step": 18614 }, { "epoch": 1.66, "grad_norm": 7.177693660022129, "learning_rate": 7.359030140770312e-07, "loss": 0.5869, "step": 18615 }, { "epoch": 1.66, "grad_norm": 5.808089679525784, "learning_rate": 7.355258319899222e-07, "loss": 0.5726, "step": 18616 }, { "epoch": 1.66, "grad_norm": 7.32354715951005, "learning_rate": 7.351487389143707e-07, "loss": 0.6671, "step": 18617 }, { "epoch": 1.66, "grad_norm": 5.101060869939424, "learning_rate": 7.3477173485825e-07, "loss": 0.5667, "step": 18618 }, { "epoch": 1.66, "grad_norm": 6.3293919194574855, "learning_rate": 7.343948198294299e-07, "loss": 0.5676, "step": 18619 }, { "epoch": 1.66, "grad_norm": 5.532579942291563, "learning_rate": 7.34017993835775e-07, "loss": 0.5663, "step": 18620 }, { "epoch": 1.66, "grad_norm": 8.056883143697942, "learning_rate": 7.336412568851525e-07, "loss": 0.6157, "step": 18621 }, { "epoch": 1.66, "grad_norm": 7.9824187760464325, "learning_rate": 7.332646089854251e-07, "loss": 0.5587, "step": 18622 }, { "epoch": 1.66, "grad_norm": 7.1030030105412925, "learning_rate": 7.328880501444563e-07, "loss": 0.5674, "step": 18623 }, { "epoch": 1.66, "grad_norm": 7.257108709007746, "learning_rate": 7.325115803701038e-07, "loss": 0.5365, "step": 18624 }, { "epoch": 1.66, "grad_norm": 6.707902743865074, "learning_rate": 7.321351996702259e-07, "loss": 0.5337, "step": 18625 }, { "epoch": 1.66, "grad_norm": 6.131817029556142, "learning_rate": 7.317589080526794e-07, "loss": 0.5768, "step": 18626 }, { "epoch": 1.66, "grad_norm": 8.904293194958266, "learning_rate": 7.313827055253187e-07, "loss": 0.5186, "step": 18627 }, { "epoch": 1.66, "grad_norm": 6.875943064379474, "learning_rate": 7.310065920959958e-07, "loss": 0.58, "step": 18628 }, { "epoch": 1.66, "grad_norm": 7.173600680305668, "learning_rate": 7.306305677725622e-07, "loss": 0.5499, "step": 18629 }, { "epoch": 1.66, "grad_norm": 6.319265895298604, "learning_rate": 7.302546325628651e-07, "loss": 0.5554, "step": 18630 }, { "epoch": 1.66, "grad_norm": 8.264558975444503, "learning_rate": 7.298787864747514e-07, "loss": 0.5391, "step": 18631 }, { "epoch": 1.66, "grad_norm": 5.897479923246432, "learning_rate": 7.295030295160671e-07, "loss": 0.6011, "step": 18632 }, { "epoch": 1.66, "grad_norm": 8.392347004516212, "learning_rate": 7.291273616946554e-07, "loss": 0.5826, "step": 18633 }, { "epoch": 1.66, "grad_norm": 5.769094701899047, "learning_rate": 7.287517830183566e-07, "loss": 0.6514, "step": 18634 }, { "epoch": 1.66, "grad_norm": 6.123494272997277, "learning_rate": 7.2837629349501e-07, "loss": 0.6001, "step": 18635 }, { "epoch": 1.66, "grad_norm": 10.87676657515052, "learning_rate": 7.280008931324545e-07, "loss": 0.5966, "step": 18636 }, { "epoch": 1.66, "grad_norm": 5.783529813420451, "learning_rate": 7.276255819385241e-07, "loss": 0.5567, "step": 18637 }, { "epoch": 1.66, "grad_norm": 6.27853489836686, "learning_rate": 7.272503599210534e-07, "loss": 0.5889, "step": 18638 }, { "epoch": 1.66, "grad_norm": 5.1056379581585185, "learning_rate": 7.268752270878737e-07, "loss": 0.577, "step": 18639 }, { "epoch": 1.66, "grad_norm": 6.022013039104781, "learning_rate": 7.265001834468161e-07, "loss": 0.6192, "step": 18640 }, { "epoch": 1.66, "grad_norm": 6.304297807417819, "learning_rate": 7.261252290057085e-07, "loss": 0.5629, "step": 18641 }, { "epoch": 1.66, "grad_norm": 6.74885450241976, "learning_rate": 7.257503637723778e-07, "loss": 0.5454, "step": 18642 }, { "epoch": 1.66, "grad_norm": 7.982176648689857, "learning_rate": 7.25375587754647e-07, "loss": 0.57, "step": 18643 }, { "epoch": 1.66, "grad_norm": 8.541529336419861, "learning_rate": 7.250009009603398e-07, "loss": 0.5751, "step": 18644 }, { "epoch": 1.66, "grad_norm": 6.650103547473954, "learning_rate": 7.246263033972762e-07, "loss": 0.6437, "step": 18645 }, { "epoch": 1.66, "grad_norm": 5.748917654076977, "learning_rate": 7.242517950732758e-07, "loss": 0.5764, "step": 18646 }, { "epoch": 1.66, "grad_norm": 4.911560682782207, "learning_rate": 7.238773759961564e-07, "loss": 0.5681, "step": 18647 }, { "epoch": 1.66, "grad_norm": 5.7079078184641645, "learning_rate": 7.235030461737313e-07, "loss": 0.5691, "step": 18648 }, { "epoch": 1.66, "grad_norm": 6.777847785098711, "learning_rate": 7.231288056138159e-07, "loss": 0.5349, "step": 18649 }, { "epoch": 1.66, "grad_norm": 5.76743775229453, "learning_rate": 7.227546543242193e-07, "loss": 0.5476, "step": 18650 }, { "epoch": 1.66, "grad_norm": 8.141345749716066, "learning_rate": 7.223805923127525e-07, "loss": 0.552, "step": 18651 }, { "epoch": 1.66, "grad_norm": 6.175912857858846, "learning_rate": 7.220066195872228e-07, "loss": 0.5819, "step": 18652 }, { "epoch": 1.66, "grad_norm": 4.651008140553142, "learning_rate": 7.216327361554359e-07, "loss": 0.6192, "step": 18653 }, { "epoch": 1.66, "grad_norm": 6.265950931793344, "learning_rate": 7.212589420251981e-07, "loss": 0.6281, "step": 18654 }, { "epoch": 1.66, "grad_norm": 8.377950438085541, "learning_rate": 7.208852372043079e-07, "loss": 0.5185, "step": 18655 }, { "epoch": 1.66, "grad_norm": 8.227941306183167, "learning_rate": 7.20511621700567e-07, "loss": 0.5885, "step": 18656 }, { "epoch": 1.66, "grad_norm": 6.675789424899326, "learning_rate": 7.201380955217746e-07, "loss": 0.5971, "step": 18657 }, { "epoch": 1.66, "grad_norm": 8.12593115679971, "learning_rate": 7.197646586757268e-07, "loss": 0.5305, "step": 18658 }, { "epoch": 1.66, "grad_norm": 7.520701729621175, "learning_rate": 7.193913111702178e-07, "loss": 0.5425, "step": 18659 }, { "epoch": 1.66, "grad_norm": 7.248413749899023, "learning_rate": 7.190180530130419e-07, "loss": 0.5508, "step": 18660 }, { "epoch": 1.66, "grad_norm": 6.158315606482092, "learning_rate": 7.186448842119886e-07, "loss": 0.5598, "step": 18661 }, { "epoch": 1.66, "grad_norm": 6.984874999296665, "learning_rate": 7.182718047748477e-07, "loss": 0.5444, "step": 18662 }, { "epoch": 1.66, "grad_norm": 7.110748210597638, "learning_rate": 7.178988147094051e-07, "loss": 0.5024, "step": 18663 }, { "epoch": 1.67, "grad_norm": 6.586822036717307, "learning_rate": 7.175259140234469e-07, "loss": 0.5832, "step": 18664 }, { "epoch": 1.67, "grad_norm": 8.315036721549612, "learning_rate": 7.171531027247575e-07, "loss": 0.6356, "step": 18665 }, { "epoch": 1.67, "grad_norm": 6.691040891667781, "learning_rate": 7.167803808211171e-07, "loss": 0.5229, "step": 18666 }, { "epoch": 1.67, "grad_norm": 9.681325005006624, "learning_rate": 7.164077483203075e-07, "loss": 0.5439, "step": 18667 }, { "epoch": 1.67, "grad_norm": 5.439042971441842, "learning_rate": 7.160352052301039e-07, "loss": 0.5764, "step": 18668 }, { "epoch": 1.67, "grad_norm": 7.28602361474621, "learning_rate": 7.156627515582843e-07, "loss": 0.6075, "step": 18669 }, { "epoch": 1.67, "grad_norm": 6.568860549564265, "learning_rate": 7.152903873126216e-07, "loss": 0.5176, "step": 18670 }, { "epoch": 1.67, "grad_norm": 9.048156275294534, "learning_rate": 7.149181125008892e-07, "loss": 0.5836, "step": 18671 }, { "epoch": 1.67, "grad_norm": 7.89993615772897, "learning_rate": 7.145459271308564e-07, "loss": 0.607, "step": 18672 }, { "epoch": 1.67, "grad_norm": 8.285752985668125, "learning_rate": 7.141738312102942e-07, "loss": 0.5917, "step": 18673 }, { "epoch": 1.67, "grad_norm": 5.369506918216218, "learning_rate": 7.138018247469658e-07, "loss": 0.5862, "step": 18674 }, { "epoch": 1.67, "grad_norm": 5.710053193021664, "learning_rate": 7.134299077486379e-07, "loss": 0.5193, "step": 18675 }, { "epoch": 1.67, "grad_norm": 8.072010237071831, "learning_rate": 7.130580802230735e-07, "loss": 0.6417, "step": 18676 }, { "epoch": 1.67, "grad_norm": 7.245515967719974, "learning_rate": 7.126863421780344e-07, "loss": 0.5734, "step": 18677 }, { "epoch": 1.67, "grad_norm": 9.232498580156582, "learning_rate": 7.123146936212777e-07, "loss": 0.5279, "step": 18678 }, { "epoch": 1.67, "grad_norm": 7.664449334847816, "learning_rate": 7.119431345605615e-07, "loss": 0.5666, "step": 18679 }, { "epoch": 1.67, "grad_norm": 5.688688098254868, "learning_rate": 7.115716650036431e-07, "loss": 0.5477, "step": 18680 }, { "epoch": 1.67, "grad_norm": 6.075171477177617, "learning_rate": 7.112002849582738e-07, "loss": 0.5649, "step": 18681 }, { "epoch": 1.67, "grad_norm": 6.217644918887227, "learning_rate": 7.10828994432206e-07, "loss": 0.5912, "step": 18682 }, { "epoch": 1.67, "grad_norm": 7.23185791464326, "learning_rate": 7.104577934331897e-07, "loss": 0.6209, "step": 18683 }, { "epoch": 1.67, "grad_norm": 7.117528406015411, "learning_rate": 7.100866819689734e-07, "loss": 0.537, "step": 18684 }, { "epoch": 1.67, "grad_norm": 7.72666004121027, "learning_rate": 7.097156600473027e-07, "loss": 0.6062, "step": 18685 }, { "epoch": 1.67, "grad_norm": 9.56803529877836, "learning_rate": 7.09344727675923e-07, "loss": 0.6482, "step": 18686 }, { "epoch": 1.67, "grad_norm": 7.1979789265901175, "learning_rate": 7.089738848625749e-07, "loss": 0.5351, "step": 18687 }, { "epoch": 1.67, "grad_norm": 7.274290392117034, "learning_rate": 7.086031316149994e-07, "loss": 0.6011, "step": 18688 }, { "epoch": 1.67, "grad_norm": 7.881622047557984, "learning_rate": 7.082324679409359e-07, "loss": 0.6245, "step": 18689 }, { "epoch": 1.67, "grad_norm": 7.417306388040407, "learning_rate": 7.078618938481207e-07, "loss": 0.5234, "step": 18690 }, { "epoch": 1.67, "grad_norm": 5.792101741780786, "learning_rate": 7.074914093442897e-07, "loss": 0.5923, "step": 18691 }, { "epoch": 1.67, "grad_norm": 5.73437880659578, "learning_rate": 7.071210144371743e-07, "loss": 0.5458, "step": 18692 }, { "epoch": 1.67, "grad_norm": 7.6269077018477756, "learning_rate": 7.067507091345078e-07, "loss": 0.5754, "step": 18693 }, { "epoch": 1.67, "grad_norm": 7.471929078348774, "learning_rate": 7.063804934440166e-07, "loss": 0.5746, "step": 18694 }, { "epoch": 1.67, "grad_norm": 5.904466060542601, "learning_rate": 7.060103673734297e-07, "loss": 0.5795, "step": 18695 }, { "epoch": 1.67, "grad_norm": 6.105323647203277, "learning_rate": 7.056403309304733e-07, "loss": 0.5102, "step": 18696 }, { "epoch": 1.67, "grad_norm": 6.414766894151342, "learning_rate": 7.052703841228697e-07, "loss": 0.5483, "step": 18697 }, { "epoch": 1.67, "grad_norm": 7.64479570396298, "learning_rate": 7.04900526958342e-07, "loss": 0.5845, "step": 18698 }, { "epoch": 1.67, "grad_norm": 6.754061116078944, "learning_rate": 7.045307594446111e-07, "loss": 0.5373, "step": 18699 }, { "epoch": 1.67, "grad_norm": 5.350383735338322, "learning_rate": 7.041610815893918e-07, "loss": 0.5393, "step": 18700 }, { "epoch": 1.67, "grad_norm": 5.003381062909269, "learning_rate": 7.037914934004025e-07, "loss": 0.5338, "step": 18701 }, { "epoch": 1.67, "grad_norm": 6.088959912344128, "learning_rate": 7.034219948853577e-07, "loss": 0.5661, "step": 18702 }, { "epoch": 1.67, "grad_norm": 6.733725708744355, "learning_rate": 7.030525860519688e-07, "loss": 0.5823, "step": 18703 }, { "epoch": 1.67, "grad_norm": 7.528017707801505, "learning_rate": 7.026832669079476e-07, "loss": 0.5615, "step": 18704 }, { "epoch": 1.67, "grad_norm": 5.925601395804873, "learning_rate": 7.023140374610027e-07, "loss": 0.6236, "step": 18705 }, { "epoch": 1.67, "grad_norm": 9.542842135152291, "learning_rate": 7.019448977188409e-07, "loss": 0.595, "step": 18706 }, { "epoch": 1.67, "grad_norm": 8.356890442689716, "learning_rate": 7.015758476891659e-07, "loss": 0.6024, "step": 18707 }, { "epoch": 1.67, "grad_norm": 5.997886209615091, "learning_rate": 7.012068873796812e-07, "loss": 0.6002, "step": 18708 }, { "epoch": 1.67, "grad_norm": 6.796145082397513, "learning_rate": 7.008380167980888e-07, "loss": 0.5537, "step": 18709 }, { "epoch": 1.67, "grad_norm": 9.239963522210958, "learning_rate": 7.004692359520876e-07, "loss": 0.617, "step": 18710 }, { "epoch": 1.67, "grad_norm": 5.321059568232503, "learning_rate": 7.001005448493752e-07, "loss": 0.5837, "step": 18711 }, { "epoch": 1.67, "grad_norm": 7.311299161676711, "learning_rate": 6.997319434976491e-07, "loss": 0.6128, "step": 18712 }, { "epoch": 1.67, "grad_norm": 6.215005720842257, "learning_rate": 6.993634319045995e-07, "loss": 0.6269, "step": 18713 }, { "epoch": 1.67, "grad_norm": 6.456462943936085, "learning_rate": 6.989950100779203e-07, "loss": 0.566, "step": 18714 }, { "epoch": 1.67, "grad_norm": 8.834941726039688, "learning_rate": 6.98626678025301e-07, "loss": 0.5575, "step": 18715 }, { "epoch": 1.67, "grad_norm": 6.888359619502168, "learning_rate": 6.982584357544297e-07, "loss": 0.5285, "step": 18716 }, { "epoch": 1.67, "grad_norm": 8.098747007326779, "learning_rate": 6.978902832729934e-07, "loss": 0.5933, "step": 18717 }, { "epoch": 1.67, "grad_norm": 6.544345259790728, "learning_rate": 6.975222205886772e-07, "loss": 0.5085, "step": 18718 }, { "epoch": 1.67, "grad_norm": 8.147526508603773, "learning_rate": 6.971542477091609e-07, "loss": 0.5928, "step": 18719 }, { "epoch": 1.67, "grad_norm": 5.241956454003021, "learning_rate": 6.96786364642128e-07, "loss": 0.53, "step": 18720 }, { "epoch": 1.67, "grad_norm": 7.0727553428761025, "learning_rate": 6.964185713952543e-07, "loss": 0.6135, "step": 18721 }, { "epoch": 1.67, "grad_norm": 5.159982449471985, "learning_rate": 6.960508679762185e-07, "loss": 0.5235, "step": 18722 }, { "epoch": 1.67, "grad_norm": 5.864894478884077, "learning_rate": 6.956832543926956e-07, "loss": 0.5829, "step": 18723 }, { "epoch": 1.67, "grad_norm": 7.642084860783315, "learning_rate": 6.953157306523578e-07, "loss": 0.5657, "step": 18724 }, { "epoch": 1.67, "grad_norm": 7.220734909336617, "learning_rate": 6.94948296762879e-07, "loss": 0.5868, "step": 18725 }, { "epoch": 1.67, "grad_norm": 7.07685488984557, "learning_rate": 6.945809527319247e-07, "loss": 0.509, "step": 18726 }, { "epoch": 1.67, "grad_norm": 5.703640295270385, "learning_rate": 6.942136985671649e-07, "loss": 0.5856, "step": 18727 }, { "epoch": 1.67, "grad_norm": 6.810009436506186, "learning_rate": 6.938465342762646e-07, "loss": 0.5828, "step": 18728 }, { "epoch": 1.67, "grad_norm": 9.965003471760866, "learning_rate": 6.93479459866887e-07, "loss": 0.6228, "step": 18729 }, { "epoch": 1.67, "grad_norm": 6.109170609810283, "learning_rate": 6.931124753466961e-07, "loss": 0.5992, "step": 18730 }, { "epoch": 1.67, "grad_norm": 8.237722885073138, "learning_rate": 6.927455807233491e-07, "loss": 0.581, "step": 18731 }, { "epoch": 1.67, "grad_norm": 7.20712208856185, "learning_rate": 6.923787760045059e-07, "loss": 0.5876, "step": 18732 }, { "epoch": 1.67, "grad_norm": 10.085029075098388, "learning_rate": 6.920120611978215e-07, "loss": 0.5611, "step": 18733 }, { "epoch": 1.67, "grad_norm": 5.6692405277020335, "learning_rate": 6.916454363109526e-07, "loss": 0.5453, "step": 18734 }, { "epoch": 1.67, "grad_norm": 5.6831749969968675, "learning_rate": 6.912789013515486e-07, "loss": 0.5694, "step": 18735 }, { "epoch": 1.67, "grad_norm": 5.3160071959518245, "learning_rate": 6.909124563272618e-07, "loss": 0.5396, "step": 18736 }, { "epoch": 1.67, "grad_norm": 6.853178534807961, "learning_rate": 6.905461012457415e-07, "loss": 0.5342, "step": 18737 }, { "epoch": 1.67, "grad_norm": 6.688804390068174, "learning_rate": 6.901798361146333e-07, "loss": 0.6195, "step": 18738 }, { "epoch": 1.67, "grad_norm": 5.9489721934282755, "learning_rate": 6.898136609415823e-07, "loss": 0.5903, "step": 18739 }, { "epoch": 1.67, "grad_norm": 7.185749223244961, "learning_rate": 6.894475757342323e-07, "loss": 0.5755, "step": 18740 }, { "epoch": 1.67, "grad_norm": 4.504288492776687, "learning_rate": 6.890815805002238e-07, "loss": 0.55, "step": 18741 }, { "epoch": 1.67, "grad_norm": 9.002430118806888, "learning_rate": 6.887156752471963e-07, "loss": 0.5743, "step": 18742 }, { "epoch": 1.67, "grad_norm": 5.154452086167655, "learning_rate": 6.883498599827892e-07, "loss": 0.5793, "step": 18743 }, { "epoch": 1.67, "grad_norm": 6.589666126714272, "learning_rate": 6.879841347146354e-07, "loss": 0.5672, "step": 18744 }, { "epoch": 1.67, "grad_norm": 5.6695765238608224, "learning_rate": 6.876184994503693e-07, "loss": 0.5278, "step": 18745 }, { "epoch": 1.67, "grad_norm": 5.487172838426258, "learning_rate": 6.87252954197623e-07, "loss": 0.5495, "step": 18746 }, { "epoch": 1.67, "grad_norm": 5.585840125163654, "learning_rate": 6.868874989640267e-07, "loss": 0.5679, "step": 18747 }, { "epoch": 1.67, "grad_norm": 8.378562312539302, "learning_rate": 6.865221337572081e-07, "loss": 0.5725, "step": 18748 }, { "epoch": 1.67, "grad_norm": 8.211779636757607, "learning_rate": 6.86156858584795e-07, "loss": 0.5933, "step": 18749 }, { "epoch": 1.67, "grad_norm": 6.989439060179455, "learning_rate": 6.857916734544101e-07, "loss": 0.5798, "step": 18750 }, { "epoch": 1.67, "grad_norm": 7.212689113794405, "learning_rate": 6.854265783736752e-07, "loss": 0.5645, "step": 18751 }, { "epoch": 1.67, "grad_norm": 5.333440539377955, "learning_rate": 6.85061573350212e-07, "loss": 0.5282, "step": 18752 }, { "epoch": 1.67, "grad_norm": 5.607271820728085, "learning_rate": 6.846966583916381e-07, "loss": 0.5245, "step": 18753 }, { "epoch": 1.67, "grad_norm": 6.391278403664465, "learning_rate": 6.843318335055715e-07, "loss": 0.5442, "step": 18754 }, { "epoch": 1.67, "grad_norm": 7.043909403547669, "learning_rate": 6.839670986996267e-07, "loss": 0.6064, "step": 18755 }, { "epoch": 1.67, "grad_norm": 9.378228013436457, "learning_rate": 6.836024539814179e-07, "loss": 0.5873, "step": 18756 }, { "epoch": 1.67, "grad_norm": 6.318488985062379, "learning_rate": 6.832378993585543e-07, "loss": 0.581, "step": 18757 }, { "epoch": 1.67, "grad_norm": 6.756793223529936, "learning_rate": 6.828734348386457e-07, "loss": 0.4844, "step": 18758 }, { "epoch": 1.67, "grad_norm": 11.335201612410094, "learning_rate": 6.825090604292995e-07, "loss": 0.5738, "step": 18759 }, { "epoch": 1.67, "grad_norm": 7.023905326919885, "learning_rate": 6.821447761381216e-07, "loss": 0.5791, "step": 18760 }, { "epoch": 1.67, "grad_norm": 6.078287489474752, "learning_rate": 6.817805819727164e-07, "loss": 0.5299, "step": 18761 }, { "epoch": 1.67, "grad_norm": 6.245017843699664, "learning_rate": 6.814164779406851e-07, "loss": 0.6011, "step": 18762 }, { "epoch": 1.67, "grad_norm": 7.76484488573197, "learning_rate": 6.810524640496274e-07, "loss": 0.5457, "step": 18763 }, { "epoch": 1.67, "grad_norm": 6.087221737572406, "learning_rate": 6.8068854030714e-07, "loss": 0.596, "step": 18764 }, { "epoch": 1.67, "grad_norm": 6.2581433648729226, "learning_rate": 6.803247067208207e-07, "loss": 0.5334, "step": 18765 }, { "epoch": 1.67, "grad_norm": 5.699906844330567, "learning_rate": 6.799609632982629e-07, "loss": 0.6023, "step": 18766 }, { "epoch": 1.67, "grad_norm": 6.9108143228811665, "learning_rate": 6.795973100470587e-07, "loss": 0.5721, "step": 18767 }, { "epoch": 1.67, "grad_norm": 5.211843891150087, "learning_rate": 6.792337469748e-07, "loss": 0.5872, "step": 18768 }, { "epoch": 1.67, "grad_norm": 7.702407954562579, "learning_rate": 6.788702740890752e-07, "loss": 0.5537, "step": 18769 }, { "epoch": 1.67, "grad_norm": 8.372038122474496, "learning_rate": 6.785068913974696e-07, "loss": 0.5813, "step": 18770 }, { "epoch": 1.67, "grad_norm": 6.616550770931809, "learning_rate": 6.781435989075679e-07, "loss": 0.6036, "step": 18771 }, { "epoch": 1.67, "grad_norm": 5.4304274760396405, "learning_rate": 6.777803966269548e-07, "loss": 0.5797, "step": 18772 }, { "epoch": 1.67, "grad_norm": 6.616027884722334, "learning_rate": 6.774172845632099e-07, "loss": 0.5341, "step": 18773 }, { "epoch": 1.67, "grad_norm": 8.009925409586025, "learning_rate": 6.770542627239129e-07, "loss": 0.5396, "step": 18774 }, { "epoch": 1.67, "grad_norm": 5.922225594482779, "learning_rate": 6.766913311166423e-07, "loss": 0.5921, "step": 18775 }, { "epoch": 1.68, "grad_norm": 5.933845732344708, "learning_rate": 6.763284897489714e-07, "loss": 0.5064, "step": 18776 }, { "epoch": 1.68, "grad_norm": 7.530040494106385, "learning_rate": 6.759657386284741e-07, "loss": 0.5509, "step": 18777 }, { "epoch": 1.68, "grad_norm": 6.465859774759495, "learning_rate": 6.756030777627242e-07, "loss": 0.6278, "step": 18778 }, { "epoch": 1.68, "grad_norm": 6.980956829343796, "learning_rate": 6.752405071592882e-07, "loss": 0.521, "step": 18779 }, { "epoch": 1.68, "grad_norm": 8.609240288737123, "learning_rate": 6.74878026825736e-07, "loss": 0.5745, "step": 18780 }, { "epoch": 1.68, "grad_norm": 6.670527244111905, "learning_rate": 6.745156367696326e-07, "loss": 0.5913, "step": 18781 }, { "epoch": 1.68, "grad_norm": 7.780588340578137, "learning_rate": 6.741533369985442e-07, "loss": 0.632, "step": 18782 }, { "epoch": 1.68, "grad_norm": 7.776412430991458, "learning_rate": 6.737911275200299e-07, "loss": 0.5739, "step": 18783 }, { "epoch": 1.68, "grad_norm": 11.70878239371988, "learning_rate": 6.734290083416517e-07, "loss": 0.5665, "step": 18784 }, { "epoch": 1.68, "grad_norm": 6.592203350338689, "learning_rate": 6.730669794709682e-07, "loss": 0.5828, "step": 18785 }, { "epoch": 1.68, "grad_norm": 6.429209256142616, "learning_rate": 6.727050409155356e-07, "loss": 0.5476, "step": 18786 }, { "epoch": 1.68, "grad_norm": 8.42008412669541, "learning_rate": 6.723431926829089e-07, "loss": 0.5732, "step": 18787 }, { "epoch": 1.68, "grad_norm": 5.97629881317841, "learning_rate": 6.719814347806414e-07, "loss": 0.5316, "step": 18788 }, { "epoch": 1.68, "grad_norm": 7.663631241160309, "learning_rate": 6.716197672162822e-07, "loss": 0.5595, "step": 18789 }, { "epoch": 1.68, "grad_norm": 8.116874781957778, "learning_rate": 6.712581899973814e-07, "loss": 0.5232, "step": 18790 }, { "epoch": 1.68, "grad_norm": 7.018151464113047, "learning_rate": 6.708967031314856e-07, "loss": 0.5506, "step": 18791 }, { "epoch": 1.68, "grad_norm": 6.218145677341601, "learning_rate": 6.705353066261421e-07, "loss": 0.4842, "step": 18792 }, { "epoch": 1.68, "grad_norm": 5.675144095481751, "learning_rate": 6.701740004888918e-07, "loss": 0.5459, "step": 18793 }, { "epoch": 1.68, "grad_norm": 7.692457789866794, "learning_rate": 6.698127847272767e-07, "loss": 0.5617, "step": 18794 }, { "epoch": 1.68, "grad_norm": 6.108632728383405, "learning_rate": 6.694516593488381e-07, "loss": 0.5815, "step": 18795 }, { "epoch": 1.68, "grad_norm": 8.053903319691106, "learning_rate": 6.690906243611112e-07, "loss": 0.567, "step": 18796 }, { "epoch": 1.68, "grad_norm": 5.658944717574081, "learning_rate": 6.687296797716331e-07, "loss": 0.5905, "step": 18797 }, { "epoch": 1.68, "grad_norm": 7.39532687998217, "learning_rate": 6.683688255879378e-07, "loss": 0.5759, "step": 18798 }, { "epoch": 1.68, "grad_norm": 6.49582086520836, "learning_rate": 6.680080618175566e-07, "loss": 0.5526, "step": 18799 }, { "epoch": 1.68, "grad_norm": 6.994009290491478, "learning_rate": 6.676473884680207e-07, "loss": 0.5778, "step": 18800 }, { "epoch": 1.68, "grad_norm": 6.7239755606589515, "learning_rate": 6.672868055468584e-07, "loss": 0.5672, "step": 18801 }, { "epoch": 1.68, "grad_norm": 5.349110959927907, "learning_rate": 6.669263130615949e-07, "loss": 0.5272, "step": 18802 }, { "epoch": 1.68, "grad_norm": 5.198362178394492, "learning_rate": 6.665659110197554e-07, "loss": 0.5086, "step": 18803 }, { "epoch": 1.68, "grad_norm": 6.074779064356047, "learning_rate": 6.662055994288625e-07, "loss": 0.5406, "step": 18804 }, { "epoch": 1.68, "grad_norm": 7.459916391306315, "learning_rate": 6.658453782964363e-07, "loss": 0.4976, "step": 18805 }, { "epoch": 1.68, "grad_norm": 5.136413191241806, "learning_rate": 6.654852476299978e-07, "loss": 0.5703, "step": 18806 }, { "epoch": 1.68, "grad_norm": 9.503814022949408, "learning_rate": 6.651252074370612e-07, "loss": 0.5617, "step": 18807 }, { "epoch": 1.68, "grad_norm": 5.7500483386980665, "learning_rate": 6.64765257725144e-07, "loss": 0.5354, "step": 18808 }, { "epoch": 1.68, "grad_norm": 6.814369833312284, "learning_rate": 6.644053985017562e-07, "loss": 0.541, "step": 18809 }, { "epoch": 1.68, "grad_norm": 7.25137842869322, "learning_rate": 6.640456297744114e-07, "loss": 0.5397, "step": 18810 }, { "epoch": 1.68, "grad_norm": 6.3502201467293435, "learning_rate": 6.636859515506183e-07, "loss": 0.588, "step": 18811 }, { "epoch": 1.68, "grad_norm": 6.810472748926223, "learning_rate": 6.633263638378851e-07, "loss": 0.6125, "step": 18812 }, { "epoch": 1.68, "grad_norm": 7.721245419139586, "learning_rate": 6.629668666437177e-07, "loss": 0.5359, "step": 18813 }, { "epoch": 1.68, "grad_norm": 5.87606356444877, "learning_rate": 6.626074599756177e-07, "loss": 0.5623, "step": 18814 }, { "epoch": 1.68, "grad_norm": 8.813522865544947, "learning_rate": 6.622481438410883e-07, "loss": 0.5718, "step": 18815 }, { "epoch": 1.68, "grad_norm": 7.7073305565248384, "learning_rate": 6.618889182476296e-07, "loss": 0.5058, "step": 18816 }, { "epoch": 1.68, "grad_norm": 6.516695352234643, "learning_rate": 6.615297832027395e-07, "loss": 0.5857, "step": 18817 }, { "epoch": 1.68, "grad_norm": 8.018468838088008, "learning_rate": 6.611707387139138e-07, "loss": 0.613, "step": 18818 }, { "epoch": 1.68, "grad_norm": 7.318215491126707, "learning_rate": 6.608117847886481e-07, "loss": 0.6463, "step": 18819 }, { "epoch": 1.68, "grad_norm": 7.55661203817161, "learning_rate": 6.60452921434433e-07, "loss": 0.5531, "step": 18820 }, { "epoch": 1.68, "grad_norm": 5.799424692881998, "learning_rate": 6.600941486587603e-07, "loss": 0.5896, "step": 18821 }, { "epoch": 1.68, "grad_norm": 7.601144408050781, "learning_rate": 6.597354664691175e-07, "loss": 0.573, "step": 18822 }, { "epoch": 1.68, "grad_norm": 7.6409893772529225, "learning_rate": 6.593768748729917e-07, "loss": 0.5406, "step": 18823 }, { "epoch": 1.68, "grad_norm": 6.726992401520035, "learning_rate": 6.590183738778677e-07, "loss": 0.584, "step": 18824 }, { "epoch": 1.68, "grad_norm": 6.192460261554137, "learning_rate": 6.586599634912283e-07, "loss": 0.5349, "step": 18825 }, { "epoch": 1.68, "grad_norm": 5.344657429268577, "learning_rate": 6.583016437205564e-07, "loss": 0.5964, "step": 18826 }, { "epoch": 1.68, "grad_norm": 5.503027925962039, "learning_rate": 6.579434145733276e-07, "loss": 0.5824, "step": 18827 }, { "epoch": 1.68, "grad_norm": 6.911923641265136, "learning_rate": 6.575852760570217e-07, "loss": 0.5303, "step": 18828 }, { "epoch": 1.68, "grad_norm": 7.858446698722333, "learning_rate": 6.572272281791137e-07, "loss": 0.5656, "step": 18829 }, { "epoch": 1.68, "grad_norm": 5.805142288336392, "learning_rate": 6.568692709470765e-07, "loss": 0.5615, "step": 18830 }, { "epoch": 1.68, "grad_norm": 7.3257943037876, "learning_rate": 6.565114043683818e-07, "loss": 0.4914, "step": 18831 }, { "epoch": 1.68, "grad_norm": 8.715461340094754, "learning_rate": 6.56153628450501e-07, "loss": 0.5991, "step": 18832 }, { "epoch": 1.68, "grad_norm": 29.321924962930918, "learning_rate": 6.557959432008987e-07, "loss": 0.7496, "step": 18833 }, { "epoch": 1.68, "grad_norm": 6.786428072304873, "learning_rate": 6.554383486270427e-07, "loss": 0.5416, "step": 18834 }, { "epoch": 1.68, "grad_norm": 8.656064950369164, "learning_rate": 6.550808447363977e-07, "loss": 0.5306, "step": 18835 }, { "epoch": 1.68, "grad_norm": 7.117994606244157, "learning_rate": 6.547234315364243e-07, "loss": 0.6213, "step": 18836 }, { "epoch": 1.68, "grad_norm": 7.445865189010255, "learning_rate": 6.543661090345827e-07, "loss": 0.6063, "step": 18837 }, { "epoch": 1.68, "grad_norm": 8.251337492712066, "learning_rate": 6.540088772383318e-07, "loss": 0.5543, "step": 18838 }, { "epoch": 1.68, "grad_norm": 8.95099479739495, "learning_rate": 6.536517361551292e-07, "loss": 0.5794, "step": 18839 }, { "epoch": 1.68, "grad_norm": 8.177532868716206, "learning_rate": 6.532946857924277e-07, "loss": 0.5515, "step": 18840 }, { "epoch": 1.68, "grad_norm": 11.589111429873162, "learning_rate": 6.529377261576797e-07, "loss": 0.6229, "step": 18841 }, { "epoch": 1.68, "grad_norm": 7.1505555137886425, "learning_rate": 6.525808572583376e-07, "loss": 0.5879, "step": 18842 }, { "epoch": 1.68, "grad_norm": 5.710191984099238, "learning_rate": 6.522240791018486e-07, "loss": 0.6313, "step": 18843 }, { "epoch": 1.68, "grad_norm": 6.335278358144622, "learning_rate": 6.518673916956608e-07, "loss": 0.5043, "step": 18844 }, { "epoch": 1.68, "grad_norm": 6.706837346865468, "learning_rate": 6.515107950472199e-07, "loss": 0.5547, "step": 18845 }, { "epoch": 1.68, "grad_norm": 7.055635324130055, "learning_rate": 6.511542891639677e-07, "loss": 0.5829, "step": 18846 }, { "epoch": 1.68, "grad_norm": 7.482139094299594, "learning_rate": 6.50797874053345e-07, "loss": 0.5505, "step": 18847 }, { "epoch": 1.68, "grad_norm": 6.119250792768566, "learning_rate": 6.504415497227928e-07, "loss": 0.5558, "step": 18848 }, { "epoch": 1.68, "grad_norm": 7.864679887938728, "learning_rate": 6.500853161797477e-07, "loss": 0.5417, "step": 18849 }, { "epoch": 1.68, "grad_norm": 6.606704184365366, "learning_rate": 6.497291734316463e-07, "loss": 0.5505, "step": 18850 }, { "epoch": 1.68, "grad_norm": 7.9258781260246405, "learning_rate": 6.493731214859206e-07, "loss": 0.619, "step": 18851 }, { "epoch": 1.68, "grad_norm": 7.039306097892574, "learning_rate": 6.490171603500039e-07, "loss": 0.5956, "step": 18852 }, { "epoch": 1.68, "grad_norm": 10.132254962253365, "learning_rate": 6.48661290031325e-07, "loss": 0.5945, "step": 18853 }, { "epoch": 1.68, "grad_norm": 5.872016844322838, "learning_rate": 6.48305510537312e-07, "loss": 0.5515, "step": 18854 }, { "epoch": 1.68, "grad_norm": 5.307926348347676, "learning_rate": 6.479498218753921e-07, "loss": 0.5882, "step": 18855 }, { "epoch": 1.68, "grad_norm": 9.33701233142072, "learning_rate": 6.475942240529881e-07, "loss": 0.5361, "step": 18856 }, { "epoch": 1.68, "grad_norm": 7.474261443104198, "learning_rate": 6.472387170775235e-07, "loss": 0.5223, "step": 18857 }, { "epoch": 1.68, "grad_norm": 8.5826781119489, "learning_rate": 6.468833009564191e-07, "loss": 0.5545, "step": 18858 }, { "epoch": 1.68, "grad_norm": 7.753580568110589, "learning_rate": 6.465279756970916e-07, "loss": 0.5861, "step": 18859 }, { "epoch": 1.68, "grad_norm": 7.817911468054885, "learning_rate": 6.461727413069586e-07, "loss": 0.5665, "step": 18860 }, { "epoch": 1.68, "grad_norm": 7.272251931746201, "learning_rate": 6.45817597793435e-07, "loss": 0.5635, "step": 18861 }, { "epoch": 1.68, "grad_norm": 8.669558691619732, "learning_rate": 6.45462545163934e-07, "loss": 0.6146, "step": 18862 }, { "epoch": 1.68, "grad_norm": 7.69145720017826, "learning_rate": 6.451075834258652e-07, "loss": 0.6444, "step": 18863 }, { "epoch": 1.68, "grad_norm": 6.607288333135034, "learning_rate": 6.447527125866405e-07, "loss": 0.5825, "step": 18864 }, { "epoch": 1.68, "grad_norm": 7.365069050573747, "learning_rate": 6.443979326536648e-07, "loss": 0.5694, "step": 18865 }, { "epoch": 1.68, "grad_norm": 6.83092805065577, "learning_rate": 6.440432436343425e-07, "loss": 0.5441, "step": 18866 }, { "epoch": 1.68, "grad_norm": 7.342621958679847, "learning_rate": 6.43688645536078e-07, "loss": 0.6119, "step": 18867 }, { "epoch": 1.68, "grad_norm": 7.865565666350034, "learning_rate": 6.433341383662728e-07, "loss": 0.5185, "step": 18868 }, { "epoch": 1.68, "grad_norm": 5.098905082581208, "learning_rate": 6.429797221323264e-07, "loss": 0.5526, "step": 18869 }, { "epoch": 1.68, "grad_norm": 6.779841551122888, "learning_rate": 6.426253968416368e-07, "loss": 0.5396, "step": 18870 }, { "epoch": 1.68, "grad_norm": 6.984466253006608, "learning_rate": 6.422711625016009e-07, "loss": 0.5772, "step": 18871 }, { "epoch": 1.68, "grad_norm": 6.24311890322206, "learning_rate": 6.4191701911961e-07, "loss": 0.5197, "step": 18872 }, { "epoch": 1.68, "grad_norm": 4.810332912484384, "learning_rate": 6.415629667030571e-07, "loss": 0.6034, "step": 18873 }, { "epoch": 1.68, "grad_norm": 7.222246773034977, "learning_rate": 6.412090052593328e-07, "loss": 0.5928, "step": 18874 }, { "epoch": 1.68, "grad_norm": 5.452383838966274, "learning_rate": 6.408551347958247e-07, "loss": 0.5246, "step": 18875 }, { "epoch": 1.68, "grad_norm": 6.490950268250788, "learning_rate": 6.40501355319919e-07, "loss": 0.5707, "step": 18876 }, { "epoch": 1.68, "grad_norm": 6.400643621422679, "learning_rate": 6.401476668390016e-07, "loss": 0.5239, "step": 18877 }, { "epoch": 1.68, "grad_norm": 9.424243891981137, "learning_rate": 6.397940693604543e-07, "loss": 0.5428, "step": 18878 }, { "epoch": 1.68, "grad_norm": 6.124580133931576, "learning_rate": 6.394405628916555e-07, "loss": 0.6021, "step": 18879 }, { "epoch": 1.68, "grad_norm": 6.406309208254688, "learning_rate": 6.390871474399857e-07, "loss": 0.5572, "step": 18880 }, { "epoch": 1.68, "grad_norm": 4.877279978578899, "learning_rate": 6.387338230128209e-07, "loss": 0.5857, "step": 18881 }, { "epoch": 1.68, "grad_norm": 7.5690298671131115, "learning_rate": 6.383805896175371e-07, "loss": 0.5783, "step": 18882 }, { "epoch": 1.68, "grad_norm": 8.580830253656917, "learning_rate": 6.380274472615067e-07, "loss": 0.5534, "step": 18883 }, { "epoch": 1.68, "grad_norm": 5.3086429205889, "learning_rate": 6.37674395952102e-07, "loss": 0.5023, "step": 18884 }, { "epoch": 1.68, "grad_norm": 6.017225910855992, "learning_rate": 6.373214356966894e-07, "loss": 0.5878, "step": 18885 }, { "epoch": 1.68, "grad_norm": 6.948973237846084, "learning_rate": 6.369685665026381e-07, "loss": 0.614, "step": 18886 }, { "epoch": 1.68, "grad_norm": 8.300911575829993, "learning_rate": 6.366157883773128e-07, "loss": 0.6188, "step": 18887 }, { "epoch": 1.68, "grad_norm": 6.178797171149819, "learning_rate": 6.362631013280774e-07, "loss": 0.5527, "step": 18888 }, { "epoch": 1.69, "grad_norm": 6.0327066672801175, "learning_rate": 6.359105053622943e-07, "loss": 0.6266, "step": 18889 }, { "epoch": 1.69, "grad_norm": 6.762705302196657, "learning_rate": 6.355580004873214e-07, "loss": 0.5629, "step": 18890 }, { "epoch": 1.69, "grad_norm": 7.178532817370461, "learning_rate": 6.352055867105173e-07, "loss": 0.5019, "step": 18891 }, { "epoch": 1.69, "grad_norm": 7.794980904578449, "learning_rate": 6.348532640392375e-07, "loss": 0.6092, "step": 18892 }, { "epoch": 1.69, "grad_norm": 6.676391199337664, "learning_rate": 6.345010324808376e-07, "loss": 0.6671, "step": 18893 }, { "epoch": 1.69, "grad_norm": 4.565822694884817, "learning_rate": 6.341488920426675e-07, "loss": 0.6108, "step": 18894 }, { "epoch": 1.69, "grad_norm": 8.190790186502118, "learning_rate": 6.337968427320778e-07, "loss": 0.5507, "step": 18895 }, { "epoch": 1.69, "grad_norm": 6.376414282470727, "learning_rate": 6.334448845564184e-07, "loss": 0.5287, "step": 18896 }, { "epoch": 1.69, "grad_norm": 6.458842056780733, "learning_rate": 6.330930175230337e-07, "loss": 0.5662, "step": 18897 }, { "epoch": 1.69, "grad_norm": 6.437514913712253, "learning_rate": 6.327412416392687e-07, "loss": 0.6585, "step": 18898 }, { "epoch": 1.69, "grad_norm": 8.029905089667606, "learning_rate": 6.32389556912466e-07, "loss": 0.6368, "step": 18899 }, { "epoch": 1.69, "grad_norm": 7.496775116932812, "learning_rate": 6.320379633499663e-07, "loss": 0.5654, "step": 18900 }, { "epoch": 1.69, "grad_norm": 9.165777463870116, "learning_rate": 6.316864609591084e-07, "loss": 0.5619, "step": 18901 }, { "epoch": 1.69, "grad_norm": 4.8614028401292435, "learning_rate": 6.313350497472304e-07, "loss": 0.5023, "step": 18902 }, { "epoch": 1.69, "grad_norm": 6.105953023442856, "learning_rate": 6.309837297216653e-07, "loss": 0.5781, "step": 18903 }, { "epoch": 1.69, "grad_norm": 8.128712358683185, "learning_rate": 6.306325008897463e-07, "loss": 0.6142, "step": 18904 }, { "epoch": 1.69, "grad_norm": 10.108277967013555, "learning_rate": 6.302813632588056e-07, "loss": 0.5957, "step": 18905 }, { "epoch": 1.69, "grad_norm": 6.799660623405033, "learning_rate": 6.299303168361714e-07, "loss": 0.5837, "step": 18906 }, { "epoch": 1.69, "grad_norm": 6.999237164966168, "learning_rate": 6.295793616291729e-07, "loss": 0.568, "step": 18907 }, { "epoch": 1.69, "grad_norm": 5.422449586789886, "learning_rate": 6.292284976451335e-07, "loss": 0.5297, "step": 18908 }, { "epoch": 1.69, "grad_norm": 5.9693723742317815, "learning_rate": 6.28877724891378e-07, "loss": 0.6109, "step": 18909 }, { "epoch": 1.69, "grad_norm": 7.045375818049848, "learning_rate": 6.285270433752261e-07, "loss": 0.5692, "step": 18910 }, { "epoch": 1.69, "grad_norm": 6.553112804293156, "learning_rate": 6.281764531039991e-07, "loss": 0.5596, "step": 18911 }, { "epoch": 1.69, "grad_norm": 6.210412775616269, "learning_rate": 6.278259540850145e-07, "loss": 0.5821, "step": 18912 }, { "epoch": 1.69, "grad_norm": 6.002990630612759, "learning_rate": 6.274755463255883e-07, "loss": 0.514, "step": 18913 }, { "epoch": 1.69, "grad_norm": 6.613229149851825, "learning_rate": 6.271252298330344e-07, "loss": 0.5406, "step": 18914 }, { "epoch": 1.69, "grad_norm": 12.180184953606888, "learning_rate": 6.267750046146659e-07, "loss": 0.5771, "step": 18915 }, { "epoch": 1.69, "grad_norm": 6.175987278095403, "learning_rate": 6.264248706777908e-07, "loss": 0.4995, "step": 18916 }, { "epoch": 1.69, "grad_norm": 7.285196121682943, "learning_rate": 6.260748280297186e-07, "loss": 0.6042, "step": 18917 }, { "epoch": 1.69, "grad_norm": 5.922147842959956, "learning_rate": 6.257248766777558e-07, "loss": 0.5817, "step": 18918 }, { "epoch": 1.69, "grad_norm": 7.326015210127596, "learning_rate": 6.253750166292067e-07, "loss": 0.5729, "step": 18919 }, { "epoch": 1.69, "grad_norm": 6.579192779897657, "learning_rate": 6.250252478913738e-07, "loss": 0.5559, "step": 18920 }, { "epoch": 1.69, "grad_norm": 5.317479280248291, "learning_rate": 6.246755704715585e-07, "loss": 0.5606, "step": 18921 }, { "epoch": 1.69, "grad_norm": 7.3536689154034285, "learning_rate": 6.243259843770594e-07, "loss": 0.5459, "step": 18922 }, { "epoch": 1.69, "grad_norm": 8.159817965950001, "learning_rate": 6.239764896151713e-07, "loss": 0.5832, "step": 18923 }, { "epoch": 1.69, "grad_norm": 6.824987391693191, "learning_rate": 6.236270861931909e-07, "loss": 0.5565, "step": 18924 }, { "epoch": 1.69, "grad_norm": 5.389013804926853, "learning_rate": 6.23277774118411e-07, "loss": 0.5639, "step": 18925 }, { "epoch": 1.69, "grad_norm": 5.717059472870761, "learning_rate": 6.229285533981228e-07, "loss": 0.514, "step": 18926 }, { "epoch": 1.69, "grad_norm": 5.609567994480676, "learning_rate": 6.225794240396155e-07, "loss": 0.5433, "step": 18927 }, { "epoch": 1.69, "grad_norm": 9.00184275801144, "learning_rate": 6.222303860501771e-07, "loss": 0.5665, "step": 18928 }, { "epoch": 1.69, "grad_norm": 6.702379104834645, "learning_rate": 6.218814394370915e-07, "loss": 0.5551, "step": 18929 }, { "epoch": 1.69, "grad_norm": 5.792444230298871, "learning_rate": 6.215325842076425e-07, "loss": 0.519, "step": 18930 }, { "epoch": 1.69, "grad_norm": 6.317182325276829, "learning_rate": 6.211838203691123e-07, "loss": 0.5645, "step": 18931 }, { "epoch": 1.69, "grad_norm": 7.698935367909271, "learning_rate": 6.208351479287805e-07, "loss": 0.5637, "step": 18932 }, { "epoch": 1.69, "grad_norm": 7.093805217477961, "learning_rate": 6.204865668939247e-07, "loss": 0.5755, "step": 18933 }, { "epoch": 1.69, "grad_norm": 5.894567783421155, "learning_rate": 6.201380772718218e-07, "loss": 0.5622, "step": 18934 }, { "epoch": 1.69, "grad_norm": 8.033144682150617, "learning_rate": 6.197896790697439e-07, "loss": 0.5912, "step": 18935 }, { "epoch": 1.69, "grad_norm": 6.00842784026854, "learning_rate": 6.194413722949649e-07, "loss": 0.5261, "step": 18936 }, { "epoch": 1.69, "grad_norm": 7.688505475149964, "learning_rate": 6.190931569547532e-07, "loss": 0.5735, "step": 18937 }, { "epoch": 1.69, "grad_norm": 8.5165943292433, "learning_rate": 6.187450330563777e-07, "loss": 0.5977, "step": 18938 }, { "epoch": 1.69, "grad_norm": 6.678833741666529, "learning_rate": 6.183970006071049e-07, "loss": 0.5226, "step": 18939 }, { "epoch": 1.69, "grad_norm": 5.357243303401968, "learning_rate": 6.180490596141992e-07, "loss": 0.5885, "step": 18940 }, { "epoch": 1.69, "grad_norm": 8.393318789680654, "learning_rate": 6.177012100849239e-07, "loss": 0.5508, "step": 18941 }, { "epoch": 1.69, "grad_norm": 5.897529237829079, "learning_rate": 6.173534520265379e-07, "loss": 0.5704, "step": 18942 }, { "epoch": 1.69, "grad_norm": 8.069498098497583, "learning_rate": 6.170057854463002e-07, "loss": 0.5459, "step": 18943 }, { "epoch": 1.69, "grad_norm": 8.853617140536203, "learning_rate": 6.16658210351469e-07, "loss": 0.5531, "step": 18944 }, { "epoch": 1.69, "grad_norm": 7.30186853384495, "learning_rate": 6.163107267492974e-07, "loss": 0.551, "step": 18945 }, { "epoch": 1.69, "grad_norm": 5.951803501283672, "learning_rate": 6.159633346470401e-07, "loss": 0.6103, "step": 18946 }, { "epoch": 1.69, "grad_norm": 7.993567274307509, "learning_rate": 6.156160340519479e-07, "loss": 0.6061, "step": 18947 }, { "epoch": 1.69, "grad_norm": 6.750480489614243, "learning_rate": 6.152688249712685e-07, "loss": 0.5605, "step": 18948 }, { "epoch": 1.69, "grad_norm": 6.740640642173404, "learning_rate": 6.149217074122499e-07, "loss": 0.5319, "step": 18949 }, { "epoch": 1.69, "grad_norm": 4.724266978606212, "learning_rate": 6.145746813821374e-07, "loss": 0.5427, "step": 18950 }, { "epoch": 1.69, "grad_norm": 6.413920940265159, "learning_rate": 6.142277468881758e-07, "loss": 0.5638, "step": 18951 }, { "epoch": 1.69, "grad_norm": 6.670657278821217, "learning_rate": 6.138809039376042e-07, "loss": 0.5087, "step": 18952 }, { "epoch": 1.69, "grad_norm": 5.607921597932446, "learning_rate": 6.135341525376631e-07, "loss": 0.5884, "step": 18953 }, { "epoch": 1.69, "grad_norm": 6.566989064524504, "learning_rate": 6.131874926955917e-07, "loss": 0.5261, "step": 18954 }, { "epoch": 1.69, "grad_norm": 5.840820572344754, "learning_rate": 6.128409244186228e-07, "loss": 0.563, "step": 18955 }, { "epoch": 1.69, "grad_norm": 8.83385485003084, "learning_rate": 6.124944477139927e-07, "loss": 0.5739, "step": 18956 }, { "epoch": 1.69, "grad_norm": 7.557325344300222, "learning_rate": 6.12148062588932e-07, "loss": 0.5326, "step": 18957 }, { "epoch": 1.69, "grad_norm": 6.5211304099675855, "learning_rate": 6.118017690506717e-07, "loss": 0.5695, "step": 18958 }, { "epoch": 1.69, "grad_norm": 5.479612808945009, "learning_rate": 6.114555671064388e-07, "loss": 0.5532, "step": 18959 }, { "epoch": 1.69, "grad_norm": 5.606172282171865, "learning_rate": 6.111094567634618e-07, "loss": 0.5348, "step": 18960 }, { "epoch": 1.69, "grad_norm": 5.869737179515736, "learning_rate": 6.107634380289624e-07, "loss": 0.5046, "step": 18961 }, { "epoch": 1.69, "grad_norm": 7.813271500961132, "learning_rate": 6.104175109101635e-07, "loss": 0.5368, "step": 18962 }, { "epoch": 1.69, "grad_norm": 7.09299052541404, "learning_rate": 6.10071675414286e-07, "loss": 0.5557, "step": 18963 }, { "epoch": 1.69, "grad_norm": 9.145060674427496, "learning_rate": 6.097259315485487e-07, "loss": 0.5439, "step": 18964 }, { "epoch": 1.69, "grad_norm": 8.416444108411651, "learning_rate": 6.093802793201692e-07, "loss": 0.535, "step": 18965 }, { "epoch": 1.69, "grad_norm": 6.715219276674035, "learning_rate": 6.090347187363599e-07, "loss": 0.6482, "step": 18966 }, { "epoch": 1.69, "grad_norm": 6.272779095841833, "learning_rate": 6.086892498043362e-07, "loss": 0.6281, "step": 18967 }, { "epoch": 1.69, "grad_norm": 9.729364301083235, "learning_rate": 6.083438725313063e-07, "loss": 0.5668, "step": 18968 }, { "epoch": 1.69, "grad_norm": 6.7402057779917826, "learning_rate": 6.079985869244803e-07, "loss": 0.6069, "step": 18969 }, { "epoch": 1.69, "grad_norm": 5.5990215912099055, "learning_rate": 6.076533929910655e-07, "loss": 0.5031, "step": 18970 }, { "epoch": 1.69, "grad_norm": 5.239932938188158, "learning_rate": 6.073082907382677e-07, "loss": 0.5054, "step": 18971 }, { "epoch": 1.69, "grad_norm": 7.979842494624825, "learning_rate": 6.0696328017329e-07, "loss": 0.5722, "step": 18972 }, { "epoch": 1.69, "grad_norm": 5.860412031989105, "learning_rate": 6.066183613033322e-07, "loss": 0.5885, "step": 18973 }, { "epoch": 1.69, "grad_norm": 5.4122212320912775, "learning_rate": 6.062735341355947e-07, "loss": 0.6083, "step": 18974 }, { "epoch": 1.69, "grad_norm": 6.900810240446636, "learning_rate": 6.059287986772749e-07, "loss": 0.562, "step": 18975 }, { "epoch": 1.69, "grad_norm": 6.88898251353852, "learning_rate": 6.055841549355689e-07, "loss": 0.5718, "step": 18976 }, { "epoch": 1.69, "grad_norm": 5.666640409884414, "learning_rate": 6.052396029176704e-07, "loss": 0.5684, "step": 18977 }, { "epoch": 1.69, "grad_norm": 7.761843215551754, "learning_rate": 6.048951426307714e-07, "loss": 0.5738, "step": 18978 }, { "epoch": 1.69, "grad_norm": 6.4322337950541675, "learning_rate": 6.0455077408206e-07, "loss": 0.5696, "step": 18979 }, { "epoch": 1.69, "grad_norm": 6.974515221183445, "learning_rate": 6.04206497278727e-07, "loss": 0.627, "step": 18980 }, { "epoch": 1.69, "grad_norm": 5.759700016637714, "learning_rate": 6.038623122279552e-07, "loss": 0.5917, "step": 18981 }, { "epoch": 1.69, "grad_norm": 6.275948403257921, "learning_rate": 6.035182189369304e-07, "loss": 0.601, "step": 18982 }, { "epoch": 1.69, "grad_norm": 8.022046387669423, "learning_rate": 6.031742174128346e-07, "loss": 0.6206, "step": 18983 }, { "epoch": 1.69, "grad_norm": 7.6544756360027275, "learning_rate": 6.028303076628478e-07, "loss": 0.611, "step": 18984 }, { "epoch": 1.69, "grad_norm": 6.800875896048132, "learning_rate": 6.024864896941501e-07, "loss": 0.5519, "step": 18985 }, { "epoch": 1.69, "grad_norm": 6.44689105881154, "learning_rate": 6.021427635139155e-07, "loss": 0.537, "step": 18986 }, { "epoch": 1.69, "grad_norm": 7.416079584644411, "learning_rate": 6.017991291293196e-07, "loss": 0.5582, "step": 18987 }, { "epoch": 1.69, "grad_norm": 6.000490980293604, "learning_rate": 6.014555865475347e-07, "loss": 0.5233, "step": 18988 }, { "epoch": 1.69, "grad_norm": 6.42536212664747, "learning_rate": 6.011121357757322e-07, "loss": 0.5126, "step": 18989 }, { "epoch": 1.69, "grad_norm": 6.704908591751175, "learning_rate": 6.007687768210801e-07, "loss": 0.5824, "step": 18990 }, { "epoch": 1.69, "grad_norm": 6.52248744774303, "learning_rate": 6.004255096907469e-07, "loss": 0.5803, "step": 18991 }, { "epoch": 1.69, "grad_norm": 6.46517182669096, "learning_rate": 6.000823343918954e-07, "loss": 0.522, "step": 18992 }, { "epoch": 1.69, "grad_norm": 7.013918618622778, "learning_rate": 5.99739250931689e-07, "loss": 0.534, "step": 18993 }, { "epoch": 1.69, "grad_norm": 6.003693457792655, "learning_rate": 5.993962593172908e-07, "loss": 0.5428, "step": 18994 }, { "epoch": 1.69, "grad_norm": 6.898869674535669, "learning_rate": 5.990533595558573e-07, "loss": 0.544, "step": 18995 }, { "epoch": 1.69, "grad_norm": 5.737435201134384, "learning_rate": 5.987105516545472e-07, "loss": 0.5581, "step": 18996 }, { "epoch": 1.69, "grad_norm": 7.303956481437001, "learning_rate": 5.983678356205152e-07, "loss": 0.5115, "step": 18997 }, { "epoch": 1.69, "grad_norm": 8.070635607130546, "learning_rate": 5.98025211460917e-07, "loss": 0.5834, "step": 18998 }, { "epoch": 1.69, "grad_norm": 5.756638783192319, "learning_rate": 5.976826791829004e-07, "loss": 0.5955, "step": 18999 }, { "epoch": 1.69, "grad_norm": 4.694859608618736, "learning_rate": 5.973402387936178e-07, "loss": 0.547, "step": 19000 }, { "epoch": 1.7, "grad_norm": 6.759035837845706, "learning_rate": 5.969978903002155e-07, "loss": 0.5788, "step": 19001 }, { "epoch": 1.7, "grad_norm": 8.933164747964263, "learning_rate": 5.9665563370984e-07, "loss": 0.6176, "step": 19002 }, { "epoch": 1.7, "grad_norm": 8.284503643220216, "learning_rate": 5.963134690296352e-07, "loss": 0.5917, "step": 19003 }, { "epoch": 1.7, "grad_norm": 4.9370573549582595, "learning_rate": 5.959713962667435e-07, "loss": 0.5778, "step": 19004 }, { "epoch": 1.7, "grad_norm": 6.764708084715923, "learning_rate": 5.956294154283037e-07, "loss": 0.5429, "step": 19005 }, { "epoch": 1.7, "grad_norm": 7.203372011396641, "learning_rate": 5.952875265214541e-07, "loss": 0.6001, "step": 19006 }, { "epoch": 1.7, "grad_norm": 7.313506710314377, "learning_rate": 5.949457295533311e-07, "loss": 0.5295, "step": 19007 }, { "epoch": 1.7, "grad_norm": 6.658479796555576, "learning_rate": 5.946040245310703e-07, "loss": 0.5692, "step": 19008 }, { "epoch": 1.7, "grad_norm": 7.186767672643626, "learning_rate": 5.942624114618017e-07, "loss": 0.5273, "step": 19009 }, { "epoch": 1.7, "grad_norm": 6.80295425633945, "learning_rate": 5.939208903526572e-07, "loss": 0.5312, "step": 19010 }, { "epoch": 1.7, "grad_norm": 4.8452938131159184, "learning_rate": 5.935794612107659e-07, "loss": 0.5455, "step": 19011 }, { "epoch": 1.7, "grad_norm": 6.202115531735215, "learning_rate": 5.932381240432522e-07, "loss": 0.5305, "step": 19012 }, { "epoch": 1.7, "grad_norm": 6.49369238170101, "learning_rate": 5.92896878857242e-07, "loss": 0.4994, "step": 19013 }, { "epoch": 1.7, "grad_norm": 7.47511886426814, "learning_rate": 5.92555725659858e-07, "loss": 0.5703, "step": 19014 }, { "epoch": 1.7, "grad_norm": 7.109749053845435, "learning_rate": 5.922146644582216e-07, "loss": 0.5957, "step": 19015 }, { "epoch": 1.7, "grad_norm": 7.016372156623817, "learning_rate": 5.918736952594506e-07, "loss": 0.5975, "step": 19016 }, { "epoch": 1.7, "grad_norm": 7.685974251928895, "learning_rate": 5.915328180706636e-07, "loss": 0.5177, "step": 19017 }, { "epoch": 1.7, "grad_norm": 6.118161984128971, "learning_rate": 5.91192032898974e-07, "loss": 0.5599, "step": 19018 }, { "epoch": 1.7, "grad_norm": 6.309658708779394, "learning_rate": 5.908513397514959e-07, "loss": 0.5647, "step": 19019 }, { "epoch": 1.7, "grad_norm": 7.8917316825747585, "learning_rate": 5.905107386353393e-07, "loss": 0.5799, "step": 19020 }, { "epoch": 1.7, "grad_norm": 6.180850922694717, "learning_rate": 5.90170229557615e-07, "loss": 0.5549, "step": 19021 }, { "epoch": 1.7, "grad_norm": 7.606988086157251, "learning_rate": 5.898298125254298e-07, "loss": 0.6065, "step": 19022 }, { "epoch": 1.7, "grad_norm": 7.97249869673562, "learning_rate": 5.894894875458901e-07, "loss": 0.5268, "step": 19023 }, { "epoch": 1.7, "grad_norm": 6.087853862983345, "learning_rate": 5.891492546260985e-07, "loss": 0.6073, "step": 19024 }, { "epoch": 1.7, "grad_norm": 7.420801045899042, "learning_rate": 5.888091137731555e-07, "loss": 0.593, "step": 19025 }, { "epoch": 1.7, "grad_norm": 6.227025187496682, "learning_rate": 5.884690649941616e-07, "loss": 0.5959, "step": 19026 }, { "epoch": 1.7, "grad_norm": 9.048602507664945, "learning_rate": 5.881291082962154e-07, "loss": 0.5659, "step": 19027 }, { "epoch": 1.7, "grad_norm": 7.149548644332354, "learning_rate": 5.877892436864118e-07, "loss": 0.5707, "step": 19028 }, { "epoch": 1.7, "grad_norm": 6.578912271562676, "learning_rate": 5.874494711718459e-07, "loss": 0.5857, "step": 19029 }, { "epoch": 1.7, "grad_norm": 6.946419220638033, "learning_rate": 5.871097907596096e-07, "loss": 0.5555, "step": 19030 }, { "epoch": 1.7, "grad_norm": 5.0353785807975315, "learning_rate": 5.867702024567912e-07, "loss": 0.5684, "step": 19031 }, { "epoch": 1.7, "grad_norm": 7.1007264288157215, "learning_rate": 5.8643070627048e-07, "loss": 0.6035, "step": 19032 }, { "epoch": 1.7, "grad_norm": 7.118672684902679, "learning_rate": 5.860913022077619e-07, "loss": 0.5908, "step": 19033 }, { "epoch": 1.7, "grad_norm": 7.655065444722704, "learning_rate": 5.857519902757219e-07, "loss": 0.5513, "step": 19034 }, { "epoch": 1.7, "grad_norm": 6.28166942691206, "learning_rate": 5.854127704814422e-07, "loss": 0.5296, "step": 19035 }, { "epoch": 1.7, "grad_norm": 9.720324929351456, "learning_rate": 5.850736428320042e-07, "loss": 0.5704, "step": 19036 }, { "epoch": 1.7, "grad_norm": 9.213751621430028, "learning_rate": 5.847346073344856e-07, "loss": 0.5676, "step": 19037 }, { "epoch": 1.7, "grad_norm": 6.707403114649427, "learning_rate": 5.843956639959614e-07, "loss": 0.5694, "step": 19038 }, { "epoch": 1.7, "grad_norm": 7.4250369773567355, "learning_rate": 5.840568128235074e-07, "loss": 0.5885, "step": 19039 }, { "epoch": 1.7, "grad_norm": 10.053510846861775, "learning_rate": 5.837180538241971e-07, "loss": 0.597, "step": 19040 }, { "epoch": 1.7, "grad_norm": 8.282699263587892, "learning_rate": 5.833793870051013e-07, "loss": 0.6402, "step": 19041 }, { "epoch": 1.7, "grad_norm": 7.005640149043132, "learning_rate": 5.830408123732878e-07, "loss": 0.5404, "step": 19042 }, { "epoch": 1.7, "grad_norm": 5.278164216070422, "learning_rate": 5.827023299358259e-07, "loss": 0.6239, "step": 19043 }, { "epoch": 1.7, "grad_norm": 6.456627154517253, "learning_rate": 5.823639396997783e-07, "loss": 0.6, "step": 19044 }, { "epoch": 1.7, "grad_norm": 5.628871268303757, "learning_rate": 5.820256416722081e-07, "loss": 0.5716, "step": 19045 }, { "epoch": 1.7, "grad_norm": 6.227678145516003, "learning_rate": 5.816874358601782e-07, "loss": 0.5937, "step": 19046 }, { "epoch": 1.7, "grad_norm": 5.114723011126362, "learning_rate": 5.813493222707467e-07, "loss": 0.5451, "step": 19047 }, { "epoch": 1.7, "grad_norm": 6.284074729684461, "learning_rate": 5.810113009109731e-07, "loss": 0.5732, "step": 19048 }, { "epoch": 1.7, "grad_norm": 5.9428137553330656, "learning_rate": 5.806733717879093e-07, "loss": 0.5449, "step": 19049 }, { "epoch": 1.7, "grad_norm": 8.545216563143326, "learning_rate": 5.803355349086115e-07, "loss": 0.5696, "step": 19050 }, { "epoch": 1.7, "grad_norm": 6.288598720501472, "learning_rate": 5.799977902801312e-07, "loss": 0.6041, "step": 19051 }, { "epoch": 1.7, "grad_norm": 10.0338043242748, "learning_rate": 5.796601379095163e-07, "loss": 0.6047, "step": 19052 }, { "epoch": 1.7, "grad_norm": 5.374045580466187, "learning_rate": 5.793225778038153e-07, "loss": 0.5888, "step": 19053 }, { "epoch": 1.7, "grad_norm": 5.696939849381014, "learning_rate": 5.789851099700749e-07, "loss": 0.6383, "step": 19054 }, { "epoch": 1.7, "grad_norm": 7.774685792968355, "learning_rate": 5.786477344153385e-07, "loss": 0.5209, "step": 19055 }, { "epoch": 1.7, "grad_norm": 8.33623400505331, "learning_rate": 5.783104511466487e-07, "loss": 0.6197, "step": 19056 }, { "epoch": 1.7, "grad_norm": 8.330830016985539, "learning_rate": 5.779732601710441e-07, "loss": 0.572, "step": 19057 }, { "epoch": 1.7, "grad_norm": 6.237326240761746, "learning_rate": 5.776361614955634e-07, "loss": 0.5838, "step": 19058 }, { "epoch": 1.7, "grad_norm": 5.410128964517143, "learning_rate": 5.772991551272434e-07, "loss": 0.5255, "step": 19059 }, { "epoch": 1.7, "grad_norm": 8.526514380422007, "learning_rate": 5.769622410731173e-07, "loss": 0.4987, "step": 19060 }, { "epoch": 1.7, "grad_norm": 8.39842176702046, "learning_rate": 5.766254193402199e-07, "loss": 0.5939, "step": 19061 }, { "epoch": 1.7, "grad_norm": 6.9095607285011615, "learning_rate": 5.762886899355785e-07, "loss": 0.5903, "step": 19062 }, { "epoch": 1.7, "grad_norm": 6.96048259313156, "learning_rate": 5.759520528662232e-07, "loss": 0.5155, "step": 19063 }, { "epoch": 1.7, "grad_norm": 6.1020959017181955, "learning_rate": 5.756155081391801e-07, "loss": 0.6229, "step": 19064 }, { "epoch": 1.7, "grad_norm": 4.335859893232375, "learning_rate": 5.75279055761474e-07, "loss": 0.5208, "step": 19065 }, { "epoch": 1.7, "grad_norm": 8.371622693259138, "learning_rate": 5.749426957401289e-07, "loss": 0.5767, "step": 19066 }, { "epoch": 1.7, "grad_norm": 5.882995764122588, "learning_rate": 5.746064280821628e-07, "loss": 0.5616, "step": 19067 }, { "epoch": 1.7, "grad_norm": 6.0930424802961936, "learning_rate": 5.74270252794597e-07, "loss": 0.6733, "step": 19068 }, { "epoch": 1.7, "grad_norm": 5.829926063995253, "learning_rate": 5.739341698844469e-07, "loss": 0.5871, "step": 19069 }, { "epoch": 1.7, "grad_norm": 6.346167663094094, "learning_rate": 5.73598179358728e-07, "loss": 0.5286, "step": 19070 }, { "epoch": 1.7, "grad_norm": 5.419961296831708, "learning_rate": 5.732622812244537e-07, "loss": 0.5949, "step": 19071 }, { "epoch": 1.7, "grad_norm": 6.558881528836285, "learning_rate": 5.729264754886349e-07, "loss": 0.5405, "step": 19072 }, { "epoch": 1.7, "grad_norm": 7.13152413502468, "learning_rate": 5.725907621582805e-07, "loss": 0.5652, "step": 19073 }, { "epoch": 1.7, "grad_norm": 6.914233366666989, "learning_rate": 5.722551412403987e-07, "loss": 0.627, "step": 19074 }, { "epoch": 1.7, "grad_norm": 9.440617423030988, "learning_rate": 5.719196127419941e-07, "loss": 0.6581, "step": 19075 }, { "epoch": 1.7, "grad_norm": 7.444004010168954, "learning_rate": 5.715841766700697e-07, "loss": 0.5526, "step": 19076 }, { "epoch": 1.7, "grad_norm": 6.679297435636929, "learning_rate": 5.712488330316274e-07, "loss": 0.5817, "step": 19077 }, { "epoch": 1.7, "grad_norm": 4.325270157643071, "learning_rate": 5.709135818336675e-07, "loss": 0.5483, "step": 19078 }, { "epoch": 1.7, "grad_norm": 7.796893557436882, "learning_rate": 5.705784230831862e-07, "loss": 0.5223, "step": 19079 }, { "epoch": 1.7, "grad_norm": 5.249236754851243, "learning_rate": 5.702433567871818e-07, "loss": 0.5322, "step": 19080 }, { "epoch": 1.7, "grad_norm": 5.490670248649101, "learning_rate": 5.699083829526464e-07, "loss": 0.5513, "step": 19081 }, { "epoch": 1.7, "grad_norm": 5.181595580610327, "learning_rate": 5.6957350158657e-07, "loss": 0.5493, "step": 19082 }, { "epoch": 1.7, "grad_norm": 7.223291196004908, "learning_rate": 5.692387126959447e-07, "loss": 0.5633, "step": 19083 }, { "epoch": 1.7, "grad_norm": 6.516392319790798, "learning_rate": 5.689040162877585e-07, "loss": 0.5593, "step": 19084 }, { "epoch": 1.7, "grad_norm": 6.554175770037426, "learning_rate": 5.685694123689966e-07, "loss": 0.5319, "step": 19085 }, { "epoch": 1.7, "grad_norm": 6.004001563774497, "learning_rate": 5.682349009466437e-07, "loss": 0.52, "step": 19086 }, { "epoch": 1.7, "grad_norm": 7.434594190870763, "learning_rate": 5.679004820276824e-07, "loss": 0.6095, "step": 19087 }, { "epoch": 1.7, "grad_norm": 6.508152782857549, "learning_rate": 5.67566155619092e-07, "loss": 0.5507, "step": 19088 }, { "epoch": 1.7, "grad_norm": 9.709586457668753, "learning_rate": 5.67231921727851e-07, "loss": 0.5345, "step": 19089 }, { "epoch": 1.7, "grad_norm": 7.425309453709852, "learning_rate": 5.66897780360936e-07, "loss": 0.6127, "step": 19090 }, { "epoch": 1.7, "grad_norm": 5.876887345335138, "learning_rate": 5.665637315253214e-07, "loss": 0.5789, "step": 19091 }, { "epoch": 1.7, "grad_norm": 6.415390748097458, "learning_rate": 5.6622977522798e-07, "loss": 0.5207, "step": 19092 }, { "epoch": 1.7, "grad_norm": 9.676761435104092, "learning_rate": 5.658959114758833e-07, "loss": 0.6297, "step": 19093 }, { "epoch": 1.7, "grad_norm": 6.99334780251559, "learning_rate": 5.655621402759976e-07, "loss": 0.6306, "step": 19094 }, { "epoch": 1.7, "grad_norm": 10.628010178878265, "learning_rate": 5.652284616352921e-07, "loss": 0.597, "step": 19095 }, { "epoch": 1.7, "grad_norm": 8.446380783433563, "learning_rate": 5.648948755607297e-07, "loss": 0.5833, "step": 19096 }, { "epoch": 1.7, "grad_norm": 7.1836746926029695, "learning_rate": 5.645613820592744e-07, "loss": 0.582, "step": 19097 }, { "epoch": 1.7, "grad_norm": 6.038312623744834, "learning_rate": 5.64227981137886e-07, "loss": 0.6372, "step": 19098 }, { "epoch": 1.7, "grad_norm": 6.468294230075106, "learning_rate": 5.63894672803525e-07, "loss": 0.5325, "step": 19099 }, { "epoch": 1.7, "grad_norm": 8.26228035471566, "learning_rate": 5.635614570631481e-07, "loss": 0.5398, "step": 19100 }, { "epoch": 1.7, "grad_norm": 7.318730178731674, "learning_rate": 5.632283339237094e-07, "loss": 0.6522, "step": 19101 }, { "epoch": 1.7, "grad_norm": 6.574191265879604, "learning_rate": 5.628953033921631e-07, "loss": 0.5473, "step": 19102 }, { "epoch": 1.7, "grad_norm": 7.006764740308048, "learning_rate": 5.625623654754603e-07, "loss": 0.53, "step": 19103 }, { "epoch": 1.7, "grad_norm": 8.843452390600442, "learning_rate": 5.6222952018055e-07, "loss": 0.6279, "step": 19104 }, { "epoch": 1.7, "grad_norm": 6.566153395420582, "learning_rate": 5.618967675143799e-07, "loss": 0.5443, "step": 19105 }, { "epoch": 1.7, "grad_norm": 8.095969023942317, "learning_rate": 5.615641074838968e-07, "loss": 0.5824, "step": 19106 }, { "epoch": 1.7, "grad_norm": 5.789883465374371, "learning_rate": 5.612315400960417e-07, "loss": 0.591, "step": 19107 }, { "epoch": 1.7, "grad_norm": 6.566956511066094, "learning_rate": 5.608990653577578e-07, "loss": 0.5376, "step": 19108 }, { "epoch": 1.7, "grad_norm": 7.1867502166602355, "learning_rate": 5.605666832759849e-07, "loss": 0.5229, "step": 19109 }, { "epoch": 1.7, "grad_norm": 7.72448010142811, "learning_rate": 5.602343938576599e-07, "loss": 0.5298, "step": 19110 }, { "epoch": 1.7, "grad_norm": 7.075153092149934, "learning_rate": 5.599021971097185e-07, "loss": 0.5406, "step": 19111 }, { "epoch": 1.7, "grad_norm": 7.389073607069378, "learning_rate": 5.595700930390952e-07, "loss": 0.5709, "step": 19112 }, { "epoch": 1.71, "grad_norm": 9.733563550270771, "learning_rate": 5.592380816527232e-07, "loss": 0.5816, "step": 19113 }, { "epoch": 1.71, "grad_norm": 5.031636149005233, "learning_rate": 5.589061629575298e-07, "loss": 0.5061, "step": 19114 }, { "epoch": 1.71, "grad_norm": 9.553733823258716, "learning_rate": 5.585743369604446e-07, "loss": 0.5967, "step": 19115 }, { "epoch": 1.71, "grad_norm": 5.904688821346027, "learning_rate": 5.58242603668393e-07, "loss": 0.5867, "step": 19116 }, { "epoch": 1.71, "grad_norm": 6.007371172734647, "learning_rate": 5.579109630883006e-07, "loss": 0.5959, "step": 19117 }, { "epoch": 1.71, "grad_norm": 5.704403188457832, "learning_rate": 5.57579415227088e-07, "loss": 0.5724, "step": 19118 }, { "epoch": 1.71, "grad_norm": 6.736607442586065, "learning_rate": 5.57247960091678e-07, "loss": 0.5904, "step": 19119 }, { "epoch": 1.71, "grad_norm": 5.4250227416406025, "learning_rate": 5.569165976889862e-07, "loss": 0.5725, "step": 19120 }, { "epoch": 1.71, "grad_norm": 6.4791677676016555, "learning_rate": 5.565853280259304e-07, "loss": 0.5811, "step": 19121 }, { "epoch": 1.71, "grad_norm": 5.797238458010775, "learning_rate": 5.562541511094249e-07, "loss": 0.5966, "step": 19122 }, { "epoch": 1.71, "grad_norm": 6.755331007286548, "learning_rate": 5.559230669463823e-07, "loss": 0.5501, "step": 19123 }, { "epoch": 1.71, "grad_norm": 6.97121304661432, "learning_rate": 5.555920755437138e-07, "loss": 0.6285, "step": 19124 }, { "epoch": 1.71, "grad_norm": 6.790935184549811, "learning_rate": 5.552611769083271e-07, "loss": 0.654, "step": 19125 }, { "epoch": 1.71, "grad_norm": 7.046526389961184, "learning_rate": 5.54930371047131e-07, "loss": 0.548, "step": 19126 }, { "epoch": 1.71, "grad_norm": 6.290750568409491, "learning_rate": 5.545996579670271e-07, "loss": 0.555, "step": 19127 }, { "epoch": 1.71, "grad_norm": 6.709688716628476, "learning_rate": 5.542690376749205e-07, "loss": 0.583, "step": 19128 }, { "epoch": 1.71, "grad_norm": 5.762497765200341, "learning_rate": 5.539385101777117e-07, "loss": 0.5282, "step": 19129 }, { "epoch": 1.71, "grad_norm": 7.416600369855901, "learning_rate": 5.536080754822992e-07, "loss": 0.5762, "step": 19130 }, { "epoch": 1.71, "grad_norm": 5.637230739764618, "learning_rate": 5.532777335955813e-07, "loss": 0.5341, "step": 19131 }, { "epoch": 1.71, "grad_norm": 11.934854255215145, "learning_rate": 5.529474845244537e-07, "loss": 0.6211, "step": 19132 }, { "epoch": 1.71, "grad_norm": 6.904980820546287, "learning_rate": 5.526173282758074e-07, "loss": 0.5581, "step": 19133 }, { "epoch": 1.71, "grad_norm": 6.565211822897581, "learning_rate": 5.522872648565342e-07, "loss": 0.5676, "step": 19134 }, { "epoch": 1.71, "grad_norm": 6.388129333356261, "learning_rate": 5.519572942735246e-07, "loss": 0.5533, "step": 19135 }, { "epoch": 1.71, "grad_norm": 6.86501650203383, "learning_rate": 5.516274165336655e-07, "loss": 0.5577, "step": 19136 }, { "epoch": 1.71, "grad_norm": 7.8999527947595425, "learning_rate": 5.512976316438429e-07, "loss": 0.5973, "step": 19137 }, { "epoch": 1.71, "grad_norm": 7.151992448064016, "learning_rate": 5.509679396109391e-07, "loss": 0.5131, "step": 19138 }, { "epoch": 1.71, "grad_norm": 6.418728025390095, "learning_rate": 5.50638340441837e-07, "loss": 0.5679, "step": 19139 }, { "epoch": 1.71, "grad_norm": 5.582562648639339, "learning_rate": 5.503088341434149e-07, "loss": 0.6134, "step": 19140 }, { "epoch": 1.71, "grad_norm": 6.442655526492507, "learning_rate": 5.499794207225512e-07, "loss": 0.5896, "step": 19141 }, { "epoch": 1.71, "grad_norm": 6.140845742317754, "learning_rate": 5.496501001861216e-07, "loss": 0.5971, "step": 19142 }, { "epoch": 1.71, "grad_norm": 7.825858284010325, "learning_rate": 5.493208725409998e-07, "loss": 0.6031, "step": 19143 }, { "epoch": 1.71, "grad_norm": 6.419136127346111, "learning_rate": 5.48991737794059e-07, "loss": 0.5239, "step": 19144 }, { "epoch": 1.71, "grad_norm": 6.009096731450402, "learning_rate": 5.486626959521673e-07, "loss": 0.584, "step": 19145 }, { "epoch": 1.71, "grad_norm": 7.750851383579592, "learning_rate": 5.483337470221939e-07, "loss": 0.5791, "step": 19146 }, { "epoch": 1.71, "grad_norm": 5.462728763440174, "learning_rate": 5.480048910110042e-07, "loss": 0.5149, "step": 19147 }, { "epoch": 1.71, "grad_norm": 8.899297435270581, "learning_rate": 5.476761279254628e-07, "loss": 0.555, "step": 19148 }, { "epoch": 1.71, "grad_norm": 6.2320844764559755, "learning_rate": 5.473474577724319e-07, "loss": 0.6011, "step": 19149 }, { "epoch": 1.71, "grad_norm": 6.955528713722872, "learning_rate": 5.470188805587723e-07, "loss": 0.5677, "step": 19150 }, { "epoch": 1.71, "grad_norm": 5.91801570129733, "learning_rate": 5.466903962913411e-07, "loss": 0.6123, "step": 19151 }, { "epoch": 1.71, "grad_norm": 6.009030119820712, "learning_rate": 5.463620049769963e-07, "loss": 0.5489, "step": 19152 }, { "epoch": 1.71, "grad_norm": 7.290965910772599, "learning_rate": 5.460337066225901e-07, "loss": 0.5082, "step": 19153 }, { "epoch": 1.71, "grad_norm": 5.113003648129236, "learning_rate": 5.457055012349765e-07, "loss": 0.5617, "step": 19154 }, { "epoch": 1.71, "grad_norm": 8.574070506135433, "learning_rate": 5.453773888210057e-07, "loss": 0.5764, "step": 19155 }, { "epoch": 1.71, "grad_norm": 6.451639457273505, "learning_rate": 5.450493693875264e-07, "loss": 0.5806, "step": 19156 }, { "epoch": 1.71, "grad_norm": 7.519702382369728, "learning_rate": 5.447214429413861e-07, "loss": 0.5856, "step": 19157 }, { "epoch": 1.71, "grad_norm": 6.265161181041208, "learning_rate": 5.443936094894281e-07, "loss": 0.5533, "step": 19158 }, { "epoch": 1.71, "grad_norm": 8.33210166084681, "learning_rate": 5.440658690384959e-07, "loss": 0.6145, "step": 19159 }, { "epoch": 1.71, "grad_norm": 5.113219641739811, "learning_rate": 5.437382215954306e-07, "loss": 0.5508, "step": 19160 }, { "epoch": 1.71, "grad_norm": 7.599609456544699, "learning_rate": 5.434106671670703e-07, "loss": 0.6276, "step": 19161 }, { "epoch": 1.71, "grad_norm": 7.173456762507223, "learning_rate": 5.43083205760253e-07, "loss": 0.6255, "step": 19162 }, { "epoch": 1.71, "grad_norm": 5.535875704701766, "learning_rate": 5.427558373818143e-07, "loss": 0.4989, "step": 19163 }, { "epoch": 1.71, "grad_norm": 5.702937746010232, "learning_rate": 5.424285620385849e-07, "loss": 0.5529, "step": 19164 }, { "epoch": 1.71, "grad_norm": 6.425188713686808, "learning_rate": 5.421013797373975e-07, "loss": 0.5633, "step": 19165 }, { "epoch": 1.71, "grad_norm": 6.078964683990989, "learning_rate": 5.417742904850814e-07, "loss": 0.5862, "step": 19166 }, { "epoch": 1.71, "grad_norm": 9.030208770180169, "learning_rate": 5.414472942884646e-07, "loss": 0.5993, "step": 19167 }, { "epoch": 1.71, "grad_norm": 4.879746547300199, "learning_rate": 5.4112039115437e-07, "loss": 0.5657, "step": 19168 }, { "epoch": 1.71, "grad_norm": 6.798628588867052, "learning_rate": 5.407935810896231e-07, "loss": 0.5903, "step": 19169 }, { "epoch": 1.71, "grad_norm": 7.9536106363533685, "learning_rate": 5.404668641010452e-07, "loss": 0.5284, "step": 19170 }, { "epoch": 1.71, "grad_norm": 7.252859025058898, "learning_rate": 5.401402401954547e-07, "loss": 0.6191, "step": 19171 }, { "epoch": 1.71, "grad_norm": 8.175062042890385, "learning_rate": 5.398137093796696e-07, "loss": 0.6105, "step": 19172 }, { "epoch": 1.71, "grad_norm": 8.300608920699718, "learning_rate": 5.394872716605054e-07, "loss": 0.5959, "step": 19173 }, { "epoch": 1.71, "grad_norm": 7.089268615124478, "learning_rate": 5.391609270447762e-07, "loss": 0.5217, "step": 19174 }, { "epoch": 1.71, "grad_norm": 11.894864624684013, "learning_rate": 5.388346755392937e-07, "loss": 0.6596, "step": 19175 }, { "epoch": 1.71, "grad_norm": 8.940059194279009, "learning_rate": 5.385085171508686e-07, "loss": 0.5618, "step": 19176 }, { "epoch": 1.71, "grad_norm": 7.026658441908773, "learning_rate": 5.381824518863065e-07, "loss": 0.5444, "step": 19177 }, { "epoch": 1.71, "grad_norm": 6.892760094991491, "learning_rate": 5.37856479752415e-07, "loss": 0.5662, "step": 19178 }, { "epoch": 1.71, "grad_norm": 7.433885492003733, "learning_rate": 5.375306007559972e-07, "loss": 0.5552, "step": 19179 }, { "epoch": 1.71, "grad_norm": 5.796712401379719, "learning_rate": 5.372048149038555e-07, "loss": 0.6153, "step": 19180 }, { "epoch": 1.71, "grad_norm": 8.834509506144384, "learning_rate": 5.368791222027908e-07, "loss": 0.5705, "step": 19181 }, { "epoch": 1.71, "grad_norm": 7.409687304330135, "learning_rate": 5.365535226595997e-07, "loss": 0.6201, "step": 19182 }, { "epoch": 1.71, "grad_norm": 6.642394786767026, "learning_rate": 5.362280162810796e-07, "loss": 0.524, "step": 19183 }, { "epoch": 1.71, "grad_norm": 6.092196893232274, "learning_rate": 5.359026030740233e-07, "loss": 0.5633, "step": 19184 }, { "epoch": 1.71, "grad_norm": 7.441155030383824, "learning_rate": 5.355772830452238e-07, "loss": 0.6022, "step": 19185 }, { "epoch": 1.71, "grad_norm": 6.331772272716986, "learning_rate": 5.352520562014718e-07, "loss": 0.5838, "step": 19186 }, { "epoch": 1.71, "grad_norm": 7.232479437236316, "learning_rate": 5.349269225495557e-07, "loss": 0.5508, "step": 19187 }, { "epoch": 1.71, "grad_norm": 5.888347364187809, "learning_rate": 5.346018820962618e-07, "loss": 0.5725, "step": 19188 }, { "epoch": 1.71, "grad_norm": 5.751445092278158, "learning_rate": 5.342769348483751e-07, "loss": 0.5536, "step": 19189 }, { "epoch": 1.71, "grad_norm": 7.252832950346197, "learning_rate": 5.339520808126769e-07, "loss": 0.5771, "step": 19190 }, { "epoch": 1.71, "grad_norm": 5.955396944115055, "learning_rate": 5.336273199959485e-07, "loss": 0.5503, "step": 19191 }, { "epoch": 1.71, "grad_norm": 4.223998712183525, "learning_rate": 5.333026524049684e-07, "loss": 0.5458, "step": 19192 }, { "epoch": 1.71, "grad_norm": 7.139431863318768, "learning_rate": 5.329780780465138e-07, "loss": 0.6049, "step": 19193 }, { "epoch": 1.71, "grad_norm": 6.885157642608988, "learning_rate": 5.326535969273589e-07, "loss": 0.5597, "step": 19194 }, { "epoch": 1.71, "grad_norm": 6.823111663416945, "learning_rate": 5.323292090542781e-07, "loss": 0.53, "step": 19195 }, { "epoch": 1.71, "grad_norm": 8.312694344726015, "learning_rate": 5.320049144340412e-07, "loss": 0.5531, "step": 19196 }, { "epoch": 1.71, "grad_norm": 7.656179080445823, "learning_rate": 5.316807130734159e-07, "loss": 0.5864, "step": 19197 }, { "epoch": 1.71, "grad_norm": 7.059279721013413, "learning_rate": 5.313566049791696e-07, "loss": 0.5776, "step": 19198 }, { "epoch": 1.71, "grad_norm": 6.722062718843531, "learning_rate": 5.310325901580688e-07, "loss": 0.5328, "step": 19199 }, { "epoch": 1.71, "grad_norm": 6.168292104870916, "learning_rate": 5.307086686168755e-07, "loss": 0.4644, "step": 19200 }, { "epoch": 1.71, "grad_norm": 5.040838279113908, "learning_rate": 5.303848403623513e-07, "loss": 0.5143, "step": 19201 }, { "epoch": 1.71, "grad_norm": 5.005828551995618, "learning_rate": 5.300611054012561e-07, "loss": 0.5811, "step": 19202 }, { "epoch": 1.71, "grad_norm": 5.812722546965414, "learning_rate": 5.297374637403452e-07, "loss": 0.5781, "step": 19203 }, { "epoch": 1.71, "grad_norm": 6.187542261477316, "learning_rate": 5.294139153863753e-07, "loss": 0.5623, "step": 19204 }, { "epoch": 1.71, "grad_norm": 10.211128732153158, "learning_rate": 5.290904603460994e-07, "loss": 0.5512, "step": 19205 }, { "epoch": 1.71, "grad_norm": 8.185308293820485, "learning_rate": 5.287670986262688e-07, "loss": 0.5673, "step": 19206 }, { "epoch": 1.71, "grad_norm": 7.976479590876485, "learning_rate": 5.284438302336337e-07, "loss": 0.5615, "step": 19207 }, { "epoch": 1.71, "grad_norm": 6.087104575117499, "learning_rate": 5.281206551749418e-07, "loss": 0.5581, "step": 19208 }, { "epoch": 1.71, "grad_norm": 7.046604037626172, "learning_rate": 5.277975734569368e-07, "loss": 0.5404, "step": 19209 }, { "epoch": 1.71, "grad_norm": 7.491873171342029, "learning_rate": 5.274745850863649e-07, "loss": 0.5453, "step": 19210 }, { "epoch": 1.71, "grad_norm": 8.57653264841491, "learning_rate": 5.271516900699652e-07, "loss": 0.5459, "step": 19211 }, { "epoch": 1.71, "grad_norm": 7.18137210299237, "learning_rate": 5.268288884144785e-07, "loss": 0.5669, "step": 19212 }, { "epoch": 1.71, "grad_norm": 6.846701873577875, "learning_rate": 5.265061801266425e-07, "loss": 0.5903, "step": 19213 }, { "epoch": 1.71, "grad_norm": 7.232098063361468, "learning_rate": 5.261835652131936e-07, "loss": 0.5854, "step": 19214 }, { "epoch": 1.71, "grad_norm": 5.070263309547352, "learning_rate": 5.258610436808664e-07, "loss": 0.5333, "step": 19215 }, { "epoch": 1.71, "grad_norm": 6.059854945063307, "learning_rate": 5.255386155363901e-07, "loss": 0.5016, "step": 19216 }, { "epoch": 1.71, "grad_norm": 7.215055142101373, "learning_rate": 5.252162807864969e-07, "loss": 0.5418, "step": 19217 }, { "epoch": 1.71, "grad_norm": 7.61150899569979, "learning_rate": 5.248940394379137e-07, "loss": 0.633, "step": 19218 }, { "epoch": 1.71, "grad_norm": 7.619094490131859, "learning_rate": 5.245718914973674e-07, "loss": 0.5335, "step": 19219 }, { "epoch": 1.71, "grad_norm": 9.826982799291908, "learning_rate": 5.242498369715831e-07, "loss": 0.5781, "step": 19220 }, { "epoch": 1.71, "grad_norm": 6.313756250538101, "learning_rate": 5.239278758672805e-07, "loss": 0.55, "step": 19221 }, { "epoch": 1.71, "grad_norm": 7.1443252251553435, "learning_rate": 5.236060081911809e-07, "loss": 0.6054, "step": 19222 }, { "epoch": 1.71, "grad_norm": 6.491568705036574, "learning_rate": 5.232842339500027e-07, "loss": 0.5601, "step": 19223 }, { "epoch": 1.71, "grad_norm": 7.254470011456196, "learning_rate": 5.229625531504623e-07, "loss": 0.6025, "step": 19224 }, { "epoch": 1.72, "grad_norm": 7.276779902402494, "learning_rate": 5.226409657992754e-07, "loss": 0.5332, "step": 19225 }, { "epoch": 1.72, "grad_norm": 5.451033844015195, "learning_rate": 5.223194719031521e-07, "loss": 0.6382, "step": 19226 }, { "epoch": 1.72, "grad_norm": 6.819546215949375, "learning_rate": 5.219980714688044e-07, "loss": 0.5366, "step": 19227 }, { "epoch": 1.72, "grad_norm": 8.559681168152265, "learning_rate": 5.216767645029397e-07, "loss": 0.5884, "step": 19228 }, { "epoch": 1.72, "grad_norm": 3.9819397891199317, "learning_rate": 5.213555510122647e-07, "loss": 0.5789, "step": 19229 }, { "epoch": 1.72, "grad_norm": 5.7554202294410475, "learning_rate": 5.210344310034848e-07, "loss": 0.5707, "step": 19230 }, { "epoch": 1.72, "grad_norm": 12.005537028557345, "learning_rate": 5.207134044833029e-07, "loss": 0.557, "step": 19231 }, { "epoch": 1.72, "grad_norm": 6.530122962686295, "learning_rate": 5.203924714584185e-07, "loss": 0.5615, "step": 19232 }, { "epoch": 1.72, "grad_norm": 8.10237288132925, "learning_rate": 5.200716319355325e-07, "loss": 0.5823, "step": 19233 }, { "epoch": 1.72, "grad_norm": 6.677227181925658, "learning_rate": 5.19750885921339e-07, "loss": 0.5635, "step": 19234 }, { "epoch": 1.72, "grad_norm": 7.063541718061425, "learning_rate": 5.194302334225343e-07, "loss": 0.5412, "step": 19235 }, { "epoch": 1.72, "grad_norm": 8.001249229832037, "learning_rate": 5.191096744458113e-07, "loss": 0.5943, "step": 19236 }, { "epoch": 1.72, "grad_norm": 6.652211928060776, "learning_rate": 5.187892089978608e-07, "loss": 0.5794, "step": 19237 }, { "epoch": 1.72, "grad_norm": 5.676748666271144, "learning_rate": 5.184688370853718e-07, "loss": 0.5901, "step": 19238 }, { "epoch": 1.72, "grad_norm": 6.9459094465182725, "learning_rate": 5.181485587150325e-07, "loss": 0.533, "step": 19239 }, { "epoch": 1.72, "grad_norm": 7.11192665808139, "learning_rate": 5.17828373893527e-07, "loss": 0.6778, "step": 19240 }, { "epoch": 1.72, "grad_norm": 6.8749805723127135, "learning_rate": 5.175082826275374e-07, "loss": 0.5864, "step": 19241 }, { "epoch": 1.72, "grad_norm": 5.185314423315643, "learning_rate": 5.171882849237458e-07, "loss": 0.509, "step": 19242 }, { "epoch": 1.72, "grad_norm": 7.320260007084134, "learning_rate": 5.168683807888313e-07, "loss": 0.5879, "step": 19243 }, { "epoch": 1.72, "grad_norm": 8.703746557947426, "learning_rate": 5.16548570229472e-07, "loss": 0.6064, "step": 19244 }, { "epoch": 1.72, "grad_norm": 5.479373867619299, "learning_rate": 5.162288532523424e-07, "loss": 0.528, "step": 19245 }, { "epoch": 1.72, "grad_norm": 7.101415704045613, "learning_rate": 5.159092298641172e-07, "loss": 0.542, "step": 19246 }, { "epoch": 1.72, "grad_norm": 6.830933680376042, "learning_rate": 5.155897000714661e-07, "loss": 0.577, "step": 19247 }, { "epoch": 1.72, "grad_norm": 8.47981837693573, "learning_rate": 5.152702638810592e-07, "loss": 0.5774, "step": 19248 }, { "epoch": 1.72, "grad_norm": 8.057985776136146, "learning_rate": 5.14950921299564e-07, "loss": 0.6631, "step": 19249 }, { "epoch": 1.72, "grad_norm": 8.81079428994409, "learning_rate": 5.146316723336464e-07, "loss": 0.6008, "step": 19250 }, { "epoch": 1.72, "grad_norm": 6.8327135673969215, "learning_rate": 5.143125169899693e-07, "loss": 0.6312, "step": 19251 }, { "epoch": 1.72, "grad_norm": 6.757887755134997, "learning_rate": 5.139934552751968e-07, "loss": 0.5292, "step": 19252 }, { "epoch": 1.72, "grad_norm": 7.823177478938973, "learning_rate": 5.136744871959859e-07, "loss": 0.5955, "step": 19253 }, { "epoch": 1.72, "grad_norm": 7.447161056666597, "learning_rate": 5.13355612758995e-07, "loss": 0.5021, "step": 19254 }, { "epoch": 1.72, "grad_norm": 7.0682793689157535, "learning_rate": 5.130368319708795e-07, "loss": 0.6167, "step": 19255 }, { "epoch": 1.72, "grad_norm": 6.053681931780594, "learning_rate": 5.127181448382939e-07, "loss": 0.5193, "step": 19256 }, { "epoch": 1.72, "grad_norm": 7.09226502247413, "learning_rate": 5.123995513678904e-07, "loss": 0.5567, "step": 19257 }, { "epoch": 1.72, "grad_norm": 6.127944996147868, "learning_rate": 5.120810515663183e-07, "loss": 0.5154, "step": 19258 }, { "epoch": 1.72, "grad_norm": 6.101485029334655, "learning_rate": 5.117626454402269e-07, "loss": 0.5817, "step": 19259 }, { "epoch": 1.72, "grad_norm": 8.42937171722228, "learning_rate": 5.114443329962604e-07, "loss": 0.6305, "step": 19260 }, { "epoch": 1.72, "grad_norm": 7.254502111937606, "learning_rate": 5.111261142410634e-07, "loss": 0.5298, "step": 19261 }, { "epoch": 1.72, "grad_norm": 8.340051633725265, "learning_rate": 5.108079891812789e-07, "loss": 0.6497, "step": 19262 }, { "epoch": 1.72, "grad_norm": 4.982442249594672, "learning_rate": 5.104899578235456e-07, "loss": 0.613, "step": 19263 }, { "epoch": 1.72, "grad_norm": 7.09478956960515, "learning_rate": 5.10172020174503e-07, "loss": 0.5538, "step": 19264 }, { "epoch": 1.72, "grad_norm": 9.077518853677551, "learning_rate": 5.098541762407883e-07, "loss": 0.6039, "step": 19265 }, { "epoch": 1.72, "grad_norm": 7.3261375659633385, "learning_rate": 5.095364260290331e-07, "loss": 0.6406, "step": 19266 }, { "epoch": 1.72, "grad_norm": 6.724192105780078, "learning_rate": 5.092187695458705e-07, "loss": 0.5322, "step": 19267 }, { "epoch": 1.72, "grad_norm": 7.471984082158057, "learning_rate": 5.089012067979332e-07, "loss": 0.6028, "step": 19268 }, { "epoch": 1.72, "grad_norm": 6.973116508262568, "learning_rate": 5.085837377918468e-07, "loss": 0.61, "step": 19269 }, { "epoch": 1.72, "grad_norm": 5.76843553253817, "learning_rate": 5.082663625342382e-07, "loss": 0.5394, "step": 19270 }, { "epoch": 1.72, "grad_norm": 6.207002145099065, "learning_rate": 5.079490810317333e-07, "loss": 0.5868, "step": 19271 }, { "epoch": 1.72, "grad_norm": 6.9599250694449, "learning_rate": 5.076318932909546e-07, "loss": 0.5698, "step": 19272 }, { "epoch": 1.72, "grad_norm": 6.713047570783669, "learning_rate": 5.073147993185207e-07, "loss": 0.6016, "step": 19273 }, { "epoch": 1.72, "grad_norm": 6.663640998040794, "learning_rate": 5.069977991210523e-07, "loss": 0.6446, "step": 19274 }, { "epoch": 1.72, "grad_norm": 7.893089910157167, "learning_rate": 5.066808927051648e-07, "loss": 0.5509, "step": 19275 }, { "epoch": 1.72, "grad_norm": 6.651522889505811, "learning_rate": 5.063640800774733e-07, "loss": 0.5685, "step": 19276 }, { "epoch": 1.72, "grad_norm": 8.239088395961359, "learning_rate": 5.06047361244591e-07, "loss": 0.5212, "step": 19277 }, { "epoch": 1.72, "grad_norm": 5.474865591286788, "learning_rate": 5.057307362131298e-07, "loss": 0.4893, "step": 19278 }, { "epoch": 1.72, "grad_norm": 7.838493472446169, "learning_rate": 5.054142049896955e-07, "loss": 0.487, "step": 19279 }, { "epoch": 1.72, "grad_norm": 5.181205621344234, "learning_rate": 5.050977675808971e-07, "loss": 0.5561, "step": 19280 }, { "epoch": 1.72, "grad_norm": 8.283675570722995, "learning_rate": 5.047814239933391e-07, "loss": 0.6018, "step": 19281 }, { "epoch": 1.72, "grad_norm": 6.934600160361875, "learning_rate": 5.044651742336254e-07, "loss": 0.6249, "step": 19282 }, { "epoch": 1.72, "grad_norm": 6.475814835443274, "learning_rate": 5.041490183083553e-07, "loss": 0.5512, "step": 19283 }, { "epoch": 1.72, "grad_norm": 9.062724185903754, "learning_rate": 5.038329562241285e-07, "loss": 0.6022, "step": 19284 }, { "epoch": 1.72, "grad_norm": 6.086858011102138, "learning_rate": 5.035169879875435e-07, "loss": 0.5679, "step": 19285 }, { "epoch": 1.72, "grad_norm": 6.122389201808594, "learning_rate": 5.032011136051934e-07, "loss": 0.5233, "step": 19286 }, { "epoch": 1.72, "grad_norm": 9.805357820931205, "learning_rate": 5.02885333083672e-07, "loss": 0.5856, "step": 19287 }, { "epoch": 1.72, "grad_norm": 5.652777466839532, "learning_rate": 5.025696464295704e-07, "loss": 0.5289, "step": 19288 }, { "epoch": 1.72, "grad_norm": 5.322222051135886, "learning_rate": 5.022540536494785e-07, "loss": 0.5769, "step": 19289 }, { "epoch": 1.72, "grad_norm": 6.522434033769448, "learning_rate": 5.019385547499838e-07, "loss": 0.5446, "step": 19290 }, { "epoch": 1.72, "grad_norm": 7.468669260489667, "learning_rate": 5.016231497376717e-07, "loss": 0.5778, "step": 19291 }, { "epoch": 1.72, "grad_norm": 7.007709250538739, "learning_rate": 5.013078386191239e-07, "loss": 0.6079, "step": 19292 }, { "epoch": 1.72, "grad_norm": 7.208030247197926, "learning_rate": 5.009926214009236e-07, "loss": 0.5595, "step": 19293 }, { "epoch": 1.72, "grad_norm": 5.661476198016453, "learning_rate": 5.006774980896495e-07, "loss": 0.6075, "step": 19294 }, { "epoch": 1.72, "grad_norm": 7.336388046664277, "learning_rate": 5.00362468691879e-07, "loss": 0.6017, "step": 19295 }, { "epoch": 1.72, "grad_norm": 6.982693583438814, "learning_rate": 5.00047533214189e-07, "loss": 0.6129, "step": 19296 }, { "epoch": 1.72, "grad_norm": 7.27886099699583, "learning_rate": 4.997326916631512e-07, "loss": 0.5441, "step": 19297 }, { "epoch": 1.72, "grad_norm": 6.129110353476218, "learning_rate": 4.994179440453384e-07, "loss": 0.5526, "step": 19298 }, { "epoch": 1.72, "grad_norm": 5.856000462473329, "learning_rate": 4.991032903673198e-07, "loss": 0.5402, "step": 19299 }, { "epoch": 1.72, "grad_norm": 8.808762033562825, "learning_rate": 4.987887306356625e-07, "loss": 0.5456, "step": 19300 }, { "epoch": 1.72, "grad_norm": 7.766439359764874, "learning_rate": 4.984742648569335e-07, "loss": 0.6644, "step": 19301 }, { "epoch": 1.72, "grad_norm": 8.557813325310716, "learning_rate": 4.981598930376958e-07, "loss": 0.6003, "step": 19302 }, { "epoch": 1.72, "grad_norm": 6.591130172154082, "learning_rate": 4.978456151845124e-07, "loss": 0.6241, "step": 19303 }, { "epoch": 1.72, "grad_norm": 6.519369507629495, "learning_rate": 4.975314313039415e-07, "loss": 0.514, "step": 19304 }, { "epoch": 1.72, "grad_norm": 6.271098137955858, "learning_rate": 4.972173414025416e-07, "loss": 0.6202, "step": 19305 }, { "epoch": 1.72, "grad_norm": 8.889973568363407, "learning_rate": 4.969033454868688e-07, "loss": 0.613, "step": 19306 }, { "epoch": 1.72, "grad_norm": 4.225439773803859, "learning_rate": 4.965894435634772e-07, "loss": 0.5878, "step": 19307 }, { "epoch": 1.72, "grad_norm": 8.406887708991459, "learning_rate": 4.962756356389182e-07, "loss": 0.5112, "step": 19308 }, { "epoch": 1.72, "grad_norm": 11.288106316938237, "learning_rate": 4.959619217197436e-07, "loss": 0.5601, "step": 19309 }, { "epoch": 1.72, "grad_norm": 6.446747229524267, "learning_rate": 4.956483018124991e-07, "loss": 0.5036, "step": 19310 }, { "epoch": 1.72, "grad_norm": 7.448196832400418, "learning_rate": 4.953347759237331e-07, "loss": 0.5444, "step": 19311 }, { "epoch": 1.72, "grad_norm": 5.854335005664905, "learning_rate": 4.950213440599877e-07, "loss": 0.5294, "step": 19312 }, { "epoch": 1.72, "grad_norm": 6.956740392848371, "learning_rate": 4.947080062278059e-07, "loss": 0.6221, "step": 19313 }, { "epoch": 1.72, "grad_norm": 7.737058727487721, "learning_rate": 4.943947624337281e-07, "loss": 0.5862, "step": 19314 }, { "epoch": 1.72, "grad_norm": 5.6738145587614515, "learning_rate": 4.940816126842924e-07, "loss": 0.4948, "step": 19315 }, { "epoch": 1.72, "grad_norm": 6.497897982584543, "learning_rate": 4.937685569860363e-07, "loss": 0.5769, "step": 19316 }, { "epoch": 1.72, "grad_norm": 5.015728622718626, "learning_rate": 4.934555953454917e-07, "loss": 0.5065, "step": 19317 }, { "epoch": 1.72, "grad_norm": 6.864760898155862, "learning_rate": 4.93142727769193e-07, "loss": 0.5881, "step": 19318 }, { "epoch": 1.72, "grad_norm": 5.5218097842853044, "learning_rate": 4.928299542636694e-07, "loss": 0.5714, "step": 19319 }, { "epoch": 1.72, "grad_norm": 8.083160156770147, "learning_rate": 4.925172748354506e-07, "loss": 0.6348, "step": 19320 }, { "epoch": 1.72, "grad_norm": 6.372166054790135, "learning_rate": 4.92204689491062e-07, "loss": 0.5414, "step": 19321 }, { "epoch": 1.72, "grad_norm": 7.725967718140857, "learning_rate": 4.918921982370301e-07, "loss": 0.5795, "step": 19322 }, { "epoch": 1.72, "grad_norm": 5.9056104550134565, "learning_rate": 4.915798010798744e-07, "loss": 0.5861, "step": 19323 }, { "epoch": 1.72, "grad_norm": 6.881010994061635, "learning_rate": 4.912674980261173e-07, "loss": 0.5187, "step": 19324 }, { "epoch": 1.72, "grad_norm": 6.345883903798714, "learning_rate": 4.909552890822783e-07, "loss": 0.5332, "step": 19325 }, { "epoch": 1.72, "grad_norm": 6.9379887869435235, "learning_rate": 4.906431742548717e-07, "loss": 0.5362, "step": 19326 }, { "epoch": 1.72, "grad_norm": 5.236948955043289, "learning_rate": 4.903311535504141e-07, "loss": 0.6127, "step": 19327 }, { "epoch": 1.72, "grad_norm": 7.486709686912351, "learning_rate": 4.900192269754172e-07, "loss": 0.5359, "step": 19328 }, { "epoch": 1.72, "grad_norm": 4.9003106758823325, "learning_rate": 4.897073945363933e-07, "loss": 0.6314, "step": 19329 }, { "epoch": 1.72, "grad_norm": 6.216385538033852, "learning_rate": 4.893956562398494e-07, "loss": 0.5444, "step": 19330 }, { "epoch": 1.72, "grad_norm": 6.705827142799635, "learning_rate": 4.89084012092293e-07, "loss": 0.5844, "step": 19331 }, { "epoch": 1.72, "grad_norm": 8.232366511184482, "learning_rate": 4.887724621002293e-07, "loss": 0.5572, "step": 19332 }, { "epoch": 1.72, "grad_norm": 6.49548111364951, "learning_rate": 4.884610062701606e-07, "loss": 0.5663, "step": 19333 }, { "epoch": 1.72, "grad_norm": 6.1898167178981085, "learning_rate": 4.881496446085887e-07, "loss": 0.5579, "step": 19334 }, { "epoch": 1.72, "grad_norm": 8.068500430312332, "learning_rate": 4.87838377122013e-07, "loss": 0.572, "step": 19335 }, { "epoch": 1.72, "grad_norm": 6.457498589952259, "learning_rate": 4.875272038169288e-07, "loss": 0.5507, "step": 19336 }, { "epoch": 1.73, "grad_norm": 7.023509125613162, "learning_rate": 4.87216124699832e-07, "loss": 0.5569, "step": 19337 }, { "epoch": 1.73, "grad_norm": 6.968688537484046, "learning_rate": 4.869051397772157e-07, "loss": 0.5662, "step": 19338 }, { "epoch": 1.73, "grad_norm": 7.706046470144435, "learning_rate": 4.865942490555709e-07, "loss": 0.5329, "step": 19339 }, { "epoch": 1.73, "grad_norm": 7.2492840798989295, "learning_rate": 4.862834525413884e-07, "loss": 0.5478, "step": 19340 }, { "epoch": 1.73, "grad_norm": 7.269368514134456, "learning_rate": 4.859727502411526e-07, "loss": 0.613, "step": 19341 }, { "epoch": 1.73, "grad_norm": 6.6593597824437545, "learning_rate": 4.85662142161351e-07, "loss": 0.5452, "step": 19342 }, { "epoch": 1.73, "grad_norm": 5.906582912420032, "learning_rate": 4.853516283084658e-07, "loss": 0.5737, "step": 19343 }, { "epoch": 1.73, "grad_norm": 6.927776107359576, "learning_rate": 4.850412086889778e-07, "loss": 0.5561, "step": 19344 }, { "epoch": 1.73, "grad_norm": 5.954169828803071, "learning_rate": 4.847308833093667e-07, "loss": 0.5339, "step": 19345 }, { "epoch": 1.73, "grad_norm": 7.8864968213449735, "learning_rate": 4.844206521761108e-07, "loss": 0.5779, "step": 19346 }, { "epoch": 1.73, "grad_norm": 5.852329187928231, "learning_rate": 4.841105152956844e-07, "loss": 0.5467, "step": 19347 }, { "epoch": 1.73, "grad_norm": 8.629021958197946, "learning_rate": 4.838004726745627e-07, "loss": 0.5726, "step": 19348 }, { "epoch": 1.73, "grad_norm": 6.904183231014354, "learning_rate": 4.834905243192145e-07, "loss": 0.6291, "step": 19349 }, { "epoch": 1.73, "grad_norm": 9.508433958400593, "learning_rate": 4.831806702361108e-07, "loss": 0.6327, "step": 19350 }, { "epoch": 1.73, "grad_norm": 6.600746207743731, "learning_rate": 4.828709104317186e-07, "loss": 0.6202, "step": 19351 }, { "epoch": 1.73, "grad_norm": 7.551423717402952, "learning_rate": 4.825612449125039e-07, "loss": 0.5421, "step": 19352 }, { "epoch": 1.73, "grad_norm": 6.6849786606963795, "learning_rate": 4.822516736849309e-07, "loss": 0.5849, "step": 19353 }, { "epoch": 1.73, "grad_norm": 6.991540953393871, "learning_rate": 4.819421967554605e-07, "loss": 0.5412, "step": 19354 }, { "epoch": 1.73, "grad_norm": 8.250703871026147, "learning_rate": 4.816328141305532e-07, "loss": 0.5874, "step": 19355 }, { "epoch": 1.73, "grad_norm": 6.73175601384945, "learning_rate": 4.813235258166643e-07, "loss": 0.6141, "step": 19356 }, { "epoch": 1.73, "grad_norm": 7.566532670668538, "learning_rate": 4.81014331820251e-07, "loss": 0.6336, "step": 19357 }, { "epoch": 1.73, "grad_norm": 6.625856191781129, "learning_rate": 4.807052321477674e-07, "loss": 0.5941, "step": 19358 }, { "epoch": 1.73, "grad_norm": 9.666870318758392, "learning_rate": 4.80396226805665e-07, "loss": 0.5297, "step": 19359 }, { "epoch": 1.73, "grad_norm": 6.519065582590679, "learning_rate": 4.800873158003938e-07, "loss": 0.5747, "step": 19360 }, { "epoch": 1.73, "grad_norm": 7.469255760232207, "learning_rate": 4.797784991384019e-07, "loss": 0.5158, "step": 19361 }, { "epoch": 1.73, "grad_norm": 7.871573956959095, "learning_rate": 4.794697768261342e-07, "loss": 0.6057, "step": 19362 }, { "epoch": 1.73, "grad_norm": 6.6511755027024995, "learning_rate": 4.791611488700349e-07, "loss": 0.5306, "step": 19363 }, { "epoch": 1.73, "grad_norm": 5.835016144307784, "learning_rate": 4.78852615276546e-07, "loss": 0.5912, "step": 19364 }, { "epoch": 1.73, "grad_norm": 6.224706729196271, "learning_rate": 4.785441760521081e-07, "loss": 0.534, "step": 19365 }, { "epoch": 1.73, "grad_norm": 5.986751141083325, "learning_rate": 4.782358312031582e-07, "loss": 0.5887, "step": 19366 }, { "epoch": 1.73, "grad_norm": 5.903006714816481, "learning_rate": 4.779275807361339e-07, "loss": 0.5313, "step": 19367 }, { "epoch": 1.73, "grad_norm": 6.279385980730084, "learning_rate": 4.776194246574672e-07, "loss": 0.5882, "step": 19368 }, { "epoch": 1.73, "grad_norm": 6.173874118693422, "learning_rate": 4.773113629735926e-07, "loss": 0.5801, "step": 19369 }, { "epoch": 1.73, "grad_norm": 7.173140860923199, "learning_rate": 4.770033956909376e-07, "loss": 0.598, "step": 19370 }, { "epoch": 1.73, "grad_norm": 6.3641734062848085, "learning_rate": 4.766955228159309e-07, "loss": 0.5069, "step": 19371 }, { "epoch": 1.73, "grad_norm": 6.459408658738114, "learning_rate": 4.7638774435500026e-07, "loss": 0.5673, "step": 19372 }, { "epoch": 1.73, "grad_norm": 5.595546409051166, "learning_rate": 4.760800603145682e-07, "loss": 0.5688, "step": 19373 }, { "epoch": 1.73, "grad_norm": 6.47088545769917, "learning_rate": 4.7577247070105856e-07, "loss": 0.5754, "step": 19374 }, { "epoch": 1.73, "grad_norm": 6.22385880409795, "learning_rate": 4.754649755208901e-07, "loss": 0.6025, "step": 19375 }, { "epoch": 1.73, "grad_norm": 9.136320891195847, "learning_rate": 4.7515757478048206e-07, "loss": 0.6178, "step": 19376 }, { "epoch": 1.73, "grad_norm": 6.9568872313968075, "learning_rate": 4.748502684862499e-07, "loss": 0.6257, "step": 19377 }, { "epoch": 1.73, "grad_norm": 6.303508009911228, "learning_rate": 4.745430566446085e-07, "loss": 0.5526, "step": 19378 }, { "epoch": 1.73, "grad_norm": 5.954122962816615, "learning_rate": 4.7423593926197107e-07, "loss": 0.5937, "step": 19379 }, { "epoch": 1.73, "grad_norm": 6.10097464707848, "learning_rate": 4.739289163447464e-07, "loss": 0.5678, "step": 19380 }, { "epoch": 1.73, "grad_norm": 6.854825842588739, "learning_rate": 4.736219878993437e-07, "loss": 0.5284, "step": 19381 }, { "epoch": 1.73, "grad_norm": 6.057609236861676, "learning_rate": 4.7331515393216955e-07, "loss": 0.5842, "step": 19382 }, { "epoch": 1.73, "grad_norm": 7.116272413425819, "learning_rate": 4.7300841444962885e-07, "loss": 0.6466, "step": 19383 }, { "epoch": 1.73, "grad_norm": 5.048051285019041, "learning_rate": 4.7270176945812264e-07, "loss": 0.5892, "step": 19384 }, { "epoch": 1.73, "grad_norm": 5.7126592333813715, "learning_rate": 4.723952189640529e-07, "loss": 0.5489, "step": 19385 }, { "epoch": 1.73, "grad_norm": 6.652933413170544, "learning_rate": 4.720887629738169e-07, "loss": 0.5761, "step": 19386 }, { "epoch": 1.73, "grad_norm": 5.870849655755925, "learning_rate": 4.7178240149381384e-07, "loss": 0.6134, "step": 19387 }, { "epoch": 1.73, "grad_norm": 9.047434013408829, "learning_rate": 4.7147613453043473e-07, "loss": 0.5379, "step": 19388 }, { "epoch": 1.73, "grad_norm": 8.175338454233335, "learning_rate": 4.7116996209007446e-07, "loss": 0.5769, "step": 19389 }, { "epoch": 1.73, "grad_norm": 7.723106258740303, "learning_rate": 4.7086388417912357e-07, "loss": 0.5946, "step": 19390 }, { "epoch": 1.73, "grad_norm": 6.810193569171156, "learning_rate": 4.705579008039696e-07, "loss": 0.5567, "step": 19391 }, { "epoch": 1.73, "grad_norm": 5.247260855360766, "learning_rate": 4.702520119710019e-07, "loss": 0.6463, "step": 19392 }, { "epoch": 1.73, "grad_norm": 6.1309969964739235, "learning_rate": 4.699462176866021e-07, "loss": 0.5101, "step": 19393 }, { "epoch": 1.73, "grad_norm": 8.485486137340368, "learning_rate": 4.6964051795715447e-07, "loss": 0.5776, "step": 19394 }, { "epoch": 1.73, "grad_norm": 8.353063305764115, "learning_rate": 4.693349127890395e-07, "loss": 0.5897, "step": 19395 }, { "epoch": 1.73, "grad_norm": 6.779164791915477, "learning_rate": 4.690294021886366e-07, "loss": 0.5606, "step": 19396 }, { "epoch": 1.73, "grad_norm": 6.130215520557752, "learning_rate": 4.6872398616232215e-07, "loss": 0.5555, "step": 19397 }, { "epoch": 1.73, "grad_norm": 5.808278991469077, "learning_rate": 4.684186647164718e-07, "loss": 0.5762, "step": 19398 }, { "epoch": 1.73, "grad_norm": 7.282882831319591, "learning_rate": 4.681134378574581e-07, "loss": 0.5883, "step": 19399 }, { "epoch": 1.73, "grad_norm": 6.572389867608683, "learning_rate": 4.6780830559165103e-07, "loss": 0.5835, "step": 19400 }, { "epoch": 1.73, "grad_norm": 5.298789100247537, "learning_rate": 4.675032679254199e-07, "loss": 0.6434, "step": 19401 }, { "epoch": 1.73, "grad_norm": 5.8678607751302865, "learning_rate": 4.671983248651324e-07, "loss": 0.5881, "step": 19402 }, { "epoch": 1.73, "grad_norm": 7.7400369363938015, "learning_rate": 4.668934764171529e-07, "loss": 0.5245, "step": 19403 }, { "epoch": 1.73, "grad_norm": 6.787827952950555, "learning_rate": 4.665887225878446e-07, "loss": 0.5634, "step": 19404 }, { "epoch": 1.73, "grad_norm": 6.643549889337505, "learning_rate": 4.662840633835697e-07, "loss": 0.6457, "step": 19405 }, { "epoch": 1.73, "grad_norm": 6.839191533841817, "learning_rate": 4.6597949881068584e-07, "loss": 0.5467, "step": 19406 }, { "epoch": 1.73, "grad_norm": 10.48631251531014, "learning_rate": 4.6567502887555015e-07, "loss": 0.5612, "step": 19407 }, { "epoch": 1.73, "grad_norm": 5.3906054383085795, "learning_rate": 4.6537065358451814e-07, "loss": 0.5324, "step": 19408 }, { "epoch": 1.73, "grad_norm": 5.263617958800574, "learning_rate": 4.650663729439436e-07, "loss": 0.5711, "step": 19409 }, { "epoch": 1.73, "grad_norm": 6.2754894472017435, "learning_rate": 4.6476218696017704e-07, "loss": 0.5062, "step": 19410 }, { "epoch": 1.73, "grad_norm": 7.95918658256001, "learning_rate": 4.6445809563956836e-07, "loss": 0.5051, "step": 19411 }, { "epoch": 1.73, "grad_norm": 6.6293551593567575, "learning_rate": 4.6415409898846466e-07, "loss": 0.5644, "step": 19412 }, { "epoch": 1.73, "grad_norm": 7.734682719267298, "learning_rate": 4.638501970132098e-07, "loss": 0.5468, "step": 19413 }, { "epoch": 1.73, "grad_norm": 6.15537191739193, "learning_rate": 4.6354638972014807e-07, "loss": 0.5984, "step": 19414 }, { "epoch": 1.73, "grad_norm": 7.869161750853449, "learning_rate": 4.6324267711562067e-07, "loss": 0.5576, "step": 19415 }, { "epoch": 1.73, "grad_norm": 6.41963550394519, "learning_rate": 4.629390592059668e-07, "loss": 0.5739, "step": 19416 }, { "epoch": 1.73, "grad_norm": 7.651653717417522, "learning_rate": 4.626355359975243e-07, "loss": 0.6304, "step": 19417 }, { "epoch": 1.73, "grad_norm": 7.887474044365535, "learning_rate": 4.623321074966297e-07, "loss": 0.5951, "step": 19418 }, { "epoch": 1.73, "grad_norm": 7.717921006027463, "learning_rate": 4.620287737096135e-07, "loss": 0.4947, "step": 19419 }, { "epoch": 1.73, "grad_norm": 5.916811116708596, "learning_rate": 4.61725534642809e-07, "loss": 0.5177, "step": 19420 }, { "epoch": 1.73, "grad_norm": 6.085053110013839, "learning_rate": 4.61422390302545e-07, "loss": 0.5873, "step": 19421 }, { "epoch": 1.73, "grad_norm": 7.868226757163164, "learning_rate": 4.6111934069514975e-07, "loss": 0.5201, "step": 19422 }, { "epoch": 1.73, "grad_norm": 6.412623879367188, "learning_rate": 4.608163858269482e-07, "loss": 0.5286, "step": 19423 }, { "epoch": 1.73, "grad_norm": 6.3677036229341715, "learning_rate": 4.6051352570426476e-07, "loss": 0.6241, "step": 19424 }, { "epoch": 1.73, "grad_norm": 7.221287755135121, "learning_rate": 4.602107603334188e-07, "loss": 0.5265, "step": 19425 }, { "epoch": 1.73, "grad_norm": 7.052411893368286, "learning_rate": 4.5990808972073255e-07, "loss": 0.5632, "step": 19426 }, { "epoch": 1.73, "grad_norm": 5.55027300075432, "learning_rate": 4.596055138725214e-07, "loss": 0.5647, "step": 19427 }, { "epoch": 1.73, "grad_norm": 5.668110890199223, "learning_rate": 4.59303032795102e-07, "loss": 0.6055, "step": 19428 }, { "epoch": 1.73, "grad_norm": 5.312832277198063, "learning_rate": 4.590006464947877e-07, "loss": 0.564, "step": 19429 }, { "epoch": 1.73, "grad_norm": 5.921875474207813, "learning_rate": 4.5869835497789007e-07, "loss": 0.5284, "step": 19430 }, { "epoch": 1.73, "grad_norm": 7.580446871976063, "learning_rate": 4.583961582507201e-07, "loss": 0.5742, "step": 19431 }, { "epoch": 1.73, "grad_norm": 5.127740466810843, "learning_rate": 4.5809405631958404e-07, "loss": 0.6233, "step": 19432 }, { "epoch": 1.73, "grad_norm": 7.509361638584741, "learning_rate": 4.5779204919078723e-07, "loss": 0.5259, "step": 19433 }, { "epoch": 1.73, "grad_norm": 7.910155228137168, "learning_rate": 4.5749013687063416e-07, "loss": 0.5382, "step": 19434 }, { "epoch": 1.73, "grad_norm": 7.084255844239266, "learning_rate": 4.57188319365427e-07, "loss": 0.6455, "step": 19435 }, { "epoch": 1.73, "grad_norm": 6.778452294988229, "learning_rate": 4.568865966814645e-07, "loss": 0.5586, "step": 19436 }, { "epoch": 1.73, "grad_norm": 4.963984387176218, "learning_rate": 4.565849688250462e-07, "loss": 0.5277, "step": 19437 }, { "epoch": 1.73, "grad_norm": 7.9347737759326, "learning_rate": 4.562834358024659e-07, "loss": 0.5785, "step": 19438 }, { "epoch": 1.73, "grad_norm": 7.552812746162098, "learning_rate": 4.5598199762001803e-07, "loss": 0.5813, "step": 19439 }, { "epoch": 1.73, "grad_norm": 6.39216738885419, "learning_rate": 4.5568065428399533e-07, "loss": 0.6358, "step": 19440 }, { "epoch": 1.73, "grad_norm": 5.912036422875484, "learning_rate": 4.5537940580068776e-07, "loss": 0.5354, "step": 19441 }, { "epoch": 1.73, "grad_norm": 7.480778336736666, "learning_rate": 4.5507825217638136e-07, "loss": 0.556, "step": 19442 }, { "epoch": 1.73, "grad_norm": 4.980796344159234, "learning_rate": 4.547771934173634e-07, "loss": 0.5523, "step": 19443 }, { "epoch": 1.73, "grad_norm": 5.442605138064705, "learning_rate": 4.5447622952991875e-07, "loss": 0.5259, "step": 19444 }, { "epoch": 1.73, "grad_norm": 4.688802096088219, "learning_rate": 4.5417536052032695e-07, "loss": 0.5023, "step": 19445 }, { "epoch": 1.73, "grad_norm": 9.871890058454602, "learning_rate": 4.538745863948696e-07, "loss": 0.5648, "step": 19446 }, { "epoch": 1.73, "grad_norm": 6.566036689774609, "learning_rate": 4.535739071598244e-07, "loss": 0.495, "step": 19447 }, { "epoch": 1.73, "grad_norm": 8.05316981616333, "learning_rate": 4.532733228214675e-07, "loss": 0.6281, "step": 19448 }, { "epoch": 1.74, "grad_norm": 5.206722343812226, "learning_rate": 4.5297283338607277e-07, "loss": 0.6174, "step": 19449 }, { "epoch": 1.74, "grad_norm": 5.429438622005737, "learning_rate": 4.52672438859913e-07, "loss": 0.5687, "step": 19450 }, { "epoch": 1.74, "grad_norm": 6.722909491269947, "learning_rate": 4.523721392492569e-07, "loss": 0.5135, "step": 19451 }, { "epoch": 1.74, "grad_norm": 7.75820394411651, "learning_rate": 4.5207193456037304e-07, "loss": 0.5637, "step": 19452 }, { "epoch": 1.74, "grad_norm": 6.6505179748808985, "learning_rate": 4.5177182479952787e-07, "loss": 0.5885, "step": 19453 }, { "epoch": 1.74, "grad_norm": 4.508279026270385, "learning_rate": 4.514718099729853e-07, "loss": 0.5476, "step": 19454 }, { "epoch": 1.74, "grad_norm": 11.11160781272378, "learning_rate": 4.5117189008700877e-07, "loss": 0.5905, "step": 19455 }, { "epoch": 1.74, "grad_norm": 5.644063104018046, "learning_rate": 4.50872065147856e-07, "loss": 0.5581, "step": 19456 }, { "epoch": 1.74, "grad_norm": 7.83409033269218, "learning_rate": 4.50572335161788e-07, "loss": 0.5476, "step": 19457 }, { "epoch": 1.74, "grad_norm": 6.8516078765871535, "learning_rate": 4.502727001350582e-07, "loss": 0.544, "step": 19458 }, { "epoch": 1.74, "grad_norm": 5.929133282275333, "learning_rate": 4.4997316007392154e-07, "loss": 0.5757, "step": 19459 }, { "epoch": 1.74, "grad_norm": 6.2370200660742645, "learning_rate": 4.4967371498463144e-07, "loss": 0.5548, "step": 19460 }, { "epoch": 1.74, "grad_norm": 5.672082077769837, "learning_rate": 4.493743648734372e-07, "loss": 0.6031, "step": 19461 }, { "epoch": 1.74, "grad_norm": 8.71773433719934, "learning_rate": 4.490751097465873e-07, "loss": 0.6109, "step": 19462 }, { "epoch": 1.74, "grad_norm": 7.963620716966938, "learning_rate": 4.487759496103294e-07, "loss": 0.6297, "step": 19463 }, { "epoch": 1.74, "grad_norm": 6.4599310772610075, "learning_rate": 4.484768844709053e-07, "loss": 0.5753, "step": 19464 }, { "epoch": 1.74, "grad_norm": 5.617599894248542, "learning_rate": 4.4817791433455883e-07, "loss": 0.5972, "step": 19465 }, { "epoch": 1.74, "grad_norm": 6.686150666328848, "learning_rate": 4.4787903920752997e-07, "loss": 0.5704, "step": 19466 }, { "epoch": 1.74, "grad_norm": 4.985462343250049, "learning_rate": 4.4758025909605706e-07, "loss": 0.5933, "step": 19467 }, { "epoch": 1.74, "grad_norm": 6.976573467713243, "learning_rate": 4.472815740063774e-07, "loss": 0.6288, "step": 19468 }, { "epoch": 1.74, "grad_norm": 5.804012158339068, "learning_rate": 4.469829839447237e-07, "loss": 0.5102, "step": 19469 }, { "epoch": 1.74, "grad_norm": 8.37295431432846, "learning_rate": 4.4668448891733054e-07, "loss": 0.5826, "step": 19470 }, { "epoch": 1.74, "grad_norm": 5.4797327617345575, "learning_rate": 4.463860889304261e-07, "loss": 0.5026, "step": 19471 }, { "epoch": 1.74, "grad_norm": 7.410786566920826, "learning_rate": 4.4608778399023943e-07, "loss": 0.581, "step": 19472 }, { "epoch": 1.74, "grad_norm": 9.104747470895896, "learning_rate": 4.4578957410299774e-07, "loss": 0.6567, "step": 19473 }, { "epoch": 1.74, "grad_norm": 6.57326787309342, "learning_rate": 4.4549145927492545e-07, "loss": 0.5768, "step": 19474 }, { "epoch": 1.74, "grad_norm": 6.624955460644096, "learning_rate": 4.4519343951224536e-07, "loss": 0.5421, "step": 19475 }, { "epoch": 1.74, "grad_norm": 5.517261562359927, "learning_rate": 4.448955148211759e-07, "loss": 0.5719, "step": 19476 }, { "epoch": 1.74, "grad_norm": 5.696974359447795, "learning_rate": 4.4459768520793755e-07, "loss": 0.6165, "step": 19477 }, { "epoch": 1.74, "grad_norm": 7.416344132161564, "learning_rate": 4.4429995067874656e-07, "loss": 0.5866, "step": 19478 }, { "epoch": 1.74, "grad_norm": 6.113297676799036, "learning_rate": 4.4400231123981676e-07, "loss": 0.5709, "step": 19479 }, { "epoch": 1.74, "grad_norm": 7.833172164564583, "learning_rate": 4.4370476689736154e-07, "loss": 0.5482, "step": 19480 }, { "epoch": 1.74, "grad_norm": 7.69284265061819, "learning_rate": 4.434073176575926e-07, "loss": 0.5913, "step": 19481 }, { "epoch": 1.74, "grad_norm": 5.9865239367218885, "learning_rate": 4.4310996352671555e-07, "loss": 0.5446, "step": 19482 }, { "epoch": 1.74, "grad_norm": 7.663676139550128, "learning_rate": 4.4281270451093874e-07, "loss": 0.5512, "step": 19483 }, { "epoch": 1.74, "grad_norm": 6.247997725914049, "learning_rate": 4.425155406164677e-07, "loss": 0.6155, "step": 19484 }, { "epoch": 1.74, "grad_norm": 6.462384546782858, "learning_rate": 4.4221847184950264e-07, "loss": 0.5153, "step": 19485 }, { "epoch": 1.74, "grad_norm": 9.150770400193105, "learning_rate": 4.419214982162462e-07, "loss": 0.5833, "step": 19486 }, { "epoch": 1.74, "grad_norm": 6.427268855909817, "learning_rate": 4.416246197228963e-07, "loss": 0.5872, "step": 19487 }, { "epoch": 1.74, "grad_norm": 7.055163627749746, "learning_rate": 4.413278363756507e-07, "loss": 0.5841, "step": 19488 }, { "epoch": 1.74, "grad_norm": 11.356795729150889, "learning_rate": 4.4103114818070226e-07, "loss": 0.5212, "step": 19489 }, { "epoch": 1.74, "grad_norm": 9.19756954222699, "learning_rate": 4.4073455514424437e-07, "loss": 0.6042, "step": 19490 }, { "epoch": 1.74, "grad_norm": 6.77521256839744, "learning_rate": 4.4043805727246867e-07, "loss": 0.5715, "step": 19491 }, { "epoch": 1.74, "grad_norm": 6.2174625244851125, "learning_rate": 4.4014165457156257e-07, "loss": 0.5895, "step": 19492 }, { "epoch": 1.74, "grad_norm": 8.210066784928845, "learning_rate": 4.3984534704771376e-07, "loss": 0.4879, "step": 19493 }, { "epoch": 1.74, "grad_norm": 5.685446147847926, "learning_rate": 4.395491347071079e-07, "loss": 0.5202, "step": 19494 }, { "epoch": 1.74, "grad_norm": 7.083276183627929, "learning_rate": 4.392530175559251e-07, "loss": 0.5038, "step": 19495 }, { "epoch": 1.74, "grad_norm": 6.2682418428907, "learning_rate": 4.389569956003481e-07, "loss": 0.5507, "step": 19496 }, { "epoch": 1.74, "grad_norm": 8.017207083682553, "learning_rate": 4.386610688465554e-07, "loss": 0.5647, "step": 19497 }, { "epoch": 1.74, "grad_norm": 9.186013430066737, "learning_rate": 4.3836523730072466e-07, "loss": 0.5868, "step": 19498 }, { "epoch": 1.74, "grad_norm": 6.765438289298087, "learning_rate": 4.3806950096902887e-07, "loss": 0.5796, "step": 19499 }, { "epoch": 1.74, "grad_norm": 6.489491365182356, "learning_rate": 4.377738598576414e-07, "loss": 0.5306, "step": 19500 }, { "epoch": 1.74, "grad_norm": 6.209091495303553, "learning_rate": 4.374783139727351e-07, "loss": 0.5434, "step": 19501 }, { "epoch": 1.74, "grad_norm": 7.171287453550054, "learning_rate": 4.3718286332047556e-07, "loss": 0.596, "step": 19502 }, { "epoch": 1.74, "grad_norm": 6.584728193089351, "learning_rate": 4.3688750790703173e-07, "loss": 0.6529, "step": 19503 }, { "epoch": 1.74, "grad_norm": 7.841150305205505, "learning_rate": 4.365922477385681e-07, "loss": 0.5513, "step": 19504 }, { "epoch": 1.74, "grad_norm": 7.229058464113996, "learning_rate": 4.362970828212476e-07, "loss": 0.5594, "step": 19505 }, { "epoch": 1.74, "grad_norm": 9.524199450805392, "learning_rate": 4.360020131612314e-07, "loss": 0.5706, "step": 19506 }, { "epoch": 1.74, "grad_norm": 7.414270151324955, "learning_rate": 4.357070387646789e-07, "loss": 0.6619, "step": 19507 }, { "epoch": 1.74, "grad_norm": 8.141966767160117, "learning_rate": 4.354121596377453e-07, "loss": 0.5791, "step": 19508 }, { "epoch": 1.74, "grad_norm": 5.721795442160722, "learning_rate": 4.3511737578658677e-07, "loss": 0.5991, "step": 19509 }, { "epoch": 1.74, "grad_norm": 8.272871543053228, "learning_rate": 4.348226872173561e-07, "loss": 0.566, "step": 19510 }, { "epoch": 1.74, "grad_norm": 9.065393872017193, "learning_rate": 4.34528093936204e-07, "loss": 0.5894, "step": 19511 }, { "epoch": 1.74, "grad_norm": 6.9424534078563465, "learning_rate": 4.3423359594927994e-07, "loss": 0.5841, "step": 19512 }, { "epoch": 1.74, "grad_norm": 7.025154010311308, "learning_rate": 4.339391932627318e-07, "loss": 0.6101, "step": 19513 }, { "epoch": 1.74, "grad_norm": 6.714642236206939, "learning_rate": 4.3364488588270304e-07, "loss": 0.5641, "step": 19514 }, { "epoch": 1.74, "grad_norm": 5.209323935102551, "learning_rate": 4.3335067381533645e-07, "loss": 0.6219, "step": 19515 }, { "epoch": 1.74, "grad_norm": 6.502953986171062, "learning_rate": 4.330565570667739e-07, "loss": 0.5999, "step": 19516 }, { "epoch": 1.74, "grad_norm": 7.7691372672810415, "learning_rate": 4.327625356431542e-07, "loss": 0.5874, "step": 19517 }, { "epoch": 1.74, "grad_norm": 6.7821185312419505, "learning_rate": 4.324686095506142e-07, "loss": 0.5011, "step": 19518 }, { "epoch": 1.74, "grad_norm": 6.744629692843127, "learning_rate": 4.3217477879528956e-07, "loss": 0.5585, "step": 19519 }, { "epoch": 1.74, "grad_norm": 6.662715987931336, "learning_rate": 4.318810433833143e-07, "loss": 0.5716, "step": 19520 }, { "epoch": 1.74, "grad_norm": 6.930405151187031, "learning_rate": 4.315874033208167e-07, "loss": 0.6317, "step": 19521 }, { "epoch": 1.74, "grad_norm": 6.908598566831788, "learning_rate": 4.312938586139276e-07, "loss": 0.5721, "step": 19522 }, { "epoch": 1.74, "grad_norm": 6.04129035261346, "learning_rate": 4.310004092687742e-07, "loss": 0.5262, "step": 19523 }, { "epoch": 1.74, "grad_norm": 7.389922616938146, "learning_rate": 4.3070705529148106e-07, "loss": 0.6032, "step": 19524 }, { "epoch": 1.74, "grad_norm": 8.116850144131325, "learning_rate": 4.304137966881716e-07, "loss": 0.5776, "step": 19525 }, { "epoch": 1.74, "grad_norm": 7.2664356210579335, "learning_rate": 4.301206334649677e-07, "loss": 0.5636, "step": 19526 }, { "epoch": 1.74, "grad_norm": 4.714019699834582, "learning_rate": 4.2982756562798767e-07, "loss": 0.5647, "step": 19527 }, { "epoch": 1.74, "grad_norm": 5.3409487131263615, "learning_rate": 4.295345931833478e-07, "loss": 0.5175, "step": 19528 }, { "epoch": 1.74, "grad_norm": 6.718982887953429, "learning_rate": 4.2924171613716426e-07, "loss": 0.5529, "step": 19529 }, { "epoch": 1.74, "grad_norm": 5.743555745622507, "learning_rate": 4.289489344955505e-07, "loss": 0.5281, "step": 19530 }, { "epoch": 1.74, "grad_norm": 9.057076315677909, "learning_rate": 4.286562482646167e-07, "loss": 0.6085, "step": 19531 }, { "epoch": 1.74, "grad_norm": 6.485153116295635, "learning_rate": 4.283636574504729e-07, "loss": 0.536, "step": 19532 }, { "epoch": 1.74, "grad_norm": 11.125507510358856, "learning_rate": 4.2807116205922706e-07, "loss": 0.6311, "step": 19533 }, { "epoch": 1.74, "grad_norm": 5.471770671664696, "learning_rate": 4.27778762096982e-07, "loss": 0.5579, "step": 19534 }, { "epoch": 1.74, "grad_norm": 7.295582893472179, "learning_rate": 4.274864575698429e-07, "loss": 0.5303, "step": 19535 }, { "epoch": 1.74, "grad_norm": 6.362482514196293, "learning_rate": 4.2719424848390977e-07, "loss": 0.5984, "step": 19536 }, { "epoch": 1.74, "grad_norm": 5.849008163059676, "learning_rate": 4.269021348452829e-07, "loss": 0.5443, "step": 19537 }, { "epoch": 1.74, "grad_norm": 7.605104750299689, "learning_rate": 4.266101166600589e-07, "loss": 0.5992, "step": 19538 }, { "epoch": 1.74, "grad_norm": 8.204995873121996, "learning_rate": 4.263181939343336e-07, "loss": 0.5926, "step": 19539 }, { "epoch": 1.74, "grad_norm": 7.3905347631205, "learning_rate": 4.2602636667419985e-07, "loss": 0.586, "step": 19540 }, { "epoch": 1.74, "grad_norm": 9.354597506155264, "learning_rate": 4.257346348857483e-07, "loss": 0.6128, "step": 19541 }, { "epoch": 1.74, "grad_norm": 5.56697419814046, "learning_rate": 4.2544299857506967e-07, "loss": 0.555, "step": 19542 }, { "epoch": 1.74, "grad_norm": 5.244794268370045, "learning_rate": 4.2515145774824906e-07, "loss": 0.5267, "step": 19543 }, { "epoch": 1.74, "grad_norm": 7.021835240042137, "learning_rate": 4.248600124113739e-07, "loss": 0.6466, "step": 19544 }, { "epoch": 1.74, "grad_norm": 6.902044062648817, "learning_rate": 4.245686625705259e-07, "loss": 0.518, "step": 19545 }, { "epoch": 1.74, "grad_norm": 6.779476581420664, "learning_rate": 4.24277408231788e-07, "loss": 0.5485, "step": 19546 }, { "epoch": 1.74, "grad_norm": 6.785942055004502, "learning_rate": 4.239862494012381e-07, "loss": 0.5707, "step": 19547 }, { "epoch": 1.74, "grad_norm": 6.975753411899509, "learning_rate": 4.236951860849536e-07, "loss": 0.6084, "step": 19548 }, { "epoch": 1.74, "grad_norm": 6.732381193617649, "learning_rate": 4.2340421828901023e-07, "loss": 0.5761, "step": 19549 }, { "epoch": 1.74, "grad_norm": 6.436980845332754, "learning_rate": 4.231133460194814e-07, "loss": 0.5762, "step": 19550 }, { "epoch": 1.74, "grad_norm": 7.5183641256989615, "learning_rate": 4.228225692824389e-07, "loss": 0.595, "step": 19551 }, { "epoch": 1.74, "grad_norm": 7.489418278430452, "learning_rate": 4.2253188808395074e-07, "loss": 0.5498, "step": 19552 }, { "epoch": 1.74, "grad_norm": 6.4939550188975215, "learning_rate": 4.2224130243008475e-07, "loss": 0.4962, "step": 19553 }, { "epoch": 1.74, "grad_norm": 5.262098686909548, "learning_rate": 4.2195081232690614e-07, "loss": 0.6027, "step": 19554 }, { "epoch": 1.74, "grad_norm": 10.22436940681599, "learning_rate": 4.2166041778047893e-07, "loss": 0.6078, "step": 19555 }, { "epoch": 1.74, "grad_norm": 6.520070894521177, "learning_rate": 4.213701187968655e-07, "loss": 0.5413, "step": 19556 }, { "epoch": 1.74, "grad_norm": 5.0252144158591365, "learning_rate": 4.210799153821221e-07, "loss": 0.5761, "step": 19557 }, { "epoch": 1.74, "grad_norm": 8.728993546951923, "learning_rate": 4.207898075423089e-07, "loss": 0.5773, "step": 19558 }, { "epoch": 1.74, "grad_norm": 5.197874096537514, "learning_rate": 4.204997952834794e-07, "loss": 0.5847, "step": 19559 }, { "epoch": 1.74, "grad_norm": 7.503700224415165, "learning_rate": 4.2020987861168813e-07, "loss": 0.5614, "step": 19560 }, { "epoch": 1.75, "grad_norm": 7.028679120702142, "learning_rate": 4.199200575329854e-07, "loss": 0.5806, "step": 19561 }, { "epoch": 1.75, "grad_norm": 6.966521346020008, "learning_rate": 4.196303320534217e-07, "loss": 0.5606, "step": 19562 }, { "epoch": 1.75, "grad_norm": 7.974386403767797, "learning_rate": 4.1934070217904355e-07, "loss": 0.5169, "step": 19563 }, { "epoch": 1.75, "grad_norm": 8.32441157132391, "learning_rate": 4.190511679158982e-07, "loss": 0.6075, "step": 19564 }, { "epoch": 1.75, "grad_norm": 6.3441187321156445, "learning_rate": 4.187617292700263e-07, "loss": 0.5657, "step": 19565 }, { "epoch": 1.75, "grad_norm": 7.307415532048551, "learning_rate": 4.18472386247471e-07, "loss": 0.6333, "step": 19566 }, { "epoch": 1.75, "grad_norm": 7.984642000228335, "learning_rate": 4.1818313885427064e-07, "loss": 0.5422, "step": 19567 }, { "epoch": 1.75, "grad_norm": 8.93175364337211, "learning_rate": 4.1789398709646377e-07, "loss": 0.6203, "step": 19568 }, { "epoch": 1.75, "grad_norm": 6.111005548912059, "learning_rate": 4.17604930980085e-07, "loss": 0.5713, "step": 19569 }, { "epoch": 1.75, "grad_norm": 7.901959834357352, "learning_rate": 4.17315970511169e-07, "loss": 0.5153, "step": 19570 }, { "epoch": 1.75, "grad_norm": 5.549466272608496, "learning_rate": 4.1702710569574646e-07, "loss": 0.6212, "step": 19571 }, { "epoch": 1.75, "grad_norm": 4.998539854877805, "learning_rate": 4.167383365398453e-07, "loss": 0.5805, "step": 19572 }, { "epoch": 1.75, "grad_norm": 9.061823567642627, "learning_rate": 4.164496630494941e-07, "loss": 0.6782, "step": 19573 }, { "epoch": 1.75, "grad_norm": 5.175329237579687, "learning_rate": 4.161610852307185e-07, "loss": 0.6098, "step": 19574 }, { "epoch": 1.75, "grad_norm": 6.5759171947605815, "learning_rate": 4.158726030895421e-07, "loss": 0.4765, "step": 19575 }, { "epoch": 1.75, "grad_norm": 8.293575669205936, "learning_rate": 4.155842166319857e-07, "loss": 0.5614, "step": 19576 }, { "epoch": 1.75, "grad_norm": 9.240226721822177, "learning_rate": 4.1529592586406986e-07, "loss": 0.5638, "step": 19577 }, { "epoch": 1.75, "grad_norm": 8.129264392722758, "learning_rate": 4.15007730791811e-07, "loss": 0.6329, "step": 19578 }, { "epoch": 1.75, "grad_norm": 6.5864988472486745, "learning_rate": 4.147196314212243e-07, "loss": 0.5589, "step": 19579 }, { "epoch": 1.75, "grad_norm": 8.088119764851541, "learning_rate": 4.144316277583238e-07, "loss": 0.5748, "step": 19580 }, { "epoch": 1.75, "grad_norm": 6.30953818424619, "learning_rate": 4.1414371980912083e-07, "loss": 0.5491, "step": 19581 }, { "epoch": 1.75, "grad_norm": 9.380457303441355, "learning_rate": 4.13855907579625e-07, "loss": 0.5846, "step": 19582 }, { "epoch": 1.75, "grad_norm": 6.874659520965099, "learning_rate": 4.1356819107584436e-07, "loss": 0.5474, "step": 19583 }, { "epoch": 1.75, "grad_norm": 7.9037714880721675, "learning_rate": 4.1328057030378234e-07, "loss": 0.5465, "step": 19584 }, { "epoch": 1.75, "grad_norm": 5.043225529610049, "learning_rate": 4.129930452694453e-07, "loss": 0.6186, "step": 19585 }, { "epoch": 1.75, "grad_norm": 7.104755531465783, "learning_rate": 4.127056159788317e-07, "loss": 0.5415, "step": 19586 }, { "epoch": 1.75, "grad_norm": 6.495774771769534, "learning_rate": 4.1241828243794245e-07, "loss": 0.5683, "step": 19587 }, { "epoch": 1.75, "grad_norm": 6.853326112838729, "learning_rate": 4.121310446527749e-07, "loss": 0.5553, "step": 19588 }, { "epoch": 1.75, "grad_norm": 5.896284228505057, "learning_rate": 4.118439026293247e-07, "loss": 0.5775, "step": 19589 }, { "epoch": 1.75, "grad_norm": 6.043600940017664, "learning_rate": 4.115568563735861e-07, "loss": 0.536, "step": 19590 }, { "epoch": 1.75, "grad_norm": 4.494709104299569, "learning_rate": 4.1126990589154816e-07, "loss": 0.5587, "step": 19591 }, { "epoch": 1.75, "grad_norm": 5.939396251131095, "learning_rate": 4.109830511892021e-07, "loss": 0.625, "step": 19592 }, { "epoch": 1.75, "grad_norm": 6.909909478371609, "learning_rate": 4.106962922725355e-07, "loss": 0.5488, "step": 19593 }, { "epoch": 1.75, "grad_norm": 7.370413518248144, "learning_rate": 4.1040962914753293e-07, "loss": 0.592, "step": 19594 }, { "epoch": 1.75, "grad_norm": 6.997244266385302, "learning_rate": 4.1012306182017846e-07, "loss": 0.5191, "step": 19595 }, { "epoch": 1.75, "grad_norm": 5.983914973386036, "learning_rate": 4.0983659029645405e-07, "loss": 0.5806, "step": 19596 }, { "epoch": 1.75, "grad_norm": 8.07000655730643, "learning_rate": 4.095502145823382e-07, "loss": 0.583, "step": 19597 }, { "epoch": 1.75, "grad_norm": 6.493505353975712, "learning_rate": 4.092639346838084e-07, "loss": 0.5598, "step": 19598 }, { "epoch": 1.75, "grad_norm": 9.06574225483365, "learning_rate": 4.0897775060684097e-07, "loss": 0.5379, "step": 19599 }, { "epoch": 1.75, "grad_norm": 6.990198161129171, "learning_rate": 4.0869166235740834e-07, "loss": 0.5242, "step": 19600 }, { "epoch": 1.75, "grad_norm": 9.211851700845598, "learning_rate": 4.0840566994148236e-07, "loss": 0.5492, "step": 19601 }, { "epoch": 1.75, "grad_norm": 8.944803565214043, "learning_rate": 4.081197733650327e-07, "loss": 0.561, "step": 19602 }, { "epoch": 1.75, "grad_norm": 6.749754974491633, "learning_rate": 4.078339726340275e-07, "loss": 0.6271, "step": 19603 }, { "epoch": 1.75, "grad_norm": 8.482761969646706, "learning_rate": 4.075482677544307e-07, "loss": 0.5221, "step": 19604 }, { "epoch": 1.75, "grad_norm": 7.033914045969014, "learning_rate": 4.072626587322065e-07, "loss": 0.533, "step": 19605 }, { "epoch": 1.75, "grad_norm": 6.521944947729008, "learning_rate": 4.0697714557331624e-07, "loss": 0.4996, "step": 19606 }, { "epoch": 1.75, "grad_norm": 6.032867736125572, "learning_rate": 4.0669172828371905e-07, "loss": 0.551, "step": 19607 }, { "epoch": 1.75, "grad_norm": 5.71965334245738, "learning_rate": 4.064064068693735e-07, "loss": 0.5636, "step": 19608 }, { "epoch": 1.75, "grad_norm": 6.250768628587512, "learning_rate": 4.061211813362353e-07, "loss": 0.5883, "step": 19609 }, { "epoch": 1.75, "grad_norm": 6.347316546245109, "learning_rate": 4.05836051690256e-07, "loss": 0.5704, "step": 19610 }, { "epoch": 1.75, "grad_norm": 6.488333944777599, "learning_rate": 4.055510179373878e-07, "loss": 0.5414, "step": 19611 }, { "epoch": 1.75, "grad_norm": 7.4774745812018795, "learning_rate": 4.0526608008358113e-07, "loss": 0.5589, "step": 19612 }, { "epoch": 1.75, "grad_norm": 6.333774055737967, "learning_rate": 4.0498123813478227e-07, "loss": 0.5753, "step": 19613 }, { "epoch": 1.75, "grad_norm": 6.740864381298494, "learning_rate": 4.046964920969376e-07, "loss": 0.5938, "step": 19614 }, { "epoch": 1.75, "grad_norm": 5.7873548648798865, "learning_rate": 4.0441184197599013e-07, "loss": 0.5302, "step": 19615 }, { "epoch": 1.75, "grad_norm": 7.828962397026977, "learning_rate": 4.0412728777788177e-07, "loss": 0.487, "step": 19616 }, { "epoch": 1.75, "grad_norm": 6.747157508571282, "learning_rate": 4.038428295085506e-07, "loss": 0.5104, "step": 19617 }, { "epoch": 1.75, "grad_norm": 5.043432526283541, "learning_rate": 4.0355846717393456e-07, "loss": 0.579, "step": 19618 }, { "epoch": 1.75, "grad_norm": 7.474772445135849, "learning_rate": 4.0327420077997004e-07, "loss": 0.5859, "step": 19619 }, { "epoch": 1.75, "grad_norm": 5.510875892457881, "learning_rate": 4.0299003033258954e-07, "loss": 0.5995, "step": 19620 }, { "epoch": 1.75, "grad_norm": 5.638153204280999, "learning_rate": 4.02705955837725e-07, "loss": 0.6042, "step": 19621 }, { "epoch": 1.75, "grad_norm": 7.828864635901741, "learning_rate": 4.024219773013066e-07, "loss": 0.558, "step": 19622 }, { "epoch": 1.75, "grad_norm": 6.804225598889114, "learning_rate": 4.0213809472926026e-07, "loss": 0.5217, "step": 19623 }, { "epoch": 1.75, "grad_norm": 5.529206105044793, "learning_rate": 4.018543081275117e-07, "loss": 0.632, "step": 19624 }, { "epoch": 1.75, "grad_norm": 7.145676611096472, "learning_rate": 4.015706175019851e-07, "loss": 0.5485, "step": 19625 }, { "epoch": 1.75, "grad_norm": 6.035548121425731, "learning_rate": 4.012870228586013e-07, "loss": 0.6035, "step": 19626 }, { "epoch": 1.75, "grad_norm": 5.897499591592463, "learning_rate": 4.010035242032806e-07, "loss": 0.5592, "step": 19627 }, { "epoch": 1.75, "grad_norm": 6.706325618160136, "learning_rate": 4.0072012154193874e-07, "loss": 0.6221, "step": 19628 }, { "epoch": 1.75, "grad_norm": 7.957055338696454, "learning_rate": 4.004368148804932e-07, "loss": 0.5563, "step": 19629 }, { "epoch": 1.75, "grad_norm": 8.083637779593275, "learning_rate": 4.001536042248555e-07, "loss": 0.6039, "step": 19630 }, { "epoch": 1.75, "grad_norm": 7.977073656828253, "learning_rate": 3.99870489580938e-07, "loss": 0.6233, "step": 19631 }, { "epoch": 1.75, "grad_norm": 6.783718591834109, "learning_rate": 3.9958747095464936e-07, "loss": 0.5667, "step": 19632 }, { "epoch": 1.75, "grad_norm": 6.973502012047984, "learning_rate": 3.993045483518976e-07, "loss": 0.5902, "step": 19633 }, { "epoch": 1.75, "grad_norm": 6.656787522549099, "learning_rate": 3.990217217785891e-07, "loss": 0.5359, "step": 19634 }, { "epoch": 1.75, "grad_norm": 7.285131820228348, "learning_rate": 3.987389912406253e-07, "loss": 0.6403, "step": 19635 }, { "epoch": 1.75, "grad_norm": 8.129978007358527, "learning_rate": 3.984563567439087e-07, "loss": 0.5756, "step": 19636 }, { "epoch": 1.75, "grad_norm": 5.873697903969904, "learning_rate": 3.9817381829433787e-07, "loss": 0.636, "step": 19637 }, { "epoch": 1.75, "grad_norm": 8.087186722190522, "learning_rate": 3.9789137589781144e-07, "loss": 0.5165, "step": 19638 }, { "epoch": 1.75, "grad_norm": 7.930119010868773, "learning_rate": 3.976090295602236e-07, "loss": 0.5646, "step": 19639 }, { "epoch": 1.75, "grad_norm": 6.785396388527818, "learning_rate": 3.973267792874691e-07, "loss": 0.4622, "step": 19640 }, { "epoch": 1.75, "grad_norm": 5.681868248652717, "learning_rate": 3.9704462508543763e-07, "loss": 0.6074, "step": 19641 }, { "epoch": 1.75, "grad_norm": 5.320174629464637, "learning_rate": 3.9676256696001947e-07, "loss": 0.5904, "step": 19642 }, { "epoch": 1.75, "grad_norm": 6.149751911381122, "learning_rate": 3.964806049171027e-07, "loss": 0.5406, "step": 19643 }, { "epoch": 1.75, "grad_norm": 5.909769895364347, "learning_rate": 3.9619873896257045e-07, "loss": 0.4871, "step": 19644 }, { "epoch": 1.75, "grad_norm": 5.624790997273116, "learning_rate": 3.959169691023079e-07, "loss": 0.571, "step": 19645 }, { "epoch": 1.75, "grad_norm": 11.803390920989672, "learning_rate": 3.956352953421955e-07, "loss": 0.6361, "step": 19646 }, { "epoch": 1.75, "grad_norm": 9.017606468806923, "learning_rate": 3.9535371768811394e-07, "loss": 0.6447, "step": 19647 }, { "epoch": 1.75, "grad_norm": 7.0913966995425515, "learning_rate": 3.950722361459386e-07, "loss": 0.5459, "step": 19648 }, { "epoch": 1.75, "grad_norm": 5.284812986705919, "learning_rate": 3.947908507215459e-07, "loss": 0.5229, "step": 19649 }, { "epoch": 1.75, "grad_norm": 6.244625809701947, "learning_rate": 3.945095614208089e-07, "loss": 0.5272, "step": 19650 }, { "epoch": 1.75, "grad_norm": 7.467635244493185, "learning_rate": 3.9422836824959844e-07, "loss": 0.5399, "step": 19651 }, { "epoch": 1.75, "grad_norm": 8.853271392184466, "learning_rate": 3.939472712137848e-07, "loss": 0.5826, "step": 19652 }, { "epoch": 1.75, "grad_norm": 6.9165074121467685, "learning_rate": 3.936662703192357e-07, "loss": 0.6326, "step": 19653 }, { "epoch": 1.75, "grad_norm": 5.35021154117626, "learning_rate": 3.933853655718145e-07, "loss": 0.5486, "step": 19654 }, { "epoch": 1.75, "grad_norm": 5.8136929381373585, "learning_rate": 3.931045569773856e-07, "loss": 0.5998, "step": 19655 }, { "epoch": 1.75, "grad_norm": 6.5924693334908655, "learning_rate": 3.928238445418098e-07, "loss": 0.5502, "step": 19656 }, { "epoch": 1.75, "grad_norm": 7.447327998709474, "learning_rate": 3.9254322827094795e-07, "loss": 0.5442, "step": 19657 }, { "epoch": 1.75, "grad_norm": 6.300775980520395, "learning_rate": 3.922627081706548e-07, "loss": 0.5236, "step": 19658 }, { "epoch": 1.75, "grad_norm": 5.699089403460558, "learning_rate": 3.919822842467874e-07, "loss": 0.553, "step": 19659 }, { "epoch": 1.75, "grad_norm": 8.170776528048355, "learning_rate": 3.917019565051988e-07, "loss": 0.5772, "step": 19660 }, { "epoch": 1.75, "grad_norm": 5.9955361881803855, "learning_rate": 3.9142172495173935e-07, "loss": 0.5276, "step": 19661 }, { "epoch": 1.75, "grad_norm": 8.103680470225793, "learning_rate": 3.9114158959225824e-07, "loss": 0.5698, "step": 19662 }, { "epoch": 1.75, "grad_norm": 5.0866402050950965, "learning_rate": 3.9086155043260355e-07, "loss": 0.6669, "step": 19663 }, { "epoch": 1.75, "grad_norm": 6.722969461717848, "learning_rate": 3.905816074786206e-07, "loss": 0.5767, "step": 19664 }, { "epoch": 1.75, "grad_norm": 8.087856638722865, "learning_rate": 3.9030176073615144e-07, "loss": 0.5561, "step": 19665 }, { "epoch": 1.75, "grad_norm": 5.308731137366994, "learning_rate": 3.9002201021103914e-07, "loss": 0.5659, "step": 19666 }, { "epoch": 1.75, "grad_norm": 7.993088919015334, "learning_rate": 3.897423559091207e-07, "loss": 0.5559, "step": 19667 }, { "epoch": 1.75, "grad_norm": 6.734790214046318, "learning_rate": 3.8946279783623474e-07, "loss": 0.5341, "step": 19668 }, { "epoch": 1.75, "grad_norm": 5.904448352937969, "learning_rate": 3.8918333599821554e-07, "loss": 0.5867, "step": 19669 }, { "epoch": 1.75, "grad_norm": 7.344615521789546, "learning_rate": 3.8890397040089677e-07, "loss": 0.5566, "step": 19670 }, { "epoch": 1.75, "grad_norm": 6.072070371082955, "learning_rate": 3.886247010501093e-07, "loss": 0.5497, "step": 19671 }, { "epoch": 1.75, "grad_norm": 5.311034080029039, "learning_rate": 3.883455279516835e-07, "loss": 0.6418, "step": 19672 }, { "epoch": 1.76, "grad_norm": 8.06748364551857, "learning_rate": 3.880664511114457e-07, "loss": 0.5327, "step": 19673 }, { "epoch": 1.76, "grad_norm": 6.038241050006686, "learning_rate": 3.8778747053521914e-07, "loss": 0.561, "step": 19674 }, { "epoch": 1.76, "grad_norm": 11.924382084564126, "learning_rate": 3.8750858622882914e-07, "loss": 0.6141, "step": 19675 }, { "epoch": 1.76, "grad_norm": 7.580861376687991, "learning_rate": 3.8722979819809656e-07, "loss": 0.6454, "step": 19676 }, { "epoch": 1.76, "grad_norm": 5.81660072710745, "learning_rate": 3.869511064488396e-07, "loss": 0.5433, "step": 19677 }, { "epoch": 1.76, "grad_norm": 6.410725997619927, "learning_rate": 3.8667251098687576e-07, "loss": 0.5452, "step": 19678 }, { "epoch": 1.76, "grad_norm": 6.451134818922314, "learning_rate": 3.863940118180215e-07, "loss": 0.5197, "step": 19679 }, { "epoch": 1.76, "grad_norm": 6.812272061215325, "learning_rate": 3.8611560894808776e-07, "loss": 0.6051, "step": 19680 }, { "epoch": 1.76, "grad_norm": 7.35103294818015, "learning_rate": 3.858373023828865e-07, "loss": 0.5789, "step": 19681 }, { "epoch": 1.76, "grad_norm": 7.834090487047694, "learning_rate": 3.855590921282265e-07, "loss": 0.6213, "step": 19682 }, { "epoch": 1.76, "grad_norm": 7.923483746237946, "learning_rate": 3.852809781899147e-07, "loss": 0.5946, "step": 19683 }, { "epoch": 1.76, "grad_norm": 7.6640073399027315, "learning_rate": 3.850029605737571e-07, "loss": 0.5899, "step": 19684 }, { "epoch": 1.76, "grad_norm": 6.63864027281919, "learning_rate": 3.847250392855562e-07, "loss": 0.5325, "step": 19685 }, { "epoch": 1.76, "grad_norm": 7.344464757765747, "learning_rate": 3.844472143311134e-07, "loss": 0.5803, "step": 19686 }, { "epoch": 1.76, "grad_norm": 6.865971007462977, "learning_rate": 3.8416948571622593e-07, "loss": 0.6159, "step": 19687 }, { "epoch": 1.76, "grad_norm": 8.058557885172691, "learning_rate": 3.838918534466918e-07, "loss": 0.5729, "step": 19688 }, { "epoch": 1.76, "grad_norm": 6.442225475485808, "learning_rate": 3.836143175283064e-07, "loss": 0.5163, "step": 19689 }, { "epoch": 1.76, "grad_norm": 7.273984291209826, "learning_rate": 3.8333687796686227e-07, "loss": 0.5195, "step": 19690 }, { "epoch": 1.76, "grad_norm": 6.417060199214073, "learning_rate": 3.8305953476815093e-07, "loss": 0.5476, "step": 19691 }, { "epoch": 1.76, "grad_norm": 5.761896577743787, "learning_rate": 3.827822879379611e-07, "loss": 0.5601, "step": 19692 }, { "epoch": 1.76, "grad_norm": 8.603299699092851, "learning_rate": 3.825051374820793e-07, "loss": 0.6041, "step": 19693 }, { "epoch": 1.76, "grad_norm": 6.128584504828116, "learning_rate": 3.822280834062897e-07, "loss": 0.5263, "step": 19694 }, { "epoch": 1.76, "grad_norm": 6.438728178288485, "learning_rate": 3.8195112571637717e-07, "loss": 0.5325, "step": 19695 }, { "epoch": 1.76, "grad_norm": 7.466573022249899, "learning_rate": 3.8167426441812094e-07, "loss": 0.601, "step": 19696 }, { "epoch": 1.76, "grad_norm": 9.034090999895605, "learning_rate": 3.813974995173003e-07, "loss": 0.5683, "step": 19697 }, { "epoch": 1.76, "grad_norm": 6.95688227074515, "learning_rate": 3.811208310196934e-07, "loss": 0.5495, "step": 19698 }, { "epoch": 1.76, "grad_norm": 4.697796935557323, "learning_rate": 3.808442589310729e-07, "loss": 0.5166, "step": 19699 }, { "epoch": 1.76, "grad_norm": 8.044370306877486, "learning_rate": 3.8056778325721413e-07, "loss": 0.56, "step": 19700 }, { "epoch": 1.76, "grad_norm": 7.400519954001416, "learning_rate": 3.802914040038852e-07, "loss": 0.5208, "step": 19701 }, { "epoch": 1.76, "grad_norm": 5.327879128051387, "learning_rate": 3.80015121176856e-07, "loss": 0.5337, "step": 19702 }, { "epoch": 1.76, "grad_norm": 5.7260738906728506, "learning_rate": 3.797389347818942e-07, "loss": 0.5595, "step": 19703 }, { "epoch": 1.76, "grad_norm": 6.361503936910675, "learning_rate": 3.7946284482476337e-07, "loss": 0.6005, "step": 19704 }, { "epoch": 1.76, "grad_norm": 6.9419428618247165, "learning_rate": 3.791868513112279e-07, "loss": 0.6043, "step": 19705 }, { "epoch": 1.76, "grad_norm": 5.976117698044675, "learning_rate": 3.78910954247047e-07, "loss": 0.5631, "step": 19706 }, { "epoch": 1.76, "grad_norm": 7.097718921842231, "learning_rate": 3.7863515363797955e-07, "loss": 0.5758, "step": 19707 }, { "epoch": 1.76, "grad_norm": 7.828543222660143, "learning_rate": 3.783594494897824e-07, "loss": 0.5585, "step": 19708 }, { "epoch": 1.76, "grad_norm": 6.847761748363026, "learning_rate": 3.780838418082111e-07, "loss": 0.579, "step": 19709 }, { "epoch": 1.76, "grad_norm": 6.141851893266935, "learning_rate": 3.778083305990182e-07, "loss": 0.5307, "step": 19710 }, { "epoch": 1.76, "grad_norm": 10.464513958055207, "learning_rate": 3.775329158679536e-07, "loss": 0.5844, "step": 19711 }, { "epoch": 1.76, "grad_norm": 5.803489293195507, "learning_rate": 3.77257597620766e-07, "loss": 0.5404, "step": 19712 }, { "epoch": 1.76, "grad_norm": 5.249506640416286, "learning_rate": 3.769823758632024e-07, "loss": 0.5818, "step": 19713 }, { "epoch": 1.76, "grad_norm": 6.2149037699577985, "learning_rate": 3.767072506010072e-07, "loss": 0.5565, "step": 19714 }, { "epoch": 1.76, "grad_norm": 6.9714467332937575, "learning_rate": 3.7643222183992465e-07, "loss": 0.5618, "step": 19715 }, { "epoch": 1.76, "grad_norm": 7.076929775087157, "learning_rate": 3.7615728958569296e-07, "loss": 0.5475, "step": 19716 }, { "epoch": 1.76, "grad_norm": 5.796781702767706, "learning_rate": 3.75882453844052e-07, "loss": 0.5355, "step": 19717 }, { "epoch": 1.76, "grad_norm": 7.586681731787227, "learning_rate": 3.7560771462073876e-07, "loss": 0.6308, "step": 19718 }, { "epoch": 1.76, "grad_norm": 7.332125875279872, "learning_rate": 3.753330719214865e-07, "loss": 0.5621, "step": 19719 }, { "epoch": 1.76, "grad_norm": 6.860786109682944, "learning_rate": 3.750585257520284e-07, "loss": 0.5771, "step": 19720 }, { "epoch": 1.76, "grad_norm": 9.947297923046218, "learning_rate": 3.7478407611809544e-07, "loss": 0.5491, "step": 19721 }, { "epoch": 1.76, "grad_norm": 7.345106314638253, "learning_rate": 3.745097230254152e-07, "loss": 0.64, "step": 19722 }, { "epoch": 1.76, "grad_norm": 6.481295585429623, "learning_rate": 3.7423546647971654e-07, "loss": 0.5628, "step": 19723 }, { "epoch": 1.76, "grad_norm": 7.134732626018723, "learning_rate": 3.7396130648672093e-07, "loss": 0.542, "step": 19724 }, { "epoch": 1.76, "grad_norm": 7.0671710304577395, "learning_rate": 3.736872430521521e-07, "loss": 0.5529, "step": 19725 }, { "epoch": 1.76, "grad_norm": 8.304724173653264, "learning_rate": 3.7341327618173053e-07, "loss": 0.5916, "step": 19726 }, { "epoch": 1.76, "grad_norm": 6.030357661682252, "learning_rate": 3.731394058811755e-07, "loss": 0.5302, "step": 19727 }, { "epoch": 1.76, "grad_norm": 7.320311793260315, "learning_rate": 3.7286563215620184e-07, "loss": 0.5766, "step": 19728 }, { "epoch": 1.76, "grad_norm": 6.893324175858843, "learning_rate": 3.725919550125262e-07, "loss": 0.5099, "step": 19729 }, { "epoch": 1.76, "grad_norm": 6.458103224275066, "learning_rate": 3.723183744558595e-07, "loss": 0.5881, "step": 19730 }, { "epoch": 1.76, "grad_norm": 5.858791109486914, "learning_rate": 3.720448904919116e-07, "loss": 0.6084, "step": 19731 }, { "epoch": 1.76, "grad_norm": 6.4961283967112955, "learning_rate": 3.7177150312639134e-07, "loss": 0.5737, "step": 19732 }, { "epoch": 1.76, "grad_norm": 6.450248048136858, "learning_rate": 3.714982123650057e-07, "loss": 0.6134, "step": 19733 }, { "epoch": 1.76, "grad_norm": 7.048636809310132, "learning_rate": 3.71225018213458e-07, "loss": 0.5653, "step": 19734 }, { "epoch": 1.76, "grad_norm": 4.867077477204794, "learning_rate": 3.7095192067745145e-07, "loss": 0.5277, "step": 19735 }, { "epoch": 1.76, "grad_norm": 6.943465998775884, "learning_rate": 3.7067891976268697e-07, "loss": 0.5379, "step": 19736 }, { "epoch": 1.76, "grad_norm": 7.585122244417388, "learning_rate": 3.7040601547486175e-07, "loss": 0.6059, "step": 19737 }, { "epoch": 1.76, "grad_norm": 8.6994514473219, "learning_rate": 3.7013320781967175e-07, "loss": 0.5257, "step": 19738 }, { "epoch": 1.76, "grad_norm": 6.184764067835705, "learning_rate": 3.698604968028119e-07, "loss": 0.5752, "step": 19739 }, { "epoch": 1.76, "grad_norm": 7.6572518244666465, "learning_rate": 3.695878824299742e-07, "loss": 0.4934, "step": 19740 }, { "epoch": 1.76, "grad_norm": 5.806123344110607, "learning_rate": 3.693153647068487e-07, "loss": 0.533, "step": 19741 }, { "epoch": 1.76, "grad_norm": 6.146215916881925, "learning_rate": 3.6904294363912576e-07, "loss": 0.5193, "step": 19742 }, { "epoch": 1.76, "grad_norm": 5.652121123894609, "learning_rate": 3.68770619232488e-07, "loss": 0.5629, "step": 19743 }, { "epoch": 1.76, "grad_norm": 6.285310359550078, "learning_rate": 3.684983914926227e-07, "loss": 0.5133, "step": 19744 }, { "epoch": 1.76, "grad_norm": 9.350167239186117, "learning_rate": 3.682262604252096e-07, "loss": 0.5507, "step": 19745 }, { "epoch": 1.76, "grad_norm": 6.274868050331939, "learning_rate": 3.679542260359298e-07, "loss": 0.6073, "step": 19746 }, { "epoch": 1.76, "grad_norm": 6.936635482477075, "learning_rate": 3.67682288330462e-07, "loss": 0.5798, "step": 19747 }, { "epoch": 1.76, "grad_norm": 5.5961920605956905, "learning_rate": 3.6741044731448127e-07, "loss": 0.5442, "step": 19748 }, { "epoch": 1.76, "grad_norm": 4.8173863664966206, "learning_rate": 3.671387029936635e-07, "loss": 0.5364, "step": 19749 }, { "epoch": 1.76, "grad_norm": 7.304272652554368, "learning_rate": 3.668670553736786e-07, "loss": 0.5379, "step": 19750 }, { "epoch": 1.76, "grad_norm": 5.74849161684889, "learning_rate": 3.665955044601971e-07, "loss": 0.5789, "step": 19751 }, { "epoch": 1.76, "grad_norm": 5.634909395963369, "learning_rate": 3.663240502588877e-07, "loss": 0.5602, "step": 19752 }, { "epoch": 1.76, "grad_norm": 5.403485718847094, "learning_rate": 3.6605269277541657e-07, "loss": 0.5026, "step": 19753 }, { "epoch": 1.76, "grad_norm": 7.050318399358755, "learning_rate": 3.657814320154468e-07, "loss": 0.5482, "step": 19754 }, { "epoch": 1.76, "grad_norm": 6.405296612528212, "learning_rate": 3.655102679846423e-07, "loss": 0.6187, "step": 19755 }, { "epoch": 1.76, "grad_norm": 6.025888001570944, "learning_rate": 3.6523920068866014e-07, "loss": 0.6519, "step": 19756 }, { "epoch": 1.76, "grad_norm": 8.588271850408525, "learning_rate": 3.649682301331597e-07, "loss": 0.5306, "step": 19757 }, { "epoch": 1.76, "grad_norm": 7.668522562020544, "learning_rate": 3.646973563237982e-07, "loss": 0.5286, "step": 19758 }, { "epoch": 1.76, "grad_norm": 6.799509987159986, "learning_rate": 3.6442657926622713e-07, "loss": 0.5474, "step": 19759 }, { "epoch": 1.76, "grad_norm": 5.581517093933606, "learning_rate": 3.6415589896609984e-07, "loss": 0.5749, "step": 19760 }, { "epoch": 1.76, "grad_norm": 8.787511155511702, "learning_rate": 3.6388531542906556e-07, "loss": 0.5561, "step": 19761 }, { "epoch": 1.76, "grad_norm": 10.273321978579942, "learning_rate": 3.6361482866077326e-07, "loss": 0.6136, "step": 19762 }, { "epoch": 1.76, "grad_norm": 7.526908755554605, "learning_rate": 3.6334443866686663e-07, "loss": 0.6197, "step": 19763 }, { "epoch": 1.76, "grad_norm": 8.26608377834526, "learning_rate": 3.6307414545299124e-07, "loss": 0.5708, "step": 19764 }, { "epoch": 1.76, "grad_norm": 7.801813393890665, "learning_rate": 3.628039490247881e-07, "loss": 0.5803, "step": 19765 }, { "epoch": 1.76, "grad_norm": 6.790158674123941, "learning_rate": 3.625338493878977e-07, "loss": 0.6113, "step": 19766 }, { "epoch": 1.76, "grad_norm": 8.263280593630943, "learning_rate": 3.622638465479572e-07, "loss": 0.634, "step": 19767 }, { "epoch": 1.76, "grad_norm": 4.74747306367983, "learning_rate": 3.6199394051060266e-07, "loss": 0.5422, "step": 19768 }, { "epoch": 1.76, "grad_norm": 6.885327655526138, "learning_rate": 3.6172413128146734e-07, "loss": 0.5585, "step": 19769 }, { "epoch": 1.76, "grad_norm": 7.927287605789044, "learning_rate": 3.614544188661828e-07, "loss": 0.6487, "step": 19770 }, { "epoch": 1.76, "grad_norm": 7.734015023086613, "learning_rate": 3.6118480327037965e-07, "loss": 0.5412, "step": 19771 }, { "epoch": 1.76, "grad_norm": 8.311331912711692, "learning_rate": 3.6091528449968493e-07, "loss": 0.5211, "step": 19772 }, { "epoch": 1.76, "grad_norm": 6.212081011957654, "learning_rate": 3.606458625597237e-07, "loss": 0.5607, "step": 19773 }, { "epoch": 1.76, "grad_norm": 4.974769716714912, "learning_rate": 3.6037653745612024e-07, "loss": 0.5809, "step": 19774 }, { "epoch": 1.76, "grad_norm": 8.546083207619608, "learning_rate": 3.6010730919449676e-07, "loss": 0.5804, "step": 19775 }, { "epoch": 1.76, "grad_norm": 7.117149025502638, "learning_rate": 3.59838177780471e-07, "loss": 0.6099, "step": 19776 }, { "epoch": 1.76, "grad_norm": 7.0548294562165665, "learning_rate": 3.595691432196613e-07, "loss": 0.5846, "step": 19777 }, { "epoch": 1.76, "grad_norm": 6.532277937696835, "learning_rate": 3.593002055176836e-07, "loss": 0.5448, "step": 19778 }, { "epoch": 1.76, "grad_norm": 6.9295487164270915, "learning_rate": 3.5903136468015134e-07, "loss": 0.6589, "step": 19779 }, { "epoch": 1.76, "grad_norm": 7.6568565573973695, "learning_rate": 3.587626207126754e-07, "loss": 0.5938, "step": 19780 }, { "epoch": 1.76, "grad_norm": 8.411649343190227, "learning_rate": 3.5849397362086704e-07, "loss": 0.5294, "step": 19781 }, { "epoch": 1.76, "grad_norm": 6.243812073088526, "learning_rate": 3.5822542341033105e-07, "loss": 0.6329, "step": 19782 }, { "epoch": 1.76, "grad_norm": 6.840356616702833, "learning_rate": 3.5795697008667363e-07, "loss": 0.6179, "step": 19783 }, { "epoch": 1.76, "grad_norm": 11.0107135816396, "learning_rate": 3.576886136554991e-07, "loss": 0.6116, "step": 19784 }, { "epoch": 1.77, "grad_norm": 8.242020509575376, "learning_rate": 3.5742035412240806e-07, "loss": 0.6004, "step": 19785 }, { "epoch": 1.77, "grad_norm": 7.579636082252926, "learning_rate": 3.5715219149300097e-07, "loss": 0.52, "step": 19786 }, { "epoch": 1.77, "grad_norm": 5.526261763109346, "learning_rate": 3.5688412577287334e-07, "loss": 0.4934, "step": 19787 }, { "epoch": 1.77, "grad_norm": 6.872883082228418, "learning_rate": 3.5661615696762244e-07, "loss": 0.5101, "step": 19788 }, { "epoch": 1.77, "grad_norm": 7.388711770039403, "learning_rate": 3.5634828508283925e-07, "loss": 0.572, "step": 19789 }, { "epoch": 1.77, "grad_norm": 5.7266480388264505, "learning_rate": 3.560805101241166e-07, "loss": 0.5758, "step": 19790 }, { "epoch": 1.77, "grad_norm": 10.266569134607847, "learning_rate": 3.558128320970428e-07, "loss": 0.5476, "step": 19791 }, { "epoch": 1.77, "grad_norm": 5.9299683055178445, "learning_rate": 3.555452510072055e-07, "loss": 0.5567, "step": 19792 }, { "epoch": 1.77, "grad_norm": 9.048253508202187, "learning_rate": 3.552777668601903e-07, "loss": 0.5712, "step": 19793 }, { "epoch": 1.77, "grad_norm": 7.271724802297707, "learning_rate": 3.550103796615806e-07, "loss": 0.567, "step": 19794 }, { "epoch": 1.77, "grad_norm": 9.02523986090543, "learning_rate": 3.5474308941695625e-07, "loss": 0.5673, "step": 19795 }, { "epoch": 1.77, "grad_norm": 5.922454154372157, "learning_rate": 3.544758961318967e-07, "loss": 0.5283, "step": 19796 }, { "epoch": 1.77, "grad_norm": 7.850571909802351, "learning_rate": 3.542087998119792e-07, "loss": 0.5819, "step": 19797 }, { "epoch": 1.77, "grad_norm": 6.415124762273409, "learning_rate": 3.539418004627793e-07, "loss": 0.4967, "step": 19798 }, { "epoch": 1.77, "grad_norm": 9.01876678519449, "learning_rate": 3.536748980898702e-07, "loss": 0.5514, "step": 19799 }, { "epoch": 1.77, "grad_norm": 6.816093656609749, "learning_rate": 3.534080926988215e-07, "loss": 0.5981, "step": 19800 }, { "epoch": 1.77, "grad_norm": 6.019152331932099, "learning_rate": 3.531413842952036e-07, "loss": 0.5694, "step": 19801 }, { "epoch": 1.77, "grad_norm": 8.310338777569969, "learning_rate": 3.5287477288458214e-07, "loss": 0.5202, "step": 19802 }, { "epoch": 1.77, "grad_norm": 7.998713206641457, "learning_rate": 3.526082584725232e-07, "loss": 0.5964, "step": 19803 }, { "epoch": 1.77, "grad_norm": 5.790716865541022, "learning_rate": 3.523418410645885e-07, "loss": 0.595, "step": 19804 }, { "epoch": 1.77, "grad_norm": 6.140708123764078, "learning_rate": 3.520755206663401e-07, "loss": 0.589, "step": 19805 }, { "epoch": 1.77, "grad_norm": 7.8760759167707155, "learning_rate": 3.5180929728333757e-07, "loss": 0.6384, "step": 19806 }, { "epoch": 1.77, "grad_norm": 5.382948107444717, "learning_rate": 3.515431709211353e-07, "loss": 0.5672, "step": 19807 }, { "epoch": 1.77, "grad_norm": 6.198849219329035, "learning_rate": 3.5127714158528945e-07, "loss": 0.5979, "step": 19808 }, { "epoch": 1.77, "grad_norm": 7.105843434513304, "learning_rate": 3.510112092813528e-07, "loss": 0.5652, "step": 19809 }, { "epoch": 1.77, "grad_norm": 7.4604144045706375, "learning_rate": 3.5074537401487643e-07, "loss": 0.5487, "step": 19810 }, { "epoch": 1.77, "grad_norm": 7.585257963615427, "learning_rate": 3.504796357914081e-07, "loss": 0.5278, "step": 19811 }, { "epoch": 1.77, "grad_norm": 6.85687431488126, "learning_rate": 3.502139946164962e-07, "loss": 0.6007, "step": 19812 }, { "epoch": 1.77, "grad_norm": 6.204431428670946, "learning_rate": 3.499484504956835e-07, "loss": 0.5582, "step": 19813 }, { "epoch": 1.77, "grad_norm": 7.930613096449291, "learning_rate": 3.496830034345133e-07, "loss": 0.573, "step": 19814 }, { "epoch": 1.77, "grad_norm": 6.825722637993685, "learning_rate": 3.494176534385263e-07, "loss": 0.6175, "step": 19815 }, { "epoch": 1.77, "grad_norm": 6.248278537176491, "learning_rate": 3.4915240051326236e-07, "loss": 0.6196, "step": 19816 }, { "epoch": 1.77, "grad_norm": 6.35738898678409, "learning_rate": 3.4888724466425605e-07, "loss": 0.5608, "step": 19817 }, { "epoch": 1.77, "grad_norm": 6.526357044254078, "learning_rate": 3.4862218589704235e-07, "loss": 0.5674, "step": 19818 }, { "epoch": 1.77, "grad_norm": 6.052975924643498, "learning_rate": 3.483572242171551e-07, "loss": 0.5131, "step": 19819 }, { "epoch": 1.77, "grad_norm": 5.815312778465246, "learning_rate": 3.4809235963012334e-07, "loss": 0.5455, "step": 19820 }, { "epoch": 1.77, "grad_norm": 7.709245496910512, "learning_rate": 3.478275921414759e-07, "loss": 0.5722, "step": 19821 }, { "epoch": 1.77, "grad_norm": 7.120071872697752, "learning_rate": 3.475629217567394e-07, "loss": 0.5583, "step": 19822 }, { "epoch": 1.77, "grad_norm": 6.776473695584824, "learning_rate": 3.4729834848143795e-07, "loss": 0.5678, "step": 19823 }, { "epoch": 1.77, "grad_norm": 6.567680665028, "learning_rate": 3.4703387232109475e-07, "loss": 0.5727, "step": 19824 }, { "epoch": 1.77, "grad_norm": 6.764916745623874, "learning_rate": 3.467694932812299e-07, "loss": 0.5915, "step": 19825 }, { "epoch": 1.77, "grad_norm": 7.460790126270922, "learning_rate": 3.4650521136736114e-07, "loss": 0.5938, "step": 19826 }, { "epoch": 1.77, "grad_norm": 6.131463882155137, "learning_rate": 3.4624102658500524e-07, "loss": 0.5127, "step": 19827 }, { "epoch": 1.77, "grad_norm": 8.949487518865457, "learning_rate": 3.459769389396761e-07, "loss": 0.5432, "step": 19828 }, { "epoch": 1.77, "grad_norm": 7.177134577126785, "learning_rate": 3.45712948436886e-07, "loss": 0.4936, "step": 19829 }, { "epoch": 1.77, "grad_norm": 8.065609896269185, "learning_rate": 3.4544905508214655e-07, "loss": 0.5712, "step": 19830 }, { "epoch": 1.77, "grad_norm": 5.648440366391174, "learning_rate": 3.4518525888096343e-07, "loss": 0.5951, "step": 19831 }, { "epoch": 1.77, "grad_norm": 7.300283756571457, "learning_rate": 3.4492155983884556e-07, "loss": 0.6456, "step": 19832 }, { "epoch": 1.77, "grad_norm": 6.828136074145293, "learning_rate": 3.4465795796129464e-07, "loss": 0.5434, "step": 19833 }, { "epoch": 1.77, "grad_norm": 8.303864997169875, "learning_rate": 3.443944532538135e-07, "loss": 0.562, "step": 19834 }, { "epoch": 1.77, "grad_norm": 5.939664514824431, "learning_rate": 3.4413104572190324e-07, "loss": 0.532, "step": 19835 }, { "epoch": 1.77, "grad_norm": 6.598829608815354, "learning_rate": 3.438677353710607e-07, "loss": 0.5154, "step": 19836 }, { "epoch": 1.77, "grad_norm": 5.942962859242887, "learning_rate": 3.436045222067824e-07, "loss": 0.5506, "step": 19837 }, { "epoch": 1.77, "grad_norm": 8.225452730414359, "learning_rate": 3.4334140623456356e-07, "loss": 0.6093, "step": 19838 }, { "epoch": 1.77, "grad_norm": 7.473021262714535, "learning_rate": 3.430783874598942e-07, "loss": 0.5466, "step": 19839 }, { "epoch": 1.77, "grad_norm": 7.3416327601327245, "learning_rate": 3.428154658882649e-07, "loss": 0.5535, "step": 19840 }, { "epoch": 1.77, "grad_norm": 6.236169734630806, "learning_rate": 3.4255264152516345e-07, "loss": 0.5628, "step": 19841 }, { "epoch": 1.77, "grad_norm": 5.260951355414242, "learning_rate": 3.4228991437607605e-07, "loss": 0.6115, "step": 19842 }, { "epoch": 1.77, "grad_norm": 6.292846752515909, "learning_rate": 3.4202728444648615e-07, "loss": 0.5502, "step": 19843 }, { "epoch": 1.77, "grad_norm": 7.559231224773722, "learning_rate": 3.417647517418771e-07, "loss": 0.5444, "step": 19844 }, { "epoch": 1.77, "grad_norm": 7.999625901639536, "learning_rate": 3.415023162677278e-07, "loss": 0.5805, "step": 19845 }, { "epoch": 1.77, "grad_norm": 6.530595238096674, "learning_rate": 3.4123997802951447e-07, "loss": 0.534, "step": 19846 }, { "epoch": 1.77, "grad_norm": 5.052958632332279, "learning_rate": 3.4097773703271386e-07, "loss": 0.5417, "step": 19847 }, { "epoch": 1.77, "grad_norm": 7.833686918324576, "learning_rate": 3.407155932828005e-07, "loss": 0.5903, "step": 19848 }, { "epoch": 1.77, "grad_norm": 6.491370053090296, "learning_rate": 3.4045354678524545e-07, "loss": 0.5532, "step": 19849 }, { "epoch": 1.77, "grad_norm": 6.564490451322855, "learning_rate": 3.401915975455178e-07, "loss": 0.5619, "step": 19850 }, { "epoch": 1.77, "grad_norm": 8.996210561883526, "learning_rate": 3.39929745569087e-07, "loss": 0.5359, "step": 19851 }, { "epoch": 1.77, "grad_norm": 8.824081712006613, "learning_rate": 3.3966799086141645e-07, "loss": 0.552, "step": 19852 }, { "epoch": 1.77, "grad_norm": 7.931618013253737, "learning_rate": 3.394063334279707e-07, "loss": 0.6182, "step": 19853 }, { "epoch": 1.77, "grad_norm": 7.894340030052003, "learning_rate": 3.3914477327421094e-07, "loss": 0.5615, "step": 19854 }, { "epoch": 1.77, "grad_norm": 8.863943483280599, "learning_rate": 3.388833104055972e-07, "loss": 0.558, "step": 19855 }, { "epoch": 1.77, "grad_norm": 6.204441547676108, "learning_rate": 3.386219448275868e-07, "loss": 0.536, "step": 19856 }, { "epoch": 1.77, "grad_norm": 7.454503651815953, "learning_rate": 3.383606765456354e-07, "loss": 0.5627, "step": 19857 }, { "epoch": 1.77, "grad_norm": 8.258598421760949, "learning_rate": 3.380995055651959e-07, "loss": 0.6474, "step": 19858 }, { "epoch": 1.77, "grad_norm": 5.882433445377432, "learning_rate": 3.3783843189171996e-07, "loss": 0.5382, "step": 19859 }, { "epoch": 1.77, "grad_norm": 8.444347492728252, "learning_rate": 3.375774555306566e-07, "loss": 0.5777, "step": 19860 }, { "epoch": 1.77, "grad_norm": 5.597255840515881, "learning_rate": 3.373165764874531e-07, "loss": 0.6223, "step": 19861 }, { "epoch": 1.77, "grad_norm": 6.943766263272918, "learning_rate": 3.370557947675546e-07, "loss": 0.504, "step": 19862 }, { "epoch": 1.77, "grad_norm": 4.1621031127805495, "learning_rate": 3.36795110376405e-07, "loss": 0.5519, "step": 19863 }, { "epoch": 1.77, "grad_norm": 6.044063591679987, "learning_rate": 3.3653452331944615e-07, "loss": 0.5793, "step": 19864 }, { "epoch": 1.77, "grad_norm": 7.2166516687121725, "learning_rate": 3.3627403360211586e-07, "loss": 0.6351, "step": 19865 }, { "epoch": 1.77, "grad_norm": 5.244270833836846, "learning_rate": 3.3601364122985094e-07, "loss": 0.5477, "step": 19866 }, { "epoch": 1.77, "grad_norm": 7.3067780375560565, "learning_rate": 3.3575334620808755e-07, "loss": 0.5119, "step": 19867 }, { "epoch": 1.77, "grad_norm": 5.531854930729359, "learning_rate": 3.3549314854225856e-07, "loss": 0.5325, "step": 19868 }, { "epoch": 1.77, "grad_norm": 6.075554432814713, "learning_rate": 3.3523304823779636e-07, "loss": 0.5928, "step": 19869 }, { "epoch": 1.77, "grad_norm": 7.577545386442936, "learning_rate": 3.349730453001271e-07, "loss": 0.5395, "step": 19870 }, { "epoch": 1.77, "grad_norm": 7.080993615097719, "learning_rate": 3.3471313973467923e-07, "loss": 0.5691, "step": 19871 }, { "epoch": 1.77, "grad_norm": 6.262079866286539, "learning_rate": 3.344533315468784e-07, "loss": 0.5027, "step": 19872 }, { "epoch": 1.77, "grad_norm": 7.376951835057217, "learning_rate": 3.34193620742147e-07, "loss": 0.5458, "step": 19873 }, { "epoch": 1.77, "grad_norm": 5.627236103788549, "learning_rate": 3.339340073259056e-07, "loss": 0.5524, "step": 19874 }, { "epoch": 1.77, "grad_norm": 7.4779485235456615, "learning_rate": 3.336744913035733e-07, "loss": 0.5455, "step": 19875 }, { "epoch": 1.77, "grad_norm": 5.369958093907991, "learning_rate": 3.334150726805663e-07, "loss": 0.6611, "step": 19876 }, { "epoch": 1.77, "grad_norm": 7.294401342969553, "learning_rate": 3.3315575146230126e-07, "loss": 0.5769, "step": 19877 }, { "epoch": 1.77, "grad_norm": 7.014655902332615, "learning_rate": 3.32896527654189e-07, "loss": 0.609, "step": 19878 }, { "epoch": 1.77, "grad_norm": 6.3182136684944705, "learning_rate": 3.326374012616407e-07, "loss": 0.5464, "step": 19879 }, { "epoch": 1.77, "grad_norm": 6.159900933542396, "learning_rate": 3.323783722900653e-07, "loss": 0.6302, "step": 19880 }, { "epoch": 1.77, "grad_norm": 4.707812345858786, "learning_rate": 3.321194407448697e-07, "loss": 0.5835, "step": 19881 }, { "epoch": 1.77, "grad_norm": 6.855744651937401, "learning_rate": 3.318606066314589e-07, "loss": 0.5771, "step": 19882 }, { "epoch": 1.77, "grad_norm": 6.816772915787004, "learning_rate": 3.316018699552337e-07, "loss": 0.6057, "step": 19883 }, { "epoch": 1.77, "grad_norm": 8.477727885597911, "learning_rate": 3.3134323072159633e-07, "loss": 0.5244, "step": 19884 }, { "epoch": 1.77, "grad_norm": 4.934849044716972, "learning_rate": 3.310846889359448e-07, "loss": 0.5992, "step": 19885 }, { "epoch": 1.77, "grad_norm": 5.708116320934139, "learning_rate": 3.3082624460367587e-07, "loss": 0.5422, "step": 19886 }, { "epoch": 1.77, "grad_norm": 7.106721428417182, "learning_rate": 3.305678977301835e-07, "loss": 0.5262, "step": 19887 }, { "epoch": 1.77, "grad_norm": 5.824795206033562, "learning_rate": 3.303096483208607e-07, "loss": 0.5524, "step": 19888 }, { "epoch": 1.77, "grad_norm": 5.961877435484607, "learning_rate": 3.30051496381098e-07, "loss": 0.5791, "step": 19889 }, { "epoch": 1.77, "grad_norm": 4.848628772443872, "learning_rate": 3.2979344191628295e-07, "loss": 0.5303, "step": 19890 }, { "epoch": 1.77, "grad_norm": 9.129635218576166, "learning_rate": 3.295354849318017e-07, "loss": 0.5339, "step": 19891 }, { "epoch": 1.77, "grad_norm": 5.499808763268714, "learning_rate": 3.2927762543303877e-07, "loss": 0.5304, "step": 19892 }, { "epoch": 1.77, "grad_norm": 6.252816620916252, "learning_rate": 3.290198634253772e-07, "loss": 0.5544, "step": 19893 }, { "epoch": 1.77, "grad_norm": 5.652323560548498, "learning_rate": 3.287621989141965e-07, "loss": 0.5564, "step": 19894 }, { "epoch": 1.77, "grad_norm": 7.124341573778996, "learning_rate": 3.2850463190487627e-07, "loss": 0.532, "step": 19895 }, { "epoch": 1.77, "grad_norm": 12.040230421749376, "learning_rate": 3.2824716240278997e-07, "loss": 0.5727, "step": 19896 }, { "epoch": 1.78, "grad_norm": 5.679275173992141, "learning_rate": 3.279897904133139e-07, "loss": 0.5457, "step": 19897 }, { "epoch": 1.78, "grad_norm": 6.3535600027842625, "learning_rate": 3.2773251594181874e-07, "loss": 0.507, "step": 19898 }, { "epoch": 1.78, "grad_norm": 8.012256757711764, "learning_rate": 3.274753389936758e-07, "loss": 0.5887, "step": 19899 }, { "epoch": 1.78, "grad_norm": 6.586176287636549, "learning_rate": 3.2721825957425236e-07, "loss": 0.5903, "step": 19900 }, { "epoch": 1.78, "grad_norm": 5.112065606871026, "learning_rate": 3.269612776889153e-07, "loss": 0.5484, "step": 19901 }, { "epoch": 1.78, "grad_norm": 5.3339476509372625, "learning_rate": 3.267043933430275e-07, "loss": 0.5646, "step": 19902 }, { "epoch": 1.78, "grad_norm": 8.681134005179397, "learning_rate": 3.264476065419503e-07, "loss": 0.5492, "step": 19903 }, { "epoch": 1.78, "grad_norm": 8.318287383606528, "learning_rate": 3.261909172910449e-07, "loss": 0.5883, "step": 19904 }, { "epoch": 1.78, "grad_norm": 11.353144395074082, "learning_rate": 3.2593432559566816e-07, "loss": 0.6049, "step": 19905 }, { "epoch": 1.78, "grad_norm": 5.856366768934138, "learning_rate": 3.2567783146117694e-07, "loss": 0.5644, "step": 19906 }, { "epoch": 1.78, "grad_norm": 6.517478836855554, "learning_rate": 3.254214348929241e-07, "loss": 0.6249, "step": 19907 }, { "epoch": 1.78, "grad_norm": 7.8722163204096, "learning_rate": 3.251651358962621e-07, "loss": 0.6208, "step": 19908 }, { "epoch": 1.78, "grad_norm": 5.606752278814589, "learning_rate": 3.249089344765399e-07, "loss": 0.5371, "step": 19909 }, { "epoch": 1.78, "grad_norm": 5.370040541005374, "learning_rate": 3.2465283063910503e-07, "loss": 0.5353, "step": 19910 }, { "epoch": 1.78, "grad_norm": 7.267514551466646, "learning_rate": 3.2439682438930363e-07, "loss": 0.6554, "step": 19911 }, { "epoch": 1.78, "grad_norm": 7.762833467586259, "learning_rate": 3.241409157324793e-07, "loss": 0.5756, "step": 19912 }, { "epoch": 1.78, "grad_norm": 7.5093480017842245, "learning_rate": 3.2388510467397326e-07, "loss": 0.5616, "step": 19913 }, { "epoch": 1.78, "grad_norm": 7.9079793787060515, "learning_rate": 3.2362939121912574e-07, "loss": 0.5724, "step": 19914 }, { "epoch": 1.78, "grad_norm": 6.4384529111924795, "learning_rate": 3.2337377537327297e-07, "loss": 0.5957, "step": 19915 }, { "epoch": 1.78, "grad_norm": 5.581381527023659, "learning_rate": 3.2311825714175126e-07, "loss": 0.5328, "step": 19916 }, { "epoch": 1.78, "grad_norm": 7.11689208054004, "learning_rate": 3.2286283652989416e-07, "loss": 0.5658, "step": 19917 }, { "epoch": 1.78, "grad_norm": 6.1125420684208, "learning_rate": 3.2260751354303174e-07, "loss": 0.5828, "step": 19918 }, { "epoch": 1.78, "grad_norm": 7.190081231509544, "learning_rate": 3.2235228818649433e-07, "loss": 0.6032, "step": 19919 }, { "epoch": 1.78, "grad_norm": 7.641057504384174, "learning_rate": 3.220971604656092e-07, "loss": 0.5526, "step": 19920 }, { "epoch": 1.78, "grad_norm": 6.609030226601544, "learning_rate": 3.2184213038570167e-07, "loss": 0.6041, "step": 19921 }, { "epoch": 1.78, "grad_norm": 6.8615369959995185, "learning_rate": 3.215871979520946e-07, "loss": 0.5481, "step": 19922 }, { "epoch": 1.78, "grad_norm": 5.491288470638519, "learning_rate": 3.2133236317010875e-07, "loss": 0.5825, "step": 19923 }, { "epoch": 1.78, "grad_norm": 5.7488551780205155, "learning_rate": 3.2107762604506377e-07, "loss": 0.5388, "step": 19924 }, { "epoch": 1.78, "grad_norm": 5.941077614973662, "learning_rate": 3.2082298658227704e-07, "loss": 0.6139, "step": 19925 }, { "epoch": 1.78, "grad_norm": 8.440466377744492, "learning_rate": 3.2056844478706273e-07, "loss": 0.5848, "step": 19926 }, { "epoch": 1.78, "grad_norm": 6.880109375237851, "learning_rate": 3.203140006647354e-07, "loss": 0.5202, "step": 19927 }, { "epoch": 1.78, "grad_norm": 5.728455151003148, "learning_rate": 3.2005965422060416e-07, "loss": 0.5413, "step": 19928 }, { "epoch": 1.78, "grad_norm": 5.85808800180692, "learning_rate": 3.198054054599786e-07, "loss": 0.5935, "step": 19929 }, { "epoch": 1.78, "grad_norm": 7.2079445216345235, "learning_rate": 3.1955125438816626e-07, "loss": 0.4988, "step": 19930 }, { "epoch": 1.78, "grad_norm": 9.390979116043923, "learning_rate": 3.192972010104717e-07, "loss": 0.541, "step": 19931 }, { "epoch": 1.78, "grad_norm": 6.204364360990222, "learning_rate": 3.190432453321968e-07, "loss": 0.5915, "step": 19932 }, { "epoch": 1.78, "grad_norm": 6.6490733845241925, "learning_rate": 3.187893873586434e-07, "loss": 0.5655, "step": 19933 }, { "epoch": 1.78, "grad_norm": 5.902796862462204, "learning_rate": 3.185356270951101e-07, "loss": 0.5611, "step": 19934 }, { "epoch": 1.78, "grad_norm": 8.871835270853392, "learning_rate": 3.182819645468932e-07, "loss": 0.5676, "step": 19935 }, { "epoch": 1.78, "grad_norm": 8.061395383849746, "learning_rate": 3.180283997192868e-07, "loss": 0.607, "step": 19936 }, { "epoch": 1.78, "grad_norm": 7.867495299422822, "learning_rate": 3.1777493261758497e-07, "loss": 0.5616, "step": 19937 }, { "epoch": 1.78, "grad_norm": 7.593568104160547, "learning_rate": 3.175215632470768e-07, "loss": 0.5132, "step": 19938 }, { "epoch": 1.78, "grad_norm": 5.18067524867868, "learning_rate": 3.1726829161305204e-07, "loss": 0.5689, "step": 19939 }, { "epoch": 1.78, "grad_norm": 5.8338065624457505, "learning_rate": 3.1701511772079696e-07, "loss": 0.6029, "step": 19940 }, { "epoch": 1.78, "grad_norm": 6.856658728274167, "learning_rate": 3.1676204157559566e-07, "loss": 0.5926, "step": 19941 }, { "epoch": 1.78, "grad_norm": 5.5134771501440385, "learning_rate": 3.1650906318273e-07, "loss": 0.5303, "step": 19942 }, { "epoch": 1.78, "grad_norm": 4.889318488219453, "learning_rate": 3.1625618254748136e-07, "loss": 0.6141, "step": 19943 }, { "epoch": 1.78, "grad_norm": 7.902986609068479, "learning_rate": 3.1600339967512774e-07, "loss": 0.5249, "step": 19944 }, { "epoch": 1.78, "grad_norm": 5.736455874819436, "learning_rate": 3.1575071457094595e-07, "loss": 0.5951, "step": 19945 }, { "epoch": 1.78, "grad_norm": 6.03421371727678, "learning_rate": 3.1549812724020856e-07, "loss": 0.5702, "step": 19946 }, { "epoch": 1.78, "grad_norm": 7.277024105709893, "learning_rate": 3.1524563768819015e-07, "loss": 0.5371, "step": 19947 }, { "epoch": 1.78, "grad_norm": 6.670900985220695, "learning_rate": 3.149932459201588e-07, "loss": 0.578, "step": 19948 }, { "epoch": 1.78, "grad_norm": 5.461478097027307, "learning_rate": 3.14740951941383e-07, "loss": 0.5873, "step": 19949 }, { "epoch": 1.78, "grad_norm": 8.376348893147746, "learning_rate": 3.1448875575712965e-07, "loss": 0.5645, "step": 19950 }, { "epoch": 1.78, "grad_norm": 7.020813598735923, "learning_rate": 3.142366573726624e-07, "loss": 0.564, "step": 19951 }, { "epoch": 1.78, "grad_norm": 6.508072728340795, "learning_rate": 3.139846567932436e-07, "loss": 0.6183, "step": 19952 }, { "epoch": 1.78, "grad_norm": 6.444977334774294, "learning_rate": 3.1373275402413306e-07, "loss": 0.5702, "step": 19953 }, { "epoch": 1.78, "grad_norm": 7.088837154159336, "learning_rate": 3.134809490705881e-07, "loss": 0.6158, "step": 19954 }, { "epoch": 1.78, "grad_norm": 6.664560501031596, "learning_rate": 3.1322924193786517e-07, "loss": 0.5365, "step": 19955 }, { "epoch": 1.78, "grad_norm": 4.5033430031775445, "learning_rate": 3.129776326312178e-07, "loss": 0.5174, "step": 19956 }, { "epoch": 1.78, "grad_norm": 6.477097912531358, "learning_rate": 3.1272612115589793e-07, "loss": 0.5167, "step": 19957 }, { "epoch": 1.78, "grad_norm": 6.16485114274206, "learning_rate": 3.1247470751715583e-07, "loss": 0.5916, "step": 19958 }, { "epoch": 1.78, "grad_norm": 9.037509202936494, "learning_rate": 3.122233917202383e-07, "loss": 0.6143, "step": 19959 }, { "epoch": 1.78, "grad_norm": 6.163394329996369, "learning_rate": 3.119721737703918e-07, "loss": 0.544, "step": 19960 }, { "epoch": 1.78, "grad_norm": 7.33878713165649, "learning_rate": 3.117210536728593e-07, "loss": 0.5623, "step": 19961 }, { "epoch": 1.78, "grad_norm": 4.9105274370656495, "learning_rate": 3.1147003143288225e-07, "loss": 0.5111, "step": 19962 }, { "epoch": 1.78, "grad_norm": 5.237143804635172, "learning_rate": 3.112191070557008e-07, "loss": 0.6147, "step": 19963 }, { "epoch": 1.78, "grad_norm": 5.369140642497617, "learning_rate": 3.109682805465525e-07, "loss": 0.5318, "step": 19964 }, { "epoch": 1.78, "grad_norm": 6.360607519036494, "learning_rate": 3.107175519106725e-07, "loss": 0.5274, "step": 19965 }, { "epoch": 1.78, "grad_norm": 6.48550638956406, "learning_rate": 3.10466921153294e-07, "loss": 0.5713, "step": 19966 }, { "epoch": 1.78, "grad_norm": 6.779915923415577, "learning_rate": 3.1021638827964884e-07, "loss": 0.5421, "step": 19967 }, { "epoch": 1.78, "grad_norm": 7.09051195617959, "learning_rate": 3.099659532949656e-07, "loss": 0.6235, "step": 19968 }, { "epoch": 1.78, "grad_norm": 7.552184461517378, "learning_rate": 3.097156162044723e-07, "loss": 0.5354, "step": 19969 }, { "epoch": 1.78, "grad_norm": 10.149378475491043, "learning_rate": 3.094653770133943e-07, "loss": 0.5878, "step": 19970 }, { "epoch": 1.78, "grad_norm": 5.613567513605767, "learning_rate": 3.092152357269551e-07, "loss": 0.5218, "step": 19971 }, { "epoch": 1.78, "grad_norm": 7.598520161054192, "learning_rate": 3.0896519235037393e-07, "loss": 0.6006, "step": 19972 }, { "epoch": 1.78, "grad_norm": 6.489200072187707, "learning_rate": 3.0871524688887155e-07, "loss": 0.5665, "step": 19973 }, { "epoch": 1.78, "grad_norm": 5.590907813021528, "learning_rate": 3.084653993476655e-07, "loss": 0.5378, "step": 19974 }, { "epoch": 1.78, "grad_norm": 5.598848156363632, "learning_rate": 3.082156497319694e-07, "loss": 0.6342, "step": 19975 }, { "epoch": 1.78, "grad_norm": 8.148583599775362, "learning_rate": 3.079659980469962e-07, "loss": 0.569, "step": 19976 }, { "epoch": 1.78, "grad_norm": 7.127137398707646, "learning_rate": 3.07716444297958e-07, "loss": 0.5638, "step": 19977 }, { "epoch": 1.78, "grad_norm": 8.175072029865344, "learning_rate": 3.0746698849006327e-07, "loss": 0.616, "step": 19978 }, { "epoch": 1.78, "grad_norm": 7.43241974538059, "learning_rate": 3.072176306285185e-07, "loss": 0.554, "step": 19979 }, { "epoch": 1.78, "grad_norm": 6.63019666956596, "learning_rate": 3.069683707185284e-07, "loss": 0.5456, "step": 19980 }, { "epoch": 1.78, "grad_norm": 6.436098471871223, "learning_rate": 3.0671920876529593e-07, "loss": 0.5816, "step": 19981 }, { "epoch": 1.78, "grad_norm": 8.65944392488762, "learning_rate": 3.0647014477402147e-07, "loss": 0.6021, "step": 19982 }, { "epoch": 1.78, "grad_norm": 6.721634623171956, "learning_rate": 3.062211787499048e-07, "loss": 0.568, "step": 19983 }, { "epoch": 1.78, "grad_norm": 6.408777260532234, "learning_rate": 3.059723106981421e-07, "loss": 0.5464, "step": 19984 }, { "epoch": 1.78, "grad_norm": 8.420219221266803, "learning_rate": 3.0572354062392727e-07, "loss": 0.5582, "step": 19985 }, { "epoch": 1.78, "grad_norm": 7.806484928670248, "learning_rate": 3.0547486853245325e-07, "loss": 0.5477, "step": 19986 }, { "epoch": 1.78, "grad_norm": 6.65389241864085, "learning_rate": 3.0522629442891026e-07, "loss": 0.5494, "step": 19987 }, { "epoch": 1.78, "grad_norm": 8.355409377885232, "learning_rate": 3.0497781831848703e-07, "loss": 0.5632, "step": 19988 }, { "epoch": 1.78, "grad_norm": 8.445521977960574, "learning_rate": 3.047294402063705e-07, "loss": 0.5303, "step": 19989 }, { "epoch": 1.78, "grad_norm": 6.560659638175641, "learning_rate": 3.0448116009774433e-07, "loss": 0.4908, "step": 19990 }, { "epoch": 1.78, "grad_norm": 6.606933352353607, "learning_rate": 3.0423297799779096e-07, "loss": 0.5919, "step": 19991 }, { "epoch": 1.78, "grad_norm": 6.139233276894587, "learning_rate": 3.0398489391169016e-07, "loss": 0.5856, "step": 19992 }, { "epoch": 1.78, "grad_norm": 7.795825923382389, "learning_rate": 3.0373690784462065e-07, "loss": 0.5371, "step": 19993 }, { "epoch": 1.78, "grad_norm": 6.777719644848041, "learning_rate": 3.0348901980175873e-07, "loss": 0.6136, "step": 19994 }, { "epoch": 1.78, "grad_norm": 8.626564971563297, "learning_rate": 3.032412297882781e-07, "loss": 0.5849, "step": 19995 }, { "epoch": 1.78, "grad_norm": 7.725997650649992, "learning_rate": 3.029935378093507e-07, "loss": 0.6495, "step": 19996 }, { "epoch": 1.78, "grad_norm": 5.2333274046757, "learning_rate": 3.0274594387014856e-07, "loss": 0.5761, "step": 19997 }, { "epoch": 1.78, "grad_norm": 7.374000655647602, "learning_rate": 3.0249844797583637e-07, "loss": 0.5634, "step": 19998 }, { "epoch": 1.78, "grad_norm": 4.852321610005478, "learning_rate": 3.022510501315823e-07, "loss": 0.5247, "step": 19999 }, { "epoch": 1.78, "grad_norm": 5.9111073618822605, "learning_rate": 3.0200375034254936e-07, "loss": 0.6089, "step": 20000 }, { "epoch": 1.78, "grad_norm": 8.4327216108502, "learning_rate": 3.0175654861390015e-07, "loss": 0.5755, "step": 20001 }, { "epoch": 1.78, "grad_norm": 7.5583233957231295, "learning_rate": 3.015094449507938e-07, "loss": 0.5844, "step": 20002 }, { "epoch": 1.78, "grad_norm": 4.8598549055235996, "learning_rate": 3.012624393583885e-07, "loss": 0.5733, "step": 20003 }, { "epoch": 1.78, "grad_norm": 6.649337722780526, "learning_rate": 3.010155318418406e-07, "loss": 0.5675, "step": 20004 }, { "epoch": 1.78, "grad_norm": 7.976002327871897, "learning_rate": 3.007687224063016e-07, "loss": 0.6447, "step": 20005 }, { "epoch": 1.78, "grad_norm": 5.61105855163293, "learning_rate": 3.0052201105692404e-07, "loss": 0.5379, "step": 20006 }, { "epoch": 1.78, "grad_norm": 7.930824452874397, "learning_rate": 3.002753977988582e-07, "loss": 0.5548, "step": 20007 }, { "epoch": 1.78, "grad_norm": 7.2894448531210925, "learning_rate": 3.000288826372516e-07, "loss": 0.5651, "step": 20008 }, { "epoch": 1.79, "grad_norm": 6.368295200929125, "learning_rate": 2.9978246557724856e-07, "loss": 0.544, "step": 20009 }, { "epoch": 1.79, "grad_norm": 6.1909488984673535, "learning_rate": 2.9953614662399487e-07, "loss": 0.5263, "step": 20010 }, { "epoch": 1.79, "grad_norm": 4.574841672318607, "learning_rate": 2.992899257826287e-07, "loss": 0.5005, "step": 20011 }, { "epoch": 1.79, "grad_norm": 7.6369663076523295, "learning_rate": 2.990438030582915e-07, "loss": 0.5912, "step": 20012 }, { "epoch": 1.79, "grad_norm": 9.027818270058656, "learning_rate": 2.9879777845611966e-07, "loss": 0.5875, "step": 20013 }, { "epoch": 1.79, "grad_norm": 8.07636476034707, "learning_rate": 2.9855185198124916e-07, "loss": 0.5281, "step": 20014 }, { "epoch": 1.79, "grad_norm": 6.022251470348601, "learning_rate": 2.9830602363881245e-07, "loss": 0.5855, "step": 20015 }, { "epoch": 1.79, "grad_norm": 7.216429239939149, "learning_rate": 2.980602934339416e-07, "loss": 0.5948, "step": 20016 }, { "epoch": 1.79, "grad_norm": 7.2770665488088975, "learning_rate": 2.978146613717642e-07, "loss": 0.5382, "step": 20017 }, { "epoch": 1.79, "grad_norm": 5.883293298891001, "learning_rate": 2.975691274574094e-07, "loss": 0.5701, "step": 20018 }, { "epoch": 1.79, "grad_norm": 5.468611407099075, "learning_rate": 2.9732369169599984e-07, "loss": 0.59, "step": 20019 }, { "epoch": 1.79, "grad_norm": 9.420396199017945, "learning_rate": 2.970783540926592e-07, "loss": 0.5871, "step": 20020 }, { "epoch": 1.79, "grad_norm": 7.077486990142298, "learning_rate": 2.968331146525094e-07, "loss": 0.5283, "step": 20021 }, { "epoch": 1.79, "grad_norm": 7.794369306752069, "learning_rate": 2.9658797338066814e-07, "loss": 0.5757, "step": 20022 }, { "epoch": 1.79, "grad_norm": 4.752776892192753, "learning_rate": 2.9634293028225346e-07, "loss": 0.5395, "step": 20023 }, { "epoch": 1.79, "grad_norm": 5.79792030062455, "learning_rate": 2.9609798536237854e-07, "loss": 0.575, "step": 20024 }, { "epoch": 1.79, "grad_norm": 11.017351965576053, "learning_rate": 2.958531386261565e-07, "loss": 0.5872, "step": 20025 }, { "epoch": 1.79, "grad_norm": 5.90667213472733, "learning_rate": 2.956083900786988e-07, "loss": 0.5241, "step": 20026 }, { "epoch": 1.79, "grad_norm": 7.645793675341324, "learning_rate": 2.9536373972511354e-07, "loss": 0.5911, "step": 20027 }, { "epoch": 1.79, "grad_norm": 5.935833361214213, "learning_rate": 2.9511918757050673e-07, "loss": 0.5566, "step": 20028 }, { "epoch": 1.79, "grad_norm": 6.083138449082243, "learning_rate": 2.9487473361998477e-07, "loss": 0.5534, "step": 20029 }, { "epoch": 1.79, "grad_norm": 6.531448200287632, "learning_rate": 2.946303778786475e-07, "loss": 0.6642, "step": 20030 }, { "epoch": 1.79, "grad_norm": 9.741820981070681, "learning_rate": 2.943861203515963e-07, "loss": 0.5382, "step": 20031 }, { "epoch": 1.79, "grad_norm": 6.287714841712965, "learning_rate": 2.9414196104393055e-07, "loss": 0.5545, "step": 20032 }, { "epoch": 1.79, "grad_norm": 7.15074527272271, "learning_rate": 2.938978999607456e-07, "loss": 0.5641, "step": 20033 }, { "epoch": 1.79, "grad_norm": 6.307327988638788, "learning_rate": 2.936539371071351e-07, "loss": 0.5998, "step": 20034 }, { "epoch": 1.79, "grad_norm": 7.759251423109145, "learning_rate": 2.934100724881922e-07, "loss": 0.6089, "step": 20035 }, { "epoch": 1.79, "grad_norm": 6.610813508605387, "learning_rate": 2.9316630610900786e-07, "loss": 0.5549, "step": 20036 }, { "epoch": 1.79, "grad_norm": 5.02581836063025, "learning_rate": 2.9292263797466804e-07, "loss": 0.5156, "step": 20037 }, { "epoch": 1.79, "grad_norm": 6.171809603361232, "learning_rate": 2.926790680902597e-07, "loss": 0.5229, "step": 20038 }, { "epoch": 1.79, "grad_norm": 8.066923120965326, "learning_rate": 2.924355964608666e-07, "loss": 0.529, "step": 20039 }, { "epoch": 1.79, "grad_norm": 7.08464746908951, "learning_rate": 2.921922230915719e-07, "loss": 0.5894, "step": 20040 }, { "epoch": 1.79, "grad_norm": 4.971039554479666, "learning_rate": 2.9194894798745487e-07, "loss": 0.6143, "step": 20041 }, { "epoch": 1.79, "grad_norm": 7.175729814466849, "learning_rate": 2.9170577115359256e-07, "loss": 0.5427, "step": 20042 }, { "epoch": 1.79, "grad_norm": 7.080903762714071, "learning_rate": 2.914626925950609e-07, "loss": 0.5623, "step": 20043 }, { "epoch": 1.79, "grad_norm": 5.2685744109991015, "learning_rate": 2.9121971231693416e-07, "loss": 0.5779, "step": 20044 }, { "epoch": 1.79, "grad_norm": 7.626158456181663, "learning_rate": 2.9097683032428436e-07, "loss": 0.5886, "step": 20045 }, { "epoch": 1.79, "grad_norm": 6.633412617223977, "learning_rate": 2.907340466221809e-07, "loss": 0.5824, "step": 20046 }, { "epoch": 1.79, "grad_norm": 6.577616841077556, "learning_rate": 2.9049136121569066e-07, "loss": 0.5658, "step": 20047 }, { "epoch": 1.79, "grad_norm": 7.56266342898239, "learning_rate": 2.902487741098797e-07, "loss": 0.5685, "step": 20048 }, { "epoch": 1.79, "grad_norm": 5.919900203647548, "learning_rate": 2.900062853098112e-07, "loss": 0.5509, "step": 20049 }, { "epoch": 1.79, "grad_norm": 8.76893650320256, "learning_rate": 2.8976389482054666e-07, "loss": 0.5857, "step": 20050 }, { "epoch": 1.79, "grad_norm": 7.386722961385307, "learning_rate": 2.895216026471459e-07, "loss": 0.5011, "step": 20051 }, { "epoch": 1.79, "grad_norm": 6.47798903737093, "learning_rate": 2.8927940879466543e-07, "loss": 0.569, "step": 20052 }, { "epoch": 1.79, "grad_norm": 8.630274324488346, "learning_rate": 2.890373132681612e-07, "loss": 0.5251, "step": 20053 }, { "epoch": 1.79, "grad_norm": 8.501999471921446, "learning_rate": 2.8879531607268696e-07, "loss": 0.6033, "step": 20054 }, { "epoch": 1.79, "grad_norm": 5.656707458593224, "learning_rate": 2.8855341721329256e-07, "loss": 0.5285, "step": 20055 }, { "epoch": 1.79, "grad_norm": 6.618707329758404, "learning_rate": 2.883116166950273e-07, "loss": 0.5577, "step": 20056 }, { "epoch": 1.79, "grad_norm": 9.935551070342399, "learning_rate": 2.8806991452293873e-07, "loss": 0.5234, "step": 20057 }, { "epoch": 1.79, "grad_norm": 8.475685462178188, "learning_rate": 2.8782831070207185e-07, "loss": 0.4757, "step": 20058 }, { "epoch": 1.79, "grad_norm": 6.575138999245838, "learning_rate": 2.875868052374697e-07, "loss": 0.6101, "step": 20059 }, { "epoch": 1.79, "grad_norm": 6.743419866100189, "learning_rate": 2.873453981341734e-07, "loss": 0.5723, "step": 20060 }, { "epoch": 1.79, "grad_norm": 5.971624256789703, "learning_rate": 2.871040893972216e-07, "loss": 0.5488, "step": 20061 }, { "epoch": 1.79, "grad_norm": 5.428105035918969, "learning_rate": 2.868628790316502e-07, "loss": 0.6055, "step": 20062 }, { "epoch": 1.79, "grad_norm": 5.930397584315674, "learning_rate": 2.866217670424942e-07, "loss": 0.5573, "step": 20063 }, { "epoch": 1.79, "grad_norm": 6.057558700354545, "learning_rate": 2.8638075343478676e-07, "loss": 0.52, "step": 20064 }, { "epoch": 1.79, "grad_norm": 4.383391848950644, "learning_rate": 2.8613983821355886e-07, "loss": 0.5659, "step": 20065 }, { "epoch": 1.79, "grad_norm": 6.571498570221796, "learning_rate": 2.858990213838386e-07, "loss": 0.475, "step": 20066 }, { "epoch": 1.79, "grad_norm": 6.54414147152359, "learning_rate": 2.8565830295065267e-07, "loss": 0.5577, "step": 20067 }, { "epoch": 1.79, "grad_norm": 8.306921497907714, "learning_rate": 2.854176829190253e-07, "loss": 0.5579, "step": 20068 }, { "epoch": 1.79, "grad_norm": 7.371600005457422, "learning_rate": 2.851771612939791e-07, "loss": 0.5818, "step": 20069 }, { "epoch": 1.79, "grad_norm": 8.29877764730848, "learning_rate": 2.84936738080534e-07, "loss": 0.5357, "step": 20070 }, { "epoch": 1.79, "grad_norm": 7.396033657904993, "learning_rate": 2.846964132837088e-07, "loss": 0.6002, "step": 20071 }, { "epoch": 1.79, "grad_norm": 7.142398131015172, "learning_rate": 2.8445618690851995e-07, "loss": 0.4959, "step": 20072 }, { "epoch": 1.79, "grad_norm": 8.281011624109869, "learning_rate": 2.842160589599813e-07, "loss": 0.5536, "step": 20073 }, { "epoch": 1.79, "grad_norm": 8.658376277847436, "learning_rate": 2.839760294431049e-07, "loss": 0.6011, "step": 20074 }, { "epoch": 1.79, "grad_norm": 7.597513769399369, "learning_rate": 2.8373609836290174e-07, "loss": 0.642, "step": 20075 }, { "epoch": 1.79, "grad_norm": 8.767142909881834, "learning_rate": 2.8349626572437836e-07, "loss": 0.6042, "step": 20076 }, { "epoch": 1.79, "grad_norm": 6.039028439613305, "learning_rate": 2.8325653153254086e-07, "loss": 0.5743, "step": 20077 }, { "epoch": 1.79, "grad_norm": 8.469337560411864, "learning_rate": 2.8301689579239457e-07, "loss": 0.5987, "step": 20078 }, { "epoch": 1.79, "grad_norm": 11.433543756344712, "learning_rate": 2.8277735850893995e-07, "loss": 0.6014, "step": 20079 }, { "epoch": 1.79, "grad_norm": 7.273955263599302, "learning_rate": 2.8253791968717857e-07, "loss": 0.5375, "step": 20080 }, { "epoch": 1.79, "grad_norm": 5.917419973075672, "learning_rate": 2.822985793321059e-07, "loss": 0.5912, "step": 20081 }, { "epoch": 1.79, "grad_norm": 7.278213429846615, "learning_rate": 2.8205933744871904e-07, "loss": 0.5604, "step": 20082 }, { "epoch": 1.79, "grad_norm": 7.307008760094153, "learning_rate": 2.8182019404201176e-07, "loss": 0.5227, "step": 20083 }, { "epoch": 1.79, "grad_norm": 8.844916833471185, "learning_rate": 2.8158114911697456e-07, "loss": 0.5908, "step": 20084 }, { "epoch": 1.79, "grad_norm": 7.012835286282313, "learning_rate": 2.813422026785984e-07, "loss": 0.5559, "step": 20085 }, { "epoch": 1.79, "grad_norm": 7.037807048226236, "learning_rate": 2.8110335473187033e-07, "loss": 0.5188, "step": 20086 }, { "epoch": 1.79, "grad_norm": 5.677385792994709, "learning_rate": 2.8086460528177484e-07, "loss": 0.5502, "step": 20087 }, { "epoch": 1.79, "grad_norm": 7.816195303426281, "learning_rate": 2.806259543332962e-07, "loss": 0.5844, "step": 20088 }, { "epoch": 1.79, "grad_norm": 5.319970857596945, "learning_rate": 2.8038740189141544e-07, "loss": 0.5195, "step": 20089 }, { "epoch": 1.79, "grad_norm": 8.62959581043223, "learning_rate": 2.801489479611125e-07, "loss": 0.6171, "step": 20090 }, { "epoch": 1.79, "grad_norm": 4.823164471673383, "learning_rate": 2.799105925473633e-07, "loss": 0.5438, "step": 20091 }, { "epoch": 1.79, "grad_norm": 8.055849503894976, "learning_rate": 2.7967233565514395e-07, "loss": 0.5271, "step": 20092 }, { "epoch": 1.79, "grad_norm": 7.171608570997626, "learning_rate": 2.794341772894277e-07, "loss": 0.5432, "step": 20093 }, { "epoch": 1.79, "grad_norm": 8.434908647170557, "learning_rate": 2.791961174551844e-07, "loss": 0.542, "step": 20094 }, { "epoch": 1.79, "grad_norm": 5.893723459877089, "learning_rate": 2.7895815615738395e-07, "loss": 0.5197, "step": 20095 }, { "epoch": 1.79, "grad_norm": 4.68024606193015, "learning_rate": 2.78720293400993e-07, "loss": 0.5191, "step": 20096 }, { "epoch": 1.79, "grad_norm": 7.417760912338335, "learning_rate": 2.784825291909765e-07, "loss": 0.5594, "step": 20097 }, { "epoch": 1.79, "grad_norm": 9.024787299493008, "learning_rate": 2.782448635322971e-07, "loss": 0.5439, "step": 20098 }, { "epoch": 1.79, "grad_norm": 7.040007624121557, "learning_rate": 2.780072964299163e-07, "loss": 0.5906, "step": 20099 }, { "epoch": 1.79, "grad_norm": 6.761824104452046, "learning_rate": 2.777698278887919e-07, "loss": 0.6003, "step": 20100 }, { "epoch": 1.79, "grad_norm": 6.183857096685306, "learning_rate": 2.775324579138805e-07, "loss": 0.6285, "step": 20101 }, { "epoch": 1.79, "grad_norm": 9.608729120117474, "learning_rate": 2.7729518651013696e-07, "loss": 0.5871, "step": 20102 }, { "epoch": 1.79, "grad_norm": 6.718157675119265, "learning_rate": 2.7705801368251407e-07, "loss": 0.5476, "step": 20103 }, { "epoch": 1.79, "grad_norm": 6.607760833026992, "learning_rate": 2.7682093943596275e-07, "loss": 0.5542, "step": 20104 }, { "epoch": 1.79, "grad_norm": 7.612835912144923, "learning_rate": 2.7658396377543027e-07, "loss": 0.5605, "step": 20105 }, { "epoch": 1.79, "grad_norm": 7.665896476257027, "learning_rate": 2.7634708670586376e-07, "loss": 0.5668, "step": 20106 }, { "epoch": 1.79, "grad_norm": 8.196592338875362, "learning_rate": 2.7611030823220694e-07, "loss": 0.5852, "step": 20107 }, { "epoch": 1.79, "grad_norm": 7.688859439047316, "learning_rate": 2.7587362835940214e-07, "loss": 0.5902, "step": 20108 }, { "epoch": 1.79, "grad_norm": 6.917909872703726, "learning_rate": 2.7563704709238915e-07, "loss": 0.5674, "step": 20109 }, { "epoch": 1.79, "grad_norm": 6.110566334008197, "learning_rate": 2.7540056443610743e-07, "loss": 0.6073, "step": 20110 }, { "epoch": 1.79, "grad_norm": 7.679001371213017, "learning_rate": 2.7516418039549134e-07, "loss": 0.5868, "step": 20111 }, { "epoch": 1.79, "grad_norm": 6.369106403109633, "learning_rate": 2.749278949754769e-07, "loss": 0.5003, "step": 20112 }, { "epoch": 1.79, "grad_norm": 5.919410758715684, "learning_rate": 2.7469170818099467e-07, "loss": 0.5187, "step": 20113 }, { "epoch": 1.79, "grad_norm": 7.631282369987722, "learning_rate": 2.74455620016974e-07, "loss": 0.5695, "step": 20114 }, { "epoch": 1.79, "grad_norm": 6.980788960100082, "learning_rate": 2.742196304883443e-07, "loss": 0.5769, "step": 20115 }, { "epoch": 1.79, "grad_norm": 5.246336638819135, "learning_rate": 2.7398373960003e-07, "loss": 0.553, "step": 20116 }, { "epoch": 1.79, "grad_norm": 5.200200675840701, "learning_rate": 2.73747947356956e-07, "loss": 0.5499, "step": 20117 }, { "epoch": 1.79, "grad_norm": 6.374147287579982, "learning_rate": 2.7351225376404276e-07, "loss": 0.486, "step": 20118 }, { "epoch": 1.79, "grad_norm": 5.953679671078105, "learning_rate": 2.732766588262109e-07, "loss": 0.5706, "step": 20119 }, { "epoch": 1.79, "grad_norm": 8.072275544364853, "learning_rate": 2.7304116254837696e-07, "loss": 0.6194, "step": 20120 }, { "epoch": 1.79, "grad_norm": 5.949555330510494, "learning_rate": 2.7280576493545705e-07, "loss": 0.5475, "step": 20121 }, { "epoch": 1.8, "grad_norm": 7.462675483886381, "learning_rate": 2.725704659923639e-07, "loss": 0.5424, "step": 20122 }, { "epoch": 1.8, "grad_norm": 6.5105995948920805, "learning_rate": 2.723352657240097e-07, "loss": 0.5179, "step": 20123 }, { "epoch": 1.8, "grad_norm": 9.986808834047178, "learning_rate": 2.721001641353044e-07, "loss": 0.5718, "step": 20124 }, { "epoch": 1.8, "grad_norm": 7.2272071566708656, "learning_rate": 2.7186516123115294e-07, "loss": 0.5719, "step": 20125 }, { "epoch": 1.8, "grad_norm": 7.384344030928999, "learning_rate": 2.716302570164625e-07, "loss": 0.6195, "step": 20126 }, { "epoch": 1.8, "grad_norm": 5.897320981823818, "learning_rate": 2.713954514961348e-07, "loss": 0.5489, "step": 20127 }, { "epoch": 1.8, "grad_norm": 7.019304680050157, "learning_rate": 2.711607446750719e-07, "loss": 0.55, "step": 20128 }, { "epoch": 1.8, "grad_norm": 5.689313764272906, "learning_rate": 2.7092613655817223e-07, "loss": 0.574, "step": 20129 }, { "epoch": 1.8, "grad_norm": 7.784032683477858, "learning_rate": 2.70691627150334e-07, "loss": 0.5173, "step": 20130 }, { "epoch": 1.8, "grad_norm": 7.985281964924715, "learning_rate": 2.704572164564506e-07, "loss": 0.5751, "step": 20131 }, { "epoch": 1.8, "grad_norm": 7.56397800230942, "learning_rate": 2.7022290448141477e-07, "loss": 0.554, "step": 20132 }, { "epoch": 1.8, "grad_norm": 8.661211026814701, "learning_rate": 2.699886912301186e-07, "loss": 0.6258, "step": 20133 }, { "epoch": 1.8, "grad_norm": 5.660108703984498, "learning_rate": 2.697545767074494e-07, "loss": 0.5797, "step": 20134 }, { "epoch": 1.8, "grad_norm": 8.948699010113168, "learning_rate": 2.6952056091829384e-07, "loss": 0.6447, "step": 20135 }, { "epoch": 1.8, "grad_norm": 6.422904983296428, "learning_rate": 2.6928664386753734e-07, "loss": 0.5593, "step": 20136 }, { "epoch": 1.8, "grad_norm": 6.407476945382753, "learning_rate": 2.690528255600633e-07, "loss": 0.6067, "step": 20137 }, { "epoch": 1.8, "grad_norm": 6.167292018908641, "learning_rate": 2.6881910600074945e-07, "loss": 0.5571, "step": 20138 }, { "epoch": 1.8, "grad_norm": 7.984016320395842, "learning_rate": 2.685854851944758e-07, "loss": 0.5658, "step": 20139 }, { "epoch": 1.8, "grad_norm": 6.262631120747608, "learning_rate": 2.68351963146119e-07, "loss": 0.5426, "step": 20140 }, { "epoch": 1.8, "grad_norm": 7.850754359742553, "learning_rate": 2.681185398605524e-07, "loss": 0.5607, "step": 20141 }, { "epoch": 1.8, "grad_norm": 5.3884075369153255, "learning_rate": 2.6788521534264866e-07, "loss": 0.5297, "step": 20142 }, { "epoch": 1.8, "grad_norm": 6.731237016526866, "learning_rate": 2.6765198959727846e-07, "loss": 0.5714, "step": 20143 }, { "epoch": 1.8, "grad_norm": 7.4810396948687865, "learning_rate": 2.6741886262930895e-07, "loss": 0.5405, "step": 20144 }, { "epoch": 1.8, "grad_norm": 7.198120580196714, "learning_rate": 2.671858344436062e-07, "loss": 0.546, "step": 20145 }, { "epoch": 1.8, "grad_norm": 8.608420625930954, "learning_rate": 2.669529050450348e-07, "loss": 0.567, "step": 20146 }, { "epoch": 1.8, "grad_norm": 6.108039014843654, "learning_rate": 2.667200744384568e-07, "loss": 0.5863, "step": 20147 }, { "epoch": 1.8, "grad_norm": 7.695144933653571, "learning_rate": 2.664873426287312e-07, "loss": 0.6712, "step": 20148 }, { "epoch": 1.8, "grad_norm": 5.252375085094501, "learning_rate": 2.662547096207158e-07, "loss": 0.5698, "step": 20149 }, { "epoch": 1.8, "grad_norm": 8.930649568365828, "learning_rate": 2.660221754192671e-07, "loss": 0.5541, "step": 20150 }, { "epoch": 1.8, "grad_norm": 7.688669633855491, "learning_rate": 2.6578974002923805e-07, "loss": 0.5335, "step": 20151 }, { "epoch": 1.8, "grad_norm": 6.18244435583854, "learning_rate": 2.6555740345547974e-07, "loss": 0.5315, "step": 20152 }, { "epoch": 1.8, "grad_norm": 6.229205834832341, "learning_rate": 2.6532516570284273e-07, "loss": 0.5196, "step": 20153 }, { "epoch": 1.8, "grad_norm": 7.160749000997784, "learning_rate": 2.650930267761742e-07, "loss": 0.55, "step": 20154 }, { "epoch": 1.8, "grad_norm": 5.866709075995652, "learning_rate": 2.648609866803198e-07, "loss": 0.578, "step": 20155 }, { "epoch": 1.8, "grad_norm": 6.339963671152519, "learning_rate": 2.646290454201228e-07, "loss": 0.545, "step": 20156 }, { "epoch": 1.8, "grad_norm": 5.256990399892547, "learning_rate": 2.643972030004233e-07, "loss": 0.5233, "step": 20157 }, { "epoch": 1.8, "grad_norm": 9.22568983669438, "learning_rate": 2.6416545942606176e-07, "loss": 0.5678, "step": 20158 }, { "epoch": 1.8, "grad_norm": 6.186034803637399, "learning_rate": 2.63933814701875e-07, "loss": 0.57, "step": 20159 }, { "epoch": 1.8, "grad_norm": 8.515086336854688, "learning_rate": 2.637022688326979e-07, "loss": 0.6178, "step": 20160 }, { "epoch": 1.8, "grad_norm": 6.018057135058336, "learning_rate": 2.634708218233634e-07, "loss": 0.5687, "step": 20161 }, { "epoch": 1.8, "grad_norm": 6.675948241946451, "learning_rate": 2.6323947367870364e-07, "loss": 0.6308, "step": 20162 }, { "epoch": 1.8, "grad_norm": 8.805872540355585, "learning_rate": 2.6300822440354644e-07, "loss": 0.6143, "step": 20163 }, { "epoch": 1.8, "grad_norm": 6.770733787243477, "learning_rate": 2.6277707400271855e-07, "loss": 0.5573, "step": 20164 }, { "epoch": 1.8, "grad_norm": 5.359593674477983, "learning_rate": 2.6254602248104446e-07, "loss": 0.5852, "step": 20165 }, { "epoch": 1.8, "grad_norm": 7.433254958115622, "learning_rate": 2.6231506984334686e-07, "loss": 0.5307, "step": 20166 }, { "epoch": 1.8, "grad_norm": 7.39839573302087, "learning_rate": 2.620842160944476e-07, "loss": 0.5182, "step": 20167 }, { "epoch": 1.8, "grad_norm": 8.666709855565117, "learning_rate": 2.6185346123916376e-07, "loss": 0.5462, "step": 20168 }, { "epoch": 1.8, "grad_norm": 9.514473207529655, "learning_rate": 2.6162280528231385e-07, "loss": 0.5772, "step": 20169 }, { "epoch": 1.8, "grad_norm": 6.258844379299862, "learning_rate": 2.613922482287101e-07, "loss": 0.5361, "step": 20170 }, { "epoch": 1.8, "grad_norm": 8.698236717602715, "learning_rate": 2.611617900831653e-07, "loss": 0.5526, "step": 20171 }, { "epoch": 1.8, "grad_norm": 5.778092805130735, "learning_rate": 2.609314308504912e-07, "loss": 0.661, "step": 20172 }, { "epoch": 1.8, "grad_norm": 5.933060416071443, "learning_rate": 2.607011705354945e-07, "loss": 0.5092, "step": 20173 }, { "epoch": 1.8, "grad_norm": 6.096568076146124, "learning_rate": 2.6047100914298187e-07, "loss": 0.5249, "step": 20174 }, { "epoch": 1.8, "grad_norm": 7.871722454429138, "learning_rate": 2.6024094667775833e-07, "loss": 0.5452, "step": 20175 }, { "epoch": 1.8, "grad_norm": 7.487861840642195, "learning_rate": 2.600109831446257e-07, "loss": 0.5312, "step": 20176 }, { "epoch": 1.8, "grad_norm": 7.34964581755061, "learning_rate": 2.597811185483823e-07, "loss": 0.5704, "step": 20177 }, { "epoch": 1.8, "grad_norm": 5.295483374946834, "learning_rate": 2.5955135289382707e-07, "loss": 0.5899, "step": 20178 }, { "epoch": 1.8, "grad_norm": 6.16422746457735, "learning_rate": 2.593216861857556e-07, "loss": 0.5234, "step": 20179 }, { "epoch": 1.8, "grad_norm": 7.587631067781655, "learning_rate": 2.590921184289624e-07, "loss": 0.5666, "step": 20180 }, { "epoch": 1.8, "grad_norm": 8.352644872255844, "learning_rate": 2.5886264962823873e-07, "loss": 0.6265, "step": 20181 }, { "epoch": 1.8, "grad_norm": 7.523037238592465, "learning_rate": 2.5863327978837503e-07, "loss": 0.5481, "step": 20182 }, { "epoch": 1.8, "grad_norm": 9.092102603594158, "learning_rate": 2.584040089141576e-07, "loss": 0.5649, "step": 20183 }, { "epoch": 1.8, "grad_norm": 6.491995993082319, "learning_rate": 2.581748370103726e-07, "loss": 0.5705, "step": 20184 }, { "epoch": 1.8, "grad_norm": 6.821583778949467, "learning_rate": 2.5794576408180336e-07, "loss": 0.5242, "step": 20185 }, { "epoch": 1.8, "grad_norm": 7.617725783770447, "learning_rate": 2.577167901332317e-07, "loss": 0.5566, "step": 20186 }, { "epoch": 1.8, "grad_norm": 5.418464877121288, "learning_rate": 2.574879151694365e-07, "loss": 0.5672, "step": 20187 }, { "epoch": 1.8, "grad_norm": 9.137378178050847, "learning_rate": 2.5725913919519563e-07, "loss": 0.5827, "step": 20188 }, { "epoch": 1.8, "grad_norm": 6.5065553676603045, "learning_rate": 2.5703046221528306e-07, "loss": 0.6023, "step": 20189 }, { "epoch": 1.8, "grad_norm": 6.3121738196496855, "learning_rate": 2.568018842344727e-07, "loss": 0.6181, "step": 20190 }, { "epoch": 1.8, "grad_norm": 8.595294062484992, "learning_rate": 2.565734052575364e-07, "loss": 0.5329, "step": 20191 }, { "epoch": 1.8, "grad_norm": 6.272949341921487, "learning_rate": 2.563450252892413e-07, "loss": 0.5955, "step": 20192 }, { "epoch": 1.8, "grad_norm": 6.459732987583873, "learning_rate": 2.5611674433435597e-07, "loss": 0.5526, "step": 20193 }, { "epoch": 1.8, "grad_norm": 6.848475052652493, "learning_rate": 2.5588856239764424e-07, "loss": 0.5465, "step": 20194 }, { "epoch": 1.8, "grad_norm": 5.997668478173891, "learning_rate": 2.5566047948387016e-07, "loss": 0.5143, "step": 20195 }, { "epoch": 1.8, "grad_norm": 8.810408219611556, "learning_rate": 2.554324955977927e-07, "loss": 0.5023, "step": 20196 }, { "epoch": 1.8, "grad_norm": 4.661909265502535, "learning_rate": 2.552046107441719e-07, "loss": 0.5549, "step": 20197 }, { "epoch": 1.8, "grad_norm": 6.224249175598741, "learning_rate": 2.5497682492776397e-07, "loss": 0.5587, "step": 20198 }, { "epoch": 1.8, "grad_norm": 8.038842813400061, "learning_rate": 2.5474913815332294e-07, "loss": 0.6019, "step": 20199 }, { "epoch": 1.8, "grad_norm": 7.209182126591057, "learning_rate": 2.5452155042560265e-07, "loss": 0.5777, "step": 20200 }, { "epoch": 1.8, "grad_norm": 6.660550765357079, "learning_rate": 2.542940617493517e-07, "loss": 0.5454, "step": 20201 }, { "epoch": 1.8, "grad_norm": 5.971933064081867, "learning_rate": 2.540666721293195e-07, "loss": 0.5236, "step": 20202 }, { "epoch": 1.8, "grad_norm": 6.3567883050424046, "learning_rate": 2.538393815702522e-07, "loss": 0.5965, "step": 20203 }, { "epoch": 1.8, "grad_norm": 5.427784124290656, "learning_rate": 2.53612190076894e-07, "loss": 0.5048, "step": 20204 }, { "epoch": 1.8, "grad_norm": 6.712114182341302, "learning_rate": 2.53385097653987e-07, "loss": 0.5217, "step": 20205 }, { "epoch": 1.8, "grad_norm": 5.386064799644375, "learning_rate": 2.5315810430627084e-07, "loss": 0.5181, "step": 20206 }, { "epoch": 1.8, "grad_norm": 8.36655898038565, "learning_rate": 2.5293121003848396e-07, "loss": 0.598, "step": 20207 }, { "epoch": 1.8, "grad_norm": 7.361123196420978, "learning_rate": 2.527044148553631e-07, "loss": 0.5368, "step": 20208 }, { "epoch": 1.8, "grad_norm": 8.475683347322963, "learning_rate": 2.524777187616401e-07, "loss": 0.5551, "step": 20209 }, { "epoch": 1.8, "grad_norm": 6.328834547261899, "learning_rate": 2.522511217620477e-07, "loss": 0.5589, "step": 20210 }, { "epoch": 1.8, "grad_norm": 6.281499826677118, "learning_rate": 2.520246238613161e-07, "loss": 0.5942, "step": 20211 }, { "epoch": 1.8, "grad_norm": 7.2196985975073735, "learning_rate": 2.5179822506417205e-07, "loss": 0.6575, "step": 20212 }, { "epoch": 1.8, "grad_norm": 5.254794794472285, "learning_rate": 2.515719253753429e-07, "loss": 0.5362, "step": 20213 }, { "epoch": 1.8, "grad_norm": 8.10013068512519, "learning_rate": 2.513457247995499e-07, "loss": 0.6038, "step": 20214 }, { "epoch": 1.8, "grad_norm": 8.14510569168985, "learning_rate": 2.5111962334151585e-07, "loss": 0.5891, "step": 20215 }, { "epoch": 1.8, "grad_norm": 7.123661629703746, "learning_rate": 2.5089362100595983e-07, "loss": 0.6303, "step": 20216 }, { "epoch": 1.8, "grad_norm": 6.58658268470892, "learning_rate": 2.5066771779759855e-07, "loss": 0.562, "step": 20217 }, { "epoch": 1.8, "grad_norm": 8.368661770724051, "learning_rate": 2.5044191372114836e-07, "loss": 0.591, "step": 20218 }, { "epoch": 1.8, "grad_norm": 6.4507698823086415, "learning_rate": 2.50216208781322e-07, "loss": 0.5436, "step": 20219 }, { "epoch": 1.8, "grad_norm": 7.112014714534617, "learning_rate": 2.4999060298283085e-07, "loss": 0.5592, "step": 20220 }, { "epoch": 1.8, "grad_norm": 7.789597482733471, "learning_rate": 2.497650963303827e-07, "loss": 0.5485, "step": 20221 }, { "epoch": 1.8, "grad_norm": 8.821937137363907, "learning_rate": 2.495396888286855e-07, "loss": 0.5878, "step": 20222 }, { "epoch": 1.8, "grad_norm": 6.962520806929792, "learning_rate": 2.493143804824438e-07, "loss": 0.5223, "step": 20223 }, { "epoch": 1.8, "grad_norm": 7.078670628325172, "learning_rate": 2.4908917129636055e-07, "loss": 0.6204, "step": 20224 }, { "epoch": 1.8, "grad_norm": 10.806981495697736, "learning_rate": 2.48864061275137e-07, "loss": 0.558, "step": 20225 }, { "epoch": 1.8, "grad_norm": 7.248956448085863, "learning_rate": 2.4863905042347145e-07, "loss": 0.5859, "step": 20226 }, { "epoch": 1.8, "grad_norm": 8.002940868574399, "learning_rate": 2.484141387460598e-07, "loss": 0.5836, "step": 20227 }, { "epoch": 1.8, "grad_norm": 6.708152259768651, "learning_rate": 2.481893262475976e-07, "loss": 0.5205, "step": 20228 }, { "epoch": 1.8, "grad_norm": 8.006763682248472, "learning_rate": 2.479646129327767e-07, "loss": 0.6026, "step": 20229 }, { "epoch": 1.8, "grad_norm": 7.037161750533865, "learning_rate": 2.4773999880628787e-07, "loss": 0.5617, "step": 20230 }, { "epoch": 1.8, "grad_norm": 9.497997463141866, "learning_rate": 2.4751548387281886e-07, "loss": 0.5581, "step": 20231 }, { "epoch": 1.8, "grad_norm": 5.78184234266397, "learning_rate": 2.472910681370572e-07, "loss": 0.5204, "step": 20232 }, { "epoch": 1.8, "grad_norm": 7.139118241036792, "learning_rate": 2.4706675160368564e-07, "loss": 0.5963, "step": 20233 }, { "epoch": 1.81, "grad_norm": 5.926055683870987, "learning_rate": 2.468425342773878e-07, "loss": 0.5462, "step": 20234 }, { "epoch": 1.81, "grad_norm": 6.5045596124541225, "learning_rate": 2.466184161628421e-07, "loss": 0.5637, "step": 20235 }, { "epoch": 1.81, "grad_norm": 8.538217192743138, "learning_rate": 2.46394397264727e-07, "loss": 0.6163, "step": 20236 }, { "epoch": 1.81, "grad_norm": 8.13355933098946, "learning_rate": 2.4617047758771886e-07, "loss": 0.5522, "step": 20237 }, { "epoch": 1.81, "grad_norm": 6.939928613629592, "learning_rate": 2.45946657136491e-07, "loss": 0.5702, "step": 20238 }, { "epoch": 1.81, "grad_norm": 5.3412676032403885, "learning_rate": 2.4572293591571595e-07, "loss": 0.5327, "step": 20239 }, { "epoch": 1.81, "grad_norm": 8.91457334607669, "learning_rate": 2.454993139300627e-07, "loss": 0.6077, "step": 20240 }, { "epoch": 1.81, "grad_norm": 5.605575552697134, "learning_rate": 2.452757911841991e-07, "loss": 0.6251, "step": 20241 }, { "epoch": 1.81, "grad_norm": 4.548840595188242, "learning_rate": 2.450523676827904e-07, "loss": 0.5479, "step": 20242 }, { "epoch": 1.81, "grad_norm": 5.658580595516935, "learning_rate": 2.448290434305006e-07, "loss": 0.5561, "step": 20243 }, { "epoch": 1.81, "grad_norm": 9.257071663618545, "learning_rate": 2.4460581843199103e-07, "loss": 0.648, "step": 20244 }, { "epoch": 1.81, "grad_norm": 7.462969411566627, "learning_rate": 2.4438269269192125e-07, "loss": 0.5774, "step": 20245 }, { "epoch": 1.81, "grad_norm": 5.835006273534454, "learning_rate": 2.4415966621494757e-07, "loss": 0.5829, "step": 20246 }, { "epoch": 1.81, "grad_norm": 5.6425944640351196, "learning_rate": 2.4393673900572565e-07, "loss": 0.5863, "step": 20247 }, { "epoch": 1.81, "grad_norm": 8.427490403234392, "learning_rate": 2.4371391106890904e-07, "loss": 0.5843, "step": 20248 }, { "epoch": 1.81, "grad_norm": 5.805059156213748, "learning_rate": 2.434911824091485e-07, "loss": 0.6033, "step": 20249 }, { "epoch": 1.81, "grad_norm": 5.7804428134438455, "learning_rate": 2.4326855303109243e-07, "loss": 0.5301, "step": 20250 }, { "epoch": 1.81, "grad_norm": 5.279891578865532, "learning_rate": 2.4304602293938883e-07, "loss": 0.5779, "step": 20251 }, { "epoch": 1.81, "grad_norm": 8.01653329615071, "learning_rate": 2.428235921386818e-07, "loss": 0.5792, "step": 20252 }, { "epoch": 1.81, "grad_norm": 7.693282072244704, "learning_rate": 2.4260126063361424e-07, "loss": 0.552, "step": 20253 }, { "epoch": 1.81, "grad_norm": 7.409428415424622, "learning_rate": 2.423790284288263e-07, "loss": 0.5432, "step": 20254 }, { "epoch": 1.81, "grad_norm": 6.868955644553105, "learning_rate": 2.4215689552895773e-07, "loss": 0.5725, "step": 20255 }, { "epoch": 1.81, "grad_norm": 4.922575558955085, "learning_rate": 2.419348619386441e-07, "loss": 0.6063, "step": 20256 }, { "epoch": 1.81, "grad_norm": 6.9727995920039625, "learning_rate": 2.417129276625202e-07, "loss": 0.5953, "step": 20257 }, { "epoch": 1.81, "grad_norm": 6.675560956718508, "learning_rate": 2.4149109270521943e-07, "loss": 0.5438, "step": 20258 }, { "epoch": 1.81, "grad_norm": 5.2145684580957665, "learning_rate": 2.4126935707137035e-07, "loss": 0.5508, "step": 20259 }, { "epoch": 1.81, "grad_norm": 8.423860542698833, "learning_rate": 2.41047720765602e-07, "loss": 0.5719, "step": 20260 }, { "epoch": 1.81, "grad_norm": 6.966114480203932, "learning_rate": 2.408261837925402e-07, "loss": 0.5997, "step": 20261 }, { "epoch": 1.81, "grad_norm": 7.495993202105955, "learning_rate": 2.406047461568095e-07, "loss": 0.6325, "step": 20262 }, { "epoch": 1.81, "grad_norm": 9.221114166692875, "learning_rate": 2.4038340786303293e-07, "loss": 0.596, "step": 20263 }, { "epoch": 1.81, "grad_norm": 9.145792665420482, "learning_rate": 2.4016216891582846e-07, "loss": 0.6185, "step": 20264 }, { "epoch": 1.81, "grad_norm": 9.06869178338252, "learning_rate": 2.3994102931981565e-07, "loss": 0.6484, "step": 20265 }, { "epoch": 1.81, "grad_norm": 5.962909196054485, "learning_rate": 2.397199890796087e-07, "loss": 0.5368, "step": 20266 }, { "epoch": 1.81, "grad_norm": 7.862883749916136, "learning_rate": 2.394990481998216e-07, "loss": 0.554, "step": 20267 }, { "epoch": 1.81, "grad_norm": 8.030391744636105, "learning_rate": 2.392782066850674e-07, "loss": 0.5446, "step": 20268 }, { "epoch": 1.81, "grad_norm": 7.086423353242284, "learning_rate": 2.390574645399546e-07, "loss": 0.5412, "step": 20269 }, { "epoch": 1.81, "grad_norm": 7.139246017518816, "learning_rate": 2.388368217690906e-07, "loss": 0.5763, "step": 20270 }, { "epoch": 1.81, "grad_norm": 7.942525645131948, "learning_rate": 2.386162783770818e-07, "loss": 0.5864, "step": 20271 }, { "epoch": 1.81, "grad_norm": 5.362242995371688, "learning_rate": 2.3839583436853053e-07, "loss": 0.5967, "step": 20272 }, { "epoch": 1.81, "grad_norm": 7.369010466697994, "learning_rate": 2.3817548974803873e-07, "loss": 0.5328, "step": 20273 }, { "epoch": 1.81, "grad_norm": 8.515571463138, "learning_rate": 2.3795524452020491e-07, "loss": 0.5719, "step": 20274 }, { "epoch": 1.81, "grad_norm": 6.726760212563206, "learning_rate": 2.3773509868962708e-07, "loss": 0.5471, "step": 20275 }, { "epoch": 1.81, "grad_norm": 6.421711806862612, "learning_rate": 2.3751505226090044e-07, "loss": 0.5811, "step": 20276 }, { "epoch": 1.81, "grad_norm": 6.054321393280794, "learning_rate": 2.3729510523861632e-07, "loss": 0.5798, "step": 20277 }, { "epoch": 1.81, "grad_norm": 7.8725345648529155, "learning_rate": 2.3707525762736771e-07, "loss": 0.5734, "step": 20278 }, { "epoch": 1.81, "grad_norm": 5.477584645140424, "learning_rate": 2.3685550943174207e-07, "loss": 0.5117, "step": 20279 }, { "epoch": 1.81, "grad_norm": 4.822659256609229, "learning_rate": 2.3663586065632626e-07, "loss": 0.5982, "step": 20280 }, { "epoch": 1.81, "grad_norm": 6.470572118417263, "learning_rate": 2.3641631130570553e-07, "loss": 0.6265, "step": 20281 }, { "epoch": 1.81, "grad_norm": 4.698795042756981, "learning_rate": 2.3619686138446175e-07, "loss": 0.4987, "step": 20282 }, { "epoch": 1.81, "grad_norm": 6.527633305099976, "learning_rate": 2.3597751089717623e-07, "loss": 0.5295, "step": 20283 }, { "epoch": 1.81, "grad_norm": 7.45247531869541, "learning_rate": 2.3575825984842814e-07, "loss": 0.5226, "step": 20284 }, { "epoch": 1.81, "grad_norm": 7.2717056198569745, "learning_rate": 2.355391082427916e-07, "loss": 0.5261, "step": 20285 }, { "epoch": 1.81, "grad_norm": 7.025750631293702, "learning_rate": 2.3532005608484232e-07, "loss": 0.5597, "step": 20286 }, { "epoch": 1.81, "grad_norm": 7.754779751304873, "learning_rate": 2.3510110337915282e-07, "loss": 0.6012, "step": 20287 }, { "epoch": 1.81, "grad_norm": 8.282886537852962, "learning_rate": 2.348822501302922e-07, "loss": 0.6136, "step": 20288 }, { "epoch": 1.81, "grad_norm": 7.4521609585655835, "learning_rate": 2.3466349634283014e-07, "loss": 0.6117, "step": 20289 }, { "epoch": 1.81, "grad_norm": 8.037015599327136, "learning_rate": 2.3444484202133132e-07, "loss": 0.5338, "step": 20290 }, { "epoch": 1.81, "grad_norm": 5.888350929403232, "learning_rate": 2.3422628717035988e-07, "loss": 0.5756, "step": 20291 }, { "epoch": 1.81, "grad_norm": 5.97800575102655, "learning_rate": 2.3400783179447827e-07, "loss": 0.5804, "step": 20292 }, { "epoch": 1.81, "grad_norm": 6.013635008901206, "learning_rate": 2.3378947589824507e-07, "loss": 0.5601, "step": 20293 }, { "epoch": 1.81, "grad_norm": 8.258622299466971, "learning_rate": 2.3357121948621885e-07, "loss": 0.5408, "step": 20294 }, { "epoch": 1.81, "grad_norm": 5.817804621970263, "learning_rate": 2.3335306256295543e-07, "loss": 0.5166, "step": 20295 }, { "epoch": 1.81, "grad_norm": 7.912078959927987, "learning_rate": 2.3313500513300836e-07, "loss": 0.6116, "step": 20296 }, { "epoch": 1.81, "grad_norm": 4.768575105397824, "learning_rate": 2.3291704720092845e-07, "loss": 0.5132, "step": 20297 }, { "epoch": 1.81, "grad_norm": 8.953804543936467, "learning_rate": 2.326991887712654e-07, "loss": 0.5926, "step": 20298 }, { "epoch": 1.81, "grad_norm": 5.755694945178848, "learning_rate": 2.3248142984856615e-07, "loss": 0.5903, "step": 20299 }, { "epoch": 1.81, "grad_norm": 6.970329605926503, "learning_rate": 2.32263770437377e-07, "loss": 0.5757, "step": 20300 }, { "epoch": 1.81, "grad_norm": 6.766425488596052, "learning_rate": 2.3204621054223998e-07, "loss": 0.5234, "step": 20301 }, { "epoch": 1.81, "grad_norm": 7.555744698525791, "learning_rate": 2.3182875016769746e-07, "loss": 0.534, "step": 20302 }, { "epoch": 1.81, "grad_norm": 8.0790004250328, "learning_rate": 2.3161138931828698e-07, "loss": 0.6034, "step": 20303 }, { "epoch": 1.81, "grad_norm": 9.068260988058736, "learning_rate": 2.31394127998546e-07, "loss": 0.6468, "step": 20304 }, { "epoch": 1.81, "grad_norm": 7.2744644569363635, "learning_rate": 2.3117696621300977e-07, "loss": 0.5185, "step": 20305 }, { "epoch": 1.81, "grad_norm": 6.652952767291488, "learning_rate": 2.3095990396621137e-07, "loss": 0.623, "step": 20306 }, { "epoch": 1.81, "grad_norm": 10.3061385646532, "learning_rate": 2.307429412626805e-07, "loss": 0.5231, "step": 20307 }, { "epoch": 1.81, "grad_norm": 5.0456883788843925, "learning_rate": 2.3052607810694627e-07, "loss": 0.6147, "step": 20308 }, { "epoch": 1.81, "grad_norm": 5.48923810934394, "learning_rate": 2.3030931450353565e-07, "loss": 0.5635, "step": 20309 }, { "epoch": 1.81, "grad_norm": 6.8509502556002655, "learning_rate": 2.3009265045697226e-07, "loss": 0.576, "step": 20310 }, { "epoch": 1.81, "grad_norm": 6.159652899628439, "learning_rate": 2.2987608597177856e-07, "loss": 0.5843, "step": 20311 }, { "epoch": 1.81, "grad_norm": 7.569583914283195, "learning_rate": 2.2965962105247486e-07, "loss": 0.5574, "step": 20312 }, { "epoch": 1.81, "grad_norm": 7.379800991621863, "learning_rate": 2.294432557035803e-07, "loss": 0.5765, "step": 20313 }, { "epoch": 1.81, "grad_norm": 6.005612108773395, "learning_rate": 2.2922698992961011e-07, "loss": 0.5852, "step": 20314 }, { "epoch": 1.81, "grad_norm": 6.802530881160066, "learning_rate": 2.2901082373507965e-07, "loss": 0.5243, "step": 20315 }, { "epoch": 1.81, "grad_norm": 5.518139807223092, "learning_rate": 2.2879475712449918e-07, "loss": 0.4985, "step": 20316 }, { "epoch": 1.81, "grad_norm": 7.564910749318091, "learning_rate": 2.285787901023795e-07, "loss": 0.5628, "step": 20317 }, { "epoch": 1.81, "grad_norm": 6.436699581239236, "learning_rate": 2.2836292267322814e-07, "loss": 0.5573, "step": 20318 }, { "epoch": 1.81, "grad_norm": 6.387014539461933, "learning_rate": 2.281471548415509e-07, "loss": 0.5511, "step": 20319 }, { "epoch": 1.81, "grad_norm": 5.687883795236095, "learning_rate": 2.279314866118526e-07, "loss": 0.5797, "step": 20320 }, { "epoch": 1.81, "grad_norm": 7.282020325526409, "learning_rate": 2.2771591798863292e-07, "loss": 0.5292, "step": 20321 }, { "epoch": 1.81, "grad_norm": 6.466570479630273, "learning_rate": 2.2750044897639322e-07, "loss": 0.4911, "step": 20322 }, { "epoch": 1.81, "grad_norm": 6.1673479147168235, "learning_rate": 2.2728507957962942e-07, "loss": 0.5686, "step": 20323 }, { "epoch": 1.81, "grad_norm": 7.885223293914555, "learning_rate": 2.2706980980283734e-07, "loss": 0.4986, "step": 20324 }, { "epoch": 1.81, "grad_norm": 6.4644937804045615, "learning_rate": 2.2685463965051057e-07, "loss": 0.5569, "step": 20325 }, { "epoch": 1.81, "grad_norm": 7.720995733154218, "learning_rate": 2.2663956912714004e-07, "loss": 0.5614, "step": 20326 }, { "epoch": 1.81, "grad_norm": 6.281391106730937, "learning_rate": 2.2642459823721485e-07, "loss": 0.5958, "step": 20327 }, { "epoch": 1.81, "grad_norm": 6.185822149745963, "learning_rate": 2.2620972698522314e-07, "loss": 0.5975, "step": 20328 }, { "epoch": 1.81, "grad_norm": 6.602707156663584, "learning_rate": 2.2599495537564853e-07, "loss": 0.5763, "step": 20329 }, { "epoch": 1.81, "grad_norm": 7.248202989030173, "learning_rate": 2.2578028341297408e-07, "loss": 0.562, "step": 20330 }, { "epoch": 1.81, "grad_norm": 7.9621458971478045, "learning_rate": 2.2556571110168123e-07, "loss": 0.4817, "step": 20331 }, { "epoch": 1.81, "grad_norm": 6.3292638724103565, "learning_rate": 2.253512384462475e-07, "loss": 0.592, "step": 20332 }, { "epoch": 1.81, "grad_norm": 5.732958776985698, "learning_rate": 2.2513686545115098e-07, "loss": 0.5825, "step": 20333 }, { "epoch": 1.81, "grad_norm": 7.439820134610554, "learning_rate": 2.2492259212086642e-07, "loss": 0.5692, "step": 20334 }, { "epoch": 1.81, "grad_norm": 6.328872209916557, "learning_rate": 2.2470841845986524e-07, "loss": 0.617, "step": 20335 }, { "epoch": 1.81, "grad_norm": 8.346839092780693, "learning_rate": 2.244943444726172e-07, "loss": 0.5514, "step": 20336 }, { "epoch": 1.81, "grad_norm": 7.164455384443192, "learning_rate": 2.2428037016359207e-07, "loss": 0.5066, "step": 20337 }, { "epoch": 1.81, "grad_norm": 10.162037166219404, "learning_rate": 2.2406649553725512e-07, "loss": 0.5792, "step": 20338 }, { "epoch": 1.81, "grad_norm": 6.332782643582894, "learning_rate": 2.2385272059807117e-07, "loss": 0.5366, "step": 20339 }, { "epoch": 1.81, "grad_norm": 9.91812411166732, "learning_rate": 2.236390453505022e-07, "loss": 0.6217, "step": 20340 }, { "epoch": 1.81, "grad_norm": 6.226934221983952, "learning_rate": 2.2342546979900847e-07, "loss": 0.5992, "step": 20341 }, { "epoch": 1.81, "grad_norm": 7.304173516408155, "learning_rate": 2.2321199394804705e-07, "loss": 0.5989, "step": 20342 }, { "epoch": 1.81, "grad_norm": 7.776586743412852, "learning_rate": 2.2299861780207433e-07, "loss": 0.6497, "step": 20343 }, { "epoch": 1.81, "grad_norm": 4.981550200929377, "learning_rate": 2.227853413655434e-07, "loss": 0.5079, "step": 20344 }, { "epoch": 1.81, "grad_norm": 7.373752041155732, "learning_rate": 2.2257216464290743e-07, "loss": 0.5626, "step": 20345 }, { "epoch": 1.82, "grad_norm": 6.711095571130825, "learning_rate": 2.2235908763861447e-07, "loss": 0.5841, "step": 20346 }, { "epoch": 1.82, "grad_norm": 6.27261517869933, "learning_rate": 2.2214611035711376e-07, "loss": 0.6024, "step": 20347 }, { "epoch": 1.82, "grad_norm": 6.999472446176799, "learning_rate": 2.2193323280284896e-07, "loss": 0.5451, "step": 20348 }, { "epoch": 1.82, "grad_norm": 6.920481885578876, "learning_rate": 2.2172045498026483e-07, "loss": 0.5568, "step": 20349 }, { "epoch": 1.82, "grad_norm": 5.2537681777874115, "learning_rate": 2.2150777689380122e-07, "loss": 0.5309, "step": 20350 }, { "epoch": 1.82, "grad_norm": 6.727950782253402, "learning_rate": 2.2129519854789784e-07, "loss": 0.5635, "step": 20351 }, { "epoch": 1.82, "grad_norm": 5.890640442800136, "learning_rate": 2.2108271994699227e-07, "loss": 0.5647, "step": 20352 }, { "epoch": 1.82, "grad_norm": 7.32152987026481, "learning_rate": 2.208703410955193e-07, "loss": 0.5503, "step": 20353 }, { "epoch": 1.82, "grad_norm": 7.648278347492774, "learning_rate": 2.2065806199791262e-07, "loss": 0.6538, "step": 20354 }, { "epoch": 1.82, "grad_norm": 7.115367528182663, "learning_rate": 2.2044588265860145e-07, "loss": 0.5933, "step": 20355 }, { "epoch": 1.82, "grad_norm": 6.534842227471008, "learning_rate": 2.2023380308201504e-07, "loss": 0.5525, "step": 20356 }, { "epoch": 1.82, "grad_norm": 7.13181886579683, "learning_rate": 2.2002182327258092e-07, "loss": 0.6053, "step": 20357 }, { "epoch": 1.82, "grad_norm": 5.450643871223588, "learning_rate": 2.1980994323472338e-07, "loss": 0.5309, "step": 20358 }, { "epoch": 1.82, "grad_norm": 7.214602193393517, "learning_rate": 2.1959816297286497e-07, "loss": 0.6458, "step": 20359 }, { "epoch": 1.82, "grad_norm": 7.524801737445293, "learning_rate": 2.1938648249142602e-07, "loss": 0.5763, "step": 20360 }, { "epoch": 1.82, "grad_norm": 8.051750166319282, "learning_rate": 2.191749017948247e-07, "loss": 0.599, "step": 20361 }, { "epoch": 1.82, "grad_norm": 6.653680850772671, "learning_rate": 2.1896342088747746e-07, "loss": 0.5354, "step": 20362 }, { "epoch": 1.82, "grad_norm": 4.673092269826447, "learning_rate": 2.18752039773798e-07, "loss": 0.5441, "step": 20363 }, { "epoch": 1.82, "grad_norm": 6.538872380699893, "learning_rate": 2.185407584582e-07, "loss": 0.5881, "step": 20364 }, { "epoch": 1.82, "grad_norm": 5.64364718863824, "learning_rate": 2.1832957694509215e-07, "loss": 0.5804, "step": 20365 }, { "epoch": 1.82, "grad_norm": 6.668728031753405, "learning_rate": 2.1811849523888208e-07, "loss": 0.5657, "step": 20366 }, { "epoch": 1.82, "grad_norm": 8.541118594081484, "learning_rate": 2.179075133439773e-07, "loss": 0.5883, "step": 20367 }, { "epoch": 1.82, "grad_norm": 4.696639277199813, "learning_rate": 2.176966312647799e-07, "loss": 0.5629, "step": 20368 }, { "epoch": 1.82, "grad_norm": 8.22416829046417, "learning_rate": 2.1748584900569249e-07, "loss": 0.5691, "step": 20369 }, { "epoch": 1.82, "grad_norm": 8.768375380788974, "learning_rate": 2.172751665711137e-07, "loss": 0.6473, "step": 20370 }, { "epoch": 1.82, "grad_norm": 6.632056137911872, "learning_rate": 2.1706458396544226e-07, "loss": 0.5746, "step": 20371 }, { "epoch": 1.82, "grad_norm": 6.248121823525085, "learning_rate": 2.1685410119307416e-07, "loss": 0.6362, "step": 20372 }, { "epoch": 1.82, "grad_norm": 9.254585851863926, "learning_rate": 2.166437182584008e-07, "loss": 0.5677, "step": 20373 }, { "epoch": 1.82, "grad_norm": 6.263297139127617, "learning_rate": 2.1643343516581428e-07, "loss": 0.5926, "step": 20374 }, { "epoch": 1.82, "grad_norm": 7.563720904712251, "learning_rate": 2.162232519197044e-07, "loss": 0.5665, "step": 20375 }, { "epoch": 1.82, "grad_norm": 6.9926385921479985, "learning_rate": 2.1601316852445765e-07, "loss": 0.5229, "step": 20376 }, { "epoch": 1.82, "grad_norm": 6.330287588076863, "learning_rate": 2.1580318498445885e-07, "loss": 0.5752, "step": 20377 }, { "epoch": 1.82, "grad_norm": 9.670324581710089, "learning_rate": 2.1559330130409285e-07, "loss": 0.5971, "step": 20378 }, { "epoch": 1.82, "grad_norm": 6.330010512539535, "learning_rate": 2.1538351748773834e-07, "loss": 0.5424, "step": 20379 }, { "epoch": 1.82, "grad_norm": 6.373724027223369, "learning_rate": 2.1517383353977407e-07, "loss": 0.5695, "step": 20380 }, { "epoch": 1.82, "grad_norm": 5.866746359797262, "learning_rate": 2.149642494645776e-07, "loss": 0.5455, "step": 20381 }, { "epoch": 1.82, "grad_norm": 9.02980418262046, "learning_rate": 2.1475476526652383e-07, "loss": 0.5529, "step": 20382 }, { "epoch": 1.82, "grad_norm": 8.03487652768313, "learning_rate": 2.1454538094998424e-07, "loss": 0.5389, "step": 20383 }, { "epoch": 1.82, "grad_norm": 8.09687546602805, "learning_rate": 2.143360965193303e-07, "loss": 0.6027, "step": 20384 }, { "epoch": 1.82, "grad_norm": 6.699081777004381, "learning_rate": 2.141269119789302e-07, "loss": 0.5738, "step": 20385 }, { "epoch": 1.82, "grad_norm": 6.078788152964231, "learning_rate": 2.1391782733314937e-07, "loss": 0.6008, "step": 20386 }, { "epoch": 1.82, "grad_norm": 6.784109823546863, "learning_rate": 2.137088425863526e-07, "loss": 0.5546, "step": 20387 }, { "epoch": 1.82, "grad_norm": 7.743314726582956, "learning_rate": 2.13499957742902e-07, "loss": 0.5479, "step": 20388 }, { "epoch": 1.82, "grad_norm": 7.043306769680538, "learning_rate": 2.1329117280715738e-07, "loss": 0.5635, "step": 20389 }, { "epoch": 1.82, "grad_norm": 6.622676714881441, "learning_rate": 2.1308248778347696e-07, "loss": 0.5287, "step": 20390 }, { "epoch": 1.82, "grad_norm": 7.279210963857248, "learning_rate": 2.128739026762172e-07, "loss": 0.5399, "step": 20391 }, { "epoch": 1.82, "grad_norm": 6.083016477051384, "learning_rate": 2.1266541748973024e-07, "loss": 0.5516, "step": 20392 }, { "epoch": 1.82, "grad_norm": 8.298826184499651, "learning_rate": 2.1245703222836923e-07, "loss": 0.5709, "step": 20393 }, { "epoch": 1.82, "grad_norm": 8.30460091480953, "learning_rate": 2.1224874689648235e-07, "loss": 0.4972, "step": 20394 }, { "epoch": 1.82, "grad_norm": 7.29398130575607, "learning_rate": 2.1204056149841833e-07, "loss": 0.5604, "step": 20395 }, { "epoch": 1.82, "grad_norm": 5.331060689004122, "learning_rate": 2.1183247603852152e-07, "loss": 0.5269, "step": 20396 }, { "epoch": 1.82, "grad_norm": 7.6150439010416, "learning_rate": 2.116244905211362e-07, "loss": 0.5505, "step": 20397 }, { "epoch": 1.82, "grad_norm": 6.350466777641433, "learning_rate": 2.114166049506039e-07, "loss": 0.5473, "step": 20398 }, { "epoch": 1.82, "grad_norm": 9.008374196485153, "learning_rate": 2.112088193312628e-07, "loss": 0.5543, "step": 20399 }, { "epoch": 1.82, "grad_norm": 6.637621747455729, "learning_rate": 2.1100113366745e-07, "loss": 0.4869, "step": 20400 }, { "epoch": 1.82, "grad_norm": 6.352906939206318, "learning_rate": 2.1079354796350093e-07, "loss": 0.5321, "step": 20401 }, { "epoch": 1.82, "grad_norm": 5.628200477132338, "learning_rate": 2.1058606222374822e-07, "loss": 0.5705, "step": 20402 }, { "epoch": 1.82, "grad_norm": 7.1779917967788185, "learning_rate": 2.1037867645252285e-07, "loss": 0.6331, "step": 20403 }, { "epoch": 1.82, "grad_norm": 8.023588815436085, "learning_rate": 2.1017139065415414e-07, "loss": 0.5904, "step": 20404 }, { "epoch": 1.82, "grad_norm": 7.245603987394275, "learning_rate": 2.0996420483296753e-07, "loss": 0.6412, "step": 20405 }, { "epoch": 1.82, "grad_norm": 6.666378280626616, "learning_rate": 2.0975711899328843e-07, "loss": 0.6069, "step": 20406 }, { "epoch": 1.82, "grad_norm": 6.67954607803919, "learning_rate": 2.0955013313943952e-07, "loss": 0.5165, "step": 20407 }, { "epoch": 1.82, "grad_norm": 6.776180826879517, "learning_rate": 2.0934324727573953e-07, "loss": 0.6086, "step": 20408 }, { "epoch": 1.82, "grad_norm": 8.752649705282286, "learning_rate": 2.0913646140650835e-07, "loss": 0.5747, "step": 20409 }, { "epoch": 1.82, "grad_norm": 6.689917978072819, "learning_rate": 2.089297755360614e-07, "loss": 0.5863, "step": 20410 }, { "epoch": 1.82, "grad_norm": 9.650003916958465, "learning_rate": 2.087231896687142e-07, "loss": 0.5626, "step": 20411 }, { "epoch": 1.82, "grad_norm": 8.111181806243508, "learning_rate": 2.085167038087771e-07, "loss": 0.5216, "step": 20412 }, { "epoch": 1.82, "grad_norm": 9.060854453590139, "learning_rate": 2.0831031796056e-07, "loss": 0.5259, "step": 20413 }, { "epoch": 1.82, "grad_norm": 5.74420943501903, "learning_rate": 2.081040321283717e-07, "loss": 0.5636, "step": 20414 }, { "epoch": 1.82, "grad_norm": 6.895155157666161, "learning_rate": 2.078978463165182e-07, "loss": 0.5941, "step": 20415 }, { "epoch": 1.82, "grad_norm": 5.161517244977868, "learning_rate": 2.076917605293022e-07, "loss": 0.5567, "step": 20416 }, { "epoch": 1.82, "grad_norm": 5.07699876221817, "learning_rate": 2.0748577477102628e-07, "loss": 0.5462, "step": 20417 }, { "epoch": 1.82, "grad_norm": 5.237011103189711, "learning_rate": 2.0727988904598874e-07, "loss": 0.5657, "step": 20418 }, { "epoch": 1.82, "grad_norm": 7.552052709643952, "learning_rate": 2.0707410335848776e-07, "loss": 0.6128, "step": 20419 }, { "epoch": 1.82, "grad_norm": 8.60987458526532, "learning_rate": 2.068684177128183e-07, "loss": 0.5235, "step": 20420 }, { "epoch": 1.82, "grad_norm": 6.263312045753581, "learning_rate": 2.0666283211327464e-07, "loss": 0.577, "step": 20421 }, { "epoch": 1.82, "grad_norm": 7.669672053924077, "learning_rate": 2.0645734656414672e-07, "loss": 0.5412, "step": 20422 }, { "epoch": 1.82, "grad_norm": 9.828775305809325, "learning_rate": 2.062519610697239e-07, "loss": 0.487, "step": 20423 }, { "epoch": 1.82, "grad_norm": 8.332992734467085, "learning_rate": 2.0604667563429382e-07, "loss": 0.5058, "step": 20424 }, { "epoch": 1.82, "grad_norm": 7.96663326901817, "learning_rate": 2.0584149026213973e-07, "loss": 0.605, "step": 20425 }, { "epoch": 1.82, "grad_norm": 8.521144792061442, "learning_rate": 2.05636404957546e-07, "loss": 0.5555, "step": 20426 }, { "epoch": 1.82, "grad_norm": 6.03475592442481, "learning_rate": 2.054314197247931e-07, "loss": 0.5203, "step": 20427 }, { "epoch": 1.82, "grad_norm": 5.333731284143635, "learning_rate": 2.0522653456815867e-07, "loss": 0.5184, "step": 20428 }, { "epoch": 1.82, "grad_norm": 4.957549715389349, "learning_rate": 2.0502174949192043e-07, "loss": 0.5797, "step": 20429 }, { "epoch": 1.82, "grad_norm": 9.18609628662794, "learning_rate": 2.0481706450035276e-07, "loss": 0.6062, "step": 20430 }, { "epoch": 1.82, "grad_norm": 6.60962596064513, "learning_rate": 2.0461247959772722e-07, "loss": 0.5519, "step": 20431 }, { "epoch": 1.82, "grad_norm": 6.1850230947131015, "learning_rate": 2.044079947883143e-07, "loss": 0.6035, "step": 20432 }, { "epoch": 1.82, "grad_norm": 6.469050471292772, "learning_rate": 2.0420361007638224e-07, "loss": 0.6026, "step": 20433 }, { "epoch": 1.82, "grad_norm": 5.82918424130985, "learning_rate": 2.039993254661976e-07, "loss": 0.5146, "step": 20434 }, { "epoch": 1.82, "grad_norm": 8.283950625548787, "learning_rate": 2.037951409620237e-07, "loss": 0.6239, "step": 20435 }, { "epoch": 1.82, "grad_norm": 6.15172142841754, "learning_rate": 2.0359105656812316e-07, "loss": 0.5507, "step": 20436 }, { "epoch": 1.82, "grad_norm": 8.62070697102389, "learning_rate": 2.0338707228875543e-07, "loss": 0.587, "step": 20437 }, { "epoch": 1.82, "grad_norm": 10.97755321489954, "learning_rate": 2.031831881281776e-07, "loss": 0.6402, "step": 20438 }, { "epoch": 1.82, "grad_norm": 87.1265144626567, "learning_rate": 2.029794040906463e-07, "loss": 0.7129, "step": 20439 }, { "epoch": 1.82, "grad_norm": 11.6718028617347, "learning_rate": 2.0277572018041424e-07, "loss": 0.5319, "step": 20440 }, { "epoch": 1.82, "grad_norm": 7.770461968969511, "learning_rate": 2.0257213640173302e-07, "loss": 0.6158, "step": 20441 }, { "epoch": 1.82, "grad_norm": 6.781326265654025, "learning_rate": 2.0236865275885254e-07, "loss": 0.5361, "step": 20442 }, { "epoch": 1.82, "grad_norm": 6.624956735971415, "learning_rate": 2.0216526925602053e-07, "loss": 0.4998, "step": 20443 }, { "epoch": 1.82, "grad_norm": 6.480797294743205, "learning_rate": 2.0196198589748084e-07, "loss": 0.5972, "step": 20444 }, { "epoch": 1.82, "grad_norm": 6.901760078470299, "learning_rate": 2.017588026874767e-07, "loss": 0.6088, "step": 20445 }, { "epoch": 1.82, "grad_norm": 6.785967621461534, "learning_rate": 2.0155571963025034e-07, "loss": 0.6178, "step": 20446 }, { "epoch": 1.82, "grad_norm": 11.4221677062742, "learning_rate": 2.0135273673003942e-07, "loss": 0.5629, "step": 20447 }, { "epoch": 1.82, "grad_norm": 6.492879925208589, "learning_rate": 2.0114985399108222e-07, "loss": 0.515, "step": 20448 }, { "epoch": 1.82, "grad_norm": 7.055463088821366, "learning_rate": 2.009470714176115e-07, "loss": 0.5262, "step": 20449 }, { "epoch": 1.82, "grad_norm": 7.339667480675694, "learning_rate": 2.007443890138616e-07, "loss": 0.5455, "step": 20450 }, { "epoch": 1.82, "grad_norm": 7.515248328669999, "learning_rate": 2.0054180678406198e-07, "loss": 0.5939, "step": 20451 }, { "epoch": 1.82, "grad_norm": 6.25963111267426, "learning_rate": 2.0033932473244145e-07, "loss": 0.5512, "step": 20452 }, { "epoch": 1.82, "grad_norm": 7.648022089915887, "learning_rate": 2.0013694286322604e-07, "loss": 0.5635, "step": 20453 }, { "epoch": 1.82, "grad_norm": 7.483444867460799, "learning_rate": 1.9993466118064075e-07, "loss": 0.5764, "step": 20454 }, { "epoch": 1.82, "grad_norm": 6.116851861642821, "learning_rate": 1.997324796889083e-07, "loss": 0.5632, "step": 20455 }, { "epoch": 1.82, "grad_norm": 8.780173175199488, "learning_rate": 1.9953039839224698e-07, "loss": 0.5372, "step": 20456 }, { "epoch": 1.82, "grad_norm": 6.2090648385741325, "learning_rate": 1.9932841729487562e-07, "loss": 0.5622, "step": 20457 }, { "epoch": 1.83, "grad_norm": 5.909724162124632, "learning_rate": 1.9912653640101088e-07, "loss": 0.5931, "step": 20458 }, { "epoch": 1.83, "grad_norm": 6.548760202888542, "learning_rate": 1.9892475571486547e-07, "loss": 0.5893, "step": 20459 }, { "epoch": 1.83, "grad_norm": 6.01112858806052, "learning_rate": 1.987230752406516e-07, "loss": 0.5782, "step": 20460 }, { "epoch": 1.83, "grad_norm": 7.884939750989552, "learning_rate": 1.9852149498257977e-07, "loss": 0.5956, "step": 20461 }, { "epoch": 1.83, "grad_norm": 5.79764087783772, "learning_rate": 1.9832001494485553e-07, "loss": 0.6102, "step": 20462 }, { "epoch": 1.83, "grad_norm": 8.624676222568793, "learning_rate": 1.9811863513168604e-07, "loss": 0.5921, "step": 20463 }, { "epoch": 1.83, "grad_norm": 7.3734844495624285, "learning_rate": 1.979173555472741e-07, "loss": 0.559, "step": 20464 }, { "epoch": 1.83, "grad_norm": 7.320615064425367, "learning_rate": 1.9771617619582128e-07, "loss": 0.5696, "step": 20465 }, { "epoch": 1.83, "grad_norm": 5.298984837552271, "learning_rate": 1.9751509708152593e-07, "loss": 0.6083, "step": 20466 }, { "epoch": 1.83, "grad_norm": 8.676029817425633, "learning_rate": 1.973141182085858e-07, "loss": 0.5644, "step": 20467 }, { "epoch": 1.83, "grad_norm": 5.40319304041225, "learning_rate": 1.9711323958119644e-07, "loss": 0.5086, "step": 20468 }, { "epoch": 1.83, "grad_norm": 6.227176543209952, "learning_rate": 1.9691246120354944e-07, "loss": 0.5921, "step": 20469 }, { "epoch": 1.83, "grad_norm": 8.746354427927626, "learning_rate": 1.9671178307983595e-07, "loss": 0.5186, "step": 20470 }, { "epoch": 1.83, "grad_norm": 5.360235988423362, "learning_rate": 1.9651120521424482e-07, "loss": 0.5587, "step": 20471 }, { "epoch": 1.83, "grad_norm": 8.825659187244895, "learning_rate": 1.9631072761096326e-07, "loss": 0.5666, "step": 20472 }, { "epoch": 1.83, "grad_norm": 5.956631391073639, "learning_rate": 1.9611035027417514e-07, "loss": 0.5666, "step": 20473 }, { "epoch": 1.83, "grad_norm": 7.534469255229354, "learning_rate": 1.9591007320806376e-07, "loss": 0.5593, "step": 20474 }, { "epoch": 1.83, "grad_norm": 7.691856937612313, "learning_rate": 1.957098964168086e-07, "loss": 0.5315, "step": 20475 }, { "epoch": 1.83, "grad_norm": 7.214973825952561, "learning_rate": 1.955098199045874e-07, "loss": 0.5409, "step": 20476 }, { "epoch": 1.83, "grad_norm": 5.5839598885858175, "learning_rate": 1.9530984367557792e-07, "loss": 0.6, "step": 20477 }, { "epoch": 1.83, "grad_norm": 8.849183831160639, "learning_rate": 1.951099677339524e-07, "loss": 0.618, "step": 20478 }, { "epoch": 1.83, "grad_norm": 6.010060078654609, "learning_rate": 1.949101920838853e-07, "loss": 0.5413, "step": 20479 }, { "epoch": 1.83, "grad_norm": 7.858561614479666, "learning_rate": 1.9471051672954378e-07, "loss": 0.5549, "step": 20480 }, { "epoch": 1.83, "grad_norm": 5.985118660322044, "learning_rate": 1.9451094167509731e-07, "loss": 0.5969, "step": 20481 }, { "epoch": 1.83, "grad_norm": 6.750083546594355, "learning_rate": 1.9431146692471092e-07, "loss": 0.5589, "step": 20482 }, { "epoch": 1.83, "grad_norm": 4.8235582399638455, "learning_rate": 1.9411209248254848e-07, "loss": 0.5898, "step": 20483 }, { "epoch": 1.83, "grad_norm": 6.181679531428085, "learning_rate": 1.939128183527711e-07, "loss": 0.6147, "step": 20484 }, { "epoch": 1.83, "grad_norm": 7.0117752321798825, "learning_rate": 1.9371364453953822e-07, "loss": 0.567, "step": 20485 }, { "epoch": 1.83, "grad_norm": 7.440936937138381, "learning_rate": 1.9351457104700766e-07, "loss": 0.634, "step": 20486 }, { "epoch": 1.83, "grad_norm": 8.008518239983848, "learning_rate": 1.9331559787933495e-07, "loss": 0.6317, "step": 20487 }, { "epoch": 1.83, "grad_norm": 6.850205980235074, "learning_rate": 1.9311672504067235e-07, "loss": 0.5379, "step": 20488 }, { "epoch": 1.83, "grad_norm": 5.41262437523909, "learning_rate": 1.9291795253517097e-07, "loss": 0.5, "step": 20489 }, { "epoch": 1.83, "grad_norm": 10.400692954633453, "learning_rate": 1.9271928036698028e-07, "loss": 0.5931, "step": 20490 }, { "epoch": 1.83, "grad_norm": 6.45540512859701, "learning_rate": 1.925207085402464e-07, "loss": 0.5477, "step": 20491 }, { "epoch": 1.83, "grad_norm": 5.6690567359634185, "learning_rate": 1.923222370591149e-07, "loss": 0.5613, "step": 20492 }, { "epoch": 1.83, "grad_norm": 9.801136489273054, "learning_rate": 1.9212386592772858e-07, "loss": 0.5561, "step": 20493 }, { "epoch": 1.83, "grad_norm": 6.5102574063153416, "learning_rate": 1.9192559515022747e-07, "loss": 0.5462, "step": 20494 }, { "epoch": 1.83, "grad_norm": 4.914886929942341, "learning_rate": 1.9172742473074934e-07, "loss": 0.5857, "step": 20495 }, { "epoch": 1.83, "grad_norm": 7.526457650109771, "learning_rate": 1.915293546734315e-07, "loss": 0.6289, "step": 20496 }, { "epoch": 1.83, "grad_norm": 6.766455141920986, "learning_rate": 1.9133138498240776e-07, "loss": 0.5401, "step": 20497 }, { "epoch": 1.83, "grad_norm": 5.493991312445989, "learning_rate": 1.9113351566181105e-07, "loss": 0.6428, "step": 20498 }, { "epoch": 1.83, "grad_norm": 5.3785527867819605, "learning_rate": 1.909357467157702e-07, "loss": 0.5616, "step": 20499 }, { "epoch": 1.83, "grad_norm": 4.531002773947975, "learning_rate": 1.9073807814841528e-07, "loss": 0.5734, "step": 20500 }, { "epoch": 1.83, "grad_norm": 9.863778177938391, "learning_rate": 1.905405099638702e-07, "loss": 0.6263, "step": 20501 }, { "epoch": 1.83, "grad_norm": 4.774935404995329, "learning_rate": 1.9034304216625888e-07, "loss": 0.5295, "step": 20502 }, { "epoch": 1.83, "grad_norm": 7.594889837993192, "learning_rate": 1.9014567475970414e-07, "loss": 0.5955, "step": 20503 }, { "epoch": 1.83, "grad_norm": 7.51213376563046, "learning_rate": 1.8994840774832436e-07, "loss": 0.5461, "step": 20504 }, { "epoch": 1.83, "grad_norm": 6.44399336858761, "learning_rate": 1.8975124113623844e-07, "loss": 0.5255, "step": 20505 }, { "epoch": 1.83, "grad_norm": 7.540364689901551, "learning_rate": 1.8955417492756146e-07, "loss": 0.5801, "step": 20506 }, { "epoch": 1.83, "grad_norm": 4.808116696624899, "learning_rate": 1.8935720912640565e-07, "loss": 0.5442, "step": 20507 }, { "epoch": 1.83, "grad_norm": 8.512301712187089, "learning_rate": 1.8916034373688386e-07, "loss": 0.5589, "step": 20508 }, { "epoch": 1.83, "grad_norm": 5.552659179667349, "learning_rate": 1.8896357876310334e-07, "loss": 0.4933, "step": 20509 }, { "epoch": 1.83, "grad_norm": 4.822747546019246, "learning_rate": 1.8876691420917247e-07, "loss": 0.6186, "step": 20510 }, { "epoch": 1.83, "grad_norm": 6.668168126507062, "learning_rate": 1.8857035007919578e-07, "loss": 0.5726, "step": 20511 }, { "epoch": 1.83, "grad_norm": 9.25252382168987, "learning_rate": 1.8837388637727604e-07, "loss": 0.5497, "step": 20512 }, { "epoch": 1.83, "grad_norm": 6.394982308962592, "learning_rate": 1.8817752310751503e-07, "loss": 0.5146, "step": 20513 }, { "epoch": 1.83, "grad_norm": 6.023439932792848, "learning_rate": 1.8798126027400943e-07, "loss": 0.5808, "step": 20514 }, { "epoch": 1.83, "grad_norm": 5.411326408644978, "learning_rate": 1.8778509788085708e-07, "loss": 0.5764, "step": 20515 }, { "epoch": 1.83, "grad_norm": 6.000731823699352, "learning_rate": 1.875890359321525e-07, "loss": 0.5576, "step": 20516 }, { "epoch": 1.83, "grad_norm": 5.235548447960247, "learning_rate": 1.873930744319874e-07, "loss": 0.5319, "step": 20517 }, { "epoch": 1.83, "grad_norm": 6.201313025011415, "learning_rate": 1.8719721338445297e-07, "loss": 0.585, "step": 20518 }, { "epoch": 1.83, "grad_norm": 6.909144468141908, "learning_rate": 1.8700145279363702e-07, "loss": 0.5728, "step": 20519 }, { "epoch": 1.83, "grad_norm": 6.8916160277305645, "learning_rate": 1.8680579266362464e-07, "loss": 0.6033, "step": 20520 }, { "epoch": 1.83, "grad_norm": 7.056578034269325, "learning_rate": 1.8661023299850088e-07, "loss": 0.5331, "step": 20521 }, { "epoch": 1.83, "grad_norm": 5.81521244705786, "learning_rate": 1.8641477380234806e-07, "loss": 0.5393, "step": 20522 }, { "epoch": 1.83, "grad_norm": 5.9427681369927425, "learning_rate": 1.8621941507924512e-07, "loss": 0.6002, "step": 20523 }, { "epoch": 1.83, "grad_norm": 6.243086083353086, "learning_rate": 1.8602415683326936e-07, "loss": 0.5631, "step": 20524 }, { "epoch": 1.83, "grad_norm": 6.139142010332657, "learning_rate": 1.8582899906849695e-07, "loss": 0.5531, "step": 20525 }, { "epoch": 1.83, "grad_norm": 5.034200215991555, "learning_rate": 1.8563394178900185e-07, "loss": 0.6206, "step": 20526 }, { "epoch": 1.83, "grad_norm": 6.512701892531282, "learning_rate": 1.8543898499885415e-07, "loss": 0.5549, "step": 20527 }, { "epoch": 1.83, "grad_norm": 6.069490271297436, "learning_rate": 1.8524412870212448e-07, "loss": 0.5648, "step": 20528 }, { "epoch": 1.83, "grad_norm": 7.404130774484126, "learning_rate": 1.8504937290287905e-07, "loss": 0.5587, "step": 20529 }, { "epoch": 1.83, "grad_norm": 6.160351574777903, "learning_rate": 1.8485471760518347e-07, "loss": 0.5191, "step": 20530 }, { "epoch": 1.83, "grad_norm": 7.012094233701174, "learning_rate": 1.8466016281310117e-07, "loss": 0.5937, "step": 20531 }, { "epoch": 1.83, "grad_norm": 8.31209278941808, "learning_rate": 1.8446570853069225e-07, "loss": 0.5911, "step": 20532 }, { "epoch": 1.83, "grad_norm": 5.712534670907481, "learning_rate": 1.8427135476201562e-07, "loss": 0.5809, "step": 20533 }, { "epoch": 1.83, "grad_norm": 7.105480755103412, "learning_rate": 1.8407710151112867e-07, "loss": 0.5462, "step": 20534 }, { "epoch": 1.83, "grad_norm": 5.455549501758667, "learning_rate": 1.838829487820848e-07, "loss": 0.5728, "step": 20535 }, { "epoch": 1.83, "grad_norm": 7.007491025938872, "learning_rate": 1.8368889657893796e-07, "loss": 0.6133, "step": 20536 }, { "epoch": 1.83, "grad_norm": 7.569733734313795, "learning_rate": 1.834949449057377e-07, "loss": 0.6627, "step": 20537 }, { "epoch": 1.83, "grad_norm": 6.377258838073588, "learning_rate": 1.833010937665325e-07, "loss": 0.5487, "step": 20538 }, { "epoch": 1.83, "grad_norm": 6.733676934603241, "learning_rate": 1.8310734316536905e-07, "loss": 0.619, "step": 20539 }, { "epoch": 1.83, "grad_norm": 5.781531552477668, "learning_rate": 1.8291369310629026e-07, "loss": 0.5519, "step": 20540 }, { "epoch": 1.83, "grad_norm": 7.021595324472358, "learning_rate": 1.82720143593339e-07, "loss": 0.5241, "step": 20541 }, { "epoch": 1.83, "grad_norm": 6.567755600548578, "learning_rate": 1.8252669463055484e-07, "loss": 0.5037, "step": 20542 }, { "epoch": 1.83, "grad_norm": 8.23301331942959, "learning_rate": 1.823333462219762e-07, "loss": 0.597, "step": 20543 }, { "epoch": 1.83, "grad_norm": 7.344234960928545, "learning_rate": 1.821400983716387e-07, "loss": 0.5471, "step": 20544 }, { "epoch": 1.83, "grad_norm": 6.650330377656289, "learning_rate": 1.819469510835753e-07, "loss": 0.5905, "step": 20545 }, { "epoch": 1.83, "grad_norm": 6.524706681850245, "learning_rate": 1.8175390436181827e-07, "loss": 0.5502, "step": 20546 }, { "epoch": 1.83, "grad_norm": 7.925619267853445, "learning_rate": 1.815609582103961e-07, "loss": 0.6254, "step": 20547 }, { "epoch": 1.83, "grad_norm": 5.4783942201599904, "learning_rate": 1.813681126333372e-07, "loss": 0.6185, "step": 20548 }, { "epoch": 1.83, "grad_norm": 7.023649098377665, "learning_rate": 1.8117536763466613e-07, "loss": 0.485, "step": 20549 }, { "epoch": 1.83, "grad_norm": 7.641043358880881, "learning_rate": 1.809827232184064e-07, "loss": 0.5897, "step": 20550 }, { "epoch": 1.83, "grad_norm": 8.736048692597262, "learning_rate": 1.8079017938857968e-07, "loss": 0.5537, "step": 20551 }, { "epoch": 1.83, "grad_norm": 6.057018733646689, "learning_rate": 1.8059773614920285e-07, "loss": 0.5242, "step": 20552 }, { "epoch": 1.83, "grad_norm": 4.985378865240564, "learning_rate": 1.804053935042943e-07, "loss": 0.5388, "step": 20553 }, { "epoch": 1.83, "grad_norm": 8.499315849638082, "learning_rate": 1.802131514578681e-07, "loss": 0.5432, "step": 20554 }, { "epoch": 1.83, "grad_norm": 6.175874916886208, "learning_rate": 1.800210100139377e-07, "loss": 0.5524, "step": 20555 }, { "epoch": 1.83, "grad_norm": 7.75116381359166, "learning_rate": 1.798289691765126e-07, "loss": 0.5424, "step": 20556 }, { "epoch": 1.83, "grad_norm": 5.666910065694054, "learning_rate": 1.7963702894960245e-07, "loss": 0.5929, "step": 20557 }, { "epoch": 1.83, "grad_norm": 6.17378371706061, "learning_rate": 1.794451893372129e-07, "loss": 0.5085, "step": 20558 }, { "epoch": 1.83, "grad_norm": 6.778893209826911, "learning_rate": 1.7925345034334741e-07, "loss": 0.5547, "step": 20559 }, { "epoch": 1.83, "grad_norm": 7.618248351620059, "learning_rate": 1.7906181197200945e-07, "loss": 0.5759, "step": 20560 }, { "epoch": 1.83, "grad_norm": 8.215854309210181, "learning_rate": 1.7887027422719806e-07, "loss": 0.5545, "step": 20561 }, { "epoch": 1.83, "grad_norm": 8.648398383380762, "learning_rate": 1.7867883711291222e-07, "loss": 0.568, "step": 20562 }, { "epoch": 1.83, "grad_norm": 6.973951410737096, "learning_rate": 1.7848750063314714e-07, "loss": 0.571, "step": 20563 }, { "epoch": 1.83, "grad_norm": 7.827115326116493, "learning_rate": 1.782962647918962e-07, "loss": 0.5615, "step": 20564 }, { "epoch": 1.83, "grad_norm": 5.5611942164720745, "learning_rate": 1.781051295931513e-07, "loss": 0.525, "step": 20565 }, { "epoch": 1.83, "grad_norm": 7.331898476982622, "learning_rate": 1.7791409504090252e-07, "loss": 0.5641, "step": 20566 }, { "epoch": 1.83, "grad_norm": 7.343842515876296, "learning_rate": 1.777231611391367e-07, "loss": 0.5353, "step": 20567 }, { "epoch": 1.83, "grad_norm": 7.256052043421368, "learning_rate": 1.7753232789183895e-07, "loss": 0.6052, "step": 20568 }, { "epoch": 1.83, "grad_norm": 10.150019865955187, "learning_rate": 1.7734159530299278e-07, "loss": 0.5993, "step": 20569 }, { "epoch": 1.84, "grad_norm": 6.257002136699931, "learning_rate": 1.7715096337658e-07, "loss": 0.58, "step": 20570 }, { "epoch": 1.84, "grad_norm": 6.8176732898595755, "learning_rate": 1.7696043211657854e-07, "loss": 0.5953, "step": 20571 }, { "epoch": 1.84, "grad_norm": 6.181080241560242, "learning_rate": 1.7677000152696576e-07, "loss": 0.5559, "step": 20572 }, { "epoch": 1.84, "grad_norm": 9.301970072252466, "learning_rate": 1.765796716117163e-07, "loss": 0.5428, "step": 20573 }, { "epoch": 1.84, "grad_norm": 5.604052057216837, "learning_rate": 1.7638944237480305e-07, "loss": 0.5683, "step": 20574 }, { "epoch": 1.84, "grad_norm": 6.2795176523219745, "learning_rate": 1.7619931382019727e-07, "loss": 0.5616, "step": 20575 }, { "epoch": 1.84, "grad_norm": 5.753000941821374, "learning_rate": 1.7600928595186696e-07, "loss": 0.585, "step": 20576 }, { "epoch": 1.84, "grad_norm": 9.195842247806896, "learning_rate": 1.7581935877377775e-07, "loss": 0.5479, "step": 20577 }, { "epoch": 1.84, "grad_norm": 5.576741740200964, "learning_rate": 1.7562953228989487e-07, "loss": 0.5742, "step": 20578 }, { "epoch": 1.84, "grad_norm": 5.4949020152651045, "learning_rate": 1.7543980650418013e-07, "loss": 0.5808, "step": 20579 }, { "epoch": 1.84, "grad_norm": 6.884247373400638, "learning_rate": 1.752501814205948e-07, "loss": 0.5317, "step": 20580 }, { "epoch": 1.84, "grad_norm": 6.279222604874481, "learning_rate": 1.7506065704309517e-07, "loss": 0.5885, "step": 20581 }, { "epoch": 1.84, "grad_norm": 8.616160351957546, "learning_rate": 1.748712333756375e-07, "loss": 0.5853, "step": 20582 }, { "epoch": 1.84, "grad_norm": 6.179798290897012, "learning_rate": 1.7468191042217696e-07, "loss": 0.5749, "step": 20583 }, { "epoch": 1.84, "grad_norm": 5.5286318134227, "learning_rate": 1.744926881866632e-07, "loss": 0.5298, "step": 20584 }, { "epoch": 1.84, "grad_norm": 7.982310228780247, "learning_rate": 1.743035666730475e-07, "loss": 0.5605, "step": 20585 }, { "epoch": 1.84, "grad_norm": 5.654867160169335, "learning_rate": 1.7411454588527666e-07, "loss": 0.5707, "step": 20586 }, { "epoch": 1.84, "grad_norm": 5.199575587706497, "learning_rate": 1.739256258272959e-07, "loss": 0.5284, "step": 20587 }, { "epoch": 1.84, "grad_norm": 8.504493593250443, "learning_rate": 1.7373680650304869e-07, "loss": 0.6131, "step": 20588 }, { "epoch": 1.84, "grad_norm": 7.007058234551393, "learning_rate": 1.7354808791647748e-07, "loss": 0.5685, "step": 20589 }, { "epoch": 1.84, "grad_norm": 6.215801908787185, "learning_rate": 1.733594700715191e-07, "loss": 0.5271, "step": 20590 }, { "epoch": 1.84, "grad_norm": 9.187340979066992, "learning_rate": 1.7317095297211152e-07, "loss": 0.5429, "step": 20591 }, { "epoch": 1.84, "grad_norm": 11.423582294095121, "learning_rate": 1.7298253662219045e-07, "loss": 0.6098, "step": 20592 }, { "epoch": 1.84, "grad_norm": 5.447013232987154, "learning_rate": 1.7279422102568721e-07, "loss": 0.5292, "step": 20593 }, { "epoch": 1.84, "grad_norm": 6.1436312442008445, "learning_rate": 1.7260600618653423e-07, "loss": 0.5149, "step": 20594 }, { "epoch": 1.84, "grad_norm": 7.412966700829481, "learning_rate": 1.724178921086578e-07, "loss": 0.5885, "step": 20595 }, { "epoch": 1.84, "grad_norm": 7.368196018116045, "learning_rate": 1.7222987879598697e-07, "loss": 0.545, "step": 20596 }, { "epoch": 1.84, "grad_norm": 6.204424713814753, "learning_rate": 1.7204196625244362e-07, "loss": 0.5535, "step": 20597 }, { "epoch": 1.84, "grad_norm": 6.847989482722372, "learning_rate": 1.7185415448195187e-07, "loss": 0.5556, "step": 20598 }, { "epoch": 1.84, "grad_norm": 6.0190238746350415, "learning_rate": 1.7166644348843076e-07, "loss": 0.5279, "step": 20599 }, { "epoch": 1.84, "grad_norm": 7.218219763586711, "learning_rate": 1.7147883327579885e-07, "loss": 0.6, "step": 20600 }, { "epoch": 1.84, "grad_norm": 6.865882020959749, "learning_rate": 1.7129132384797186e-07, "loss": 0.577, "step": 20601 }, { "epoch": 1.84, "grad_norm": 8.767851021780876, "learning_rate": 1.7110391520886449e-07, "loss": 0.5916, "step": 20602 }, { "epoch": 1.84, "grad_norm": 7.7311602834285305, "learning_rate": 1.7091660736238745e-07, "loss": 0.5576, "step": 20603 }, { "epoch": 1.84, "grad_norm": 7.110694541604403, "learning_rate": 1.7072940031245043e-07, "loss": 0.5825, "step": 20604 }, { "epoch": 1.84, "grad_norm": 6.681090915181193, "learning_rate": 1.7054229406296086e-07, "loss": 0.5971, "step": 20605 }, { "epoch": 1.84, "grad_norm": 8.44866720228712, "learning_rate": 1.7035528861782502e-07, "loss": 0.6166, "step": 20606 }, { "epoch": 1.84, "grad_norm": 8.739578940587425, "learning_rate": 1.7016838398094648e-07, "loss": 0.5325, "step": 20607 }, { "epoch": 1.84, "grad_norm": 7.118913271491661, "learning_rate": 1.6998158015622546e-07, "loss": 0.5272, "step": 20608 }, { "epoch": 1.84, "grad_norm": 8.095108419045275, "learning_rate": 1.697948771475616e-07, "loss": 0.5672, "step": 20609 }, { "epoch": 1.84, "grad_norm": 6.460942594749149, "learning_rate": 1.6960827495885067e-07, "loss": 0.5358, "step": 20610 }, { "epoch": 1.84, "grad_norm": 5.82603937270263, "learning_rate": 1.6942177359398904e-07, "loss": 0.5682, "step": 20611 }, { "epoch": 1.84, "grad_norm": 6.805303963182185, "learning_rate": 1.692353730568691e-07, "loss": 0.59, "step": 20612 }, { "epoch": 1.84, "grad_norm": 10.674598605006066, "learning_rate": 1.6904907335138166e-07, "loss": 0.5725, "step": 20613 }, { "epoch": 1.84, "grad_norm": 5.171454943630189, "learning_rate": 1.6886287448141524e-07, "loss": 0.5612, "step": 20614 }, { "epoch": 1.84, "grad_norm": 6.052066638022161, "learning_rate": 1.686767764508568e-07, "loss": 0.5165, "step": 20615 }, { "epoch": 1.84, "grad_norm": 6.5919631351502135, "learning_rate": 1.684907792635898e-07, "loss": 0.5054, "step": 20616 }, { "epoch": 1.84, "grad_norm": 4.7353476754433865, "learning_rate": 1.6830488292349677e-07, "loss": 0.5124, "step": 20617 }, { "epoch": 1.84, "grad_norm": 7.204958966073327, "learning_rate": 1.6811908743445794e-07, "loss": 0.6068, "step": 20618 }, { "epoch": 1.84, "grad_norm": 5.141231863710975, "learning_rate": 1.6793339280035181e-07, "loss": 0.5395, "step": 20619 }, { "epoch": 1.84, "grad_norm": 7.166457008307585, "learning_rate": 1.6774779902505422e-07, "loss": 0.5799, "step": 20620 }, { "epoch": 1.84, "grad_norm": 7.1074055015778494, "learning_rate": 1.6756230611243873e-07, "loss": 0.514, "step": 20621 }, { "epoch": 1.84, "grad_norm": 9.25815280742036, "learning_rate": 1.6737691406637724e-07, "loss": 0.5643, "step": 20622 }, { "epoch": 1.84, "grad_norm": 7.6254913932324095, "learning_rate": 1.6719162289074e-07, "loss": 0.5724, "step": 20623 }, { "epoch": 1.84, "grad_norm": 9.085864675061949, "learning_rate": 1.6700643258939275e-07, "loss": 0.5756, "step": 20624 }, { "epoch": 1.84, "grad_norm": 5.041295468637495, "learning_rate": 1.6682134316620301e-07, "loss": 0.5489, "step": 20625 }, { "epoch": 1.84, "grad_norm": 8.12739041768074, "learning_rate": 1.666363546250327e-07, "loss": 0.51, "step": 20626 }, { "epoch": 1.84, "grad_norm": 5.985130977525909, "learning_rate": 1.6645146696974423e-07, "loss": 0.5693, "step": 20627 }, { "epoch": 1.84, "grad_norm": 6.060312315167556, "learning_rate": 1.6626668020419568e-07, "loss": 0.5088, "step": 20628 }, { "epoch": 1.84, "grad_norm": 6.550445470383628, "learning_rate": 1.660819943322445e-07, "loss": 0.5307, "step": 20629 }, { "epoch": 1.84, "grad_norm": 6.17270312755417, "learning_rate": 1.6589740935774534e-07, "loss": 0.538, "step": 20630 }, { "epoch": 1.84, "grad_norm": 6.098320704753383, "learning_rate": 1.6571292528455184e-07, "loss": 0.5299, "step": 20631 }, { "epoch": 1.84, "grad_norm": 9.412574609084226, "learning_rate": 1.655285421165137e-07, "loss": 0.5262, "step": 20632 }, { "epoch": 1.84, "grad_norm": 5.495406545634089, "learning_rate": 1.6534425985748058e-07, "loss": 0.6123, "step": 20633 }, { "epoch": 1.84, "grad_norm": 7.119548540429959, "learning_rate": 1.6516007851129722e-07, "loss": 0.6113, "step": 20634 }, { "epoch": 1.84, "grad_norm": 7.015780757802399, "learning_rate": 1.6497599808180996e-07, "loss": 0.5299, "step": 20635 }, { "epoch": 1.84, "grad_norm": 5.91290315546167, "learning_rate": 1.6479201857285966e-07, "loss": 0.5914, "step": 20636 }, { "epoch": 1.84, "grad_norm": 8.182518205296983, "learning_rate": 1.6460813998828706e-07, "loss": 0.5972, "step": 20637 }, { "epoch": 1.84, "grad_norm": 6.321433817476633, "learning_rate": 1.6442436233193083e-07, "loss": 0.5803, "step": 20638 }, { "epoch": 1.84, "grad_norm": 7.553572613996043, "learning_rate": 1.642406856076262e-07, "loss": 0.502, "step": 20639 }, { "epoch": 1.84, "grad_norm": 5.965231264948768, "learning_rate": 1.640571098192073e-07, "loss": 0.5064, "step": 20640 }, { "epoch": 1.84, "grad_norm": 8.852506111449145, "learning_rate": 1.6387363497050502e-07, "loss": 0.5843, "step": 20641 }, { "epoch": 1.84, "grad_norm": 6.417552391475473, "learning_rate": 1.6369026106535013e-07, "loss": 0.5277, "step": 20642 }, { "epoch": 1.84, "grad_norm": 5.352046883286437, "learning_rate": 1.6350698810756905e-07, "loss": 0.6596, "step": 20643 }, { "epoch": 1.84, "grad_norm": 9.15741264665353, "learning_rate": 1.6332381610098812e-07, "loss": 0.6115, "step": 20644 }, { "epoch": 1.84, "grad_norm": 5.734780462390892, "learning_rate": 1.6314074504943045e-07, "loss": 0.5393, "step": 20645 }, { "epoch": 1.84, "grad_norm": 8.174027053604659, "learning_rate": 1.6295777495671794e-07, "loss": 0.5653, "step": 20646 }, { "epoch": 1.84, "grad_norm": 7.526712950684415, "learning_rate": 1.627749058266681e-07, "loss": 0.5756, "step": 20647 }, { "epoch": 1.84, "grad_norm": 6.806740319255307, "learning_rate": 1.62592137663099e-07, "loss": 0.5499, "step": 20648 }, { "epoch": 1.84, "grad_norm": 5.805197189264749, "learning_rate": 1.6240947046982482e-07, "loss": 0.5787, "step": 20649 }, { "epoch": 1.84, "grad_norm": 4.879679664692137, "learning_rate": 1.6222690425065913e-07, "loss": 0.5088, "step": 20650 }, { "epoch": 1.84, "grad_norm": 5.66902208922615, "learning_rate": 1.6204443900941225e-07, "loss": 0.5851, "step": 20651 }, { "epoch": 1.84, "grad_norm": 8.610626200010827, "learning_rate": 1.6186207474989336e-07, "loss": 0.5686, "step": 20652 }, { "epoch": 1.84, "grad_norm": 5.879336768763866, "learning_rate": 1.6167981147590827e-07, "loss": 0.5695, "step": 20653 }, { "epoch": 1.84, "grad_norm": 6.444710868017044, "learning_rate": 1.614976491912612e-07, "loss": 0.5149, "step": 20654 }, { "epoch": 1.84, "grad_norm": 9.702470203042676, "learning_rate": 1.613155878997541e-07, "loss": 0.576, "step": 20655 }, { "epoch": 1.84, "grad_norm": 6.521313268884404, "learning_rate": 1.611336276051878e-07, "loss": 0.6021, "step": 20656 }, { "epoch": 1.84, "grad_norm": 7.2175942695285435, "learning_rate": 1.6095176831135984e-07, "loss": 0.5669, "step": 20657 }, { "epoch": 1.84, "grad_norm": 7.7327185475437865, "learning_rate": 1.607700100220666e-07, "loss": 0.569, "step": 20658 }, { "epoch": 1.84, "grad_norm": 7.925217472774974, "learning_rate": 1.6058835274110175e-07, "loss": 0.5659, "step": 20659 }, { "epoch": 1.84, "grad_norm": 8.245009724545737, "learning_rate": 1.604067964722572e-07, "loss": 0.5274, "step": 20660 }, { "epoch": 1.84, "grad_norm": 6.193378223488177, "learning_rate": 1.6022534121932165e-07, "loss": 0.5313, "step": 20661 }, { "epoch": 1.84, "grad_norm": 5.363165464399734, "learning_rate": 1.6004398698608315e-07, "loss": 0.5404, "step": 20662 }, { "epoch": 1.84, "grad_norm": 5.699463848639689, "learning_rate": 1.59862733776327e-07, "loss": 0.5442, "step": 20663 }, { "epoch": 1.84, "grad_norm": 7.225396966498009, "learning_rate": 1.5968158159383686e-07, "loss": 0.6238, "step": 20664 }, { "epoch": 1.84, "grad_norm": 5.779454844668766, "learning_rate": 1.5950053044239412e-07, "loss": 0.5854, "step": 20665 }, { "epoch": 1.84, "grad_norm": 7.0156551760393455, "learning_rate": 1.5931958032577743e-07, "loss": 0.6239, "step": 20666 }, { "epoch": 1.84, "grad_norm": 6.373027958224076, "learning_rate": 1.5913873124776268e-07, "loss": 0.5647, "step": 20667 }, { "epoch": 1.84, "grad_norm": 9.580236403917825, "learning_rate": 1.5895798321212575e-07, "loss": 0.5732, "step": 20668 }, { "epoch": 1.84, "grad_norm": 7.34703536297252, "learning_rate": 1.587773362226397e-07, "loss": 0.5578, "step": 20669 }, { "epoch": 1.84, "grad_norm": 7.206467444491746, "learning_rate": 1.5859679028307373e-07, "loss": 0.6023, "step": 20670 }, { "epoch": 1.84, "grad_norm": 7.936489137472094, "learning_rate": 1.584163453971982e-07, "loss": 0.5782, "step": 20671 }, { "epoch": 1.84, "grad_norm": 5.984013154560975, "learning_rate": 1.5823600156877838e-07, "loss": 0.5931, "step": 20672 }, { "epoch": 1.84, "grad_norm": 6.626837171686694, "learning_rate": 1.5805575880157853e-07, "loss": 0.5274, "step": 20673 }, { "epoch": 1.84, "grad_norm": 9.033289739035647, "learning_rate": 1.5787561709936116e-07, "loss": 0.5758, "step": 20674 }, { "epoch": 1.84, "grad_norm": 5.368076671729043, "learning_rate": 1.5769557646588608e-07, "loss": 0.6066, "step": 20675 }, { "epoch": 1.84, "grad_norm": 6.7553218412895815, "learning_rate": 1.5751563690491134e-07, "loss": 0.579, "step": 20676 }, { "epoch": 1.84, "grad_norm": 5.911690875194166, "learning_rate": 1.5733579842019286e-07, "loss": 0.5366, "step": 20677 }, { "epoch": 1.84, "grad_norm": 6.823752747637525, "learning_rate": 1.5715606101548486e-07, "loss": 0.5844, "step": 20678 }, { "epoch": 1.84, "grad_norm": 6.670256139896281, "learning_rate": 1.5697642469453765e-07, "loss": 0.5191, "step": 20679 }, { "epoch": 1.84, "grad_norm": 7.058900612450303, "learning_rate": 1.5679688946110218e-07, "loss": 0.6104, "step": 20680 }, { "epoch": 1.84, "grad_norm": 6.717185135150641, "learning_rate": 1.566174553189248e-07, "loss": 0.5512, "step": 20681 }, { "epoch": 1.85, "grad_norm": 7.94179318805623, "learning_rate": 1.5643812227175147e-07, "loss": 0.5279, "step": 20682 }, { "epoch": 1.85, "grad_norm": 4.825478602575518, "learning_rate": 1.5625889032332475e-07, "loss": 0.4881, "step": 20683 }, { "epoch": 1.85, "grad_norm": 6.454847107451756, "learning_rate": 1.5607975947738608e-07, "loss": 0.6306, "step": 20684 }, { "epoch": 1.85, "grad_norm": 5.749403955468157, "learning_rate": 1.5590072973767523e-07, "loss": 0.5705, "step": 20685 }, { "epoch": 1.85, "grad_norm": 5.243477390781048, "learning_rate": 1.5572180110792755e-07, "loss": 0.5854, "step": 20686 }, { "epoch": 1.85, "grad_norm": 6.181351908085328, "learning_rate": 1.5554297359187842e-07, "loss": 0.5901, "step": 20687 }, { "epoch": 1.85, "grad_norm": 5.4255511317015905, "learning_rate": 1.5536424719326093e-07, "loss": 0.6489, "step": 20688 }, { "epoch": 1.85, "grad_norm": 6.2962015572329735, "learning_rate": 1.5518562191580487e-07, "loss": 0.5604, "step": 20689 }, { "epoch": 1.85, "grad_norm": 5.107226514259649, "learning_rate": 1.5500709776323896e-07, "loss": 0.5724, "step": 20690 }, { "epoch": 1.85, "grad_norm": 5.9905807097479835, "learning_rate": 1.548286747392902e-07, "loss": 0.5349, "step": 20691 }, { "epoch": 1.85, "grad_norm": 7.513159803246822, "learning_rate": 1.5465035284768114e-07, "loss": 0.5339, "step": 20692 }, { "epoch": 1.85, "grad_norm": 8.619768601955238, "learning_rate": 1.5447213209213552e-07, "loss": 0.5616, "step": 20693 }, { "epoch": 1.85, "grad_norm": 7.951223224382077, "learning_rate": 1.54294012476372e-07, "loss": 0.5639, "step": 20694 }, { "epoch": 1.85, "grad_norm": 6.804612928825943, "learning_rate": 1.541159940041098e-07, "loss": 0.6178, "step": 20695 }, { "epoch": 1.85, "grad_norm": 7.9672974410657575, "learning_rate": 1.539380766790638e-07, "loss": 0.6019, "step": 20696 }, { "epoch": 1.85, "grad_norm": 6.563748236630898, "learning_rate": 1.5376026050494708e-07, "loss": 0.6108, "step": 20697 }, { "epoch": 1.85, "grad_norm": 5.59648961699994, "learning_rate": 1.535825454854728e-07, "loss": 0.5885, "step": 20698 }, { "epoch": 1.85, "grad_norm": 5.375746423288026, "learning_rate": 1.534049316243491e-07, "loss": 0.5861, "step": 20699 }, { "epoch": 1.85, "grad_norm": 6.919163397385305, "learning_rate": 1.5322741892528302e-07, "loss": 0.528, "step": 20700 }, { "epoch": 1.85, "grad_norm": 4.902082114687394, "learning_rate": 1.530500073919805e-07, "loss": 0.6444, "step": 20701 }, { "epoch": 1.85, "grad_norm": 8.140864365959969, "learning_rate": 1.5287269702814466e-07, "loss": 0.6305, "step": 20702 }, { "epoch": 1.85, "grad_norm": 6.367806913185126, "learning_rate": 1.5269548783747646e-07, "loss": 0.5972, "step": 20703 }, { "epoch": 1.85, "grad_norm": 6.135615776841737, "learning_rate": 1.5251837982367402e-07, "loss": 0.5133, "step": 20704 }, { "epoch": 1.85, "grad_norm": 5.295566547168751, "learning_rate": 1.5234137299043494e-07, "loss": 0.562, "step": 20705 }, { "epoch": 1.85, "grad_norm": 6.001047020620564, "learning_rate": 1.5216446734145352e-07, "loss": 0.6087, "step": 20706 }, { "epoch": 1.85, "grad_norm": 10.608212647899032, "learning_rate": 1.5198766288042178e-07, "loss": 0.5224, "step": 20707 }, { "epoch": 1.85, "grad_norm": 6.660837851279341, "learning_rate": 1.5181095961103122e-07, "loss": 0.5568, "step": 20708 }, { "epoch": 1.85, "grad_norm": 8.741720853501345, "learning_rate": 1.5163435753696943e-07, "loss": 0.574, "step": 20709 }, { "epoch": 1.85, "grad_norm": 7.816033902830379, "learning_rate": 1.514578566619229e-07, "loss": 0.5683, "step": 20710 }, { "epoch": 1.85, "grad_norm": 6.306716640962868, "learning_rate": 1.5128145698957486e-07, "loss": 0.6095, "step": 20711 }, { "epoch": 1.85, "grad_norm": 7.668564815262459, "learning_rate": 1.5110515852360842e-07, "loss": 0.5726, "step": 20712 }, { "epoch": 1.85, "grad_norm": 7.812263292813327, "learning_rate": 1.5092896126770228e-07, "loss": 0.5464, "step": 20713 }, { "epoch": 1.85, "grad_norm": 11.67851967749272, "learning_rate": 1.5075286522553521e-07, "loss": 0.5075, "step": 20714 }, { "epoch": 1.85, "grad_norm": 6.508848499774697, "learning_rate": 1.5057687040078207e-07, "loss": 0.5227, "step": 20715 }, { "epoch": 1.85, "grad_norm": 5.52348829954265, "learning_rate": 1.5040097679711763e-07, "loss": 0.5905, "step": 20716 }, { "epoch": 1.85, "grad_norm": 5.361670595288296, "learning_rate": 1.5022518441821176e-07, "loss": 0.5709, "step": 20717 }, { "epoch": 1.85, "grad_norm": 6.6156427270427125, "learning_rate": 1.5004949326773376e-07, "loss": 0.5605, "step": 20718 }, { "epoch": 1.85, "grad_norm": 8.059451107160532, "learning_rate": 1.498739033493518e-07, "loss": 0.5048, "step": 20719 }, { "epoch": 1.85, "grad_norm": 5.962871173530319, "learning_rate": 1.4969841466673075e-07, "loss": 0.5331, "step": 20720 }, { "epoch": 1.85, "grad_norm": 5.3847774780850886, "learning_rate": 1.4952302722353319e-07, "loss": 0.5216, "step": 20721 }, { "epoch": 1.85, "grad_norm": 5.130320238681146, "learning_rate": 1.4934774102342065e-07, "loss": 0.6532, "step": 20722 }, { "epoch": 1.85, "grad_norm": 7.561056384119012, "learning_rate": 1.4917255607005022e-07, "loss": 0.5603, "step": 20723 }, { "epoch": 1.85, "grad_norm": 6.344088377169165, "learning_rate": 1.489974723670806e-07, "loss": 0.5112, "step": 20724 }, { "epoch": 1.85, "grad_norm": 6.254261475571364, "learning_rate": 1.488224899181645e-07, "loss": 0.5632, "step": 20725 }, { "epoch": 1.85, "grad_norm": 7.190624237221434, "learning_rate": 1.4864760872695504e-07, "loss": 0.5705, "step": 20726 }, { "epoch": 1.85, "grad_norm": 6.5855103262337185, "learning_rate": 1.4847282879710268e-07, "loss": 0.5835, "step": 20727 }, { "epoch": 1.85, "grad_norm": 6.447979700376296, "learning_rate": 1.4829815013225502e-07, "loss": 0.5879, "step": 20728 }, { "epoch": 1.85, "grad_norm": 7.006537134390363, "learning_rate": 1.4812357273605916e-07, "loss": 0.6094, "step": 20729 }, { "epoch": 1.85, "grad_norm": 6.1509200504931885, "learning_rate": 1.4794909661215773e-07, "loss": 0.5813, "step": 20730 }, { "epoch": 1.85, "grad_norm": 5.840434545625151, "learning_rate": 1.4777472176419284e-07, "loss": 0.6159, "step": 20731 }, { "epoch": 1.85, "grad_norm": 6.008186713956841, "learning_rate": 1.4760044819580488e-07, "loss": 0.5735, "step": 20732 }, { "epoch": 1.85, "grad_norm": 8.551331632345947, "learning_rate": 1.4742627591063096e-07, "loss": 0.5805, "step": 20733 }, { "epoch": 1.85, "grad_norm": 12.372222775031402, "learning_rate": 1.4725220491230597e-07, "loss": 0.5552, "step": 20734 }, { "epoch": 1.85, "grad_norm": 6.881821916252152, "learning_rate": 1.470782352044653e-07, "loss": 0.5411, "step": 20735 }, { "epoch": 1.85, "grad_norm": 6.493200857403462, "learning_rate": 1.4690436679073772e-07, "loss": 0.5579, "step": 20736 }, { "epoch": 1.85, "grad_norm": 8.082053577151761, "learning_rate": 1.467305996747531e-07, "loss": 0.5706, "step": 20737 }, { "epoch": 1.85, "grad_norm": 8.135973036058362, "learning_rate": 1.4655693386013914e-07, "loss": 0.6296, "step": 20738 }, { "epoch": 1.85, "grad_norm": 4.657370109019666, "learning_rate": 1.4638336935052067e-07, "loss": 0.5509, "step": 20739 }, { "epoch": 1.85, "grad_norm": 8.457991224327344, "learning_rate": 1.462099061495198e-07, "loss": 0.5412, "step": 20740 }, { "epoch": 1.85, "grad_norm": 6.806944106259335, "learning_rate": 1.4603654426075752e-07, "loss": 0.5778, "step": 20741 }, { "epoch": 1.85, "grad_norm": 8.699963751859817, "learning_rate": 1.4586328368785262e-07, "loss": 0.595, "step": 20742 }, { "epoch": 1.85, "grad_norm": 9.774215037343122, "learning_rate": 1.456901244344211e-07, "loss": 0.6045, "step": 20743 }, { "epoch": 1.85, "grad_norm": 6.616621245571267, "learning_rate": 1.4551706650407727e-07, "loss": 0.6048, "step": 20744 }, { "epoch": 1.85, "grad_norm": 6.719868377370761, "learning_rate": 1.453441099004338e-07, "loss": 0.5295, "step": 20745 }, { "epoch": 1.85, "grad_norm": 8.960038654519149, "learning_rate": 1.4517125462710001e-07, "loss": 0.6466, "step": 20746 }, { "epoch": 1.85, "grad_norm": 7.082738278824946, "learning_rate": 1.4499850068768474e-07, "loss": 0.5853, "step": 20747 }, { "epoch": 1.85, "grad_norm": 7.370544068563108, "learning_rate": 1.448258480857939e-07, "loss": 0.5584, "step": 20748 }, { "epoch": 1.85, "grad_norm": 4.2634582266000844, "learning_rate": 1.4465329682503082e-07, "loss": 0.5014, "step": 20749 }, { "epoch": 1.85, "grad_norm": 5.714959066875765, "learning_rate": 1.4448084690899643e-07, "loss": 0.5435, "step": 20750 }, { "epoch": 1.85, "grad_norm": 6.6697051482836525, "learning_rate": 1.4430849834129123e-07, "loss": 0.5463, "step": 20751 }, { "epoch": 1.85, "grad_norm": 7.160913512351617, "learning_rate": 1.4413625112551233e-07, "loss": 0.5489, "step": 20752 }, { "epoch": 1.85, "grad_norm": 6.534741951836044, "learning_rate": 1.4396410526525571e-07, "loss": 0.6571, "step": 20753 }, { "epoch": 1.85, "grad_norm": 6.796412790025985, "learning_rate": 1.43792060764113e-07, "loss": 0.5198, "step": 20754 }, { "epoch": 1.85, "grad_norm": 7.113735277983158, "learning_rate": 1.4362011762567686e-07, "loss": 0.6048, "step": 20755 }, { "epoch": 1.85, "grad_norm": 6.993419552464397, "learning_rate": 1.4344827585353492e-07, "loss": 0.5444, "step": 20756 }, { "epoch": 1.85, "grad_norm": 7.6414674370545566, "learning_rate": 1.432765354512744e-07, "loss": 0.5535, "step": 20757 }, { "epoch": 1.85, "grad_norm": 6.266687094261093, "learning_rate": 1.4310489642248071e-07, "loss": 0.5557, "step": 20758 }, { "epoch": 1.85, "grad_norm": 4.820637049068365, "learning_rate": 1.4293335877073545e-07, "loss": 0.5465, "step": 20759 }, { "epoch": 1.85, "grad_norm": 5.898243643478518, "learning_rate": 1.4276192249961907e-07, "loss": 0.4958, "step": 20760 }, { "epoch": 1.85, "grad_norm": 10.9913655452052, "learning_rate": 1.425905876127115e-07, "loss": 0.5509, "step": 20761 }, { "epoch": 1.85, "grad_norm": 6.436011563774006, "learning_rate": 1.4241935411358766e-07, "loss": 0.5622, "step": 20762 }, { "epoch": 1.85, "grad_norm": 6.500006090618431, "learning_rate": 1.4224822200582134e-07, "loss": 0.5736, "step": 20763 }, { "epoch": 1.85, "grad_norm": 8.136794612910185, "learning_rate": 1.4207719129298524e-07, "loss": 0.5803, "step": 20764 }, { "epoch": 1.85, "grad_norm": 5.353492221688485, "learning_rate": 1.4190626197864876e-07, "loss": 0.5763, "step": 20765 }, { "epoch": 1.85, "grad_norm": 9.132683403428883, "learning_rate": 1.4173543406638068e-07, "loss": 0.6115, "step": 20766 }, { "epoch": 1.85, "grad_norm": 6.5486957638558865, "learning_rate": 1.4156470755974593e-07, "loss": 0.557, "step": 20767 }, { "epoch": 1.85, "grad_norm": 4.8945024644753214, "learning_rate": 1.4139408246230834e-07, "loss": 0.5156, "step": 20768 }, { "epoch": 1.85, "grad_norm": 9.965774831561184, "learning_rate": 1.4122355877762895e-07, "loss": 0.5065, "step": 20769 }, { "epoch": 1.85, "grad_norm": 7.2362957058002095, "learning_rate": 1.4105313650926655e-07, "loss": 0.5478, "step": 20770 }, { "epoch": 1.85, "grad_norm": 5.647771875611597, "learning_rate": 1.4088281566077945e-07, "loss": 0.5278, "step": 20771 }, { "epoch": 1.85, "grad_norm": 9.333874294630814, "learning_rate": 1.4071259623572198e-07, "loss": 0.5102, "step": 20772 }, { "epoch": 1.85, "grad_norm": 10.199259542717519, "learning_rate": 1.4054247823764799e-07, "loss": 0.6378, "step": 20773 }, { "epoch": 1.85, "grad_norm": 8.585911573462356, "learning_rate": 1.4037246167010797e-07, "loss": 0.5811, "step": 20774 }, { "epoch": 1.85, "grad_norm": 9.315257002957978, "learning_rate": 1.4020254653664966e-07, "loss": 0.5754, "step": 20775 }, { "epoch": 1.85, "grad_norm": 6.8464622808205915, "learning_rate": 1.4003273284082076e-07, "loss": 0.5394, "step": 20776 }, { "epoch": 1.85, "grad_norm": 5.974948297992769, "learning_rate": 1.3986302058616507e-07, "loss": 0.6491, "step": 20777 }, { "epoch": 1.85, "grad_norm": 6.977434017019341, "learning_rate": 1.3969340977622537e-07, "loss": 0.5342, "step": 20778 }, { "epoch": 1.85, "grad_norm": 7.021562676372832, "learning_rate": 1.3952390041454267e-07, "loss": 0.5317, "step": 20779 }, { "epoch": 1.85, "grad_norm": 7.535531062988102, "learning_rate": 1.393544925046536e-07, "loss": 0.5866, "step": 20780 }, { "epoch": 1.85, "grad_norm": 8.902467961139712, "learning_rate": 1.391851860500948e-07, "loss": 0.5785, "step": 20781 }, { "epoch": 1.85, "grad_norm": 9.505384638575148, "learning_rate": 1.3901598105440063e-07, "loss": 0.5561, "step": 20782 }, { "epoch": 1.85, "grad_norm": 7.662422340849546, "learning_rate": 1.3884687752110216e-07, "loss": 0.5131, "step": 20783 }, { "epoch": 1.85, "grad_norm": 5.929889555351067, "learning_rate": 1.3867787545372936e-07, "loss": 0.6142, "step": 20784 }, { "epoch": 1.85, "grad_norm": 7.177602044673022, "learning_rate": 1.385089748558094e-07, "loss": 0.5389, "step": 20785 }, { "epoch": 1.85, "grad_norm": 7.81832985850233, "learning_rate": 1.3834017573086945e-07, "loss": 0.6021, "step": 20786 }, { "epoch": 1.85, "grad_norm": 7.309583836331467, "learning_rate": 1.3817147808243003e-07, "loss": 0.5492, "step": 20787 }, { "epoch": 1.85, "grad_norm": 7.624355433945249, "learning_rate": 1.3800288191401445e-07, "loss": 0.5648, "step": 20788 }, { "epoch": 1.85, "grad_norm": 8.52686104361023, "learning_rate": 1.37834387229141e-07, "loss": 0.5744, "step": 20789 }, { "epoch": 1.85, "grad_norm": 7.4883410230955985, "learning_rate": 1.3766599403132687e-07, "loss": 0.5737, "step": 20790 }, { "epoch": 1.85, "grad_norm": 7.104573207856971, "learning_rate": 1.3749770232408643e-07, "loss": 0.5482, "step": 20791 }, { "epoch": 1.85, "grad_norm": 6.1341930326638865, "learning_rate": 1.373295121109336e-07, "loss": 0.6058, "step": 20792 }, { "epoch": 1.85, "grad_norm": 5.347110008424269, "learning_rate": 1.3716142339537718e-07, "loss": 0.5119, "step": 20793 }, { "epoch": 1.86, "grad_norm": 7.447875234027292, "learning_rate": 1.3699343618092664e-07, "loss": 0.5528, "step": 20794 }, { "epoch": 1.86, "grad_norm": 7.699203313825528, "learning_rate": 1.368255504710886e-07, "loss": 0.5578, "step": 20795 }, { "epoch": 1.86, "grad_norm": 8.579204373057108, "learning_rate": 1.36657766269368e-07, "loss": 0.6023, "step": 20796 }, { "epoch": 1.86, "grad_norm": 7.431015194307729, "learning_rate": 1.3649008357926485e-07, "loss": 0.6472, "step": 20797 }, { "epoch": 1.86, "grad_norm": 7.566677492814019, "learning_rate": 1.3632250240428024e-07, "loss": 0.6206, "step": 20798 }, { "epoch": 1.86, "grad_norm": 5.658093124515533, "learning_rate": 1.3615502274791304e-07, "loss": 0.5456, "step": 20799 }, { "epoch": 1.86, "grad_norm": 6.119947799283983, "learning_rate": 1.359876446136571e-07, "loss": 0.6193, "step": 20800 }, { "epoch": 1.86, "grad_norm": 7.601273303765995, "learning_rate": 1.3582036800500741e-07, "loss": 0.5532, "step": 20801 }, { "epoch": 1.86, "grad_norm": 5.919294445629335, "learning_rate": 1.3565319292545508e-07, "loss": 0.6154, "step": 20802 }, { "epoch": 1.86, "grad_norm": 6.956389198203883, "learning_rate": 1.3548611937848953e-07, "loss": 0.6708, "step": 20803 }, { "epoch": 1.86, "grad_norm": 7.660266923665702, "learning_rate": 1.3531914736759853e-07, "loss": 0.5521, "step": 20804 }, { "epoch": 1.86, "grad_norm": 5.09289370177187, "learning_rate": 1.351522768962671e-07, "loss": 0.5698, "step": 20805 }, { "epoch": 1.86, "grad_norm": 5.808385857494279, "learning_rate": 1.3498550796797737e-07, "loss": 0.557, "step": 20806 }, { "epoch": 1.86, "grad_norm": 7.926376441595453, "learning_rate": 1.3481884058621108e-07, "loss": 0.5682, "step": 20807 }, { "epoch": 1.86, "grad_norm": 5.96441644043271, "learning_rate": 1.3465227475444708e-07, "loss": 0.4399, "step": 20808 }, { "epoch": 1.86, "grad_norm": 7.1681812128371565, "learning_rate": 1.344858104761615e-07, "loss": 0.5539, "step": 20809 }, { "epoch": 1.86, "grad_norm": 5.781405172357765, "learning_rate": 1.3431944775482984e-07, "loss": 0.5094, "step": 20810 }, { "epoch": 1.86, "grad_norm": 6.1363584471298145, "learning_rate": 1.341531865939244e-07, "loss": 0.5563, "step": 20811 }, { "epoch": 1.86, "grad_norm": 7.476718555566055, "learning_rate": 1.3398702699691512e-07, "loss": 0.6181, "step": 20812 }, { "epoch": 1.86, "grad_norm": 7.76993116161206, "learning_rate": 1.338209689672698e-07, "loss": 0.534, "step": 20813 }, { "epoch": 1.86, "grad_norm": 4.605057315364204, "learning_rate": 1.336550125084546e-07, "loss": 0.5288, "step": 20814 }, { "epoch": 1.86, "grad_norm": 7.196881878408631, "learning_rate": 1.3348915762393445e-07, "loss": 0.5766, "step": 20815 }, { "epoch": 1.86, "grad_norm": 7.4163874436204145, "learning_rate": 1.333234043171705e-07, "loss": 0.6161, "step": 20816 }, { "epoch": 1.86, "grad_norm": 6.419143592644683, "learning_rate": 1.3315775259162277e-07, "loss": 0.5454, "step": 20817 }, { "epoch": 1.86, "grad_norm": 7.045115092518291, "learning_rate": 1.3299220245074905e-07, "loss": 0.5819, "step": 20818 }, { "epoch": 1.86, "grad_norm": 5.637496955232273, "learning_rate": 1.3282675389800382e-07, "loss": 0.5535, "step": 20819 }, { "epoch": 1.86, "grad_norm": 7.353833345294148, "learning_rate": 1.3266140693684148e-07, "loss": 0.5358, "step": 20820 }, { "epoch": 1.86, "grad_norm": 7.091492953576116, "learning_rate": 1.3249616157071322e-07, "loss": 0.5758, "step": 20821 }, { "epoch": 1.86, "grad_norm": 7.654040464489238, "learning_rate": 1.3233101780306734e-07, "loss": 0.6387, "step": 20822 }, { "epoch": 1.86, "grad_norm": 5.171394925134168, "learning_rate": 1.3216597563735167e-07, "loss": 0.4972, "step": 20823 }, { "epoch": 1.86, "grad_norm": 5.989950266775283, "learning_rate": 1.320010350770118e-07, "loss": 0.5936, "step": 20824 }, { "epoch": 1.86, "grad_norm": 6.40998223035195, "learning_rate": 1.3183619612548882e-07, "loss": 0.5748, "step": 20825 }, { "epoch": 1.86, "grad_norm": 5.960343529002361, "learning_rate": 1.3167145878622446e-07, "loss": 0.5498, "step": 20826 }, { "epoch": 1.86, "grad_norm": 8.074714944985482, "learning_rate": 1.315068230626565e-07, "loss": 0.587, "step": 20827 }, { "epoch": 1.86, "grad_norm": 10.930983615210451, "learning_rate": 1.3134228895822166e-07, "loss": 0.5997, "step": 20828 }, { "epoch": 1.86, "grad_norm": 7.833404069106863, "learning_rate": 1.3117785647635438e-07, "loss": 0.5204, "step": 20829 }, { "epoch": 1.86, "grad_norm": 7.2435678506377705, "learning_rate": 1.3101352562048698e-07, "loss": 0.5362, "step": 20830 }, { "epoch": 1.86, "grad_norm": 4.9831901449143645, "learning_rate": 1.3084929639404997e-07, "loss": 0.5828, "step": 20831 }, { "epoch": 1.86, "grad_norm": 8.808364014579539, "learning_rate": 1.306851688004701e-07, "loss": 0.6058, "step": 20832 }, { "epoch": 1.86, "grad_norm": 6.259270901014186, "learning_rate": 1.3052114284317351e-07, "loss": 0.5461, "step": 20833 }, { "epoch": 1.86, "grad_norm": 9.385996076363854, "learning_rate": 1.3035721852558413e-07, "loss": 0.5949, "step": 20834 }, { "epoch": 1.86, "grad_norm": 7.876072029494765, "learning_rate": 1.3019339585112368e-07, "loss": 0.5218, "step": 20835 }, { "epoch": 1.86, "grad_norm": 5.732081845177594, "learning_rate": 1.3002967482321106e-07, "loss": 0.5774, "step": 20836 }, { "epoch": 1.86, "grad_norm": 7.692333821457527, "learning_rate": 1.2986605544526465e-07, "loss": 0.6332, "step": 20837 }, { "epoch": 1.86, "grad_norm": 5.651818823129153, "learning_rate": 1.297025377206984e-07, "loss": 0.5356, "step": 20838 }, { "epoch": 1.86, "grad_norm": 5.153943524509841, "learning_rate": 1.2953912165292626e-07, "loss": 0.586, "step": 20839 }, { "epoch": 1.86, "grad_norm": 6.24557584481248, "learning_rate": 1.293758072453588e-07, "loss": 0.5762, "step": 20840 }, { "epoch": 1.86, "grad_norm": 6.415173725690853, "learning_rate": 1.2921259450140445e-07, "loss": 0.5409, "step": 20841 }, { "epoch": 1.86, "grad_norm": 5.337528505154716, "learning_rate": 1.2904948342447043e-07, "loss": 0.5569, "step": 20842 }, { "epoch": 1.86, "grad_norm": 8.087247129376046, "learning_rate": 1.2888647401796129e-07, "loss": 0.5116, "step": 20843 }, { "epoch": 1.86, "grad_norm": 6.288121198820275, "learning_rate": 1.2872356628527982e-07, "loss": 0.5713, "step": 20844 }, { "epoch": 1.86, "grad_norm": 9.361416278766429, "learning_rate": 1.2856076022982557e-07, "loss": 0.6492, "step": 20845 }, { "epoch": 1.86, "grad_norm": 7.226769858454966, "learning_rate": 1.2839805585499744e-07, "loss": 0.596, "step": 20846 }, { "epoch": 1.86, "grad_norm": 7.005302319448655, "learning_rate": 1.2823545316419105e-07, "loss": 0.5067, "step": 20847 }, { "epoch": 1.86, "grad_norm": 7.01187269728679, "learning_rate": 1.280729521608004e-07, "loss": 0.5797, "step": 20848 }, { "epoch": 1.86, "grad_norm": 7.620131355174027, "learning_rate": 1.2791055284821774e-07, "loss": 0.5408, "step": 20849 }, { "epoch": 1.86, "grad_norm": 5.62215942999358, "learning_rate": 1.2774825522983314e-07, "loss": 0.5317, "step": 20850 }, { "epoch": 1.86, "grad_norm": 7.6831874515070515, "learning_rate": 1.275860593090328e-07, "loss": 0.5427, "step": 20851 }, { "epoch": 1.86, "grad_norm": 6.295267420827264, "learning_rate": 1.2742396508920342e-07, "loss": 0.495, "step": 20852 }, { "epoch": 1.86, "grad_norm": 8.049834909850775, "learning_rate": 1.2726197257372786e-07, "loss": 0.5081, "step": 20853 }, { "epoch": 1.86, "grad_norm": 7.0816144956393074, "learning_rate": 1.2710008176598843e-07, "loss": 0.5619, "step": 20854 }, { "epoch": 1.86, "grad_norm": 6.191829277850562, "learning_rate": 1.269382926693624e-07, "loss": 0.5489, "step": 20855 }, { "epoch": 1.86, "grad_norm": 7.426504896641189, "learning_rate": 1.2677660528722768e-07, "loss": 0.532, "step": 20856 }, { "epoch": 1.86, "grad_norm": 6.530443049887126, "learning_rate": 1.2661501962295985e-07, "loss": 0.5781, "step": 20857 }, { "epoch": 1.86, "grad_norm": 7.371552076556492, "learning_rate": 1.2645353567993014e-07, "loss": 0.5566, "step": 20858 }, { "epoch": 1.86, "grad_norm": 6.945652668476601, "learning_rate": 1.2629215346151025e-07, "loss": 0.5395, "step": 20859 }, { "epoch": 1.86, "grad_norm": 7.150754472671299, "learning_rate": 1.2613087297106863e-07, "loss": 0.4997, "step": 20860 }, { "epoch": 1.86, "grad_norm": 6.2640518658192885, "learning_rate": 1.259696942119709e-07, "loss": 0.5972, "step": 20861 }, { "epoch": 1.86, "grad_norm": 5.17709732377082, "learning_rate": 1.2580861718758275e-07, "loss": 0.5554, "step": 20862 }, { "epoch": 1.86, "grad_norm": 5.668104139429877, "learning_rate": 1.2564764190126477e-07, "loss": 0.5183, "step": 20863 }, { "epoch": 1.86, "grad_norm": 6.8538791527927065, "learning_rate": 1.2548676835637762e-07, "loss": 0.5451, "step": 20864 }, { "epoch": 1.86, "grad_norm": 6.031819516555116, "learning_rate": 1.253259965562792e-07, "loss": 0.5951, "step": 20865 }, { "epoch": 1.86, "grad_norm": 7.5835770701555685, "learning_rate": 1.2516532650432512e-07, "loss": 0.5433, "step": 20866 }, { "epoch": 1.86, "grad_norm": 7.904840384640677, "learning_rate": 1.2500475820386936e-07, "loss": 0.5453, "step": 20867 }, { "epoch": 1.86, "grad_norm": 5.49736475495323, "learning_rate": 1.2484429165826374e-07, "loss": 0.5838, "step": 20868 }, { "epoch": 1.86, "grad_norm": 7.474244509951982, "learning_rate": 1.2468392687085663e-07, "loss": 0.5884, "step": 20869 }, { "epoch": 1.86, "grad_norm": 7.347121883761366, "learning_rate": 1.2452366384499592e-07, "loss": 0.5726, "step": 20870 }, { "epoch": 1.86, "grad_norm": 8.336977061124907, "learning_rate": 1.2436350258402618e-07, "loss": 0.6086, "step": 20871 }, { "epoch": 1.86, "grad_norm": 7.483167921849278, "learning_rate": 1.242034430912914e-07, "loss": 0.561, "step": 20872 }, { "epoch": 1.86, "grad_norm": 6.553893645184795, "learning_rate": 1.2404348537013167e-07, "loss": 0.5229, "step": 20873 }, { "epoch": 1.86, "grad_norm": 9.888150082564447, "learning_rate": 1.2388362942388544e-07, "loss": 0.5729, "step": 20874 }, { "epoch": 1.86, "grad_norm": 6.166723804715248, "learning_rate": 1.2372387525589114e-07, "loss": 0.5586, "step": 20875 }, { "epoch": 1.86, "grad_norm": 6.031752396610235, "learning_rate": 1.235642228694811e-07, "loss": 0.5922, "step": 20876 }, { "epoch": 1.86, "grad_norm": 5.636527516106462, "learning_rate": 1.2340467226798935e-07, "loss": 0.6135, "step": 20877 }, { "epoch": 1.86, "grad_norm": 8.491756950911448, "learning_rate": 1.232452234547449e-07, "loss": 0.5919, "step": 20878 }, { "epoch": 1.86, "grad_norm": 9.593700584333767, "learning_rate": 1.230858764330767e-07, "loss": 0.5372, "step": 20879 }, { "epoch": 1.86, "grad_norm": 6.620912018552872, "learning_rate": 1.2292663120631042e-07, "loss": 0.5331, "step": 20880 }, { "epoch": 1.86, "grad_norm": 7.113053622045773, "learning_rate": 1.2276748777777124e-07, "loss": 0.6008, "step": 20881 }, { "epoch": 1.86, "grad_norm": 7.0007817788367195, "learning_rate": 1.226084461507787e-07, "loss": 0.5586, "step": 20882 }, { "epoch": 1.86, "grad_norm": 8.566830754484338, "learning_rate": 1.2244950632865405e-07, "loss": 0.5883, "step": 20883 }, { "epoch": 1.86, "grad_norm": 7.055544912452524, "learning_rate": 1.2229066831471404e-07, "loss": 0.5592, "step": 20884 }, { "epoch": 1.86, "grad_norm": 6.506648661483889, "learning_rate": 1.221319321122738e-07, "loss": 0.6495, "step": 20885 }, { "epoch": 1.86, "grad_norm": 5.552023336318937, "learning_rate": 1.2197329772464793e-07, "loss": 0.5753, "step": 20886 }, { "epoch": 1.86, "grad_norm": 7.438698453303239, "learning_rate": 1.21814765155146e-07, "loss": 0.5488, "step": 20887 }, { "epoch": 1.86, "grad_norm": 5.954586186042278, "learning_rate": 1.216563344070787e-07, "loss": 0.5422, "step": 20888 }, { "epoch": 1.86, "grad_norm": 8.804441995457722, "learning_rate": 1.2149800548375167e-07, "loss": 0.5825, "step": 20889 }, { "epoch": 1.86, "grad_norm": 5.234809180184427, "learning_rate": 1.213397783884701e-07, "loss": 0.496, "step": 20890 }, { "epoch": 1.86, "grad_norm": 7.548833529805375, "learning_rate": 1.2118165312453633e-07, "loss": 0.5632, "step": 20891 }, { "epoch": 1.86, "grad_norm": 5.539988231818698, "learning_rate": 1.2102362969525107e-07, "loss": 0.5504, "step": 20892 }, { "epoch": 1.86, "grad_norm": 5.211054118868753, "learning_rate": 1.2086570810391275e-07, "loss": 0.5877, "step": 20893 }, { "epoch": 1.86, "grad_norm": 8.198400075389241, "learning_rate": 1.207078883538182e-07, "loss": 0.6082, "step": 20894 }, { "epoch": 1.86, "grad_norm": 5.831563359671443, "learning_rate": 1.2055017044826033e-07, "loss": 0.5316, "step": 20895 }, { "epoch": 1.86, "grad_norm": 6.960349363198147, "learning_rate": 1.203925543905321e-07, "loss": 0.4851, "step": 20896 }, { "epoch": 1.86, "grad_norm": 6.347508470135518, "learning_rate": 1.2023504018392363e-07, "loss": 0.542, "step": 20897 }, { "epoch": 1.86, "grad_norm": 7.668066741262089, "learning_rate": 1.200776278317217e-07, "loss": 0.5246, "step": 20898 }, { "epoch": 1.86, "grad_norm": 8.318392218451947, "learning_rate": 1.1992031733721265e-07, "loss": 0.5722, "step": 20899 }, { "epoch": 1.86, "grad_norm": 7.3442136685444215, "learning_rate": 1.1976310870367935e-07, "loss": 0.4921, "step": 20900 }, { "epoch": 1.86, "grad_norm": 6.285326748487118, "learning_rate": 1.1960600193440418e-07, "loss": 0.6312, "step": 20901 }, { "epoch": 1.86, "grad_norm": 6.6632030455943045, "learning_rate": 1.1944899703266566e-07, "loss": 0.5718, "step": 20902 }, { "epoch": 1.86, "grad_norm": 7.882800978957217, "learning_rate": 1.1929209400174058e-07, "loss": 0.5611, "step": 20903 }, { "epoch": 1.86, "grad_norm": 7.551947262804913, "learning_rate": 1.1913529284490522e-07, "loss": 0.5343, "step": 20904 }, { "epoch": 1.86, "grad_norm": 7.936624604155431, "learning_rate": 1.1897859356543085e-07, "loss": 0.5736, "step": 20905 }, { "epoch": 1.87, "grad_norm": 6.756978794402099, "learning_rate": 1.1882199616658985e-07, "loss": 0.512, "step": 20906 }, { "epoch": 1.87, "grad_norm": 6.1468495364979425, "learning_rate": 1.1866550065165017e-07, "loss": 0.5894, "step": 20907 }, { "epoch": 1.87, "grad_norm": 7.674355866854298, "learning_rate": 1.1850910702387752e-07, "loss": 0.5549, "step": 20908 }, { "epoch": 1.87, "grad_norm": 8.934674538597452, "learning_rate": 1.1835281528653763e-07, "loss": 0.5953, "step": 20909 }, { "epoch": 1.87, "grad_norm": 6.311335061490136, "learning_rate": 1.1819662544289178e-07, "loss": 0.5661, "step": 20910 }, { "epoch": 1.87, "grad_norm": 7.720342663478497, "learning_rate": 1.1804053749620015e-07, "loss": 0.4986, "step": 20911 }, { "epoch": 1.87, "grad_norm": 7.933016112160173, "learning_rate": 1.1788455144972178e-07, "loss": 0.5975, "step": 20912 }, { "epoch": 1.87, "grad_norm": 5.853590284522304, "learning_rate": 1.1772866730671128e-07, "loss": 0.5763, "step": 20913 }, { "epoch": 1.87, "grad_norm": 5.609953813246108, "learning_rate": 1.1757288507042331e-07, "loss": 0.5236, "step": 20914 }, { "epoch": 1.87, "grad_norm": 6.1080928441080165, "learning_rate": 1.17417204744108e-07, "loss": 0.5645, "step": 20915 }, { "epoch": 1.87, "grad_norm": 6.319052581198775, "learning_rate": 1.1726162633101668e-07, "loss": 0.5532, "step": 20916 }, { "epoch": 1.87, "grad_norm": 8.300198881567914, "learning_rate": 1.1710614983439561e-07, "loss": 0.6368, "step": 20917 }, { "epoch": 1.87, "grad_norm": 8.485380255574585, "learning_rate": 1.1695077525748999e-07, "loss": 0.6009, "step": 20918 }, { "epoch": 1.87, "grad_norm": 8.273778558564048, "learning_rate": 1.1679550260354334e-07, "loss": 0.5272, "step": 20919 }, { "epoch": 1.87, "grad_norm": 7.059699153916687, "learning_rate": 1.1664033187579749e-07, "loss": 0.61, "step": 20920 }, { "epoch": 1.87, "grad_norm": 8.274940176486949, "learning_rate": 1.164852630774893e-07, "loss": 0.5857, "step": 20921 }, { "epoch": 1.87, "grad_norm": 5.672126319038237, "learning_rate": 1.1633029621185677e-07, "loss": 0.6091, "step": 20922 }, { "epoch": 1.87, "grad_norm": 6.810306771626415, "learning_rate": 1.161754312821345e-07, "loss": 0.5741, "step": 20923 }, { "epoch": 1.87, "grad_norm": 8.37046194831419, "learning_rate": 1.1602066829155434e-07, "loss": 0.5647, "step": 20924 }, { "epoch": 1.87, "grad_norm": 5.998727723491224, "learning_rate": 1.1586600724334707e-07, "loss": 0.5206, "step": 20925 }, { "epoch": 1.87, "grad_norm": 7.77527443332122, "learning_rate": 1.157114481407412e-07, "loss": 0.583, "step": 20926 }, { "epoch": 1.87, "grad_norm": 9.44946736760188, "learning_rate": 1.155569909869625e-07, "loss": 0.5324, "step": 20927 }, { "epoch": 1.87, "grad_norm": 10.616989072778397, "learning_rate": 1.1540263578523447e-07, "loss": 0.5438, "step": 20928 }, { "epoch": 1.87, "grad_norm": 6.702694502700782, "learning_rate": 1.1524838253877956e-07, "loss": 0.5663, "step": 20929 }, { "epoch": 1.87, "grad_norm": 7.639624171003314, "learning_rate": 1.1509423125081687e-07, "loss": 0.5548, "step": 20930 }, { "epoch": 1.87, "grad_norm": 5.129699777622099, "learning_rate": 1.1494018192456491e-07, "loss": 0.5153, "step": 20931 }, { "epoch": 1.87, "grad_norm": 5.229384835533354, "learning_rate": 1.1478623456323778e-07, "loss": 0.5887, "step": 20932 }, { "epoch": 1.87, "grad_norm": 8.334385949405162, "learning_rate": 1.146323891700507e-07, "loss": 0.5369, "step": 20933 }, { "epoch": 1.87, "grad_norm": 10.752366608220989, "learning_rate": 1.1447864574821277e-07, "loss": 0.6436, "step": 20934 }, { "epoch": 1.87, "grad_norm": 6.002575225995843, "learning_rate": 1.1432500430093418e-07, "loss": 0.5511, "step": 20935 }, { "epoch": 1.87, "grad_norm": 6.088028013609253, "learning_rate": 1.1417146483142183e-07, "loss": 0.5521, "step": 20936 }, { "epoch": 1.87, "grad_norm": 7.6453532235425055, "learning_rate": 1.1401802734287981e-07, "loss": 0.5975, "step": 20937 }, { "epoch": 1.87, "grad_norm": 6.321766881540197, "learning_rate": 1.1386469183851223e-07, "loss": 0.6044, "step": 20938 }, { "epoch": 1.87, "grad_norm": 8.173029638014793, "learning_rate": 1.1371145832151875e-07, "loss": 0.6119, "step": 20939 }, { "epoch": 1.87, "grad_norm": 7.131477204313819, "learning_rate": 1.1355832679509737e-07, "loss": 0.5596, "step": 20940 }, { "epoch": 1.87, "grad_norm": 6.580047662161449, "learning_rate": 1.1340529726244497e-07, "loss": 0.6023, "step": 20941 }, { "epoch": 1.87, "grad_norm": 6.619435017507248, "learning_rate": 1.1325236972675513e-07, "loss": 0.5588, "step": 20942 }, { "epoch": 1.87, "grad_norm": 6.683804453365449, "learning_rate": 1.1309954419122083e-07, "loss": 0.5869, "step": 20943 }, { "epoch": 1.87, "grad_norm": 7.09944224744641, "learning_rate": 1.1294682065903062e-07, "loss": 0.6749, "step": 20944 }, { "epoch": 1.87, "grad_norm": 7.5487041492200975, "learning_rate": 1.127941991333742e-07, "loss": 0.5304, "step": 20945 }, { "epoch": 1.87, "grad_norm": 6.446764296626316, "learning_rate": 1.1264167961743511e-07, "loss": 0.547, "step": 20946 }, { "epoch": 1.87, "grad_norm": 6.2545087510964334, "learning_rate": 1.1248926211439804e-07, "loss": 0.5487, "step": 20947 }, { "epoch": 1.87, "grad_norm": 7.510454450931272, "learning_rate": 1.1233694662744433e-07, "loss": 0.604, "step": 20948 }, { "epoch": 1.87, "grad_norm": 6.054648532294999, "learning_rate": 1.1218473315975309e-07, "loss": 0.5751, "step": 20949 }, { "epoch": 1.87, "grad_norm": 7.07277547793017, "learning_rate": 1.1203262171450125e-07, "loss": 0.5575, "step": 20950 }, { "epoch": 1.87, "grad_norm": 7.336523506779946, "learning_rate": 1.1188061229486457e-07, "loss": 0.5812, "step": 20951 }, { "epoch": 1.87, "grad_norm": 8.672214679922554, "learning_rate": 1.1172870490401443e-07, "loss": 0.5661, "step": 20952 }, { "epoch": 1.87, "grad_norm": 7.9845773082643925, "learning_rate": 1.1157689954512274e-07, "loss": 0.6505, "step": 20953 }, { "epoch": 1.87, "grad_norm": 6.350269679368775, "learning_rate": 1.114251962213575e-07, "loss": 0.6356, "step": 20954 }, { "epoch": 1.87, "grad_norm": 6.744722879055875, "learning_rate": 1.1127359493588675e-07, "loss": 0.5528, "step": 20955 }, { "epoch": 1.87, "grad_norm": 5.24211728915567, "learning_rate": 1.1112209569187237e-07, "loss": 0.6083, "step": 20956 }, { "epoch": 1.87, "grad_norm": 5.003613297908868, "learning_rate": 1.1097069849247799e-07, "loss": 0.5467, "step": 20957 }, { "epoch": 1.87, "grad_norm": 7.354998931589032, "learning_rate": 1.1081940334086383e-07, "loss": 0.6045, "step": 20958 }, { "epoch": 1.87, "grad_norm": 7.334818328275256, "learning_rate": 1.1066821024018737e-07, "loss": 0.5575, "step": 20959 }, { "epoch": 1.87, "grad_norm": 7.340929795306152, "learning_rate": 1.1051711919360442e-07, "loss": 0.6377, "step": 20960 }, { "epoch": 1.87, "grad_norm": 6.7613701049822925, "learning_rate": 1.1036613020426857e-07, "loss": 0.5538, "step": 20961 }, { "epoch": 1.87, "grad_norm": 7.517978576874103, "learning_rate": 1.1021524327533173e-07, "loss": 0.5762, "step": 20962 }, { "epoch": 1.87, "grad_norm": 6.2094377973948465, "learning_rate": 1.1006445840994362e-07, "loss": 0.5722, "step": 20963 }, { "epoch": 1.87, "grad_norm": 5.1648454765545, "learning_rate": 1.0991377561125117e-07, "loss": 0.6041, "step": 20964 }, { "epoch": 1.87, "grad_norm": 6.6184597610100715, "learning_rate": 1.0976319488239962e-07, "loss": 0.6285, "step": 20965 }, { "epoch": 1.87, "grad_norm": 6.117048501331203, "learning_rate": 1.0961271622653146e-07, "loss": 0.5369, "step": 20966 }, { "epoch": 1.87, "grad_norm": 7.095108293115146, "learning_rate": 1.094623396467881e-07, "loss": 0.5412, "step": 20967 }, { "epoch": 1.87, "grad_norm": 5.926405046707017, "learning_rate": 1.0931206514630865e-07, "loss": 0.5289, "step": 20968 }, { "epoch": 1.87, "grad_norm": 5.513219753036493, "learning_rate": 1.0916189272822952e-07, "loss": 0.5609, "step": 20969 }, { "epoch": 1.87, "grad_norm": 6.2191461126698595, "learning_rate": 1.0901182239568486e-07, "loss": 0.5208, "step": 20970 }, { "epoch": 1.87, "grad_norm": 7.723353851902933, "learning_rate": 1.0886185415180772e-07, "loss": 0.5385, "step": 20971 }, { "epoch": 1.87, "grad_norm": 7.861800870430771, "learning_rate": 1.0871198799972727e-07, "loss": 0.5556, "step": 20972 }, { "epoch": 1.87, "grad_norm": 6.978839196497429, "learning_rate": 1.0856222394257266e-07, "loss": 0.5457, "step": 20973 }, { "epoch": 1.87, "grad_norm": 7.247056373850858, "learning_rate": 1.0841256198346972e-07, "loss": 0.5456, "step": 20974 }, { "epoch": 1.87, "grad_norm": 5.466506386462839, "learning_rate": 1.0826300212554152e-07, "loss": 0.5465, "step": 20975 }, { "epoch": 1.87, "grad_norm": 5.973019177804934, "learning_rate": 1.0811354437191112e-07, "loss": 0.6181, "step": 20976 }, { "epoch": 1.87, "grad_norm": 6.052942396593039, "learning_rate": 1.079641887256977e-07, "loss": 0.5722, "step": 20977 }, { "epoch": 1.87, "grad_norm": 9.88685081089164, "learning_rate": 1.0781493519001762e-07, "loss": 0.657, "step": 20978 }, { "epoch": 1.87, "grad_norm": 8.703250707936471, "learning_rate": 1.076657837679873e-07, "loss": 0.6004, "step": 20979 }, { "epoch": 1.87, "grad_norm": 6.4981857177443105, "learning_rate": 1.0751673446271982e-07, "loss": 0.5229, "step": 20980 }, { "epoch": 1.87, "grad_norm": 5.9910162353900835, "learning_rate": 1.0736778727732599e-07, "loss": 0.5412, "step": 20981 }, { "epoch": 1.87, "grad_norm": 7.811396228691841, "learning_rate": 1.0721894221491503e-07, "loss": 0.5812, "step": 20982 }, { "epoch": 1.87, "grad_norm": 6.135355408093023, "learning_rate": 1.0707019927859442e-07, "loss": 0.5629, "step": 20983 }, { "epoch": 1.87, "grad_norm": 7.618619654228134, "learning_rate": 1.0692155847146778e-07, "loss": 0.5609, "step": 20984 }, { "epoch": 1.87, "grad_norm": 7.008841302458632, "learning_rate": 1.0677301979663712e-07, "loss": 0.5995, "step": 20985 }, { "epoch": 1.87, "grad_norm": 8.076161352596728, "learning_rate": 1.0662458325720437e-07, "loss": 0.5478, "step": 20986 }, { "epoch": 1.87, "grad_norm": 6.39499712359614, "learning_rate": 1.0647624885626651e-07, "loss": 0.5045, "step": 20987 }, { "epoch": 1.87, "grad_norm": 6.227426236675855, "learning_rate": 1.0632801659692049e-07, "loss": 0.5288, "step": 20988 }, { "epoch": 1.87, "grad_norm": 7.673261044738194, "learning_rate": 1.0617988648226052e-07, "loss": 0.5762, "step": 20989 }, { "epoch": 1.87, "grad_norm": 5.52449066866207, "learning_rate": 1.0603185851537801e-07, "loss": 0.6155, "step": 20990 }, { "epoch": 1.87, "grad_norm": 6.871960270340291, "learning_rate": 1.0588393269936271e-07, "loss": 0.5442, "step": 20991 }, { "epoch": 1.87, "grad_norm": 6.360867096397531, "learning_rate": 1.057361090373027e-07, "loss": 0.6224, "step": 20992 }, { "epoch": 1.87, "grad_norm": 8.191812047350124, "learning_rate": 1.0558838753228274e-07, "loss": 0.5469, "step": 20993 }, { "epoch": 1.87, "grad_norm": 8.220967750961645, "learning_rate": 1.0544076818738647e-07, "loss": 0.567, "step": 20994 }, { "epoch": 1.87, "grad_norm": 5.918973395484816, "learning_rate": 1.0529325100569587e-07, "loss": 0.5688, "step": 20995 }, { "epoch": 1.87, "grad_norm": 7.1131889417481675, "learning_rate": 1.051458359902896e-07, "loss": 0.577, "step": 20996 }, { "epoch": 1.87, "grad_norm": 7.500139646554104, "learning_rate": 1.0499852314424352e-07, "loss": 0.5688, "step": 20997 }, { "epoch": 1.87, "grad_norm": 5.784883374748337, "learning_rate": 1.0485131247063462e-07, "loss": 0.5661, "step": 20998 }, { "epoch": 1.87, "grad_norm": 5.105611509148954, "learning_rate": 1.0470420397253323e-07, "loss": 0.531, "step": 20999 }, { "epoch": 1.87, "grad_norm": 7.081659600619727, "learning_rate": 1.0455719765301132e-07, "loss": 0.5507, "step": 21000 }, { "epoch": 1.87, "grad_norm": 5.7648986896866505, "learning_rate": 1.0441029351513754e-07, "loss": 0.5222, "step": 21001 }, { "epoch": 1.87, "grad_norm": 6.882754037045833, "learning_rate": 1.0426349156197724e-07, "loss": 0.6034, "step": 21002 }, { "epoch": 1.87, "grad_norm": 8.564706896905118, "learning_rate": 1.0411679179659572e-07, "loss": 0.6193, "step": 21003 }, { "epoch": 1.87, "grad_norm": 5.9486969852878975, "learning_rate": 1.039701942220539e-07, "loss": 0.5172, "step": 21004 }, { "epoch": 1.87, "grad_norm": 7.182818354273246, "learning_rate": 1.0382369884141208e-07, "loss": 0.5954, "step": 21005 }, { "epoch": 1.87, "grad_norm": 6.117792817763418, "learning_rate": 1.0367730565772782e-07, "loss": 0.5806, "step": 21006 }, { "epoch": 1.87, "grad_norm": 7.744833521114091, "learning_rate": 1.0353101467405757e-07, "loss": 0.5652, "step": 21007 }, { "epoch": 1.87, "grad_norm": 7.043595638035254, "learning_rate": 1.0338482589345388e-07, "loss": 0.5976, "step": 21008 }, { "epoch": 1.87, "grad_norm": 6.92044000804046, "learning_rate": 1.0323873931896932e-07, "loss": 0.6178, "step": 21009 }, { "epoch": 1.87, "grad_norm": 5.756595721755721, "learning_rate": 1.0309275495365145e-07, "loss": 0.5571, "step": 21010 }, { "epoch": 1.87, "grad_norm": 7.8404246638191335, "learning_rate": 1.0294687280054893e-07, "loss": 0.5876, "step": 21011 }, { "epoch": 1.87, "grad_norm": 6.324573143669161, "learning_rate": 1.0280109286270545e-07, "loss": 0.5683, "step": 21012 }, { "epoch": 1.87, "grad_norm": 5.392224068117696, "learning_rate": 1.0265541514316524e-07, "loss": 0.5924, "step": 21013 }, { "epoch": 1.87, "grad_norm": 4.7213037041337795, "learning_rate": 1.0250983964496753e-07, "loss": 0.5692, "step": 21014 }, { "epoch": 1.87, "grad_norm": 7.3262848901877815, "learning_rate": 1.023643663711521e-07, "loss": 0.5796, "step": 21015 }, { "epoch": 1.87, "grad_norm": 7.765760935001783, "learning_rate": 1.0221899532475487e-07, "loss": 0.5945, "step": 21016 }, { "epoch": 1.87, "grad_norm": 4.663834158201626, "learning_rate": 1.0207372650881009e-07, "loss": 0.5627, "step": 21017 }, { "epoch": 1.88, "grad_norm": 6.761356836217155, "learning_rate": 1.0192855992634976e-07, "loss": 0.5876, "step": 21018 }, { "epoch": 1.88, "grad_norm": 6.477685171264411, "learning_rate": 1.0178349558040423e-07, "loss": 0.5668, "step": 21019 }, { "epoch": 1.88, "grad_norm": 6.966789202727508, "learning_rate": 1.016385334740011e-07, "loss": 0.5409, "step": 21020 }, { "epoch": 1.88, "grad_norm": 8.540290575300181, "learning_rate": 1.0149367361016681e-07, "loss": 0.5866, "step": 21021 }, { "epoch": 1.88, "grad_norm": 5.877045220239096, "learning_rate": 1.0134891599192454e-07, "loss": 0.5554, "step": 21022 }, { "epoch": 1.88, "grad_norm": 6.4945530247157635, "learning_rate": 1.0120426062229516e-07, "loss": 0.6048, "step": 21023 }, { "epoch": 1.88, "grad_norm": 5.756123808370641, "learning_rate": 1.0105970750429905e-07, "loss": 0.5804, "step": 21024 }, { "epoch": 1.88, "grad_norm": 5.80553577021573, "learning_rate": 1.0091525664095325e-07, "loss": 0.5756, "step": 21025 }, { "epoch": 1.88, "grad_norm": 8.028187409184863, "learning_rate": 1.0077090803527201e-07, "loss": 0.5451, "step": 21026 }, { "epoch": 1.88, "grad_norm": 5.726121164201468, "learning_rate": 1.0062666169027013e-07, "loss": 0.5572, "step": 21027 }, { "epoch": 1.88, "grad_norm": 8.044147922793677, "learning_rate": 1.0048251760895633e-07, "loss": 0.573, "step": 21028 }, { "epoch": 1.88, "grad_norm": 8.32404009080991, "learning_rate": 1.0033847579434097e-07, "loss": 0.53, "step": 21029 }, { "epoch": 1.88, "grad_norm": 6.017190452028039, "learning_rate": 1.0019453624942888e-07, "loss": 0.6251, "step": 21030 }, { "epoch": 1.88, "grad_norm": 6.984505869247188, "learning_rate": 1.0005069897722596e-07, "loss": 0.5828, "step": 21031 }, { "epoch": 1.88, "grad_norm": 8.251839727816092, "learning_rate": 9.990696398073374e-08, "loss": 0.5571, "step": 21032 }, { "epoch": 1.88, "grad_norm": 9.674990465783313, "learning_rate": 9.976333126295257e-08, "loss": 0.5807, "step": 21033 }, { "epoch": 1.88, "grad_norm": 6.183177169012831, "learning_rate": 9.961980082688116e-08, "loss": 0.5347, "step": 21034 }, { "epoch": 1.88, "grad_norm": 7.2984786773149954, "learning_rate": 9.947637267551436e-08, "loss": 0.5431, "step": 21035 }, { "epoch": 1.88, "grad_norm": 8.213698527413909, "learning_rate": 9.933304681184585e-08, "loss": 0.5312, "step": 21036 }, { "epoch": 1.88, "grad_norm": 6.125524649620422, "learning_rate": 9.918982323886828e-08, "loss": 0.5488, "step": 21037 }, { "epoch": 1.88, "grad_norm": 7.878777547878165, "learning_rate": 9.904670195957034e-08, "loss": 0.6652, "step": 21038 }, { "epoch": 1.88, "grad_norm": 7.154940016007884, "learning_rate": 9.890368297693965e-08, "loss": 0.5184, "step": 21039 }, { "epoch": 1.88, "grad_norm": 9.121495592600011, "learning_rate": 9.876076629396158e-08, "loss": 0.5908, "step": 21040 }, { "epoch": 1.88, "grad_norm": 7.9152238896204805, "learning_rate": 9.861795191361933e-08, "loss": 0.5123, "step": 21041 }, { "epoch": 1.88, "grad_norm": 5.2665991966493175, "learning_rate": 9.847523983889273e-08, "loss": 0.6131, "step": 21042 }, { "epoch": 1.88, "grad_norm": 6.613200426822425, "learning_rate": 9.833263007276162e-08, "loss": 0.6033, "step": 21043 }, { "epoch": 1.88, "grad_norm": 6.690416990376193, "learning_rate": 9.819012261820193e-08, "loss": 0.5369, "step": 21044 }, { "epoch": 1.88, "grad_norm": 6.902464524271597, "learning_rate": 9.804771747818909e-08, "loss": 0.5978, "step": 21045 }, { "epoch": 1.88, "grad_norm": 6.754476106472237, "learning_rate": 9.790541465569513e-08, "loss": 0.6242, "step": 21046 }, { "epoch": 1.88, "grad_norm": 7.929307900018466, "learning_rate": 9.77632141536905e-08, "loss": 0.5393, "step": 21047 }, { "epoch": 1.88, "grad_norm": 8.144310903613064, "learning_rate": 9.762111597514224e-08, "loss": 0.5392, "step": 21048 }, { "epoch": 1.88, "grad_norm": 8.798252309915991, "learning_rate": 9.747912012301796e-08, "loss": 0.5879, "step": 21049 }, { "epoch": 1.88, "grad_norm": 7.087811128086951, "learning_rate": 9.733722660028033e-08, "loss": 0.5224, "step": 21050 }, { "epoch": 1.88, "grad_norm": 7.717106677846806, "learning_rate": 9.719543540989084e-08, "loss": 0.5735, "step": 21051 }, { "epoch": 1.88, "grad_norm": 7.386410185427043, "learning_rate": 9.705374655481048e-08, "loss": 0.5465, "step": 21052 }, { "epoch": 1.88, "grad_norm": 8.342076035618675, "learning_rate": 9.691216003799631e-08, "loss": 0.6147, "step": 21053 }, { "epoch": 1.88, "grad_norm": 6.373354154442819, "learning_rate": 9.677067586240208e-08, "loss": 0.5481, "step": 21054 }, { "epoch": 1.88, "grad_norm": 6.026883464010685, "learning_rate": 9.662929403098264e-08, "loss": 0.5855, "step": 21055 }, { "epoch": 1.88, "grad_norm": 8.948727389901416, "learning_rate": 9.6488014546689e-08, "loss": 0.5914, "step": 21056 }, { "epoch": 1.88, "grad_norm": 7.0119345841496035, "learning_rate": 9.634683741246876e-08, "loss": 0.512, "step": 21057 }, { "epoch": 1.88, "grad_norm": 7.93871094655152, "learning_rate": 9.620576263126957e-08, "loss": 0.6586, "step": 21058 }, { "epoch": 1.88, "grad_norm": 6.693270915639578, "learning_rate": 9.606479020603632e-08, "loss": 0.6209, "step": 21059 }, { "epoch": 1.88, "grad_norm": 6.232523815545704, "learning_rate": 9.592392013971108e-08, "loss": 0.5739, "step": 21060 }, { "epoch": 1.88, "grad_norm": 8.112241687000637, "learning_rate": 9.578315243523429e-08, "loss": 0.5929, "step": 21061 }, { "epoch": 1.88, "grad_norm": 6.042098238482723, "learning_rate": 9.564248709554414e-08, "loss": 0.5416, "step": 21062 }, { "epoch": 1.88, "grad_norm": 7.324781172846449, "learning_rate": 9.550192412357606e-08, "loss": 0.6331, "step": 21063 }, { "epoch": 1.88, "grad_norm": 8.330404089884158, "learning_rate": 9.53614635222655e-08, "loss": 0.5862, "step": 21064 }, { "epoch": 1.88, "grad_norm": 8.357932501359688, "learning_rate": 9.522110529454287e-08, "loss": 0.5769, "step": 21065 }, { "epoch": 1.88, "grad_norm": 6.0861906281064835, "learning_rate": 9.508084944333862e-08, "loss": 0.5198, "step": 21066 }, { "epoch": 1.88, "grad_norm": 6.706221033533223, "learning_rate": 9.494069597157985e-08, "loss": 0.6321, "step": 21067 }, { "epoch": 1.88, "grad_norm": 5.181119666370835, "learning_rate": 9.480064488219254e-08, "loss": 0.4918, "step": 21068 }, { "epoch": 1.88, "grad_norm": 7.457176072222198, "learning_rate": 9.466069617809937e-08, "loss": 0.6288, "step": 21069 }, { "epoch": 1.88, "grad_norm": 9.307425518226696, "learning_rate": 9.452084986222187e-08, "loss": 0.5613, "step": 21070 }, { "epoch": 1.88, "grad_norm": 7.995049484840391, "learning_rate": 9.438110593747829e-08, "loss": 0.5403, "step": 21071 }, { "epoch": 1.88, "grad_norm": 8.358372076397222, "learning_rate": 9.424146440678628e-08, "loss": 0.4983, "step": 21072 }, { "epoch": 1.88, "grad_norm": 10.894386721979007, "learning_rate": 9.410192527306072e-08, "loss": 0.576, "step": 21073 }, { "epoch": 1.88, "grad_norm": 7.358989337853672, "learning_rate": 9.39624885392132e-08, "loss": 0.5125, "step": 21074 }, { "epoch": 1.88, "grad_norm": 9.017168031045633, "learning_rate": 9.382315420815469e-08, "loss": 0.5543, "step": 21075 }, { "epoch": 1.88, "grad_norm": 7.25357257921551, "learning_rate": 9.368392228279344e-08, "loss": 0.5578, "step": 21076 }, { "epoch": 1.88, "grad_norm": 12.40406806151217, "learning_rate": 9.354479276603546e-08, "loss": 0.6401, "step": 21077 }, { "epoch": 1.88, "grad_norm": 7.523919425834857, "learning_rate": 9.340576566078563e-08, "loss": 0.5109, "step": 21078 }, { "epoch": 1.88, "grad_norm": 7.746705850901944, "learning_rate": 9.326684096994498e-08, "loss": 0.5777, "step": 21079 }, { "epoch": 1.88, "grad_norm": 5.720976722641286, "learning_rate": 9.312801869641341e-08, "loss": 0.5493, "step": 21080 }, { "epoch": 1.88, "grad_norm": 6.147691602844768, "learning_rate": 9.29892988430886e-08, "loss": 0.5294, "step": 21081 }, { "epoch": 1.88, "grad_norm": 5.4350075382095335, "learning_rate": 9.285068141286547e-08, "loss": 0.535, "step": 21082 }, { "epoch": 1.88, "grad_norm": 4.994920264991677, "learning_rate": 9.271216640863834e-08, "loss": 0.5257, "step": 21083 }, { "epoch": 1.88, "grad_norm": 10.093781463787423, "learning_rate": 9.257375383329825e-08, "loss": 0.5624, "step": 21084 }, { "epoch": 1.88, "grad_norm": 6.430438183147152, "learning_rate": 9.2435443689734e-08, "loss": 0.5839, "step": 21085 }, { "epoch": 1.88, "grad_norm": 7.982229787678982, "learning_rate": 9.229723598083218e-08, "loss": 0.5251, "step": 21086 }, { "epoch": 1.88, "grad_norm": 6.998294347362701, "learning_rate": 9.215913070947769e-08, "loss": 0.5367, "step": 21087 }, { "epoch": 1.88, "grad_norm": 9.631673910914987, "learning_rate": 9.202112787855377e-08, "loss": 0.5538, "step": 21088 }, { "epoch": 1.88, "grad_norm": 7.076637514098293, "learning_rate": 9.188322749094037e-08, "loss": 0.5761, "step": 21089 }, { "epoch": 1.88, "grad_norm": 5.797484830147023, "learning_rate": 9.174542954951571e-08, "loss": 0.5927, "step": 21090 }, { "epoch": 1.88, "grad_norm": 5.26199837826957, "learning_rate": 9.160773405715639e-08, "loss": 0.5355, "step": 21091 }, { "epoch": 1.88, "grad_norm": 6.311031241913768, "learning_rate": 9.14701410167368e-08, "loss": 0.5415, "step": 21092 }, { "epoch": 1.88, "grad_norm": 6.031135181305323, "learning_rate": 9.133265043112848e-08, "loss": 0.5719, "step": 21093 }, { "epoch": 1.88, "grad_norm": 7.08666209420605, "learning_rate": 9.119526230320086e-08, "loss": 0.5189, "step": 21094 }, { "epoch": 1.88, "grad_norm": 7.278733883808456, "learning_rate": 9.105797663582217e-08, "loss": 0.5825, "step": 21095 }, { "epoch": 1.88, "grad_norm": 6.731023127240319, "learning_rate": 9.09207934318579e-08, "loss": 0.5376, "step": 21096 }, { "epoch": 1.88, "grad_norm": 7.491362047355932, "learning_rate": 9.07837126941713e-08, "loss": 0.5216, "step": 21097 }, { "epoch": 1.88, "grad_norm": 8.135138066493788, "learning_rate": 9.064673442562399e-08, "loss": 0.5512, "step": 21098 }, { "epoch": 1.88, "grad_norm": 6.720871081480444, "learning_rate": 9.05098586290748e-08, "loss": 0.5985, "step": 21099 }, { "epoch": 1.88, "grad_norm": 7.329273175589783, "learning_rate": 9.037308530738032e-08, "loss": 0.6054, "step": 21100 }, { "epoch": 1.88, "grad_norm": 6.983989579848732, "learning_rate": 9.023641446339604e-08, "loss": 0.5724, "step": 21101 }, { "epoch": 1.88, "grad_norm": 7.68364464774362, "learning_rate": 9.009984609997414e-08, "loss": 0.5728, "step": 21102 }, { "epoch": 1.88, "grad_norm": 5.45456930749683, "learning_rate": 8.996338021996564e-08, "loss": 0.5548, "step": 21103 }, { "epoch": 1.88, "grad_norm": 4.485024020850431, "learning_rate": 8.982701682621887e-08, "loss": 0.5456, "step": 21104 }, { "epoch": 1.88, "grad_norm": 11.012736006234121, "learning_rate": 8.969075592158039e-08, "loss": 0.5319, "step": 21105 }, { "epoch": 1.88, "grad_norm": 4.680700642372978, "learning_rate": 8.955459750889295e-08, "loss": 0.5453, "step": 21106 }, { "epoch": 1.88, "grad_norm": 6.97370394914972, "learning_rate": 8.941854159100038e-08, "loss": 0.6031, "step": 21107 }, { "epoch": 1.88, "grad_norm": 10.223889324051077, "learning_rate": 8.928258817074153e-08, "loss": 0.5341, "step": 21108 }, { "epoch": 1.88, "grad_norm": 9.920022119853549, "learning_rate": 8.914673725095413e-08, "loss": 0.58, "step": 21109 }, { "epoch": 1.88, "grad_norm": 6.539416770130542, "learning_rate": 8.901098883447478e-08, "loss": 0.5349, "step": 21110 }, { "epoch": 1.88, "grad_norm": 7.829898130753236, "learning_rate": 8.887534292413569e-08, "loss": 0.5597, "step": 21111 }, { "epoch": 1.88, "grad_norm": 6.405169741408688, "learning_rate": 8.873979952276846e-08, "loss": 0.5541, "step": 21112 }, { "epoch": 1.88, "grad_norm": 6.890574789813791, "learning_rate": 8.860435863320249e-08, "loss": 0.5733, "step": 21113 }, { "epoch": 1.88, "grad_norm": 7.06281027199534, "learning_rate": 8.846902025826498e-08, "loss": 0.6075, "step": 21114 }, { "epoch": 1.88, "grad_norm": 6.769969459543523, "learning_rate": 8.833378440078034e-08, "loss": 0.6162, "step": 21115 }, { "epoch": 1.88, "grad_norm": 7.033262339254884, "learning_rate": 8.819865106357183e-08, "loss": 0.5981, "step": 21116 }, { "epoch": 1.88, "grad_norm": 6.9277828219379085, "learning_rate": 8.806362024946003e-08, "loss": 0.55, "step": 21117 }, { "epoch": 1.88, "grad_norm": 4.531070891734815, "learning_rate": 8.792869196126263e-08, "loss": 0.4928, "step": 21118 }, { "epoch": 1.88, "grad_norm": 8.297239634440928, "learning_rate": 8.779386620179687e-08, "loss": 0.5872, "step": 21119 }, { "epoch": 1.88, "grad_norm": 8.347112691020662, "learning_rate": 8.765914297387657e-08, "loss": 0.5995, "step": 21120 }, { "epoch": 1.88, "grad_norm": 6.840416030735981, "learning_rate": 8.752452228031339e-08, "loss": 0.5812, "step": 21121 }, { "epoch": 1.88, "grad_norm": 7.97885316150236, "learning_rate": 8.739000412391785e-08, "loss": 0.5771, "step": 21122 }, { "epoch": 1.88, "grad_norm": 9.114544796708218, "learning_rate": 8.725558850749827e-08, "loss": 0.5627, "step": 21123 }, { "epoch": 1.88, "grad_norm": 10.219581524215707, "learning_rate": 8.712127543385907e-08, "loss": 0.5639, "step": 21124 }, { "epoch": 1.88, "grad_norm": 7.8106195448937585, "learning_rate": 8.698706490580411e-08, "loss": 0.5008, "step": 21125 }, { "epoch": 1.88, "grad_norm": 6.497132225304615, "learning_rate": 8.685295692613448e-08, "loss": 0.5673, "step": 21126 }, { "epoch": 1.88, "grad_norm": 4.903177687800151, "learning_rate": 8.671895149765019e-08, "loss": 0.5933, "step": 21127 }, { "epoch": 1.88, "grad_norm": 8.34520062040471, "learning_rate": 8.658504862314842e-08, "loss": 0.58, "step": 21128 }, { "epoch": 1.88, "grad_norm": 8.978786452738884, "learning_rate": 8.645124830542306e-08, "loss": 0.545, "step": 21129 }, { "epoch": 1.89, "grad_norm": 8.546217499072739, "learning_rate": 8.631755054726742e-08, "loss": 0.5541, "step": 21130 }, { "epoch": 1.89, "grad_norm": 5.829793517665486, "learning_rate": 8.618395535147205e-08, "loss": 0.5557, "step": 21131 }, { "epoch": 1.89, "grad_norm": 6.180026328352093, "learning_rate": 8.605046272082529e-08, "loss": 0.5721, "step": 21132 }, { "epoch": 1.89, "grad_norm": 6.189375459895696, "learning_rate": 8.591707265811433e-08, "loss": 0.645, "step": 21133 }, { "epoch": 1.89, "grad_norm": 8.758828113363906, "learning_rate": 8.578378516612251e-08, "loss": 0.5518, "step": 21134 }, { "epoch": 1.89, "grad_norm": 8.270019547459423, "learning_rate": 8.565060024763205e-08, "loss": 0.5654, "step": 21135 }, { "epoch": 1.89, "grad_norm": 4.945740115012732, "learning_rate": 8.551751790542351e-08, "loss": 0.6497, "step": 21136 }, { "epoch": 1.89, "grad_norm": 6.48431986513705, "learning_rate": 8.53845381422741e-08, "loss": 0.5342, "step": 21137 }, { "epoch": 1.89, "grad_norm": 5.816646463486906, "learning_rate": 8.525166096095938e-08, "loss": 0.5494, "step": 21138 }, { "epoch": 1.89, "grad_norm": 8.0944829235967, "learning_rate": 8.511888636425325e-08, "loss": 0.5972, "step": 21139 }, { "epoch": 1.89, "grad_norm": 5.087622257138645, "learning_rate": 8.498621435492682e-08, "loss": 0.5373, "step": 21140 }, { "epoch": 1.89, "grad_norm": 9.190088149044145, "learning_rate": 8.485364493574954e-08, "loss": 0.5279, "step": 21141 }, { "epoch": 1.89, "grad_norm": 9.026852965193367, "learning_rate": 8.472117810948866e-08, "loss": 0.5252, "step": 21142 }, { "epoch": 1.89, "grad_norm": 6.460827744204616, "learning_rate": 8.458881387890915e-08, "loss": 0.5311, "step": 21143 }, { "epoch": 1.89, "grad_norm": 4.948383371409069, "learning_rate": 8.44565522467733e-08, "loss": 0.5511, "step": 21144 }, { "epoch": 1.89, "grad_norm": 7.7385698948371235, "learning_rate": 8.432439321584163e-08, "loss": 0.5371, "step": 21145 }, { "epoch": 1.89, "grad_norm": 7.518330506943037, "learning_rate": 8.419233678887362e-08, "loss": 0.5279, "step": 21146 }, { "epoch": 1.89, "grad_norm": 6.740748241442324, "learning_rate": 8.406038296862484e-08, "loss": 0.6462, "step": 21147 }, { "epoch": 1.89, "grad_norm": 7.412072516617177, "learning_rate": 8.392853175784976e-08, "loss": 0.5529, "step": 21148 }, { "epoch": 1.89, "grad_norm": 5.6597061762987915, "learning_rate": 8.379678315930118e-08, "loss": 0.547, "step": 21149 }, { "epoch": 1.89, "grad_norm": 8.81068304869317, "learning_rate": 8.366513717572799e-08, "loss": 0.5885, "step": 21150 }, { "epoch": 1.89, "grad_norm": 8.475895930156181, "learning_rate": 8.353359380987858e-08, "loss": 0.6308, "step": 21151 }, { "epoch": 1.89, "grad_norm": 8.28513526965134, "learning_rate": 8.340215306449851e-08, "loss": 0.5955, "step": 21152 }, { "epoch": 1.89, "grad_norm": 8.592654312280175, "learning_rate": 8.327081494233113e-08, "loss": 0.524, "step": 21153 }, { "epoch": 1.89, "grad_norm": 7.450810856364779, "learning_rate": 8.313957944611817e-08, "loss": 0.5524, "step": 21154 }, { "epoch": 1.89, "grad_norm": 6.181207319846565, "learning_rate": 8.300844657859908e-08, "loss": 0.5395, "step": 21155 }, { "epoch": 1.89, "grad_norm": 8.190229305582598, "learning_rate": 8.287741634251001e-08, "loss": 0.5736, "step": 21156 }, { "epoch": 1.89, "grad_norm": 5.261313972629176, "learning_rate": 8.274648874058766e-08, "loss": 0.5485, "step": 21157 }, { "epoch": 1.89, "grad_norm": 7.364280200376816, "learning_rate": 8.261566377556262e-08, "loss": 0.6123, "step": 21158 }, { "epoch": 1.89, "grad_norm": 6.137916298788042, "learning_rate": 8.248494145016717e-08, "loss": 0.5289, "step": 21159 }, { "epoch": 1.89, "grad_norm": 6.113797752586996, "learning_rate": 8.23543217671291e-08, "loss": 0.5605, "step": 21160 }, { "epoch": 1.89, "grad_norm": 6.287510052170146, "learning_rate": 8.222380472917513e-08, "loss": 0.5493, "step": 21161 }, { "epoch": 1.89, "grad_norm": 7.270832573943363, "learning_rate": 8.20933903390303e-08, "loss": 0.544, "step": 21162 }, { "epoch": 1.89, "grad_norm": 8.439224567632623, "learning_rate": 8.196307859941521e-08, "loss": 0.5788, "step": 21163 }, { "epoch": 1.89, "grad_norm": 8.924388574510065, "learning_rate": 8.183286951305047e-08, "loss": 0.5449, "step": 21164 }, { "epoch": 1.89, "grad_norm": 6.369660143928247, "learning_rate": 8.17027630826539e-08, "loss": 0.5215, "step": 21165 }, { "epoch": 1.89, "grad_norm": 7.029320396172398, "learning_rate": 8.157275931094111e-08, "loss": 0.6069, "step": 21166 }, { "epoch": 1.89, "grad_norm": 9.843167276843955, "learning_rate": 8.144285820062658e-08, "loss": 0.5663, "step": 21167 }, { "epoch": 1.89, "grad_norm": 5.099140189899764, "learning_rate": 8.131305975442039e-08, "loss": 0.5821, "step": 21168 }, { "epoch": 1.89, "grad_norm": 5.727717331201871, "learning_rate": 8.118336397503257e-08, "loss": 0.5515, "step": 21169 }, { "epoch": 1.89, "grad_norm": 7.390398060383969, "learning_rate": 8.105377086516986e-08, "loss": 0.5671, "step": 21170 }, { "epoch": 1.89, "grad_norm": 6.111677188852284, "learning_rate": 8.092428042753786e-08, "loss": 0.5328, "step": 21171 }, { "epoch": 1.89, "grad_norm": 6.418843425236351, "learning_rate": 8.079489266483831e-08, "loss": 0.5185, "step": 21172 }, { "epoch": 1.89, "grad_norm": 10.170038208841909, "learning_rate": 8.066560757977238e-08, "loss": 0.5309, "step": 21173 }, { "epoch": 1.89, "grad_norm": 7.341451944030578, "learning_rate": 8.053642517503901e-08, "loss": 0.5658, "step": 21174 }, { "epoch": 1.89, "grad_norm": 7.008435330145526, "learning_rate": 8.04073454533344e-08, "loss": 0.6095, "step": 21175 }, { "epoch": 1.89, "grad_norm": 6.377374831233594, "learning_rate": 8.027836841735304e-08, "loss": 0.6157, "step": 21176 }, { "epoch": 1.89, "grad_norm": 6.546115291842907, "learning_rate": 8.014949406978612e-08, "loss": 0.5847, "step": 21177 }, { "epoch": 1.89, "grad_norm": 6.312430108215395, "learning_rate": 8.002072241332425e-08, "loss": 0.5361, "step": 21178 }, { "epoch": 1.89, "grad_norm": 6.927394348321175, "learning_rate": 7.989205345065532e-08, "loss": 0.5488, "step": 21179 }, { "epoch": 1.89, "grad_norm": 8.012208391183124, "learning_rate": 7.976348718446492e-08, "loss": 0.5325, "step": 21180 }, { "epoch": 1.89, "grad_norm": 8.398578408294263, "learning_rate": 7.963502361743702e-08, "loss": 0.5452, "step": 21181 }, { "epoch": 1.89, "grad_norm": 8.192189895145455, "learning_rate": 7.950666275225227e-08, "loss": 0.5998, "step": 21182 }, { "epoch": 1.89, "grad_norm": 8.400783734311167, "learning_rate": 7.937840459159018e-08, "loss": 0.6337, "step": 21183 }, { "epoch": 1.89, "grad_norm": 7.838336171114387, "learning_rate": 7.925024913812806e-08, "loss": 0.6085, "step": 21184 }, { "epoch": 1.89, "grad_norm": 8.19701411916909, "learning_rate": 7.912219639454044e-08, "loss": 0.6112, "step": 21185 }, { "epoch": 1.89, "grad_norm": 8.226439058555018, "learning_rate": 7.899424636350073e-08, "loss": 0.5772, "step": 21186 }, { "epoch": 1.89, "grad_norm": 7.138423338776228, "learning_rate": 7.886639904767901e-08, "loss": 0.5434, "step": 21187 }, { "epoch": 1.89, "grad_norm": 4.896775179310554, "learning_rate": 7.873865444974483e-08, "loss": 0.5994, "step": 21188 }, { "epoch": 1.89, "grad_norm": 6.514032696115205, "learning_rate": 7.861101257236325e-08, "loss": 0.5089, "step": 21189 }, { "epoch": 1.89, "grad_norm": 7.995714588032202, "learning_rate": 7.84834734181994e-08, "loss": 0.5645, "step": 21190 }, { "epoch": 1.89, "grad_norm": 6.697463427384625, "learning_rate": 7.835603698991445e-08, "loss": 0.5965, "step": 21191 }, { "epoch": 1.89, "grad_norm": 6.659054167553722, "learning_rate": 7.822870329016963e-08, "loss": 0.6033, "step": 21192 }, { "epoch": 1.89, "grad_norm": 5.943248928584518, "learning_rate": 7.810147232162225e-08, "loss": 0.5464, "step": 21193 }, { "epoch": 1.89, "grad_norm": 6.247359601012381, "learning_rate": 7.797434408692739e-08, "loss": 0.5598, "step": 21194 }, { "epoch": 1.89, "grad_norm": 7.997616606952509, "learning_rate": 7.784731858873906e-08, "loss": 0.5522, "step": 21195 }, { "epoch": 1.89, "grad_norm": 9.034705519507975, "learning_rate": 7.772039582970903e-08, "loss": 0.5776, "step": 21196 }, { "epoch": 1.89, "grad_norm": 6.549796533642617, "learning_rate": 7.759357581248572e-08, "loss": 0.5341, "step": 21197 }, { "epoch": 1.89, "grad_norm": 5.796084991934007, "learning_rate": 7.746685853971702e-08, "loss": 0.6157, "step": 21198 }, { "epoch": 1.89, "grad_norm": 7.386655552388123, "learning_rate": 7.73402440140475e-08, "loss": 0.5507, "step": 21199 }, { "epoch": 1.89, "grad_norm": 9.034859642829629, "learning_rate": 7.721373223812e-08, "loss": 0.5426, "step": 21200 }, { "epoch": 1.89, "grad_norm": 6.850136365421014, "learning_rate": 7.708732321457469e-08, "loss": 0.5223, "step": 21201 }, { "epoch": 1.89, "grad_norm": 5.976969913020208, "learning_rate": 7.696101694605052e-08, "loss": 0.5359, "step": 21202 }, { "epoch": 1.89, "grad_norm": 6.129082694731104, "learning_rate": 7.683481343518373e-08, "loss": 0.588, "step": 21203 }, { "epoch": 1.89, "grad_norm": 8.601553382291334, "learning_rate": 7.670871268460889e-08, "loss": 0.5444, "step": 21204 }, { "epoch": 1.89, "grad_norm": 6.034120440568441, "learning_rate": 7.658271469695777e-08, "loss": 0.5219, "step": 21205 }, { "epoch": 1.89, "grad_norm": 5.619076675569958, "learning_rate": 7.645681947486106e-08, "loss": 0.51, "step": 21206 }, { "epoch": 1.89, "grad_norm": 7.440947258866161, "learning_rate": 7.633102702094497e-08, "loss": 0.591, "step": 21207 }, { "epoch": 1.89, "grad_norm": 12.116005485657649, "learning_rate": 7.62053373378363e-08, "loss": 0.5893, "step": 21208 }, { "epoch": 1.89, "grad_norm": 7.116217436243719, "learning_rate": 7.607975042815852e-08, "loss": 0.5742, "step": 21209 }, { "epoch": 1.89, "grad_norm": 5.953501520362834, "learning_rate": 7.595426629453229e-08, "loss": 0.5154, "step": 21210 }, { "epoch": 1.89, "grad_norm": 6.214152565203953, "learning_rate": 7.582888493957774e-08, "loss": 0.5891, "step": 21211 }, { "epoch": 1.89, "grad_norm": 6.502129342070258, "learning_rate": 7.570360636591167e-08, "loss": 0.5748, "step": 21212 }, { "epoch": 1.89, "grad_norm": 8.327396089627522, "learning_rate": 7.55784305761481e-08, "loss": 0.5789, "step": 21213 }, { "epoch": 1.89, "grad_norm": 7.541653550181685, "learning_rate": 7.545335757290106e-08, "loss": 0.5964, "step": 21214 }, { "epoch": 1.89, "grad_norm": 6.0797506364099165, "learning_rate": 7.532838735878067e-08, "loss": 0.5404, "step": 21215 }, { "epoch": 1.89, "grad_norm": 6.908760230571993, "learning_rate": 7.520351993639485e-08, "loss": 0.5488, "step": 21216 }, { "epoch": 1.89, "grad_norm": 7.8061141081547545, "learning_rate": 7.50787553083504e-08, "loss": 0.5748, "step": 21217 }, { "epoch": 1.89, "grad_norm": 6.544581225643431, "learning_rate": 7.495409347725192e-08, "loss": 0.5468, "step": 21218 }, { "epoch": 1.89, "grad_norm": 7.02726339707892, "learning_rate": 7.482953444570119e-08, "loss": 0.5672, "step": 21219 }, { "epoch": 1.89, "grad_norm": 6.81385148460501, "learning_rate": 7.47050782162978e-08, "loss": 0.519, "step": 21220 }, { "epoch": 1.89, "grad_norm": 6.043106575442452, "learning_rate": 7.458072479163914e-08, "loss": 0.565, "step": 21221 }, { "epoch": 1.89, "grad_norm": 7.9407432509502245, "learning_rate": 7.4456474174322e-08, "loss": 0.5581, "step": 21222 }, { "epoch": 1.89, "grad_norm": 6.557421435801596, "learning_rate": 7.433232636693877e-08, "loss": 0.5084, "step": 21223 }, { "epoch": 1.89, "grad_norm": 8.81752414440379, "learning_rate": 7.420828137208124e-08, "loss": 0.5961, "step": 21224 }, { "epoch": 1.89, "grad_norm": 7.334190301771621, "learning_rate": 7.408433919233904e-08, "loss": 0.5896, "step": 21225 }, { "epoch": 1.89, "grad_norm": 9.1536672033219, "learning_rate": 7.39604998302984e-08, "loss": 0.5689, "step": 21226 }, { "epoch": 1.89, "grad_norm": 8.257805724980575, "learning_rate": 7.38367632885445e-08, "loss": 0.5815, "step": 21227 }, { "epoch": 1.89, "grad_norm": 8.388189220844321, "learning_rate": 7.371312956966025e-08, "loss": 0.5561, "step": 21228 }, { "epoch": 1.89, "grad_norm": 5.649777089405236, "learning_rate": 7.358959867622584e-08, "loss": 0.6399, "step": 21229 }, { "epoch": 1.89, "grad_norm": 7.614265969846501, "learning_rate": 7.346617061081973e-08, "loss": 0.5408, "step": 21230 }, { "epoch": 1.89, "grad_norm": 7.1860568325518415, "learning_rate": 7.334284537601877e-08, "loss": 0.5397, "step": 21231 }, { "epoch": 1.89, "grad_norm": 8.134669127648996, "learning_rate": 7.3219622974397e-08, "loss": 0.6287, "step": 21232 }, { "epoch": 1.89, "grad_norm": 6.1714984310287395, "learning_rate": 7.309650340852515e-08, "loss": 0.5759, "step": 21233 }, { "epoch": 1.89, "grad_norm": 7.6210544328143435, "learning_rate": 7.297348668097448e-08, "loss": 0.5626, "step": 21234 }, { "epoch": 1.89, "grad_norm": 7.209082890118315, "learning_rate": 7.28505727943124e-08, "loss": 0.5376, "step": 21235 }, { "epoch": 1.89, "grad_norm": 5.9306570683673145, "learning_rate": 7.272776175110408e-08, "loss": 0.4996, "step": 21236 }, { "epoch": 1.89, "grad_norm": 5.755382082283328, "learning_rate": 7.260505355391356e-08, "loss": 0.5984, "step": 21237 }, { "epoch": 1.89, "grad_norm": 6.157364927243349, "learning_rate": 7.248244820530159e-08, "loss": 0.5346, "step": 21238 }, { "epoch": 1.89, "grad_norm": 7.819671326413819, "learning_rate": 7.235994570782723e-08, "loss": 0.5412, "step": 21239 }, { "epoch": 1.89, "grad_norm": 7.313787517730441, "learning_rate": 7.223754606404788e-08, "loss": 0.5412, "step": 21240 }, { "epoch": 1.89, "grad_norm": 5.93781517738872, "learning_rate": 7.211524927651814e-08, "loss": 0.5198, "step": 21241 }, { "epoch": 1.89, "grad_norm": 6.298157508584693, "learning_rate": 7.199305534779045e-08, "loss": 0.5681, "step": 21242 }, { "epoch": 1.9, "grad_norm": 8.712756795163743, "learning_rate": 7.187096428041606e-08, "loss": 0.5654, "step": 21243 }, { "epoch": 1.9, "grad_norm": 5.33761487636059, "learning_rate": 7.174897607694242e-08, "loss": 0.5601, "step": 21244 }, { "epoch": 1.9, "grad_norm": 6.338365844825216, "learning_rate": 7.162709073991691e-08, "loss": 0.6337, "step": 21245 }, { "epoch": 1.9, "grad_norm": 5.874363440735824, "learning_rate": 7.150530827188196e-08, "loss": 0.5564, "step": 21246 }, { "epoch": 1.9, "grad_norm": 5.406717761710562, "learning_rate": 7.138362867538107e-08, "loss": 0.6057, "step": 21247 }, { "epoch": 1.9, "grad_norm": 7.950410814305446, "learning_rate": 7.126205195295332e-08, "loss": 0.5323, "step": 21248 }, { "epoch": 1.9, "grad_norm": 6.897296804811561, "learning_rate": 7.114057810713615e-08, "loss": 0.5789, "step": 21249 }, { "epoch": 1.9, "grad_norm": 6.987618005323567, "learning_rate": 7.101920714046583e-08, "loss": 0.5398, "step": 21250 }, { "epoch": 1.9, "grad_norm": 5.7634494272940335, "learning_rate": 7.089793905547537e-08, "loss": 0.5898, "step": 21251 }, { "epoch": 1.9, "grad_norm": 6.288002343068503, "learning_rate": 7.077677385469551e-08, "loss": 0.4917, "step": 21252 }, { "epoch": 1.9, "grad_norm": 7.840368395463145, "learning_rate": 7.065571154065587e-08, "loss": 0.5097, "step": 21253 }, { "epoch": 1.9, "grad_norm": 5.190726768128051, "learning_rate": 7.053475211588279e-08, "loss": 0.5112, "step": 21254 }, { "epoch": 1.9, "grad_norm": 7.374897725960252, "learning_rate": 7.041389558290202e-08, "loss": 0.5601, "step": 21255 }, { "epoch": 1.9, "grad_norm": 5.867212252830151, "learning_rate": 7.029314194423486e-08, "loss": 0.5702, "step": 21256 }, { "epoch": 1.9, "grad_norm": 6.997787466702303, "learning_rate": 7.017249120240322e-08, "loss": 0.5939, "step": 21257 }, { "epoch": 1.9, "grad_norm": 7.389942410504035, "learning_rate": 7.005194335992504e-08, "loss": 0.571, "step": 21258 }, { "epoch": 1.9, "grad_norm": 5.626326598132809, "learning_rate": 6.993149841931502e-08, "loss": 0.4967, "step": 21259 }, { "epoch": 1.9, "grad_norm": 12.723864671590995, "learning_rate": 6.981115638308889e-08, "loss": 0.6317, "step": 21260 }, { "epoch": 1.9, "grad_norm": 7.528606360518601, "learning_rate": 6.969091725375798e-08, "loss": 0.5446, "step": 21261 }, { "epoch": 1.9, "grad_norm": 6.141327160649061, "learning_rate": 6.957078103383142e-08, "loss": 0.5505, "step": 21262 }, { "epoch": 1.9, "grad_norm": 5.510898770733263, "learning_rate": 6.945074772581773e-08, "loss": 0.5857, "step": 21263 }, { "epoch": 1.9, "grad_norm": 7.946808627047982, "learning_rate": 6.933081733222269e-08, "loss": 0.6042, "step": 21264 }, { "epoch": 1.9, "grad_norm": 9.044333302868512, "learning_rate": 6.921098985554764e-08, "loss": 0.5535, "step": 21265 }, { "epoch": 1.9, "grad_norm": 5.021392487641217, "learning_rate": 6.909126529829557e-08, "loss": 0.598, "step": 21266 }, { "epoch": 1.9, "grad_norm": 8.12615345551302, "learning_rate": 6.897164366296449e-08, "loss": 0.564, "step": 21267 }, { "epoch": 1.9, "grad_norm": 8.15088501287265, "learning_rate": 6.885212495205185e-08, "loss": 0.535, "step": 21268 }, { "epoch": 1.9, "grad_norm": 5.521672920163566, "learning_rate": 6.87327091680523e-08, "loss": 0.5609, "step": 21269 }, { "epoch": 1.9, "grad_norm": 7.681788648935735, "learning_rate": 6.861339631345776e-08, "loss": 0.5505, "step": 21270 }, { "epoch": 1.9, "grad_norm": 5.892789616363748, "learning_rate": 6.8494186390759e-08, "loss": 0.5983, "step": 21271 }, { "epoch": 1.9, "grad_norm": 7.40536060634718, "learning_rate": 6.837507940244515e-08, "loss": 0.6254, "step": 21272 }, { "epoch": 1.9, "grad_norm": 10.862060460469634, "learning_rate": 6.825607535100032e-08, "loss": 0.5531, "step": 21273 }, { "epoch": 1.9, "grad_norm": 5.702137927361141, "learning_rate": 6.813717423890975e-08, "loss": 0.6056, "step": 21274 }, { "epoch": 1.9, "grad_norm": 6.981637180233195, "learning_rate": 6.801837606865535e-08, "loss": 0.5903, "step": 21275 }, { "epoch": 1.9, "grad_norm": 7.484555391800655, "learning_rate": 6.789968084271681e-08, "loss": 0.5461, "step": 21276 }, { "epoch": 1.9, "grad_norm": 6.20522418035437, "learning_rate": 6.778108856357102e-08, "loss": 0.6109, "step": 21277 }, { "epoch": 1.9, "grad_norm": 6.465355543409743, "learning_rate": 6.766259923369323e-08, "loss": 0.5904, "step": 21278 }, { "epoch": 1.9, "grad_norm": 7.154501705374097, "learning_rate": 6.754421285555757e-08, "loss": 0.5889, "step": 21279 }, { "epoch": 1.9, "grad_norm": 10.24658216082087, "learning_rate": 6.742592943163428e-08, "loss": 0.5551, "step": 21280 }, { "epoch": 1.9, "grad_norm": 5.727183456471248, "learning_rate": 6.730774896439252e-08, "loss": 0.6214, "step": 21281 }, { "epoch": 1.9, "grad_norm": 7.577313713431559, "learning_rate": 6.718967145629974e-08, "loss": 0.5864, "step": 21282 }, { "epoch": 1.9, "grad_norm": 8.035321119385008, "learning_rate": 6.707169690981896e-08, "loss": 0.6083, "step": 21283 }, { "epoch": 1.9, "grad_norm": 5.934589613123476, "learning_rate": 6.695382532741379e-08, "loss": 0.5712, "step": 21284 }, { "epoch": 1.9, "grad_norm": 7.836845737951196, "learning_rate": 6.683605671154448e-08, "loss": 0.5675, "step": 21285 }, { "epoch": 1.9, "grad_norm": 6.3898090056858265, "learning_rate": 6.671839106466904e-08, "loss": 0.544, "step": 21286 }, { "epoch": 1.9, "grad_norm": 6.189011827662973, "learning_rate": 6.660082838924387e-08, "loss": 0.5462, "step": 21287 }, { "epoch": 1.9, "grad_norm": 7.452300240563997, "learning_rate": 6.648336868772254e-08, "loss": 0.573, "step": 21288 }, { "epoch": 1.9, "grad_norm": 9.657326938255489, "learning_rate": 6.636601196255644e-08, "loss": 0.5279, "step": 21289 }, { "epoch": 1.9, "grad_norm": 6.514370848100157, "learning_rate": 6.62487582161947e-08, "loss": 0.5402, "step": 21290 }, { "epoch": 1.9, "grad_norm": 7.419504622745159, "learning_rate": 6.613160745108593e-08, "loss": 0.6029, "step": 21291 }, { "epoch": 1.9, "grad_norm": 7.623082611581208, "learning_rate": 6.601455966967485e-08, "loss": 0.6, "step": 21292 }, { "epoch": 1.9, "grad_norm": 7.33329908400149, "learning_rate": 6.589761487440449e-08, "loss": 0.6153, "step": 21293 }, { "epoch": 1.9, "grad_norm": 7.231758448682552, "learning_rate": 6.578077306771624e-08, "loss": 0.6008, "step": 21294 }, { "epoch": 1.9, "grad_norm": 5.855029539282295, "learning_rate": 6.566403425204814e-08, "loss": 0.5209, "step": 21295 }, { "epoch": 1.9, "grad_norm": 8.949644031250369, "learning_rate": 6.554739842983771e-08, "loss": 0.5478, "step": 21296 }, { "epoch": 1.9, "grad_norm": 6.958494660912915, "learning_rate": 6.543086560351907e-08, "loss": 0.5414, "step": 21297 }, { "epoch": 1.9, "grad_norm": 5.785996567061647, "learning_rate": 6.53144357755242e-08, "loss": 0.5419, "step": 21298 }, { "epoch": 1.9, "grad_norm": 7.015172539154094, "learning_rate": 6.519810894828394e-08, "loss": 0.5188, "step": 21299 }, { "epoch": 1.9, "grad_norm": 6.565896080790453, "learning_rate": 6.508188512422575e-08, "loss": 0.583, "step": 21300 }, { "epoch": 1.9, "grad_norm": 5.802204822149363, "learning_rate": 6.496576430577661e-08, "loss": 0.5773, "step": 21301 }, { "epoch": 1.9, "grad_norm": 8.462789919547253, "learning_rate": 6.484974649535958e-08, "loss": 0.6077, "step": 21302 }, { "epoch": 1.9, "grad_norm": 7.99504791263156, "learning_rate": 6.473383169539605e-08, "loss": 0.5829, "step": 21303 }, { "epoch": 1.9, "grad_norm": 6.584529988402077, "learning_rate": 6.461801990830519e-08, "loss": 0.5553, "step": 21304 }, { "epoch": 1.9, "grad_norm": 6.389552597786946, "learning_rate": 6.450231113650563e-08, "loss": 0.6531, "step": 21305 }, { "epoch": 1.9, "grad_norm": 6.45104601379624, "learning_rate": 6.438670538241099e-08, "loss": 0.5332, "step": 21306 }, { "epoch": 1.9, "grad_norm": 8.589456007267785, "learning_rate": 6.4271202648436e-08, "loss": 0.5312, "step": 21307 }, { "epoch": 1.9, "grad_norm": 6.022145390691316, "learning_rate": 6.415580293699042e-08, "loss": 0.5696, "step": 21308 }, { "epoch": 1.9, "grad_norm": 8.873129385294643, "learning_rate": 6.40405062504823e-08, "loss": 0.5769, "step": 21309 }, { "epoch": 1.9, "grad_norm": 6.074253775255684, "learning_rate": 6.392531259131973e-08, "loss": 0.5511, "step": 21310 }, { "epoch": 1.9, "grad_norm": 7.067451883498281, "learning_rate": 6.381022196190633e-08, "loss": 0.5067, "step": 21311 }, { "epoch": 1.9, "grad_norm": 7.430163554536063, "learning_rate": 6.369523436464409e-08, "loss": 0.5934, "step": 21312 }, { "epoch": 1.9, "grad_norm": 5.872598635485549, "learning_rate": 6.358034980193384e-08, "loss": 0.6076, "step": 21313 }, { "epoch": 1.9, "grad_norm": 6.607174660533924, "learning_rate": 6.346556827617368e-08, "loss": 0.5321, "step": 21314 }, { "epoch": 1.9, "grad_norm": 7.130659308917549, "learning_rate": 6.33508897897589e-08, "loss": 0.5716, "step": 21315 }, { "epoch": 1.9, "grad_norm": 6.712218575965231, "learning_rate": 6.32363143450826e-08, "loss": 0.5608, "step": 21316 }, { "epoch": 1.9, "grad_norm": 5.873596512060934, "learning_rate": 6.31218419445373e-08, "loss": 0.6169, "step": 21317 }, { "epoch": 1.9, "grad_norm": 6.664751567692232, "learning_rate": 6.300747259051165e-08, "loss": 0.6056, "step": 21318 }, { "epoch": 1.9, "grad_norm": 8.189202080767128, "learning_rate": 6.289320628539319e-08, "loss": 0.5705, "step": 21319 }, { "epoch": 1.9, "grad_norm": 4.8799085852024175, "learning_rate": 6.277904303156723e-08, "loss": 0.5296, "step": 21320 }, { "epoch": 1.9, "grad_norm": 8.023611975770347, "learning_rate": 6.266498283141686e-08, "loss": 0.5518, "step": 21321 }, { "epoch": 1.9, "grad_norm": 6.196427945571334, "learning_rate": 6.25510256873213e-08, "loss": 0.5439, "step": 21322 }, { "epoch": 1.9, "grad_norm": 5.214545165534594, "learning_rate": 6.243717160166085e-08, "loss": 0.5455, "step": 21323 }, { "epoch": 1.9, "grad_norm": 6.317314380266117, "learning_rate": 6.23234205768114e-08, "loss": 0.5343, "step": 21324 }, { "epoch": 1.9, "grad_norm": 5.821251853454146, "learning_rate": 6.220977261514716e-08, "loss": 0.5453, "step": 21325 }, { "epoch": 1.9, "grad_norm": 5.681031012561174, "learning_rate": 6.209622771904012e-08, "loss": 0.5417, "step": 21326 }, { "epoch": 1.9, "grad_norm": 6.002304885837011, "learning_rate": 6.198278589086115e-08, "loss": 0.4771, "step": 21327 }, { "epoch": 1.9, "grad_norm": 9.267428650193255, "learning_rate": 6.186944713297671e-08, "loss": 0.5496, "step": 21328 }, { "epoch": 1.9, "grad_norm": 4.397417725045336, "learning_rate": 6.175621144775379e-08, "loss": 0.526, "step": 21329 }, { "epoch": 1.9, "grad_norm": 5.116603999827927, "learning_rate": 6.164307883755494e-08, "loss": 0.5945, "step": 21330 }, { "epoch": 1.9, "grad_norm": 6.144813342570023, "learning_rate": 6.153004930474215e-08, "loss": 0.5785, "step": 21331 }, { "epoch": 1.9, "grad_norm": 7.171124549102536, "learning_rate": 6.141712285167467e-08, "loss": 0.6322, "step": 21332 }, { "epoch": 1.9, "grad_norm": 9.272236093636987, "learning_rate": 6.130429948070948e-08, "loss": 0.6029, "step": 21333 }, { "epoch": 1.9, "grad_norm": 9.060489877448727, "learning_rate": 6.119157919420138e-08, "loss": 0.5628, "step": 21334 }, { "epoch": 1.9, "grad_norm": 7.744579559068785, "learning_rate": 6.107896199450291e-08, "loss": 0.5089, "step": 21335 }, { "epoch": 1.9, "grad_norm": 8.492473072669513, "learning_rate": 6.096644788396555e-08, "loss": 0.5773, "step": 21336 }, { "epoch": 1.9, "grad_norm": 7.80379316995366, "learning_rate": 6.085403686493685e-08, "loss": 0.5434, "step": 21337 }, { "epoch": 1.9, "grad_norm": 6.357029891972265, "learning_rate": 6.074172893976382e-08, "loss": 0.5114, "step": 21338 }, { "epoch": 1.9, "grad_norm": 6.467090747574635, "learning_rate": 6.062952411079015e-08, "loss": 0.5694, "step": 21339 }, { "epoch": 1.9, "grad_norm": 7.082034016544279, "learning_rate": 6.051742238035896e-08, "loss": 0.5087, "step": 21340 }, { "epoch": 1.9, "grad_norm": 7.679360045424672, "learning_rate": 6.040542375080838e-08, "loss": 0.5511, "step": 21341 }, { "epoch": 1.9, "grad_norm": 6.982653620090274, "learning_rate": 6.029352822447765e-08, "loss": 0.527, "step": 21342 }, { "epoch": 1.9, "grad_norm": 5.502862500067863, "learning_rate": 6.018173580370157e-08, "loss": 0.6024, "step": 21343 }, { "epoch": 1.9, "grad_norm": 7.267060355983231, "learning_rate": 6.007004649081383e-08, "loss": 0.5524, "step": 21344 }, { "epoch": 1.9, "grad_norm": 6.9607593961245575, "learning_rate": 5.995846028814534e-08, "loss": 0.5205, "step": 21345 }, { "epoch": 1.9, "grad_norm": 6.427406781860701, "learning_rate": 5.984697719802535e-08, "loss": 0.5312, "step": 21346 }, { "epoch": 1.9, "grad_norm": 7.696278884635256, "learning_rate": 5.973559722278144e-08, "loss": 0.5739, "step": 21347 }, { "epoch": 1.9, "grad_norm": 4.918737121566061, "learning_rate": 5.962432036473787e-08, "loss": 0.5852, "step": 21348 }, { "epoch": 1.9, "grad_norm": 6.569367538704405, "learning_rate": 5.9513146626217214e-08, "loss": 0.6018, "step": 21349 }, { "epoch": 1.9, "grad_norm": 6.453765576738879, "learning_rate": 5.9402076009539843e-08, "loss": 0.5531, "step": 21350 }, { "epoch": 1.9, "grad_norm": 5.2035410403665185, "learning_rate": 5.9291108517024463e-08, "loss": 0.5304, "step": 21351 }, { "epoch": 1.9, "grad_norm": 6.848260871027334, "learning_rate": 5.9180244150988106e-08, "loss": 0.6166, "step": 21352 }, { "epoch": 1.9, "grad_norm": 7.016532055159449, "learning_rate": 5.906948291374281e-08, "loss": 0.5567, "step": 21353 }, { "epoch": 1.9, "grad_norm": 7.804231185301059, "learning_rate": 5.895882480760229e-08, "loss": 0.5558, "step": 21354 }, { "epoch": 1.91, "grad_norm": 6.336289411796137, "learning_rate": 5.884826983487524e-08, "loss": 0.53, "step": 21355 }, { "epoch": 1.91, "grad_norm": 4.783032466300503, "learning_rate": 5.873781799786982e-08, "loss": 0.544, "step": 21356 }, { "epoch": 1.91, "grad_norm": 6.89802165450593, "learning_rate": 5.862746929889085e-08, "loss": 0.5845, "step": 21357 }, { "epoch": 1.91, "grad_norm": 7.594493626911466, "learning_rate": 5.851722374024316e-08, "loss": 0.5874, "step": 21358 }, { "epoch": 1.91, "grad_norm": 7.59827004375222, "learning_rate": 5.8407081324225456e-08, "loss": 0.5716, "step": 21359 }, { "epoch": 1.91, "grad_norm": 7.992995498437647, "learning_rate": 5.829704205313924e-08, "loss": 0.5296, "step": 21360 }, { "epoch": 1.91, "grad_norm": 6.046078495054364, "learning_rate": 5.8187105929279344e-08, "loss": 0.5806, "step": 21361 }, { "epoch": 1.91, "grad_norm": 6.069622644270423, "learning_rate": 5.8077272954941145e-08, "loss": 0.5795, "step": 21362 }, { "epoch": 1.91, "grad_norm": 8.989380039572852, "learning_rate": 5.796754313241726e-08, "loss": 0.6492, "step": 21363 }, { "epoch": 1.91, "grad_norm": 6.837520267272899, "learning_rate": 5.7857916463998634e-08, "loss": 0.5435, "step": 21364 }, { "epoch": 1.91, "grad_norm": 6.557491161467952, "learning_rate": 5.7748392951972324e-08, "loss": 0.5344, "step": 21365 }, { "epoch": 1.91, "grad_norm": 9.805066633644588, "learning_rate": 5.763897259862539e-08, "loss": 0.5481, "step": 21366 }, { "epoch": 1.91, "grad_norm": 8.262325253351532, "learning_rate": 5.7529655406241004e-08, "loss": 0.5781, "step": 21367 }, { "epoch": 1.91, "grad_norm": 7.249298985547511, "learning_rate": 5.742044137710123e-08, "loss": 0.6067, "step": 21368 }, { "epoch": 1.91, "grad_norm": 6.597185459926209, "learning_rate": 5.7311330513485364e-08, "loss": 0.6243, "step": 21369 }, { "epoch": 1.91, "grad_norm": 7.3657708157723105, "learning_rate": 5.720232281767157e-08, "loss": 0.537, "step": 21370 }, { "epoch": 1.91, "grad_norm": 6.815613225613898, "learning_rate": 5.709341829193527e-08, "loss": 0.5288, "step": 21371 }, { "epoch": 1.91, "grad_norm": 5.721985573575241, "learning_rate": 5.698461693854851e-08, "loss": 0.546, "step": 21372 }, { "epoch": 1.91, "grad_norm": 6.181521317986416, "learning_rate": 5.687591875978282e-08, "loss": 0.561, "step": 21373 }, { "epoch": 1.91, "grad_norm": 5.517124678400668, "learning_rate": 5.67673237579075e-08, "loss": 0.6032, "step": 21374 }, { "epoch": 1.91, "grad_norm": 7.12765622400804, "learning_rate": 5.6658831935187955e-08, "loss": 0.5604, "step": 21375 }, { "epoch": 1.91, "grad_norm": 9.256872768865327, "learning_rate": 5.6550443293890144e-08, "loss": 0.6227, "step": 21376 }, { "epoch": 1.91, "grad_norm": 8.220233972439093, "learning_rate": 5.64421578362756e-08, "loss": 0.6111, "step": 21377 }, { "epoch": 1.91, "grad_norm": 8.352521048898353, "learning_rate": 5.633397556460529e-08, "loss": 0.5812, "step": 21378 }, { "epoch": 1.91, "grad_norm": 8.163902218221494, "learning_rate": 5.62258964811363e-08, "loss": 0.5029, "step": 21379 }, { "epoch": 1.91, "grad_norm": 5.96831042558661, "learning_rate": 5.611792058812515e-08, "loss": 0.5482, "step": 21380 }, { "epoch": 1.91, "grad_norm": 8.710456687774462, "learning_rate": 5.6010047887825606e-08, "loss": 0.4915, "step": 21381 }, { "epoch": 1.91, "grad_norm": 6.1750553161035535, "learning_rate": 5.5902278382489185e-08, "loss": 0.5619, "step": 21382 }, { "epoch": 1.91, "grad_norm": 4.994320616012002, "learning_rate": 5.5794612074365204e-08, "loss": 0.5382, "step": 21383 }, { "epoch": 1.91, "grad_norm": 6.680109490805696, "learning_rate": 5.568704896570132e-08, "loss": 0.544, "step": 21384 }, { "epoch": 1.91, "grad_norm": 6.9411810572235675, "learning_rate": 5.557958905874183e-08, "loss": 0.6136, "step": 21385 }, { "epoch": 1.91, "grad_norm": 5.951999430672837, "learning_rate": 5.547223235573107e-08, "loss": 0.5463, "step": 21386 }, { "epoch": 1.91, "grad_norm": 5.5146187773066995, "learning_rate": 5.5364978858908904e-08, "loss": 0.5515, "step": 21387 }, { "epoch": 1.91, "grad_norm": 6.735158946381258, "learning_rate": 5.5257828570514094e-08, "loss": 0.548, "step": 21388 }, { "epoch": 1.91, "grad_norm": 8.470210319870674, "learning_rate": 5.515078149278319e-08, "loss": 0.5784, "step": 21389 }, { "epoch": 1.91, "grad_norm": 7.691901289769762, "learning_rate": 5.504383762795051e-08, "loss": 0.549, "step": 21390 }, { "epoch": 1.91, "grad_norm": 6.505807358698942, "learning_rate": 5.493699697824928e-08, "loss": 0.5669, "step": 21391 }, { "epoch": 1.91, "grad_norm": 8.512321133325601, "learning_rate": 5.483025954590771e-08, "loss": 0.6421, "step": 21392 }, { "epoch": 1.91, "grad_norm": 6.528042288440155, "learning_rate": 5.4723625333155115e-08, "loss": 0.5607, "step": 21393 }, { "epoch": 1.91, "grad_norm": 8.442187578827639, "learning_rate": 5.4617094342216956e-08, "loss": 0.5517, "step": 21394 }, { "epoch": 1.91, "grad_norm": 9.040884402851328, "learning_rate": 5.4510666575317004e-08, "loss": 0.561, "step": 21395 }, { "epoch": 1.91, "grad_norm": 8.616599415130706, "learning_rate": 5.4404342034676263e-08, "loss": 0.5776, "step": 21396 }, { "epoch": 1.91, "grad_norm": 8.095612286648642, "learning_rate": 5.429812072251406e-08, "loss": 0.5204, "step": 21397 }, { "epoch": 1.91, "grad_norm": 6.4600437453868755, "learning_rate": 5.419200264104807e-08, "loss": 0.4865, "step": 21398 }, { "epoch": 1.91, "grad_norm": 5.6556540501034425, "learning_rate": 5.408598779249263e-08, "loss": 0.5782, "step": 21399 }, { "epoch": 1.91, "grad_norm": 6.142954764627255, "learning_rate": 5.3980076179060424e-08, "loss": 0.5729, "step": 21400 }, { "epoch": 1.91, "grad_norm": 6.205970862928872, "learning_rate": 5.3874267802963006e-08, "loss": 0.6124, "step": 21401 }, { "epoch": 1.91, "grad_norm": 8.409992918834801, "learning_rate": 5.376856266640862e-08, "loss": 0.6009, "step": 21402 }, { "epoch": 1.91, "grad_norm": 5.8881805684984, "learning_rate": 5.3662960771602714e-08, "loss": 0.5127, "step": 21403 }, { "epoch": 1.91, "grad_norm": 10.258702135296515, "learning_rate": 5.35574621207513e-08, "loss": 0.5414, "step": 21404 }, { "epoch": 1.91, "grad_norm": 8.739939932591984, "learning_rate": 5.34520667160543e-08, "loss": 0.602, "step": 21405 }, { "epoch": 1.91, "grad_norm": 8.22835968568237, "learning_rate": 5.3346774559713266e-08, "loss": 0.498, "step": 21406 }, { "epoch": 1.91, "grad_norm": 8.653556087134426, "learning_rate": 5.3241585653924786e-08, "loss": 0.532, "step": 21407 }, { "epoch": 1.91, "grad_norm": 5.551220000413304, "learning_rate": 5.313650000088544e-08, "loss": 0.6128, "step": 21408 }, { "epoch": 1.91, "grad_norm": 7.102984350773631, "learning_rate": 5.303151760278791e-08, "loss": 0.5175, "step": 21409 }, { "epoch": 1.91, "grad_norm": 6.894878455958238, "learning_rate": 5.292663846182433e-08, "loss": 0.4853, "step": 21410 }, { "epoch": 1.91, "grad_norm": 5.027828369218479, "learning_rate": 5.28218625801824e-08, "loss": 0.5399, "step": 21411 }, { "epoch": 1.91, "grad_norm": 8.141860519107901, "learning_rate": 5.2717189960050375e-08, "loss": 0.5814, "step": 21412 }, { "epoch": 1.91, "grad_norm": 6.236755803706455, "learning_rate": 5.2612620603612606e-08, "loss": 0.5286, "step": 21413 }, { "epoch": 1.91, "grad_norm": 7.149004708070925, "learning_rate": 5.25081545130518e-08, "loss": 0.5422, "step": 21414 }, { "epoch": 1.91, "grad_norm": 6.238914681953426, "learning_rate": 5.2403791690548435e-08, "loss": 0.5371, "step": 21415 }, { "epoch": 1.91, "grad_norm": 7.531508990558202, "learning_rate": 5.229953213828132e-08, "loss": 0.5872, "step": 21416 }, { "epoch": 1.91, "grad_norm": 5.395012435633081, "learning_rate": 5.219537585842593e-08, "loss": 0.576, "step": 21417 }, { "epoch": 1.91, "grad_norm": 9.213480699418618, "learning_rate": 5.2091322853156654e-08, "loss": 0.5054, "step": 21418 }, { "epoch": 1.91, "grad_norm": 4.7647381904990995, "learning_rate": 5.198737312464508e-08, "loss": 0.5128, "step": 21419 }, { "epoch": 1.91, "grad_norm": 6.591314604813206, "learning_rate": 5.1883526675061136e-08, "loss": 0.5732, "step": 21420 }, { "epoch": 1.91, "grad_norm": 6.031386774914491, "learning_rate": 5.1779783506571976e-08, "loss": 0.5158, "step": 21421 }, { "epoch": 1.91, "grad_norm": 6.061250427727376, "learning_rate": 5.1676143621343654e-08, "loss": 0.5844, "step": 21422 }, { "epoch": 1.91, "grad_norm": 6.692733908468737, "learning_rate": 5.157260702153999e-08, "loss": 0.6054, "step": 21423 }, { "epoch": 1.91, "grad_norm": 6.44427224083828, "learning_rate": 5.146917370932037e-08, "loss": 0.5067, "step": 21424 }, { "epoch": 1.91, "grad_norm": 8.371100207749128, "learning_rate": 5.136584368684472e-08, "loss": 0.5893, "step": 21425 }, { "epoch": 1.91, "grad_norm": 7.057685765642618, "learning_rate": 5.126261695627022e-08, "loss": 0.5507, "step": 21426 }, { "epoch": 1.91, "grad_norm": 4.950222248270137, "learning_rate": 5.115949351975069e-08, "loss": 0.5393, "step": 21427 }, { "epoch": 1.91, "grad_norm": 7.278229544900507, "learning_rate": 5.1056473379438864e-08, "loss": 0.563, "step": 21428 }, { "epoch": 1.91, "grad_norm": 5.037304083051875, "learning_rate": 5.095355653748524e-08, "loss": 0.5067, "step": 21429 }, { "epoch": 1.91, "grad_norm": 8.682240359100392, "learning_rate": 5.0850742996038093e-08, "loss": 0.5439, "step": 21430 }, { "epoch": 1.91, "grad_norm": 7.152042617980153, "learning_rate": 5.0748032757243493e-08, "loss": 0.5783, "step": 21431 }, { "epoch": 1.91, "grad_norm": 6.891760321447729, "learning_rate": 5.064542582324472e-08, "loss": 0.5748, "step": 21432 }, { "epoch": 1.91, "grad_norm": 9.286852631776844, "learning_rate": 5.05429221961834e-08, "loss": 0.5606, "step": 21433 }, { "epoch": 1.91, "grad_norm": 11.047150444275132, "learning_rate": 5.044052187820003e-08, "loss": 0.5432, "step": 21434 }, { "epoch": 1.91, "grad_norm": 7.996704218233253, "learning_rate": 5.0338224871430695e-08, "loss": 0.5333, "step": 21435 }, { "epoch": 1.91, "grad_norm": 5.418592172421419, "learning_rate": 5.0236031178012015e-08, "loss": 0.5729, "step": 21436 }, { "epoch": 1.91, "grad_norm": 6.14752762352502, "learning_rate": 5.0133940800076165e-08, "loss": 0.5165, "step": 21437 }, { "epoch": 1.91, "grad_norm": 5.516788036842529, "learning_rate": 5.003195373975422e-08, "loss": 0.5083, "step": 21438 }, { "epoch": 1.91, "grad_norm": 5.9269353088502745, "learning_rate": 4.993006999917504e-08, "loss": 0.5784, "step": 21439 }, { "epoch": 1.91, "grad_norm": 8.591672249272733, "learning_rate": 4.9828289580465264e-08, "loss": 0.5623, "step": 21440 }, { "epoch": 1.91, "grad_norm": 10.18279867544072, "learning_rate": 4.972661248574928e-08, "loss": 0.5946, "step": 21441 }, { "epoch": 1.91, "grad_norm": 10.50019249855371, "learning_rate": 4.96250387171493e-08, "loss": 0.6232, "step": 21442 }, { "epoch": 1.91, "grad_norm": 6.949859513460178, "learning_rate": 4.952356827678528e-08, "loss": 0.5296, "step": 21443 }, { "epoch": 1.91, "grad_norm": 8.426314693945141, "learning_rate": 4.942220116677554e-08, "loss": 0.5627, "step": 21444 }, { "epoch": 1.91, "grad_norm": 6.861115161514378, "learning_rate": 4.9320937389236155e-08, "loss": 0.6362, "step": 21445 }, { "epoch": 1.91, "grad_norm": 7.10884018689446, "learning_rate": 4.921977694628044e-08, "loss": 0.5119, "step": 21446 }, { "epoch": 1.91, "grad_norm": 7.014274833860725, "learning_rate": 4.911871984001892e-08, "loss": 0.5639, "step": 21447 }, { "epoch": 1.91, "grad_norm": 6.464375744355344, "learning_rate": 4.90177660725627e-08, "loss": 0.6046, "step": 21448 }, { "epoch": 1.91, "grad_norm": 6.351580761099356, "learning_rate": 4.891691564601786e-08, "loss": 0.6123, "step": 21449 }, { "epoch": 1.91, "grad_norm": 11.42200622993026, "learning_rate": 4.881616856248994e-08, "loss": 0.4941, "step": 21450 }, { "epoch": 1.91, "grad_norm": 9.427342972913612, "learning_rate": 4.871552482408115e-08, "loss": 0.6114, "step": 21451 }, { "epoch": 1.91, "grad_norm": 5.058803591463046, "learning_rate": 4.861498443289314e-08, "loss": 0.5302, "step": 21452 }, { "epoch": 1.91, "grad_norm": 5.758235382033623, "learning_rate": 4.851454739102368e-08, "loss": 0.5724, "step": 21453 }, { "epoch": 1.91, "grad_norm": 8.381568892531039, "learning_rate": 4.8414213700569977e-08, "loss": 0.5786, "step": 21454 }, { "epoch": 1.91, "grad_norm": 9.504062094259034, "learning_rate": 4.831398336362481e-08, "loss": 0.5523, "step": 21455 }, { "epoch": 1.91, "grad_norm": 7.72514166448945, "learning_rate": 4.821385638228204e-08, "loss": 0.5077, "step": 21456 }, { "epoch": 1.91, "grad_norm": 6.024034845244637, "learning_rate": 4.8113832758630576e-08, "loss": 0.579, "step": 21457 }, { "epoch": 1.91, "grad_norm": 8.77898673563874, "learning_rate": 4.801391249475817e-08, "loss": 0.5078, "step": 21458 }, { "epoch": 1.91, "grad_norm": 6.703302777873915, "learning_rate": 4.7914095592750396e-08, "loss": 0.5673, "step": 21459 }, { "epoch": 1.91, "grad_norm": 6.921905457940107, "learning_rate": 4.7814382054692246e-08, "loss": 0.6212, "step": 21460 }, { "epoch": 1.91, "grad_norm": 5.43475290795116, "learning_rate": 4.771477188266316e-08, "loss": 0.5468, "step": 21461 }, { "epoch": 1.91, "grad_norm": 8.91399680225555, "learning_rate": 4.7615265078742587e-08, "loss": 0.5989, "step": 21462 }, { "epoch": 1.91, "grad_norm": 7.080605791604, "learning_rate": 4.7515861645007766e-08, "loss": 0.5705, "step": 21463 }, { "epoch": 1.91, "grad_norm": 7.704995036258877, "learning_rate": 4.741656158353369e-08, "loss": 0.5679, "step": 21464 }, { "epoch": 1.91, "grad_norm": 7.9916507776348835, "learning_rate": 4.731736489639316e-08, "loss": 0.5662, "step": 21465 }, { "epoch": 1.91, "grad_norm": 6.031492703454426, "learning_rate": 4.721827158565617e-08, "loss": 0.5438, "step": 21466 }, { "epoch": 1.92, "grad_norm": 5.025983980676027, "learning_rate": 4.7119281653391634e-08, "loss": 0.5511, "step": 21467 }, { "epoch": 1.92, "grad_norm": 7.345638638719595, "learning_rate": 4.702039510166512e-08, "loss": 0.6195, "step": 21468 }, { "epoch": 1.92, "grad_norm": 8.759056928000941, "learning_rate": 4.6921611932541075e-08, "loss": 0.5354, "step": 21469 }, { "epoch": 1.92, "grad_norm": 6.98172661570194, "learning_rate": 4.6822932148081205e-08, "loss": 0.6002, "step": 21470 }, { "epoch": 1.92, "grad_norm": 8.432431445860681, "learning_rate": 4.6724355750345506e-08, "loss": 0.5951, "step": 21471 }, { "epoch": 1.92, "grad_norm": 8.720835186220297, "learning_rate": 4.662588274139179e-08, "loss": 0.6468, "step": 21472 }, { "epoch": 1.92, "grad_norm": 6.678829404336922, "learning_rate": 4.6527513123274527e-08, "loss": 0.5411, "step": 21473 }, { "epoch": 1.92, "grad_norm": 7.44426436200547, "learning_rate": 4.642924689804818e-08, "loss": 0.571, "step": 21474 }, { "epoch": 1.92, "grad_norm": 9.04607724235734, "learning_rate": 4.633108406776277e-08, "loss": 0.5613, "step": 21475 }, { "epoch": 1.92, "grad_norm": 5.476541718782039, "learning_rate": 4.623302463446722e-08, "loss": 0.5785, "step": 21476 }, { "epoch": 1.92, "grad_norm": 5.195136852920775, "learning_rate": 4.613506860020933e-08, "loss": 0.5323, "step": 21477 }, { "epoch": 1.92, "grad_norm": 7.880610263491197, "learning_rate": 4.603721596703248e-08, "loss": 0.5586, "step": 21478 }, { "epoch": 1.92, "grad_norm": 6.660385437303607, "learning_rate": 4.593946673698002e-08, "loss": 0.5793, "step": 21479 }, { "epoch": 1.92, "grad_norm": 6.778083395066137, "learning_rate": 4.584182091209255e-08, "loss": 0.5499, "step": 21480 }, { "epoch": 1.92, "grad_norm": 8.44357857163459, "learning_rate": 4.5744278494407326e-08, "loss": 0.5226, "step": 21481 }, { "epoch": 1.92, "grad_norm": 6.465154941421891, "learning_rate": 4.5646839485960494e-08, "loss": 0.6021, "step": 21482 }, { "epoch": 1.92, "grad_norm": 5.999017891262438, "learning_rate": 4.5549503888785985e-08, "loss": 0.5881, "step": 21483 }, { "epoch": 1.92, "grad_norm": 5.853043337448896, "learning_rate": 4.545227170491551e-08, "loss": 0.528, "step": 21484 }, { "epoch": 1.92, "grad_norm": 4.550572688197819, "learning_rate": 4.535514293637855e-08, "loss": 0.5455, "step": 21485 }, { "epoch": 1.92, "grad_norm": 7.326586952613605, "learning_rate": 4.5258117585202935e-08, "loss": 0.5466, "step": 21486 }, { "epoch": 1.92, "grad_norm": 8.660951757315528, "learning_rate": 4.5161195653412593e-08, "loss": 0.5245, "step": 21487 }, { "epoch": 1.92, "grad_norm": 6.254171752919263, "learning_rate": 4.506437714303258e-08, "loss": 0.5583, "step": 21488 }, { "epoch": 1.92, "grad_norm": 7.425863039150528, "learning_rate": 4.496766205608127e-08, "loss": 0.5573, "step": 21489 }, { "epoch": 1.92, "grad_norm": 8.550187285960789, "learning_rate": 4.487105039457929e-08, "loss": 0.5926, "step": 21490 }, { "epoch": 1.92, "grad_norm": 6.724978060580899, "learning_rate": 4.4774542160542775e-08, "loss": 0.5758, "step": 21491 }, { "epoch": 1.92, "grad_norm": 6.07922892125418, "learning_rate": 4.46781373559857e-08, "loss": 0.5677, "step": 21492 }, { "epoch": 1.92, "grad_norm": 9.307440651001514, "learning_rate": 4.4581835982920873e-08, "loss": 0.528, "step": 21493 }, { "epoch": 1.92, "grad_norm": 7.944192586435162, "learning_rate": 4.4485638043357257e-08, "loss": 0.573, "step": 21494 }, { "epoch": 1.92, "grad_norm": 7.798318080138457, "learning_rate": 4.438954353930436e-08, "loss": 0.5328, "step": 21495 }, { "epoch": 1.92, "grad_norm": 4.959631056083193, "learning_rate": 4.429355247276668e-08, "loss": 0.5894, "step": 21496 }, { "epoch": 1.92, "grad_norm": 5.664137042958867, "learning_rate": 4.4197664845748166e-08, "loss": 0.6024, "step": 21497 }, { "epoch": 1.92, "grad_norm": 4.828269261544087, "learning_rate": 4.4101880660250564e-08, "loss": 0.5818, "step": 21498 }, { "epoch": 1.92, "grad_norm": 7.241043594227954, "learning_rate": 4.400619991827337e-08, "loss": 0.5302, "step": 21499 }, { "epoch": 1.92, "grad_norm": 8.125741292204633, "learning_rate": 4.391062262181278e-08, "loss": 0.5744, "step": 21500 }, { "epoch": 1.92, "grad_norm": 7.48875296169228, "learning_rate": 4.381514877286441e-08, "loss": 0.5362, "step": 21501 }, { "epoch": 1.92, "grad_norm": 8.755288449846281, "learning_rate": 4.371977837342112e-08, "loss": 0.5725, "step": 21502 }, { "epoch": 1.92, "grad_norm": 8.324566394596554, "learning_rate": 4.3624511425473525e-08, "loss": 0.5692, "step": 21503 }, { "epoch": 1.92, "grad_norm": 6.059433196692797, "learning_rate": 4.352934793100949e-08, "loss": 0.5397, "step": 21504 }, { "epoch": 1.92, "grad_norm": 10.370703237258407, "learning_rate": 4.343428789201631e-08, "loss": 0.5871, "step": 21505 }, { "epoch": 1.92, "grad_norm": 8.297872022742396, "learning_rate": 4.3339331310477404e-08, "loss": 0.5656, "step": 21506 }, { "epoch": 1.92, "grad_norm": 8.052184348689103, "learning_rate": 4.324447818837507e-08, "loss": 0.5831, "step": 21507 }, { "epoch": 1.92, "grad_norm": 8.171907712791318, "learning_rate": 4.3149728527689395e-08, "loss": 0.6148, "step": 21508 }, { "epoch": 1.92, "grad_norm": 7.850065732979883, "learning_rate": 4.305508233039768e-08, "loss": 0.5781, "step": 21509 }, { "epoch": 1.92, "grad_norm": 8.969325801468154, "learning_rate": 4.296053959847501e-08, "loss": 0.5648, "step": 21510 }, { "epoch": 1.92, "grad_norm": 5.847464223227468, "learning_rate": 4.2866100333895935e-08, "loss": 0.5812, "step": 21511 }, { "epoch": 1.92, "grad_norm": 8.924605067062105, "learning_rate": 4.2771764538631075e-08, "loss": 0.6016, "step": 21512 }, { "epoch": 1.92, "grad_norm": 7.880240859062239, "learning_rate": 4.267753221464943e-08, "loss": 0.5827, "step": 21513 }, { "epoch": 1.92, "grad_norm": 7.410131362128329, "learning_rate": 4.258340336391775e-08, "loss": 0.5387, "step": 21514 }, { "epoch": 1.92, "grad_norm": 10.087773348752298, "learning_rate": 4.248937798840114e-08, "loss": 0.515, "step": 21515 }, { "epoch": 1.92, "grad_norm": 7.363472980203379, "learning_rate": 4.2395456090061906e-08, "loss": 0.6186, "step": 21516 }, { "epoch": 1.92, "grad_norm": 8.432445162405642, "learning_rate": 4.230163767086126e-08, "loss": 0.5803, "step": 21517 }, { "epoch": 1.92, "grad_norm": 6.550571310040353, "learning_rate": 4.220792273275598e-08, "loss": 0.5414, "step": 21518 }, { "epoch": 1.92, "grad_norm": 8.528200996264104, "learning_rate": 4.211431127770338e-08, "loss": 0.5896, "step": 21519 }, { "epoch": 1.92, "grad_norm": 7.298528099268789, "learning_rate": 4.2020803307656345e-08, "loss": 0.5954, "step": 21520 }, { "epoch": 1.92, "grad_norm": 7.08489776765719, "learning_rate": 4.192739882456776e-08, "loss": 0.5333, "step": 21521 }, { "epoch": 1.92, "grad_norm": 5.126554713572835, "learning_rate": 4.183409783038661e-08, "loss": 0.6367, "step": 21522 }, { "epoch": 1.92, "grad_norm": 6.872196860474327, "learning_rate": 4.1740900327060244e-08, "loss": 0.6218, "step": 21523 }, { "epoch": 1.92, "grad_norm": 5.823065036902385, "learning_rate": 4.1647806316534314e-08, "loss": 0.5782, "step": 21524 }, { "epoch": 1.92, "grad_norm": 6.570855585598809, "learning_rate": 4.1554815800752266e-08, "loss": 0.5702, "step": 21525 }, { "epoch": 1.92, "grad_norm": 4.476622272937005, "learning_rate": 4.146192878165367e-08, "loss": 0.5109, "step": 21526 }, { "epoch": 1.92, "grad_norm": 6.399102059529293, "learning_rate": 4.136914526117919e-08, "loss": 0.5611, "step": 21527 }, { "epoch": 1.92, "grad_norm": 7.184404373269967, "learning_rate": 4.127646524126394e-08, "loss": 0.6016, "step": 21528 }, { "epoch": 1.92, "grad_norm": 7.224993888869598, "learning_rate": 4.11838887238436e-08, "loss": 0.5254, "step": 21529 }, { "epoch": 1.92, "grad_norm": 4.826091851265598, "learning_rate": 4.109141571084996e-08, "loss": 0.5528, "step": 21530 }, { "epoch": 1.92, "grad_norm": 6.160898513943865, "learning_rate": 4.099904620421258e-08, "loss": 0.5545, "step": 21531 }, { "epoch": 1.92, "grad_norm": 4.790212366595633, "learning_rate": 4.090678020586047e-08, "loss": 0.5792, "step": 21532 }, { "epoch": 1.92, "grad_norm": 6.989191100380436, "learning_rate": 4.081461771771933e-08, "loss": 0.5791, "step": 21533 }, { "epoch": 1.92, "grad_norm": 5.120979997113866, "learning_rate": 4.072255874171205e-08, "loss": 0.5399, "step": 21534 }, { "epoch": 1.92, "grad_norm": 10.070739244562043, "learning_rate": 4.0630603279760984e-08, "loss": 0.5094, "step": 21535 }, { "epoch": 1.92, "grad_norm": 6.961891859429244, "learning_rate": 4.053875133378515e-08, "loss": 0.5759, "step": 21536 }, { "epoch": 1.92, "grad_norm": 6.3305089832471735, "learning_rate": 4.044700290570247e-08, "loss": 0.5693, "step": 21537 }, { "epoch": 1.92, "grad_norm": 6.7909224249042035, "learning_rate": 4.035535799742696e-08, "loss": 0.507, "step": 21538 }, { "epoch": 1.92, "grad_norm": 5.271415838537933, "learning_rate": 4.026381661087153e-08, "loss": 0.5892, "step": 21539 }, { "epoch": 1.92, "grad_norm": 6.99852639158797, "learning_rate": 4.0172378747947435e-08, "loss": 0.552, "step": 21540 }, { "epoch": 1.92, "grad_norm": 8.085641378889687, "learning_rate": 4.0081044410563706e-08, "loss": 0.5753, "step": 21541 }, { "epoch": 1.92, "grad_norm": 6.074212159974657, "learning_rate": 3.9989813600625484e-08, "loss": 0.5796, "step": 21542 }, { "epoch": 1.92, "grad_norm": 7.108685634730223, "learning_rate": 3.9898686320037904e-08, "loss": 0.5949, "step": 21543 }, { "epoch": 1.92, "grad_norm": 5.245838488940144, "learning_rate": 3.980766257070279e-08, "loss": 0.5921, "step": 21544 }, { "epoch": 1.92, "grad_norm": 7.358307532348258, "learning_rate": 3.9716742354520275e-08, "loss": 0.5874, "step": 21545 }, { "epoch": 1.92, "grad_norm": 6.937147453757614, "learning_rate": 3.9625925673387745e-08, "loss": 0.5012, "step": 21546 }, { "epoch": 1.92, "grad_norm": 9.033436253883677, "learning_rate": 3.95352125292009e-08, "loss": 0.518, "step": 21547 }, { "epoch": 1.92, "grad_norm": 6.808512314667065, "learning_rate": 3.9444602923853216e-08, "loss": 0.5232, "step": 21548 }, { "epoch": 1.92, "grad_norm": 6.9495362315369364, "learning_rate": 3.9354096859235966e-08, "loss": 0.6129, "step": 21549 }, { "epoch": 1.92, "grad_norm": 7.641297029476702, "learning_rate": 3.926369433723876e-08, "loss": 0.598, "step": 21550 }, { "epoch": 1.92, "grad_norm": 7.438317151052993, "learning_rate": 3.917339535974784e-08, "loss": 0.5645, "step": 21551 }, { "epoch": 1.92, "grad_norm": 8.403117260392882, "learning_rate": 3.90831999286484e-08, "loss": 0.5959, "step": 21552 }, { "epoch": 1.92, "grad_norm": 8.466231752601669, "learning_rate": 3.8993108045822794e-08, "loss": 0.564, "step": 21553 }, { "epoch": 1.92, "grad_norm": 6.051660504254396, "learning_rate": 3.8903119713151193e-08, "loss": 0.5098, "step": 21554 }, { "epoch": 1.92, "grad_norm": 5.9552887083378065, "learning_rate": 3.8813234932512654e-08, "loss": 0.5252, "step": 21555 }, { "epoch": 1.92, "grad_norm": 7.018026794275556, "learning_rate": 3.872345370578345e-08, "loss": 0.5249, "step": 21556 }, { "epoch": 1.92, "grad_norm": 6.124714095828262, "learning_rate": 3.863377603483709e-08, "loss": 0.635, "step": 21557 }, { "epoch": 1.92, "grad_norm": 5.8491280939702515, "learning_rate": 3.8544201921544846e-08, "loss": 0.5562, "step": 21558 }, { "epoch": 1.92, "grad_norm": 4.909527367738023, "learning_rate": 3.8454731367777444e-08, "loss": 0.558, "step": 21559 }, { "epoch": 1.92, "grad_norm": 6.1765290007692615, "learning_rate": 3.836536437540172e-08, "loss": 0.5249, "step": 21560 }, { "epoch": 1.92, "grad_norm": 8.85935228599553, "learning_rate": 3.827610094628342e-08, "loss": 0.5987, "step": 21561 }, { "epoch": 1.92, "grad_norm": 7.163932264280189, "learning_rate": 3.8186941082285486e-08, "loss": 0.5668, "step": 21562 }, { "epoch": 1.92, "grad_norm": 8.343469255376862, "learning_rate": 3.8097884785269215e-08, "loss": 0.5896, "step": 21563 }, { "epoch": 1.92, "grad_norm": 7.357124716862172, "learning_rate": 3.800893205709311e-08, "loss": 0.5753, "step": 21564 }, { "epoch": 1.92, "grad_norm": 5.955465049010952, "learning_rate": 3.792008289961346e-08, "loss": 0.5732, "step": 21565 }, { "epoch": 1.92, "grad_norm": 5.5909924509584235, "learning_rate": 3.7831337314685467e-08, "loss": 0.5972, "step": 21566 }, { "epoch": 1.92, "grad_norm": 6.9820844862360865, "learning_rate": 3.7742695304161526e-08, "loss": 0.5802, "step": 21567 }, { "epoch": 1.92, "grad_norm": 7.036138003107151, "learning_rate": 3.765415686989182e-08, "loss": 0.5451, "step": 21568 }, { "epoch": 1.92, "grad_norm": 7.406601796515372, "learning_rate": 3.756572201372433e-08, "loss": 0.5448, "step": 21569 }, { "epoch": 1.92, "grad_norm": 8.320624998793535, "learning_rate": 3.7477390737504226e-08, "loss": 0.5821, "step": 21570 }, { "epoch": 1.92, "grad_norm": 7.1867340698653885, "learning_rate": 3.738916304307616e-08, "loss": 0.5395, "step": 21571 }, { "epoch": 1.92, "grad_norm": 5.996262084880464, "learning_rate": 3.730103893228143e-08, "loss": 0.5724, "step": 21572 }, { "epoch": 1.92, "grad_norm": 6.021966686049023, "learning_rate": 3.721301840695912e-08, "loss": 0.5769, "step": 21573 }, { "epoch": 1.92, "grad_norm": 6.810927163032706, "learning_rate": 3.712510146894721e-08, "loss": 0.6046, "step": 21574 }, { "epoch": 1.92, "grad_norm": 6.673354388763369, "learning_rate": 3.703728812008034e-08, "loss": 0.4794, "step": 21575 }, { "epoch": 1.92, "grad_norm": 8.421699061009063, "learning_rate": 3.694957836219149e-08, "loss": 0.5765, "step": 21576 }, { "epoch": 1.92, "grad_norm": 5.427353297524863, "learning_rate": 3.6861972197110854e-08, "loss": 0.4986, "step": 21577 }, { "epoch": 1.92, "grad_norm": 9.250760911872892, "learning_rate": 3.677446962666697e-08, "loss": 0.5092, "step": 21578 }, { "epoch": 1.93, "grad_norm": 7.5174971949680875, "learning_rate": 3.6687070652687285e-08, "loss": 0.5696, "step": 21579 }, { "epoch": 1.93, "grad_norm": 8.93650891446575, "learning_rate": 3.6599775276995875e-08, "loss": 0.5698, "step": 21580 }, { "epoch": 1.93, "grad_norm": 8.074392682373993, "learning_rate": 3.6512583501414064e-08, "loss": 0.578, "step": 21581 }, { "epoch": 1.93, "grad_norm": 5.64411955867253, "learning_rate": 3.642549532776263e-08, "loss": 0.4571, "step": 21582 }, { "epoch": 1.93, "grad_norm": 4.691337208689975, "learning_rate": 3.633851075785844e-08, "loss": 0.5146, "step": 21583 }, { "epoch": 1.93, "grad_norm": 7.312885166270491, "learning_rate": 3.625162979351782e-08, "loss": 0.529, "step": 21584 }, { "epoch": 1.93, "grad_norm": 5.69524241239654, "learning_rate": 3.6164852436553766e-08, "loss": 0.5647, "step": 21585 }, { "epoch": 1.93, "grad_norm": 5.72597093820198, "learning_rate": 3.6078178688778166e-08, "loss": 0.5545, "step": 21586 }, { "epoch": 1.93, "grad_norm": 5.871367210911551, "learning_rate": 3.599160855199957e-08, "loss": 0.5692, "step": 21587 }, { "epoch": 1.93, "grad_norm": 6.460081650978581, "learning_rate": 3.590514202802542e-08, "loss": 0.5838, "step": 21588 }, { "epoch": 1.93, "grad_norm": 6.46711196920435, "learning_rate": 3.5818779118660386e-08, "loss": 0.5592, "step": 21589 }, { "epoch": 1.93, "grad_norm": 6.31204337456974, "learning_rate": 3.573251982570636e-08, "loss": 0.6142, "step": 21590 }, { "epoch": 1.93, "grad_norm": 9.238336857412623, "learning_rate": 3.5646364150964674e-08, "loss": 0.5831, "step": 21591 }, { "epoch": 1.93, "grad_norm": 6.948616494561954, "learning_rate": 3.556031209623334e-08, "loss": 0.5709, "step": 21592 }, { "epoch": 1.93, "grad_norm": 7.045384683841549, "learning_rate": 3.54743636633087e-08, "loss": 0.5542, "step": 21593 }, { "epoch": 1.93, "grad_norm": 7.610896682560669, "learning_rate": 3.5388518853984314e-08, "loss": 0.5888, "step": 21594 }, { "epoch": 1.93, "grad_norm": 8.697616993052401, "learning_rate": 3.530277767005263e-08, "loss": 0.5909, "step": 21595 }, { "epoch": 1.93, "grad_norm": 9.934471946191781, "learning_rate": 3.5217140113302795e-08, "loss": 0.5275, "step": 21596 }, { "epoch": 1.93, "grad_norm": 6.371504483906515, "learning_rate": 3.513160618552281e-08, "loss": 0.6846, "step": 21597 }, { "epoch": 1.93, "grad_norm": 5.734714735315072, "learning_rate": 3.504617588849735e-08, "loss": 0.5929, "step": 21598 }, { "epoch": 1.93, "grad_norm": 8.454139423799212, "learning_rate": 3.496084922400944e-08, "loss": 0.559, "step": 21599 }, { "epoch": 1.93, "grad_norm": 5.647400243937806, "learning_rate": 3.487562619384155e-08, "loss": 0.4977, "step": 21600 }, { "epoch": 1.93, "grad_norm": 6.313605619347463, "learning_rate": 3.4790506799770585e-08, "loss": 0.5688, "step": 21601 }, { "epoch": 1.93, "grad_norm": 7.607160250221201, "learning_rate": 3.470549104357457e-08, "loss": 0.5965, "step": 21602 }, { "epoch": 1.93, "grad_norm": 7.0142550249419235, "learning_rate": 3.462057892702764e-08, "loss": 0.5575, "step": 21603 }, { "epoch": 1.93, "grad_norm": 7.191625118834433, "learning_rate": 3.453577045190226e-08, "loss": 0.5955, "step": 21604 }, { "epoch": 1.93, "grad_norm": 7.186947799277279, "learning_rate": 3.445106561996869e-08, "loss": 0.5877, "step": 21605 }, { "epoch": 1.93, "grad_norm": 4.400534298116333, "learning_rate": 3.436646443299441e-08, "loss": 0.5414, "step": 21606 }, { "epoch": 1.93, "grad_norm": 8.603893842729763, "learning_rate": 3.4281966892745765e-08, "loss": 0.5622, "step": 21607 }, { "epoch": 1.93, "grad_norm": 7.2124252213330315, "learning_rate": 3.4197573000985806e-08, "loss": 0.5134, "step": 21608 }, { "epoch": 1.93, "grad_norm": 7.173512831432326, "learning_rate": 3.4113282759477004e-08, "loss": 0.5753, "step": 21609 }, { "epoch": 1.93, "grad_norm": 11.688112270190844, "learning_rate": 3.402909616997851e-08, "loss": 0.6355, "step": 21610 }, { "epoch": 1.93, "grad_norm": 8.503052469381904, "learning_rate": 3.39450132342467e-08, "loss": 0.5535, "step": 21611 }, { "epoch": 1.93, "grad_norm": 6.318610798722964, "learning_rate": 3.3861033954037946e-08, "loss": 0.5419, "step": 21612 }, { "epoch": 1.93, "grad_norm": 8.103343772384733, "learning_rate": 3.3777158331104176e-08, "loss": 0.5372, "step": 21613 }, { "epoch": 1.93, "grad_norm": 6.895471075512797, "learning_rate": 3.369338636719677e-08, "loss": 0.5408, "step": 21614 }, { "epoch": 1.93, "grad_norm": 7.4670121857546645, "learning_rate": 3.360971806406321e-08, "loss": 0.5508, "step": 21615 }, { "epoch": 1.93, "grad_norm": 5.20245207504106, "learning_rate": 3.3526153423451005e-08, "loss": 0.6136, "step": 21616 }, { "epoch": 1.93, "grad_norm": 7.592729101963866, "learning_rate": 3.344269244710374e-08, "loss": 0.5844, "step": 21617 }, { "epoch": 1.93, "grad_norm": 7.077439577221369, "learning_rate": 3.3359335136763924e-08, "loss": 0.5805, "step": 21618 }, { "epoch": 1.93, "grad_norm": 6.657542859086552, "learning_rate": 3.327608149417127e-08, "loss": 0.557, "step": 21619 }, { "epoch": 1.93, "grad_norm": 11.887703522680765, "learning_rate": 3.319293152106329e-08, "loss": 0.5676, "step": 21620 }, { "epoch": 1.93, "grad_norm": 6.387175168808456, "learning_rate": 3.31098852191758e-08, "loss": 0.465, "step": 21621 }, { "epoch": 1.93, "grad_norm": 9.076237623559106, "learning_rate": 3.302694259024186e-08, "loss": 0.5635, "step": 21622 }, { "epoch": 1.93, "grad_norm": 6.689398449218368, "learning_rate": 3.294410363599343e-08, "loss": 0.5615, "step": 21623 }, { "epoch": 1.93, "grad_norm": 8.563658185164009, "learning_rate": 3.286136835815912e-08, "loss": 0.5131, "step": 21624 }, { "epoch": 1.93, "grad_norm": 5.9235884010767945, "learning_rate": 3.277873675846532e-08, "loss": 0.5522, "step": 21625 }, { "epoch": 1.93, "grad_norm": 6.4171803145443, "learning_rate": 3.2696208838638444e-08, "loss": 0.5333, "step": 21626 }, { "epoch": 1.93, "grad_norm": 6.3285573835640045, "learning_rate": 3.261378460039932e-08, "loss": 0.5335, "step": 21627 }, { "epoch": 1.93, "grad_norm": 6.555243017226559, "learning_rate": 3.2531464045468806e-08, "loss": 0.5063, "step": 21628 }, { "epoch": 1.93, "grad_norm": 6.431481238485403, "learning_rate": 3.244924717556552e-08, "loss": 0.56, "step": 21629 }, { "epoch": 1.93, "grad_norm": 8.070535263731713, "learning_rate": 3.236713399240588e-08, "loss": 0.5657, "step": 21630 }, { "epoch": 1.93, "grad_norm": 6.04229734061394, "learning_rate": 3.2285124497702935e-08, "loss": 0.5684, "step": 21631 }, { "epoch": 1.93, "grad_norm": 7.37767618504824, "learning_rate": 3.2203218693169224e-08, "loss": 0.5228, "step": 21632 }, { "epoch": 1.93, "grad_norm": 7.9367457962268215, "learning_rate": 3.212141658051449e-08, "loss": 0.6148, "step": 21633 }, { "epoch": 1.93, "grad_norm": 6.260695546082759, "learning_rate": 3.203971816144513e-08, "loss": 0.55, "step": 21634 }, { "epoch": 1.93, "grad_norm": 9.28486018788722, "learning_rate": 3.195812343766702e-08, "loss": 0.5923, "step": 21635 }, { "epoch": 1.93, "grad_norm": 5.12744097849967, "learning_rate": 3.187663241088323e-08, "loss": 0.5729, "step": 21636 }, { "epoch": 1.93, "grad_norm": 7.405021340508431, "learning_rate": 3.1795245082794635e-08, "loss": 0.5998, "step": 21637 }, { "epoch": 1.93, "grad_norm": 7.3479507943118705, "learning_rate": 3.1713961455099863e-08, "loss": 0.5603, "step": 21638 }, { "epoch": 1.93, "grad_norm": 5.92341285103511, "learning_rate": 3.163278152949645e-08, "loss": 0.6319, "step": 21639 }, { "epoch": 1.93, "grad_norm": 6.140741345856287, "learning_rate": 3.155170530767804e-08, "loss": 0.6084, "step": 21640 }, { "epoch": 1.93, "grad_norm": 6.81086848424523, "learning_rate": 3.147073279133661e-08, "loss": 0.6211, "step": 21641 }, { "epoch": 1.93, "grad_norm": 4.838572680155705, "learning_rate": 3.1389863982163036e-08, "loss": 0.6266, "step": 21642 }, { "epoch": 1.93, "grad_norm": 7.777214921172168, "learning_rate": 3.1309098881844854e-08, "loss": 0.5654, "step": 21643 }, { "epoch": 1.93, "grad_norm": 7.0289884298797, "learning_rate": 3.1228437492068495e-08, "loss": 0.5649, "step": 21644 }, { "epoch": 1.93, "grad_norm": 6.925940534185544, "learning_rate": 3.114787981451706e-08, "loss": 0.6166, "step": 21645 }, { "epoch": 1.93, "grad_norm": 7.5584978115646155, "learning_rate": 3.106742585087141e-08, "loss": 0.5393, "step": 21646 }, { "epoch": 1.93, "grad_norm": 6.431395716915658, "learning_rate": 3.098707560281189e-08, "loss": 0.5826, "step": 21647 }, { "epoch": 1.93, "grad_norm": 6.275768686059172, "learning_rate": 3.090682907201492e-08, "loss": 0.5595, "step": 21648 }, { "epoch": 1.93, "grad_norm": 7.463363317051256, "learning_rate": 3.0826686260155834e-08, "loss": 0.589, "step": 21649 }, { "epoch": 1.93, "grad_norm": 5.418174401169631, "learning_rate": 3.074664716890774e-08, "loss": 0.6099, "step": 21650 }, { "epoch": 1.93, "grad_norm": 7.279435585677287, "learning_rate": 3.0666711799940407e-08, "loss": 0.6163, "step": 21651 }, { "epoch": 1.93, "grad_norm": 6.990277697158665, "learning_rate": 3.0586880154923057e-08, "loss": 0.5729, "step": 21652 }, { "epoch": 1.93, "grad_norm": 6.398321651888566, "learning_rate": 3.050715223552214e-08, "loss": 0.6426, "step": 21653 }, { "epoch": 1.93, "grad_norm": 6.681328515219577, "learning_rate": 3.0427528043400765e-08, "loss": 0.5796, "step": 21654 }, { "epoch": 1.93, "grad_norm": 4.984316225684207, "learning_rate": 3.0348007580221493e-08, "loss": 0.5341, "step": 21655 }, { "epoch": 1.93, "grad_norm": 8.956535136888618, "learning_rate": 3.0268590847644663e-08, "loss": 0.5599, "step": 21656 }, { "epoch": 1.93, "grad_norm": 5.894537014662968, "learning_rate": 3.0189277847327283e-08, "loss": 0.5736, "step": 21657 }, { "epoch": 1.93, "grad_norm": 6.588749045979971, "learning_rate": 3.011006858092524e-08, "loss": 0.5685, "step": 21658 }, { "epoch": 1.93, "grad_norm": 8.237382776860303, "learning_rate": 3.003096305009168e-08, "loss": 0.587, "step": 21659 }, { "epoch": 1.93, "grad_norm": 5.745459412511875, "learning_rate": 2.995196125647748e-08, "loss": 0.575, "step": 21660 }, { "epoch": 1.93, "grad_norm": 5.75090511598569, "learning_rate": 2.987306320173189e-08, "loss": 0.565, "step": 21661 }, { "epoch": 1.93, "grad_norm": 8.671715078018352, "learning_rate": 2.979426888750192e-08, "loss": 0.6179, "step": 21662 }, { "epoch": 1.93, "grad_norm": 6.574905651049755, "learning_rate": 2.971557831543126e-08, "loss": 0.5837, "step": 21663 }, { "epoch": 1.93, "grad_norm": 8.578212737622344, "learning_rate": 2.9636991487164147e-08, "loss": 0.492, "step": 21664 }, { "epoch": 1.93, "grad_norm": 7.01954210654065, "learning_rate": 2.955850840433927e-08, "loss": 0.5608, "step": 21665 }, { "epoch": 1.93, "grad_norm": 9.184294196952886, "learning_rate": 2.9480129068595876e-08, "loss": 0.5868, "step": 21666 }, { "epoch": 1.93, "grad_norm": 7.684982419124985, "learning_rate": 2.940185348156932e-08, "loss": 0.5377, "step": 21667 }, { "epoch": 1.93, "grad_norm": 6.626160557581862, "learning_rate": 2.9323681644893854e-08, "loss": 0.4892, "step": 21668 }, { "epoch": 1.93, "grad_norm": 8.159275097691499, "learning_rate": 2.9245613560200392e-08, "loss": 0.5682, "step": 21669 }, { "epoch": 1.93, "grad_norm": 6.3972225065802375, "learning_rate": 2.91676492291193e-08, "loss": 0.5504, "step": 21670 }, { "epoch": 1.93, "grad_norm": 8.336891794623213, "learning_rate": 2.908978865327816e-08, "loss": 0.6123, "step": 21671 }, { "epoch": 1.93, "grad_norm": 9.962378175026108, "learning_rate": 2.901203183430068e-08, "loss": 0.5785, "step": 21672 }, { "epoch": 1.93, "grad_norm": 6.0873731368302755, "learning_rate": 2.8934378773811112e-08, "loss": 0.4597, "step": 21673 }, { "epoch": 1.93, "grad_norm": 5.770400705308239, "learning_rate": 2.8856829473429826e-08, "loss": 0.5584, "step": 21674 }, { "epoch": 1.93, "grad_norm": 5.98537114280287, "learning_rate": 2.8779383934776083e-08, "loss": 0.5543, "step": 21675 }, { "epoch": 1.93, "grad_norm": 7.26360424480145, "learning_rate": 2.8702042159465815e-08, "loss": 0.5161, "step": 21676 }, { "epoch": 1.93, "grad_norm": 6.903211358658218, "learning_rate": 2.862480414911273e-08, "loss": 0.5362, "step": 21677 }, { "epoch": 1.93, "grad_norm": 7.334159169442731, "learning_rate": 2.854766990533053e-08, "loss": 0.5241, "step": 21678 }, { "epoch": 1.93, "grad_norm": 5.155062263961702, "learning_rate": 2.8470639429727943e-08, "loss": 0.5638, "step": 21679 }, { "epoch": 1.93, "grad_norm": 6.013764467140999, "learning_rate": 2.8393712723913668e-08, "loss": 0.5653, "step": 21680 }, { "epoch": 1.93, "grad_norm": 8.62147477717422, "learning_rate": 2.8316889789492542e-08, "loss": 0.6257, "step": 21681 }, { "epoch": 1.93, "grad_norm": 7.48504150792668, "learning_rate": 2.824017062806883e-08, "loss": 0.4972, "step": 21682 }, { "epoch": 1.93, "grad_norm": 7.229000105739291, "learning_rate": 2.8163555241244033e-08, "loss": 0.5538, "step": 21683 }, { "epoch": 1.93, "grad_norm": 7.105741468743016, "learning_rate": 2.8087043630616316e-08, "loss": 0.5743, "step": 21684 }, { "epoch": 1.93, "grad_norm": 9.143191289090243, "learning_rate": 2.8010635797783293e-08, "loss": 0.5613, "step": 21685 }, { "epoch": 1.93, "grad_norm": 6.188738294250861, "learning_rate": 2.7934331744339795e-08, "loss": 0.6004, "step": 21686 }, { "epoch": 1.93, "grad_norm": 8.037861413711314, "learning_rate": 2.7858131471878434e-08, "loss": 0.5724, "step": 21687 }, { "epoch": 1.93, "grad_norm": 5.978432286737291, "learning_rate": 2.7782034981989613e-08, "loss": 0.5129, "step": 21688 }, { "epoch": 1.93, "grad_norm": 6.283690544180308, "learning_rate": 2.7706042276262612e-08, "loss": 0.5543, "step": 21689 }, { "epoch": 1.93, "grad_norm": 6.063971222062769, "learning_rate": 2.7630153356282273e-08, "loss": 0.5699, "step": 21690 }, { "epoch": 1.94, "grad_norm": 5.564173195424937, "learning_rate": 2.755436822363344e-08, "loss": 0.5426, "step": 21691 }, { "epoch": 1.94, "grad_norm": 4.813720855508775, "learning_rate": 2.7478686879897078e-08, "loss": 0.4985, "step": 21692 }, { "epoch": 1.94, "grad_norm": 7.237062708309648, "learning_rate": 2.7403109326654132e-08, "loss": 0.6252, "step": 21693 }, { "epoch": 1.94, "grad_norm": 5.906552619506226, "learning_rate": 2.7327635565481126e-08, "loss": 0.5749, "step": 21694 }, { "epoch": 1.94, "grad_norm": 4.164837117661732, "learning_rate": 2.725226559795402e-08, "loss": 0.5352, "step": 21695 }, { "epoch": 1.94, "grad_norm": 6.521367107648826, "learning_rate": 2.7176999425645443e-08, "loss": 0.5708, "step": 21696 }, { "epoch": 1.94, "grad_norm": 5.956684702550687, "learning_rate": 2.710183705012692e-08, "loss": 0.4719, "step": 21697 }, { "epoch": 1.94, "grad_norm": 7.9417550637537735, "learning_rate": 2.7026778472967197e-08, "loss": 0.5652, "step": 21698 }, { "epoch": 1.94, "grad_norm": 9.011630165752578, "learning_rate": 2.6951823695732794e-08, "loss": 0.5084, "step": 21699 }, { "epoch": 1.94, "grad_norm": 7.928921285653225, "learning_rate": 2.6876972719988015e-08, "loss": 0.5273, "step": 21700 }, { "epoch": 1.94, "grad_norm": 5.795206335142996, "learning_rate": 2.6802225547295502e-08, "loss": 0.6469, "step": 21701 }, { "epoch": 1.94, "grad_norm": 7.303985755631059, "learning_rate": 2.6727582179215673e-08, "loss": 0.5531, "step": 21702 }, { "epoch": 1.94, "grad_norm": 8.163472142916998, "learning_rate": 2.6653042617306168e-08, "loss": 0.5323, "step": 21703 }, { "epoch": 1.94, "grad_norm": 6.856084808659494, "learning_rate": 2.657860686312297e-08, "loss": 0.5508, "step": 21704 }, { "epoch": 1.94, "grad_norm": 6.57423562291821, "learning_rate": 2.650427491821983e-08, "loss": 0.5463, "step": 21705 }, { "epoch": 1.94, "grad_norm": 6.12631226421573, "learning_rate": 2.643004678414829e-08, "loss": 0.5873, "step": 21706 }, { "epoch": 1.94, "grad_norm": 10.032383195913038, "learning_rate": 2.6355922462457106e-08, "loss": 0.5439, "step": 21707 }, { "epoch": 1.94, "grad_norm": 6.705325154399892, "learning_rate": 2.6281901954693933e-08, "loss": 0.5455, "step": 21708 }, { "epoch": 1.94, "grad_norm": 5.88599536969444, "learning_rate": 2.6207985262404756e-08, "loss": 0.541, "step": 21709 }, { "epoch": 1.94, "grad_norm": 7.035639995313974, "learning_rate": 2.613417238713056e-08, "loss": 0.5448, "step": 21710 }, { "epoch": 1.94, "grad_norm": 5.849341234135785, "learning_rate": 2.60604633304129e-08, "loss": 0.5634, "step": 21711 }, { "epoch": 1.94, "grad_norm": 4.616220993979164, "learning_rate": 2.5986858093790533e-08, "loss": 0.5436, "step": 21712 }, { "epoch": 1.94, "grad_norm": 9.013412023019955, "learning_rate": 2.5913356678800017e-08, "loss": 0.564, "step": 21713 }, { "epoch": 1.94, "grad_norm": 7.690049020012235, "learning_rate": 2.5839959086974566e-08, "loss": 0.5867, "step": 21714 }, { "epoch": 1.94, "grad_norm": 9.324827819300607, "learning_rate": 2.5766665319846838e-08, "loss": 0.5619, "step": 21715 }, { "epoch": 1.94, "grad_norm": 5.499834939180944, "learning_rate": 2.569347537894673e-08, "loss": 0.5772, "step": 21716 }, { "epoch": 1.94, "grad_norm": 6.403407924147132, "learning_rate": 2.5620389265801902e-08, "loss": 0.6118, "step": 21717 }, { "epoch": 1.94, "grad_norm": 5.318616992698283, "learning_rate": 2.5547406981937252e-08, "loss": 0.5703, "step": 21718 }, { "epoch": 1.94, "grad_norm": 7.413373511667841, "learning_rate": 2.547452852887711e-08, "loss": 0.6351, "step": 21719 }, { "epoch": 1.94, "grad_norm": 8.38946448377567, "learning_rate": 2.5401753908141923e-08, "loss": 0.5962, "step": 21720 }, { "epoch": 1.94, "grad_norm": 8.033886722127164, "learning_rate": 2.532908312125104e-08, "loss": 0.6198, "step": 21721 }, { "epoch": 1.94, "grad_norm": 6.635767556240944, "learning_rate": 2.525651616972158e-08, "loss": 0.6014, "step": 21722 }, { "epoch": 1.94, "grad_norm": 7.048111145833499, "learning_rate": 2.5184053055067325e-08, "loss": 0.5288, "step": 21723 }, { "epoch": 1.94, "grad_norm": 7.139698722291098, "learning_rate": 2.5111693778801517e-08, "loss": 0.5694, "step": 21724 }, { "epoch": 1.94, "grad_norm": 9.55859996798095, "learning_rate": 2.5039438342434054e-08, "loss": 0.5734, "step": 21725 }, { "epoch": 1.94, "grad_norm": 7.6864111351829445, "learning_rate": 2.4967286747473174e-08, "loss": 0.5306, "step": 21726 }, { "epoch": 1.94, "grad_norm": 5.270800718907912, "learning_rate": 2.4895238995425452e-08, "loss": 0.5533, "step": 21727 }, { "epoch": 1.94, "grad_norm": 5.596191210622362, "learning_rate": 2.482329508779413e-08, "loss": 0.5254, "step": 21728 }, { "epoch": 1.94, "grad_norm": 4.719597001417965, "learning_rate": 2.475145502608134e-08, "loss": 0.5675, "step": 21729 }, { "epoch": 1.94, "grad_norm": 6.610054334619293, "learning_rate": 2.467971881178588e-08, "loss": 0.5595, "step": 21730 }, { "epoch": 1.94, "grad_norm": 6.433317693823554, "learning_rate": 2.4608086446406e-08, "loss": 0.6024, "step": 21731 }, { "epoch": 1.94, "grad_norm": 7.3088969381000455, "learning_rate": 2.4536557931436056e-08, "loss": 0.5494, "step": 21732 }, { "epoch": 1.94, "grad_norm": 9.325905916468637, "learning_rate": 2.44651332683693e-08, "loss": 0.569, "step": 21733 }, { "epoch": 1.94, "grad_norm": 6.943450737940932, "learning_rate": 2.4393812458697318e-08, "loss": 0.5482, "step": 21734 }, { "epoch": 1.94, "grad_norm": 5.886559314369704, "learning_rate": 2.432259550390781e-08, "loss": 0.5226, "step": 21735 }, { "epoch": 1.94, "grad_norm": 6.279691845873403, "learning_rate": 2.4251482405486803e-08, "loss": 0.5251, "step": 21736 }, { "epoch": 1.94, "grad_norm": 8.146489938986656, "learning_rate": 2.4180473164920336e-08, "loss": 0.4777, "step": 21737 }, { "epoch": 1.94, "grad_norm": 8.611729753779853, "learning_rate": 2.4109567783688892e-08, "loss": 0.5269, "step": 21738 }, { "epoch": 1.94, "grad_norm": 7.499968072533326, "learning_rate": 2.4038766263273506e-08, "loss": 0.557, "step": 21739 }, { "epoch": 1.94, "grad_norm": 6.387552872696248, "learning_rate": 2.3968068605151885e-08, "loss": 0.531, "step": 21740 }, { "epoch": 1.94, "grad_norm": 6.582810447730976, "learning_rate": 2.3897474810799516e-08, "loss": 0.505, "step": 21741 }, { "epoch": 1.94, "grad_norm": 7.207494358130621, "learning_rate": 2.3826984881690217e-08, "loss": 0.5507, "step": 21742 }, { "epoch": 1.94, "grad_norm": 6.0469528797532615, "learning_rate": 2.3756598819294484e-08, "loss": 0.5196, "step": 21743 }, { "epoch": 1.94, "grad_norm": 6.744261255695491, "learning_rate": 2.3686316625081697e-08, "loss": 0.6048, "step": 21744 }, { "epoch": 1.94, "grad_norm": 7.5594763097313304, "learning_rate": 2.3616138300519566e-08, "loss": 0.5272, "step": 21745 }, { "epoch": 1.94, "grad_norm": 4.747032405768726, "learning_rate": 2.3546063847071922e-08, "loss": 0.5161, "step": 21746 }, { "epoch": 1.94, "grad_norm": 5.847796168836106, "learning_rate": 2.3476093266202594e-08, "loss": 0.5579, "step": 21747 }, { "epoch": 1.94, "grad_norm": 6.429880582454862, "learning_rate": 2.3406226559371526e-08, "loss": 0.5411, "step": 21748 }, { "epoch": 1.94, "grad_norm": 5.72429520966671, "learning_rate": 2.3336463728036442e-08, "loss": 0.5687, "step": 21749 }, { "epoch": 1.94, "grad_norm": 7.101917867828132, "learning_rate": 2.326680477365395e-08, "loss": 0.5768, "step": 21750 }, { "epoch": 1.94, "grad_norm": 5.5558094214494265, "learning_rate": 2.319724969767845e-08, "loss": 0.4945, "step": 21751 }, { "epoch": 1.94, "grad_norm": 4.569155119608232, "learning_rate": 2.3127798501560993e-08, "loss": 0.5771, "step": 21752 }, { "epoch": 1.94, "grad_norm": 7.9547347612579, "learning_rate": 2.305845118675154e-08, "loss": 0.6309, "step": 21753 }, { "epoch": 1.94, "grad_norm": 5.89059520103603, "learning_rate": 2.2989207754697818e-08, "loss": 0.5517, "step": 21754 }, { "epoch": 1.94, "grad_norm": 4.933713526928987, "learning_rate": 2.2920068206844782e-08, "loss": 0.5618, "step": 21755 }, { "epoch": 1.94, "grad_norm": 6.755314729988431, "learning_rate": 2.285103254463572e-08, "loss": 0.531, "step": 21756 }, { "epoch": 1.94, "grad_norm": 6.495774901204524, "learning_rate": 2.2782100769511705e-08, "loss": 0.4857, "step": 21757 }, { "epoch": 1.94, "grad_norm": 8.669293251838015, "learning_rate": 2.271327288291103e-08, "loss": 0.5916, "step": 21758 }, { "epoch": 1.94, "grad_norm": 5.145806786956795, "learning_rate": 2.2644548886271434e-08, "loss": 0.6065, "step": 21759 }, { "epoch": 1.94, "grad_norm": 7.836509566957288, "learning_rate": 2.2575928781026214e-08, "loss": 0.6268, "step": 21760 }, { "epoch": 1.94, "grad_norm": 6.20792060934523, "learning_rate": 2.2507412568608112e-08, "loss": 0.5513, "step": 21761 }, { "epoch": 1.94, "grad_norm": 5.259909689467585, "learning_rate": 2.243900025044765e-08, "loss": 0.5766, "step": 21762 }, { "epoch": 1.94, "grad_norm": 7.320544831942432, "learning_rate": 2.237069182797147e-08, "loss": 0.5363, "step": 21763 }, { "epoch": 1.94, "grad_norm": 5.1207956274185875, "learning_rate": 2.2302487302606755e-08, "loss": 0.5388, "step": 21764 }, { "epoch": 1.94, "grad_norm": 6.953254225168298, "learning_rate": 2.223438667577682e-08, "loss": 0.539, "step": 21765 }, { "epoch": 1.94, "grad_norm": 5.237611795456866, "learning_rate": 2.2166389948902744e-08, "loss": 0.6134, "step": 21766 }, { "epoch": 1.94, "grad_norm": 8.85496628603445, "learning_rate": 2.2098497123404505e-08, "loss": 0.5434, "step": 21767 }, { "epoch": 1.94, "grad_norm": 7.669754841773753, "learning_rate": 2.2030708200698194e-08, "loss": 0.5986, "step": 21768 }, { "epoch": 1.94, "grad_norm": 4.664422438985362, "learning_rate": 2.1963023182199338e-08, "loss": 0.5951, "step": 21769 }, { "epoch": 1.94, "grad_norm": 5.006506347296762, "learning_rate": 2.1895442069320704e-08, "loss": 0.5758, "step": 21770 }, { "epoch": 1.94, "grad_norm": 8.788327276227715, "learning_rate": 2.1827964863472828e-08, "loss": 0.5903, "step": 21771 }, { "epoch": 1.94, "grad_norm": 6.246397108608395, "learning_rate": 2.1760591566064026e-08, "loss": 0.5403, "step": 21772 }, { "epoch": 1.94, "grad_norm": 8.172458586331109, "learning_rate": 2.1693322178500952e-08, "loss": 0.5824, "step": 21773 }, { "epoch": 1.94, "grad_norm": 5.264459942800589, "learning_rate": 2.1626156702187486e-08, "loss": 0.5647, "step": 21774 }, { "epoch": 1.94, "grad_norm": 7.144130890767491, "learning_rate": 2.1559095138525276e-08, "loss": 0.5734, "step": 21775 }, { "epoch": 1.94, "grad_norm": 7.704682392144669, "learning_rate": 2.1492137488914877e-08, "loss": 0.5421, "step": 21776 }, { "epoch": 1.94, "grad_norm": 7.882854072911726, "learning_rate": 2.1425283754752945e-08, "loss": 0.5387, "step": 21777 }, { "epoch": 1.94, "grad_norm": 7.127466073571384, "learning_rate": 2.135853393743559e-08, "loss": 0.593, "step": 21778 }, { "epoch": 1.94, "grad_norm": 9.047675678272398, "learning_rate": 2.1291888038355578e-08, "loss": 0.5452, "step": 21779 }, { "epoch": 1.94, "grad_norm": 6.543412211712778, "learning_rate": 2.1225346058904585e-08, "loss": 0.5344, "step": 21780 }, { "epoch": 1.94, "grad_norm": 5.855689262814037, "learning_rate": 2.115890800047038e-08, "loss": 0.6017, "step": 21781 }, { "epoch": 1.94, "grad_norm": 6.580160082893052, "learning_rate": 2.1092573864441303e-08, "loss": 0.5061, "step": 21782 }, { "epoch": 1.94, "grad_norm": 7.173886110891978, "learning_rate": 2.1026343652200688e-08, "loss": 0.5717, "step": 21783 }, { "epoch": 1.94, "grad_norm": 6.219702878251234, "learning_rate": 2.096021736513132e-08, "loss": 0.5475, "step": 21784 }, { "epoch": 1.94, "grad_norm": 6.644428623241483, "learning_rate": 2.089419500461376e-08, "loss": 0.5215, "step": 21785 }, { "epoch": 1.94, "grad_norm": 6.34236938578635, "learning_rate": 2.0828276572025796e-08, "loss": 0.5471, "step": 21786 }, { "epoch": 1.94, "grad_norm": 8.824664217231707, "learning_rate": 2.0762462068742993e-08, "loss": 0.5277, "step": 21787 }, { "epoch": 1.94, "grad_norm": 7.338586047857313, "learning_rate": 2.069675149613981e-08, "loss": 0.5594, "step": 21788 }, { "epoch": 1.94, "grad_norm": 7.177266013250777, "learning_rate": 2.0631144855587372e-08, "loss": 0.5313, "step": 21789 }, { "epoch": 1.94, "grad_norm": 9.316164731579128, "learning_rate": 2.0565642148455132e-08, "loss": 0.5409, "step": 21790 }, { "epoch": 1.94, "grad_norm": 7.359617511991421, "learning_rate": 2.050024337611034e-08, "loss": 0.5653, "step": 21791 }, { "epoch": 1.94, "grad_norm": 5.446679184579295, "learning_rate": 2.0434948539918564e-08, "loss": 0.5375, "step": 21792 }, { "epoch": 1.94, "grad_norm": 7.8127614413638735, "learning_rate": 2.0369757641241493e-08, "loss": 0.5156, "step": 21793 }, { "epoch": 1.94, "grad_norm": 6.677454040081512, "learning_rate": 2.0304670681440818e-08, "loss": 0.4932, "step": 21794 }, { "epoch": 1.94, "grad_norm": 8.754411831207612, "learning_rate": 2.023968766187434e-08, "loss": 0.5777, "step": 21795 }, { "epoch": 1.94, "grad_norm": 9.648765497180351, "learning_rate": 2.017480858389931e-08, "loss": 0.5648, "step": 21796 }, { "epoch": 1.94, "grad_norm": 5.8009615121102, "learning_rate": 2.0110033448869636e-08, "loss": 0.5529, "step": 21797 }, { "epoch": 1.94, "grad_norm": 4.770213002282771, "learning_rate": 2.0045362258137024e-08, "loss": 0.5726, "step": 21798 }, { "epoch": 1.94, "grad_norm": 5.980917992615495, "learning_rate": 1.99807950130515e-08, "loss": 0.6071, "step": 21799 }, { "epoch": 1.94, "grad_norm": 7.684523089804364, "learning_rate": 1.991633171496088e-08, "loss": 0.4982, "step": 21800 }, { "epoch": 1.94, "grad_norm": 6.531041802077858, "learning_rate": 1.9851972365210748e-08, "loss": 0.521, "step": 21801 }, { "epoch": 1.94, "grad_norm": 7.459834074668601, "learning_rate": 1.9787716965144476e-08, "loss": 0.582, "step": 21802 }, { "epoch": 1.95, "grad_norm": 6.117973298394805, "learning_rate": 1.972356551610266e-08, "loss": 0.5482, "step": 21803 }, { "epoch": 1.95, "grad_norm": 6.558900468054927, "learning_rate": 1.9659518019425338e-08, "loss": 0.5656, "step": 21804 }, { "epoch": 1.95, "grad_norm": 7.55750153360635, "learning_rate": 1.9595574476448663e-08, "loss": 0.5835, "step": 21805 }, { "epoch": 1.95, "grad_norm": 5.607593148334502, "learning_rate": 1.953173488850768e-08, "loss": 0.5316, "step": 21806 }, { "epoch": 1.95, "grad_norm": 10.303104718267381, "learning_rate": 1.94679992569341e-08, "loss": 0.5835, "step": 21807 }, { "epoch": 1.95, "grad_norm": 7.011168858637809, "learning_rate": 1.940436758305908e-08, "loss": 0.5564, "step": 21808 }, { "epoch": 1.95, "grad_norm": 6.163093958432611, "learning_rate": 1.9340839868210448e-08, "loss": 0.5985, "step": 21809 }, { "epoch": 1.95, "grad_norm": 9.001056973566198, "learning_rate": 1.927741611371492e-08, "loss": 0.5901, "step": 21810 }, { "epoch": 1.95, "grad_norm": 4.715968098574154, "learning_rate": 1.9214096320895325e-08, "loss": 0.5595, "step": 21811 }, { "epoch": 1.95, "grad_norm": 8.890050944620311, "learning_rate": 1.9150880491073388e-08, "loss": 0.5914, "step": 21812 }, { "epoch": 1.95, "grad_norm": 7.0531972736292765, "learning_rate": 1.9087768625569712e-08, "loss": 0.588, "step": 21813 }, { "epoch": 1.95, "grad_norm": 7.7830220939560295, "learning_rate": 1.902476072569992e-08, "loss": 0.5451, "step": 21814 }, { "epoch": 1.95, "grad_norm": 8.068092357125058, "learning_rate": 1.896185679278073e-08, "loss": 0.5356, "step": 21815 }, { "epoch": 1.95, "grad_norm": 6.753766068435255, "learning_rate": 1.889905682812443e-08, "loss": 0.5271, "step": 21816 }, { "epoch": 1.95, "grad_norm": 6.689748498497799, "learning_rate": 1.883636083304219e-08, "loss": 0.5753, "step": 21817 }, { "epoch": 1.95, "grad_norm": 7.2196375728296465, "learning_rate": 1.8773768808841852e-08, "loss": 0.5776, "step": 21818 }, { "epoch": 1.95, "grad_norm": 8.654680098237652, "learning_rate": 1.8711280756830153e-08, "loss": 0.576, "step": 21819 }, { "epoch": 1.95, "grad_norm": 8.237970805249667, "learning_rate": 1.864889667831271e-08, "loss": 0.5286, "step": 21820 }, { "epoch": 1.95, "grad_norm": 5.763275750601125, "learning_rate": 1.85866165745896e-08, "loss": 0.6088, "step": 21821 }, { "epoch": 1.95, "grad_norm": 8.257102946968551, "learning_rate": 1.8524440446962e-08, "loss": 0.5224, "step": 21822 }, { "epoch": 1.95, "grad_norm": 7.7462269593744, "learning_rate": 1.8462368296727207e-08, "loss": 0.5268, "step": 21823 }, { "epoch": 1.95, "grad_norm": 7.38745605621588, "learning_rate": 1.840040012518196e-08, "loss": 0.5516, "step": 21824 }, { "epoch": 1.95, "grad_norm": 9.16932150495451, "learning_rate": 1.8338535933618005e-08, "loss": 0.5818, "step": 21825 }, { "epoch": 1.95, "grad_norm": 7.448250294110211, "learning_rate": 1.8276775723327643e-08, "loss": 0.6393, "step": 21826 }, { "epoch": 1.95, "grad_norm": 6.208496487501599, "learning_rate": 1.8215119495599844e-08, "loss": 0.5226, "step": 21827 }, { "epoch": 1.95, "grad_norm": 6.964615951074387, "learning_rate": 1.8153567251721906e-08, "loss": 0.4891, "step": 21828 }, { "epoch": 1.95, "grad_norm": 5.236145817163048, "learning_rate": 1.809211899297725e-08, "loss": 0.4984, "step": 21829 }, { "epoch": 1.95, "grad_norm": 6.076123933909918, "learning_rate": 1.8030774720650402e-08, "loss": 0.5441, "step": 21830 }, { "epoch": 1.95, "grad_norm": 9.268996661913597, "learning_rate": 1.7969534436020343e-08, "loss": 0.5142, "step": 21831 }, { "epoch": 1.95, "grad_norm": 7.1965831933105315, "learning_rate": 1.7908398140366047e-08, "loss": 0.5797, "step": 21832 }, { "epoch": 1.95, "grad_norm": 8.596723438921137, "learning_rate": 1.784736583496316e-08, "loss": 0.5821, "step": 21833 }, { "epoch": 1.95, "grad_norm": 7.483842133285127, "learning_rate": 1.7786437521085664e-08, "loss": 0.5852, "step": 21834 }, { "epoch": 1.95, "grad_norm": 7.76343300403562, "learning_rate": 1.772561320000532e-08, "loss": 0.5282, "step": 21835 }, { "epoch": 1.95, "grad_norm": 11.697366139650574, "learning_rate": 1.7664892872992222e-08, "loss": 0.5285, "step": 21836 }, { "epoch": 1.95, "grad_norm": 6.096883579913403, "learning_rate": 1.7604276541313136e-08, "loss": 0.5902, "step": 21837 }, { "epoch": 1.95, "grad_norm": 6.213362822315107, "learning_rate": 1.7543764206233715e-08, "loss": 0.5237, "step": 21838 }, { "epoch": 1.95, "grad_norm": 6.585610807893996, "learning_rate": 1.7483355869016284e-08, "loss": 0.5794, "step": 21839 }, { "epoch": 1.95, "grad_norm": 6.701287970274538, "learning_rate": 1.7423051530922607e-08, "loss": 0.5773, "step": 21840 }, { "epoch": 1.95, "grad_norm": 6.450886686953002, "learning_rate": 1.7362851193211127e-08, "loss": 0.5627, "step": 21841 }, { "epoch": 1.95, "grad_norm": 6.9322337074016325, "learning_rate": 1.7302754857138617e-08, "loss": 0.5425, "step": 21842 }, { "epoch": 1.95, "grad_norm": 13.16693081989037, "learning_rate": 1.7242762523959068e-08, "loss": 0.6186, "step": 21843 }, { "epoch": 1.95, "grad_norm": 5.969367568409202, "learning_rate": 1.718287419492426e-08, "loss": 0.5421, "step": 21844 }, { "epoch": 1.95, "grad_norm": 5.692319397150576, "learning_rate": 1.7123089871285415e-08, "loss": 0.5786, "step": 21845 }, { "epoch": 1.95, "grad_norm": 7.86953665772673, "learning_rate": 1.706340955428931e-08, "loss": 0.5781, "step": 21846 }, { "epoch": 1.95, "grad_norm": 6.438864089678426, "learning_rate": 1.7003833245182178e-08, "loss": 0.5807, "step": 21847 }, { "epoch": 1.95, "grad_norm": 6.176698042411698, "learning_rate": 1.6944360945207462e-08, "loss": 0.5524, "step": 21848 }, { "epoch": 1.95, "grad_norm": 5.597929948814989, "learning_rate": 1.6884992655606392e-08, "loss": 0.622, "step": 21849 }, { "epoch": 1.95, "grad_norm": 12.396916505862915, "learning_rate": 1.6825728377618535e-08, "loss": 0.5468, "step": 21850 }, { "epoch": 1.95, "grad_norm": 11.085768458520963, "learning_rate": 1.6766568112480673e-08, "loss": 0.5746, "step": 21851 }, { "epoch": 1.95, "grad_norm": 7.356648510615586, "learning_rate": 1.6707511861427385e-08, "loss": 0.5636, "step": 21852 }, { "epoch": 1.95, "grad_norm": 6.933014765746647, "learning_rate": 1.6648559625691562e-08, "loss": 0.5763, "step": 21853 }, { "epoch": 1.95, "grad_norm": 5.993230392773871, "learning_rate": 1.6589711406503895e-08, "loss": 0.5999, "step": 21854 }, { "epoch": 1.95, "grad_norm": 7.859258525825007, "learning_rate": 1.6530967205092286e-08, "loss": 0.5565, "step": 21855 }, { "epoch": 1.95, "grad_norm": 5.854174751798261, "learning_rate": 1.647232702268353e-08, "loss": 0.5685, "step": 21856 }, { "epoch": 1.95, "grad_norm": 4.94363133675424, "learning_rate": 1.6413790860500546e-08, "loss": 0.5324, "step": 21857 }, { "epoch": 1.95, "grad_norm": 6.610979952608761, "learning_rate": 1.6355358719766235e-08, "loss": 0.5577, "step": 21858 }, { "epoch": 1.95, "grad_norm": 5.870284716571663, "learning_rate": 1.6297030601699625e-08, "loss": 0.5575, "step": 21859 }, { "epoch": 1.95, "grad_norm": 7.494656682444522, "learning_rate": 1.6238806507518635e-08, "loss": 0.6091, "step": 21860 }, { "epoch": 1.95, "grad_norm": 8.567903681096633, "learning_rate": 1.61806864384384e-08, "loss": 0.5535, "step": 21861 }, { "epoch": 1.95, "grad_norm": 6.061082844157451, "learning_rate": 1.6122670395671836e-08, "loss": 0.53, "step": 21862 }, { "epoch": 1.95, "grad_norm": 8.024174585562154, "learning_rate": 1.6064758380429646e-08, "loss": 0.5766, "step": 21863 }, { "epoch": 1.95, "grad_norm": 7.0795547998402135, "learning_rate": 1.6006950393921417e-08, "loss": 0.6001, "step": 21864 }, { "epoch": 1.95, "grad_norm": 7.190653695346521, "learning_rate": 1.5949246437353404e-08, "loss": 0.5302, "step": 21865 }, { "epoch": 1.95, "grad_norm": 6.780315010440447, "learning_rate": 1.5891646511929648e-08, "loss": 0.5711, "step": 21866 }, { "epoch": 1.95, "grad_norm": 5.969889115321199, "learning_rate": 1.5834150618852518e-08, "loss": 0.5071, "step": 21867 }, { "epoch": 1.95, "grad_norm": 7.839385771997263, "learning_rate": 1.5776758759323273e-08, "loss": 0.5763, "step": 21868 }, { "epoch": 1.95, "grad_norm": 7.401906382816063, "learning_rate": 1.5719470934538185e-08, "loss": 0.5585, "step": 21869 }, { "epoch": 1.95, "grad_norm": 4.975523783224906, "learning_rate": 1.5662287145694065e-08, "loss": 0.5851, "step": 21870 }, { "epoch": 1.95, "grad_norm": 6.611396542000379, "learning_rate": 1.5605207393983856e-08, "loss": 0.547, "step": 21871 }, { "epoch": 1.95, "grad_norm": 8.383525109905737, "learning_rate": 1.554823168059938e-08, "loss": 0.5657, "step": 21872 }, { "epoch": 1.95, "grad_norm": 9.83954082212749, "learning_rate": 1.5491360006729682e-08, "loss": 0.6231, "step": 21873 }, { "epoch": 1.95, "grad_norm": 5.938681088547739, "learning_rate": 1.5434592373562707e-08, "loss": 0.5453, "step": 21874 }, { "epoch": 1.95, "grad_norm": 8.937416017657387, "learning_rate": 1.5377928782281948e-08, "loss": 0.5836, "step": 21875 }, { "epoch": 1.95, "grad_norm": 8.044072807474645, "learning_rate": 1.5321369234070905e-08, "loss": 0.5321, "step": 21876 }, { "epoch": 1.95, "grad_norm": 6.508049724076611, "learning_rate": 1.52649137301103e-08, "loss": 0.5884, "step": 21877 }, { "epoch": 1.95, "grad_norm": 5.719405714296873, "learning_rate": 1.5208562271578076e-08, "loss": 0.5799, "step": 21878 }, { "epoch": 1.95, "grad_norm": 7.485684231468871, "learning_rate": 1.515231485965052e-08, "loss": 0.5772, "step": 21879 }, { "epoch": 1.95, "grad_norm": 6.04411687318923, "learning_rate": 1.5096171495501688e-08, "loss": 0.5612, "step": 21880 }, { "epoch": 1.95, "grad_norm": 7.583075991355989, "learning_rate": 1.5040132180303424e-08, "loss": 0.5576, "step": 21881 }, { "epoch": 1.95, "grad_norm": 8.184883709745376, "learning_rate": 1.4984196915225902e-08, "loss": 0.5517, "step": 21882 }, { "epoch": 1.95, "grad_norm": 5.265927244356894, "learning_rate": 1.4928365701435965e-08, "loss": 0.5295, "step": 21883 }, { "epoch": 1.95, "grad_norm": 9.212112351105205, "learning_rate": 1.487263854009935e-08, "loss": 0.5708, "step": 21884 }, { "epoch": 1.95, "grad_norm": 8.140673258708263, "learning_rate": 1.4817015432379012e-08, "loss": 0.5908, "step": 21885 }, { "epoch": 1.95, "grad_norm": 6.906077343135555, "learning_rate": 1.4761496379436247e-08, "loss": 0.5951, "step": 21886 }, { "epoch": 1.95, "grad_norm": 7.563371754420981, "learning_rate": 1.4706081382430127e-08, "loss": 0.5574, "step": 21887 }, { "epoch": 1.95, "grad_norm": 6.579715636400873, "learning_rate": 1.4650770442516949e-08, "loss": 0.61, "step": 21888 }, { "epoch": 1.95, "grad_norm": 7.2398307942044395, "learning_rate": 1.459556356085079e-08, "loss": 0.6473, "step": 21889 }, { "epoch": 1.95, "grad_norm": 6.775421381021399, "learning_rate": 1.454046073858406e-08, "loss": 0.5844, "step": 21890 }, { "epoch": 1.95, "grad_norm": 7.1549005551265035, "learning_rate": 1.4485461976868065e-08, "loss": 0.563, "step": 21891 }, { "epoch": 1.95, "grad_norm": 7.363805849071634, "learning_rate": 1.4430567276849661e-08, "loss": 0.5915, "step": 21892 }, { "epoch": 1.95, "grad_norm": 7.920287215275853, "learning_rate": 1.4375776639674599e-08, "loss": 0.5584, "step": 21893 }, { "epoch": 1.95, "grad_norm": 6.2360692838449925, "learning_rate": 1.4321090066487519e-08, "loss": 0.5505, "step": 21894 }, { "epoch": 1.95, "grad_norm": 6.999990945676075, "learning_rate": 1.4266507558428622e-08, "loss": 0.6143, "step": 21895 }, { "epoch": 1.95, "grad_norm": 6.774138504279165, "learning_rate": 1.4212029116638104e-08, "loss": 0.4765, "step": 21896 }, { "epoch": 1.95, "grad_norm": 5.632184774719297, "learning_rate": 1.4157654742252835e-08, "loss": 0.5642, "step": 21897 }, { "epoch": 1.95, "grad_norm": 5.302781226546555, "learning_rate": 1.4103384436407464e-08, "loss": 0.556, "step": 21898 }, { "epoch": 1.95, "grad_norm": 7.930624931829395, "learning_rate": 1.4049218200234971e-08, "loss": 0.5532, "step": 21899 }, { "epoch": 1.95, "grad_norm": 10.957703276202325, "learning_rate": 1.399515603486612e-08, "loss": 0.556, "step": 21900 }, { "epoch": 1.95, "grad_norm": 10.351634009908246, "learning_rate": 1.3941197941429453e-08, "loss": 0.6198, "step": 21901 }, { "epoch": 1.95, "grad_norm": 4.924738325696259, "learning_rate": 1.3887343921050733e-08, "loss": 0.5878, "step": 21902 }, { "epoch": 1.95, "grad_norm": 7.572627870313145, "learning_rate": 1.383359397485462e-08, "loss": 0.5988, "step": 21903 }, { "epoch": 1.95, "grad_norm": 7.290889158622327, "learning_rate": 1.3779948103962437e-08, "loss": 0.5889, "step": 21904 }, { "epoch": 1.95, "grad_norm": 9.103948705340398, "learning_rate": 1.37264063094944e-08, "loss": 0.5782, "step": 21905 }, { "epoch": 1.95, "grad_norm": 5.78117735924223, "learning_rate": 1.3672968592567393e-08, "loss": 0.5442, "step": 21906 }, { "epoch": 1.95, "grad_norm": 5.697354723469791, "learning_rate": 1.36196349542983e-08, "loss": 0.6252, "step": 21907 }, { "epoch": 1.95, "grad_norm": 6.148684828778051, "learning_rate": 1.3566405395798454e-08, "loss": 0.545, "step": 21908 }, { "epoch": 1.95, "grad_norm": 4.772800692487624, "learning_rate": 1.3513279918179744e-08, "loss": 0.5668, "step": 21909 }, { "epoch": 1.95, "grad_norm": 6.8589217326617575, "learning_rate": 1.3460258522551284e-08, "loss": 0.5387, "step": 21910 }, { "epoch": 1.95, "grad_norm": 9.67318223070198, "learning_rate": 1.3407341210019965e-08, "loss": 0.5173, "step": 21911 }, { "epoch": 1.95, "grad_norm": 6.80416538307019, "learning_rate": 1.3354527981689348e-08, "loss": 0.6291, "step": 21912 }, { "epoch": 1.95, "grad_norm": 5.896716495438963, "learning_rate": 1.3301818838662995e-08, "loss": 0.5804, "step": 21913 }, { "epoch": 1.95, "grad_norm": 7.750176277455353, "learning_rate": 1.3249213782040027e-08, "loss": 0.5737, "step": 21914 }, { "epoch": 1.96, "grad_norm": 7.351146238956724, "learning_rate": 1.3196712812919565e-08, "loss": 0.5814, "step": 21915 }, { "epoch": 1.96, "grad_norm": 8.865917051951245, "learning_rate": 1.314431593239629e-08, "loss": 0.6412, "step": 21916 }, { "epoch": 1.96, "grad_norm": 6.480307598431985, "learning_rate": 1.3092023141564325e-08, "loss": 0.5933, "step": 21917 }, { "epoch": 1.96, "grad_norm": 4.60316344967438, "learning_rate": 1.3039834441515576e-08, "loss": 0.5988, "step": 21918 }, { "epoch": 1.96, "grad_norm": 4.878466552420484, "learning_rate": 1.2987749833338614e-08, "loss": 0.5489, "step": 21919 }, { "epoch": 1.96, "grad_norm": 6.2431266300648405, "learning_rate": 1.2935769318120905e-08, "loss": 0.5266, "step": 21920 }, { "epoch": 1.96, "grad_norm": 6.659191266935583, "learning_rate": 1.2883892896948247e-08, "loss": 0.5183, "step": 21921 }, { "epoch": 1.96, "grad_norm": 7.959152571270004, "learning_rate": 1.2832120570901995e-08, "loss": 0.544, "step": 21922 }, { "epoch": 1.96, "grad_norm": 7.29953525912357, "learning_rate": 1.2780452341064064e-08, "loss": 0.5781, "step": 21923 }, { "epoch": 1.96, "grad_norm": 5.453785950370717, "learning_rate": 1.2728888208511925e-08, "loss": 0.4874, "step": 21924 }, { "epoch": 1.96, "grad_norm": 9.130338173795755, "learning_rate": 1.2677428174322493e-08, "loss": 0.5881, "step": 21925 }, { "epoch": 1.96, "grad_norm": 7.00744428559856, "learning_rate": 1.2626072239569908e-08, "loss": 0.5298, "step": 21926 }, { "epoch": 1.96, "grad_norm": 8.621002265769834, "learning_rate": 1.2574820405325538e-08, "loss": 0.5615, "step": 21927 }, { "epoch": 1.96, "grad_norm": 5.771571616857394, "learning_rate": 1.2523672672659638e-08, "loss": 0.5534, "step": 21928 }, { "epoch": 1.96, "grad_norm": 7.214323309320008, "learning_rate": 1.2472629042639683e-08, "loss": 0.5551, "step": 21929 }, { "epoch": 1.96, "grad_norm": 7.514961527279013, "learning_rate": 1.2421689516331492e-08, "loss": 0.6106, "step": 21930 }, { "epoch": 1.96, "grad_norm": 7.450719022597871, "learning_rate": 1.2370854094797547e-08, "loss": 0.6151, "step": 21931 }, { "epoch": 1.96, "grad_norm": 9.470032104902227, "learning_rate": 1.2320122779099219e-08, "loss": 0.602, "step": 21932 }, { "epoch": 1.96, "grad_norm": 6.12069260525122, "learning_rate": 1.2269495570295664e-08, "loss": 0.5986, "step": 21933 }, { "epoch": 1.96, "grad_norm": 6.984260729477265, "learning_rate": 1.2218972469443258e-08, "loss": 0.5669, "step": 21934 }, { "epoch": 1.96, "grad_norm": 5.405520364997448, "learning_rate": 1.2168553477597267e-08, "loss": 0.6238, "step": 21935 }, { "epoch": 1.96, "grad_norm": 6.08074038735939, "learning_rate": 1.2118238595809073e-08, "loss": 0.5624, "step": 21936 }, { "epoch": 1.96, "grad_norm": 8.809412100331906, "learning_rate": 1.2068027825129502e-08, "loss": 0.5379, "step": 21937 }, { "epoch": 1.96, "grad_norm": 6.359428103599272, "learning_rate": 1.2017921166606605e-08, "loss": 0.5884, "step": 21938 }, { "epoch": 1.96, "grad_norm": 7.442890664021551, "learning_rate": 1.1967918621285657e-08, "loss": 0.544, "step": 21939 }, { "epoch": 1.96, "grad_norm": 6.626399745658119, "learning_rate": 1.1918020190210821e-08, "loss": 0.5766, "step": 21940 }, { "epoch": 1.96, "grad_norm": 6.520841490804482, "learning_rate": 1.1868225874424044e-08, "loss": 0.5489, "step": 21941 }, { "epoch": 1.96, "grad_norm": 8.180498599058517, "learning_rate": 1.1818535674963939e-08, "loss": 0.5524, "step": 21942 }, { "epoch": 1.96, "grad_norm": 7.551170576438809, "learning_rate": 1.1768949592868006e-08, "loss": 0.5453, "step": 21943 }, { "epoch": 1.96, "grad_norm": 6.704618513275409, "learning_rate": 1.1719467629171533e-08, "loss": 0.5512, "step": 21944 }, { "epoch": 1.96, "grad_norm": 7.190979503938292, "learning_rate": 1.1670089784907024e-08, "loss": 0.5634, "step": 21945 }, { "epoch": 1.96, "grad_norm": 7.134477201072489, "learning_rate": 1.1620816061104768e-08, "loss": 0.5301, "step": 21946 }, { "epoch": 1.96, "grad_norm": 7.4272431658843185, "learning_rate": 1.1571646458793385e-08, "loss": 0.5736, "step": 21947 }, { "epoch": 1.96, "grad_norm": 7.688449350966891, "learning_rate": 1.1522580978999832e-08, "loss": 0.4847, "step": 21948 }, { "epoch": 1.96, "grad_norm": 7.100686894418062, "learning_rate": 1.1473619622747733e-08, "loss": 0.574, "step": 21949 }, { "epoch": 1.96, "grad_norm": 5.500174383408303, "learning_rate": 1.1424762391059053e-08, "loss": 0.5488, "step": 21950 }, { "epoch": 1.96, "grad_norm": 5.709011784445153, "learning_rate": 1.1376009284954081e-08, "loss": 0.5143, "step": 21951 }, { "epoch": 1.96, "grad_norm": 8.236468120039437, "learning_rate": 1.1327360305449785e-08, "loss": 0.5351, "step": 21952 }, { "epoch": 1.96, "grad_norm": 5.617655715063117, "learning_rate": 1.1278815453561465e-08, "loss": 0.577, "step": 21953 }, { "epoch": 1.96, "grad_norm": 5.2863836223556, "learning_rate": 1.1230374730303306e-08, "loss": 0.5201, "step": 21954 }, { "epoch": 1.96, "grad_norm": 7.4856289421205275, "learning_rate": 1.1182038136685613e-08, "loss": 0.5407, "step": 21955 }, { "epoch": 1.96, "grad_norm": 6.050657053953674, "learning_rate": 1.113380567371758e-08, "loss": 0.5825, "step": 21956 }, { "epoch": 1.96, "grad_norm": 6.605896617275991, "learning_rate": 1.1085677342405621e-08, "loss": 0.5642, "step": 21957 }, { "epoch": 1.96, "grad_norm": 5.8627772192426955, "learning_rate": 1.1037653143755045e-08, "loss": 0.5835, "step": 21958 }, { "epoch": 1.96, "grad_norm": 6.383109838268051, "learning_rate": 1.0989733078767273e-08, "loss": 0.5273, "step": 21959 }, { "epoch": 1.96, "grad_norm": 5.568363244460694, "learning_rate": 1.0941917148443726e-08, "loss": 0.5162, "step": 21960 }, { "epoch": 1.96, "grad_norm": 6.820935038919527, "learning_rate": 1.0894205353781383e-08, "loss": 0.5486, "step": 21961 }, { "epoch": 1.96, "grad_norm": 5.812980642933535, "learning_rate": 1.0846597695776672e-08, "loss": 0.5525, "step": 21962 }, { "epoch": 1.96, "grad_norm": 11.44807904690461, "learning_rate": 1.0799094175423241e-08, "loss": 0.5695, "step": 21963 }, { "epoch": 1.96, "grad_norm": 6.244917354329995, "learning_rate": 1.0751694793712519e-08, "loss": 0.589, "step": 21964 }, { "epoch": 1.96, "grad_norm": 6.54626715591563, "learning_rate": 1.0704399551634271e-08, "loss": 0.557, "step": 21965 }, { "epoch": 1.96, "grad_norm": 6.3111808969347045, "learning_rate": 1.0657208450174372e-08, "loss": 0.5661, "step": 21966 }, { "epoch": 1.96, "grad_norm": 7.142644238461012, "learning_rate": 1.0610121490319814e-08, "loss": 0.5323, "step": 21967 }, { "epoch": 1.96, "grad_norm": 6.3400509717002915, "learning_rate": 1.0563138673051476e-08, "loss": 0.5849, "step": 21968 }, { "epoch": 1.96, "grad_norm": 6.912608167487643, "learning_rate": 1.0516259999351353e-08, "loss": 0.5418, "step": 21969 }, { "epoch": 1.96, "grad_norm": 6.051678444441906, "learning_rate": 1.046948547019755e-08, "loss": 0.582, "step": 21970 }, { "epoch": 1.96, "grad_norm": 7.554760681012585, "learning_rate": 1.0422815086566506e-08, "loss": 0.5642, "step": 21971 }, { "epoch": 1.96, "grad_norm": 6.148481409068394, "learning_rate": 1.0376248849431891e-08, "loss": 0.6103, "step": 21972 }, { "epoch": 1.96, "grad_norm": 7.337231408671628, "learning_rate": 1.0329786759766258e-08, "loss": 0.5275, "step": 21973 }, { "epoch": 1.96, "grad_norm": 5.636411907614623, "learning_rate": 1.0283428818538833e-08, "loss": 0.5015, "step": 21974 }, { "epoch": 1.96, "grad_norm": 6.037040493504931, "learning_rate": 1.0237175026717727e-08, "loss": 0.6455, "step": 21975 }, { "epoch": 1.96, "grad_norm": 7.2340828448229, "learning_rate": 1.0191025385268282e-08, "loss": 0.6243, "step": 21976 }, { "epoch": 1.96, "grad_norm": 7.1624675177344646, "learning_rate": 1.0144979895153618e-08, "loss": 0.6369, "step": 21977 }, { "epoch": 1.96, "grad_norm": 5.80273814806708, "learning_rate": 1.0099038557335183e-08, "loss": 0.597, "step": 21978 }, { "epoch": 1.96, "grad_norm": 7.2661584165257995, "learning_rate": 1.0053201372771104e-08, "loss": 0.5906, "step": 21979 }, { "epoch": 1.96, "grad_norm": 5.975178625973742, "learning_rate": 1.0007468342418947e-08, "loss": 0.495, "step": 21980 }, { "epoch": 1.96, "grad_norm": 7.438040678075386, "learning_rate": 9.961839467232948e-09, "loss": 0.5808, "step": 21981 }, { "epoch": 1.96, "grad_norm": 5.903951127484068, "learning_rate": 9.916314748165123e-09, "loss": 0.6227, "step": 21982 }, { "epoch": 1.96, "grad_norm": 6.732914267140765, "learning_rate": 9.870894186166935e-09, "loss": 0.5197, "step": 21983 }, { "epoch": 1.96, "grad_norm": 8.184619130243997, "learning_rate": 9.825577782184848e-09, "loss": 0.5359, "step": 21984 }, { "epoch": 1.96, "grad_norm": 6.1911825675196495, "learning_rate": 9.780365537165881e-09, "loss": 0.5595, "step": 21985 }, { "epoch": 1.96, "grad_norm": 7.297835262637355, "learning_rate": 9.735257452053725e-09, "loss": 0.5323, "step": 21986 }, { "epoch": 1.96, "grad_norm": 7.073531597009415, "learning_rate": 9.690253527789296e-09, "loss": 0.5556, "step": 21987 }, { "epoch": 1.96, "grad_norm": 7.783862439026037, "learning_rate": 9.645353765311839e-09, "loss": 0.5541, "step": 21988 }, { "epoch": 1.96, "grad_norm": 7.0340979156831285, "learning_rate": 9.600558165559492e-09, "loss": 0.5949, "step": 21989 }, { "epoch": 1.96, "grad_norm": 8.154113051325519, "learning_rate": 9.555866729466511e-09, "loss": 0.5995, "step": 21990 }, { "epoch": 1.96, "grad_norm": 7.2145683659446975, "learning_rate": 9.511279457966038e-09, "loss": 0.5701, "step": 21991 }, { "epoch": 1.96, "grad_norm": 7.068402135801149, "learning_rate": 9.466796351988438e-09, "loss": 0.5793, "step": 21992 }, { "epoch": 1.96, "grad_norm": 9.010461106531999, "learning_rate": 9.422417412462415e-09, "loss": 0.562, "step": 21993 }, { "epoch": 1.96, "grad_norm": 6.378639819411811, "learning_rate": 9.378142640314448e-09, "loss": 0.5336, "step": 21994 }, { "epoch": 1.96, "grad_norm": 6.576083084200753, "learning_rate": 9.333972036468241e-09, "loss": 0.5058, "step": 21995 }, { "epoch": 1.96, "grad_norm": 8.795325550128819, "learning_rate": 9.289905601845839e-09, "loss": 0.6121, "step": 21996 }, { "epoch": 1.96, "grad_norm": 6.92419746254786, "learning_rate": 9.245943337367614e-09, "loss": 0.5292, "step": 21997 }, { "epoch": 1.96, "grad_norm": 7.132480552442316, "learning_rate": 9.20208524395061e-09, "loss": 0.5152, "step": 21998 }, { "epoch": 1.96, "grad_norm": 6.116611416200535, "learning_rate": 9.158331322510205e-09, "loss": 0.5376, "step": 21999 }, { "epoch": 1.96, "grad_norm": 6.531544254283831, "learning_rate": 9.114681573960116e-09, "loss": 0.5249, "step": 22000 }, { "epoch": 1.96, "grad_norm": 7.343820613449625, "learning_rate": 9.071135999211278e-09, "loss": 0.6509, "step": 22001 }, { "epoch": 1.96, "grad_norm": 6.372368469351335, "learning_rate": 9.02769459917241e-09, "loss": 0.5691, "step": 22002 }, { "epoch": 1.96, "grad_norm": 7.22842472211101, "learning_rate": 8.984357374750008e-09, "loss": 0.5823, "step": 22003 }, { "epoch": 1.96, "grad_norm": 7.400809834530073, "learning_rate": 8.94112432684946e-09, "loss": 0.581, "step": 22004 }, { "epoch": 1.96, "grad_norm": 9.135431674862053, "learning_rate": 8.897995456372266e-09, "loss": 0.5412, "step": 22005 }, { "epoch": 1.96, "grad_norm": 6.377397032466092, "learning_rate": 8.854970764219372e-09, "loss": 0.5297, "step": 22006 }, { "epoch": 1.96, "grad_norm": 5.4647485219523, "learning_rate": 8.812050251288951e-09, "loss": 0.5913, "step": 22007 }, { "epoch": 1.96, "grad_norm": 8.31079381063605, "learning_rate": 8.769233918475839e-09, "loss": 0.5965, "step": 22008 }, { "epoch": 1.96, "grad_norm": 5.824730585814763, "learning_rate": 8.726521766674877e-09, "loss": 0.5571, "step": 22009 }, { "epoch": 1.96, "grad_norm": 5.244154070236688, "learning_rate": 8.683913796777022e-09, "loss": 0.5675, "step": 22010 }, { "epoch": 1.96, "grad_norm": 6.134743156787807, "learning_rate": 8.641410009671003e-09, "loss": 0.5498, "step": 22011 }, { "epoch": 1.96, "grad_norm": 6.9586463174400155, "learning_rate": 8.599010406245002e-09, "loss": 0.5856, "step": 22012 }, { "epoch": 1.96, "grad_norm": 6.701266580456617, "learning_rate": 8.556714987383863e-09, "loss": 0.5769, "step": 22013 }, { "epoch": 1.96, "grad_norm": 5.40350960994866, "learning_rate": 8.514523753970217e-09, "loss": 0.5461, "step": 22014 }, { "epoch": 1.96, "grad_norm": 6.804541448174119, "learning_rate": 8.472436706885023e-09, "loss": 0.5859, "step": 22015 }, { "epoch": 1.96, "grad_norm": 11.94933422203528, "learning_rate": 8.430453847005916e-09, "loss": 0.5091, "step": 22016 }, { "epoch": 1.96, "grad_norm": 5.453164063403593, "learning_rate": 8.388575175209968e-09, "loss": 0.5248, "step": 22017 }, { "epoch": 1.96, "grad_norm": 8.573766905352766, "learning_rate": 8.346800692370926e-09, "loss": 0.6115, "step": 22018 }, { "epoch": 1.96, "grad_norm": 7.393434245607433, "learning_rate": 8.305130399360873e-09, "loss": 0.5202, "step": 22019 }, { "epoch": 1.96, "grad_norm": 7.704705040274676, "learning_rate": 8.263564297049664e-09, "loss": 0.5587, "step": 22020 }, { "epoch": 1.96, "grad_norm": 6.742572134135562, "learning_rate": 8.222102386304942e-09, "loss": 0.5616, "step": 22021 }, { "epoch": 1.96, "grad_norm": 7.6779874646595205, "learning_rate": 8.180744667992124e-09, "loss": 0.6313, "step": 22022 }, { "epoch": 1.96, "grad_norm": 7.251860091288871, "learning_rate": 8.139491142973855e-09, "loss": 0.6073, "step": 22023 }, { "epoch": 1.96, "grad_norm": 6.020825589095411, "learning_rate": 8.098341812112221e-09, "loss": 0.5919, "step": 22024 }, { "epoch": 1.96, "grad_norm": 6.018372498063385, "learning_rate": 8.057296676265425e-09, "loss": 0.5513, "step": 22025 }, { "epoch": 1.96, "grad_norm": 5.431005277798216, "learning_rate": 8.016355736290005e-09, "loss": 0.5429, "step": 22026 }, { "epoch": 1.97, "grad_norm": 8.686257589237979, "learning_rate": 7.975518993041386e-09, "loss": 0.5749, "step": 22027 }, { "epoch": 1.97, "grad_norm": 6.110633938157378, "learning_rate": 7.934786447371667e-09, "loss": 0.5277, "step": 22028 }, { "epoch": 1.97, "grad_norm": 5.757301984026148, "learning_rate": 7.894158100130723e-09, "loss": 0.5717, "step": 22029 }, { "epoch": 1.97, "grad_norm": 8.792961602503867, "learning_rate": 7.853633952166206e-09, "loss": 0.5548, "step": 22030 }, { "epoch": 1.97, "grad_norm": 6.5129451004531775, "learning_rate": 7.813214004324665e-09, "loss": 0.6364, "step": 22031 }, { "epoch": 1.97, "grad_norm": 7.265300075983106, "learning_rate": 7.772898257449868e-09, "loss": 0.6038, "step": 22032 }, { "epoch": 1.97, "grad_norm": 8.025821154123895, "learning_rate": 7.73268671238281e-09, "loss": 0.5812, "step": 22033 }, { "epoch": 1.97, "grad_norm": 7.002264569700141, "learning_rate": 7.692579369963371e-09, "loss": 0.5503, "step": 22034 }, { "epoch": 1.97, "grad_norm": 10.310596925211497, "learning_rate": 7.652576231028109e-09, "loss": 0.5853, "step": 22035 }, { "epoch": 1.97, "grad_norm": 7.557389622751604, "learning_rate": 7.612677296411907e-09, "loss": 0.5898, "step": 22036 }, { "epoch": 1.97, "grad_norm": 7.516050771078672, "learning_rate": 7.572882566948548e-09, "loss": 0.5575, "step": 22037 }, { "epoch": 1.97, "grad_norm": 6.3501230225520215, "learning_rate": 7.533192043467918e-09, "loss": 0.5622, "step": 22038 }, { "epoch": 1.97, "grad_norm": 8.652017036242453, "learning_rate": 7.493605726798247e-09, "loss": 0.6065, "step": 22039 }, { "epoch": 1.97, "grad_norm": 6.16884416942004, "learning_rate": 7.454123617766652e-09, "loss": 0.5219, "step": 22040 }, { "epoch": 1.97, "grad_norm": 10.382283209383926, "learning_rate": 7.414745717196359e-09, "loss": 0.5503, "step": 22041 }, { "epoch": 1.97, "grad_norm": 5.23880044193648, "learning_rate": 7.375472025909491e-09, "loss": 0.5181, "step": 22042 }, { "epoch": 1.97, "grad_norm": 7.283413367735502, "learning_rate": 7.3363025447259485e-09, "loss": 0.5467, "step": 22043 }, { "epoch": 1.97, "grad_norm": 10.221160789992052, "learning_rate": 7.2972372744639645e-09, "loss": 0.6034, "step": 22044 }, { "epoch": 1.97, "grad_norm": 8.448855123323428, "learning_rate": 7.258276215937887e-09, "loss": 0.5427, "step": 22045 }, { "epoch": 1.97, "grad_norm": 9.09320517833088, "learning_rate": 7.2194193699615115e-09, "loss": 0.5904, "step": 22046 }, { "epoch": 1.97, "grad_norm": 6.342640159012774, "learning_rate": 7.180666737345854e-09, "loss": 0.5692, "step": 22047 }, { "epoch": 1.97, "grad_norm": 6.63727606114676, "learning_rate": 7.1420183188991575e-09, "loss": 0.5591, "step": 22048 }, { "epoch": 1.97, "grad_norm": 11.150524553316345, "learning_rate": 7.10347411542911e-09, "loss": 0.579, "step": 22049 }, { "epoch": 1.97, "grad_norm": 5.518713834743456, "learning_rate": 7.065034127740067e-09, "loss": 0.5462, "step": 22050 }, { "epoch": 1.97, "grad_norm": 5.693131706775559, "learning_rate": 7.02669835663361e-09, "loss": 0.5867, "step": 22051 }, { "epoch": 1.97, "grad_norm": 8.014706095167034, "learning_rate": 6.988466802910765e-09, "loss": 0.547, "step": 22052 }, { "epoch": 1.97, "grad_norm": 5.595365422177443, "learning_rate": 6.950339467368672e-09, "loss": 0.5175, "step": 22053 }, { "epoch": 1.97, "grad_norm": 7.431034081272035, "learning_rate": 6.912316350803916e-09, "loss": 0.6336, "step": 22054 }, { "epoch": 1.97, "grad_norm": 6.881191852867751, "learning_rate": 6.8743974540097516e-09, "loss": 0.5238, "step": 22055 }, { "epoch": 1.97, "grad_norm": 5.789835769212451, "learning_rate": 6.836582777778322e-09, "loss": 0.4972, "step": 22056 }, { "epoch": 1.97, "grad_norm": 8.451533203167667, "learning_rate": 6.798872322897887e-09, "loss": 0.5873, "step": 22057 }, { "epoch": 1.97, "grad_norm": 5.716605302533124, "learning_rate": 6.761266090156149e-09, "loss": 0.5064, "step": 22058 }, { "epoch": 1.97, "grad_norm": 5.335588820362175, "learning_rate": 6.7237640803380356e-09, "loss": 0.5569, "step": 22059 }, { "epoch": 1.97, "grad_norm": 6.09852899925173, "learning_rate": 6.686366294226254e-09, "loss": 0.5977, "step": 22060 }, { "epoch": 1.97, "grad_norm": 7.507497077362959, "learning_rate": 6.649072732601292e-09, "loss": 0.5771, "step": 22061 }, { "epoch": 1.97, "grad_norm": 5.469035775836862, "learning_rate": 6.6118833962414145e-09, "loss": 0.5542, "step": 22062 }, { "epoch": 1.97, "grad_norm": 8.990233419545133, "learning_rate": 6.574798285923778e-09, "loss": 0.5836, "step": 22063 }, { "epoch": 1.97, "grad_norm": 5.9064707162611185, "learning_rate": 6.537817402421098e-09, "loss": 0.5781, "step": 22064 }, { "epoch": 1.97, "grad_norm": 7.75215925933674, "learning_rate": 6.500940746506646e-09, "loss": 0.5857, "step": 22065 }, { "epoch": 1.97, "grad_norm": 5.675528301910791, "learning_rate": 6.4641683189492486e-09, "loss": 0.5337, "step": 22066 }, { "epoch": 1.97, "grad_norm": 9.639202209277736, "learning_rate": 6.427500120516628e-09, "loss": 0.5596, "step": 22067 }, { "epoch": 1.97, "grad_norm": 7.9512503189924795, "learning_rate": 6.390936151974281e-09, "loss": 0.5554, "step": 22068 }, { "epoch": 1.97, "grad_norm": 7.832156784313221, "learning_rate": 6.354476414085486e-09, "loss": 0.4934, "step": 22069 }, { "epoch": 1.97, "grad_norm": 7.106877573812042, "learning_rate": 6.318120907610748e-09, "loss": 0.5489, "step": 22070 }, { "epoch": 1.97, "grad_norm": 7.657840992272151, "learning_rate": 6.281869633309457e-09, "loss": 0.5655, "step": 22071 }, { "epoch": 1.97, "grad_norm": 4.561311194892741, "learning_rate": 6.245722591937675e-09, "loss": 0.5917, "step": 22072 }, { "epoch": 1.97, "grad_norm": 4.599502076491484, "learning_rate": 6.209679784250911e-09, "loss": 0.5175, "step": 22073 }, { "epoch": 1.97, "grad_norm": 8.502509330463846, "learning_rate": 6.173741211000783e-09, "loss": 0.5923, "step": 22074 }, { "epoch": 1.97, "grad_norm": 5.378741671077124, "learning_rate": 6.1379068729372496e-09, "loss": 0.612, "step": 22075 }, { "epoch": 1.97, "grad_norm": 5.302700102833208, "learning_rate": 6.102176770808599e-09, "loss": 0.5873, "step": 22076 }, { "epoch": 1.97, "grad_norm": 7.65063508385373, "learning_rate": 6.066550905360347e-09, "loss": 0.565, "step": 22077 }, { "epoch": 1.97, "grad_norm": 7.3096121766280975, "learning_rate": 6.031029277336897e-09, "loss": 0.5853, "step": 22078 }, { "epoch": 1.97, "grad_norm": 8.535624149628614, "learning_rate": 5.995611887478769e-09, "loss": 0.5703, "step": 22079 }, { "epoch": 1.97, "grad_norm": 5.84459751838388, "learning_rate": 5.960298736525372e-09, "loss": 0.609, "step": 22080 }, { "epoch": 1.97, "grad_norm": 8.838705985100281, "learning_rate": 5.925089825214448e-09, "loss": 0.5645, "step": 22081 }, { "epoch": 1.97, "grad_norm": 9.004230324001808, "learning_rate": 5.8899851542798535e-09, "loss": 0.5338, "step": 22082 }, { "epoch": 1.97, "grad_norm": 5.540797378063944, "learning_rate": 5.854984724455448e-09, "loss": 0.5237, "step": 22083 }, { "epoch": 1.97, "grad_norm": 8.179243630508909, "learning_rate": 5.820088536470647e-09, "loss": 0.5327, "step": 22084 }, { "epoch": 1.97, "grad_norm": 5.722476560829915, "learning_rate": 5.785296591054868e-09, "loss": 0.5715, "step": 22085 }, { "epoch": 1.97, "grad_norm": 4.807088307366163, "learning_rate": 5.750608888933639e-09, "loss": 0.5596, "step": 22086 }, { "epoch": 1.97, "grad_norm": 6.992228054125155, "learning_rate": 5.716025430831384e-09, "loss": 0.515, "step": 22087 }, { "epoch": 1.97, "grad_norm": 6.293570108518441, "learning_rate": 5.681546217469747e-09, "loss": 0.5733, "step": 22088 }, { "epoch": 1.97, "grad_norm": 6.43114049042933, "learning_rate": 5.6471712495687056e-09, "loss": 0.5876, "step": 22089 }, { "epoch": 1.97, "grad_norm": 5.358056408875348, "learning_rate": 5.612900527845466e-09, "loss": 0.5827, "step": 22090 }, { "epoch": 1.97, "grad_norm": 6.728805194743041, "learning_rate": 5.5787340530150116e-09, "loss": 0.5631, "step": 22091 }, { "epoch": 1.97, "grad_norm": 7.436269068743016, "learning_rate": 5.544671825791215e-09, "loss": 0.5314, "step": 22092 }, { "epoch": 1.97, "grad_norm": 5.8127889538873685, "learning_rate": 5.510713846884619e-09, "loss": 0.565, "step": 22093 }, { "epoch": 1.97, "grad_norm": 6.4775666874461555, "learning_rate": 5.476860117004101e-09, "loss": 0.5529, "step": 22094 }, { "epoch": 1.97, "grad_norm": 8.962880985950262, "learning_rate": 5.443110636856319e-09, "loss": 0.5591, "step": 22095 }, { "epoch": 1.97, "grad_norm": 8.080982523960413, "learning_rate": 5.409465407146264e-09, "loss": 0.536, "step": 22096 }, { "epoch": 1.97, "grad_norm": 11.968851020415416, "learning_rate": 5.3759244285750415e-09, "loss": 0.5732, "step": 22097 }, { "epoch": 1.97, "grad_norm": 6.823497698329042, "learning_rate": 5.342487701843202e-09, "loss": 0.6448, "step": 22098 }, { "epoch": 1.97, "grad_norm": 5.882155755839524, "learning_rate": 5.3091552276490765e-09, "loss": 0.5811, "step": 22099 }, { "epoch": 1.97, "grad_norm": 5.888363314160799, "learning_rate": 5.2759270066882196e-09, "loss": 0.6211, "step": 22100 }, { "epoch": 1.97, "grad_norm": 7.51614011410341, "learning_rate": 5.242803039653965e-09, "loss": 0.6267, "step": 22101 }, { "epoch": 1.97, "grad_norm": 7.361870228347744, "learning_rate": 5.209783327237983e-09, "loss": 0.5357, "step": 22102 }, { "epoch": 1.97, "grad_norm": 7.4147805167896, "learning_rate": 5.1768678701297205e-09, "loss": 0.5448, "step": 22103 }, { "epoch": 1.97, "grad_norm": 8.002976013383211, "learning_rate": 5.144056669015296e-09, "loss": 0.6133, "step": 22104 }, { "epoch": 1.97, "grad_norm": 5.892810994609749, "learning_rate": 5.1113497245802725e-09, "loss": 0.5062, "step": 22105 }, { "epoch": 1.97, "grad_norm": 7.005982352958882, "learning_rate": 5.078747037507437e-09, "loss": 0.5532, "step": 22106 }, { "epoch": 1.97, "grad_norm": 7.4732680292932425, "learning_rate": 5.0462486084768005e-09, "loss": 0.5697, "step": 22107 }, { "epoch": 1.97, "grad_norm": 7.242304588383606, "learning_rate": 5.013854438167265e-09, "loss": 0.5819, "step": 22108 }, { "epoch": 1.97, "grad_norm": 7.463927517965537, "learning_rate": 4.9815645272544015e-09, "loss": 0.5824, "step": 22109 }, { "epoch": 1.97, "grad_norm": 7.0761084158727865, "learning_rate": 4.9493788764126696e-09, "loss": 0.5349, "step": 22110 }, { "epoch": 1.97, "grad_norm": 6.601976587011446, "learning_rate": 4.917297486313199e-09, "loss": 0.6146, "step": 22111 }, { "epoch": 1.97, "grad_norm": 7.2609533688160255, "learning_rate": 4.885320357626566e-09, "loss": 0.5715, "step": 22112 }, { "epoch": 1.97, "grad_norm": 6.749371350676115, "learning_rate": 4.853447491019458e-09, "loss": 0.6309, "step": 22113 }, { "epoch": 1.97, "grad_norm": 5.68412372668891, "learning_rate": 4.82167888715801e-09, "loss": 0.5612, "step": 22114 }, { "epoch": 1.97, "grad_norm": 5.322023494860424, "learning_rate": 4.790014546703914e-09, "loss": 0.6145, "step": 22115 }, { "epoch": 1.97, "grad_norm": 8.192507522074091, "learning_rate": 4.7584544703199734e-09, "loss": 0.5535, "step": 22116 }, { "epoch": 1.97, "grad_norm": 4.857025100381078, "learning_rate": 4.726998658662885e-09, "loss": 0.5628, "step": 22117 }, { "epoch": 1.97, "grad_norm": 8.11183746949163, "learning_rate": 4.69564711239101e-09, "loss": 0.5653, "step": 22118 }, { "epoch": 1.97, "grad_norm": 6.578953261384625, "learning_rate": 4.664399832157718e-09, "loss": 0.5474, "step": 22119 }, { "epoch": 1.97, "grad_norm": 6.225527538521193, "learning_rate": 4.633256818615261e-09, "loss": 0.6185, "step": 22120 }, { "epoch": 1.97, "grad_norm": 7.021118665588359, "learning_rate": 4.602218072414233e-09, "loss": 0.5916, "step": 22121 }, { "epoch": 1.97, "grad_norm": 9.745294281567059, "learning_rate": 4.571283594201892e-09, "loss": 0.5757, "step": 22122 }, { "epoch": 1.97, "grad_norm": 5.584740850532303, "learning_rate": 4.5404533846243885e-09, "loss": 0.5733, "step": 22123 }, { "epoch": 1.97, "grad_norm": 4.966059588790516, "learning_rate": 4.509727444325096e-09, "loss": 0.5046, "step": 22124 }, { "epoch": 1.97, "grad_norm": 8.809857586478492, "learning_rate": 4.4791057739451695e-09, "loss": 0.6573, "step": 22125 }, { "epoch": 1.97, "grad_norm": 7.274036380878682, "learning_rate": 4.4485883741235415e-09, "loss": 0.5631, "step": 22126 }, { "epoch": 1.97, "grad_norm": 7.187021233481863, "learning_rate": 4.4181752454980355e-09, "loss": 0.5951, "step": 22127 }, { "epoch": 1.97, "grad_norm": 7.3683257144403855, "learning_rate": 4.387866388703144e-09, "loss": 0.5447, "step": 22128 }, { "epoch": 1.97, "grad_norm": 5.332328173405535, "learning_rate": 4.357661804371138e-09, "loss": 0.5525, "step": 22129 }, { "epoch": 1.97, "grad_norm": 7.956887469671657, "learning_rate": 4.327561493132626e-09, "loss": 0.569, "step": 22130 }, { "epoch": 1.97, "grad_norm": 6.044841952625606, "learning_rate": 4.2975654556154375e-09, "loss": 0.575, "step": 22131 }, { "epoch": 1.97, "grad_norm": 9.136452939140426, "learning_rate": 4.26767369244685e-09, "loss": 0.4979, "step": 22132 }, { "epoch": 1.97, "grad_norm": 9.204335569801762, "learning_rate": 4.237886204249697e-09, "loss": 0.6336, "step": 22133 }, { "epoch": 1.97, "grad_norm": 8.276441138100301, "learning_rate": 4.20820299164626e-09, "loss": 0.6158, "step": 22134 }, { "epoch": 1.97, "grad_norm": 8.602306666521141, "learning_rate": 4.178624055256597e-09, "loss": 0.5555, "step": 22135 }, { "epoch": 1.97, "grad_norm": 10.158657946212927, "learning_rate": 4.149149395696883e-09, "loss": 0.5609, "step": 22136 }, { "epoch": 1.97, "grad_norm": 5.486343534985826, "learning_rate": 4.119779013582736e-09, "loss": 0.5071, "step": 22137 }, { "epoch": 1.97, "grad_norm": 6.259039308174903, "learning_rate": 4.09051290952811e-09, "loss": 0.5455, "step": 22138 }, { "epoch": 1.98, "grad_norm": 8.289409232534155, "learning_rate": 4.061351084142518e-09, "loss": 0.532, "step": 22139 }, { "epoch": 1.98, "grad_norm": 6.824789652604377, "learning_rate": 4.03229353803547e-09, "loss": 0.5362, "step": 22140 }, { "epoch": 1.98, "grad_norm": 5.01866749185576, "learning_rate": 4.003340271813705e-09, "loss": 0.5537, "step": 22141 }, { "epoch": 1.98, "grad_norm": 6.451386480236745, "learning_rate": 3.974491286080628e-09, "loss": 0.5816, "step": 22142 }, { "epoch": 1.98, "grad_norm": 6.065811389958894, "learning_rate": 3.945746581439092e-09, "loss": 0.5254, "step": 22143 }, { "epoch": 1.98, "grad_norm": 5.432251834196403, "learning_rate": 3.91710615848917e-09, "loss": 0.4919, "step": 22144 }, { "epoch": 1.98, "grad_norm": 7.18559907862879, "learning_rate": 3.888570017828164e-09, "loss": 0.5412, "step": 22145 }, { "epoch": 1.98, "grad_norm": 6.4948210962509485, "learning_rate": 3.8601381600522625e-09, "loss": 0.5826, "step": 22146 }, { "epoch": 1.98, "grad_norm": 5.821059562655254, "learning_rate": 3.831810585754325e-09, "loss": 0.4955, "step": 22147 }, { "epoch": 1.98, "grad_norm": 10.332970755238398, "learning_rate": 3.803587295526101e-09, "loss": 0.5782, "step": 22148 }, { "epoch": 1.98, "grad_norm": 7.012364968000539, "learning_rate": 3.775468289956008e-09, "loss": 0.601, "step": 22149 }, { "epoch": 1.98, "grad_norm": 7.69440640144949, "learning_rate": 3.7474535696319095e-09, "loss": 0.6077, "step": 22150 }, { "epoch": 1.98, "grad_norm": 5.063399284310162, "learning_rate": 3.719543135138337e-09, "loss": 0.5714, "step": 22151 }, { "epoch": 1.98, "grad_norm": 5.8253735070080825, "learning_rate": 3.6917369870576035e-09, "loss": 0.5786, "step": 22152 }, { "epoch": 1.98, "grad_norm": 7.959584968020636, "learning_rate": 3.6640351259698004e-09, "loss": 0.5251, "step": 22153 }, { "epoch": 1.98, "grad_norm": 7.273784351740838, "learning_rate": 3.6364375524533535e-09, "loss": 0.5567, "step": 22154 }, { "epoch": 1.98, "grad_norm": 6.703030261907116, "learning_rate": 3.6089442670844687e-09, "loss": 0.5568, "step": 22155 }, { "epoch": 1.98, "grad_norm": 7.087197639356992, "learning_rate": 3.5815552704371315e-09, "loss": 0.5561, "step": 22156 }, { "epoch": 1.98, "grad_norm": 7.38210605886417, "learning_rate": 3.5542705630831064e-09, "loss": 0.5525, "step": 22157 }, { "epoch": 1.98, "grad_norm": 6.531382011163246, "learning_rate": 3.527090145591383e-09, "loss": 0.6081, "step": 22158 }, { "epoch": 1.98, "grad_norm": 8.110667124787977, "learning_rate": 3.500014018529285e-09, "loss": 0.6429, "step": 22159 }, { "epoch": 1.98, "grad_norm": 5.7490661180437765, "learning_rate": 3.4730421824630267e-09, "loss": 0.5447, "step": 22160 }, { "epoch": 1.98, "grad_norm": 7.688546769864429, "learning_rate": 3.44617463795438e-09, "loss": 0.5698, "step": 22161 }, { "epoch": 1.98, "grad_norm": 7.335784763102597, "learning_rate": 3.4194113855645637e-09, "loss": 0.5577, "step": 22162 }, { "epoch": 1.98, "grad_norm": 7.273497132442035, "learning_rate": 3.3927524258520194e-09, "loss": 0.5738, "step": 22163 }, { "epoch": 1.98, "grad_norm": 7.223947758277503, "learning_rate": 3.366197759373524e-09, "loss": 0.5463, "step": 22164 }, { "epoch": 1.98, "grad_norm": 7.709585087684073, "learning_rate": 3.339747386683079e-09, "loss": 0.54, "step": 22165 }, { "epoch": 1.98, "grad_norm": 6.267381325808152, "learning_rate": 3.3134013083330197e-09, "loss": 0.5497, "step": 22166 }, { "epoch": 1.98, "grad_norm": 6.213439007137727, "learning_rate": 3.2871595248734624e-09, "loss": 0.5837, "step": 22167 }, { "epoch": 1.98, "grad_norm": 5.386012550700586, "learning_rate": 3.2610220368511914e-09, "loss": 0.5637, "step": 22168 }, { "epoch": 1.98, "grad_norm": 5.365631344937806, "learning_rate": 3.2349888448129918e-09, "loss": 0.5867, "step": 22169 }, { "epoch": 1.98, "grad_norm": 7.606292904383465, "learning_rate": 3.2090599493012074e-09, "loss": 0.5983, "step": 22170 }, { "epoch": 1.98, "grad_norm": 6.3191073737328125, "learning_rate": 3.1832353508581825e-09, "loss": 0.5538, "step": 22171 }, { "epoch": 1.98, "grad_norm": 7.09358528335141, "learning_rate": 3.15751505002182e-09, "loss": 0.5479, "step": 22172 }, { "epoch": 1.98, "grad_norm": 8.077366036389796, "learning_rate": 3.1318990473294677e-09, "loss": 0.5714, "step": 22173 }, { "epoch": 1.98, "grad_norm": 7.759599016384823, "learning_rate": 3.106387343315698e-09, "loss": 0.5531, "step": 22174 }, { "epoch": 1.98, "grad_norm": 5.371223976891939, "learning_rate": 3.0809799385128624e-09, "loss": 0.5615, "step": 22175 }, { "epoch": 1.98, "grad_norm": 7.712771022138083, "learning_rate": 3.0556768334516486e-09, "loss": 0.6137, "step": 22176 }, { "epoch": 1.98, "grad_norm": 4.740216674499526, "learning_rate": 3.030478028660522e-09, "loss": 0.5822, "step": 22177 }, { "epoch": 1.98, "grad_norm": 6.900454153541293, "learning_rate": 3.005383524664618e-09, "loss": 0.5791, "step": 22178 }, { "epoch": 1.98, "grad_norm": 5.277377046920171, "learning_rate": 2.980393321987962e-09, "loss": 0.5466, "step": 22179 }, { "epoch": 1.98, "grad_norm": 5.926660300689095, "learning_rate": 2.9555074211529144e-09, "loss": 0.5306, "step": 22180 }, { "epoch": 1.98, "grad_norm": 9.450147703442616, "learning_rate": 2.930725822677949e-09, "loss": 0.602, "step": 22181 }, { "epoch": 1.98, "grad_norm": 6.23072795635163, "learning_rate": 2.9060485270804294e-09, "loss": 0.4974, "step": 22182 }, { "epoch": 1.98, "grad_norm": 7.139245127956328, "learning_rate": 2.8814755348760548e-09, "loss": 0.5216, "step": 22183 }, { "epoch": 1.98, "grad_norm": 6.072932794699601, "learning_rate": 2.857006846577193e-09, "loss": 0.5472, "step": 22184 }, { "epoch": 1.98, "grad_norm": 7.014172715628268, "learning_rate": 2.832642462694546e-09, "loss": 0.5403, "step": 22185 }, { "epoch": 1.98, "grad_norm": 5.755293346563621, "learning_rate": 2.8083823837371516e-09, "loss": 0.5922, "step": 22186 }, { "epoch": 1.98, "grad_norm": 6.210067357817848, "learning_rate": 2.7842266102112714e-09, "loss": 0.5268, "step": 22187 }, { "epoch": 1.98, "grad_norm": 6.433993340813551, "learning_rate": 2.7601751426203917e-09, "loss": 0.5418, "step": 22188 }, { "epoch": 1.98, "grad_norm": 5.739319235087379, "learning_rate": 2.736227981467443e-09, "loss": 0.6033, "step": 22189 }, { "epoch": 1.98, "grad_norm": 11.828829244397937, "learning_rate": 2.712385127252026e-09, "loss": 0.5959, "step": 22190 }, { "epoch": 1.98, "grad_norm": 8.080743749123702, "learning_rate": 2.6886465804715214e-09, "loss": 0.5779, "step": 22191 }, { "epoch": 1.98, "grad_norm": 7.436903819573825, "learning_rate": 2.6650123416216423e-09, "loss": 0.5763, "step": 22192 }, { "epoch": 1.98, "grad_norm": 4.911249957389032, "learning_rate": 2.6414824111958836e-09, "loss": 0.6041, "step": 22193 }, { "epoch": 1.98, "grad_norm": 7.232144292057686, "learning_rate": 2.618056789684964e-09, "loss": 0.6618, "step": 22194 }, { "epoch": 1.98, "grad_norm": 5.413317533312572, "learning_rate": 2.5947354775779364e-09, "loss": 0.5561, "step": 22195 }, { "epoch": 1.98, "grad_norm": 7.6933992529764295, "learning_rate": 2.571518475361634e-09, "loss": 0.5807, "step": 22196 }, { "epoch": 1.98, "grad_norm": 5.5778530256922085, "learning_rate": 2.548405783520669e-09, "loss": 0.5852, "step": 22197 }, { "epoch": 1.98, "grad_norm": 9.504704421050342, "learning_rate": 2.525397402537988e-09, "loss": 0.5824, "step": 22198 }, { "epoch": 1.98, "grad_norm": 7.472714847819385, "learning_rate": 2.5024933328926525e-09, "loss": 0.6358, "step": 22199 }, { "epoch": 1.98, "grad_norm": 8.686526010731642, "learning_rate": 2.479693575064279e-09, "loss": 0.4889, "step": 22200 }, { "epoch": 1.98, "grad_norm": 6.660111327322217, "learning_rate": 2.4569981295269328e-09, "loss": 0.4986, "step": 22201 }, { "epoch": 1.98, "grad_norm": 7.1897590493941355, "learning_rate": 2.4344069967557893e-09, "loss": 0.5649, "step": 22202 }, { "epoch": 1.98, "grad_norm": 7.165602658702933, "learning_rate": 2.4119201772221377e-09, "loss": 0.5355, "step": 22203 }, { "epoch": 1.98, "grad_norm": 6.628992049957699, "learning_rate": 2.389537671395048e-09, "loss": 0.574, "step": 22204 }, { "epoch": 1.98, "grad_norm": 5.829832212205508, "learning_rate": 2.3672594797413684e-09, "loss": 0.5225, "step": 22205 }, { "epoch": 1.98, "grad_norm": 7.515512038807537, "learning_rate": 2.3450856027268378e-09, "loss": 0.552, "step": 22206 }, { "epoch": 1.98, "grad_norm": 7.585175524009155, "learning_rate": 2.3230160408138637e-09, "loss": 0.544, "step": 22207 }, { "epoch": 1.98, "grad_norm": 7.345445328426475, "learning_rate": 2.3010507944637437e-09, "loss": 0.6015, "step": 22208 }, { "epoch": 1.98, "grad_norm": 7.495279014794338, "learning_rate": 2.2791898641338906e-09, "loss": 0.6049, "step": 22209 }, { "epoch": 1.98, "grad_norm": 12.291813328485503, "learning_rate": 2.2574332502811603e-09, "loss": 0.6349, "step": 22210 }, { "epoch": 1.98, "grad_norm": 8.08497310401628, "learning_rate": 2.2357809533596344e-09, "loss": 0.5677, "step": 22211 }, { "epoch": 1.98, "grad_norm": 6.919938005389356, "learning_rate": 2.214232973821173e-09, "loss": 0.5685, "step": 22212 }, { "epoch": 1.98, "grad_norm": 6.343612501917077, "learning_rate": 2.1927893121159725e-09, "loss": 0.5938, "step": 22213 }, { "epoch": 1.98, "grad_norm": 10.344741235101608, "learning_rate": 2.1714499686908974e-09, "loss": 0.5364, "step": 22214 }, { "epoch": 1.98, "grad_norm": 6.825223256582493, "learning_rate": 2.1502149439917017e-09, "loss": 0.5892, "step": 22215 }, { "epoch": 1.98, "grad_norm": 6.764974202400133, "learning_rate": 2.1290842384619202e-09, "loss": 0.5035, "step": 22216 }, { "epoch": 1.98, "grad_norm": 7.768045893454325, "learning_rate": 2.108057852542311e-09, "loss": 0.5486, "step": 22217 }, { "epoch": 1.98, "grad_norm": 5.972623124734009, "learning_rate": 2.087135786671968e-09, "loss": 0.5835, "step": 22218 }, { "epoch": 1.98, "grad_norm": 6.97417744155347, "learning_rate": 2.0663180412872075e-09, "loss": 0.5661, "step": 22219 }, { "epoch": 1.98, "grad_norm": 5.641611624202848, "learning_rate": 2.045604616822683e-09, "loss": 0.5063, "step": 22220 }, { "epoch": 1.98, "grad_norm": 7.628411831617069, "learning_rate": 2.0249955137108257e-09, "loss": 0.5359, "step": 22221 }, { "epoch": 1.98, "grad_norm": 6.043586395512921, "learning_rate": 2.0044907323824027e-09, "loss": 0.5438, "step": 22222 }, { "epoch": 1.98, "grad_norm": 6.945288377132015, "learning_rate": 1.9840902732642943e-09, "loss": 0.6092, "step": 22223 }, { "epoch": 1.98, "grad_norm": 10.037462558735797, "learning_rate": 1.9637941367828263e-09, "loss": 0.5533, "step": 22224 }, { "epoch": 1.98, "grad_norm": 6.857549463318732, "learning_rate": 1.9436023233615485e-09, "loss": 0.5999, "step": 22225 }, { "epoch": 1.98, "grad_norm": 8.21846382219361, "learning_rate": 1.9235148334223464e-09, "loss": 0.5613, "step": 22226 }, { "epoch": 1.98, "grad_norm": 8.293212826670509, "learning_rate": 1.9035316673837734e-09, "loss": 0.5662, "step": 22227 }, { "epoch": 1.98, "grad_norm": 8.136819790584159, "learning_rate": 1.883652825663829e-09, "loss": 0.5767, "step": 22228 }, { "epoch": 1.98, "grad_norm": 8.574258718161198, "learning_rate": 1.8638783086766253e-09, "loss": 0.6195, "step": 22229 }, { "epoch": 1.98, "grad_norm": 7.783385237031454, "learning_rate": 1.8442081168351667e-09, "loss": 0.573, "step": 22230 }, { "epoch": 1.98, "grad_norm": 5.935179416352692, "learning_rate": 1.8246422505502349e-09, "loss": 0.5704, "step": 22231 }, { "epoch": 1.98, "grad_norm": 6.68243050718667, "learning_rate": 1.805180710229837e-09, "loss": 0.6041, "step": 22232 }, { "epoch": 1.98, "grad_norm": 7.324448294348646, "learning_rate": 1.7858234962808696e-09, "loss": 0.5946, "step": 22233 }, { "epoch": 1.98, "grad_norm": 6.498361189339322, "learning_rate": 1.766570609106899e-09, "loss": 0.5711, "step": 22234 }, { "epoch": 1.98, "grad_norm": 5.648677367647626, "learning_rate": 1.7474220491092708e-09, "loss": 0.6099, "step": 22235 }, { "epoch": 1.98, "grad_norm": 6.7046150412222145, "learning_rate": 1.7283778166887755e-09, "loss": 0.585, "step": 22236 }, { "epoch": 1.98, "grad_norm": 11.710284260365597, "learning_rate": 1.7094379122423177e-09, "loss": 0.6785, "step": 22237 }, { "epoch": 1.98, "grad_norm": 5.999304942109115, "learning_rate": 1.690602336165137e-09, "loss": 0.5459, "step": 22238 }, { "epoch": 1.98, "grad_norm": 5.5749067549380085, "learning_rate": 1.671871088850252e-09, "loss": 0.5015, "step": 22239 }, { "epoch": 1.98, "grad_norm": 5.963760518397808, "learning_rate": 1.6532441706890168e-09, "loss": 0.5222, "step": 22240 }, { "epoch": 1.98, "grad_norm": 4.886729681365275, "learning_rate": 1.634721582070564e-09, "loss": 0.5023, "step": 22241 }, { "epoch": 1.98, "grad_norm": 6.901001368099837, "learning_rate": 1.6163033233806969e-09, "loss": 0.5121, "step": 22242 }, { "epoch": 1.98, "grad_norm": 6.280376982148588, "learning_rate": 1.597989395004107e-09, "loss": 0.5578, "step": 22243 }, { "epoch": 1.98, "grad_norm": 4.962712680704794, "learning_rate": 1.5797797973232665e-09, "loss": 0.5569, "step": 22244 }, { "epoch": 1.98, "grad_norm": 7.467670250219082, "learning_rate": 1.5616745307184266e-09, "loss": 0.5509, "step": 22245 }, { "epoch": 1.98, "grad_norm": 6.9459056192842885, "learning_rate": 1.5436735955670634e-09, "loss": 0.5772, "step": 22246 }, { "epoch": 1.98, "grad_norm": 8.20946625054735, "learning_rate": 1.5257769922449871e-09, "loss": 0.5482, "step": 22247 }, { "epoch": 1.98, "grad_norm": 6.089134057213708, "learning_rate": 1.5079847211263432e-09, "loss": 0.5544, "step": 22248 }, { "epoch": 1.98, "grad_norm": 7.841388483297915, "learning_rate": 1.4902967825813909e-09, "loss": 0.567, "step": 22249 }, { "epoch": 1.98, "grad_norm": 6.479539293737445, "learning_rate": 1.4727131769803892e-09, "loss": 0.5478, "step": 22250 }, { "epoch": 1.99, "grad_norm": 7.927137508014311, "learning_rate": 1.4552339046891572e-09, "loss": 0.5597, "step": 22251 }, { "epoch": 1.99, "grad_norm": 6.126243134725354, "learning_rate": 1.4378589660740683e-09, "loss": 0.5536, "step": 22252 }, { "epoch": 1.99, "grad_norm": 6.278325038199034, "learning_rate": 1.4205883614970551e-09, "loss": 0.5958, "step": 22253 }, { "epoch": 1.99, "grad_norm": 7.141252265722902, "learning_rate": 1.4034220913178297e-09, "loss": 0.512, "step": 22254 }, { "epoch": 1.99, "grad_norm": 4.45279660630959, "learning_rate": 1.3863601558961047e-09, "loss": 0.5593, "step": 22255 }, { "epoch": 1.99, "grad_norm": 7.557801985810364, "learning_rate": 1.3694025555871516e-09, "loss": 0.5869, "step": 22256 }, { "epoch": 1.99, "grad_norm": 7.04424509941981, "learning_rate": 1.352549290745131e-09, "loss": 0.5611, "step": 22257 }, { "epoch": 1.99, "grad_norm": 6.332266655060409, "learning_rate": 1.3358003617214289e-09, "loss": 0.6042, "step": 22258 }, { "epoch": 1.99, "grad_norm": 6.257661228968681, "learning_rate": 1.3191557688663204e-09, "loss": 0.5523, "step": 22259 }, { "epoch": 1.99, "grad_norm": 6.012924967951881, "learning_rate": 1.3026155125267504e-09, "loss": 0.5298, "step": 22260 }, { "epoch": 1.99, "grad_norm": 7.399939545851276, "learning_rate": 1.2861795930485533e-09, "loss": 0.5823, "step": 22261 }, { "epoch": 1.99, "grad_norm": 6.053853205971235, "learning_rate": 1.2698480107736777e-09, "loss": 0.5411, "step": 22262 }, { "epoch": 1.99, "grad_norm": 7.250067634357125, "learning_rate": 1.2536207660440724e-09, "loss": 0.5044, "step": 22263 }, { "epoch": 1.99, "grad_norm": 7.710928004119726, "learning_rate": 1.2374978591983556e-09, "loss": 0.5704, "step": 22264 }, { "epoch": 1.99, "grad_norm": 7.205464468927915, "learning_rate": 1.2214792905723694e-09, "loss": 0.6067, "step": 22265 }, { "epoch": 1.99, "grad_norm": 6.9075908969721755, "learning_rate": 1.2055650605008462e-09, "loss": 0.4714, "step": 22266 }, { "epoch": 1.99, "grad_norm": 7.1937000282384655, "learning_rate": 1.1897551693162978e-09, "loss": 0.5307, "step": 22267 }, { "epoch": 1.99, "grad_norm": 6.376717580199231, "learning_rate": 1.1740496173479054e-09, "loss": 0.6077, "step": 22268 }, { "epoch": 1.99, "grad_norm": 5.317375885253524, "learning_rate": 1.158448404924295e-09, "loss": 0.5225, "step": 22269 }, { "epoch": 1.99, "grad_norm": 5.042049362703318, "learning_rate": 1.142951532370762e-09, "loss": 0.5424, "step": 22270 }, { "epoch": 1.99, "grad_norm": 6.83602617519204, "learning_rate": 1.127559000010936e-09, "loss": 0.5723, "step": 22271 }, { "epoch": 1.99, "grad_norm": 6.802615294239427, "learning_rate": 1.112270808166227e-09, "loss": 0.5449, "step": 22272 }, { "epoch": 1.99, "grad_norm": 9.202207906290852, "learning_rate": 1.0970869571547138e-09, "loss": 0.5837, "step": 22273 }, { "epoch": 1.99, "grad_norm": 7.265828290557696, "learning_rate": 1.0820074472944753e-09, "loss": 0.5396, "step": 22274 }, { "epoch": 1.99, "grad_norm": 7.881601205138442, "learning_rate": 1.0670322789002597e-09, "loss": 0.5168, "step": 22275 }, { "epoch": 1.99, "grad_norm": 5.931135708019771, "learning_rate": 1.0521614522840395e-09, "loss": 0.6316, "step": 22276 }, { "epoch": 1.99, "grad_norm": 6.242427372973868, "learning_rate": 1.0373949677561223e-09, "loss": 0.55, "step": 22277 }, { "epoch": 1.99, "grad_norm": 8.920528474313231, "learning_rate": 1.0227328256251501e-09, "loss": 0.553, "step": 22278 }, { "epoch": 1.99, "grad_norm": 7.186142473633845, "learning_rate": 1.008175026196989e-09, "loss": 0.5513, "step": 22279 }, { "epoch": 1.99, "grad_norm": 6.788405926784339, "learning_rate": 9.937215697758406e-10, "loss": 0.5029, "step": 22280 }, { "epoch": 1.99, "grad_norm": 10.79056993594378, "learning_rate": 9.793724566631301e-10, "loss": 0.5983, "step": 22281 }, { "epoch": 1.99, "grad_norm": 7.791232439571525, "learning_rate": 9.651276871580628e-10, "loss": 0.5864, "step": 22282 }, { "epoch": 1.99, "grad_norm": 6.065739833620824, "learning_rate": 9.509872615587334e-10, "loss": 0.567, "step": 22283 }, { "epoch": 1.99, "grad_norm": 4.932181675816728, "learning_rate": 9.369511801593512e-10, "loss": 0.6063, "step": 22284 }, { "epoch": 1.99, "grad_norm": 6.715989144499419, "learning_rate": 9.230194432535699e-10, "loss": 0.532, "step": 22285 }, { "epoch": 1.99, "grad_norm": 5.982465027337448, "learning_rate": 9.091920511317131e-10, "loss": 0.5802, "step": 22286 }, { "epoch": 1.99, "grad_norm": 7.671407215443881, "learning_rate": 8.954690040829939e-10, "loss": 0.5336, "step": 22287 }, { "epoch": 1.99, "grad_norm": 5.492344413048058, "learning_rate": 8.818503023932945e-10, "loss": 0.519, "step": 22288 }, { "epoch": 1.99, "grad_norm": 6.405840338861232, "learning_rate": 8.683359463473873e-10, "loss": 0.5873, "step": 22289 }, { "epoch": 1.99, "grad_norm": 6.450353987258691, "learning_rate": 8.549259362267138e-10, "loss": 0.5332, "step": 22290 }, { "epoch": 1.99, "grad_norm": 6.714702304148322, "learning_rate": 8.416202723116051e-10, "loss": 0.5497, "step": 22291 }, { "epoch": 1.99, "grad_norm": 5.442051105345391, "learning_rate": 8.284189548796173e-10, "loss": 0.5782, "step": 22292 }, { "epoch": 1.99, "grad_norm": 6.370225491270838, "learning_rate": 8.153219842066406e-10, "loss": 0.5651, "step": 22293 }, { "epoch": 1.99, "grad_norm": 6.828418350659571, "learning_rate": 8.0232936056579e-10, "loss": 0.5253, "step": 22294 }, { "epoch": 1.99, "grad_norm": 7.670642525389698, "learning_rate": 7.894410842279599e-10, "loss": 0.5811, "step": 22295 }, { "epoch": 1.99, "grad_norm": 6.278670727596919, "learning_rate": 7.766571554623792e-10, "loss": 0.5846, "step": 22296 }, { "epoch": 1.99, "grad_norm": 7.605596781918805, "learning_rate": 7.639775745366118e-10, "loss": 0.5681, "step": 22297 }, { "epoch": 1.99, "grad_norm": 6.166738500166348, "learning_rate": 7.514023417143356e-10, "loss": 0.5779, "step": 22298 }, { "epoch": 1.99, "grad_norm": 7.902794973528408, "learning_rate": 7.389314572581185e-10, "loss": 0.5002, "step": 22299 }, { "epoch": 1.99, "grad_norm": 6.597344245559136, "learning_rate": 7.265649214283077e-10, "loss": 0.5255, "step": 22300 }, { "epoch": 1.99, "grad_norm": 6.435326122372914, "learning_rate": 7.143027344841403e-10, "loss": 0.6062, "step": 22301 }, { "epoch": 1.99, "grad_norm": 7.349023318259661, "learning_rate": 7.021448966798572e-10, "loss": 0.5843, "step": 22302 }, { "epoch": 1.99, "grad_norm": 7.318057371022918, "learning_rate": 6.900914082702548e-10, "loss": 0.5766, "step": 22303 }, { "epoch": 1.99, "grad_norm": 7.999874908401292, "learning_rate": 6.781422695067985e-10, "loss": 0.5532, "step": 22304 }, { "epoch": 1.99, "grad_norm": 7.528025393665032, "learning_rate": 6.662974806387335e-10, "loss": 0.5757, "step": 22305 }, { "epoch": 1.99, "grad_norm": 5.985371905465533, "learning_rate": 6.545570419130842e-10, "loss": 0.5148, "step": 22306 }, { "epoch": 1.99, "grad_norm": 7.385778714980955, "learning_rate": 6.429209535757652e-10, "loss": 0.5717, "step": 22307 }, { "epoch": 1.99, "grad_norm": 6.213825595346466, "learning_rate": 6.313892158688051e-10, "loss": 0.5196, "step": 22308 }, { "epoch": 1.99, "grad_norm": 7.309890260913602, "learning_rate": 6.199618290331222e-10, "loss": 0.582, "step": 22309 }, { "epoch": 1.99, "grad_norm": 7.980942061758254, "learning_rate": 6.086387933068594e-10, "loss": 0.5758, "step": 22310 }, { "epoch": 1.99, "grad_norm": 6.956185146405218, "learning_rate": 5.974201089276043e-10, "loss": 0.5031, "step": 22311 }, { "epoch": 1.99, "grad_norm": 7.6842002281884945, "learning_rate": 5.863057761279489e-10, "loss": 0.5921, "step": 22312 }, { "epoch": 1.99, "grad_norm": 8.798489501381205, "learning_rate": 5.752957951410398e-10, "loss": 0.5907, "step": 22313 }, { "epoch": 1.99, "grad_norm": 6.339016468784381, "learning_rate": 5.643901661961382e-10, "loss": 0.591, "step": 22314 }, { "epoch": 1.99, "grad_norm": 8.114318059968355, "learning_rate": 5.535888895213948e-10, "loss": 0.6101, "step": 22315 }, { "epoch": 1.99, "grad_norm": 7.21171990190858, "learning_rate": 5.428919653410747e-10, "loss": 0.5087, "step": 22316 }, { "epoch": 1.99, "grad_norm": 5.770672321506481, "learning_rate": 5.322993938799981e-10, "loss": 0.5194, "step": 22317 }, { "epoch": 1.99, "grad_norm": 7.0048908511743155, "learning_rate": 5.218111753579891e-10, "loss": 0.6396, "step": 22318 }, { "epoch": 1.99, "grad_norm": 9.026122080328829, "learning_rate": 5.114273099948719e-10, "loss": 0.5753, "step": 22319 }, { "epoch": 1.99, "grad_norm": 6.976888163200731, "learning_rate": 5.0114779800714e-10, "loss": 0.5295, "step": 22320 }, { "epoch": 1.99, "grad_norm": 8.39176762603536, "learning_rate": 4.909726396090664e-10, "loss": 0.5576, "step": 22321 }, { "epoch": 1.99, "grad_norm": 7.770527919307246, "learning_rate": 4.809018350132588e-10, "loss": 0.5481, "step": 22322 }, { "epoch": 1.99, "grad_norm": 6.746287697568165, "learning_rate": 4.709353844295494e-10, "loss": 0.6207, "step": 22323 }, { "epoch": 1.99, "grad_norm": 5.5358198566113135, "learning_rate": 4.610732880666602e-10, "loss": 0.5902, "step": 22324 }, { "epoch": 1.99, "grad_norm": 8.052025380772362, "learning_rate": 4.513155461299823e-10, "loss": 0.5657, "step": 22325 }, { "epoch": 1.99, "grad_norm": 7.276108048712554, "learning_rate": 4.416621588226866e-10, "loss": 0.5547, "step": 22326 }, { "epoch": 1.99, "grad_norm": 6.0606784068956925, "learning_rate": 4.321131263473888e-10, "loss": 0.5261, "step": 22327 }, { "epoch": 1.99, "grad_norm": 6.849666064705663, "learning_rate": 4.2266844890281877e-10, "loss": 0.6201, "step": 22328 }, { "epoch": 1.99, "grad_norm": 7.120413400383429, "learning_rate": 4.133281266865963e-10, "loss": 0.5803, "step": 22329 }, { "epoch": 1.99, "grad_norm": 4.420417292552385, "learning_rate": 4.040921598924552e-10, "loss": 0.5109, "step": 22330 }, { "epoch": 1.99, "grad_norm": 5.719843020374216, "learning_rate": 3.9496054871412946e-10, "loss": 0.56, "step": 22331 }, { "epoch": 1.99, "grad_norm": 6.582330392989247, "learning_rate": 3.8593329334257746e-10, "loss": 0.6524, "step": 22332 }, { "epoch": 1.99, "grad_norm": 7.91985564661322, "learning_rate": 3.7701039396542683e-10, "loss": 0.6175, "step": 22333 }, { "epoch": 1.99, "grad_norm": 5.779922628269397, "learning_rate": 3.6819185076919506e-10, "loss": 0.592, "step": 22334 }, { "epoch": 1.99, "grad_norm": 8.293256949136987, "learning_rate": 3.5947766393762405e-10, "loss": 0.5785, "step": 22335 }, { "epoch": 1.99, "grad_norm": 9.05589642566504, "learning_rate": 3.5086783365334554e-10, "loss": 0.5371, "step": 22336 }, { "epoch": 1.99, "grad_norm": 6.9634224402748215, "learning_rate": 3.4236236009510536e-10, "loss": 0.502, "step": 22337 }, { "epoch": 1.99, "grad_norm": 8.884155631538732, "learning_rate": 3.339612434416495e-10, "loss": 0.585, "step": 22338 }, { "epoch": 1.99, "grad_norm": 6.009075989522639, "learning_rate": 3.2566448386728287e-10, "loss": 0.5285, "step": 22339 }, { "epoch": 1.99, "grad_norm": 6.387055988646861, "learning_rate": 3.1747208154575546e-10, "loss": 0.4872, "step": 22340 }, { "epoch": 1.99, "grad_norm": 9.108964611415423, "learning_rate": 3.093840366480416e-10, "loss": 0.6097, "step": 22341 }, { "epoch": 1.99, "grad_norm": 7.148411506460795, "learning_rate": 3.014003493423401e-10, "loss": 0.5504, "step": 22342 }, { "epoch": 1.99, "grad_norm": 8.599427651303984, "learning_rate": 2.935210197957394e-10, "loss": 0.5592, "step": 22343 }, { "epoch": 1.99, "grad_norm": 5.5391121830018495, "learning_rate": 2.857460481731078e-10, "loss": 0.5769, "step": 22344 }, { "epoch": 1.99, "grad_norm": 6.326672883376822, "learning_rate": 2.7807543463598265e-10, "loss": 0.6348, "step": 22345 }, { "epoch": 1.99, "grad_norm": 6.550443305800905, "learning_rate": 2.7050917934534625e-10, "loss": 0.5963, "step": 22346 }, { "epoch": 1.99, "grad_norm": 6.606377003823034, "learning_rate": 2.6304728245829524e-10, "loss": 0.4975, "step": 22347 }, { "epoch": 1.99, "grad_norm": 10.035261457528115, "learning_rate": 2.5568974413081596e-10, "loss": 0.5611, "step": 22348 }, { "epoch": 1.99, "grad_norm": 8.283573293595728, "learning_rate": 2.484365645166742e-10, "loss": 0.5977, "step": 22349 }, { "epoch": 1.99, "grad_norm": 4.655153255367234, "learning_rate": 2.412877437668604e-10, "loss": 0.5872, "step": 22350 }, { "epoch": 1.99, "grad_norm": 6.809887664082592, "learning_rate": 2.342432820312546e-10, "loss": 0.5875, "step": 22351 }, { "epoch": 1.99, "grad_norm": 6.091482825061737, "learning_rate": 2.2730317945640622e-10, "loss": 0.5707, "step": 22352 }, { "epoch": 1.99, "grad_norm": 6.684219507773038, "learning_rate": 2.2046743618719946e-10, "loss": 0.5636, "step": 22353 }, { "epoch": 1.99, "grad_norm": 5.346664814013866, "learning_rate": 2.1373605236685302e-10, "loss": 0.5844, "step": 22354 }, { "epoch": 1.99, "grad_norm": 6.058181236891613, "learning_rate": 2.071090281346999e-10, "loss": 0.533, "step": 22355 }, { "epoch": 1.99, "grad_norm": 10.791933835727232, "learning_rate": 2.0058636363007312e-10, "loss": 0.5618, "step": 22356 }, { "epoch": 1.99, "grad_norm": 7.141190109263506, "learning_rate": 1.9416805898897496e-10, "loss": 0.5381, "step": 22357 }, { "epoch": 1.99, "grad_norm": 6.284056051986168, "learning_rate": 1.878541143451873e-10, "loss": 0.5887, "step": 22358 }, { "epoch": 1.99, "grad_norm": 6.5037453415255415, "learning_rate": 1.816445298308267e-10, "loss": 0.5679, "step": 22359 }, { "epoch": 1.99, "grad_norm": 5.61125549703276, "learning_rate": 1.7553930557467903e-10, "loss": 0.5106, "step": 22360 }, { "epoch": 1.99, "grad_norm": 6.461146545693215, "learning_rate": 1.6953844170497503e-10, "loss": 0.5776, "step": 22361 }, { "epoch": 1.99, "grad_norm": 9.008774372504956, "learning_rate": 1.636419383466148e-10, "loss": 0.6448, "step": 22362 }, { "epoch": 2.0, "grad_norm": 4.877827375155695, "learning_rate": 1.5784979562283308e-10, "loss": 0.5749, "step": 22363 }, { "epoch": 2.0, "grad_norm": 7.221856642801281, "learning_rate": 1.5216201365464422e-10, "loss": 0.5199, "step": 22364 }, { "epoch": 2.0, "grad_norm": 7.256023992441402, "learning_rate": 1.4657859256028695e-10, "loss": 0.5573, "step": 22365 }, { "epoch": 2.0, "grad_norm": 8.71890946017384, "learning_rate": 1.410995324568898e-10, "loss": 0.6086, "step": 22366 }, { "epoch": 2.0, "grad_norm": 6.0002069364918444, "learning_rate": 1.357248334582506e-10, "loss": 0.573, "step": 22367 }, { "epoch": 2.0, "grad_norm": 6.678948267962412, "learning_rate": 1.3045449567705704e-10, "loss": 0.6239, "step": 22368 }, { "epoch": 2.0, "grad_norm": 9.297306240328377, "learning_rate": 1.2528851922322117e-10, "loss": 0.4796, "step": 22369 }, { "epoch": 2.0, "grad_norm": 6.295557165961889, "learning_rate": 1.202269042038795e-10, "loss": 0.5714, "step": 22370 }, { "epoch": 2.0, "grad_norm": 7.8568982674559775, "learning_rate": 1.1526965072561347e-10, "loss": 0.5684, "step": 22371 }, { "epoch": 2.0, "grad_norm": 5.644673644257143, "learning_rate": 1.104167588916738e-10, "loss": 0.4973, "step": 22372 }, { "epoch": 2.0, "grad_norm": 6.967873596400147, "learning_rate": 1.056682288030908e-10, "loss": 0.5184, "step": 22373 }, { "epoch": 2.0, "grad_norm": 8.820118735951032, "learning_rate": 1.0102406055922942e-10, "loss": 0.5802, "step": 22374 }, { "epoch": 2.0, "grad_norm": 6.92771371377611, "learning_rate": 9.648425425667907e-11, "loss": 0.5705, "step": 22375 }, { "epoch": 2.0, "grad_norm": 7.284148147656708, "learning_rate": 9.20488099903638e-11, "loss": 0.5394, "step": 22376 }, { "epoch": 2.0, "grad_norm": 8.102470022045958, "learning_rate": 8.771772785354238e-11, "loss": 0.5259, "step": 22377 }, { "epoch": 2.0, "grad_norm": 5.404947199674627, "learning_rate": 8.349100793503262e-11, "loss": 0.4875, "step": 22378 }, { "epoch": 2.0, "grad_norm": 5.317725236868967, "learning_rate": 7.93686503247626e-11, "loss": 0.6658, "step": 22379 }, { "epoch": 2.0, "grad_norm": 7.041970215851293, "learning_rate": 7.535065510766437e-11, "loss": 0.5965, "step": 22380 }, { "epoch": 2.0, "grad_norm": 7.099479591242325, "learning_rate": 7.143702236811489e-11, "loss": 0.5327, "step": 22381 }, { "epoch": 2.0, "grad_norm": 5.766664031887119, "learning_rate": 6.762775218771556e-11, "loss": 0.5871, "step": 22382 }, { "epoch": 2.0, "grad_norm": 7.430349653011086, "learning_rate": 6.39228446458473e-11, "loss": 0.5581, "step": 22383 }, { "epoch": 2.0, "grad_norm": 6.355883248614428, "learning_rate": 6.032229981967063e-11, "loss": 0.5807, "step": 22384 }, { "epoch": 2.0, "grad_norm": 6.012090487980204, "learning_rate": 5.6826117784680725e-11, "loss": 0.6077, "step": 22385 }, { "epoch": 2.0, "grad_norm": 6.2942664284429135, "learning_rate": 5.343429861415228e-11, "loss": 0.5158, "step": 22386 }, { "epoch": 2.0, "grad_norm": 6.935991773709688, "learning_rate": 5.014684237802936e-11, "loss": 0.6201, "step": 22387 }, { "epoch": 2.0, "grad_norm": 11.412210861086258, "learning_rate": 4.6963749145700896e-11, "loss": 0.5305, "step": 22388 }, { "epoch": 2.0, "grad_norm": 5.9518147321920045, "learning_rate": 4.388501898322517e-11, "loss": 0.5432, "step": 22389 }, { "epoch": 2.0, "grad_norm": 8.402078870438466, "learning_rate": 4.0910651954995105e-11, "loss": 0.5094, "step": 22390 }, { "epoch": 2.0, "grad_norm": 5.893806644745177, "learning_rate": 3.804064812262809e-11, "loss": 0.563, "step": 22391 }, { "epoch": 2.0, "grad_norm": 6.367471351414142, "learning_rate": 3.527500754663127e-11, "loss": 0.5852, "step": 22392 }, { "epoch": 2.0, "grad_norm": 8.015465006262144, "learning_rate": 3.2613730284736245e-11, "loss": 0.5592, "step": 22393 }, { "epoch": 2.0, "grad_norm": 4.705097361994824, "learning_rate": 3.005681639245417e-11, "loss": 0.5928, "step": 22394 }, { "epoch": 2.0, "grad_norm": 8.775501331283172, "learning_rate": 2.7604265922520636e-11, "loss": 0.564, "step": 22395 }, { "epoch": 2.0, "grad_norm": 7.073701683811733, "learning_rate": 2.5256078926561012e-11, "loss": 0.614, "step": 22396 }, { "epoch": 2.0, "grad_norm": 6.815057691720495, "learning_rate": 2.3012255453980226e-11, "loss": 0.5899, "step": 22397 }, { "epoch": 2.0, "grad_norm": 9.389336987418718, "learning_rate": 2.0872795551407643e-11, "loss": 0.5649, "step": 22398 }, { "epoch": 2.0, "grad_norm": 6.328919634755875, "learning_rate": 1.8837699262697075e-11, "loss": 0.5788, "step": 22399 }, { "epoch": 2.0, "grad_norm": 6.483283933560163, "learning_rate": 1.6906966631702327e-11, "loss": 0.5674, "step": 22400 }, { "epoch": 2.0, "grad_norm": 7.691343781323884, "learning_rate": 1.5080597697281206e-11, "loss": 0.5634, "step": 22401 }, { "epoch": 2.0, "grad_norm": 6.101840360271895, "learning_rate": 1.3358592498846634e-11, "loss": 0.5501, "step": 22402 }, { "epoch": 2.0, "grad_norm": 7.053449336032607, "learning_rate": 1.1740951071370632e-11, "loss": 0.5978, "step": 22403 }, { "epoch": 2.0, "grad_norm": 6.891303774073509, "learning_rate": 1.0227673449270114e-11, "loss": 0.5771, "step": 22404 }, { "epoch": 2.0, "grad_norm": 7.17420441629277, "learning_rate": 8.818759663631327e-12, "loss": 0.5934, "step": 22405 }, { "epoch": 2.0, "grad_norm": 5.358482228732947, "learning_rate": 7.51420974443029e-12, "loss": 0.4902, "step": 22406 }, { "epoch": 2.0, "grad_norm": 9.069459840481255, "learning_rate": 6.314023718312356e-12, "loss": 0.4829, "step": 22407 }, { "epoch": 2.0, "grad_norm": 7.216518776848019, "learning_rate": 5.218201610812657e-12, "loss": 0.6042, "step": 22408 }, { "epoch": 2.0, "grad_norm": 7.80248427150608, "learning_rate": 4.22674344413565e-12, "loss": 0.5938, "step": 22409 }, { "epoch": 2.0, "grad_norm": 8.167349009318386, "learning_rate": 3.339649239930687e-12, "loss": 0.5479, "step": 22410 }, { "epoch": 2.0, "grad_norm": 6.506836064545649, "learning_rate": 2.5569190159613343e-12, "loss": 0.5285, "step": 22411 }, { "epoch": 2.0, "grad_norm": 6.0534463018330795, "learning_rate": 1.8785527888809384e-12, "loss": 0.5861, "step": 22412 }, { "epoch": 2.0, "grad_norm": 5.022926734891331, "learning_rate": 1.3045505731223984e-12, "loss": 0.5834, "step": 22413 }, { "epoch": 2.0, "grad_norm": 5.976569029159346, "learning_rate": 8.349123797879444e-13, "loss": 0.5953, "step": 22414 }, { "epoch": 2.0, "grad_norm": 6.229401353629282, "learning_rate": 4.696382194246951e-13, "loss": 0.561, "step": 22415 }, { "epoch": 2.0, "grad_norm": 10.141267721370903, "learning_rate": 2.0872809924910032e-13, "loss": 0.5424, "step": 22416 }, { "epoch": 2.0, "grad_norm": 6.615625312178967, "learning_rate": 5.218202536738659e-14, "loss": 0.5515, "step": 22417 }, { "epoch": 2.0, "grad_norm": 6.552558411704704, "learning_rate": 0.0, "loss": 0.5172, "step": 22418 }, { "epoch": 2.0, "step": 22418, "total_flos": 3.696053433321652e+19, "train_loss": 0.7196700681658665, "train_runtime": 405070.881, "train_samples_per_second": 7.084, "train_steps_per_second": 0.055 } ], "logging_steps": 1.0, "max_steps": 22418, "num_input_tokens_seen": 0, "num_train_epochs": 2, "save_steps": 1000, "total_flos": 3.696053433321652e+19, "train_batch_size": 8, "trial_name": null, "trial_params": null }